//===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading MemProf profiling data.
//
//===----------------------------------------------------------------------===//
127cca33b4SSnehasish Kumar 
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <memory>
#include <type_traits>
177cca33b4SSnehasish Kumar 
1827a4f254SSnehasish Kumar #include "llvm/ADT/ArrayRef.h"
19216575e5SSnehasish Kumar #include "llvm/ADT/DenseMap.h"
2011314f40SSnehasish Kumar #include "llvm/ADT/SmallVector.h"
218a87f42fSSnehasish Kumar #include "llvm/ADT/StringExtras.h"
22216575e5SSnehasish Kumar #include "llvm/DebugInfo/DWARF/DWARFContext.h"
23216575e5SSnehasish Kumar #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
24216575e5SSnehasish Kumar #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
25216575e5SSnehasish Kumar #include "llvm/Object/Binary.h"
26216575e5SSnehasish Kumar #include "llvm/Object/ELFObjectFile.h"
27216575e5SSnehasish Kumar #include "llvm/Object/ObjectFile.h"
287cca33b4SSnehasish Kumar #include "llvm/ProfileData/InstrProf.h"
29216575e5SSnehasish Kumar #include "llvm/ProfileData/MemProf.h"
307cca33b4SSnehasish Kumar #include "llvm/ProfileData/MemProfData.inc"
317cca33b4SSnehasish Kumar #include "llvm/ProfileData/RawMemProfReader.h"
32216575e5SSnehasish Kumar #include "llvm/Support/Endian.h"
3311314f40SSnehasish Kumar #include "llvm/Support/Path.h"
347cca33b4SSnehasish Kumar 
35cb81545eSSnehasish Kumar #define DEBUG_TYPE "memprof"
36cb81545eSSnehasish Kumar 
377cca33b4SSnehasish Kumar namespace llvm {
387cca33b4SSnehasish Kumar namespace memprof {
397cca33b4SSnehasish Kumar namespace {
/// Reads a value of type \p T from \p Ptr, which must be aligned to sizeof(T).
///
/// The bytes are copied out with memcpy instead of dereferencing a
/// reinterpret_cast'ed pointer, so the read does not rely on an object of type
/// \p T actually living at \p Ptr. The alignment requirement is still enforced
/// with an assertion because callers use this read as a sanity check on how the
/// buffer was allocated.
///
/// \param Ptr address of the first byte to read; at least sizeof(T) bytes must
///        be readable from it.
/// \returns the value formed from the sizeof(T) bytes at \p Ptr, in host byte
///          order.
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_pod<T>::value, "Not a pod type.");
  assert(reinterpret_cast<uintptr_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  T Value;
  std::memcpy(&Value, Ptr, sizeof(T));
  return Value;
}
453a4d373eSSnehasish Kumar 
checkBuffer(const MemoryBuffer & Buffer)46216575e5SSnehasish Kumar Error checkBuffer(const MemoryBuffer &Buffer) {
47216575e5SSnehasish Kumar   if (!RawMemProfReader::hasFormat(Buffer))
48dbf47d22SSnehasish Kumar     return make_error<InstrProfError>(instrprof_error::bad_magic);
49dbf47d22SSnehasish Kumar 
50216575e5SSnehasish Kumar   if (Buffer.getBufferSize() == 0)
51216575e5SSnehasish Kumar     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
52216575e5SSnehasish Kumar 
53216575e5SSnehasish Kumar   if (Buffer.getBufferSize() < sizeof(Header)) {
547cca33b4SSnehasish Kumar     return make_error<InstrProfError>(instrprof_error::truncated);
557cca33b4SSnehasish Kumar   }
567cca33b4SSnehasish Kumar 
577cca33b4SSnehasish Kumar   // The size of the buffer can be > header total size since we allow repeated
587cca33b4SSnehasish Kumar   // serialization of memprof profiles to the same file.
597cca33b4SSnehasish Kumar   uint64_t TotalSize = 0;
60216575e5SSnehasish Kumar   const char *Next = Buffer.getBufferStart();
61216575e5SSnehasish Kumar   while (Next < Buffer.getBufferEnd()) {
627cca33b4SSnehasish Kumar     auto *H = reinterpret_cast<const Header *>(Next);
637cca33b4SSnehasish Kumar     if (H->Version != MEMPROF_RAW_VERSION) {
647cca33b4SSnehasish Kumar       return make_error<InstrProfError>(instrprof_error::unsupported_version);
657cca33b4SSnehasish Kumar     }
667cca33b4SSnehasish Kumar 
677cca33b4SSnehasish Kumar     TotalSize += H->TotalSize;
687cca33b4SSnehasish Kumar     Next += H->TotalSize;
697cca33b4SSnehasish Kumar   }
707cca33b4SSnehasish Kumar 
71216575e5SSnehasish Kumar   if (Buffer.getBufferSize() != TotalSize) {
727cca33b4SSnehasish Kumar     return make_error<InstrProfError>(instrprof_error::malformed);
737cca33b4SSnehasish Kumar   }
74216575e5SSnehasish Kumar   return Error::success();
75216575e5SSnehasish Kumar }
767cca33b4SSnehasish Kumar 
readSegmentEntries(const char * Ptr)77216575e5SSnehasish Kumar llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
78216575e5SSnehasish Kumar   using namespace support;
79216575e5SSnehasish Kumar 
80216575e5SSnehasish Kumar   const uint64_t NumItemsToRead =
81216575e5SSnehasish Kumar       endian::readNext<uint64_t, little, unaligned>(Ptr);
82216575e5SSnehasish Kumar   llvm::SmallVector<SegmentEntry> Items;
83216575e5SSnehasish Kumar   for (uint64_t I = 0; I < NumItemsToRead; I++) {
84216575e5SSnehasish Kumar     Items.push_back(*reinterpret_cast<const SegmentEntry *>(
85216575e5SSnehasish Kumar         Ptr + I * sizeof(SegmentEntry)));
86216575e5SSnehasish Kumar   }
87216575e5SSnehasish Kumar   return Items;
88216575e5SSnehasish Kumar }
89216575e5SSnehasish Kumar 
90216575e5SSnehasish Kumar llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocks(const char * Ptr)91216575e5SSnehasish Kumar readMemInfoBlocks(const char *Ptr) {
92216575e5SSnehasish Kumar   using namespace support;
93216575e5SSnehasish Kumar 
94216575e5SSnehasish Kumar   const uint64_t NumItemsToRead =
95216575e5SSnehasish Kumar       endian::readNext<uint64_t, little, unaligned>(Ptr);
96216575e5SSnehasish Kumar   llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
97216575e5SSnehasish Kumar   for (uint64_t I = 0; I < NumItemsToRead; I++) {
98216575e5SSnehasish Kumar     const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr);
99216575e5SSnehasish Kumar     const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
100216575e5SSnehasish Kumar     Items.push_back({Id, MIB});
101216575e5SSnehasish Kumar     // Only increment by size of MIB since readNext implicitly increments.
102216575e5SSnehasish Kumar     Ptr += sizeof(MemInfoBlock);
103216575e5SSnehasish Kumar   }
104216575e5SSnehasish Kumar   return Items;
105216575e5SSnehasish Kumar }
106216575e5SSnehasish Kumar 
readStackInfo(const char * Ptr)107216575e5SSnehasish Kumar CallStackMap readStackInfo(const char *Ptr) {
108216575e5SSnehasish Kumar   using namespace support;
109216575e5SSnehasish Kumar 
110216575e5SSnehasish Kumar   const uint64_t NumItemsToRead =
111216575e5SSnehasish Kumar       endian::readNext<uint64_t, little, unaligned>(Ptr);
112216575e5SSnehasish Kumar   CallStackMap Items;
113216575e5SSnehasish Kumar 
114216575e5SSnehasish Kumar   for (uint64_t I = 0; I < NumItemsToRead; I++) {
115216575e5SSnehasish Kumar     const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr);
116216575e5SSnehasish Kumar     const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr);
117216575e5SSnehasish Kumar 
118dda7b749SSnehasish Kumar     SmallVector<uint64_t> CallStack;
119216575e5SSnehasish Kumar     for (uint64_t J = 0; J < NumPCs; J++) {
120216575e5SSnehasish Kumar       CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr));
121216575e5SSnehasish Kumar     }
122216575e5SSnehasish Kumar 
123216575e5SSnehasish Kumar     Items[StackId] = CallStack;
124216575e5SSnehasish Kumar   }
125216575e5SSnehasish Kumar   return Items;
126216575e5SSnehasish Kumar }
127216575e5SSnehasish Kumar 
128216575e5SSnehasish Kumar // Merges the contents of stack information in \p From to \p To. Returns true if
129216575e5SSnehasish Kumar // any stack ids observed previously map to a different set of program counter
130216575e5SSnehasish Kumar // addresses.
mergeStackMap(const CallStackMap & From,CallStackMap & To)131216575e5SSnehasish Kumar bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
132216575e5SSnehasish Kumar   for (const auto &IdStack : From) {
133216575e5SSnehasish Kumar     auto I = To.find(IdStack.first);
134216575e5SSnehasish Kumar     if (I == To.end()) {
135216575e5SSnehasish Kumar       To[IdStack.first] = IdStack.second;
136216575e5SSnehasish Kumar     } else {
137216575e5SSnehasish Kumar       // Check that the PCs are the same (in order).
138216575e5SSnehasish Kumar       if (IdStack.second != I->second)
139216575e5SSnehasish Kumar         return true;
140216575e5SSnehasish Kumar     }
141216575e5SSnehasish Kumar   }
142216575e5SSnehasish Kumar   return false;
143216575e5SSnehasish Kumar }
144216575e5SSnehasish Kumar 
report(Error E,const StringRef Context)145216575e5SSnehasish Kumar Error report(Error E, const StringRef Context) {
146216575e5SSnehasish Kumar   return joinErrors(createStringError(inconvertibleErrorCode(), Context),
147216575e5SSnehasish Kumar                     std::move(E));
148216575e5SSnehasish Kumar }
14911314f40SSnehasish Kumar 
isRuntimePath(const StringRef Path)15011314f40SSnehasish Kumar bool isRuntimePath(const StringRef Path) {
15111314f40SSnehasish Kumar   return StringRef(llvm::sys::path::convert_to_slash(Path))
15211314f40SSnehasish Kumar       .contains("memprof/memprof_");
15311314f40SSnehasish Kumar }
1548a87f42fSSnehasish Kumar 
getBuildIdString(const SegmentEntry & Entry)1558a87f42fSSnehasish Kumar std::string getBuildIdString(const SegmentEntry &Entry) {
1568a87f42fSSnehasish Kumar   constexpr size_t Size = sizeof(Entry.BuildId) / sizeof(uint8_t);
1578a87f42fSSnehasish Kumar   constexpr uint8_t Zeros[Size] = {0};
1588a87f42fSSnehasish Kumar   // If the build id is unset print a helpful string instead of all zeros.
1598a87f42fSSnehasish Kumar   if (memcmp(Entry.BuildId, Zeros, Size) == 0)
1608a87f42fSSnehasish Kumar     return "<None>";
1618a87f42fSSnehasish Kumar 
1628a87f42fSSnehasish Kumar   std::string Str;
1638a87f42fSSnehasish Kumar   raw_string_ostream OS(Str);
1648a87f42fSSnehasish Kumar   for (size_t I = 0; I < Size; I++) {
1658a87f42fSSnehasish Kumar     OS << format_hex_no_prefix(Entry.BuildId[I], 2);
1668a87f42fSSnehasish Kumar   }
1678a87f42fSSnehasish Kumar   return OS.str();
1688a87f42fSSnehasish Kumar }
169216575e5SSnehasish Kumar } // namespace
170216575e5SSnehasish Kumar 
171216575e5SSnehasish Kumar Expected<std::unique_ptr<RawMemProfReader>>
create(const Twine & Path,const StringRef ProfiledBinary,bool KeepName)172ec51971eSSnehasish Kumar RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
173ec51971eSSnehasish Kumar                          bool KeepName) {
174216575e5SSnehasish Kumar   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
175216575e5SSnehasish Kumar   if (std::error_code EC = BufferOr.getError())
176216575e5SSnehasish Kumar     return report(errorCodeToError(EC), Path.getSingleStringRef());
177216575e5SSnehasish Kumar 
178216575e5SSnehasish Kumar   std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
179216575e5SSnehasish Kumar   if (Error E = checkBuffer(*Buffer))
180216575e5SSnehasish Kumar     return report(std::move(E), Path.getSingleStringRef());
181216575e5SSnehasish Kumar 
182216575e5SSnehasish Kumar   if (ProfiledBinary.empty())
183216575e5SSnehasish Kumar     return report(
184216575e5SSnehasish Kumar         errorCodeToError(make_error_code(std::errc::invalid_argument)),
185216575e5SSnehasish Kumar         "Path to profiled binary is empty!");
186216575e5SSnehasish Kumar 
187216575e5SSnehasish Kumar   auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
188216575e5SSnehasish Kumar   if (!BinaryOr) {
189216575e5SSnehasish Kumar     return report(BinaryOr.takeError(), ProfiledBinary);
190216575e5SSnehasish Kumar   }
191216575e5SSnehasish Kumar 
192ec51971eSSnehasish Kumar   // Use new here since constructor is private.
193962db7deSSnehasish Kumar   std::unique_ptr<RawMemProfReader> Reader(
194962db7deSSnehasish Kumar       new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
195962db7deSSnehasish Kumar   if (Error E = Reader->initialize(std::move(Buffer))) {
196216575e5SSnehasish Kumar     return std::move(E);
197216575e5SSnehasish Kumar   }
198216575e5SSnehasish Kumar   return std::move(Reader);
199216575e5SSnehasish Kumar }
200216575e5SSnehasish Kumar 
hasFormat(const StringRef Path)201216575e5SSnehasish Kumar bool RawMemProfReader::hasFormat(const StringRef Path) {
202216575e5SSnehasish Kumar   auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
203216575e5SSnehasish Kumar   if (!BufferOr)
204216575e5SSnehasish Kumar     return false;
205216575e5SSnehasish Kumar 
206216575e5SSnehasish Kumar   std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
207216575e5SSnehasish Kumar   return hasFormat(*Buffer);
2087cca33b4SSnehasish Kumar }
2097cca33b4SSnehasish Kumar 
hasFormat(const MemoryBuffer & Buffer)2107cca33b4SSnehasish Kumar bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
2117cca33b4SSnehasish Kumar   if (Buffer.getBufferSize() < sizeof(uint64_t))
2127cca33b4SSnehasish Kumar     return false;
2133a4d373eSSnehasish Kumar   // Aligned read to sanity check that the buffer was allocated with at least 8b
2143a4d373eSSnehasish Kumar   // alignment.
2153a4d373eSSnehasish Kumar   const uint64_t Magic = alignedRead(Buffer.getBufferStart());
2167cca33b4SSnehasish Kumar   return Magic == MEMPROF_RAW_MAGIC_64;
2177cca33b4SSnehasish Kumar }
2187cca33b4SSnehasish Kumar 
printYAML(raw_ostream & OS)21914f4f63aSSnehasish Kumar void RawMemProfReader::printYAML(raw_ostream &OS) {
220962db7deSSnehasish Kumar   uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
221962db7deSSnehasish Kumar   for (const auto &KV : FunctionProfileData) {
222962db7deSSnehasish Kumar     const size_t NumAllocSites = KV.second.AllocSites.size();
223962db7deSSnehasish Kumar     if (NumAllocSites > 0) {
224962db7deSSnehasish Kumar       NumAllocFunctions++;
225962db7deSSnehasish Kumar       NumMibInfo += NumAllocSites;
226962db7deSSnehasish Kumar     }
227962db7deSSnehasish Kumar   }
228962db7deSSnehasish Kumar 
22914f4f63aSSnehasish Kumar   OS << "MemprofProfile:\n";
230962db7deSSnehasish Kumar   OS << "  Summary:\n";
231962db7deSSnehasish Kumar   OS << "    Version: " << MEMPROF_RAW_VERSION << "\n";
232962db7deSSnehasish Kumar   OS << "    NumSegments: " << SegmentInfo.size() << "\n";
233962db7deSSnehasish Kumar   OS << "    NumMibInfo: " << NumMibInfo << "\n";
234962db7deSSnehasish Kumar   OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
235962db7deSSnehasish Kumar   OS << "    NumStackOffsets: " << StackMap.size() << "\n";
2368a87f42fSSnehasish Kumar   // Print out the segment information.
2378a87f42fSSnehasish Kumar   OS << "  Segments:\n";
2388a87f42fSSnehasish Kumar   for (const auto &Entry : SegmentInfo) {
2398a87f42fSSnehasish Kumar     OS << "  -\n";
2408a87f42fSSnehasish Kumar     OS << "    BuildId: " << getBuildIdString(Entry) << "\n";
2418a87f42fSSnehasish Kumar     OS << "    Start: 0x" << llvm::utohexstr(Entry.Start) << "\n";
2428a87f42fSSnehasish Kumar     OS << "    End: 0x" << llvm::utohexstr(Entry.End) << "\n";
2438a87f42fSSnehasish Kumar     OS << "    Offset: 0x" << llvm::utohexstr(Entry.Offset) << "\n";
2448a87f42fSSnehasish Kumar   }
245216575e5SSnehasish Kumar   // Print out the merged contents of the profiles.
246216575e5SSnehasish Kumar   OS << "  Records:\n";
24727a4f254SSnehasish Kumar   for (const auto &Entry : *this) {
248216575e5SSnehasish Kumar     OS << "  -\n";
24927a4f254SSnehasish Kumar     OS << "    FunctionGUID: " << Entry.first << "\n";
25027a4f254SSnehasish Kumar     Entry.second.print(OS);
251216575e5SSnehasish Kumar   }
25214f4f63aSSnehasish Kumar }
25314f4f63aSSnehasish Kumar 
initialize(std::unique_ptr<MemoryBuffer> DataBuffer)254962db7deSSnehasish Kumar Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
255216575e5SSnehasish Kumar   const StringRef FileName = Binary.getBinary()->getFileName();
256216575e5SSnehasish Kumar 
257216575e5SSnehasish Kumar   auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
258216575e5SSnehasish Kumar   if (!ElfObject) {
259216575e5SSnehasish Kumar     return report(make_error<StringError>(Twine("Not an ELF file: "),
260216575e5SSnehasish Kumar                                           inconvertibleErrorCode()),
261216575e5SSnehasish Kumar                   FileName);
262216575e5SSnehasish Kumar   }
263216575e5SSnehasish Kumar 
264*3a1a404aSSnehasish Kumar   // Check whether the profiled binary was built with position independent code
265*3a1a404aSSnehasish Kumar   // (PIC). For now we provide a error message until symbolization support
266*3a1a404aSSnehasish Kumar   // is added for pic.
267*3a1a404aSSnehasish Kumar   auto* Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
268*3a1a404aSSnehasish Kumar   const llvm::object::ELF64LEFile& ElfFile = Elf64LEObject->getELFFile();
269*3a1a404aSSnehasish Kumar   auto PHdrsOr = ElfFile.program_headers();
270*3a1a404aSSnehasish Kumar   if(!PHdrsOr)
271*3a1a404aSSnehasish Kumar     return report(make_error<StringError>(Twine("Could not read program headers: "),
272*3a1a404aSSnehasish Kumar                                           inconvertibleErrorCode()),
273*3a1a404aSSnehasish Kumar                   FileName);
274*3a1a404aSSnehasish Kumar   auto FirstLoadHeader = PHdrsOr->begin();
275*3a1a404aSSnehasish Kumar   while (FirstLoadHeader->p_type != llvm::ELF::PT_LOAD)
276*3a1a404aSSnehasish Kumar     ++FirstLoadHeader;
277*3a1a404aSSnehasish Kumar   if(FirstLoadHeader->p_vaddr == 0)
278*3a1a404aSSnehasish Kumar     return report(make_error<StringError>(Twine("Unsupported position independent code"),
279*3a1a404aSSnehasish Kumar                                           inconvertibleErrorCode()),
280*3a1a404aSSnehasish Kumar                   FileName);
281*3a1a404aSSnehasish Kumar 
282216575e5SSnehasish Kumar   auto Triple = ElfObject->makeTriple();
283216575e5SSnehasish Kumar   if (!Triple.isX86())
284216575e5SSnehasish Kumar     return report(make_error<StringError>(Twine("Unsupported target: ") +
285216575e5SSnehasish Kumar                                               Triple.getArchName(),
286216575e5SSnehasish Kumar                                           inconvertibleErrorCode()),
287216575e5SSnehasish Kumar                   FileName);
288216575e5SSnehasish Kumar 
289216575e5SSnehasish Kumar   auto *Object = cast<object::ObjectFile>(Binary.getBinary());
290216575e5SSnehasish Kumar   std::unique_ptr<DIContext> Context = DWARFContext::create(
291216575e5SSnehasish Kumar       *Object, DWARFContext::ProcessDebugRelocations::Process);
292216575e5SSnehasish Kumar 
293216575e5SSnehasish Kumar   auto SOFOr = symbolize::SymbolizableObjectFile::create(
294216575e5SSnehasish Kumar       Object, std::move(Context), /*UntagAddresses=*/false);
295216575e5SSnehasish Kumar   if (!SOFOr)
296216575e5SSnehasish Kumar     return report(SOFOr.takeError(), FileName);
297216575e5SSnehasish Kumar   Symbolizer = std::move(SOFOr.get());
298216575e5SSnehasish Kumar 
299962db7deSSnehasish Kumar   if (Error E = readRawProfile(std::move(DataBuffer)))
300dda7b749SSnehasish Kumar     return E;
301dda7b749SSnehasish Kumar 
30227a4f254SSnehasish Kumar   if (Error E = symbolizeAndFilterStackFrames())
30327a4f254SSnehasish Kumar     return E;
30427a4f254SSnehasish Kumar 
30527a4f254SSnehasish Kumar   return mapRawProfileToRecords();
30627a4f254SSnehasish Kumar }
30727a4f254SSnehasish Kumar 
mapRawProfileToRecords()30827a4f254SSnehasish Kumar Error RawMemProfReader::mapRawProfileToRecords() {
30927a4f254SSnehasish Kumar   // Hold a mapping from function to each callsite location we encounter within
31027a4f254SSnehasish Kumar   // it that is part of some dynamic allocation context. The location is stored
31127a4f254SSnehasish Kumar   // as a pointer to a symbolized list of inline frames.
3126dd6a616SSnehasish Kumar   using LocationPtr = const llvm::SmallVector<FrameId> *;
31327a4f254SSnehasish Kumar   llvm::DenseMap<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
31427a4f254SSnehasish Kumar       PerFunctionCallSites;
31527a4f254SSnehasish Kumar 
31627a4f254SSnehasish Kumar   // Convert the raw profile callstack data into memprof records. While doing so
31727a4f254SSnehasish Kumar   // keep track of related contexts so that we can fill these in later.
31827a4f254SSnehasish Kumar   for (const auto &Entry : CallstackProfileData) {
31927a4f254SSnehasish Kumar     const uint64_t StackId = Entry.first;
32027a4f254SSnehasish Kumar 
32127a4f254SSnehasish Kumar     auto It = StackMap.find(StackId);
32227a4f254SSnehasish Kumar     if (It == StackMap.end())
32327a4f254SSnehasish Kumar       return make_error<InstrProfError>(
32427a4f254SSnehasish Kumar           instrprof_error::malformed,
32527a4f254SSnehasish Kumar           "memprof callstack record does not contain id: " + Twine(StackId));
32627a4f254SSnehasish Kumar 
32727a4f254SSnehasish Kumar     // Construct the symbolized callstack.
3286dd6a616SSnehasish Kumar     llvm::SmallVector<FrameId> Callstack;
32927a4f254SSnehasish Kumar     Callstack.reserve(It->getSecond().size());
33027a4f254SSnehasish Kumar 
33127a4f254SSnehasish Kumar     llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
33227a4f254SSnehasish Kumar     for (size_t I = 0; I < Addresses.size(); I++) {
33327a4f254SSnehasish Kumar       const uint64_t Address = Addresses[I];
33427a4f254SSnehasish Kumar       assert(SymbolizedFrame.count(Address) > 0 &&
33527a4f254SSnehasish Kumar              "Address not found in SymbolizedFrame map");
3366dd6a616SSnehasish Kumar       const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];
33727a4f254SSnehasish Kumar 
3386dd6a616SSnehasish Kumar       assert(!idToFrame(Frames.back()).IsInlineFrame &&
33927a4f254SSnehasish Kumar              "The last frame should not be inlined");
34027a4f254SSnehasish Kumar 
34127a4f254SSnehasish Kumar       // Record the callsites for each function. Skip the first frame of the
34227a4f254SSnehasish Kumar       // first address since it is the allocation site itself that is recorded
34327a4f254SSnehasish Kumar       // as an alloc site.
34427a4f254SSnehasish Kumar       for (size_t J = 0; J < Frames.size(); J++) {
34527a4f254SSnehasish Kumar         if (I == 0 && J == 0)
34627a4f254SSnehasish Kumar           continue;
34727a4f254SSnehasish Kumar         // We attach the entire bottom-up frame here for the callsite even
34827a4f254SSnehasish Kumar         // though we only need the frames up to and including the frame for
34927a4f254SSnehasish Kumar         // Frames[J].Function. This will enable better deduplication for
35027a4f254SSnehasish Kumar         // compression in the future.
3516dd6a616SSnehasish Kumar         const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
3526dd6a616SSnehasish Kumar         PerFunctionCallSites[Guid].insert(&Frames);
35327a4f254SSnehasish Kumar       }
35427a4f254SSnehasish Kumar 
35527a4f254SSnehasish Kumar       // Add all the frames to the current allocation callstack.
35627a4f254SSnehasish Kumar       Callstack.append(Frames.begin(), Frames.end());
35727a4f254SSnehasish Kumar     }
35827a4f254SSnehasish Kumar 
35927a4f254SSnehasish Kumar     // We attach the memprof record to each function bottom-up including the
36027a4f254SSnehasish Kumar     // first non-inline frame.
36127a4f254SSnehasish Kumar     for (size_t I = 0; /*Break out using the condition below*/; I++) {
3626dd6a616SSnehasish Kumar       const Frame &F = idToFrame(Callstack[I]);
36327a4f254SSnehasish Kumar       auto Result =
3646dd6a616SSnehasish Kumar           FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
3656dd6a616SSnehasish Kumar       IndexedMemProfRecord &Record = Result.first->second;
36627a4f254SSnehasish Kumar       Record.AllocSites.emplace_back(Callstack, Entry.second);
36727a4f254SSnehasish Kumar 
3686dd6a616SSnehasish Kumar       if (!F.IsInlineFrame)
36927a4f254SSnehasish Kumar         break;
37027a4f254SSnehasish Kumar     }
37127a4f254SSnehasish Kumar   }
37227a4f254SSnehasish Kumar 
37327a4f254SSnehasish Kumar   // Fill in the related callsites per function.
37427a4f254SSnehasish Kumar   for (auto I = PerFunctionCallSites.begin(), E = PerFunctionCallSites.end();
37527a4f254SSnehasish Kumar        I != E; I++) {
37627a4f254SSnehasish Kumar     const GlobalValue::GUID Id = I->first;
37727a4f254SSnehasish Kumar     // Some functions may have only callsite data and no allocation data. Here
37827a4f254SSnehasish Kumar     // we insert a new entry for callsite data if we need to.
3796dd6a616SSnehasish Kumar     auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
3806dd6a616SSnehasish Kumar     IndexedMemProfRecord &Record = Result.first->second;
38127a4f254SSnehasish Kumar     for (LocationPtr Loc : I->getSecond()) {
38227a4f254SSnehasish Kumar       Record.CallSites.push_back(*Loc);
38327a4f254SSnehasish Kumar     }
38427a4f254SSnehasish Kumar   }
38527a4f254SSnehasish Kumar 
38627a4f254SSnehasish Kumar   return Error::success();
387dda7b749SSnehasish Kumar }
388dda7b749SSnehasish Kumar 
symbolizeAndFilterStackFrames()38911314f40SSnehasish Kumar Error RawMemProfReader::symbolizeAndFilterStackFrames() {
390dda7b749SSnehasish Kumar   // The specifier to use when symbolization is requested.
391dda7b749SSnehasish Kumar   const DILineInfoSpecifier Specifier(
392dda7b749SSnehasish Kumar       DILineInfoSpecifier::FileLineInfoKind::RawValue,
393dda7b749SSnehasish Kumar       DILineInfoSpecifier::FunctionNameKind::LinkageName);
394dda7b749SSnehasish Kumar 
39511314f40SSnehasish Kumar   // For entries where all PCs in the callstack are discarded, we erase the
39611314f40SSnehasish Kumar   // entry from the stack map.
39711314f40SSnehasish Kumar   llvm::SmallVector<uint64_t> EntriesToErase;
39811314f40SSnehasish Kumar   // We keep track of all prior discarded entries so that we can avoid invoking
39911314f40SSnehasish Kumar   // the symbolizer for such entries.
40011314f40SSnehasish Kumar   llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
40111314f40SSnehasish Kumar   for (auto &Entry : StackMap) {
402dda7b749SSnehasish Kumar     for (const uint64_t VAddr : Entry.getSecond()) {
40311314f40SSnehasish Kumar       // Check if we have already symbolized and cached the result or if we
40411314f40SSnehasish Kumar       // don't want to attempt symbolization since we know this address is bad.
40511314f40SSnehasish Kumar       // In this case the address is also removed from the current callstack.
40611314f40SSnehasish Kumar       if (SymbolizedFrame.count(VAddr) > 0 ||
40711314f40SSnehasish Kumar           AllVAddrsToDiscard.contains(VAddr))
408dda7b749SSnehasish Kumar         continue;
409dda7b749SSnehasish Kumar 
410dda7b749SSnehasish Kumar       Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
411dda7b749SSnehasish Kumar           getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
412dda7b749SSnehasish Kumar       if (!DIOr)
413dda7b749SSnehasish Kumar         return DIOr.takeError();
414dda7b749SSnehasish Kumar       DIInliningInfo DI = DIOr.get();
415dda7b749SSnehasish Kumar 
41611314f40SSnehasish Kumar       // Drop frames which we can't symbolize or if they belong to the runtime.
41711314f40SSnehasish Kumar       if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
41811314f40SSnehasish Kumar           isRuntimePath(DI.getFrame(0).FileName)) {
41911314f40SSnehasish Kumar         AllVAddrsToDiscard.insert(VAddr);
42011314f40SSnehasish Kumar         continue;
42111314f40SSnehasish Kumar       }
42211314f40SSnehasish Kumar 
423c9a3d296SSnehasish Kumar       for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
424c9a3d296SSnehasish Kumar            I++) {
4256dd6a616SSnehasish Kumar         const auto &DIFrame = DI.getFrame(I);
426ec51971eSSnehasish Kumar         const uint64_t Guid =
427ec51971eSSnehasish Kumar             IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
428ec51971eSSnehasish Kumar         const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
429c9a3d296SSnehasish Kumar                       // Only the last entry is not an inlined location.
430c9a3d296SSnehasish Kumar                       I != NumFrames - 1);
431ec51971eSSnehasish Kumar         // Here we retain a mapping from the GUID to symbol name instead of
432ec51971eSSnehasish Kumar         // adding it to the frame object directly to reduce memory overhead.
433ec51971eSSnehasish Kumar         // This is because there can be many unique frames, particularly for
434ec51971eSSnehasish Kumar         // callsite frames.
435ec51971eSSnehasish Kumar         if (KeepSymbolName)
436ec51971eSSnehasish Kumar           GuidToSymbolName.insert({Guid, DIFrame.FunctionName});
4376dd6a616SSnehasish Kumar 
438ec51971eSSnehasish Kumar         const FrameId Hash = F.hash();
439ec51971eSSnehasish Kumar         IdToFrame.insert({Hash, F});
440ec51971eSSnehasish Kumar         SymbolizedFrame[VAddr].push_back(Hash);
441dda7b749SSnehasish Kumar       }
442dda7b749SSnehasish Kumar     }
44311314f40SSnehasish Kumar 
44411314f40SSnehasish Kumar     auto &CallStack = Entry.getSecond();
445c2713df3SKazu Hirata     llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) {
44611314f40SSnehasish Kumar       return AllVAddrsToDiscard.contains(A);
447c2713df3SKazu Hirata     });
44811314f40SSnehasish Kumar     if (CallStack.empty())
44911314f40SSnehasish Kumar       EntriesToErase.push_back(Entry.getFirst());
450dda7b749SSnehasish Kumar   }
45111314f40SSnehasish Kumar 
45211314f40SSnehasish Kumar   // Drop the entries where the callstack is empty.
45311314f40SSnehasish Kumar   for (const uint64_t Id : EntriesToErase) {
45411314f40SSnehasish Kumar     StackMap.erase(Id);
45527a4f254SSnehasish Kumar     CallstackProfileData.erase(Id);
45611314f40SSnehasish Kumar   }
45711314f40SSnehasish Kumar 
45811314f40SSnehasish Kumar   if (StackMap.empty())
45911314f40SSnehasish Kumar     return make_error<InstrProfError>(
46011314f40SSnehasish Kumar         instrprof_error::malformed,
46111314f40SSnehasish Kumar         "no entries in callstack map after symbolization");
46211314f40SSnehasish Kumar 
463dda7b749SSnehasish Kumar   return Error::success();
464216575e5SSnehasish Kumar }
465216575e5SSnehasish Kumar 
readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer)466962db7deSSnehasish Kumar Error RawMemProfReader::readRawProfile(
467962db7deSSnehasish Kumar     std::unique_ptr<MemoryBuffer> DataBuffer) {
468216575e5SSnehasish Kumar   const char *Next = DataBuffer->getBufferStart();
469216575e5SSnehasish Kumar 
470216575e5SSnehasish Kumar   while (Next < DataBuffer->getBufferEnd()) {
471216575e5SSnehasish Kumar     auto *Header = reinterpret_cast<const memprof::Header *>(Next);
472216575e5SSnehasish Kumar 
473216575e5SSnehasish Kumar     // Read in the segment information, check whether its the same across all
474216575e5SSnehasish Kumar     // profiles in this binary file.
475216575e5SSnehasish Kumar     const llvm::SmallVector<SegmentEntry> Entries =
476216575e5SSnehasish Kumar         readSegmentEntries(Next + Header->SegmentOffset);
477216575e5SSnehasish Kumar     if (!SegmentInfo.empty() && SegmentInfo != Entries) {
478216575e5SSnehasish Kumar       // We do not expect segment information to change when deserializing from
479216575e5SSnehasish Kumar       // the same binary profile file. This can happen if dynamic libraries are
480216575e5SSnehasish Kumar       // loaded/unloaded between profile dumping.
481216575e5SSnehasish Kumar       return make_error<InstrProfError>(
482216575e5SSnehasish Kumar           instrprof_error::malformed,
483216575e5SSnehasish Kumar           "memprof raw profile has different segment information");
484216575e5SSnehasish Kumar     }
485216575e5SSnehasish Kumar     SegmentInfo.assign(Entries.begin(), Entries.end());
486216575e5SSnehasish Kumar 
487216575e5SSnehasish Kumar     // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
488216575e5SSnehasish Kumar     // raw profiles in the same binary file are from the same process so the
489216575e5SSnehasish Kumar     // stackdepot ids are the same.
490216575e5SSnehasish Kumar     for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
49127a4f254SSnehasish Kumar       if (CallstackProfileData.count(Value.first)) {
49227a4f254SSnehasish Kumar         CallstackProfileData[Value.first].Merge(Value.second);
493216575e5SSnehasish Kumar       } else {
49427a4f254SSnehasish Kumar         CallstackProfileData[Value.first] = Value.second;
495216575e5SSnehasish Kumar       }
496216575e5SSnehasish Kumar     }
497216575e5SSnehasish Kumar 
498216575e5SSnehasish Kumar     // Read in the callstack for each ids. For multiple raw profiles in the same
499216575e5SSnehasish Kumar     // file, we expect that the callstack is the same for a unique id.
500216575e5SSnehasish Kumar     const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
501216575e5SSnehasish Kumar     if (StackMap.empty()) {
502216575e5SSnehasish Kumar       StackMap = CSM;
503216575e5SSnehasish Kumar     } else {
504216575e5SSnehasish Kumar       if (mergeStackMap(CSM, StackMap))
505216575e5SSnehasish Kumar         return make_error<InstrProfError>(
506216575e5SSnehasish Kumar             instrprof_error::malformed,
507216575e5SSnehasish Kumar             "memprof raw profile got different call stack for same id");
508216575e5SSnehasish Kumar     }
509216575e5SSnehasish Kumar 
510216575e5SSnehasish Kumar     Next += Header->TotalSize;
511216575e5SSnehasish Kumar   }
512216575e5SSnehasish Kumar 
513216575e5SSnehasish Kumar   return Error::success();
514216575e5SSnehasish Kumar }
515216575e5SSnehasish Kumar 
516216575e5SSnehasish Kumar object::SectionedAddress
getModuleOffset(const uint64_t VirtualAddress)517216575e5SSnehasish Kumar RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
518cb81545eSSnehasish Kumar   LLVM_DEBUG({
519216575e5SSnehasish Kumar   SegmentEntry *ContainingSegment = nullptr;
520216575e5SSnehasish Kumar   for (auto &SE : SegmentInfo) {
521216575e5SSnehasish Kumar     if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
522216575e5SSnehasish Kumar       ContainingSegment = &SE;
523216575e5SSnehasish Kumar     }
524216575e5SSnehasish Kumar   }
525216575e5SSnehasish Kumar 
526216575e5SSnehasish Kumar   // Ensure that the virtual address is valid.
527216575e5SSnehasish Kumar   assert(ContainingSegment && "Could not find a segment entry");
528cb81545eSSnehasish Kumar   });
529216575e5SSnehasish Kumar 
530216575e5SSnehasish Kumar   // TODO: Compute the file offset based on the maps and program headers. For
531216575e5SSnehasish Kumar   // now this only works for non PIE binaries.
532216575e5SSnehasish Kumar   return object::SectionedAddress{VirtualAddress};
533216575e5SSnehasish Kumar }
534216575e5SSnehasish Kumar 
readNextRecord(GuidMemProfRecordPair & GuidRecord)53527a4f254SSnehasish Kumar Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
53627a4f254SSnehasish Kumar   if (FunctionProfileData.empty())
537216575e5SSnehasish Kumar     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
538216575e5SSnehasish Kumar 
53927a4f254SSnehasish Kumar   if (Iter == FunctionProfileData.end())
540216575e5SSnehasish Kumar     return make_error<InstrProfError>(instrprof_error::eof);
541216575e5SSnehasish Kumar 
5426dd6a616SSnehasish Kumar   auto IdToFrameCallback = [this](const FrameId Id) {
543ec51971eSSnehasish Kumar     Frame F = this->idToFrame(Id);
544ec51971eSSnehasish Kumar     if (!this->KeepSymbolName)
545ec51971eSSnehasish Kumar       return F;
546ec51971eSSnehasish Kumar     auto Iter = this->GuidToSymbolName.find(F.Function);
547ec51971eSSnehasish Kumar     assert(Iter != this->GuidToSymbolName.end());
548ec51971eSSnehasish Kumar     F.SymbolName = Iter->getSecond();
549ec51971eSSnehasish Kumar     return F;
5506dd6a616SSnehasish Kumar   };
551ec51971eSSnehasish Kumar 
5526dd6a616SSnehasish Kumar   const IndexedMemProfRecord &IndexedRecord = Iter->second;
5536dd6a616SSnehasish Kumar   GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)};
554216575e5SSnehasish Kumar   Iter++;
555216575e5SSnehasish Kumar   return Error::success();
556216575e5SSnehasish Kumar }
5577cca33b4SSnehasish Kumar } // namespace memprof
5587cca33b4SSnehasish Kumar } // namespace llvm
559