//===- RawMemProfReader.cpp - Instrumented memory profiling reader -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading MemProf profiling data.
//
//===----------------------------------------------------------------------===//

#include <cstdint>
#include <type_traits>

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/RawMemProfReader.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MD5.h"

#define DEBUG_TYPE "memprof"

namespace llvm {
namespace memprof {
namespace {

struct Summary {
  uint64_t Version;
  uint64_t TotalSizeBytes;
  uint64_t NumSegments;
  uint64_t NumMIBInfo;
  uint64_t NumStackOffsets;
};

template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_pod<T>::value, "Not a pod type.");
  assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}

Summary computeSummary(const char *Start) {
  auto *H = reinterpret_cast<const Header *>(Start);

  // Check alignment while reading the number of items in each section.
  return Summary{
      H->Version,
      H->TotalSize,
      alignedRead(Start + H->SegmentOffset),
      alignedRead(Start + H->MIBOffset),
      alignedRead(Start + H->StackOffset),
  };
}

Error checkBuffer(const MemoryBuffer &Buffer) {
  if (!RawMemProfReader::hasFormat(Buffer))
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  if (Buffer.getBufferSize() == 0)
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Buffer.getBufferSize() < sizeof(Header)) {
    return make_error<InstrProfError>(instrprof_error::truncated);
  }

  // The size of the buffer can be > header total size since we allow repeated
  // serialization of memprof profiles to the same file.
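  //
  // A sketch of the layout this implies, inferred from the validation loop
  // below and the section readers later in this file: profiles sit back to
  // back, each starting with its own Header whose TotalSize spans the header
  // plus the three sections that follow it, e.g.
  //   [Header | Segments | MIBs | Stacks][Header | Segments | MIBs | Stacks]...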
  uint64_t TotalSize = 0;
  const char *Next = Buffer.getBufferStart();
  while (Next < Buffer.getBufferEnd()) {
    auto *H = reinterpret_cast<const Header *>(Next);
    if (H->Version != MEMPROF_RAW_VERSION) {
      return make_error<InstrProfError>(instrprof_error::unsupported_version);
    }

    TotalSize += H->TotalSize;
    Next += H->TotalSize;
  }

  if (Buffer.getBufferSize() != TotalSize) {
    return make_error<InstrProfError>(instrprof_error::malformed);
  }
  return Error::success();
}

llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  llvm::SmallVector<SegmentEntry> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    Items.push_back(*reinterpret_cast<const SegmentEntry *>(
        Ptr + I * sizeof(SegmentEntry)));
  }
  return Items;
}

llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocks(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr);
    const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
    Items.push_back({Id, MIB});
    // Only increment by size of MIB since readNext implicitly increments.
    Ptr += sizeof(MemInfoBlock);
  }
  return Items;
}

CallStackMap readStackInfo(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  CallStackMap Items;

  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr);
    const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr);

    SmallVector<uint64_t, 32> CallStack;
    for (uint64_t J = 0; J < NumPCs; J++) {
      CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr));
    }

    Items[StackId] = CallStack;
  }
  return Items;
}

// Merges the contents of stack information in \p From to \p To. Returns true
// if any stack ids observed previously map to a different set of program
// counter addresses.
bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
  for (const auto &IdStack : From) {
    auto I = To.find(IdStack.first);
    if (I == To.end()) {
      To[IdStack.first] = IdStack.second;
    } else {
      // Check that the PCs are the same (in order).
      if (IdStack.second != I->second)
        return true;
    }
  }
  return false;
}

// Canonicalize the function name by stripping the ".llvm." suffix (and
// anything after it) that may be appended to internal symbols.
StringRef trimSuffix(const StringRef Name) {
  const auto Pos = Name.find(".llvm.");
  return Name.take_front(Pos);
}

Error report(Error E, const StringRef Context) {
  return joinErrors(createStringError(inconvertibleErrorCode(), Context),
                    std::move(E));
}
} // namespace

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  if (Error E = checkBuffer(*Buffer))
    return report(std::move(E), Path.getSingleStringRef());

  if (ProfiledBinary.empty())
    return report(
        errorCodeToError(make_error_code(std::errc::invalid_argument)),
        "Path to profiled binary is empty!");

  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
  if (!BinaryOr) {
    return report(BinaryOr.takeError(), ProfiledBinary);
  }

  std::unique_ptr<RawMemProfReader> Reader(
      new RawMemProfReader(std::move(Buffer), std::move(BinaryOr.get())));
  if (Error E = Reader->initialize()) {
    return std::move(E);
  }
  return std::move(Reader);
}

bool RawMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  if (Buffer.getBufferSize() < sizeof(uint64_t))
    return false;
  // Aligned read to sanity check that the buffer was allocated with at least
  // 8b alignment.
  const uint64_t Magic = alignedRead(Buffer.getBufferStart());
  return Magic == MEMPROF_RAW_MAGIC_64;
}

void RawMemProfReader::printYAML(raw_ostream &OS) {
  OS << "MemprofProfile:\n";
  printSummaries(OS);
  // Print out the merged contents of the profiles.
  OS << "  Records:\n";
  for (const auto &Record : *this) {
    OS << "  -\n";
    Record.print(OS);
  }
}

void RawMemProfReader::printSummaries(raw_ostream &OS) const {
  const char *Next = DataBuffer->getBufferStart();
  while (Next < DataBuffer->getBufferEnd()) {
    auto Summary = computeSummary(Next);
    OS << "  -\n";
    OS << "  Header:\n";
    OS << "    Version: " << Summary.Version << "\n";
    OS << "    TotalSizeBytes: " << Summary.TotalSizeBytes << "\n";
    OS << "    NumSegments: " << Summary.NumSegments << "\n";
    OS << "    NumMibInfo: " << Summary.NumMIBInfo << "\n";
    OS << "    NumStackOffsets: " << Summary.NumStackOffsets << "\n";
    // TODO: Print the build ids once we can record them using the
    // sanitizer_procmaps library for linux.
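
    // Advance to the next profile in the buffer; TotalSize spans the header
    // and all of its sections (see checkBuffer above).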
    auto *H = reinterpret_cast<const Header *>(Next);
    Next += H->TotalSize;
  }
}

Error RawMemProfReader::initialize() {
  const StringRef FileName = Binary.getBinary()->getFileName();

  auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
  if (!ElfObject) {
    return report(make_error<StringError>(Twine("Not an ELF file: "),
                                          inconvertibleErrorCode()),
                  FileName);
  }

  auto Triple = ElfObject->makeTriple();
  if (!Triple.isX86())
    return report(make_error<StringError>(Twine("Unsupported target: ") +
                                              Triple.getArchName(),
                                          inconvertibleErrorCode()),
                  FileName);

  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  std::unique_ptr<DIContext> Context = DWARFContext::create(
      *Object, DWARFContext::ProcessDebugRelocations::Process);

  auto SOFOr = symbolize::SymbolizableObjectFile::create(
      Object, std::move(Context), /*UntagAddresses=*/false);
  if (!SOFOr)
    return report(SOFOr.takeError(), FileName);
  Symbolizer = std::move(SOFOr.get());

  return readRawProfile();
}

Error RawMemProfReader::readRawProfile() {
  const char *Next = DataBuffer->getBufferStart();

  while (Next < DataBuffer->getBufferEnd()) {
    auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    // Read in the segment information and check whether it's the same across
    // all profiles in this binary file.
    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);
    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumps.
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile has different segment information");
    }
    SegmentInfo.assign(Entries.begin(), Entries.end());

    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
    // raw profiles in the same binary file are from the same process so the
    // stackdepot ids are the same.
    for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
      if (ProfileData.count(Value.first)) {
        ProfileData[Value.first].Merge(Value.second);
      } else {
        ProfileData[Value.first] = Value.second;
      }
    }

    // Read in the callstack for each id. For multiple raw profiles in the same
    // file, we expect that the callstack is the same for a unique id.
    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
    if (StackMap.empty()) {
      StackMap = CSM;
    } else {
      if (mergeStackMap(CSM, StackMap))
        return make_error<InstrProfError>(
            instrprof_error::malformed,
            "memprof raw profile got different call stack for same id");
    }

    Next += Header->TotalSize;
  }

  return Error::success();
}

object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  LLVM_DEBUG({
    SegmentEntry *ContainingSegment = nullptr;
    for (auto &SE : SegmentInfo) {
      if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
        ContainingSegment = &SE;
      }
    }

    // Ensure that the virtual address is valid.
    assert(ContainingSegment && "Could not find a segment entry");
  });

  // TODO: Compute the file offset based on the maps and program headers. For
  // now this only works for non-PIE binaries.
  return object::SectionedAddress{VirtualAddress};
}

Error RawMemProfReader::fillRecord(const uint64_t Id, const MemInfoBlock &MIB,
                                   MemProfRecord &Record) {
  auto &CallStack = StackMap[Id];
  DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);
  for (const uint64_t Address : CallStack) {
    Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
        getModuleOffset(Address), Specifier, /*UseSymbolTable=*/false);

    if (!DIOr)
      return DIOr.takeError();
    DIInliningInfo DI = DIOr.get();

    for (size_t I = 0; I < DI.getNumberOfFrames(); I++) {
      const auto &Frame = DI.getFrame(I);
      Record.CallStack.emplace_back(
          std::to_string(llvm::MD5Hash(trimSuffix(Frame.FunctionName))),
          Frame.Line - Frame.StartLine, Frame.Column,
          // Only the first entry is not an inlined location.
          I != 0);
    }
  }
  Record.Info = MIB;
  return Error::success();
}

Error RawMemProfReader::readNextRecord(MemProfRecord &Record) {
  if (ProfileData.empty())
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Iter == ProfileData.end())
    return make_error<InstrProfError>(instrprof_error::eof);

  Record.clear();
  if (Error E = fillRecord(Iter->first, Iter->second, Record)) {
    return E;
  }
  Iter++;
  return Error::success();
}
} // namespace memprof
} // namespace llvm
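
// A minimal usage sketch, assuming only the interfaces exercised in this file:
// create() to build a reader and the record iteration used by printYAML().
// ProfilePath and BinaryPath are placeholder arguments supplied by the caller.
//
//   auto ReaderOr = memprof::RawMemProfReader::create(ProfilePath, BinaryPath);
//   if (!ReaderOr) {
//     llvm::errs() << toString(ReaderOr.takeError()) << "\n";
//     return;
//   }
//   std::unique_ptr<memprof::RawMemProfReader> Reader = std::move(*ReaderOr);
//   for (const auto &Record : *Reader)
//     Record.print(llvm::outs());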