//===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading MemProf profiling data.
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <cstdint>
#include <memory>
#include <type_traits>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/RawMemProfReader.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Path.h"

#define DEBUG_TYPE "memprof"

namespace llvm {
namespace memprof {
namespace {

struct Summary {
  uint64_t Version;
  uint64_t TotalSizeBytes;
  uint64_t NumSegments;
  uint64_t NumMIBInfo;
  uint64_t NumStackOffsets;
};

template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_pod<T>::value, "Not a pod type.");
  assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}

Summary computeSummary(const char *Start) {
  auto *H = reinterpret_cast<const Header *>(Start);

  // Check alignment while reading the number of items in each section.
  return Summary{
      H->Version,
      H->TotalSize,
      alignedRead(Start + H->SegmentOffset),
      alignedRead(Start + H->MIBOffset),
      alignedRead(Start + H->StackOffset),
  };
}

Error checkBuffer(const MemoryBuffer &Buffer) {
  if (!RawMemProfReader::hasFormat(Buffer))
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  if (Buffer.getBufferSize() == 0)
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Buffer.getBufferSize() < sizeof(Header)) {
    return make_error<InstrProfError>(instrprof_error::truncated);
  }

  // The size of the buffer can be > header total size since we allow repeated
  // serialization of memprof profiles to the same file.
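  // Each profile in the buffer is laid out as described below. This is a
  // sketch inferred from the read helpers later in this file; the
  // authoritative definitions live in MemProfData.inc.
  //   Header:   Magic, Version, TotalSize, SegmentOffset, MIBOffset,
  //             StackOffset.
  //   Segments: uint64_t count, then that many SegmentEntry records.
  //   MIBs:     uint64_t count, then (uint64_t Id, MemInfoBlock) pairs.
  //   Stacks:   uint64_t count, then (uint64_t StackId, uint64_t NumPCs,
  //             NumPCs x uint64_t PC) records.
  // Walk each profile, checking the version recorded in its header and
  // accumulating the recorded sizes so they can be compared against the
  // actual buffer size below.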
  uint64_t TotalSize = 0;
  const char *Next = Buffer.getBufferStart();
  while (Next < Buffer.getBufferEnd()) {
    auto *H = reinterpret_cast<const Header *>(Next);
    if (H->Version != MEMPROF_RAW_VERSION) {
      return make_error<InstrProfError>(instrprof_error::unsupported_version);
    }

    TotalSize += H->TotalSize;
    Next += H->TotalSize;
  }

  if (Buffer.getBufferSize() != TotalSize) {
    return make_error<InstrProfError>(instrprof_error::malformed);
  }
  return Error::success();
}

llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  llvm::SmallVector<SegmentEntry> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    Items.push_back(*reinterpret_cast<const SegmentEntry *>(
        Ptr + I * sizeof(SegmentEntry)));
  }
  return Items;
}

llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocks(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr);
    const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
    Items.push_back({Id, MIB});
    // Only increment by size of MIB since readNext implicitly increments.
    Ptr += sizeof(MemInfoBlock);
  }
  return Items;
}

CallStackMap readStackInfo(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  CallStackMap Items;

  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr);
    const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr);

    SmallVector<uint64_t> CallStack;
    for (uint64_t J = 0; J < NumPCs; J++) {
      CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr));
    }

    Items[StackId] = CallStack;
  }
  return Items;
}

// Merges the contents of stack information from \p From into \p To. Returns
// true if any stack ids observed previously map to a different set of program
// counter addresses.
bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
  for (const auto &IdStack : From) {
    auto I = To.find(IdStack.first);
    if (I == To.end()) {
      To[IdStack.first] = IdStack.second;
    } else {
      // Check that the PCs are the same (in order).
      if (IdStack.second != I->second)
        return true;
    }
  }
  return false;
}

Error report(Error E, const StringRef Context) {
  return joinErrors(createStringError(inconvertibleErrorCode(), Context),
                    std::move(E));
}

bool isRuntimePath(const StringRef Path) {
  return StringRef(llvm::sys::path::convert_to_slash(Path))
      .contains("memprof/memprof_");
}
} // namespace

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                         bool KeepName) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  if (Error E = checkBuffer(*Buffer))
    return report(std::move(E), Path.getSingleStringRef());

  if (ProfiledBinary.empty())
    return report(
        errorCodeToError(make_error_code(std::errc::invalid_argument)),
        "Path to profiled binary is empty!");

  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
  if (!BinaryOr) {
    return report(BinaryOr.takeError(), ProfiledBinary);
  }

  // Use new here since the constructor is private.
  std::unique_ptr<RawMemProfReader> Reader(new RawMemProfReader(
      std::move(Buffer), std::move(BinaryOr.get()), KeepName));
  if (Error E = Reader->initialize()) {
    return std::move(E);
  }
  return std::move(Reader);
}

bool RawMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  if (Buffer.getBufferSize() < sizeof(uint64_t))
    return false;
  // Aligned read to sanity check that the buffer was allocated with at least
  // 8 byte alignment.
  const uint64_t Magic = alignedRead(Buffer.getBufferStart());
  return Magic == MEMPROF_RAW_MAGIC_64;
}

void RawMemProfReader::printYAML(raw_ostream &OS) {
  OS << "MemprofProfile:\n";
  // TODO: Update printSummaries to print out the data after the profile has
  // been symbolized and pruned. We can parse some raw profile characteristics
  // from the data buffer for additional information.
  printSummaries(OS);
  // Print out the merged contents of the profiles.
  OS << "  Records:\n";
  for (const auto &Entry : *this) {
    OS << "  -\n";
    OS << "    FunctionGUID: " << Entry.first << "\n";
    Entry.second.print(OS);
  }
}

void RawMemProfReader::printSummaries(raw_ostream &OS) const {
  const char *Next = DataBuffer->getBufferStart();
  while (Next < DataBuffer->getBufferEnd()) {
    auto Summary = computeSummary(Next);
    OS << "  -\n";
    OS << "  Header:\n";
    OS << "    Version: " << Summary.Version << "\n";
    OS << "    TotalSizeBytes: " << Summary.TotalSizeBytes << "\n";
    OS << "    NumSegments: " << Summary.NumSegments << "\n";
    OS << "    NumMibInfo: " << Summary.NumMIBInfo << "\n";
    OS << "    NumStackOffsets: " << Summary.NumStackOffsets << "\n";
    // TODO: Print the build ids once we can record them using the
    // sanitizer_procmaps library for linux.
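
    // Advance to the header of the next profile in the buffer.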
    auto *H = reinterpret_cast<const Header *>(Next);
    Next += H->TotalSize;
  }
}

Error RawMemProfReader::initialize() {
  const StringRef FileName = Binary.getBinary()->getFileName();

  auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
  if (!ElfObject) {
    return report(make_error<StringError>(Twine("Not an ELF file: "),
                                          inconvertibleErrorCode()),
                  FileName);
  }

  auto Triple = ElfObject->makeTriple();
  if (!Triple.isX86())
    return report(make_error<StringError>(Twine("Unsupported target: ") +
                                              Triple.getArchName(),
                                          inconvertibleErrorCode()),
                  FileName);

  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  std::unique_ptr<DIContext> Context = DWARFContext::create(
      *Object, DWARFContext::ProcessDebugRelocations::Process);

  auto SOFOr = symbolize::SymbolizableObjectFile::create(
      Object, std::move(Context), /*UntagAddresses=*/false);
  if (!SOFOr)
    return report(SOFOr.takeError(), FileName);
  Symbolizer = std::move(SOFOr.get());

  if (Error E = readRawProfile())
    return E;

  if (Error E = symbolizeAndFilterStackFrames())
    return E;

  return mapRawProfileToRecords();
}

Error RawMemProfReader::mapRawProfileToRecords() {
  // Hold a mapping from function to each callsite location we encounter within
  // it that is part of some dynamic allocation context. The location is stored
  // as a pointer to a symbolized list of inline frames.
  using LocationPtr = const llvm::SmallVector<FrameId> *;
  llvm::DenseMap<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
      PerFunctionCallSites;

  // Convert the raw profile callstack data into memprof records. While doing
  // so, keep track of related contexts so that we can fill these in later.
  for (const auto &Entry : CallstackProfileData) {
    const uint64_t StackId = Entry.first;

    auto It = StackMap.find(StackId);
    if (It == StackMap.end())
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof callstack record does not contain id: " + Twine(StackId));

    // Construct the symbolized callstack.
    llvm::SmallVector<FrameId> Callstack;
    Callstack.reserve(It->getSecond().size());

    llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
    for (size_t I = 0; I < Addresses.size(); I++) {
      const uint64_t Address = Addresses[I];
      assert(SymbolizedFrame.count(Address) > 0 &&
             "Address not found in SymbolizedFrame map");
      const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];

      assert(!idToFrame(Frames.back()).IsInlineFrame &&
             "The last frame should not be inlined");

      // Record the callsites for each function. Skip the first frame of the
      // first address since it is the allocation site itself that is recorded
      // as an alloc site.
      for (size_t J = 0; J < Frames.size(); J++) {
        if (I == 0 && J == 0)
          continue;
        // We attach the entire bottom-up frame here for the callsite even
        // though we only need the frames up to and including the frame for
        // Frames[J].Function. This will enable better deduplication for
        // compression in the future.
        const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
        PerFunctionCallSites[Guid].insert(&Frames);
      }

      // Add all the frames to the current allocation callstack.
      Callstack.append(Frames.begin(), Frames.end());
    }

    // We attach the memprof record to each function bottom-up, including the
    // first non-inline frame.
    for (size_t I = 0; /*Break out using the condition below*/; I++) {
      const Frame &F = idToFrame(Callstack[I]);
      auto Result =
          FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
      IndexedMemProfRecord &Record = Result.first->second;
      Record.AllocSites.emplace_back(Callstack, Entry.second);

      if (!F.IsInlineFrame)
        break;
    }
  }

  // Fill in the related callsites per function.
  for (auto I = PerFunctionCallSites.begin(), E = PerFunctionCallSites.end();
       I != E; I++) {
    const GlobalValue::GUID Id = I->first;
    // Some functions may have only callsite data and no allocation data. Here
    // we insert a new entry for callsite data if we need to.
    auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
    IndexedMemProfRecord &Record = Result.first->second;
    for (LocationPtr Loc : I->getSecond()) {
      Record.CallSites.push_back(*Loc);
    }
  }

  return Error::success();
}

Error RawMemProfReader::symbolizeAndFilterStackFrames() {
  // The specifier to use when symbolization is requested.
  const DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);

  // For entries where all PCs in the callstack are discarded, we erase the
  // entry from the stack map.
  llvm::SmallVector<uint64_t> EntriesToErase;
  // We keep track of all prior discarded entries so that we can avoid invoking
  // the symbolizer for such entries.
  llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
  for (auto &Entry : StackMap) {
    for (const uint64_t VAddr : Entry.getSecond()) {
      // Check if we have already symbolized and cached the result or if we
      // don't want to attempt symbolization since we know this address is bad.
      // In this case the address is also removed from the current callstack.
      if (SymbolizedFrame.count(VAddr) > 0 ||
          AllVAddrsToDiscard.contains(VAddr))
        continue;

      Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
          getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
      if (!DIOr)
        return DIOr.takeError();
      DIInliningInfo DI = DIOr.get();

      // Drop frames that we can't symbolize or that belong to the runtime.
      if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
          isRuntimePath(DI.getFrame(0).FileName)) {
        AllVAddrsToDiscard.insert(VAddr);
        continue;
      }

      for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
           I++) {
        const auto &DIFrame = DI.getFrame(I);
        const uint64_t Guid =
            IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
        const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                      // Only the last entry is not an inlined location.
                      I != NumFrames - 1);
        // Here we retain a mapping from the GUID to symbol name instead of
        // adding it to the frame object directly to reduce memory overhead.
        // This is because there can be many unique frames, particularly for
        // callsite frames.
        if (KeepSymbolName)
          GuidToSymbolName.insert({Guid, DIFrame.FunctionName});

        const FrameId Hash = F.hash();
        IdToFrame.insert({Hash, F});
        SymbolizedFrame[VAddr].push_back(Hash);
      }
    }

    auto &CallStack = Entry.getSecond();
    CallStack.erase(std::remove_if(CallStack.begin(), CallStack.end(),
                                   [&AllVAddrsToDiscard](const uint64_t A) {
                                     return AllVAddrsToDiscard.contains(A);
                                   }),
                    CallStack.end());
    if (CallStack.empty())
      EntriesToErase.push_back(Entry.getFirst());
  }

  // Drop the entries where the callstack is empty.
  for (const uint64_t Id : EntriesToErase) {
    StackMap.erase(Id);
    CallstackProfileData.erase(Id);
  }

  if (StackMap.empty())
    return make_error<InstrProfError>(
        instrprof_error::malformed,
        "no entries in callstack map after symbolization");

  return Error::success();
}

Error RawMemProfReader::readRawProfile() {
  const char *Next = DataBuffer->getBufferStart();

  while (Next < DataBuffer->getBufferEnd()) {
    auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    // Read in the segment information and check whether it's the same across
    // all profiles in this binary file.
    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);
    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumps.
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile has different segment information");
    }
    SegmentInfo.assign(Entries.begin(), Entries.end());

    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
    // raw profiles in the same binary file are from the same process, so the
    // stackdepot ids are the same.
    for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
      if (CallstackProfileData.count(Value.first)) {
        CallstackProfileData[Value.first].Merge(Value.second);
      } else {
        CallstackProfileData[Value.first] = Value.second;
      }
    }

    // Read in the callstack for each id. For multiple raw profiles in the
    // same file, we expect the callstack to be the same for a unique id.
    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
    if (StackMap.empty()) {
      StackMap = CSM;
    } else {
      if (mergeStackMap(CSM, StackMap))
        return make_error<InstrProfError>(
            instrprof_error::malformed,
            "memprof raw profile got different call stack for same id");
    }

    Next += Header->TotalSize;
  }

  return Error::success();
}

object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  LLVM_DEBUG({
    SegmentEntry *ContainingSegment = nullptr;
    for (auto &SE : SegmentInfo) {
      if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
        ContainingSegment = &SE;
      }
    }

    // Ensure that the virtual address is valid.
    assert(ContainingSegment && "Could not find a segment entry");
  });

  // TODO: Compute the file offset based on the maps and program headers. For
  // now this only works for non-PIE binaries.
  return object::SectionedAddress{VirtualAddress};
}

Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
  if (FunctionProfileData.empty())
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Iter == FunctionProfileData.end())
    return make_error<InstrProfError>(instrprof_error::eof);

  auto IdToFrameCallback = [this](const FrameId Id) {
    Frame F = this->idToFrame(Id);
    if (!this->KeepSymbolName)
      return F;
    auto Iter = this->GuidToSymbolName.find(F.Function);
    assert(Iter != this->GuidToSymbolName.end());
    F.SymbolName = Iter->getSecond();
    return F;
  };

  const IndexedMemProfRecord &IndexedRecord = Iter->second;
  GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)};
  Iter++;
  return Error::success();
}
} // namespace memprof
} // namespace llvm