//===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading MemProf profiling data.
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <cstdint>
#include <memory>
#include <type_traits>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/RawMemProfReader.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Path.h"

#define DEBUG_TYPE "memprof"

namespace llvm {
namespace memprof {
namespace {
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_pod<T>::value, "Not a pod type.");
  assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  return *reinterpret_cast<const T *>(Ptr);
}

Error checkBuffer(const MemoryBuffer &Buffer) {
  if (!RawMemProfReader::hasFormat(Buffer))
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  if (Buffer.getBufferSize() == 0)
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Buffer.getBufferSize() < sizeof(Header)) {
    return make_error<InstrProfError>(instrprof_error::truncated);
  }

  // The size of the buffer can be > header total size since we allow repeated
  // serialization of memprof profiles to the same file.
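  // Walk the concatenated profiles, checking the version recorded in each
  // header and summing the per-profile sizes so that truncated or trailing
  // bytes are rejected below.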
  uint64_t TotalSize = 0;
  const char *Next = Buffer.getBufferStart();
  while (Next < Buffer.getBufferEnd()) {
    auto *H = reinterpret_cast<const Header *>(Next);
    if (H->Version != MEMPROF_RAW_VERSION) {
      return make_error<InstrProfError>(instrprof_error::unsupported_version);
    }

    TotalSize += H->TotalSize;
    Next += H->TotalSize;
  }

  if (Buffer.getBufferSize() != TotalSize) {
    return make_error<InstrProfError>(instrprof_error::malformed);
  }
  return Error::success();
}

llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  llvm::SmallVector<SegmentEntry> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    Items.push_back(*reinterpret_cast<const SegmentEntry *>(
        Ptr + I * sizeof(SegmentEntry)));
  }
  return Items;
}

llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocks(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items;
  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr);
    const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr);
    Items.push_back({Id, MIB});
    // Only increment by size of MIB since readNext implicitly increments.
    Ptr += sizeof(MemInfoBlock);
  }
  return Items;
}

CallStackMap readStackInfo(const char *Ptr) {
  using namespace support;

  const uint64_t NumItemsToRead =
      endian::readNext<uint64_t, little, unaligned>(Ptr);
  CallStackMap Items;

  for (uint64_t I = 0; I < NumItemsToRead; I++) {
    const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr);
    const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr);

    SmallVector<uint64_t> CallStack;
    for (uint64_t J = 0; J < NumPCs; J++) {
      CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr));
    }

    Items[StackId] = CallStack;
  }
  return Items;
}

// Merges the contents of stack information in \p From to \p To. Returns true
// if any stack ids observed previously map to a different set of program
// counter addresses.
bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
  for (const auto &IdStack : From) {
    auto I = To.find(IdStack.first);
    if (I == To.end()) {
      To[IdStack.first] = IdStack.second;
    } else {
      // Check that the PCs are the same (in order).
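      // SmallVector::operator== compares sizes first and then the elements in
      // order, so any reordering or length mismatch is flagged.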
      if (IdStack.second != I->second)
        return true;
    }
  }
  return false;
}

Error report(Error E, const StringRef Context) {
  return joinErrors(createStringError(inconvertibleErrorCode(), Context),
                    std::move(E));
}

bool isRuntimePath(const StringRef Path) {
  return StringRef(llvm::sys::path::convert_to_slash(Path))
      .contains("memprof/memprof_");
}
} // namespace

Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                         bool KeepName) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), Path.getSingleStringRef());

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  if (Error E = checkBuffer(*Buffer))
    return report(std::move(E), Path.getSingleStringRef());

  if (ProfiledBinary.empty())
    return report(
        errorCodeToError(make_error_code(std::errc::invalid_argument)),
        "Path to profiled binary is empty!");

  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
  if (!BinaryOr) {
    return report(BinaryOr.takeError(), ProfiledBinary);
  }

  // Use new here since constructor is private.
  std::unique_ptr<RawMemProfReader> Reader(
      new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
  if (Error E = Reader->initialize(std::move(Buffer))) {
    return std::move(E);
  }
  return std::move(Reader);
}

bool RawMemProfReader::hasFormat(const StringRef Path) {
  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (!BufferOr)
    return false;

  std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
  return hasFormat(*Buffer);
}

bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  if (Buffer.getBufferSize() < sizeof(uint64_t))
    return false;
  // Aligned read to sanity check that the buffer was allocated with at least
  // 8-byte alignment.
  const uint64_t Magic = alignedRead(Buffer.getBufferStart());
  return Magic == MEMPROF_RAW_MAGIC_64;
}

void RawMemProfReader::printYAML(raw_ostream &OS) {
  uint64_t NumAllocFunctions = 0, NumMibInfo = 0;
  for (const auto &KV : FunctionProfileData) {
    const size_t NumAllocSites = KV.second.AllocSites.size();
    if (NumAllocSites > 0) {
      NumAllocFunctions++;
      NumMibInfo += NumAllocSites;
    }
  }

  OS << "MemprofProfile:\n";
  OS << "  Summary:\n";
  OS << "    Version: " << MEMPROF_RAW_VERSION << "\n";
  OS << "    NumSegments: " << SegmentInfo.size() << "\n";
  OS << "    NumMibInfo: " << NumMibInfo << "\n";
  OS << "    NumAllocFunctions: " << NumAllocFunctions << "\n";
  OS << "    NumStackOffsets: " << StackMap.size() << "\n";
  // Print out the merged contents of the profiles.
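  // Each record is keyed by the GUID of the function it is attached to.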
  OS << "  Records:\n";
  for (const auto &Entry : *this) {
    OS << "  -\n";
    OS << "    FunctionGUID: " << Entry.first << "\n";
    Entry.second.print(OS);
  }
}

Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
  const StringRef FileName = Binary.getBinary()->getFileName();

  auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
  if (!ElfObject) {
    return report(make_error<StringError>(Twine("Not an ELF file: "),
                                          inconvertibleErrorCode()),
                  FileName);
  }

  auto Triple = ElfObject->makeTriple();
  if (!Triple.isX86())
    return report(make_error<StringError>(Twine("Unsupported target: ") +
                                              Triple.getArchName(),
                                          inconvertibleErrorCode()),
                  FileName);

  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  std::unique_ptr<DIContext> Context = DWARFContext::create(
      *Object, DWARFContext::ProcessDebugRelocations::Process);

  auto SOFOr = symbolize::SymbolizableObjectFile::create(
      Object, std::move(Context), /*UntagAddresses=*/false);
  if (!SOFOr)
    return report(SOFOr.takeError(), FileName);
  Symbolizer = std::move(SOFOr.get());

  if (Error E = readRawProfile(std::move(DataBuffer)))
    return E;

  if (Error E = symbolizeAndFilterStackFrames())
    return E;

  return mapRawProfileToRecords();
}

Error RawMemProfReader::mapRawProfileToRecords() {
  // Hold a mapping from function to each callsite location we encounter within
  // it that is part of some dynamic allocation context. The location is stored
  // as a pointer to a symbolized list of inline frames.
  using LocationPtr = const llvm::SmallVector<FrameId> *;
  llvm::DenseMap<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
      PerFunctionCallSites;

  // Convert the raw profile callstack data into memprof records. While doing
  // so keep track of related contexts so that we can fill these in later.
  for (const auto &Entry : CallstackProfileData) {
    const uint64_t StackId = Entry.first;

    auto It = StackMap.find(StackId);
    if (It == StackMap.end())
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof callstack record does not contain id: " + Twine(StackId));

    // Construct the symbolized callstack.
    llvm::SmallVector<FrameId> Callstack;
    Callstack.reserve(It->getSecond().size());

    llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
    for (size_t I = 0; I < Addresses.size(); I++) {
      const uint64_t Address = Addresses[I];
      assert(SymbolizedFrame.count(Address) > 0 &&
             "Address not found in SymbolizedFrame map");
      const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];

      assert(!idToFrame(Frames.back()).IsInlineFrame &&
             "The last frame should not be inlined");

      // Record the callsites for each function. Skip the first frame of the
      // first address since it is the allocation site itself that is recorded
      // as an alloc site.
      for (size_t J = 0; J < Frames.size(); J++) {
        if (I == 0 && J == 0)
          continue;
        // We attach the entire bottom-up frame here for the callsite even
        // though we only need the frames up to and including the frame for
        // Frames[J].Function. This will enable better deduplication for
        // compression in the future.
        const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
        PerFunctionCallSites[Guid].insert(&Frames);
      }

      // Add all the frames to the current allocation callstack.
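      // Frames for each address run from the innermost inlined frame to the
      // enclosing non-inlined function (per the assert above), so the
      // callstack stays leaf-first.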
      Callstack.append(Frames.begin(), Frames.end());
    }

    // We attach the memprof record to each function bottom-up including the
    // first non-inline frame.
    for (size_t I = 0; /*Break out using the condition below*/; I++) {
      const Frame &F = idToFrame(Callstack[I]);
      auto Result =
          FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
      IndexedMemProfRecord &Record = Result.first->second;
      Record.AllocSites.emplace_back(Callstack, Entry.second);

      if (!F.IsInlineFrame)
        break;
    }
  }

  // Fill in the related callsites per function.
  for (auto I = PerFunctionCallSites.begin(), E = PerFunctionCallSites.end();
       I != E; I++) {
    const GlobalValue::GUID Id = I->first;
    // Some functions may have only callsite data and no allocation data. Here
    // we insert a new entry for callsite data if we need to.
    auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
    IndexedMemProfRecord &Record = Result.first->second;
    for (LocationPtr Loc : I->getSecond()) {
      Record.CallSites.push_back(*Loc);
    }
  }

  return Error::success();
}

Error RawMemProfReader::symbolizeAndFilterStackFrames() {
  // The specifier to use when symbolization is requested.
  const DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);

  // For entries where all PCs in the callstack are discarded, we erase the
  // entry from the stack map.
  llvm::SmallVector<uint64_t> EntriesToErase;
  // We keep track of all prior discarded entries so that we can avoid invoking
  // the symbolizer for such entries.
  llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
  for (auto &Entry : StackMap) {
    for (const uint64_t VAddr : Entry.getSecond()) {
      // Check if we have already symbolized and cached the result or if we
      // don't want to attempt symbolization since we know this address is
      // bad. In this case the address is also removed from the current
      // callstack.
      if (SymbolizedFrame.count(VAddr) > 0 ||
          AllVAddrsToDiscard.contains(VAddr))
        continue;

      Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
          getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false);
      if (!DIOr)
        return DIOr.takeError();
      DIInliningInfo DI = DIOr.get();

      // Drop frames which we can't symbolize or if they belong to the
      // runtime.
      if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
          isRuntimePath(DI.getFrame(0).FileName)) {
        AllVAddrsToDiscard.insert(VAddr);
        continue;
      }

      for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
           I++) {
        const auto &DIFrame = DI.getFrame(I);
        const uint64_t Guid =
            IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
        const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                      // Only the last entry is not an inlined location.
                      I != NumFrames - 1);
        // Here we retain a mapping from the GUID to symbol name instead of
        // adding it to the frame object directly to reduce memory overhead.
        // This is because there can be many unique frames, particularly for
        // callsite frames.
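        // When KeepSymbolName is set, readNextRecord uses this map to attach
        // the symbol name to each Frame it returns.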
        if (KeepSymbolName)
          GuidToSymbolName.insert({Guid, DIFrame.FunctionName});

        const FrameId Hash = F.hash();
        IdToFrame.insert({Hash, F});
        SymbolizedFrame[VAddr].push_back(Hash);
      }
    }

    auto &CallStack = Entry.getSecond();
    CallStack.erase(std::remove_if(CallStack.begin(), CallStack.end(),
                                   [&AllVAddrsToDiscard](const uint64_t A) {
                                     return AllVAddrsToDiscard.contains(A);
                                   }),
                    CallStack.end());
    if (CallStack.empty())
      EntriesToErase.push_back(Entry.getFirst());
  }

  // Drop the entries where the callstack is empty.
  for (const uint64_t Id : EntriesToErase) {
    StackMap.erase(Id);
    CallstackProfileData.erase(Id);
  }

  if (StackMap.empty())
    return make_error<InstrProfError>(
        instrprof_error::malformed,
        "no entries in callstack map after symbolization");

  return Error::success();
}

Error RawMemProfReader::readRawProfile(
    std::unique_ptr<MemoryBuffer> DataBuffer) {
  const char *Next = DataBuffer->getBufferStart();

  while (Next < DataBuffer->getBufferEnd()) {
    auto *Header = reinterpret_cast<const memprof::Header *>(Next);

    // Read in the segment information and check whether it's the same across
    // all profiles in this binary file.
    const llvm::SmallVector<SegmentEntry> Entries =
        readSegmentEntries(Next + Header->SegmentOffset);
    if (!SegmentInfo.empty() && SegmentInfo != Entries) {
      // We do not expect segment information to change when deserializing from
      // the same binary profile file. This can happen if dynamic libraries are
      // loaded/unloaded between profile dumps.
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile has different segment information");
    }
    SegmentInfo.assign(Entries.begin(), Entries.end());

    // Read in the MemInfoBlocks. Merge them based on stack id - we assume that
    // raw profiles in the same binary file are from the same process so the
    // stackdepot ids are the same.
    for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) {
      if (CallstackProfileData.count(Value.first)) {
        CallstackProfileData[Value.first].Merge(Value.second);
      } else {
        CallstackProfileData[Value.first] = Value.second;
      }
    }

    // Read in the callstack for each id. For multiple raw profiles in the
    // same file, we expect that the callstack is the same for a unique id.
    const CallStackMap CSM = readStackInfo(Next + Header->StackOffset);
    if (StackMap.empty()) {
      StackMap = CSM;
    } else {
      if (mergeStackMap(CSM, StackMap))
        return make_error<InstrProfError>(
            instrprof_error::malformed,
            "memprof raw profile got different call stack for same id");
    }

    Next += Header->TotalSize;
  }

  return Error::success();
}

object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  LLVM_DEBUG({
    SegmentEntry *ContainingSegment = nullptr;
    for (auto &SE : SegmentInfo) {
      if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
        ContainingSegment = &SE;
      }
    }

    // Ensure that the virtual address is valid.
    assert(ContainingSegment && "Could not find a segment entry");
  });

  // TODO: Compute the file offset based on the maps and program headers. For
  // now this only works for non-PIE binaries.
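  // For non-PIE binaries the virtual addresses recorded in the profile match
  // the link-time addresses in the debug info, so the address can be handed
  // to the symbolizer unchanged.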
  return object::SectionedAddress{VirtualAddress};
}

Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
  if (FunctionProfileData.empty())
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Iter == FunctionProfileData.end())
    return make_error<InstrProfError>(instrprof_error::eof);

  auto IdToFrameCallback = [this](const FrameId Id) {
    Frame F = this->idToFrame(Id);
    if (!this->KeepSymbolName)
      return F;
    auto Iter = this->GuidToSymbolName.find(F.Function);
    assert(Iter != this->GuidToSymbolName.end());
    F.SymbolName = Iter->getSecond();
    return F;
  };

  const IndexedMemProfRecord &IndexedRecord = Iter->second;
  GuidRecord = {Iter->first, MemProfRecord(IndexedRecord, IdToFrameCallback)};
  Iter++;
  return Error::success();
}
} // namespace memprof
} // namespace llvm