1 //===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading MemProf profiling data. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include <cstdint> 14 #include <type_traits> 15 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 19 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 20 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" 21 #include "llvm/Object/Binary.h" 22 #include "llvm/Object/ELFObjectFile.h" 23 #include "llvm/Object/ObjectFile.h" 24 #include "llvm/ProfileData/InstrProf.h" 25 #include "llvm/ProfileData/MemProf.h" 26 #include "llvm/ProfileData/MemProfData.inc" 27 #include "llvm/ProfileData/RawMemProfReader.h" 28 #include "llvm/Support/Endian.h" 29 #include "llvm/Support/MD5.h" 30 31 namespace llvm { 32 namespace memprof { 33 namespace { 34 35 struct Summary { 36 uint64_t Version; 37 uint64_t TotalSizeBytes; 38 uint64_t NumSegments; 39 uint64_t NumMIBInfo; 40 uint64_t NumStackOffsets; 41 }; 42 43 template <class T = uint64_t> inline T alignedRead(const char *Ptr) { 44 static_assert(std::is_pod<T>::value, "Not a pod type."); 45 assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read"); 46 return *reinterpret_cast<const T *>(Ptr); 47 } 48 49 Summary computeSummary(const char *Start) { 50 auto *H = reinterpret_cast<const Header *>(Start); 51 52 // Check alignment while reading the number of items in each section. 53 return Summary{ 54 H->Version, 55 H->TotalSize, 56 alignedRead(Start + H->SegmentOffset), 57 alignedRead(Start + H->MIBOffset), 58 alignedRead(Start + H->StackOffset), 59 }; 60 } 61 62 Error checkBuffer(const MemoryBuffer &Buffer) { 63 if (!RawMemProfReader::hasFormat(Buffer)) 64 return make_error<InstrProfError>(instrprof_error::bad_magic); 65 66 if (Buffer.getBufferSize() == 0) 67 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 68 69 if (Buffer.getBufferSize() < sizeof(Header)) { 70 return make_error<InstrProfError>(instrprof_error::truncated); 71 } 72 73 // The size of the buffer can be > header total size since we allow repeated 74 // serialization of memprof profiles to the same file. 75 uint64_t TotalSize = 0; 76 const char *Next = Buffer.getBufferStart(); 77 while (Next < Buffer.getBufferEnd()) { 78 auto *H = reinterpret_cast<const Header *>(Next); 79 if (H->Version != MEMPROF_RAW_VERSION) { 80 return make_error<InstrProfError>(instrprof_error::unsupported_version); 81 } 82 83 TotalSize += H->TotalSize; 84 Next += H->TotalSize; 85 } 86 87 if (Buffer.getBufferSize() != TotalSize) { 88 return make_error<InstrProfError>(instrprof_error::malformed); 89 } 90 return Error::success(); 91 } 92 93 llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) { 94 using namespace support; 95 96 const uint64_t NumItemsToRead = 97 endian::readNext<uint64_t, little, unaligned>(Ptr); 98 llvm::SmallVector<SegmentEntry> Items; 99 for (uint64_t I = 0; I < NumItemsToRead; I++) { 100 Items.push_back(*reinterpret_cast<const SegmentEntry *>( 101 Ptr + I * sizeof(SegmentEntry))); 102 } 103 return Items; 104 } 105 106 llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> 107 readMemInfoBlocks(const char *Ptr) { 108 using namespace support; 109 110 const uint64_t NumItemsToRead = 111 endian::readNext<uint64_t, little, unaligned>(Ptr); 112 llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items; 113 for (uint64_t I = 0; I < NumItemsToRead; I++) { 114 const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr); 115 const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr); 116 Items.push_back({Id, MIB}); 117 // Only increment by size of MIB since readNext implicitly increments. 118 Ptr += sizeof(MemInfoBlock); 119 } 120 return Items; 121 } 122 123 CallStackMap readStackInfo(const char *Ptr) { 124 using namespace support; 125 126 const uint64_t NumItemsToRead = 127 endian::readNext<uint64_t, little, unaligned>(Ptr); 128 CallStackMap Items; 129 130 for (uint64_t I = 0; I < NumItemsToRead; I++) { 131 const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr); 132 const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr); 133 134 SmallVector<uint64_t, 32> CallStack; 135 for (uint64_t J = 0; J < NumPCs; J++) { 136 CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr)); 137 } 138 139 Items[StackId] = CallStack; 140 } 141 return Items; 142 } 143 144 // Merges the contents of stack information in \p From to \p To. Returns true if 145 // any stack ids observed previously map to a different set of program counter 146 // addresses. 147 bool mergeStackMap(const CallStackMap &From, CallStackMap &To) { 148 for (const auto &IdStack : From) { 149 auto I = To.find(IdStack.first); 150 if (I == To.end()) { 151 To[IdStack.first] = IdStack.second; 152 } else { 153 // Check that the PCs are the same (in order). 154 if (IdStack.second != I->second) 155 return true; 156 } 157 } 158 return false; 159 } 160 161 StringRef trimSuffix(const StringRef Name) { 162 const auto Pos = Name.find(".llvm."); 163 return Name.take_front(Pos); 164 } 165 166 Error report(Error E, const StringRef Context) { 167 return joinErrors(createStringError(inconvertibleErrorCode(), Context), 168 std::move(E)); 169 } 170 } // namespace 171 172 Expected<std::unique_ptr<RawMemProfReader>> 173 RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary) { 174 auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path); 175 if (std::error_code EC = BufferOr.getError()) 176 return report(errorCodeToError(EC), Path.getSingleStringRef()); 177 178 std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); 179 if (Error E = checkBuffer(*Buffer)) 180 return report(std::move(E), Path.getSingleStringRef()); 181 182 if (ProfiledBinary.empty()) 183 return report( 184 errorCodeToError(make_error_code(std::errc::invalid_argument)), 185 "Path to profiled binary is empty!"); 186 187 auto BinaryOr = llvm::object::createBinary(ProfiledBinary); 188 if (!BinaryOr) { 189 return report(BinaryOr.takeError(), ProfiledBinary); 190 } 191 192 std::unique_ptr<RawMemProfReader> Reader( 193 new RawMemProfReader(std::move(Buffer), std::move(BinaryOr.get()))); 194 if (Error E = Reader->initialize()) { 195 return std::move(E); 196 } 197 return std::move(Reader); 198 } 199 200 bool RawMemProfReader::hasFormat(const StringRef Path) { 201 auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path); 202 if (!BufferOr) 203 return false; 204 205 std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); 206 return hasFormat(*Buffer); 207 } 208 209 bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) { 210 if (Buffer.getBufferSize() < sizeof(uint64_t)) 211 return false; 212 // Aligned read to sanity check that the buffer was allocated with at least 8b 213 // alignment. 214 const uint64_t Magic = alignedRead(Buffer.getBufferStart()); 215 return Magic == MEMPROF_RAW_MAGIC_64; 216 } 217 218 void RawMemProfReader::printYAML(raw_ostream &OS) { 219 OS << "MemprofProfile:\n"; 220 printSummaries(OS); 221 // Print out the merged contents of the profiles. 222 OS << " Records:\n"; 223 for (const auto &Record : *this) { 224 OS << " -\n"; 225 Record.print(OS); 226 } 227 } 228 229 void RawMemProfReader::printSummaries(raw_ostream &OS) const { 230 const char *Next = DataBuffer->getBufferStart(); 231 while (Next < DataBuffer->getBufferEnd()) { 232 auto Summary = computeSummary(Next); 233 OS << " -\n"; 234 OS << " Header:\n"; 235 OS << " Version: " << Summary.Version << "\n"; 236 OS << " TotalSizeBytes: " << Summary.TotalSizeBytes << "\n"; 237 OS << " NumSegments: " << Summary.NumSegments << "\n"; 238 OS << " NumMibInfo: " << Summary.NumMIBInfo << "\n"; 239 OS << " NumStackOffsets: " << Summary.NumStackOffsets << "\n"; 240 // TODO: Print the build ids once we can record them using the 241 // sanitizer_procmaps library for linux. 242 243 auto *H = reinterpret_cast<const Header *>(Next); 244 Next += H->TotalSize; 245 } 246 } 247 248 Error RawMemProfReader::initialize() { 249 const StringRef FileName = Binary.getBinary()->getFileName(); 250 251 auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary()); 252 if (!ElfObject) { 253 return report(make_error<StringError>(Twine("Not an ELF file: "), 254 inconvertibleErrorCode()), 255 FileName); 256 } 257 258 auto Triple = ElfObject->makeTriple(); 259 if (!Triple.isX86()) 260 return report(make_error<StringError>(Twine("Unsupported target: ") + 261 Triple.getArchName(), 262 inconvertibleErrorCode()), 263 FileName); 264 265 auto *Object = cast<object::ObjectFile>(Binary.getBinary()); 266 std::unique_ptr<DIContext> Context = DWARFContext::create( 267 *Object, DWARFContext::ProcessDebugRelocations::Process); 268 269 auto SOFOr = symbolize::SymbolizableObjectFile::create( 270 Object, std::move(Context), /*UntagAddresses=*/false); 271 if (!SOFOr) 272 return report(SOFOr.takeError(), FileName); 273 Symbolizer = std::move(SOFOr.get()); 274 275 return readRawProfile(); 276 } 277 278 Error RawMemProfReader::readRawProfile() { 279 const char *Next = DataBuffer->getBufferStart(); 280 281 while (Next < DataBuffer->getBufferEnd()) { 282 auto *Header = reinterpret_cast<const memprof::Header *>(Next); 283 284 // Read in the segment information, check whether its the same across all 285 // profiles in this binary file. 286 const llvm::SmallVector<SegmentEntry> Entries = 287 readSegmentEntries(Next + Header->SegmentOffset); 288 if (!SegmentInfo.empty() && SegmentInfo != Entries) { 289 // We do not expect segment information to change when deserializing from 290 // the same binary profile file. This can happen if dynamic libraries are 291 // loaded/unloaded between profile dumping. 292 return make_error<InstrProfError>( 293 instrprof_error::malformed, 294 "memprof raw profile has different segment information"); 295 } 296 SegmentInfo.assign(Entries.begin(), Entries.end()); 297 298 // Read in the MemInfoBlocks. Merge them based on stack id - we assume that 299 // raw profiles in the same binary file are from the same process so the 300 // stackdepot ids are the same. 301 for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) { 302 if (ProfileData.count(Value.first)) { 303 ProfileData[Value.first].Merge(Value.second); 304 } else { 305 ProfileData[Value.first] = Value.second; 306 } 307 } 308 309 // Read in the callstack for each ids. For multiple raw profiles in the same 310 // file, we expect that the callstack is the same for a unique id. 311 const CallStackMap CSM = readStackInfo(Next + Header->StackOffset); 312 if (StackMap.empty()) { 313 StackMap = CSM; 314 } else { 315 if (mergeStackMap(CSM, StackMap)) 316 return make_error<InstrProfError>( 317 instrprof_error::malformed, 318 "memprof raw profile got different call stack for same id"); 319 } 320 321 Next += Header->TotalSize; 322 } 323 324 return Error::success(); 325 } 326 327 object::SectionedAddress 328 RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) { 329 SegmentEntry *ContainingSegment = nullptr; 330 for (auto &SE : SegmentInfo) { 331 if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) { 332 ContainingSegment = &SE; 333 } 334 } 335 336 // Ensure that the virtual address is valid. 337 assert(ContainingSegment && "Could not find a segment entry"); 338 339 // TODO: Compute the file offset based on the maps and program headers. For 340 // now this only works for non PIE binaries. 341 return object::SectionedAddress{VirtualAddress}; 342 } 343 344 Error RawMemProfReader::fillRecord(const uint64_t Id, const MemInfoBlock &MIB, 345 MemProfRecord &Record) { 346 auto &CallStack = StackMap[Id]; 347 DILineInfoSpecifier Specifier( 348 DILineInfoSpecifier::FileLineInfoKind::RawValue, 349 DILineInfoSpecifier::FunctionNameKind::LinkageName); 350 for (const uint64_t Address : CallStack) { 351 Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode( 352 getModuleOffset(Address), Specifier, /*UseSymbolTable=*/false); 353 354 if (!DIOr) 355 return DIOr.takeError(); 356 DIInliningInfo DI = DIOr.get(); 357 358 for (size_t I = 0; I < DI.getNumberOfFrames(); I++) { 359 const auto &Frame = DI.getFrame(I); 360 Record.CallStack.emplace_back( 361 std::to_string(llvm::MD5Hash(trimSuffix(Frame.FunctionName))), 362 Frame.Line - Frame.StartLine, Frame.Column, 363 // Only the first entry is not an inlined location. 364 I != 0); 365 } 366 } 367 Record.Info = MIB; 368 return Error::success(); 369 } 370 371 Error RawMemProfReader::readNextRecord(MemProfRecord &Record) { 372 if (ProfileData.empty()) 373 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 374 375 if (Iter == ProfileData.end()) 376 return make_error<InstrProfError>(instrprof_error::eof); 377 378 Record.clear(); 379 if (Error E = fillRecord(Iter->first, Iter->second, Record)) { 380 return E; 381 } 382 Iter++; 383 return Error::success(); 384 } 385 } // namespace memprof 386 } // namespace llvm 387