1 //===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading MemProf profiling data. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include <cstdint> 14 #include <type_traits> 15 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 18 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 19 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" 20 #include "llvm/IR/Function.h" 21 #include "llvm/Object/Binary.h" 22 #include "llvm/Object/ELFObjectFile.h" 23 #include "llvm/Object/ObjectFile.h" 24 #include "llvm/ProfileData/InstrProf.h" 25 #include "llvm/ProfileData/MemProf.h" 26 #include "llvm/ProfileData/MemProfData.inc" 27 #include "llvm/ProfileData/RawMemProfReader.h" 28 #include "llvm/Support/Endian.h" 29 30 #define DEBUG_TYPE "memprof" 31 32 namespace llvm { 33 namespace memprof { 34 namespace { 35 36 struct Summary { 37 uint64_t Version; 38 uint64_t TotalSizeBytes; 39 uint64_t NumSegments; 40 uint64_t NumMIBInfo; 41 uint64_t NumStackOffsets; 42 }; 43 44 template <class T = uint64_t> inline T alignedRead(const char *Ptr) { 45 static_assert(std::is_pod<T>::value, "Not a pod type."); 46 assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read"); 47 return *reinterpret_cast<const T *>(Ptr); 48 } 49 50 Summary computeSummary(const char *Start) { 51 auto *H = reinterpret_cast<const Header *>(Start); 52 53 // Check alignment while reading the number of items in each section. 54 return Summary{ 55 H->Version, 56 H->TotalSize, 57 alignedRead(Start + H->SegmentOffset), 58 alignedRead(Start + H->MIBOffset), 59 alignedRead(Start + H->StackOffset), 60 }; 61 } 62 63 Error checkBuffer(const MemoryBuffer &Buffer) { 64 if (!RawMemProfReader::hasFormat(Buffer)) 65 return make_error<InstrProfError>(instrprof_error::bad_magic); 66 67 if (Buffer.getBufferSize() == 0) 68 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 69 70 if (Buffer.getBufferSize() < sizeof(Header)) { 71 return make_error<InstrProfError>(instrprof_error::truncated); 72 } 73 74 // The size of the buffer can be > header total size since we allow repeated 75 // serialization of memprof profiles to the same file. 76 uint64_t TotalSize = 0; 77 const char *Next = Buffer.getBufferStart(); 78 while (Next < Buffer.getBufferEnd()) { 79 auto *H = reinterpret_cast<const Header *>(Next); 80 if (H->Version != MEMPROF_RAW_VERSION) { 81 return make_error<InstrProfError>(instrprof_error::unsupported_version); 82 } 83 84 TotalSize += H->TotalSize; 85 Next += H->TotalSize; 86 } 87 88 if (Buffer.getBufferSize() != TotalSize) { 89 return make_error<InstrProfError>(instrprof_error::malformed); 90 } 91 return Error::success(); 92 } 93 94 llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) { 95 using namespace support; 96 97 const uint64_t NumItemsToRead = 98 endian::readNext<uint64_t, little, unaligned>(Ptr); 99 llvm::SmallVector<SegmentEntry> Items; 100 for (uint64_t I = 0; I < NumItemsToRead; I++) { 101 Items.push_back(*reinterpret_cast<const SegmentEntry *>( 102 Ptr + I * sizeof(SegmentEntry))); 103 } 104 return Items; 105 } 106 107 llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> 108 readMemInfoBlocks(const char *Ptr) { 109 using namespace support; 110 111 const uint64_t NumItemsToRead = 112 endian::readNext<uint64_t, little, unaligned>(Ptr); 113 llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Items; 114 for (uint64_t I = 0; I < NumItemsToRead; I++) { 115 const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr); 116 const MemInfoBlock MIB = *reinterpret_cast<const MemInfoBlock *>(Ptr); 117 Items.push_back({Id, MIB}); 118 // Only increment by size of MIB since readNext implicitly increments. 119 Ptr += sizeof(MemInfoBlock); 120 } 121 return Items; 122 } 123 124 CallStackMap readStackInfo(const char *Ptr) { 125 using namespace support; 126 127 const uint64_t NumItemsToRead = 128 endian::readNext<uint64_t, little, unaligned>(Ptr); 129 CallStackMap Items; 130 131 for (uint64_t I = 0; I < NumItemsToRead; I++) { 132 const uint64_t StackId = endian::readNext<uint64_t, little, unaligned>(Ptr); 133 const uint64_t NumPCs = endian::readNext<uint64_t, little, unaligned>(Ptr); 134 135 SmallVector<uint64_t, 32> CallStack; 136 for (uint64_t J = 0; J < NumPCs; J++) { 137 CallStack.push_back(endian::readNext<uint64_t, little, unaligned>(Ptr)); 138 } 139 140 Items[StackId] = CallStack; 141 } 142 return Items; 143 } 144 145 // Merges the contents of stack information in \p From to \p To. Returns true if 146 // any stack ids observed previously map to a different set of program counter 147 // addresses. 148 bool mergeStackMap(const CallStackMap &From, CallStackMap &To) { 149 for (const auto &IdStack : From) { 150 auto I = To.find(IdStack.first); 151 if (I == To.end()) { 152 To[IdStack.first] = IdStack.second; 153 } else { 154 // Check that the PCs are the same (in order). 155 if (IdStack.second != I->second) 156 return true; 157 } 158 } 159 return false; 160 } 161 162 StringRef trimSuffix(const StringRef Name) { 163 const auto Pos = Name.find(".llvm."); 164 return Name.take_front(Pos); 165 } 166 167 Error report(Error E, const StringRef Context) { 168 return joinErrors(createStringError(inconvertibleErrorCode(), Context), 169 std::move(E)); 170 } 171 } // namespace 172 173 Expected<std::unique_ptr<RawMemProfReader>> 174 RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary) { 175 auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path); 176 if (std::error_code EC = BufferOr.getError()) 177 return report(errorCodeToError(EC), Path.getSingleStringRef()); 178 179 std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); 180 if (Error E = checkBuffer(*Buffer)) 181 return report(std::move(E), Path.getSingleStringRef()); 182 183 if (ProfiledBinary.empty()) 184 return report( 185 errorCodeToError(make_error_code(std::errc::invalid_argument)), 186 "Path to profiled binary is empty!"); 187 188 auto BinaryOr = llvm::object::createBinary(ProfiledBinary); 189 if (!BinaryOr) { 190 return report(BinaryOr.takeError(), ProfiledBinary); 191 } 192 193 std::unique_ptr<RawMemProfReader> Reader( 194 new RawMemProfReader(std::move(Buffer), std::move(BinaryOr.get()))); 195 if (Error E = Reader->initialize()) { 196 return std::move(E); 197 } 198 return std::move(Reader); 199 } 200 201 bool RawMemProfReader::hasFormat(const StringRef Path) { 202 auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path); 203 if (!BufferOr) 204 return false; 205 206 std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); 207 return hasFormat(*Buffer); 208 } 209 210 bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) { 211 if (Buffer.getBufferSize() < sizeof(uint64_t)) 212 return false; 213 // Aligned read to sanity check that the buffer was allocated with at least 8b 214 // alignment. 215 const uint64_t Magic = alignedRead(Buffer.getBufferStart()); 216 return Magic == MEMPROF_RAW_MAGIC_64; 217 } 218 219 void RawMemProfReader::printYAML(raw_ostream &OS) { 220 OS << "MemprofProfile:\n"; 221 printSummaries(OS); 222 // Print out the merged contents of the profiles. 223 OS << " Records:\n"; 224 for (const auto &Record : *this) { 225 OS << " -\n"; 226 Record.print(OS); 227 } 228 } 229 230 void RawMemProfReader::printSummaries(raw_ostream &OS) const { 231 const char *Next = DataBuffer->getBufferStart(); 232 while (Next < DataBuffer->getBufferEnd()) { 233 auto Summary = computeSummary(Next); 234 OS << " -\n"; 235 OS << " Header:\n"; 236 OS << " Version: " << Summary.Version << "\n"; 237 OS << " TotalSizeBytes: " << Summary.TotalSizeBytes << "\n"; 238 OS << " NumSegments: " << Summary.NumSegments << "\n"; 239 OS << " NumMibInfo: " << Summary.NumMIBInfo << "\n"; 240 OS << " NumStackOffsets: " << Summary.NumStackOffsets << "\n"; 241 // TODO: Print the build ids once we can record them using the 242 // sanitizer_procmaps library for linux. 243 244 auto *H = reinterpret_cast<const Header *>(Next); 245 Next += H->TotalSize; 246 } 247 } 248 249 Error RawMemProfReader::initialize() { 250 const StringRef FileName = Binary.getBinary()->getFileName(); 251 252 auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary()); 253 if (!ElfObject) { 254 return report(make_error<StringError>(Twine("Not an ELF file: "), 255 inconvertibleErrorCode()), 256 FileName); 257 } 258 259 auto Triple = ElfObject->makeTriple(); 260 if (!Triple.isX86()) 261 return report(make_error<StringError>(Twine("Unsupported target: ") + 262 Triple.getArchName(), 263 inconvertibleErrorCode()), 264 FileName); 265 266 auto *Object = cast<object::ObjectFile>(Binary.getBinary()); 267 std::unique_ptr<DIContext> Context = DWARFContext::create( 268 *Object, DWARFContext::ProcessDebugRelocations::Process); 269 270 auto SOFOr = symbolize::SymbolizableObjectFile::create( 271 Object, std::move(Context), /*UntagAddresses=*/false); 272 if (!SOFOr) 273 return report(SOFOr.takeError(), FileName); 274 Symbolizer = std::move(SOFOr.get()); 275 276 return readRawProfile(); 277 } 278 279 Error RawMemProfReader::readRawProfile() { 280 const char *Next = DataBuffer->getBufferStart(); 281 282 while (Next < DataBuffer->getBufferEnd()) { 283 auto *Header = reinterpret_cast<const memprof::Header *>(Next); 284 285 // Read in the segment information, check whether its the same across all 286 // profiles in this binary file. 287 const llvm::SmallVector<SegmentEntry> Entries = 288 readSegmentEntries(Next + Header->SegmentOffset); 289 if (!SegmentInfo.empty() && SegmentInfo != Entries) { 290 // We do not expect segment information to change when deserializing from 291 // the same binary profile file. This can happen if dynamic libraries are 292 // loaded/unloaded between profile dumping. 293 return make_error<InstrProfError>( 294 instrprof_error::malformed, 295 "memprof raw profile has different segment information"); 296 } 297 SegmentInfo.assign(Entries.begin(), Entries.end()); 298 299 // Read in the MemInfoBlocks. Merge them based on stack id - we assume that 300 // raw profiles in the same binary file are from the same process so the 301 // stackdepot ids are the same. 302 for (const auto &Value : readMemInfoBlocks(Next + Header->MIBOffset)) { 303 if (ProfileData.count(Value.first)) { 304 ProfileData[Value.first].Merge(Value.second); 305 } else { 306 ProfileData[Value.first] = Value.second; 307 } 308 } 309 310 // Read in the callstack for each ids. For multiple raw profiles in the same 311 // file, we expect that the callstack is the same for a unique id. 312 const CallStackMap CSM = readStackInfo(Next + Header->StackOffset); 313 if (StackMap.empty()) { 314 StackMap = CSM; 315 } else { 316 if (mergeStackMap(CSM, StackMap)) 317 return make_error<InstrProfError>( 318 instrprof_error::malformed, 319 "memprof raw profile got different call stack for same id"); 320 } 321 322 Next += Header->TotalSize; 323 } 324 325 return Error::success(); 326 } 327 328 object::SectionedAddress 329 RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) { 330 LLVM_DEBUG({ 331 SegmentEntry *ContainingSegment = nullptr; 332 for (auto &SE : SegmentInfo) { 333 if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) { 334 ContainingSegment = &SE; 335 } 336 } 337 338 // Ensure that the virtual address is valid. 339 assert(ContainingSegment && "Could not find a segment entry"); 340 }); 341 342 // TODO: Compute the file offset based on the maps and program headers. For 343 // now this only works for non PIE binaries. 344 return object::SectionedAddress{VirtualAddress}; 345 } 346 347 Error RawMemProfReader::fillRecord(const uint64_t Id, const MemInfoBlock &MIB, 348 MemProfRecord &Record) { 349 auto &CallStack = StackMap[Id]; 350 DILineInfoSpecifier Specifier( 351 DILineInfoSpecifier::FileLineInfoKind::RawValue, 352 DILineInfoSpecifier::FunctionNameKind::LinkageName); 353 for (const uint64_t Address : CallStack) { 354 Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode( 355 getModuleOffset(Address), Specifier, /*UseSymbolTable=*/false); 356 357 if (!DIOr) 358 return DIOr.takeError(); 359 DIInliningInfo DI = DIOr.get(); 360 361 for (size_t I = 0; I < DI.getNumberOfFrames(); I++) { 362 const auto &Frame = DI.getFrame(I); 363 Record.CallStack.emplace_back( 364 // We use the function guid which we expect to be a uint64_t. At this 365 // time, it is the lower 64 bits of the md5 of the function name. Any 366 // suffix with .llvm. is trimmed since these are added by thinLTO 367 // global promotion. At the time the profile is consumed, these 368 // suffixes will not be present. 369 Function::getGUID(trimSuffix(Frame.FunctionName)), 370 Frame.Line - Frame.StartLine, Frame.Column, 371 // Only the first entry is not an inlined location. 372 I != 0); 373 } 374 } 375 Record.Info = PortableMemInfoBlock(MIB); 376 return Error::success(); 377 } 378 379 Error RawMemProfReader::readNextRecord(MemProfRecord &Record) { 380 if (ProfileData.empty()) 381 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 382 383 if (Iter == ProfileData.end()) 384 return make_error<InstrProfError>(instrprof_error::eof); 385 386 Record.clear(); 387 if (Error E = fillRecord(Iter->first, Iter->second, Record)) { 388 return E; 389 } 390 Iter++; 391 return Error::success(); 392 } 393 } // namespace memprof 394 } // namespace llvm 395