1 //===- RawMemProfReader.cpp - Instrumented memory profiling reader --------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading MemProf profiling data. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include <cstdint> 14 #include <type_traits> 15 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/DebugInfo/DWARF/DWARFContext.h" 19 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" 20 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h" 21 #include "llvm/Object/Binary.h" 22 #include "llvm/Object/ELFObjectFile.h" 23 #include "llvm/Object/ObjectFile.h" 24 #include "llvm/ProfileData/InstrProf.h" 25 #include "llvm/ProfileData/MemProf.h" 26 #include "llvm/ProfileData/MemProfData.inc" 27 #include "llvm/ProfileData/RawMemProfReader.h" 28 #include "llvm/Support/MD5.h" 29 30 namespace llvm { 31 namespace memprof { 32 namespace { 33 34 struct Summary { 35 uint64_t Version; 36 uint64_t TotalSizeBytes; 37 uint64_t NumSegments; 38 uint64_t NumMIBInfo; 39 uint64_t NumStackOffsets; 40 }; 41 42 template <class T = uint64_t> inline T alignedRead(const char *Ptr) { 43 static_assert(std::is_pod<T>::value, "Not a pod type."); 44 assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read"); 45 return *reinterpret_cast<const T *>(Ptr); 46 } 47 48 Summary computeSummary(const char *Start) { 49 auto *H = reinterpret_cast<const Header *>(Start); 50 51 // Check alignment while reading the number of items in each section. 52 return Summary{ 53 H->Version, 54 H->TotalSize, 55 alignedRead(Start + H->SegmentOffset), 56 alignedRead(Start + H->MIBOffset), 57 alignedRead(Start + H->StackOffset), 58 }; 59 } 60 61 Error checkBuffer(const MemoryBuffer &Buffer) { 62 if (!RawMemProfReader::hasFormat(Buffer)) 63 return make_error<InstrProfError>(instrprof_error::bad_magic); 64 65 if (Buffer.getBufferSize() == 0) 66 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 67 68 if (Buffer.getBufferSize() < sizeof(Header)) { 69 return make_error<InstrProfError>(instrprof_error::truncated); 70 } 71 72 // The size of the buffer can be > header total size since we allow repeated 73 // serialization of memprof profiles to the same file. 74 uint64_t TotalSize = 0; 75 const char *Next = Buffer.getBufferStart(); 76 while (Next < Buffer.getBufferEnd()) { 77 auto *H = reinterpret_cast<const Header *>(Next); 78 if (H->Version != MEMPROF_RAW_VERSION) { 79 return make_error<InstrProfError>(instrprof_error::unsupported_version); 80 } 81 82 TotalSize += H->TotalSize; 83 Next += H->TotalSize; 84 } 85 86 if (Buffer.getBufferSize() != TotalSize) { 87 return make_error<InstrProfError>(instrprof_error::malformed); 88 } 89 return Error::success(); 90 } 91 92 // A generic method to read binary data for type T where the first 8b indicate 93 // the number of elements of type T to be read. 94 template <typename T> llvm::SmallVector<T, 16> readInfo(const char *Begin) { 95 const uint64_t NumItemsToRead = *reinterpret_cast<const uint64_t *>(Begin); 96 const char *Ptr = Begin + sizeof(uint64_t); 97 llvm::SmallVector<T, 16> Items; 98 for (uint64_t I = 0; I < NumItemsToRead; I++) { 99 Items.emplace_back(*reinterpret_cast<const T *>(Ptr + I * sizeof(T))); 100 } 101 return Items; 102 } 103 104 CallStackMap readStackInfo(const char *Begin) { 105 const uint64_t NumItemsToRead = *reinterpret_cast<const uint64_t *>(Begin); 106 char *Ptr = const_cast<char *>(Begin) + sizeof(uint64_t); 107 CallStackMap Items; 108 109 uint64_t Count = 0; 110 do { 111 const uint64_t StackId = alignedRead(Ptr); 112 Ptr += sizeof(uint64_t); 113 114 const uint64_t NumPCs = alignedRead(Ptr); 115 Ptr += sizeof(uint64_t); 116 117 SmallVector<uint64_t, 32> CallStack; 118 for (uint64_t I = 0; I < NumPCs; I++) { 119 CallStack.push_back(alignedRead(Ptr)); 120 Ptr += sizeof(uint64_t); 121 } 122 123 Items[StackId] = CallStack; 124 } while (++Count < NumItemsToRead); 125 return Items; 126 } 127 128 // Merges the contents of stack information in \p From to \p To. Returns true if 129 // any stack ids observed previously map to a different set of program counter 130 // addresses. 131 bool mergeStackMap(const CallStackMap &From, CallStackMap &To) { 132 for (const auto &IdStack : From) { 133 auto I = To.find(IdStack.first); 134 if (I == To.end()) { 135 To[IdStack.first] = IdStack.second; 136 } else { 137 // Check that the PCs are the same (in order). 138 if (IdStack.second != I->second) 139 return true; 140 } 141 } 142 return false; 143 } 144 145 StringRef trimSuffix(const StringRef Name) { 146 const auto Pos = Name.find(".llvm."); 147 return Name.take_front(Pos); 148 } 149 150 Error report(Error E, const StringRef Context) { 151 return joinErrors(createStringError(inconvertibleErrorCode(), Context), 152 std::move(E)); 153 } 154 } // namespace 155 156 Expected<std::unique_ptr<RawMemProfReader>> 157 RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary) { 158 auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path); 159 if (std::error_code EC = BufferOr.getError()) 160 return report(errorCodeToError(EC), Path.getSingleStringRef()); 161 162 std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); 163 if (Error E = checkBuffer(*Buffer)) 164 return report(std::move(E), Path.getSingleStringRef()); 165 166 if (ProfiledBinary.empty()) 167 return report( 168 errorCodeToError(make_error_code(std::errc::invalid_argument)), 169 "Path to profiled binary is empty!"); 170 171 auto BinaryOr = llvm::object::createBinary(ProfiledBinary); 172 if (!BinaryOr) { 173 return report(BinaryOr.takeError(), ProfiledBinary); 174 } 175 176 std::unique_ptr<RawMemProfReader> Reader( 177 new RawMemProfReader(std::move(Buffer), std::move(BinaryOr.get()))); 178 if (Error E = Reader->initialize()) { 179 return std::move(E); 180 } 181 return std::move(Reader); 182 } 183 184 bool RawMemProfReader::hasFormat(const StringRef Path) { 185 auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path); 186 if (!BufferOr) 187 return false; 188 189 std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release()); 190 return hasFormat(*Buffer); 191 } 192 193 bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) { 194 if (Buffer.getBufferSize() < sizeof(uint64_t)) 195 return false; 196 // Aligned read to sanity check that the buffer was allocated with at least 8b 197 // alignment. 198 const uint64_t Magic = alignedRead(Buffer.getBufferStart()); 199 return Magic == MEMPROF_RAW_MAGIC_64; 200 } 201 202 void RawMemProfReader::printYAML(raw_ostream &OS) { 203 OS << "MemprofProfile:\n"; 204 printSummaries(OS); 205 // Print out the merged contents of the profiles. 206 OS << " Records:\n"; 207 for (const auto &Record : *this) { 208 OS << " -\n"; 209 Record.print(OS); 210 } 211 } 212 213 void RawMemProfReader::printSummaries(raw_ostream &OS) const { 214 const char *Next = DataBuffer->getBufferStart(); 215 while (Next < DataBuffer->getBufferEnd()) { 216 auto Summary = computeSummary(Next); 217 OS << " -\n"; 218 OS << " Header:\n"; 219 OS << " Version: " << Summary.Version << "\n"; 220 OS << " TotalSizeBytes: " << Summary.TotalSizeBytes << "\n"; 221 OS << " NumSegments: " << Summary.NumSegments << "\n"; 222 OS << " NumMibInfo: " << Summary.NumMIBInfo << "\n"; 223 OS << " NumStackOffsets: " << Summary.NumStackOffsets << "\n"; 224 // TODO: Print the build ids once we can record them using the 225 // sanitizer_procmaps library for linux. 226 227 auto *H = reinterpret_cast<const Header *>(Next); 228 Next += H->TotalSize; 229 } 230 } 231 232 Error RawMemProfReader::initialize() { 233 const StringRef FileName = Binary.getBinary()->getFileName(); 234 235 auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary()); 236 if (!ElfObject) { 237 return report(make_error<StringError>(Twine("Not an ELF file: "), 238 inconvertibleErrorCode()), 239 FileName); 240 } 241 242 auto Triple = ElfObject->makeTriple(); 243 if (!Triple.isX86()) 244 return report(make_error<StringError>(Twine("Unsupported target: ") + 245 Triple.getArchName(), 246 inconvertibleErrorCode()), 247 FileName); 248 249 auto *Object = cast<object::ObjectFile>(Binary.getBinary()); 250 std::unique_ptr<DIContext> Context = DWARFContext::create( 251 *Object, DWARFContext::ProcessDebugRelocations::Process); 252 253 auto SOFOr = symbolize::SymbolizableObjectFile::create( 254 Object, std::move(Context), /*UntagAddresses=*/false); 255 if (!SOFOr) 256 return report(SOFOr.takeError(), FileName); 257 Symbolizer = std::move(SOFOr.get()); 258 259 return readRawProfile(); 260 } 261 262 Error RawMemProfReader::readRawProfile() { 263 const char *Next = DataBuffer->getBufferStart(); 264 265 while (Next < DataBuffer->getBufferEnd()) { 266 auto *Header = reinterpret_cast<const memprof::Header *>(Next); 267 268 // Read in the segment information, check whether its the same across all 269 // profiles in this binary file. 270 if (SegmentInfo.empty()) { 271 SegmentInfo = readInfo<SegmentEntry>(Next + Header->SegmentOffset); 272 } else { 273 auto Info = readInfo<SegmentEntry>(Next + Header->SegmentOffset); 274 // We do not expect segment information to change when deserializing from 275 // the same binary profile file. This can happen if dynamic libraries are 276 // loaded/unloaded between profile dumping. 277 if (SegmentInfo != Info) { 278 return make_error<InstrProfError>(instrprof_error::malformed); 279 } 280 } 281 282 // Read in the MemInfoBlocks. Merge them based on stack id - we assume that 283 // raw profiles in the same binary file are from the same process so the 284 // stackdepot ids are the same. 285 PACKED(struct IDAndMIB { 286 uint64_t Id; 287 MemInfoBlock MIB; 288 }); 289 for (const auto &Value : readInfo<IDAndMIB>(Next + Header->MIBOffset)) { 290 if (ProfileData.count(Value.Id)) { 291 ProfileData[Value.Id].Merge(Value.MIB); 292 } else { 293 ProfileData[Value.Id] = Value.MIB; 294 } 295 } 296 297 // Read in the callstack for each ids. For multiple raw profiles in the same 298 // file, we expect that the callstack is the same for a unique id. 299 const CallStackMap CSM = readStackInfo(Next + Header->StackOffset); 300 if (StackMap.empty()) { 301 StackMap = CSM; 302 } else { 303 if (mergeStackMap(CSM, StackMap)) 304 return make_error<InstrProfError>(instrprof_error::malformed); 305 } 306 307 Next += Header->TotalSize; 308 } 309 310 return Error::success(); 311 } 312 313 object::SectionedAddress 314 RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) { 315 SegmentEntry *ContainingSegment = nullptr; 316 for (auto &SE : SegmentInfo) { 317 if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) { 318 ContainingSegment = &SE; 319 } 320 } 321 322 // Ensure that the virtual address is valid. 323 assert(ContainingSegment && "Could not find a segment entry"); 324 325 // TODO: Compute the file offset based on the maps and program headers. For 326 // now this only works for non PIE binaries. 327 return object::SectionedAddress{VirtualAddress}; 328 } 329 330 Error RawMemProfReader::fillRecord(const uint64_t Id, const MemInfoBlock &MIB, 331 MemProfRecord &Record) { 332 auto &CallStack = StackMap[Id]; 333 DILineInfoSpecifier Specifier( 334 DILineInfoSpecifier::FileLineInfoKind::RawValue, 335 DILineInfoSpecifier::FunctionNameKind::LinkageName); 336 for (const uint64_t Address : CallStack) { 337 Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode( 338 getModuleOffset(Address), Specifier, /*UseSymbolTable=*/false); 339 340 if (!DIOr) 341 return DIOr.takeError(); 342 DIInliningInfo DI = DIOr.get(); 343 344 for (size_t I = 0; I < DI.getNumberOfFrames(); I++) { 345 const auto &Frame = DI.getFrame(I); 346 Record.CallStack.emplace_back( 347 std::to_string(llvm::MD5Hash(trimSuffix(Frame.FunctionName))), 348 Frame.Line - Frame.StartLine, Frame.Column, 349 // Only the first entry is not an inlined location. 350 I != 0); 351 } 352 } 353 Record.Info = MIB; 354 return Error::success(); 355 } 356 357 Error RawMemProfReader::readNextRecord(MemProfRecord &Record) { 358 if (ProfileData.empty()) 359 return make_error<InstrProfError>(instrprof_error::empty_raw_profile); 360 361 if (Iter == ProfileData.end()) 362 return make_error<InstrProfError>(instrprof_error::eof); 363 364 Record.clear(); 365 if (Error E = fillRecord(Iter->first, Iter->second, Record)) { 366 return E; 367 } 368 Iter++; 369 return Error::success(); 370 } 371 372 } // namespace memprof 373 } // namespace llvm 374