1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/DebugInfo/PDB/Native/PDBFile.h" 11 #include "llvm/ADT/ArrayRef.h" 12 #include "llvm/ADT/STLExtras.h" 13 #include "llvm/DebugInfo/MSF/MSFCommon.h" 14 #include "llvm/DebugInfo/MSF/MappedBlockStream.h" 15 #include "llvm/DebugInfo/MSF/StreamArray.h" 16 #include "llvm/DebugInfo/MSF/StreamInterface.h" 17 #include "llvm/DebugInfo/MSF/StreamReader.h" 18 #include "llvm/DebugInfo/PDB/Native/DbiStream.h" 19 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h" 20 #include "llvm/DebugInfo/PDB/Native/InfoStream.h" 21 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h" 22 #include "llvm/DebugInfo/PDB/Native/RawError.h" 23 #include "llvm/DebugInfo/PDB/Native/StringTable.h" 24 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h" 25 #include "llvm/DebugInfo/PDB/Native/TpiStream.h" 26 #include "llvm/Support/Endian.h" 27 #include "llvm/Support/Error.h" 28 #include <algorithm> 29 #include <cassert> 30 #include <cstdint> 31 32 using namespace llvm; 33 using namespace llvm::codeview; 34 using namespace llvm::msf; 35 using namespace llvm::pdb; 36 37 namespace { 38 typedef FixedStreamArray<support::ulittle32_t> ulittle_array; 39 } // end anonymous namespace 40 41 PDBFile::PDBFile(std::unique_ptr<ReadableStream> PdbFileBuffer, 42 BumpPtrAllocator &Allocator) 43 : Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {} 44 45 PDBFile::~PDBFile() = default; 46 47 uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; } 48 49 uint32_t PDBFile::getFreeBlockMapBlock() const { 50 return ContainerLayout.SB->FreeBlockMapBlock; 51 } 52 53 uint32_t PDBFile::getBlockCount() const { 54 return ContainerLayout.SB->NumBlocks; 55 } 56 57 uint32_t PDBFile::getNumDirectoryBytes() const { 58 return ContainerLayout.SB->NumDirectoryBytes; 59 } 60 61 uint32_t PDBFile::getBlockMapIndex() const { 62 return ContainerLayout.SB->BlockMapAddr; 63 } 64 65 uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; } 66 67 uint32_t PDBFile::getNumDirectoryBlocks() const { 68 return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes, 69 ContainerLayout.SB->BlockSize); 70 } 71 72 uint64_t PDBFile::getBlockMapOffset() const { 73 return (uint64_t)ContainerLayout.SB->BlockMapAddr * 74 ContainerLayout.SB->BlockSize; 75 } 76 77 uint32_t PDBFile::getNumStreams() const { 78 return ContainerLayout.StreamSizes.size(); 79 } 80 81 uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const { 82 return ContainerLayout.StreamSizes[StreamIndex]; 83 } 84 85 ArrayRef<support::ulittle32_t> 86 PDBFile::getStreamBlockList(uint32_t StreamIndex) const { 87 return ContainerLayout.StreamMap[StreamIndex]; 88 } 89 90 uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); } 91 92 Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex, 93 uint32_t NumBytes) const { 94 uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize()); 95 96 ArrayRef<uint8_t> Result; 97 if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result)) 98 return std::move(EC); 99 return Result; 100 } 101 102 Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset, 103 ArrayRef<uint8_t> Data) const { 104 return make_error<RawError>(raw_error_code::not_writable, 105 "PDBFile is immutable"); 106 } 107 108 Error PDBFile::parseFileHeaders() { 109 StreamReader Reader(*Buffer); 110 111 // Initialize SB. 112 const msf::SuperBlock *SB = nullptr; 113 if (auto EC = Reader.readObject(SB)) { 114 consumeError(std::move(EC)); 115 return make_error<RawError>(raw_error_code::corrupt_file, 116 "Does not contain superblock"); 117 } 118 119 if (auto EC = msf::validateSuperBlock(*SB)) 120 return EC; 121 122 if (Buffer->getLength() % SB->BlockSize != 0) 123 return make_error<RawError>(raw_error_code::corrupt_file, 124 "File size is not a multiple of block size"); 125 ContainerLayout.SB = SB; 126 127 // Initialize Free Page Map. 128 ContainerLayout.FreePageMap.resize(SB->NumBlocks); 129 // The Fpm exists either at block 1 or block 2 of the MSF. However, this 130 // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and 131 // thusly an equal number of total blocks in the file. For a block size 132 // of 4KiB (very common), this would yield 32KiB total blocks in file, for a 133 // maximum file size of 32KiB * 4KiB = 128MiB. Obviously this won't do, so 134 // the Fpm is split across the file at `getBlockSize()` intervals. As a 135 // result, every block whose index is of the form |{1,2} + getBlockSize() * k| 136 // for any non-negative integer k is an Fpm block. In theory, we only really 137 // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but 138 // current versions of the MSF format already expect the Fpm to be arranged 139 // at getBlockSize() intervals, so we have to be compatible. 140 // See the function fpmPn() for more information: 141 // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489 142 auto FpmStream = MappedBlockStream::createFpmStream(ContainerLayout, *Buffer); 143 StreamReader FpmReader(*FpmStream); 144 ArrayRef<uint8_t> FpmBytes; 145 if (auto EC = FpmReader.readBytes(FpmBytes, 146 msf::getFullFpmByteSize(ContainerLayout))) 147 return EC; 148 uint32_t BlocksRemaining = getBlockCount(); 149 uint32_t BI = 0; 150 for (auto Byte : FpmBytes) { 151 uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U); 152 for (uint32_t I = 0; I < BlocksThisByte; ++I) { 153 if (Byte & (1 << I)) 154 ContainerLayout.FreePageMap[BI] = true; 155 --BlocksRemaining; 156 ++BI; 157 } 158 } 159 160 Reader.setOffset(getBlockMapOffset()); 161 if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks, 162 getNumDirectoryBlocks())) 163 return EC; 164 165 return Error::success(); 166 } 167 168 Error PDBFile::parseStreamData() { 169 assert(ContainerLayout.SB); 170 if (DirectoryStream) 171 return Error::success(); 172 173 uint32_t NumStreams = 0; 174 175 // Normally you can't use a MappedBlockStream without having fully parsed the 176 // PDB file, because it accesses the directory and various other things, which 177 // is exactly what we are attempting to parse. By specifying a custom 178 // subclass of IPDBStreamData which only accesses the fields that have already 179 // been parsed, we can avoid this and reuse MappedBlockStream. 180 auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer); 181 StreamReader Reader(*DS); 182 if (auto EC = Reader.readInteger(NumStreams)) 183 return EC; 184 185 if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams)) 186 return EC; 187 for (uint32_t I = 0; I < NumStreams; ++I) { 188 uint32_t StreamSize = getStreamByteSize(I); 189 // FIXME: What does StreamSize ~0U mean? 190 uint64_t NumExpectedStreamBlocks = 191 StreamSize == UINT32_MAX 192 ? 0 193 : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize); 194 195 // For convenience, we store the block array contiguously. This is because 196 // if someone calls setStreamMap(), it is more convenient to be able to call 197 // it with an ArrayRef instead of setting up a StreamRef. Since the 198 // DirectoryStream is cached in the class and thus lives for the life of the 199 // class, we can be guaranteed that readArray() will return a stable 200 // reference, even if it has to allocate from its internal pool. 201 ArrayRef<support::ulittle32_t> Blocks; 202 if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks)) 203 return EC; 204 for (uint32_t Block : Blocks) { 205 uint64_t BlockEndOffset = 206 (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize; 207 if (BlockEndOffset > getFileSize()) 208 return make_error<RawError>(raw_error_code::corrupt_file, 209 "Stream block map is corrupt."); 210 } 211 ContainerLayout.StreamMap.push_back(Blocks); 212 } 213 214 // We should have read exactly SB->NumDirectoryBytes bytes. 215 assert(Reader.bytesRemaining() == 0); 216 DirectoryStream = std::move(DS); 217 return Error::success(); 218 } 219 220 ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const { 221 return ContainerLayout.DirectoryBlocks; 222 } 223 224 Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() { 225 if (!Globals) { 226 auto DbiS = getPDBDbiStream(); 227 if (!DbiS) 228 return DbiS.takeError(); 229 230 auto GlobalS = safelyCreateIndexedStream( 231 ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex()); 232 if (!GlobalS) 233 return GlobalS.takeError(); 234 auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS)); 235 if (auto EC = TempGlobals->reload()) 236 return std::move(EC); 237 Globals = std::move(TempGlobals); 238 } 239 return *Globals; 240 } 241 242 Expected<InfoStream &> PDBFile::getPDBInfoStream() { 243 if (!Info) { 244 auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB); 245 if (!InfoS) 246 return InfoS.takeError(); 247 auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS)); 248 if (auto EC = TempInfo->reload()) 249 return std::move(EC); 250 Info = std::move(TempInfo); 251 } 252 return *Info; 253 } 254 255 Expected<DbiStream &> PDBFile::getPDBDbiStream() { 256 if (!Dbi) { 257 auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI); 258 if (!DbiS) 259 return DbiS.takeError(); 260 auto TempDbi = llvm::make_unique<DbiStream>(*this, std::move(*DbiS)); 261 if (auto EC = TempDbi->reload()) 262 return std::move(EC); 263 Dbi = std::move(TempDbi); 264 } 265 return *Dbi; 266 } 267 268 Expected<TpiStream &> PDBFile::getPDBTpiStream() { 269 if (!Tpi) { 270 auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI); 271 if (!TpiS) 272 return TpiS.takeError(); 273 auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS)); 274 if (auto EC = TempTpi->reload()) 275 return std::move(EC); 276 Tpi = std::move(TempTpi); 277 } 278 return *Tpi; 279 } 280 281 Expected<TpiStream &> PDBFile::getPDBIpiStream() { 282 if (!Ipi) { 283 auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI); 284 if (!IpiS) 285 return IpiS.takeError(); 286 auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS)); 287 if (auto EC = TempIpi->reload()) 288 return std::move(EC); 289 Ipi = std::move(TempIpi); 290 } 291 return *Ipi; 292 } 293 294 Expected<PublicsStream &> PDBFile::getPDBPublicsStream() { 295 if (!Publics) { 296 auto DbiS = getPDBDbiStream(); 297 if (!DbiS) 298 return DbiS.takeError(); 299 300 auto PublicS = safelyCreateIndexedStream( 301 ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex()); 302 if (!PublicS) 303 return PublicS.takeError(); 304 auto TempPublics = 305 llvm::make_unique<PublicsStream>(*this, std::move(*PublicS)); 306 if (auto EC = TempPublics->reload()) 307 return std::move(EC); 308 Publics = std::move(TempPublics); 309 } 310 return *Publics; 311 } 312 313 Expected<SymbolStream &> PDBFile::getPDBSymbolStream() { 314 if (!Symbols) { 315 auto DbiS = getPDBDbiStream(); 316 if (!DbiS) 317 return DbiS.takeError(); 318 319 uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex(); 320 auto SymbolS = 321 safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum); 322 if (!SymbolS) 323 return SymbolS.takeError(); 324 325 auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS)); 326 if (auto EC = TempSymbols->reload()) 327 return std::move(EC); 328 Symbols = std::move(TempSymbols); 329 } 330 return *Symbols; 331 } 332 333 Expected<StringTable &> PDBFile::getStringTable() { 334 if (!Strings || !StringTableStream) { 335 auto IS = getPDBInfoStream(); 336 if (!IS) 337 return IS.takeError(); 338 339 uint32_t NameStreamIndex = IS->getNamedStreamIndex("/names"); 340 341 auto NS = 342 safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex); 343 if (!NS) 344 return NS.takeError(); 345 346 StreamReader Reader(**NS); 347 auto N = llvm::make_unique<StringTable>(); 348 if (auto EC = N->load(Reader)) 349 return std::move(EC); 350 Strings = std::move(N); 351 StringTableStream = std::move(*NS); 352 } 353 return *Strings; 354 } 355 356 bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); } 357 358 bool PDBFile::hasPDBGlobalsStream() { 359 auto DbiS = getPDBDbiStream(); 360 if (!DbiS) 361 return false; 362 return DbiS->getGlobalSymbolStreamIndex() < getNumStreams(); 363 } 364 365 bool PDBFile::hasPDBInfoStream() { return StreamPDB < getNumStreams(); } 366 367 bool PDBFile::hasPDBIpiStream() const { return StreamIPI < getNumStreams(); } 368 369 bool PDBFile::hasPDBPublicsStream() { 370 auto DbiS = getPDBDbiStream(); 371 if (!DbiS) 372 return false; 373 return DbiS->getPublicSymbolStreamIndex() < getNumStreams(); 374 } 375 376 bool PDBFile::hasPDBSymbolStream() { 377 auto DbiS = getPDBDbiStream(); 378 if (!DbiS) 379 return false; 380 return DbiS->getSymRecordStreamIndex() < getNumStreams(); 381 } 382 383 bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); } 384 385 bool PDBFile::hasStringTable() { 386 auto IS = getPDBInfoStream(); 387 if (!IS) 388 return false; 389 return IS->getNamedStreamIndex("/names") < getNumStreams(); 390 } 391 392 /// Wrapper around MappedBlockStream::createIndexedStream() 393 /// that checks if a stream with that index actually exists. 394 /// If it does not, the return value will have an MSFError with 395 /// code msf_error_code::no_stream. Else, the return value will 396 /// contain the stream returned by createIndexedStream(). 397 Expected<std::unique_ptr<MappedBlockStream>> 398 PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout, 399 const ReadableStream &MsfData, 400 uint32_t StreamIndex) const { 401 if (StreamIndex >= getNumStreams()) 402 return make_error<RawError>(raw_error_code::no_stream); 403 return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex); 404 } 405