1 //===- PDBFile.cpp - Low level interface to a PDB file ----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
11 #include "llvm/ADT/ArrayRef.h"
12 #include "llvm/ADT/STLExtras.h"
13 #include "llvm/DebugInfo/MSF/MSFCommon.h"
14 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
15 #include "llvm/DebugInfo/MSF/StreamArray.h"
16 #include "llvm/DebugInfo/MSF/StreamInterface.h"
17 #include "llvm/DebugInfo/MSF/StreamReader.h"
18 #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
19 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
20 #include "llvm/DebugInfo/PDB/Native/InfoStream.h"
21 #include "llvm/DebugInfo/PDB/Native/PublicsStream.h"
22 #include "llvm/DebugInfo/PDB/Native/RawError.h"
23 #include "llvm/DebugInfo/PDB/Native/StringTable.h"
24 #include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
25 #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
26 #include "llvm/Support/Endian.h"
27 #include "llvm/Support/Error.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstdint>
31 
32 using namespace llvm;
33 using namespace llvm::codeview;
34 using namespace llvm::msf;
35 using namespace llvm::pdb;
36 
37 namespace {
38 typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
39 } // end anonymous namespace
40 
41 PDBFile::PDBFile(std::unique_ptr<ReadableStream> PdbFileBuffer,
42                  BumpPtrAllocator &Allocator)
43     : Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {}
44 
45 PDBFile::~PDBFile() = default;
46 
47 uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
48 
49 uint32_t PDBFile::getFreeBlockMapBlock() const {
50   return ContainerLayout.SB->FreeBlockMapBlock;
51 }
52 
53 uint32_t PDBFile::getBlockCount() const {
54   return ContainerLayout.SB->NumBlocks;
55 }
56 
57 uint32_t PDBFile::getNumDirectoryBytes() const {
58   return ContainerLayout.SB->NumDirectoryBytes;
59 }
60 
61 uint32_t PDBFile::getBlockMapIndex() const {
62   return ContainerLayout.SB->BlockMapAddr;
63 }
64 
65 uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
66 
67 uint32_t PDBFile::getNumDirectoryBlocks() const {
68   return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
69                             ContainerLayout.SB->BlockSize);
70 }
71 
72 uint64_t PDBFile::getBlockMapOffset() const {
73   return (uint64_t)ContainerLayout.SB->BlockMapAddr *
74          ContainerLayout.SB->BlockSize;
75 }
76 
77 uint32_t PDBFile::getNumStreams() const {
78   return ContainerLayout.StreamSizes.size();
79 }
80 
81 uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
82   return ContainerLayout.StreamSizes[StreamIndex];
83 }
84 
85 ArrayRef<support::ulittle32_t>
86 PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
87   return ContainerLayout.StreamMap[StreamIndex];
88 }
89 
90 uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
91 
92 Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
93                                                   uint32_t NumBytes) const {
94   uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
95 
96   ArrayRef<uint8_t> Result;
97   if (auto EC = Buffer->readBytes(StreamBlockOffset, NumBytes, Result))
98     return std::move(EC);
99   return Result;
100 }
101 
102 Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
103                             ArrayRef<uint8_t> Data) const {
104   return make_error<RawError>(raw_error_code::not_writable,
105                               "PDBFile is immutable");
106 }
107 
108 Error PDBFile::parseFileHeaders() {
109   StreamReader Reader(*Buffer);
110 
111   // Initialize SB.
112   const msf::SuperBlock *SB = nullptr;
113   if (auto EC = Reader.readObject(SB)) {
114     consumeError(std::move(EC));
115     return make_error<RawError>(raw_error_code::corrupt_file,
116                                 "Does not contain superblock");
117   }
118 
119   if (auto EC = msf::validateSuperBlock(*SB))
120     return EC;
121 
122   if (Buffer->getLength() % SB->BlockSize != 0)
123     return make_error<RawError>(raw_error_code::corrupt_file,
124                                 "File size is not a multiple of block size");
125   ContainerLayout.SB = SB;
126 
127   // Initialize Free Page Map.
128   ContainerLayout.FreePageMap.resize(SB->NumBlocks);
129   // The Fpm exists either at block 1 or block 2 of the MSF.  However, this
130   // allows for a maximum of getBlockSize() * 8 blocks bits in the Fpm, and
131   // thusly an equal number of total blocks in the file.  For a block size
132   // of 4KiB (very common), this would yield 32KiB total blocks in file, for a
133   // maximum file size of 32KiB * 4KiB = 128MiB.  Obviously this won't do, so
134   // the Fpm is split across the file at `getBlockSize()` intervals.  As a
135   // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
136   // for any non-negative integer k is an Fpm block.  In theory, we only really
137   // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
138   // current versions of the MSF format already expect the Fpm to be arranged
139   // at getBlockSize() intervals, so we have to be compatible.
140   // See the function fpmPn() for more information:
141   // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
142   auto FpmStream = MappedBlockStream::createFpmStream(ContainerLayout, *Buffer);
143   StreamReader FpmReader(*FpmStream);
144   ArrayRef<uint8_t> FpmBytes;
145   if (auto EC = FpmReader.readBytes(FpmBytes,
146                                     msf::getFullFpmByteSize(ContainerLayout)))
147     return EC;
148   uint32_t BlocksRemaining = getBlockCount();
149   uint32_t BI = 0;
150   for (auto Byte : FpmBytes) {
151     uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
152     for (uint32_t I = 0; I < BlocksThisByte; ++I) {
153       if (Byte & (1 << I))
154         ContainerLayout.FreePageMap[BI] = true;
155       --BlocksRemaining;
156       ++BI;
157     }
158   }
159 
160   Reader.setOffset(getBlockMapOffset());
161   if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
162                                  getNumDirectoryBlocks()))
163     return EC;
164 
165   return Error::success();
166 }
167 
168 Error PDBFile::parseStreamData() {
169   assert(ContainerLayout.SB);
170   if (DirectoryStream)
171     return Error::success();
172 
173   uint32_t NumStreams = 0;
174 
175   // Normally you can't use a MappedBlockStream without having fully parsed the
176   // PDB file, because it accesses the directory and various other things, which
177   // is exactly what we are attempting to parse.  By specifying a custom
178   // subclass of IPDBStreamData which only accesses the fields that have already
179   // been parsed, we can avoid this and reuse MappedBlockStream.
180   auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer);
181   StreamReader Reader(*DS);
182   if (auto EC = Reader.readInteger(NumStreams))
183     return EC;
184 
185   if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
186     return EC;
187   for (uint32_t I = 0; I < NumStreams; ++I) {
188     uint32_t StreamSize = getStreamByteSize(I);
189     // FIXME: What does StreamSize ~0U mean?
190     uint64_t NumExpectedStreamBlocks =
191         StreamSize == UINT32_MAX
192             ? 0
193             : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
194 
195     // For convenience, we store the block array contiguously.  This is because
196     // if someone calls setStreamMap(), it is more convenient to be able to call
197     // it with an ArrayRef instead of setting up a StreamRef.  Since the
198     // DirectoryStream is cached in the class and thus lives for the life of the
199     // class, we can be guaranteed that readArray() will return a stable
200     // reference, even if it has to allocate from its internal pool.
201     ArrayRef<support::ulittle32_t> Blocks;
202     if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
203       return EC;
204     for (uint32_t Block : Blocks) {
205       uint64_t BlockEndOffset =
206           (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
207       if (BlockEndOffset > getFileSize())
208         return make_error<RawError>(raw_error_code::corrupt_file,
209                                     "Stream block map is corrupt.");
210     }
211     ContainerLayout.StreamMap.push_back(Blocks);
212   }
213 
214   // We should have read exactly SB->NumDirectoryBytes bytes.
215   assert(Reader.bytesRemaining() == 0);
216   DirectoryStream = std::move(DS);
217   return Error::success();
218 }
219 
220 ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
221   return ContainerLayout.DirectoryBlocks;
222 }
223 
224 Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
225   if (!Globals) {
226     auto DbiS = getPDBDbiStream();
227     if (!DbiS)
228       return DbiS.takeError();
229 
230     auto GlobalS = safelyCreateIndexedStream(
231         ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex());
232     if (!GlobalS)
233       return GlobalS.takeError();
234     auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS));
235     if (auto EC = TempGlobals->reload())
236       return std::move(EC);
237     Globals = std::move(TempGlobals);
238   }
239   return *Globals;
240 }
241 
242 Expected<InfoStream &> PDBFile::getPDBInfoStream() {
243   if (!Info) {
244     auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB);
245     if (!InfoS)
246       return InfoS.takeError();
247     auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS));
248     if (auto EC = TempInfo->reload())
249       return std::move(EC);
250     Info = std::move(TempInfo);
251   }
252   return *Info;
253 }
254 
255 Expected<DbiStream &> PDBFile::getPDBDbiStream() {
256   if (!Dbi) {
257     auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI);
258     if (!DbiS)
259       return DbiS.takeError();
260     auto TempDbi = llvm::make_unique<DbiStream>(*this, std::move(*DbiS));
261     if (auto EC = TempDbi->reload())
262       return std::move(EC);
263     Dbi = std::move(TempDbi);
264   }
265   return *Dbi;
266 }
267 
268 Expected<TpiStream &> PDBFile::getPDBTpiStream() {
269   if (!Tpi) {
270     auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI);
271     if (!TpiS)
272       return TpiS.takeError();
273     auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS));
274     if (auto EC = TempTpi->reload())
275       return std::move(EC);
276     Tpi = std::move(TempTpi);
277   }
278   return *Tpi;
279 }
280 
281 Expected<TpiStream &> PDBFile::getPDBIpiStream() {
282   if (!Ipi) {
283     auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI);
284     if (!IpiS)
285       return IpiS.takeError();
286     auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS));
287     if (auto EC = TempIpi->reload())
288       return std::move(EC);
289     Ipi = std::move(TempIpi);
290   }
291   return *Ipi;
292 }
293 
294 Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
295   if (!Publics) {
296     auto DbiS = getPDBDbiStream();
297     if (!DbiS)
298       return DbiS.takeError();
299 
300     auto PublicS = safelyCreateIndexedStream(
301         ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex());
302     if (!PublicS)
303       return PublicS.takeError();
304     auto TempPublics =
305         llvm::make_unique<PublicsStream>(*this, std::move(*PublicS));
306     if (auto EC = TempPublics->reload())
307       return std::move(EC);
308     Publics = std::move(TempPublics);
309   }
310   return *Publics;
311 }
312 
313 Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
314   if (!Symbols) {
315     auto DbiS = getPDBDbiStream();
316     if (!DbiS)
317       return DbiS.takeError();
318 
319     uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
320     auto SymbolS =
321         safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum);
322     if (!SymbolS)
323       return SymbolS.takeError();
324 
325     auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS));
326     if (auto EC = TempSymbols->reload())
327       return std::move(EC);
328     Symbols = std::move(TempSymbols);
329   }
330   return *Symbols;
331 }
332 
333 Expected<StringTable &> PDBFile::getStringTable() {
334   if (!Strings || !StringTableStream) {
335     auto IS = getPDBInfoStream();
336     if (!IS)
337       return IS.takeError();
338 
339     uint32_t NameStreamIndex = IS->getNamedStreamIndex("/names");
340 
341     auto NS =
342         safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex);
343     if (!NS)
344       return NS.takeError();
345 
346     StreamReader Reader(**NS);
347     auto N = llvm::make_unique<StringTable>();
348     if (auto EC = N->load(Reader))
349       return std::move(EC);
350     Strings = std::move(N);
351     StringTableStream = std::move(*NS);
352   }
353   return *Strings;
354 }
355 
356 bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); }
357 
358 bool PDBFile::hasPDBGlobalsStream() {
359   auto DbiS = getPDBDbiStream();
360   if (!DbiS)
361     return false;
362   return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
363 }
364 
365 bool PDBFile::hasPDBInfoStream() { return StreamPDB < getNumStreams(); }
366 
367 bool PDBFile::hasPDBIpiStream() const { return StreamIPI < getNumStreams(); }
368 
369 bool PDBFile::hasPDBPublicsStream() {
370   auto DbiS = getPDBDbiStream();
371   if (!DbiS)
372     return false;
373   return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
374 }
375 
376 bool PDBFile::hasPDBSymbolStream() {
377   auto DbiS = getPDBDbiStream();
378   if (!DbiS)
379     return false;
380   return DbiS->getSymRecordStreamIndex() < getNumStreams();
381 }
382 
383 bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
384 
385 bool PDBFile::hasStringTable() {
386   auto IS = getPDBInfoStream();
387   if (!IS)
388     return false;
389   return IS->getNamedStreamIndex("/names") < getNumStreams();
390 }
391 
392 /// Wrapper around MappedBlockStream::createIndexedStream()
393 /// that checks if a stream with that index actually exists.
394 /// If it does not, the return value will have an MSFError with
395 /// code msf_error_code::no_stream. Else, the return value will
396 /// contain the stream returned by createIndexedStream().
397 Expected<std::unique_ptr<MappedBlockStream>>
398 PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout,
399                                    const ReadableStream &MsfData,
400                                    uint32_t StreamIndex) const {
401   if (StreamIndex >= getNumStreams())
402     return make_error<RawError>(raw_error_code::no_stream);
403   return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex);
404 }
405