1 //===- DbiStreamBuilder.cpp - PDB Dbi Stream Creation -----------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
11 
12 #include "llvm/DebugInfo/CodeView/RecordName.h"
13 #include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
14 #include "llvm/DebugInfo/CodeView/SymbolRecord.h"
15 #include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
16 #include "llvm/DebugInfo/MSF/MSFBuilder.h"
17 #include "llvm/DebugInfo/MSF/MSFCommon.h"
18 #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
19 #include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
20 #include "llvm/DebugInfo/PDB/Native/Hash.h"
21 #include "llvm/Support/BinaryItemStream.h"
22 #include "llvm/Support/BinaryStreamWriter.h"
23 #include <algorithm>
24 #include <vector>
25 
26 using namespace llvm;
27 using namespace llvm::msf;
28 using namespace llvm::pdb;
29 using namespace llvm::codeview;
30 
31 struct llvm::pdb::GSIHashStreamBuilder {
32   std::vector<CVSymbol> Records;
33   uint32_t StreamIndex;
34   std::vector<PSHashRecord> HashRecords;
35   std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;
36   std::vector<support::ulittle32_t> HashBuckets;
37 
38   uint32_t calculateSerializedLength() const;
39   uint32_t calculateRecordByteSize() const;
40   Error commit(BinaryStreamWriter &Writer);
41   void finalizeBuckets(uint32_t RecordZeroOffset);
42 
43   template <typename T> void addSymbol(const T &Symbol, MSFBuilder &Msf) {
44     T Copy(Symbol);
45     Records.push_back(SymbolSerializer::writeOneSymbol(Copy, Msf.getAllocator(),
46                                                        CodeViewContainer::Pdb));
47   }
48   void addSymbol(const CVSymbol &Symbol) { Records.push_back(Symbol); }
49 };
50 
51 uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
52   uint32_t Size = sizeof(GSIHashHeader);
53   Size += HashRecords.size() * sizeof(PSHashRecord);
54   Size += HashBitmap.size() * sizeof(uint32_t);
55   Size += HashBuckets.size() * sizeof(uint32_t);
56   return Size;
57 }
58 
59 uint32_t GSIHashStreamBuilder::calculateRecordByteSize() const {
60   uint32_t Size = 0;
61   for (const auto &Sym : Records)
62     Size += Sym.length();
63   return Size;
64 }
65 
66 Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
67   GSIHashHeader Header;
68   Header.VerSignature = GSIHashHeader::HdrSignature;
69   Header.VerHdr = GSIHashHeader::HdrVersion;
70   Header.HrSize = HashRecords.size() * sizeof(PSHashRecord);
71   Header.NumBuckets = HashBitmap.size() * 4 + HashBuckets.size() * 4;
72 
73   if (auto EC = Writer.writeObject(Header))
74     return EC;
75 
76   if (auto EC = Writer.writeArray(makeArrayRef(HashRecords)))
77     return EC;
78   if (auto EC = Writer.writeArray(makeArrayRef(HashBitmap)))
79     return EC;
80   if (auto EC = Writer.writeArray(makeArrayRef(HashBuckets)))
81     return EC;
82   return Error::success();
83 }
84 
85 static bool isAsciiString(StringRef S) {
86   return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; });
87 }
88 
89 // See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
90 static bool gsiRecordLess(StringRef S1, StringRef S2) {
91   size_t LS = S1.size();
92   size_t RS = S2.size();
93   // Shorter strings always compare less than longer strings.
94   if (LS != RS)
95     return LS < RS;
96 
97   // If either string contains non ascii characters, memcmp them.
98   if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2)))
99     return memcmp(S1.data(), S2.data(), LS) < 0;
100 
101   // Both strings are ascii, perform a case-insenstive comparison.
102   return S1.compare_lower(S2.data()) < 0;
103 }
104 
105 void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
106   std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1>
107       TmpBuckets;
108   uint32_t SymOffset = RecordZeroOffset;
109   for (const CVSymbol &Sym : Records) {
110     PSHashRecord HR;
111     // Add one when writing symbol offsets to disk. See GSI1::fixSymRecs.
112     HR.Off = SymOffset + 1;
113     HR.CRef = 1; // Always use a refcount of 1.
114 
115     // Hash the name to figure out which bucket this goes into.
116     StringRef Name = getSymbolName(Sym);
117     size_t BucketIdx = hashStringV1(Name) % IPHR_HASH;
118     TmpBuckets[BucketIdx].push_back(std::make_pair(Name, HR));
119     SymOffset += Sym.length();
120   }
121 
122   // Compute the three tables: the hash records in bucket and chain order, the
123   // bucket presence bitmap, and the bucket chain start offsets.
124   HashRecords.reserve(Records.size());
125   for (ulittle32_t &Word : HashBitmap)
126     Word = 0;
127   for (size_t BucketIdx = 0; BucketIdx < IPHR_HASH + 1; ++BucketIdx) {
128     auto &Bucket = TmpBuckets[BucketIdx];
129     if (Bucket.empty())
130       continue;
131     HashBitmap[BucketIdx / 32] |= 1U << (BucketIdx % 32);
132 
133     // Calculate what the offset of the first hash record in the chain would
134     // be if it were inflated to contain 32-bit pointers. On a 32-bit system,
135     // each record would be 12 bytes. See HROffsetCalc in gsi.h.
136     const int SizeOfHROffsetCalc = 12;
137     ulittle32_t ChainStartOff =
138         ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc);
139     HashBuckets.push_back(ChainStartOff);
140 
141     // Sort each bucket by memcmp of the symbol's name.  It's important that
142     // we use the same sorting algorithm as is used by the reference
143     // implementation to ensure that the search for a record within a bucket
144     // can properly early-out when it detects the record won't be found.  The
145     // algorithm used here corredsponds to the function
146     // caseInsensitiveComparePchPchCchCch in the reference implementation.
147     llvm::sort(Bucket, [](const std::pair<StringRef, PSHashRecord> &Left,
148                           const std::pair<StringRef, PSHashRecord> &Right) {
149       return gsiRecordLess(Left.first, Right.first);
150     });
151 
152     for (const auto &Entry : Bucket)
153       HashRecords.push_back(Entry.second);
154   }
155 }
156 
157 GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)
158     : Msf(Msf), PSH(llvm::make_unique<GSIHashStreamBuilder>()),
159       GSH(llvm::make_unique<GSIHashStreamBuilder>()) {}
160 
161 GSIStreamBuilder::~GSIStreamBuilder() {}
162 
163 uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {
164   uint32_t Size = 0;
165   Size += sizeof(PublicsStreamHeader);
166   Size += PSH->calculateSerializedLength();
167   Size += PSH->Records.size() * sizeof(uint32_t); // AddrMap
168   // FIXME: Add thunk map and section offsets for incremental linking.
169 
170   return Size;
171 }
172 
173 uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const {
174   return GSH->calculateSerializedLength();
175 }
176 
177 Error GSIStreamBuilder::finalizeMsfLayout() {
178   // First we write public symbol records, then we write global symbol records.
179   uint32_t PSHZero = 0;
180   uint32_t GSHZero = PSH->calculateRecordByteSize();
181 
182   PSH->finalizeBuckets(PSHZero);
183   GSH->finalizeBuckets(GSHZero);
184 
185   Expected<uint32_t> Idx = Msf.addStream(calculateGlobalsHashStreamSize());
186   if (!Idx)
187     return Idx.takeError();
188   GSH->StreamIndex = *Idx;
189   Idx = Msf.addStream(calculatePublicsHashStreamSize());
190   if (!Idx)
191     return Idx.takeError();
192   PSH->StreamIndex = *Idx;
193 
194   uint32_t RecordBytes =
195       GSH->calculateRecordByteSize() + PSH->calculateRecordByteSize();
196 
197   Idx = Msf.addStream(RecordBytes);
198   if (!Idx)
199     return Idx.takeError();
200   RecordStreamIdx = *Idx;
201   return Error::success();
202 }
203 
204 static bool comparePubSymByAddrAndName(
205     const std::pair<const CVSymbol *, const PublicSym32 *> &LS,
206     const std::pair<const CVSymbol *, const PublicSym32 *> &RS) {
207   if (LS.second->Segment != RS.second->Segment)
208     return LS.second->Segment < RS.second->Segment;
209   if (LS.second->Offset != RS.second->Offset)
210     return LS.second->Offset < RS.second->Offset;
211 
212   return LS.second->Name < RS.second->Name;
213 }
214 
215 /// Compute the address map. The address map is an array of symbol offsets
216 /// sorted so that it can be binary searched by address.
217 static std::vector<ulittle32_t> computeAddrMap(ArrayRef<CVSymbol> Records) {
218   // Make a vector of pointers to the symbols so we can sort it by address.
219   // Also gather the symbol offsets while we're at it.
220 
221   std::vector<PublicSym32> DeserializedPublics;
222   std::vector<std::pair<const CVSymbol *, const PublicSym32 *>> PublicsByAddr;
223   std::vector<uint32_t> SymOffsets;
224   DeserializedPublics.reserve(Records.size());
225   PublicsByAddr.reserve(Records.size());
226   SymOffsets.reserve(Records.size());
227 
228   uint32_t SymOffset = 0;
229   for (const CVSymbol &Sym : Records) {
230     assert(Sym.kind() == SymbolKind::S_PUB32);
231     DeserializedPublics.push_back(
232         cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(Sym)));
233     PublicsByAddr.emplace_back(&Sym, &DeserializedPublics.back());
234     SymOffsets.push_back(SymOffset);
235     SymOffset += Sym.length();
236   }
237   std::stable_sort(PublicsByAddr.begin(), PublicsByAddr.end(),
238                    comparePubSymByAddrAndName);
239 
240   // Fill in the symbol offsets in the appropriate order.
241   std::vector<ulittle32_t> AddrMap;
242   AddrMap.reserve(Records.size());
243   for (auto &Sym : PublicsByAddr) {
244     ptrdiff_t Idx = std::distance(Records.data(), Sym.first);
245     assert(Idx >= 0 && size_t(Idx) < Records.size());
246     AddrMap.push_back(ulittle32_t(SymOffsets[Idx]));
247   }
248   return AddrMap;
249 }
250 
251 uint32_t GSIStreamBuilder::getPublicsStreamIndex() const {
252   return PSH->StreamIndex;
253 }
254 
255 uint32_t GSIStreamBuilder::getGlobalsStreamIndex() const {
256   return GSH->StreamIndex;
257 }
258 
259 void GSIStreamBuilder::addPublicSymbol(const PublicSym32 &Pub) {
260   PSH->addSymbol(Pub, Msf);
261 }
262 
263 void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) {
264   GSH->addSymbol(Sym, Msf);
265 }
266 
267 void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) {
268   GSH->addSymbol(Sym, Msf);
269 }
270 
271 void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) {
272   GSH->addSymbol(Sym, Msf);
273 }
274 
275 void GSIStreamBuilder::addGlobalSymbol(const UDTSym &Sym) {
276   GSH->addSymbol(Sym, Msf);
277 }
278 
279 void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Sym) {
280   GSH->addSymbol(Sym);
281 }
282 
283 static Error writeRecords(BinaryStreamWriter &Writer,
284                           ArrayRef<CVSymbol> Records) {
285   BinaryItemStream<CVSymbol> ItemStream(support::endianness::little);
286   ItemStream.setItems(Records);
287   BinaryStreamRef RecordsRef(ItemStream);
288   return Writer.writeStreamRef(RecordsRef);
289 }
290 
291 Error GSIStreamBuilder::commitSymbolRecordStream(
292     WritableBinaryStreamRef Stream) {
293   BinaryStreamWriter Writer(Stream);
294 
295   // Write public symbol records first, followed by global symbol records.  This
296   // must match the order that we assume in finalizeMsfLayout when computing
297   // PSHZero and GSHZero.
298   if (auto EC = writeRecords(Writer, PSH->Records))
299     return EC;
300   if (auto EC = writeRecords(Writer, GSH->Records))
301     return EC;
302 
303   return Error::success();
304 }
305 
306 Error GSIStreamBuilder::commitPublicsHashStream(
307     WritableBinaryStreamRef Stream) {
308   BinaryStreamWriter Writer(Stream);
309   PublicsStreamHeader Header;
310 
311   // FIXME: Fill these in. They are for incremental linking.
312   Header.SymHash = PSH->calculateSerializedLength();
313   Header.AddrMap = PSH->Records.size() * 4;
314   Header.NumThunks = 0;
315   Header.SizeOfThunk = 0;
316   Header.ISectThunkTable = 0;
317   memset(Header.Padding, 0, sizeof(Header.Padding));
318   Header.OffThunkTable = 0;
319   Header.NumSections = 0;
320   if (auto EC = Writer.writeObject(Header))
321     return EC;
322 
323   if (auto EC = PSH->commit(Writer))
324     return EC;
325 
326   std::vector<ulittle32_t> AddrMap = computeAddrMap(PSH->Records);
327   if (auto EC = Writer.writeArray(makeArrayRef(AddrMap)))
328     return EC;
329 
330   return Error::success();
331 }
332 
333 Error GSIStreamBuilder::commitGlobalsHashStream(
334     WritableBinaryStreamRef Stream) {
335   BinaryStreamWriter Writer(Stream);
336   return GSH->commit(Writer);
337 }
338 
339 Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout,
340                                WritableBinaryStreamRef Buffer) {
341   auto GS = WritableMappedBlockStream::createIndexedStream(
342       Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator());
343   auto PS = WritableMappedBlockStream::createIndexedStream(
344       Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator());
345   auto PRS = WritableMappedBlockStream::createIndexedStream(
346       Layout, Buffer, getRecordStreamIdx(), Msf.getAllocator());
347 
348   if (auto EC = commitSymbolRecordStream(*PRS))
349     return EC;
350   if (auto EC = commitGlobalsHashStream(*GS))
351     return EC;
352   if (auto EC = commitPublicsHashStream(*PS))
353     return EC;
354   return Error::success();
355 }
356