1e204a6c9SZachary Turner //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
2e204a6c9SZachary Turner //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e204a6c9SZachary Turner //
7e204a6c9SZachary Turner //===----------------------------------------------------------------------===//
8e204a6c9SZachary Turner 
9e204a6c9SZachary Turner #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
10c504ae3cSZachary Turner 
11e204a6c9SZachary Turner #include "llvm/ADT/ArrayRef.h"
12e204a6c9SZachary Turner #include "llvm/DebugInfo/PDB/Native/Hash.h"
13e204a6c9SZachary Turner #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
14e204a6c9SZachary Turner #include "llvm/Support/BinaryStreamWriter.h"
15e204a6c9SZachary Turner #include "llvm/Support/Endian.h"
16e204a6c9SZachary Turner 
17a6fb536eSZachary Turner #include <map>
18a6fb536eSZachary Turner 
19e204a6c9SZachary Turner using namespace llvm;
20c504ae3cSZachary Turner using namespace llvm::msf;
21e204a6c9SZachary Turner using namespace llvm::support;
22e204a6c9SZachary Turner using namespace llvm::support::endian;
23e204a6c9SZachary Turner using namespace llvm::pdb;
24e204a6c9SZachary Turner 
25f2282762SZachary Turner StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table)
26f2282762SZachary Turner     : Table(&Table) {}
27f2282762SZachary Turner 
28f2282762SZachary Turner uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const {
29*e577be4eSNico Weber   // The reference implementation doesn't include code for /src/headerblock
30*e577be4eSNico Weber   // handling, but it can only read natvis entries lld's PDB files if
31*e577be4eSNico Weber   // this hash function truncates the hash to 16 bit.
32*e577be4eSNico Weber   // PDB/include/misc.h in the reference implementation has a hashSz() function
33*e577be4eSNico Weber   // that returns an unsigned short, that seems what's being used for
34*e577be4eSNico Weber   // /src/headerblock.
35*e577be4eSNico Weber   return static_cast<uint16_t>(Table->getIdForString(S));
36f2282762SZachary Turner }
37f2282762SZachary Turner 
38f2282762SZachary Turner StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const {
39f2282762SZachary Turner   return Table->getStringForId(Offset);
40f2282762SZachary Turner }
41f2282762SZachary Turner 
42f2282762SZachary Turner uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) {
43f2282762SZachary Turner   return Table->insert(S);
44f2282762SZachary Turner }
45f2282762SZachary Turner 
46e204a6c9SZachary Turner uint32_t PDBStringTableBuilder::insert(StringRef S) {
47c504ae3cSZachary Turner   return Strings.insert(S);
48e204a6c9SZachary Turner }
49e204a6c9SZachary Turner 
5071d36ad9SZachary Turner uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const {
5171d36ad9SZachary Turner   return Strings.getIdForString(S);
5271d36ad9SZachary Turner }
5371d36ad9SZachary Turner 
5471d36ad9SZachary Turner StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
5571d36ad9SZachary Turner   return Strings.getStringForId(Id);
5671d36ad9SZachary Turner }
5771d36ad9SZachary Turner 
5885e2cdacSReid Kleckner static uint32_t computeBucketCount(uint32_t NumStrings) {
59a6fb536eSZachary Turner   // This is a precomputed list of Buckets given the specified number of
60a6fb536eSZachary Turner   // strings.  Matching the reference algorithm exactly is not strictly
61a6fb536eSZachary Turner   // necessary for correctness, but it helps when comparing LLD's PDBs with
62a6fb536eSZachary Turner   // Microsoft's PDBs so as to eliminate superfluous differences.
63a6fb536eSZachary Turner   static std::map<uint32_t, uint32_t> StringsToBuckets = {
64a6fb536eSZachary Turner       {1, 2},
65a6fb536eSZachary Turner       {2, 4},
66a6fb536eSZachary Turner       {4, 7},
67a6fb536eSZachary Turner       {6, 11},
68a6fb536eSZachary Turner       {9, 17},
69a6fb536eSZachary Turner       {13, 26},
70a6fb536eSZachary Turner       {20, 40},
71a6fb536eSZachary Turner       {31, 61},
72a6fb536eSZachary Turner       {46, 92},
73a6fb536eSZachary Turner       {70, 139},
74a6fb536eSZachary Turner       {105, 209},
75a6fb536eSZachary Turner       {157, 314},
76a6fb536eSZachary Turner       {236, 472},
77a6fb536eSZachary Turner       {355, 709},
78a6fb536eSZachary Turner       {532, 1064},
79a6fb536eSZachary Turner       {799, 1597},
80a6fb536eSZachary Turner       {1198, 2396},
81a6fb536eSZachary Turner       {1798, 3595},
82a6fb536eSZachary Turner       {2697, 5393},
83a6fb536eSZachary Turner       {4045, 8090},
84a6fb536eSZachary Turner       {6068, 12136},
85a6fb536eSZachary Turner       {9103, 18205},
86a6fb536eSZachary Turner       {13654, 27308},
87a6fb536eSZachary Turner       {20482, 40963},
88a6fb536eSZachary Turner       {30723, 61445},
89a6fb536eSZachary Turner       {46084, 92168},
90a6fb536eSZachary Turner       {69127, 138253},
91a6fb536eSZachary Turner       {103690, 207380},
92a6fb536eSZachary Turner       {155536, 311071},
93a6fb536eSZachary Turner       {233304, 466607},
94a6fb536eSZachary Turner       {349956, 699911},
95a6fb536eSZachary Turner       {524934, 1049867},
96a6fb536eSZachary Turner       {787401, 1574801},
97a6fb536eSZachary Turner       {1181101, 2362202},
98a6fb536eSZachary Turner       {1771652, 3543304},
99a6fb536eSZachary Turner       {2657479, 5314957},
100a6fb536eSZachary Turner       {3986218, 7972436},
101a6fb536eSZachary Turner       {5979328, 11958655},
102a6fb536eSZachary Turner       {8968992, 17937983},
103a6fb536eSZachary Turner       {13453488, 26906975},
104a6fb536eSZachary Turner       {20180232, 40360463},
105a6fb536eSZachary Turner       {30270348, 60540695},
106a6fb536eSZachary Turner       {45405522, 90811043},
107a6fb536eSZachary Turner       {68108283, 136216565},
108a6fb536eSZachary Turner       {102162424, 204324848},
109a6fb536eSZachary Turner       {153243637, 306487273},
110a6fb536eSZachary Turner       {229865455, 459730910},
111a6fb536eSZachary Turner       {344798183, 689596366},
112a6fb536eSZachary Turner       {517197275, 1034394550},
113a6fb536eSZachary Turner       {775795913, 1551591826}};
114a6fb536eSZachary Turner   auto Entry = StringsToBuckets.lower_bound(NumStrings);
115a6fb536eSZachary Turner   assert(Entry != StringsToBuckets.end());
116a6fb536eSZachary Turner   return Entry->second;
117e204a6c9SZachary Turner }
118e204a6c9SZachary Turner 
119c504ae3cSZachary Turner uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
120c504ae3cSZachary Turner   uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
121c504ae3cSZachary Turner   Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
122dff096f2SDaniel Jasper 
1237dba20bdSZachary Turner   return Size;
1247dba20bdSZachary Turner }
1257dba20bdSZachary Turner 
126c504ae3cSZachary Turner uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
127c504ae3cSZachary Turner   uint32_t Size = 0;
128c504ae3cSZachary Turner   Size += sizeof(PDBStringTableHeader);
129c504ae3cSZachary Turner   Size += Strings.calculateSerializedSize();
130c504ae3cSZachary Turner   Size += calculateHashTableSize();
131c504ae3cSZachary Turner   Size += sizeof(uint32_t); // The /names stream ends with the string count.
132c504ae3cSZachary Turner   return Size;
133c504ae3cSZachary Turner }
134c504ae3cSZachary Turner 
135a8cfc29cSZachary Turner void PDBStringTableBuilder::setStrings(
136a8cfc29cSZachary Turner     const codeview::DebugStringTableSubsection &Strings) {
137a8cfc29cSZachary Turner   this->Strings = Strings;
138a8cfc29cSZachary Turner }
139a8cfc29cSZachary Turner 
140c504ae3cSZachary Turner Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
141e204a6c9SZachary Turner   // Write a header
142e204a6c9SZachary Turner   PDBStringTableHeader H;
143e204a6c9SZachary Turner   H.Signature = PDBStringTableSignature;
144e204a6c9SZachary Turner   H.HashVersion = 1;
145c504ae3cSZachary Turner   H.ByteSize = Strings.calculateSerializedSize();
146e204a6c9SZachary Turner   if (auto EC = Writer.writeObject(H))
147e204a6c9SZachary Turner     return EC;
148c504ae3cSZachary Turner   assert(Writer.bytesRemaining() == 0);
149c504ae3cSZachary Turner   return Error::success();
1507dba20bdSZachary Turner }
1517dba20bdSZachary Turner 
152c504ae3cSZachary Turner Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
153c504ae3cSZachary Turner   if (auto EC = Strings.commit(Writer))
154c504ae3cSZachary Turner     return EC;
155c504ae3cSZachary Turner 
156c504ae3cSZachary Turner   assert(Writer.bytesRemaining() == 0);
157c504ae3cSZachary Turner   return Error::success();
158c504ae3cSZachary Turner }
159c504ae3cSZachary Turner 
160c504ae3cSZachary Turner Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
161e204a6c9SZachary Turner   // Write a hash table.
162e204a6c9SZachary Turner   uint32_t BucketCount = computeBucketCount(Strings.size());
163e204a6c9SZachary Turner   if (auto EC = Writer.writeInteger(BucketCount))
164e204a6c9SZachary Turner     return EC;
165e204a6c9SZachary Turner   std::vector<ulittle32_t> Buckets(BucketCount);
166e204a6c9SZachary Turner 
167c504ae3cSZachary Turner   for (auto &Pair : Strings) {
168c504ae3cSZachary Turner     StringRef S = Pair.getKey();
169c504ae3cSZachary Turner     uint32_t Offset = Pair.getValue();
170e204a6c9SZachary Turner     uint32_t Hash = hashStringV1(S);
171e204a6c9SZachary Turner 
172e204a6c9SZachary Turner     for (uint32_t I = 0; I != BucketCount; ++I) {
173e204a6c9SZachary Turner       uint32_t Slot = (Hash + I) % BucketCount;
174e204a6c9SZachary Turner       if (Buckets[Slot] != 0)
175e204a6c9SZachary Turner         continue;
176e204a6c9SZachary Turner       Buckets[Slot] = Offset;
177e204a6c9SZachary Turner       break;
178e204a6c9SZachary Turner     }
179e204a6c9SZachary Turner   }
180e204a6c9SZachary Turner 
181e204a6c9SZachary Turner   if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
182e204a6c9SZachary Turner     return EC;
183c504ae3cSZachary Turner 
184c504ae3cSZachary Turner   assert(Writer.bytesRemaining() == 0);
185c504ae3cSZachary Turner   return Error::success();
186c504ae3cSZachary Turner }
187c504ae3cSZachary Turner 
188c504ae3cSZachary Turner Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
189c504ae3cSZachary Turner   if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
190e204a6c9SZachary Turner     return EC;
191c504ae3cSZachary Turner   assert(Writer.bytesRemaining() == 0);
192c504ae3cSZachary Turner   return Error::success();
193c504ae3cSZachary Turner }
194c504ae3cSZachary Turner 
195c504ae3cSZachary Turner Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
196c504ae3cSZachary Turner   BinaryStreamWriter SectionWriter;
197c504ae3cSZachary Turner 
198c504ae3cSZachary Turner   std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
199c504ae3cSZachary Turner   if (auto EC = writeHeader(SectionWriter))
200c504ae3cSZachary Turner     return EC;
201c504ae3cSZachary Turner 
202c504ae3cSZachary Turner   std::tie(SectionWriter, Writer) =
203c504ae3cSZachary Turner       Writer.split(Strings.calculateSerializedSize());
204c504ae3cSZachary Turner   if (auto EC = writeStrings(SectionWriter))
205c504ae3cSZachary Turner     return EC;
206c504ae3cSZachary Turner 
207c504ae3cSZachary Turner   std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
208c504ae3cSZachary Turner   if (auto EC = writeHashTable(SectionWriter))
209c504ae3cSZachary Turner     return EC;
210c504ae3cSZachary Turner 
211c504ae3cSZachary Turner   std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
212c504ae3cSZachary Turner   if (auto EC = writeEpilogue(SectionWriter))
213c504ae3cSZachary Turner     return EC;
214c504ae3cSZachary Turner 
215e204a6c9SZachary Turner   return Error::success();
216e204a6c9SZachary Turner }
217