1e204a6c9SZachary Turner //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
2e204a6c9SZachary Turner //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e204a6c9SZachary Turner //
7e204a6c9SZachary Turner //===----------------------------------------------------------------------===//
8e204a6c9SZachary Turner
9e204a6c9SZachary Turner #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
10c504ae3cSZachary Turner
11e204a6c9SZachary Turner #include "llvm/ADT/ArrayRef.h"
12e204a6c9SZachary Turner #include "llvm/DebugInfo/PDB/Native/Hash.h"
13e204a6c9SZachary Turner #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
14e204a6c9SZachary Turner #include "llvm/Support/BinaryStreamWriter.h"
15e204a6c9SZachary Turner #include "llvm/Support/Endian.h"
16e204a6c9SZachary Turner
17a6fb536eSZachary Turner #include <map>
18a6fb536eSZachary Turner
19e204a6c9SZachary Turner using namespace llvm;
20c504ae3cSZachary Turner using namespace llvm::msf;
21e204a6c9SZachary Turner using namespace llvm::support;
22e204a6c9SZachary Turner using namespace llvm::support::endian;
23e204a6c9SZachary Turner using namespace llvm::pdb;
24e204a6c9SZachary Turner
StringTableHashTraits(PDBStringTableBuilder & Table)25f2282762SZachary Turner StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table)
26f2282762SZachary Turner : Table(&Table) {}
27f2282762SZachary Turner
hashLookupKey(StringRef S) const28f2282762SZachary Turner uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const {
29e577be4eSNico Weber // The reference implementation doesn't include code for /src/headerblock
30e577be4eSNico Weber // handling, but it can only read natvis entries lld's PDB files if
31e577be4eSNico Weber // this hash function truncates the hash to 16 bit.
32e577be4eSNico Weber // PDB/include/misc.h in the reference implementation has a hashSz() function
33e577be4eSNico Weber // that returns an unsigned short, that seems what's being used for
34e577be4eSNico Weber // /src/headerblock.
35e577be4eSNico Weber return static_cast<uint16_t>(Table->getIdForString(S));
36f2282762SZachary Turner }
37f2282762SZachary Turner
storageKeyToLookupKey(uint32_t Offset) const38f2282762SZachary Turner StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const {
39f2282762SZachary Turner return Table->getStringForId(Offset);
40f2282762SZachary Turner }
41f2282762SZachary Turner
lookupKeyToStorageKey(StringRef S)42f2282762SZachary Turner uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) {
43f2282762SZachary Turner return Table->insert(S);
44f2282762SZachary Turner }
45f2282762SZachary Turner
insert(StringRef S)46e204a6c9SZachary Turner uint32_t PDBStringTableBuilder::insert(StringRef S) {
47c504ae3cSZachary Turner return Strings.insert(S);
48e204a6c9SZachary Turner }
49e204a6c9SZachary Turner
getIdForString(StringRef S) const5071d36ad9SZachary Turner uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const {
5171d36ad9SZachary Turner return Strings.getIdForString(S);
5271d36ad9SZachary Turner }
5371d36ad9SZachary Turner
getStringForId(uint32_t Id) const5471d36ad9SZachary Turner StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
5571d36ad9SZachary Turner return Strings.getStringForId(Id);
5671d36ad9SZachary Turner }
5771d36ad9SZachary Turner
computeBucketCount(uint32_t NumStrings)5885e2cdacSReid Kleckner static uint32_t computeBucketCount(uint32_t NumStrings) {
59a6fb536eSZachary Turner // This is a precomputed list of Buckets given the specified number of
60a6fb536eSZachary Turner // strings. Matching the reference algorithm exactly is not strictly
61a6fb536eSZachary Turner // necessary for correctness, but it helps when comparing LLD's PDBs with
62a6fb536eSZachary Turner // Microsoft's PDBs so as to eliminate superfluous differences.
63ac6375d9SNico Weber // The reference implementation does (in nmt.h, NMT::grow()):
64ac6375d9SNico Weber // unsigned StringCount = 0;
65ac6375d9SNico Weber // unsigned BucketCount = 1;
66ac6375d9SNico Weber // fn insert() {
67ac6375d9SNico Weber // ++StringCount;
68ac6375d9SNico Weber // if (BucketCount * 3 / 4 < StringCount)
69ac6375d9SNico Weber // BucketCount = BucketCount * 3 / 2 + 1;
70ac6375d9SNico Weber // }
71ac6375d9SNico Weber // This list contains all StringCount, BucketCount pairs where BucketCount was
72ac6375d9SNico Weber // just incremented. It ends before the first BucketCount entry where
73ac6375d9SNico Weber // BucketCount * 3 would overflow a 32-bit unsigned int.
74*fbce4a78SBenjamin Kramer static const std::pair<uint32_t, uint32_t> StringsToBuckets[] = {
75ac6375d9SNico Weber {0, 1},
76a6fb536eSZachary Turner {1, 2},
77a6fb536eSZachary Turner {2, 4},
78a6fb536eSZachary Turner {4, 7},
79a6fb536eSZachary Turner {6, 11},
80a6fb536eSZachary Turner {9, 17},
81a6fb536eSZachary Turner {13, 26},
82a6fb536eSZachary Turner {20, 40},
83a6fb536eSZachary Turner {31, 61},
84a6fb536eSZachary Turner {46, 92},
85a6fb536eSZachary Turner {70, 139},
86a6fb536eSZachary Turner {105, 209},
87a6fb536eSZachary Turner {157, 314},
88a6fb536eSZachary Turner {236, 472},
89a6fb536eSZachary Turner {355, 709},
90a6fb536eSZachary Turner {532, 1064},
91a6fb536eSZachary Turner {799, 1597},
92a6fb536eSZachary Turner {1198, 2396},
93a6fb536eSZachary Turner {1798, 3595},
94a6fb536eSZachary Turner {2697, 5393},
95a6fb536eSZachary Turner {4045, 8090},
96a6fb536eSZachary Turner {6068, 12136},
97a6fb536eSZachary Turner {9103, 18205},
98a6fb536eSZachary Turner {13654, 27308},
99a6fb536eSZachary Turner {20482, 40963},
100a6fb536eSZachary Turner {30723, 61445},
101a6fb536eSZachary Turner {46084, 92168},
102a6fb536eSZachary Turner {69127, 138253},
103a6fb536eSZachary Turner {103690, 207380},
104a6fb536eSZachary Turner {155536, 311071},
105a6fb536eSZachary Turner {233304, 466607},
106a6fb536eSZachary Turner {349956, 699911},
107a6fb536eSZachary Turner {524934, 1049867},
108a6fb536eSZachary Turner {787401, 1574801},
109a6fb536eSZachary Turner {1181101, 2362202},
110a6fb536eSZachary Turner {1771652, 3543304},
111a6fb536eSZachary Turner {2657479, 5314957},
112a6fb536eSZachary Turner {3986218, 7972436},
113a6fb536eSZachary Turner {5979328, 11958655},
114a6fb536eSZachary Turner {8968992, 17937983},
115a6fb536eSZachary Turner {13453488, 26906975},
116a6fb536eSZachary Turner {20180232, 40360463},
117a6fb536eSZachary Turner {30270348, 60540695},
118a6fb536eSZachary Turner {45405522, 90811043},
119a6fb536eSZachary Turner {68108283, 136216565},
120a6fb536eSZachary Turner {102162424, 204324848},
121a6fb536eSZachary Turner {153243637, 306487273},
122a6fb536eSZachary Turner {229865455, 459730910},
123a6fb536eSZachary Turner {344798183, 689596366},
124a6fb536eSZachary Turner {517197275, 1034394550},
125ac6375d9SNico Weber {775795913, 1551591826},
126ac6375d9SNico Weber {1163693870, 2327387740}};
127*fbce4a78SBenjamin Kramer const auto *Entry = llvm::lower_bound(
128*fbce4a78SBenjamin Kramer StringsToBuckets, std::make_pair(NumStrings, 0U), llvm::less_first());
129*fbce4a78SBenjamin Kramer assert(Entry != std::end(StringsToBuckets));
130a6fb536eSZachary Turner return Entry->second;
131e204a6c9SZachary Turner }
132e204a6c9SZachary Turner
calculateHashTableSize() const133c504ae3cSZachary Turner uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
134c504ae3cSZachary Turner uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
135c504ae3cSZachary Turner Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
136dff096f2SDaniel Jasper
1377dba20bdSZachary Turner return Size;
1387dba20bdSZachary Turner }
1397dba20bdSZachary Turner
calculateSerializedSize() const140c504ae3cSZachary Turner uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
141c504ae3cSZachary Turner uint32_t Size = 0;
142c504ae3cSZachary Turner Size += sizeof(PDBStringTableHeader);
143c504ae3cSZachary Turner Size += Strings.calculateSerializedSize();
144c504ae3cSZachary Turner Size += calculateHashTableSize();
145c504ae3cSZachary Turner Size += sizeof(uint32_t); // The /names stream ends with the string count.
146c504ae3cSZachary Turner return Size;
147c504ae3cSZachary Turner }
148c504ae3cSZachary Turner
setStrings(const codeview::DebugStringTableSubsection & Strings)149a8cfc29cSZachary Turner void PDBStringTableBuilder::setStrings(
150a8cfc29cSZachary Turner const codeview::DebugStringTableSubsection &Strings) {
151a8cfc29cSZachary Turner this->Strings = Strings;
152a8cfc29cSZachary Turner }
153a8cfc29cSZachary Turner
writeHeader(BinaryStreamWriter & Writer) const154c504ae3cSZachary Turner Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
155e204a6c9SZachary Turner // Write a header
156e204a6c9SZachary Turner PDBStringTableHeader H;
157e204a6c9SZachary Turner H.Signature = PDBStringTableSignature;
158e204a6c9SZachary Turner H.HashVersion = 1;
159c504ae3cSZachary Turner H.ByteSize = Strings.calculateSerializedSize();
160e204a6c9SZachary Turner if (auto EC = Writer.writeObject(H))
161e204a6c9SZachary Turner return EC;
162c504ae3cSZachary Turner assert(Writer.bytesRemaining() == 0);
163c504ae3cSZachary Turner return Error::success();
1647dba20bdSZachary Turner }
1657dba20bdSZachary Turner
writeStrings(BinaryStreamWriter & Writer) const166c504ae3cSZachary Turner Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
167c504ae3cSZachary Turner if (auto EC = Strings.commit(Writer))
168c504ae3cSZachary Turner return EC;
169c504ae3cSZachary Turner
170c504ae3cSZachary Turner assert(Writer.bytesRemaining() == 0);
171c504ae3cSZachary Turner return Error::success();
172c504ae3cSZachary Turner }
173c504ae3cSZachary Turner
writeHashTable(BinaryStreamWriter & Writer) const174c504ae3cSZachary Turner Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
175e204a6c9SZachary Turner // Write a hash table.
176e204a6c9SZachary Turner uint32_t BucketCount = computeBucketCount(Strings.size());
177e204a6c9SZachary Turner if (auto EC = Writer.writeInteger(BucketCount))
178e204a6c9SZachary Turner return EC;
179e204a6c9SZachary Turner std::vector<ulittle32_t> Buckets(BucketCount);
180e204a6c9SZachary Turner
181c504ae3cSZachary Turner for (auto &Pair : Strings) {
182c504ae3cSZachary Turner StringRef S = Pair.getKey();
183c504ae3cSZachary Turner uint32_t Offset = Pair.getValue();
184e204a6c9SZachary Turner uint32_t Hash = hashStringV1(S);
185e204a6c9SZachary Turner
186e204a6c9SZachary Turner for (uint32_t I = 0; I != BucketCount; ++I) {
187e204a6c9SZachary Turner uint32_t Slot = (Hash + I) % BucketCount;
188e204a6c9SZachary Turner if (Buckets[Slot] != 0)
189e204a6c9SZachary Turner continue;
190e204a6c9SZachary Turner Buckets[Slot] = Offset;
191e204a6c9SZachary Turner break;
192e204a6c9SZachary Turner }
193e204a6c9SZachary Turner }
194e204a6c9SZachary Turner
195e204a6c9SZachary Turner if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
196e204a6c9SZachary Turner return EC;
197c504ae3cSZachary Turner
198c504ae3cSZachary Turner assert(Writer.bytesRemaining() == 0);
199c504ae3cSZachary Turner return Error::success();
200c504ae3cSZachary Turner }
201c504ae3cSZachary Turner
writeEpilogue(BinaryStreamWriter & Writer) const202c504ae3cSZachary Turner Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
203c504ae3cSZachary Turner if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
204e204a6c9SZachary Turner return EC;
205c504ae3cSZachary Turner assert(Writer.bytesRemaining() == 0);
206c504ae3cSZachary Turner return Error::success();
207c504ae3cSZachary Turner }
208c504ae3cSZachary Turner
commit(BinaryStreamWriter & Writer) const209c504ae3cSZachary Turner Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
210c504ae3cSZachary Turner BinaryStreamWriter SectionWriter;
211c504ae3cSZachary Turner
212c504ae3cSZachary Turner std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
213c504ae3cSZachary Turner if (auto EC = writeHeader(SectionWriter))
214c504ae3cSZachary Turner return EC;
215c504ae3cSZachary Turner
216c504ae3cSZachary Turner std::tie(SectionWriter, Writer) =
217c504ae3cSZachary Turner Writer.split(Strings.calculateSerializedSize());
218c504ae3cSZachary Turner if (auto EC = writeStrings(SectionWriter))
219c504ae3cSZachary Turner return EC;
220c504ae3cSZachary Turner
221c504ae3cSZachary Turner std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
222c504ae3cSZachary Turner if (auto EC = writeHashTable(SectionWriter))
223c504ae3cSZachary Turner return EC;
224c504ae3cSZachary Turner
225c504ae3cSZachary Turner std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
226c504ae3cSZachary Turner if (auto EC = writeEpilogue(SectionWriter))
227c504ae3cSZachary Turner return EC;
228c504ae3cSZachary Turner
229e204a6c9SZachary Turner return Error::success();
230e204a6c9SZachary Turner }
231