1f37b6182SDimitry Andric //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
2f37b6182SDimitry Andric //
3f37b6182SDimitry Andric // The LLVM Compiler Infrastructure
4f37b6182SDimitry Andric //
5f37b6182SDimitry Andric // This file is distributed under the University of Illinois Open Source
6f37b6182SDimitry Andric // License. See LICENSE.TXT for details.
7f37b6182SDimitry Andric //
8f37b6182SDimitry Andric //===----------------------------------------------------------------------===//
9f37b6182SDimitry Andric
10f37b6182SDimitry Andric #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
11f37b6182SDimitry Andric
12f37b6182SDimitry Andric #include "llvm/ADT/ArrayRef.h"
13f37b6182SDimitry Andric #include "llvm/DebugInfo/PDB/Native/Hash.h"
14f37b6182SDimitry Andric #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
15f37b6182SDimitry Andric #include "llvm/Support/BinaryStreamWriter.h"
16f37b6182SDimitry Andric #include "llvm/Support/Endian.h"
17f37b6182SDimitry Andric
18*4ba319b5SDimitry Andric #include <map>
19*4ba319b5SDimitry Andric
20f37b6182SDimitry Andric using namespace llvm;
21f37b6182SDimitry Andric using namespace llvm::msf;
22f37b6182SDimitry Andric using namespace llvm::support;
23f37b6182SDimitry Andric using namespace llvm::support::endian;
24f37b6182SDimitry Andric using namespace llvm::pdb;
25f37b6182SDimitry Andric
StringTableHashTraits(PDBStringTableBuilder & Table)26*4ba319b5SDimitry Andric StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table)
27*4ba319b5SDimitry Andric : Table(&Table) {}
28*4ba319b5SDimitry Andric
hashLookupKey(StringRef S) const29*4ba319b5SDimitry Andric uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const {
30*4ba319b5SDimitry Andric return Table->getIdForString(S);
31*4ba319b5SDimitry Andric }
32*4ba319b5SDimitry Andric
storageKeyToLookupKey(uint32_t Offset) const33*4ba319b5SDimitry Andric StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const {
34*4ba319b5SDimitry Andric return Table->getStringForId(Offset);
35*4ba319b5SDimitry Andric }
36*4ba319b5SDimitry Andric
lookupKeyToStorageKey(StringRef S)37*4ba319b5SDimitry Andric uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) {
38*4ba319b5SDimitry Andric return Table->insert(S);
39*4ba319b5SDimitry Andric }
40*4ba319b5SDimitry Andric
insert(StringRef S)41f37b6182SDimitry Andric uint32_t PDBStringTableBuilder::insert(StringRef S) {
42f37b6182SDimitry Andric return Strings.insert(S);
43f37b6182SDimitry Andric }
44f37b6182SDimitry Andric
getIdForString(StringRef S) const45*4ba319b5SDimitry Andric uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const {
46*4ba319b5SDimitry Andric return Strings.getIdForString(S);
47*4ba319b5SDimitry Andric }
48*4ba319b5SDimitry Andric
getStringForId(uint32_t Id) const49*4ba319b5SDimitry Andric StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
50*4ba319b5SDimitry Andric return Strings.getStringForId(Id);
51*4ba319b5SDimitry Andric }
52*4ba319b5SDimitry Andric
53*4ba319b5SDimitry Andric // This is a precomputed list of Buckets given the specified number of
54*4ba319b5SDimitry Andric // strings. Matching the reference algorithm exactly is not strictly
55*4ba319b5SDimitry Andric // necessary for correctness, but it helps when comparing LLD's PDBs with
56*4ba319b5SDimitry Andric // Microsoft's PDBs so as to eliminate superfluous differences.
57*4ba319b5SDimitry Andric static std::map<uint32_t, uint32_t> StringsToBuckets = {
58*4ba319b5SDimitry Andric {1, 2},
59*4ba319b5SDimitry Andric {2, 4},
60*4ba319b5SDimitry Andric {4, 7},
61*4ba319b5SDimitry Andric {6, 11},
62*4ba319b5SDimitry Andric {9, 17},
63*4ba319b5SDimitry Andric {13, 26},
64*4ba319b5SDimitry Andric {20, 40},
65*4ba319b5SDimitry Andric {31, 61},
66*4ba319b5SDimitry Andric {46, 92},
67*4ba319b5SDimitry Andric {70, 139},
68*4ba319b5SDimitry Andric {105, 209},
69*4ba319b5SDimitry Andric {157, 314},
70*4ba319b5SDimitry Andric {236, 472},
71*4ba319b5SDimitry Andric {355, 709},
72*4ba319b5SDimitry Andric {532, 1064},
73*4ba319b5SDimitry Andric {799, 1597},
74*4ba319b5SDimitry Andric {1198, 2396},
75*4ba319b5SDimitry Andric {1798, 3595},
76*4ba319b5SDimitry Andric {2697, 5393},
77*4ba319b5SDimitry Andric {4045, 8090},
78*4ba319b5SDimitry Andric {6068, 12136},
79*4ba319b5SDimitry Andric {9103, 18205},
80*4ba319b5SDimitry Andric {13654, 27308},
81*4ba319b5SDimitry Andric {20482, 40963},
82*4ba319b5SDimitry Andric {30723, 61445},
83*4ba319b5SDimitry Andric {46084, 92168},
84*4ba319b5SDimitry Andric {69127, 138253},
85*4ba319b5SDimitry Andric {103690, 207380},
86*4ba319b5SDimitry Andric {155536, 311071},
87*4ba319b5SDimitry Andric {233304, 466607},
88*4ba319b5SDimitry Andric {349956, 699911},
89*4ba319b5SDimitry Andric {524934, 1049867},
90*4ba319b5SDimitry Andric {787401, 1574801},
91*4ba319b5SDimitry Andric {1181101, 2362202},
92*4ba319b5SDimitry Andric {1771652, 3543304},
93*4ba319b5SDimitry Andric {2657479, 5314957},
94*4ba319b5SDimitry Andric {3986218, 7972436},
95*4ba319b5SDimitry Andric {5979328, 11958655},
96*4ba319b5SDimitry Andric {8968992, 17937983},
97*4ba319b5SDimitry Andric {13453488, 26906975},
98*4ba319b5SDimitry Andric {20180232, 40360463},
99*4ba319b5SDimitry Andric {30270348, 60540695},
100*4ba319b5SDimitry Andric {45405522, 90811043},
101*4ba319b5SDimitry Andric {68108283, 136216565},
102*4ba319b5SDimitry Andric {102162424, 204324848},
103*4ba319b5SDimitry Andric {153243637, 306487273},
104*4ba319b5SDimitry Andric {229865455, 459730910},
105*4ba319b5SDimitry Andric {344798183, 689596366},
106*4ba319b5SDimitry Andric {517197275, 1034394550},
107*4ba319b5SDimitry Andric {775795913, 1551591826}};
108*4ba319b5SDimitry Andric
computeBucketCount(uint32_t NumStrings)109f37b6182SDimitry Andric static uint32_t computeBucketCount(uint32_t NumStrings) {
110*4ba319b5SDimitry Andric auto Entry = StringsToBuckets.lower_bound(NumStrings);
111*4ba319b5SDimitry Andric assert(Entry != StringsToBuckets.end());
112*4ba319b5SDimitry Andric return Entry->second;
113f37b6182SDimitry Andric }
114f37b6182SDimitry Andric
calculateHashTableSize() const115f37b6182SDimitry Andric uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
116f37b6182SDimitry Andric uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
117f37b6182SDimitry Andric Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
118f37b6182SDimitry Andric
119f37b6182SDimitry Andric return Size;
120f37b6182SDimitry Andric }
121f37b6182SDimitry Andric
calculateSerializedSize() const122f37b6182SDimitry Andric uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
123f37b6182SDimitry Andric uint32_t Size = 0;
124f37b6182SDimitry Andric Size += sizeof(PDBStringTableHeader);
125f37b6182SDimitry Andric Size += Strings.calculateSerializedSize();
126f37b6182SDimitry Andric Size += calculateHashTableSize();
127f37b6182SDimitry Andric Size += sizeof(uint32_t); // The /names stream ends with the string count.
128f37b6182SDimitry Andric return Size;
129f37b6182SDimitry Andric }
130f37b6182SDimitry Andric
setStrings(const codeview::DebugStringTableSubsection & Strings)13124d58133SDimitry Andric void PDBStringTableBuilder::setStrings(
13224d58133SDimitry Andric const codeview::DebugStringTableSubsection &Strings) {
13324d58133SDimitry Andric this->Strings = Strings;
13424d58133SDimitry Andric }
13524d58133SDimitry Andric
writeHeader(BinaryStreamWriter & Writer) const136f37b6182SDimitry Andric Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
137f37b6182SDimitry Andric // Write a header
138f37b6182SDimitry Andric PDBStringTableHeader H;
139f37b6182SDimitry Andric H.Signature = PDBStringTableSignature;
140f37b6182SDimitry Andric H.HashVersion = 1;
141f37b6182SDimitry Andric H.ByteSize = Strings.calculateSerializedSize();
142f37b6182SDimitry Andric if (auto EC = Writer.writeObject(H))
143f37b6182SDimitry Andric return EC;
144f37b6182SDimitry Andric assert(Writer.bytesRemaining() == 0);
145f37b6182SDimitry Andric return Error::success();
146f37b6182SDimitry Andric }
147f37b6182SDimitry Andric
writeStrings(BinaryStreamWriter & Writer) const148f37b6182SDimitry Andric Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
149f37b6182SDimitry Andric if (auto EC = Strings.commit(Writer))
150f37b6182SDimitry Andric return EC;
151f37b6182SDimitry Andric
152f37b6182SDimitry Andric assert(Writer.bytesRemaining() == 0);
153f37b6182SDimitry Andric return Error::success();
154f37b6182SDimitry Andric }
155f37b6182SDimitry Andric
writeHashTable(BinaryStreamWriter & Writer) const156f37b6182SDimitry Andric Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
157f37b6182SDimitry Andric // Write a hash table.
158f37b6182SDimitry Andric uint32_t BucketCount = computeBucketCount(Strings.size());
159f37b6182SDimitry Andric if (auto EC = Writer.writeInteger(BucketCount))
160f37b6182SDimitry Andric return EC;
161f37b6182SDimitry Andric std::vector<ulittle32_t> Buckets(BucketCount);
162f37b6182SDimitry Andric
163f37b6182SDimitry Andric for (auto &Pair : Strings) {
164f37b6182SDimitry Andric StringRef S = Pair.getKey();
165f37b6182SDimitry Andric uint32_t Offset = Pair.getValue();
166f37b6182SDimitry Andric uint32_t Hash = hashStringV1(S);
167f37b6182SDimitry Andric
168f37b6182SDimitry Andric for (uint32_t I = 0; I != BucketCount; ++I) {
169f37b6182SDimitry Andric uint32_t Slot = (Hash + I) % BucketCount;
170f37b6182SDimitry Andric if (Buckets[Slot] != 0)
171f37b6182SDimitry Andric continue;
172f37b6182SDimitry Andric Buckets[Slot] = Offset;
173f37b6182SDimitry Andric break;
174f37b6182SDimitry Andric }
175f37b6182SDimitry Andric }
176f37b6182SDimitry Andric
177f37b6182SDimitry Andric if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
178f37b6182SDimitry Andric return EC;
179f37b6182SDimitry Andric
180f37b6182SDimitry Andric assert(Writer.bytesRemaining() == 0);
181f37b6182SDimitry Andric return Error::success();
182f37b6182SDimitry Andric }
183f37b6182SDimitry Andric
writeEpilogue(BinaryStreamWriter & Writer) const184f37b6182SDimitry Andric Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
185f37b6182SDimitry Andric if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
186f37b6182SDimitry Andric return EC;
187f37b6182SDimitry Andric assert(Writer.bytesRemaining() == 0);
188f37b6182SDimitry Andric return Error::success();
189f37b6182SDimitry Andric }
190f37b6182SDimitry Andric
commit(BinaryStreamWriter & Writer) const191f37b6182SDimitry Andric Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
192f37b6182SDimitry Andric BinaryStreamWriter SectionWriter;
193f37b6182SDimitry Andric
194f37b6182SDimitry Andric std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
195f37b6182SDimitry Andric if (auto EC = writeHeader(SectionWriter))
196f37b6182SDimitry Andric return EC;
197f37b6182SDimitry Andric
198f37b6182SDimitry Andric std::tie(SectionWriter, Writer) =
199f37b6182SDimitry Andric Writer.split(Strings.calculateSerializedSize());
200f37b6182SDimitry Andric if (auto EC = writeStrings(SectionWriter))
201f37b6182SDimitry Andric return EC;
202f37b6182SDimitry Andric
203f37b6182SDimitry Andric std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
204f37b6182SDimitry Andric if (auto EC = writeHashTable(SectionWriter))
205f37b6182SDimitry Andric return EC;
206f37b6182SDimitry Andric
207f37b6182SDimitry Andric std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
208f37b6182SDimitry Andric if (auto EC = writeEpilogue(SectionWriter))
209f37b6182SDimitry Andric return EC;
210f37b6182SDimitry Andric
211f37b6182SDimitry Andric return Error::success();
212f37b6182SDimitry Andric }
213