1e204a6c9SZachary Turner //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===// 2e204a6c9SZachary Turner // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e204a6c9SZachary Turner // 7e204a6c9SZachary Turner //===----------------------------------------------------------------------===// 8e204a6c9SZachary Turner 9e204a6c9SZachary Turner #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h" 10c504ae3cSZachary Turner 11e204a6c9SZachary Turner #include "llvm/ADT/ArrayRef.h" 12e204a6c9SZachary Turner #include "llvm/DebugInfo/PDB/Native/Hash.h" 13e204a6c9SZachary Turner #include "llvm/DebugInfo/PDB/Native/RawTypes.h" 14e204a6c9SZachary Turner #include "llvm/Support/BinaryStreamWriter.h" 15e204a6c9SZachary Turner #include "llvm/Support/Endian.h" 16e204a6c9SZachary Turner 17a6fb536eSZachary Turner #include <map> 18a6fb536eSZachary Turner 19e204a6c9SZachary Turner using namespace llvm; 20c504ae3cSZachary Turner using namespace llvm::msf; 21e204a6c9SZachary Turner using namespace llvm::support; 22e204a6c9SZachary Turner using namespace llvm::support::endian; 23e204a6c9SZachary Turner using namespace llvm::pdb; 24e204a6c9SZachary Turner 25f2282762SZachary Turner StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table) 26f2282762SZachary Turner : Table(&Table) {} 27f2282762SZachary Turner 28f2282762SZachary Turner uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const { 29*e577be4eSNico Weber // The reference implementation doesn't include code for /src/headerblock 30*e577be4eSNico Weber // handling, but it can only read natvis entries lld's PDB files if 31*e577be4eSNico Weber // this hash function truncates the hash to 16 bit. 32*e577be4eSNico Weber // PDB/include/misc.h in the reference implementation has a hashSz() function 33*e577be4eSNico Weber // that returns an unsigned short, that seems what's being used for 34*e577be4eSNico Weber // /src/headerblock. 35*e577be4eSNico Weber return static_cast<uint16_t>(Table->getIdForString(S)); 36f2282762SZachary Turner } 37f2282762SZachary Turner 38f2282762SZachary Turner StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const { 39f2282762SZachary Turner return Table->getStringForId(Offset); 40f2282762SZachary Turner } 41f2282762SZachary Turner 42f2282762SZachary Turner uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) { 43f2282762SZachary Turner return Table->insert(S); 44f2282762SZachary Turner } 45f2282762SZachary Turner 46e204a6c9SZachary Turner uint32_t PDBStringTableBuilder::insert(StringRef S) { 47c504ae3cSZachary Turner return Strings.insert(S); 48e204a6c9SZachary Turner } 49e204a6c9SZachary Turner 5071d36ad9SZachary Turner uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const { 5171d36ad9SZachary Turner return Strings.getIdForString(S); 5271d36ad9SZachary Turner } 5371d36ad9SZachary Turner 5471d36ad9SZachary Turner StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const { 5571d36ad9SZachary Turner return Strings.getStringForId(Id); 5671d36ad9SZachary Turner } 5771d36ad9SZachary Turner 5885e2cdacSReid Kleckner static uint32_t computeBucketCount(uint32_t NumStrings) { 59a6fb536eSZachary Turner // This is a precomputed list of Buckets given the specified number of 60a6fb536eSZachary Turner // strings. Matching the reference algorithm exactly is not strictly 61a6fb536eSZachary Turner // necessary for correctness, but it helps when comparing LLD's PDBs with 62a6fb536eSZachary Turner // Microsoft's PDBs so as to eliminate superfluous differences. 63a6fb536eSZachary Turner static std::map<uint32_t, uint32_t> StringsToBuckets = { 64a6fb536eSZachary Turner {1, 2}, 65a6fb536eSZachary Turner {2, 4}, 66a6fb536eSZachary Turner {4, 7}, 67a6fb536eSZachary Turner {6, 11}, 68a6fb536eSZachary Turner {9, 17}, 69a6fb536eSZachary Turner {13, 26}, 70a6fb536eSZachary Turner {20, 40}, 71a6fb536eSZachary Turner {31, 61}, 72a6fb536eSZachary Turner {46, 92}, 73a6fb536eSZachary Turner {70, 139}, 74a6fb536eSZachary Turner {105, 209}, 75a6fb536eSZachary Turner {157, 314}, 76a6fb536eSZachary Turner {236, 472}, 77a6fb536eSZachary Turner {355, 709}, 78a6fb536eSZachary Turner {532, 1064}, 79a6fb536eSZachary Turner {799, 1597}, 80a6fb536eSZachary Turner {1198, 2396}, 81a6fb536eSZachary Turner {1798, 3595}, 82a6fb536eSZachary Turner {2697, 5393}, 83a6fb536eSZachary Turner {4045, 8090}, 84a6fb536eSZachary Turner {6068, 12136}, 85a6fb536eSZachary Turner {9103, 18205}, 86a6fb536eSZachary Turner {13654, 27308}, 87a6fb536eSZachary Turner {20482, 40963}, 88a6fb536eSZachary Turner {30723, 61445}, 89a6fb536eSZachary Turner {46084, 92168}, 90a6fb536eSZachary Turner {69127, 138253}, 91a6fb536eSZachary Turner {103690, 207380}, 92a6fb536eSZachary Turner {155536, 311071}, 93a6fb536eSZachary Turner {233304, 466607}, 94a6fb536eSZachary Turner {349956, 699911}, 95a6fb536eSZachary Turner {524934, 1049867}, 96a6fb536eSZachary Turner {787401, 1574801}, 97a6fb536eSZachary Turner {1181101, 2362202}, 98a6fb536eSZachary Turner {1771652, 3543304}, 99a6fb536eSZachary Turner {2657479, 5314957}, 100a6fb536eSZachary Turner {3986218, 7972436}, 101a6fb536eSZachary Turner {5979328, 11958655}, 102a6fb536eSZachary Turner {8968992, 17937983}, 103a6fb536eSZachary Turner {13453488, 26906975}, 104a6fb536eSZachary Turner {20180232, 40360463}, 105a6fb536eSZachary Turner {30270348, 60540695}, 106a6fb536eSZachary Turner {45405522, 90811043}, 107a6fb536eSZachary Turner {68108283, 136216565}, 108a6fb536eSZachary Turner {102162424, 204324848}, 109a6fb536eSZachary Turner {153243637, 306487273}, 110a6fb536eSZachary Turner {229865455, 459730910}, 111a6fb536eSZachary Turner {344798183, 689596366}, 112a6fb536eSZachary Turner {517197275, 1034394550}, 113a6fb536eSZachary Turner {775795913, 1551591826}}; 114a6fb536eSZachary Turner auto Entry = StringsToBuckets.lower_bound(NumStrings); 115a6fb536eSZachary Turner assert(Entry != StringsToBuckets.end()); 116a6fb536eSZachary Turner return Entry->second; 117e204a6c9SZachary Turner } 118e204a6c9SZachary Turner 119c504ae3cSZachary Turner uint32_t PDBStringTableBuilder::calculateHashTableSize() const { 120c504ae3cSZachary Turner uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field. 121c504ae3cSZachary Turner Size += sizeof(uint32_t) * computeBucketCount(Strings.size()); 122dff096f2SDaniel Jasper 1237dba20bdSZachary Turner return Size; 1247dba20bdSZachary Turner } 1257dba20bdSZachary Turner 126c504ae3cSZachary Turner uint32_t PDBStringTableBuilder::calculateSerializedSize() const { 127c504ae3cSZachary Turner uint32_t Size = 0; 128c504ae3cSZachary Turner Size += sizeof(PDBStringTableHeader); 129c504ae3cSZachary Turner Size += Strings.calculateSerializedSize(); 130c504ae3cSZachary Turner Size += calculateHashTableSize(); 131c504ae3cSZachary Turner Size += sizeof(uint32_t); // The /names stream ends with the string count. 132c504ae3cSZachary Turner return Size; 133c504ae3cSZachary Turner } 134c504ae3cSZachary Turner 135a8cfc29cSZachary Turner void PDBStringTableBuilder::setStrings( 136a8cfc29cSZachary Turner const codeview::DebugStringTableSubsection &Strings) { 137a8cfc29cSZachary Turner this->Strings = Strings; 138a8cfc29cSZachary Turner } 139a8cfc29cSZachary Turner 140c504ae3cSZachary Turner Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const { 141e204a6c9SZachary Turner // Write a header 142e204a6c9SZachary Turner PDBStringTableHeader H; 143e204a6c9SZachary Turner H.Signature = PDBStringTableSignature; 144e204a6c9SZachary Turner H.HashVersion = 1; 145c504ae3cSZachary Turner H.ByteSize = Strings.calculateSerializedSize(); 146e204a6c9SZachary Turner if (auto EC = Writer.writeObject(H)) 147e204a6c9SZachary Turner return EC; 148c504ae3cSZachary Turner assert(Writer.bytesRemaining() == 0); 149c504ae3cSZachary Turner return Error::success(); 1507dba20bdSZachary Turner } 1517dba20bdSZachary Turner 152c504ae3cSZachary Turner Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const { 153c504ae3cSZachary Turner if (auto EC = Strings.commit(Writer)) 154c504ae3cSZachary Turner return EC; 155c504ae3cSZachary Turner 156c504ae3cSZachary Turner assert(Writer.bytesRemaining() == 0); 157c504ae3cSZachary Turner return Error::success(); 158c504ae3cSZachary Turner } 159c504ae3cSZachary Turner 160c504ae3cSZachary Turner Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const { 161e204a6c9SZachary Turner // Write a hash table. 162e204a6c9SZachary Turner uint32_t BucketCount = computeBucketCount(Strings.size()); 163e204a6c9SZachary Turner if (auto EC = Writer.writeInteger(BucketCount)) 164e204a6c9SZachary Turner return EC; 165e204a6c9SZachary Turner std::vector<ulittle32_t> Buckets(BucketCount); 166e204a6c9SZachary Turner 167c504ae3cSZachary Turner for (auto &Pair : Strings) { 168c504ae3cSZachary Turner StringRef S = Pair.getKey(); 169c504ae3cSZachary Turner uint32_t Offset = Pair.getValue(); 170e204a6c9SZachary Turner uint32_t Hash = hashStringV1(S); 171e204a6c9SZachary Turner 172e204a6c9SZachary Turner for (uint32_t I = 0; I != BucketCount; ++I) { 173e204a6c9SZachary Turner uint32_t Slot = (Hash + I) % BucketCount; 174e204a6c9SZachary Turner if (Buckets[Slot] != 0) 175e204a6c9SZachary Turner continue; 176e204a6c9SZachary Turner Buckets[Slot] = Offset; 177e204a6c9SZachary Turner break; 178e204a6c9SZachary Turner } 179e204a6c9SZachary Turner } 180e204a6c9SZachary Turner 181e204a6c9SZachary Turner if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets))) 182e204a6c9SZachary Turner return EC; 183c504ae3cSZachary Turner 184c504ae3cSZachary Turner assert(Writer.bytesRemaining() == 0); 185c504ae3cSZachary Turner return Error::success(); 186c504ae3cSZachary Turner } 187c504ae3cSZachary Turner 188c504ae3cSZachary Turner Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const { 189c504ae3cSZachary Turner if (auto EC = Writer.writeInteger<uint32_t>(Strings.size())) 190e204a6c9SZachary Turner return EC; 191c504ae3cSZachary Turner assert(Writer.bytesRemaining() == 0); 192c504ae3cSZachary Turner return Error::success(); 193c504ae3cSZachary Turner } 194c504ae3cSZachary Turner 195c504ae3cSZachary Turner Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const { 196c504ae3cSZachary Turner BinaryStreamWriter SectionWriter; 197c504ae3cSZachary Turner 198c504ae3cSZachary Turner std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader)); 199c504ae3cSZachary Turner if (auto EC = writeHeader(SectionWriter)) 200c504ae3cSZachary Turner return EC; 201c504ae3cSZachary Turner 202c504ae3cSZachary Turner std::tie(SectionWriter, Writer) = 203c504ae3cSZachary Turner Writer.split(Strings.calculateSerializedSize()); 204c504ae3cSZachary Turner if (auto EC = writeStrings(SectionWriter)) 205c504ae3cSZachary Turner return EC; 206c504ae3cSZachary Turner 207c504ae3cSZachary Turner std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize()); 208c504ae3cSZachary Turner if (auto EC = writeHashTable(SectionWriter)) 209c504ae3cSZachary Turner return EC; 210c504ae3cSZachary Turner 211c504ae3cSZachary Turner std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t)); 212c504ae3cSZachary Turner if (auto EC = writeEpilogue(SectionWriter)) 213c504ae3cSZachary Turner return EC; 214c504ae3cSZachary Turner 215e204a6c9SZachary Turner return Error::success(); 216e204a6c9SZachary Turner } 217