1e204a6c9SZachary Turner //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
2e204a6c9SZachary Turner //
3e204a6c9SZachary Turner //                     The LLVM Compiler Infrastructure
4e204a6c9SZachary Turner //
5e204a6c9SZachary Turner // This file is distributed under the University of Illinois Open Source
6e204a6c9SZachary Turner // License. See LICENSE.TXT for details.
7e204a6c9SZachary Turner //
8e204a6c9SZachary Turner //===----------------------------------------------------------------------===//
9e204a6c9SZachary Turner 
10e204a6c9SZachary Turner #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
11e204a6c9SZachary Turner #include "llvm/ADT/ArrayRef.h"
12e204a6c9SZachary Turner #include "llvm/DebugInfo/PDB/Native/Hash.h"
13e204a6c9SZachary Turner #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
14e204a6c9SZachary Turner #include "llvm/Support/BinaryStreamWriter.h"
15e204a6c9SZachary Turner #include "llvm/Support/Endian.h"
16e204a6c9SZachary Turner 
17e204a6c9SZachary Turner using namespace llvm;
18e204a6c9SZachary Turner using namespace llvm::support;
19e204a6c9SZachary Turner using namespace llvm::support::endian;
20e204a6c9SZachary Turner using namespace llvm::pdb;
21e204a6c9SZachary Turner 
22e204a6c9SZachary Turner uint32_t PDBStringTableBuilder::insert(StringRef S) {
23*dff096f2SDaniel Jasper   auto P = Strings.insert({S, StringSize});
24*dff096f2SDaniel Jasper 
25*dff096f2SDaniel Jasper   // If a given string didn't exist in the string table, we want to increment
26*dff096f2SDaniel Jasper   // the string table size.
27*dff096f2SDaniel Jasper   if (P.second)
28*dff096f2SDaniel Jasper     StringSize += S.size() + 1; // +1 for '\0'
29*dff096f2SDaniel Jasper   return P.first->second;
30*dff096f2SDaniel Jasper }
31*dff096f2SDaniel Jasper 
32*dff096f2SDaniel Jasper uint32_t PDBStringTableBuilder::getStringIndex(StringRef S) {
33*dff096f2SDaniel Jasper   auto Iter = Strings.find(S);
34*dff096f2SDaniel Jasper   assert(Iter != Strings.end());
35*dff096f2SDaniel Jasper   return Iter->second;
36e204a6c9SZachary Turner }
37e204a6c9SZachary Turner 
38e204a6c9SZachary Turner static uint32_t computeBucketCount(uint32_t NumStrings) {
39e204a6c9SZachary Turner   // The /names stream is basically an on-disk open-addressing hash table.
40e204a6c9SZachary Turner   // Hash collisions are resolved by linear probing. We cannot make
41e204a6c9SZachary Turner   // utilization 100% because it will make the linear probing extremely
42e204a6c9SZachary Turner   // slow. But lower utilization wastes disk space. As a reasonable
43e204a6c9SZachary Turner   // load factor, we choose 80%. We need +1 because slot 0 is reserved.
44e204a6c9SZachary Turner   return (NumStrings + 1) * 1.25;
45e204a6c9SZachary Turner }
46e204a6c9SZachary Turner 
47*dff096f2SDaniel Jasper uint32_t PDBStringTableBuilder::finalize() {
487dba20bdSZachary Turner   uint32_t Size = 0;
497dba20bdSZachary Turner   Size += sizeof(PDBStringTableHeader);
50*dff096f2SDaniel Jasper   Size += StringSize;
51*dff096f2SDaniel Jasper   Size += sizeof(uint32_t); // Hash table begins with 4-byte size field.
52*dff096f2SDaniel Jasper 
53*dff096f2SDaniel Jasper   uint32_t BucketCount = computeBucketCount(Strings.size());
54*dff096f2SDaniel Jasper   Size += BucketCount * sizeof(uint32_t);
55*dff096f2SDaniel Jasper 
56*dff096f2SDaniel Jasper   Size +=
57*dff096f2SDaniel Jasper       sizeof(uint32_t); // The /names stream ends with the number of strings.
587dba20bdSZachary Turner   return Size;
597dba20bdSZachary Turner }
607dba20bdSZachary Turner 
61*dff096f2SDaniel Jasper Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
62e204a6c9SZachary Turner   // Write a header
63e204a6c9SZachary Turner   PDBStringTableHeader H;
64e204a6c9SZachary Turner   H.Signature = PDBStringTableSignature;
65e204a6c9SZachary Turner   H.HashVersion = 1;
66*dff096f2SDaniel Jasper   H.ByteSize = StringSize;
67e204a6c9SZachary Turner   if (auto EC = Writer.writeObject(H))
68e204a6c9SZachary Turner     return EC;
69e204a6c9SZachary Turner 
70*dff096f2SDaniel Jasper   // Write a string table.
71*dff096f2SDaniel Jasper   uint32_t StringStart = Writer.getOffset();
72*dff096f2SDaniel Jasper   for (auto Pair : Strings) {
73*dff096f2SDaniel Jasper     StringRef S = Pair.first;
74*dff096f2SDaniel Jasper     uint32_t Offset = Pair.second;
75*dff096f2SDaniel Jasper     Writer.setOffset(StringStart + Offset);
76*dff096f2SDaniel Jasper     if (auto EC = Writer.writeCString(S))
777dba20bdSZachary Turner       return EC;
787dba20bdSZachary Turner   }
79*dff096f2SDaniel Jasper   Writer.setOffset(StringStart + StringSize);
807dba20bdSZachary Turner 
81e204a6c9SZachary Turner   // Write a hash table.
82e204a6c9SZachary Turner   uint32_t BucketCount = computeBucketCount(Strings.size());
83e204a6c9SZachary Turner   if (auto EC = Writer.writeInteger(BucketCount))
84e204a6c9SZachary Turner     return EC;
85e204a6c9SZachary Turner   std::vector<ulittle32_t> Buckets(BucketCount);
86e204a6c9SZachary Turner 
87*dff096f2SDaniel Jasper   for (auto Pair : Strings) {
88*dff096f2SDaniel Jasper     StringRef S = Pair.first;
89*dff096f2SDaniel Jasper     uint32_t Offset = Pair.second;
90e204a6c9SZachary Turner     uint32_t Hash = hashStringV1(S);
91e204a6c9SZachary Turner 
92e204a6c9SZachary Turner     for (uint32_t I = 0; I != BucketCount; ++I) {
93e204a6c9SZachary Turner       uint32_t Slot = (Hash + I) % BucketCount;
94e204a6c9SZachary Turner       if (Slot == 0)
95e204a6c9SZachary Turner         continue; // Skip reserved slot
96e204a6c9SZachary Turner       if (Buckets[Slot] != 0)
97e204a6c9SZachary Turner         continue;
98e204a6c9SZachary Turner       Buckets[Slot] = Offset;
99e204a6c9SZachary Turner       break;
100e204a6c9SZachary Turner     }
101e204a6c9SZachary Turner   }
102e204a6c9SZachary Turner 
103e204a6c9SZachary Turner   if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
104e204a6c9SZachary Turner     return EC;
105*dff096f2SDaniel Jasper   if (auto EC = Writer.writeInteger(static_cast<uint32_t>(Strings.size())))
106e204a6c9SZachary Turner     return EC;
107e204a6c9SZachary Turner   return Error::success();
108e204a6c9SZachary Turner }
109