1 //===-- StringTableBuilder.cpp - String table building utility ------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/MC/StringTableBuilder.h"
11 #include "llvm/ADT/STLExtras.h"
12 #include "llvm/Support/COFF.h"
13 #include "llvm/Support/Endian.h"
14 
15 #include <vector>
16 
17 using namespace llvm;
18 
19 StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment)
20     : K(K), Alignment(Alignment) {
21   // Account for leading bytes in table so that offsets returned from add are
22   // correct.
23   switch (K) {
24   case RAW:
25     Size = 0;
26     break;
27   case MachO:
28   case ELF:
29     Size = 1;
30     break;
31   case WinCOFF:
32     Size = 4;
33     break;
34   }
35 }
36 
37 size_t StringTableBuilder::getSize() const {
38   assert(isFinalized());
39   return Size;
40 }
41 
42 typedef std::pair<CachedHash<StringRef>, size_t> StringPair;
43 
44 // Returns the character at Pos from end of a string.
45 static int charTailAt(StringPair *P, size_t Pos) {
46   StringRef S = P->first.Val;
47   if (Pos >= S.size())
48     return -1;
49   return (unsigned char)S[S.size() - Pos - 1];
50 }
51 
52 // Three-way radix quicksort. This is much faster than std::sort with strcmp
53 // because it does not compare characters that we already know the same.
54 static void multikey_qsort(StringPair **Begin, StringPair **End, int Pos) {
55 tailcall:
56   if (End - Begin <= 1)
57     return;
58 
59   // Partition items. Items in [Begin, P) are greater than the pivot,
60   // [P, Q) are the same as the pivot, and [Q, End) are less than the pivot.
61   int Pivot = charTailAt(*Begin, Pos);
62   StringPair **P = Begin;
63   StringPair **Q = End;
64   for (StringPair **R = Begin + 1; R < Q;) {
65     int C = charTailAt(*R, Pos);
66     if (C > Pivot)
67       std::swap(*P++, *R++);
68     else if (C < Pivot)
69       std::swap(*--Q, *R);
70     else
71       R++;
72   }
73 
74   multikey_qsort(Begin, P, Pos);
75   multikey_qsort(Q, End, Pos);
76   if (Pivot != -1) {
77     // qsort(P, Q, Pos + 1), but with tail call optimization.
78     Begin = P;
79     End = Q;
80     ++Pos;
81     goto tailcall;
82   }
83 }
84 
85 void StringTableBuilder::finalize() {
86   finalizeStringTable(/*Optimize=*/true);
87 }
88 
89 void StringTableBuilder::finalizeInOrder() {
90   finalizeStringTable(/*Optimize=*/false);
91 }
92 
93 void StringTableBuilder::finalizeStringTable(bool Optimize) {
94   typedef std::pair<CachedHash<StringRef>, size_t> StringOffsetPair;
95   std::vector<StringOffsetPair *> Strings;
96   Strings.reserve(StringIndexMap.size());
97   for (StringOffsetPair &P : StringIndexMap)
98     Strings.push_back(&P);
99 
100   if (!Strings.empty()) {
101     // If we're optimizing, sort by name. If not, sort by previously assigned
102     // offset.
103     if (Optimize) {
104       multikey_qsort(&Strings[0], &Strings[0] + Strings.size(), 0);
105     } else {
106       std::sort(Strings.begin(), Strings.end(),
107                 [](const StringOffsetPair *LHS, const StringOffsetPair *RHS) {
108                   return LHS->second < RHS->second;
109                 });
110     }
111   }
112 
113   switch (K) {
114   case RAW:
115     break;
116   case ELF:
117   case MachO:
118     // Start the table with a NUL byte.
119     StringTable += '\x00';
120     break;
121   case WinCOFF:
122     // Make room to write the table size later.
123     StringTable.append(4, '\x00');
124     break;
125   }
126 
127   StringRef Previous;
128   for (StringOffsetPair *P : Strings) {
129     StringRef S = P->first.Val;
130     if (K == WinCOFF)
131       assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
132 
133     if (Optimize && Previous.endswith(S)) {
134       size_t Pos = StringTable.size() - S.size() - (K != RAW);
135       if (!(Pos & (Alignment - 1))) {
136         P->second = Pos;
137         continue;
138       }
139     }
140 
141     if (Optimize) {
142       size_t Start = alignTo(StringTable.size(), Alignment);
143       P->second = Start;
144       StringTable.append(Start - StringTable.size(), '\0');
145     } else {
146       assert(P->second == StringTable.size() &&
147              "different strtab offset after finalization");
148     }
149 
150     StringTable += S;
151     if (K != RAW)
152       StringTable += '\x00';
153     Previous = S;
154   }
155 
156   switch (K) {
157   case RAW:
158   case ELF:
159     break;
160   case MachO:
161     // Pad to multiple of 4.
162     while (StringTable.size() % 4)
163       StringTable += '\x00';
164     break;
165   case WinCOFF:
166     // Write the table size in the first word.
167     assert(StringTable.size() <= std::numeric_limits<uint32_t>::max());
168     uint32_t Size = static_cast<uint32_t>(StringTable.size());
169     support::endian::write<uint32_t, support::little, support::unaligned>(
170         StringTable.data(), Size);
171     break;
172   }
173 
174   Size = StringTable.size();
175 }
176 
177 void StringTableBuilder::clear() {
178   StringTable.clear();
179   StringIndexMap.clear();
180 }
181 
182 size_t StringTableBuilder::getOffset(StringRef S) const {
183   assert(isFinalized());
184   auto I = StringIndexMap.find(S);
185   assert(I != StringIndexMap.end() && "String is not in table!");
186   return I->second;
187 }
188 
189 size_t StringTableBuilder::add(StringRef S) {
190   assert(!isFinalized());
191   size_t Start = alignTo(Size, Alignment);
192   auto P = StringIndexMap.insert(std::make_pair(S, Start));
193   if (P.second)
194     Size = Start + S.size() + (K != RAW);
195   return P.first->second;
196 }
197