1d3a6c897SEugene Zelenko //===- StringTableBuilder.cpp - String table building utility -------------===//
297de474aSRafael Espindola //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
697de474aSRafael Espindola //
797de474aSRafael Espindola //===----------------------------------------------------------------------===//
897de474aSRafael Espindola 
96bda14b3SChandler Carruth #include "llvm/MC/StringTableBuilder.h"
10*983565a6SNikita Popov #include "llvm/ADT/ArrayRef.h"
11d3a6c897SEugene Zelenko #include "llvm/ADT/CachedHashString.h"
12ee34a734SJustin Lebar #include "llvm/ADT/SmallString.h"
13d3a6c897SEugene Zelenko #include "llvm/ADT/StringRef.h"
14264b5d9eSZachary Turner #include "llvm/BinaryFormat/COFF.h"
15f26bfc16SHans Wennborg #include "llvm/Support/Endian.h"
16d3a6c897SEugene Zelenko #include "llvm/Support/MathExtras.h"
1739751afcSRafael Espindola #include "llvm/Support/raw_ostream.h"
18d3a6c897SEugene Zelenko #include <cassert>
19d3a6c897SEugene Zelenko #include <cstddef>
20d3a6c897SEugene Zelenko #include <cstdint>
21d3a6c897SEugene Zelenko #include <cstring>
22d3a6c897SEugene Zelenko #include <utility>
23c55a5041SZachary Turner #include <vector>
24c55a5041SZachary Turner 
2597de474aSRafael Espindola using namespace llvm;
2697de474aSRafael Espindola 
27d3a6c897SEugene Zelenko StringTableBuilder::~StringTableBuilder() = default;
2839751afcSRafael Espindola 
initSize()2939751afcSRafael Espindola void StringTableBuilder::initSize() {
302214ed89SReid Kleckner   // Account for leading bytes in table so that offsets returned from add are
312214ed89SReid Kleckner   // correct.
322214ed89SReid Kleckner   switch (K) {
332214ed89SReid Kleckner   case RAW:
341f90029aSPaul Robinson   case DWARF:
352214ed89SReid Kleckner     Size = 0;
362214ed89SReid Kleckner     break;
3727e11d71SAlexander Shaposhnikov   case MachOLinked:
3827e11d71SAlexander Shaposhnikov   case MachO64Linked:
3927e11d71SAlexander Shaposhnikov     Size = 2;
4027e11d71SAlexander Shaposhnikov     break;
412214ed89SReid Kleckner   case MachO:
4227e11d71SAlexander Shaposhnikov   case MachO64:
432214ed89SReid Kleckner   case ELF:
4439751afcSRafael Espindola     // Start the table with a NUL byte.
452214ed89SReid Kleckner     Size = 1;
462214ed89SReid Kleckner     break;
471e46d4ceSSean Fertile   case XCOFF:
482214ed89SReid Kleckner   case WinCOFF:
4939751afcSRafael Espindola     // Make room to write the table size later.
502214ed89SReid Kleckner     Size = 4;
512214ed89SReid Kleckner     break;
522214ed89SReid Kleckner   }
532214ed89SReid Kleckner }
5421956e40SRafael Espindola 
StringTableBuilder(Kind K,unsigned Alignment)5539751afcSRafael Espindola StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment)
5639751afcSRafael Espindola     : K(K), Alignment(Alignment) {
5739751afcSRafael Espindola   initSize();
5839751afcSRafael Espindola }
5939751afcSRafael Espindola 
write(raw_ostream & OS) const6039751afcSRafael Espindola void StringTableBuilder::write(raw_ostream &OS) const {
6139751afcSRafael Espindola   assert(isFinalized());
6239751afcSRafael Espindola   SmallString<0> Data;
6339751afcSRafael Espindola   Data.resize(getSize());
6428a2edefSPeter Collingbourne   write((uint8_t *)Data.data());
6539751afcSRafael Espindola   OS << Data;
6639751afcSRafael Espindola }
6739751afcSRafael Espindola 
687975b99fSEugene Zelenko using StringPair = std::pair<CachedHashStringRef, size_t>;
6939751afcSRafael Espindola 
write(uint8_t * Buf) const7039751afcSRafael Espindola void StringTableBuilder::write(uint8_t *Buf) const {
7139751afcSRafael Espindola   assert(isFinalized());
7239751afcSRafael Espindola   for (const StringPair &P : StringIndexMap) {
7339751afcSRafael Espindola     StringRef Data = P.first.val();
7424db10d8SRafael Espindola     if (!Data.empty())
7539751afcSRafael Espindola       memcpy(Buf + P.second, Data.data(), Data.size());
7639751afcSRafael Espindola   }
771e46d4ceSSean Fertile   // The COFF formats store the size of the string table in the first 4 bytes.
781e46d4ceSSean Fertile   // For Windows, the format is little-endian; for AIX, it is big-endian.
791e46d4ceSSean Fertile   if (K == WinCOFF)
8039751afcSRafael Espindola     support::endian::write32le(Buf, Size);
811e46d4ceSSean Fertile   else if (K == XCOFF)
821e46d4ceSSean Fertile     support::endian::write32be(Buf, Size);
8339751afcSRafael Espindola }
84df94852aSRui Ueyama 
85df94852aSRui Ueyama // Returns the character at Pos from end of a string.
charTailAt(StringPair * P,size_t Pos)86df94852aSRui Ueyama static int charTailAt(StringPair *P, size_t Pos) {
8739751afcSRafael Espindola   StringRef S = P->first.val();
88df94852aSRui Ueyama   if (Pos >= S.size())
89df94852aSRui Ueyama     return -1;
90df94852aSRui Ueyama   return (unsigned char)S[S.size() - Pos - 1];
918e77dbbfSAkira Hatanaka }
92df94852aSRui Ueyama 
93df94852aSRui Ueyama // Three-way radix quicksort. This is much faster than std::sort with strcmp
94df94852aSRui Ueyama // because it does not compare characters that we already know the same.
multikeySort(MutableArrayRef<StringPair * > Vec,int Pos)9515b83279SRui Ueyama static void multikeySort(MutableArrayRef<StringPair *> Vec, int Pos) {
96df94852aSRui Ueyama tailcall:
9715b83279SRui Ueyama   if (Vec.size() <= 1)
98df94852aSRui Ueyama     return;
99df94852aSRui Ueyama 
10015b83279SRui Ueyama   // Partition items so that items in [0, I) are greater than the pivot,
10115b83279SRui Ueyama   // [I, J) are the same as the pivot, and [J, Vec.size()) are less than
10215b83279SRui Ueyama   // the pivot.
10315b83279SRui Ueyama   int Pivot = charTailAt(Vec[0], Pos);
10415b83279SRui Ueyama   size_t I = 0;
10515b83279SRui Ueyama   size_t J = Vec.size();
10615b83279SRui Ueyama   for (size_t K = 1; K < J;) {
10715b83279SRui Ueyama     int C = charTailAt(Vec[K], Pos);
108df94852aSRui Ueyama     if (C > Pivot)
10915b83279SRui Ueyama       std::swap(Vec[I++], Vec[K++]);
110df94852aSRui Ueyama     else if (C < Pivot)
11115b83279SRui Ueyama       std::swap(Vec[--J], Vec[K]);
112df94852aSRui Ueyama     else
11315b83279SRui Ueyama       K++;
114df94852aSRui Ueyama   }
115df94852aSRui Ueyama 
11615b83279SRui Ueyama   multikeySort(Vec.slice(0, I), Pos);
11715b83279SRui Ueyama   multikeySort(Vec.slice(J), Pos);
11815b83279SRui Ueyama 
11915b83279SRui Ueyama   // multikeySort(Vec.slice(I, J - I), Pos + 1), but with
12015b83279SRui Ueyama   // tail call optimization.
121df94852aSRui Ueyama   if (Pivot != -1) {
12215b83279SRui Ueyama     Vec = Vec.slice(I, J - I);
123df94852aSRui Ueyama     ++Pos;
124df94852aSRui Ueyama     goto tailcall;
125df94852aSRui Ueyama   }
1268e77dbbfSAkira Hatanaka }
1278e77dbbfSAkira Hatanaka 
finalize()12821956e40SRafael Espindola void StringTableBuilder::finalize() {
1291f90029aSPaul Robinson   assert(K != DWARF);
1302214ed89SReid Kleckner   finalizeStringTable(/*Optimize=*/true);
1312214ed89SReid Kleckner }
1322214ed89SReid Kleckner 
finalizeInOrder()1332214ed89SReid Kleckner void StringTableBuilder::finalizeInOrder() {
1342214ed89SReid Kleckner   finalizeStringTable(/*Optimize=*/false);
1352214ed89SReid Kleckner }
1362214ed89SReid Kleckner 
finalizeStringTable(bool Optimize)1372214ed89SReid Kleckner void StringTableBuilder::finalizeStringTable(bool Optimize) {
13839751afcSRafael Espindola   Finalized = true;
13939751afcSRafael Espindola 
14039751afcSRafael Espindola   if (Optimize) {
141fda3dc92SRafael Espindola     std::vector<StringPair *> Strings;
142f26bfc16SHans Wennborg     Strings.reserve(StringIndexMap.size());
143fda3dc92SRafael Espindola     for (StringPair &P : StringIndexMap)
144e015f66aSRafael Espindola       Strings.push_back(&P);
14597de474aSRafael Espindola 
14615b83279SRui Ueyama     multikeySort(Strings, 0);
14739751afcSRafael Espindola     initSize();
14897de474aSRafael Espindola 
14997de474aSRafael Espindola     StringRef Previous;
150fda3dc92SRafael Espindola     for (StringPair *P : Strings) {
15139751afcSRafael Espindola       StringRef S = P->first.val();
15239751afcSRafael Espindola       if (Previous.endswith(S)) {
15339751afcSRafael Espindola         size_t Pos = Size - S.size() - (K != RAW);
154758de9caSRafael Espindola         if (!(Pos & (Alignment - 1))) {
155758de9caSRafael Espindola           P->second = Pos;
15697de474aSRafael Espindola           continue;
15797de474aSRafael Espindola         }
158758de9caSRafael Espindola       }
15997de474aSRafael Espindola 
16039751afcSRafael Espindola       Size = alignTo(Size, Alignment);
16139751afcSRafael Espindola       P->second = Size;
1622214ed89SReid Kleckner 
16339751afcSRafael Espindola       Size += S.size();
16421956e40SRafael Espindola       if (K != RAW)
16539751afcSRafael Espindola         ++Size;
166a9b3944cSRafael Espindola       Previous = S;
16797de474aSRafael Espindola     }
168f26bfc16SHans Wennborg   }
16921956e40SRafael Espindola 
17027e11d71SAlexander Shaposhnikov   if (K == MachO || K == MachOLinked)
17139751afcSRafael Espindola     Size = alignTo(Size, 4); // Pad to multiple of 4.
17227e11d71SAlexander Shaposhnikov   if (K == MachO64 || K == MachO64Linked)
17327e11d71SAlexander Shaposhnikov     Size = alignTo(Size, 8); // Pad to multiple of 8.
17427e11d71SAlexander Shaposhnikov 
17527e11d71SAlexander Shaposhnikov   // According to ld64 the string table of a final linked Mach-O binary starts
17627e11d71SAlexander Shaposhnikov   // with " ", i.e. the first byte is ' ' and the second byte is zero. In
17727e11d71SAlexander Shaposhnikov   // 'initSize()' we reserved the first two bytes for holding this string.
17827e11d71SAlexander Shaposhnikov   if (K == MachOLinked || K == MachO64Linked)
17927e11d71SAlexander Shaposhnikov     StringIndexMap[CachedHashStringRef(" ")] = 0;
1801ed6a745SGeorge Rimar 
1811ed6a745SGeorge Rimar   // The first byte in an ELF string table must be null, according to the ELF
1821ed6a745SGeorge Rimar   // specification. In 'initSize()' we reserved the first byte to hold null for
1831ed6a745SGeorge Rimar   // this purpose and here we actually add the string to allow 'getOffset()' to
1841ed6a745SGeorge Rimar   // be called on an empty string.
1851ed6a745SGeorge Rimar   if (K == ELF)
1861ed6a745SGeorge Rimar     StringIndexMap[CachedHashStringRef("")] = 0;
187f26bfc16SHans Wennborg }
188f26bfc16SHans Wennborg 
clear()189f26bfc16SHans Wennborg void StringTableBuilder::clear() {
19039751afcSRafael Espindola   Finalized = false;
191f26bfc16SHans Wennborg   StringIndexMap.clear();
19297de474aSRafael Espindola }
193fc063e8fSRafael Espindola 
getOffset(CachedHashStringRef S) const194ee34a734SJustin Lebar size_t StringTableBuilder::getOffset(CachedHashStringRef S) const {
195fc063e8fSRafael Espindola   assert(isFinalized());
196a9b3944cSRafael Espindola   auto I = StringIndexMap.find(S);
197fc063e8fSRafael Espindola   assert(I != StringIndexMap.end() && "String is not in table!");
198fc063e8fSRafael Espindola   return I->second;
199fc063e8fSRafael Espindola }
200fc063e8fSRafael Espindola 
add(CachedHashStringRef S)201ee34a734SJustin Lebar size_t StringTableBuilder::add(CachedHashStringRef S) {
20239751afcSRafael Espindola   if (K == WinCOFF)
20339751afcSRafael Espindola     assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
20439751afcSRafael Espindola 
205fc063e8fSRafael Espindola   assert(!isFinalized());
2064a2ef911SRafael Espindola   auto P = StringIndexMap.insert(std::make_pair(S, 0));
2074a2ef911SRafael Espindola   if (P.second) {
208758de9caSRafael Espindola     size_t Start = alignTo(Size, Alignment);
2094a2ef911SRafael Espindola     P.first->second = Start;
210758de9caSRafael Espindola     Size = Start + S.size() + (K != RAW);
2114a2ef911SRafael Espindola   }
21221956e40SRafael Espindola   return P.first->second;
213fc063e8fSRafael Espindola }
214