1 //===- HashTable.cpp - PDB Hash Table -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "llvm/DebugInfo/PDB/Native/HashTable.h" 11 #include "llvm/ADT/Optional.h" 12 #include "llvm/DebugInfo/PDB/Native/RawError.h" 13 #include "llvm/Support/BinaryStreamReader.h" 14 #include "llvm/Support/BinaryStreamWriter.h" 15 #include "llvm/Support/Error.h" 16 #include "llvm/Support/MathExtras.h" 17 #include <algorithm> 18 #include <cassert> 19 #include <cstdint> 20 #include <utility> 21 22 using namespace llvm; 23 using namespace llvm::pdb; 24 25 HashTable::HashTable() : HashTable(8) {} 26 27 HashTable::HashTable(uint32_t Capacity) { Buckets.resize(Capacity); } 28 29 Error HashTable::load(BinaryStreamReader &Stream) { 30 const Header *H; 31 if (auto EC = Stream.readObject(H)) 32 return EC; 33 if (H->Capacity == 0) 34 return make_error<RawError>(raw_error_code::corrupt_file, 35 "Invalid Hash Table Capacity"); 36 if (H->Size > maxLoad(H->Capacity)) 37 return make_error<RawError>(raw_error_code::corrupt_file, 38 "Invalid Hash Table Size"); 39 40 Buckets.resize(H->Capacity); 41 42 if (auto EC = readSparseBitVector(Stream, Present)) 43 return EC; 44 if (Present.count() != H->Size) 45 return make_error<RawError>(raw_error_code::corrupt_file, 46 "Present bit vector does not match size!"); 47 48 if (auto EC = readSparseBitVector(Stream, Deleted)) 49 return EC; 50 if (Present.intersects(Deleted)) 51 return make_error<RawError>(raw_error_code::corrupt_file, 52 "Present bit vector interesects deleted!"); 53 54 for (uint32_t P : Present) { 55 if (auto EC = Stream.readInteger(Buckets[P].first)) 56 return EC; 57 if (auto EC = Stream.readInteger(Buckets[P].second)) 58 return EC; 59 } 60 61 return Error::success(); 62 } 63 64 uint32_t HashTable::calculateSerializedLength() const { 65 uint32_t Size = sizeof(Header); 66 67 int NumBitsP = Present.find_last() + 1; 68 int NumBitsD = Deleted.find_last() + 1; 69 70 // Present bit set number of words, followed by that many actual words. 71 Size += sizeof(uint32_t); 72 Size += alignTo(NumBitsP, sizeof(uint32_t)); 73 74 // Deleted bit set number of words, followed by that many actual words. 75 Size += sizeof(uint32_t); 76 Size += alignTo(NumBitsD, sizeof(uint32_t)); 77 78 // One (Key, Value) pair for each entry Present. 79 Size += 2 * sizeof(uint32_t) * size(); 80 81 return Size; 82 } 83 84 Error HashTable::commit(BinaryStreamWriter &Writer) const { 85 Header H; 86 H.Size = size(); 87 H.Capacity = capacity(); 88 if (auto EC = Writer.writeObject(H)) 89 return EC; 90 91 if (auto EC = writeSparseBitVector(Writer, Present)) 92 return EC; 93 94 if (auto EC = writeSparseBitVector(Writer, Deleted)) 95 return EC; 96 97 for (const auto &Entry : *this) { 98 if (auto EC = Writer.writeInteger(Entry.first)) 99 return EC; 100 if (auto EC = Writer.writeInteger(Entry.second)) 101 return EC; 102 } 103 return Error::success(); 104 } 105 106 void HashTable::clear() { 107 Buckets.resize(8); 108 Present.clear(); 109 Deleted.clear(); 110 } 111 112 uint32_t HashTable::capacity() const { return Buckets.size(); } 113 114 uint32_t HashTable::size() const { return Present.count(); } 115 116 HashTableIterator HashTable::begin() const { return HashTableIterator(*this); } 117 118 HashTableIterator HashTable::end() const { 119 return HashTableIterator(*this, 0, true); 120 } 121 122 HashTableIterator HashTable::find(uint32_t K) { 123 uint32_t H = K % capacity(); 124 uint32_t I = H; 125 Optional<uint32_t> FirstUnused; 126 do { 127 if (isPresent(I)) { 128 if (Buckets[I].first == K) 129 return HashTableIterator(*this, I, false); 130 } else { 131 if (!FirstUnused) 132 FirstUnused = I; 133 // Insertion occurs via linear probing from the slot hint, and will be 134 // inserted at the first empty / deleted location. Therefore, if we are 135 // probing and find a location that is neither present nor deleted, then 136 // nothing must have EVER been inserted at this location, and thus it is 137 // not possible for a matching value to occur later. 138 if (!isDeleted(I)) 139 break; 140 } 141 I = (I + 1) % capacity(); 142 } while (I != H); 143 144 // The only way FirstUnused would not be set is if every single entry in the 145 // table were Present. But this would violate the load factor constraints 146 // that we impose, so it should never happen. 147 assert(FirstUnused); 148 return HashTableIterator(*this, *FirstUnused, true); 149 } 150 151 void HashTable::set(uint32_t K, uint32_t V) { 152 auto Entry = find(K); 153 if (Entry != end()) { 154 assert(isPresent(Entry.index())); 155 assert(Buckets[Entry.index()].first == K); 156 // We're updating, no need to do anything special. 157 Buckets[Entry.index()].second = V; 158 return; 159 } 160 161 auto &B = Buckets[Entry.index()]; 162 assert(!isPresent(Entry.index())); 163 assert(Entry.isEnd()); 164 B.first = K; 165 B.second = V; 166 Present.set(Entry.index()); 167 Deleted.reset(Entry.index()); 168 169 grow(); 170 171 assert(find(K) != end()); 172 } 173 174 void HashTable::remove(uint32_t K) { 175 auto Iter = find(K); 176 // It wasn't here to begin with, just exit. 177 if (Iter == end()) 178 return; 179 180 assert(Present.test(Iter.index())); 181 assert(!Deleted.test(Iter.index())); 182 Deleted.set(Iter.index()); 183 Present.reset(Iter.index()); 184 } 185 186 uint32_t HashTable::get(uint32_t K) { 187 auto I = find(K); 188 assert(I != end()); 189 return (*I).second; 190 } 191 192 uint32_t HashTable::maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; } 193 194 void HashTable::grow() { 195 uint32_t S = size(); 196 if (S < maxLoad(capacity())) 197 return; 198 assert(capacity() != UINT32_MAX && "Can't grow Hash table!"); 199 200 uint32_t NewCapacity = 201 (capacity() <= INT32_MAX) ? capacity() * 2 : UINT32_MAX; 202 203 // Growing requires rebuilding the table and re-hashing every item. Make a 204 // copy with a larger capacity, insert everything into the copy, then swap 205 // it in. 206 HashTable NewMap(NewCapacity); 207 for (auto I : Present) { 208 NewMap.set(Buckets[I].first, Buckets[I].second); 209 } 210 211 Buckets.swap(NewMap.Buckets); 212 std::swap(Present, NewMap.Present); 213 std::swap(Deleted, NewMap.Deleted); 214 assert(capacity() == NewCapacity); 215 assert(size() == S); 216 } 217 218 Error HashTable::readSparseBitVector(BinaryStreamReader &Stream, 219 SparseBitVector<> &V) { 220 uint32_t NumWords; 221 if (auto EC = Stream.readInteger(NumWords)) 222 return joinErrors( 223 std::move(EC), 224 make_error<RawError>(raw_error_code::corrupt_file, 225 "Expected hash table number of words")); 226 227 for (uint32_t I = 0; I != NumWords; ++I) { 228 uint32_t Word; 229 if (auto EC = Stream.readInteger(Word)) 230 return joinErrors(std::move(EC), 231 make_error<RawError>(raw_error_code::corrupt_file, 232 "Expected hash table word")); 233 for (unsigned Idx = 0; Idx < 32; ++Idx) 234 if (Word & (1U << Idx)) 235 V.set((I * 32) + Idx); 236 } 237 return Error::success(); 238 } 239 240 Error HashTable::writeSparseBitVector(BinaryStreamWriter &Writer, 241 SparseBitVector<> &Vec) { 242 int ReqBits = Vec.find_last() + 1; 243 uint32_t NumWords = alignTo(ReqBits, sizeof(uint32_t)) / sizeof(uint32_t); 244 if (auto EC = Writer.writeInteger(NumWords)) 245 return joinErrors( 246 std::move(EC), 247 make_error<RawError>(raw_error_code::corrupt_file, 248 "Could not write linear map number of words")); 249 250 uint32_t Idx = 0; 251 for (uint32_t I = 0; I != NumWords; ++I) { 252 uint32_t Word = 0; 253 for (uint32_t WordIdx = 0; WordIdx < 32; ++WordIdx, ++Idx) { 254 if (Vec.test(Idx)) 255 Word |= (1 << WordIdx); 256 } 257 if (auto EC = Writer.writeInteger(Word)) 258 return joinErrors(std::move(EC), make_error<RawError>( 259 raw_error_code::corrupt_file, 260 "Could not write linear map word")); 261 } 262 return Error::success(); 263 } 264 265 HashTableIterator::HashTableIterator(const HashTable &Map, uint32_t Index, 266 bool IsEnd) 267 : Map(&Map), Index(Index), IsEnd(IsEnd) {} 268 269 HashTableIterator::HashTableIterator(const HashTable &Map) : Map(&Map) { 270 int I = Map.Present.find_first(); 271 if (I == -1) { 272 Index = 0; 273 IsEnd = true; 274 } else { 275 Index = static_cast<uint32_t>(I); 276 IsEnd = false; 277 } 278 } 279 280 HashTableIterator &HashTableIterator::operator=(const HashTableIterator &R) { 281 Map = R.Map; 282 return *this; 283 } 284 285 bool HashTableIterator::operator==(const HashTableIterator &R) const { 286 if (IsEnd && R.IsEnd) 287 return true; 288 if (IsEnd != R.IsEnd) 289 return false; 290 291 return (Map == R.Map) && (Index == R.Index); 292 } 293 294 const std::pair<uint32_t, uint32_t> &HashTableIterator::operator*() const { 295 assert(Map->Present.test(Index)); 296 return Map->Buckets[Index]; 297 } 298 299 HashTableIterator &HashTableIterator::operator++() { 300 while (Index < Map->Buckets.size()) { 301 ++Index; 302 if (Map->Present.test(Index)) 303 return *this; 304 } 305 306 IsEnd = true; 307 return *this; 308 } 309