1 //===--- MultiOnDiskHashTable.h - Merged set of hash tables -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file provides a hash table data structure suitable for incremental and 11 // distributed storage across a set of files. 12 // 13 // Multiple hash tables from different files are implicitly merged to improve 14 // performance, and on reload the merged table will override those from other 15 // files. 16 // 17 //===----------------------------------------------------------------------===// 18 #ifndef LLVM_CLANG_LIB_SERIALIZATION_MULTIONDISKHASHTABLE_H 19 #define LLVM_CLANG_LIB_SERIALIZATION_MULTIONDISKHASHTABLE_H 20 21 #include "llvm/ADT/DenseMap.h" 22 #include "llvm/ADT/DenseSet.h" 23 #include "llvm/ADT/PointerUnion.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/TinyPtrVector.h" 26 #include "llvm/Support/EndianStream.h" 27 #include "llvm/Support/OnDiskHashTable.h" 28 29 namespace clang { 30 namespace serialization { 31 32 class ModuleFile; 33 34 /// \brief A collection of on-disk hash tables, merged when relevant for performance. 35 template<typename Info> class MultiOnDiskHashTable { 36 public: 37 /// A handle to a file, used when overriding tables. 38 typedef typename Info::file_type file_type; 39 /// A pointer to an on-disk representation of the hash table. 40 typedef const unsigned char *storage_type; 41 42 typedef typename Info::external_key_type external_key_type; 43 typedef typename Info::internal_key_type internal_key_type; 44 typedef typename Info::data_type data_type; 45 typedef typename Info::data_type_builder data_type_builder; 46 typedef unsigned hash_value_type; 47 48 private: 49 /// \brief A hash table stored on disk. 50 struct OnDiskTable { 51 typedef llvm::OnDiskIterableChainedHashTable<Info> HashTable; 52 53 file_type File; 54 HashTable Table; 55 56 OnDiskTable(file_type File, unsigned NumBuckets, unsigned NumEntries, 57 storage_type Buckets, storage_type Payload, storage_type Base, 58 const Info &InfoObj) 59 : File(File), 60 Table(NumBuckets, NumEntries, Buckets, Payload, Base, InfoObj) {} 61 }; 62 63 struct MergedTable { 64 std::vector<file_type> Files; 65 llvm::DenseMap<internal_key_type, data_type> Data; 66 }; 67 68 typedef llvm::PointerUnion<OnDiskTable*, MergedTable*> Table; 69 typedef llvm::TinyPtrVector<void*> TableVector; 70 71 /// \brief The current set of on-disk and merged tables. 72 /// We manually store the opaque value of the Table because TinyPtrVector 73 /// can't cope with holding a PointerUnion directly. 74 /// There can be at most one MergedTable in this vector, and if present, 75 /// it is the first table. 76 TableVector Tables; 77 78 /// \brief Files corresponding to overridden tables that we've not yet 79 /// discarded. 80 llvm::TinyPtrVector<file_type> PendingOverrides; 81 82 struct AsOnDiskTable { 83 typedef OnDiskTable *result_type; 84 result_type operator()(void *P) const { 85 return Table::getFromOpaqueValue(P).template get<OnDiskTable *>(); 86 } 87 }; 88 typedef llvm::mapped_iterator<TableVector::iterator, AsOnDiskTable> 89 table_iterator; 90 typedef llvm::iterator_range<table_iterator> table_range; 91 92 /// \brief The current set of on-disk tables. 93 table_range tables() { 94 auto Begin = Tables.begin(), End = Tables.end(); 95 if (getMergedTable()) 96 ++Begin; 97 return llvm::make_range(llvm::map_iterator(Begin, AsOnDiskTable()), 98 llvm::map_iterator(End, AsOnDiskTable())); 99 } 100 101 MergedTable *getMergedTable() const { 102 // If we already have a merged table, it's the first one. 103 return Tables.empty() ? nullptr : Table::getFromOpaqueValue(*Tables.begin()) 104 .template dyn_cast<MergedTable*>(); 105 } 106 107 /// \brief Delete all our current on-disk tables. 108 void clear() { 109 for (auto *T : tables()) 110 delete T; 111 if (auto *M = getMergedTable()) 112 delete M; 113 Tables.clear(); 114 } 115 116 void removeOverriddenTables() { 117 llvm::DenseSet<file_type> Files; 118 Files.insert(PendingOverrides.begin(), PendingOverrides.end()); 119 // Explicitly capture Files to work around an MSVC 2015 rejects-valid bug. 120 auto ShouldRemove = [&Files](void *T) -> bool { 121 auto *ODT = Table::getFromOpaqueValue(T).template get<OnDiskTable *>(); 122 bool Remove = Files.count(ODT->File); 123 if (Remove) 124 delete ODT; 125 return Remove; 126 }; 127 Tables.erase(std::remove_if(tables().begin().getCurrent(), Tables.end(), 128 ShouldRemove), 129 Tables.end()); 130 PendingOverrides.clear(); 131 } 132 133 void condense() { 134 MergedTable *Merged = getMergedTable(); 135 if (!Merged) 136 Merged = new MergedTable; 137 138 // Read in all the tables and merge them together. 139 // FIXME: Be smarter about which tables we merge. 140 for (auto *ODT : tables()) { 141 auto &HT = ODT->Table; 142 Info &InfoObj = HT.getInfoObj(); 143 144 for (auto I = HT.data_begin(), E = HT.data_end(); I != E; ++I) { 145 auto *LocalPtr = I.getItem(); 146 147 // FIXME: Don't rely on the OnDiskHashTable format here. 148 auto L = InfoObj.ReadKeyDataLength(LocalPtr); 149 const internal_key_type &Key = InfoObj.ReadKey(LocalPtr, L.first); 150 data_type_builder ValueBuilder(Merged->Data[Key]); 151 InfoObj.ReadDataInto(Key, LocalPtr + L.first, L.second, 152 ValueBuilder); 153 } 154 155 Merged->Files.push_back(ODT->File); 156 delete ODT; 157 } 158 159 Tables.clear(); 160 Tables.push_back(Table(Merged).getOpaqueValue()); 161 } 162 163 /// The generator is permitted to read our merged table. 164 template<typename ReaderInfo, typename WriterInfo> 165 friend class MultiOnDiskHashTableGenerator; 166 167 public: 168 MultiOnDiskHashTable() {} 169 MultiOnDiskHashTable(MultiOnDiskHashTable &&O) 170 : Tables(std::move(O.Tables)), 171 PendingOverrides(std::move(O.PendingOverrides)) { 172 O.Tables.clear(); 173 } 174 MultiOnDiskHashTable &operator=(MultiOnDiskHashTable &&O) { 175 if (&O == this) 176 return *this; 177 clear(); 178 Tables = std::move(O.Tables); 179 O.Tables.clear(); 180 PendingOverrides = std::move(O.PendingOverrides); 181 return *this; 182 } 183 ~MultiOnDiskHashTable() { clear(); } 184 185 /// \brief Add the table \p Data loaded from file \p File. 186 void add(file_type File, storage_type Data, Info InfoObj = Info()) { 187 using namespace llvm::support; 188 storage_type Ptr = Data; 189 190 uint32_t BucketOffset = endian::readNext<uint32_t, little, unaligned>(Ptr); 191 192 // Read the list of overridden files. 193 uint32_t NumFiles = endian::readNext<uint32_t, little, unaligned>(Ptr); 194 // FIXME: Add a reserve() to TinyPtrVector so that we don't need to make 195 // an additional copy. 196 llvm::SmallVector<file_type, 16> OverriddenFiles; 197 OverriddenFiles.reserve(NumFiles); 198 for (/**/; NumFiles != 0; --NumFiles) 199 OverriddenFiles.push_back(InfoObj.ReadFileRef(Ptr)); 200 PendingOverrides.insert(PendingOverrides.end(), OverriddenFiles.begin(), 201 OverriddenFiles.end()); 202 203 // Read the OnDiskChainedHashTable header. 204 storage_type Buckets = Data + BucketOffset; 205 auto NumBucketsAndEntries = 206 OnDiskTable::HashTable::readNumBucketsAndEntries(Buckets); 207 208 // Register the table. 209 Table NewTable = new OnDiskTable(File, NumBucketsAndEntries.first, 210 NumBucketsAndEntries.second, 211 Buckets, Ptr, Data, std::move(InfoObj)); 212 Tables.push_back(NewTable.getOpaqueValue()); 213 } 214 215 /// \brief Find and read the lookup results for \p EKey. 216 data_type find(const external_key_type &EKey) { 217 data_type Result; 218 219 if (!PendingOverrides.empty()) 220 removeOverriddenTables(); 221 222 if (Tables.size() > static_cast<unsigned>(Info::MaxTables)) 223 condense(); 224 225 internal_key_type Key = Info::GetInternalKey(EKey); 226 auto KeyHash = Info::ComputeHash(Key); 227 228 if (MergedTable *M = getMergedTable()) { 229 auto It = M->Data.find(Key); 230 if (It != M->Data.end()) 231 Result = It->second; 232 } 233 234 data_type_builder ResultBuilder(Result); 235 236 for (auto *ODT : tables()) { 237 auto &HT = ODT->Table; 238 auto It = HT.find_hashed(Key, KeyHash); 239 if (It != HT.end()) 240 HT.getInfoObj().ReadDataInto(Key, It.getDataPtr(), It.getDataLen(), 241 ResultBuilder); 242 } 243 244 return Result; 245 } 246 247 /// \brief Read all the lookup results into a single value. This only makes 248 /// sense if merging values across keys is meaningful. 249 data_type findAll() { 250 data_type Result; 251 data_type_builder ResultBuilder(Result); 252 253 if (!PendingOverrides.empty()) 254 removeOverriddenTables(); 255 256 if (MergedTable *M = getMergedTable()) { 257 for (auto &KV : M->Data) 258 Info::MergeDataInto(KV.second, ResultBuilder); 259 } 260 261 for (auto *ODT : tables()) { 262 auto &HT = ODT->Table; 263 Info &InfoObj = HT.getInfoObj(); 264 for (auto I = HT.data_begin(), E = HT.data_end(); I != E; ++I) { 265 auto *LocalPtr = I.getItem(); 266 267 // FIXME: Don't rely on the OnDiskHashTable format here. 268 auto L = InfoObj.ReadKeyDataLength(LocalPtr); 269 const internal_key_type &Key = InfoObj.ReadKey(LocalPtr, L.first); 270 InfoObj.ReadDataInto(Key, LocalPtr + L.first, L.second, ResultBuilder); 271 } 272 } 273 274 return Result; 275 } 276 }; 277 278 /// \brief Writer for the on-disk hash table. 279 template<typename ReaderInfo, typename WriterInfo> 280 class MultiOnDiskHashTableGenerator { 281 typedef MultiOnDiskHashTable<ReaderInfo> BaseTable; 282 typedef llvm::OnDiskChainedHashTableGenerator<WriterInfo> Generator; 283 284 Generator Gen; 285 286 public: 287 MultiOnDiskHashTableGenerator() : Gen() {} 288 289 void insert(typename WriterInfo::key_type_ref Key, 290 typename WriterInfo::data_type_ref Data, WriterInfo &Info) { 291 Gen.insert(Key, Data, Info); 292 } 293 294 void emit(llvm::SmallVectorImpl<char> &Out, WriterInfo &Info, 295 const BaseTable *Base) { 296 using namespace llvm::support; 297 llvm::raw_svector_ostream OutStream(Out); 298 299 // Write our header information. 300 { 301 endian::Writer<little> Writer(OutStream); 302 303 // Reserve four bytes for the bucket offset. 304 Writer.write<uint32_t>(0); 305 306 if (auto *Merged = Base ? Base->getMergedTable() : nullptr) { 307 // Write list of overridden files. 308 Writer.write<uint32_t>(Merged->Files.size()); 309 for (const auto &F : Merged->Files) 310 Info.EmitFileRef(OutStream, F); 311 312 // Add all merged entries from Base to the generator. 313 for (auto &KV : Merged->Data) { 314 if (!Gen.contains(KV.first, Info)) 315 Gen.insert(KV.first, Info.ImportData(KV.second), Info); 316 } 317 } else { 318 Writer.write<uint32_t>(0); 319 } 320 } 321 322 // Write the table itself. 323 uint32_t BucketOffset = Gen.Emit(OutStream, Info); 324 325 // Replace the first four bytes with the bucket offset. 326 endian::write32le(Out.data(), BucketOffset); 327 } 328 }; 329 330 } // end namespace clang::serialization 331 } // end namespace clang 332 333 334 #endif 335