1 //===-- MappedHash.h --------------------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef liblldb_MappedHash_h_ 11 #define liblldb_MappedHash_h_ 12 13 #include <assert.h> 14 #include <stdint.h> 15 16 #include <algorithm> 17 #include <functional> 18 #include <map> 19 #include <vector> 20 21 #include "lldb/Utility/DataExtractor.h" 22 #include "lldb/Utility/Stream.h" 23 #include "llvm/Support/DJB.h" 24 25 class MappedHash { 26 public: 27 enum HashFunctionType { 28 eHashFunctionDJB = 0u // Daniel J Bernstein hash function that is also used 29 // by the ELF GNU_HASH sections 30 }; 31 HashString(uint32_t hash_function,llvm::StringRef s)32 static uint32_t HashString(uint32_t hash_function, llvm::StringRef s) { 33 switch (hash_function) { 34 case MappedHash::eHashFunctionDJB: 35 return llvm::djbHash(s); 36 37 default: 38 break; 39 } 40 llvm_unreachable("Invalid hash function index"); 41 } 42 43 static const uint32_t HASH_MAGIC = 0x48415348u; 44 static const uint32_t HASH_CIGAM = 0x48534148u; 45 46 template <typename T> struct Header { 47 typedef T HeaderData; 48 49 uint32_t 50 magic; // HASH_MAGIC or HASH_CIGAM magic value to allow endian detection 51 uint16_t version; // Version number 52 uint16_t hash_function; // The hash function enumeration that was used 53 uint32_t bucket_count; // The number of buckets in this hash table 54 uint32_t hashes_count; // The total number of unique hash values and hash 55 // data offsets in this table 56 uint32_t header_data_len; // The size in bytes of the "header_data" template 57 // member below 58 HeaderData header_data; // 59 HeaderHeader60 Header() 61 : magic(HASH_MAGIC), version(1), hash_function(eHashFunctionDJB), 62 bucket_count(0), hashes_count(0), header_data_len(sizeof(T)), 63 header_data() {} 64 65 virtual ~Header() = default; 66 GetByteSizeHeader67 size_t GetByteSize() const { 68 return sizeof(magic) + sizeof(version) + sizeof(hash_function) + 69 sizeof(bucket_count) + sizeof(hashes_count) + 70 sizeof(header_data_len) + header_data_len; 71 } 72 73 virtual size_t GetByteSize(const HeaderData &header_data) = 0; 74 SetHeaderDataByteSizeHeader75 void SetHeaderDataByteSize(uint32_t header_data_byte_size) { 76 header_data_len = header_data_byte_size; 77 } 78 DumpHeader79 void Dump(lldb_private::Stream &s) { 80 s.Printf("header.magic = 0x%8.8x\n", magic); 81 s.Printf("header.version = 0x%4.4x\n", version); 82 s.Printf("header.hash_function = 0x%4.4x\n", hash_function); 83 s.Printf("header.bucket_count = 0x%8.8x %u\n", bucket_count, 84 bucket_count); 85 s.Printf("header.hashes_count = 0x%8.8x %u\n", hashes_count, 86 hashes_count); 87 s.Printf("header.header_data_len = 0x%8.8x %u\n", header_data_len, 88 header_data_len); 89 } 90 ReadHeader91 virtual lldb::offset_t Read(lldb_private::DataExtractor &data, 92 lldb::offset_t offset) { 93 if (data.ValidOffsetForDataOfSize( 94 offset, sizeof(magic) + sizeof(version) + sizeof(hash_function) + 95 sizeof(bucket_count) + sizeof(hashes_count) + 96 sizeof(header_data_len))) { 97 magic = data.GetU32(&offset); 98 if (magic != HASH_MAGIC) { 99 if (magic == HASH_CIGAM) { 100 switch (data.GetByteOrder()) { 101 case lldb::eByteOrderBig: 102 data.SetByteOrder(lldb::eByteOrderLittle); 103 break; 104 case lldb::eByteOrderLittle: 105 data.SetByteOrder(lldb::eByteOrderBig); 106 break; 107 default: 108 return LLDB_INVALID_OFFSET; 109 } 110 } else { 111 // Magic bytes didn't match 112 version = 0; 113 return LLDB_INVALID_OFFSET; 114 } 115 } 116 117 version = data.GetU16(&offset); 118 if (version != 1) { 119 // Unsupported version 120 return LLDB_INVALID_OFFSET; 121 } 122 hash_function = data.GetU16(&offset); 123 if (hash_function == 4) 124 hash_function = 0; // Deal with pre-release version of this table... 125 bucket_count = data.GetU32(&offset); 126 hashes_count = data.GetU32(&offset); 127 header_data_len = data.GetU32(&offset); 128 return offset; 129 } 130 return LLDB_INVALID_OFFSET; 131 } 132 // 133 // // Returns a buffer that contains a serialized version of this 134 // table 135 // // that must be freed with free(). 136 // virtual void * 137 // Write (int fd); 138 }; 139 140 // A class for reading and using a saved hash table from a block of data 141 // in memory 142 template <typename __KeyType, class __HeaderType, class __HashData> 143 class MemoryTable { 144 public: 145 typedef __HeaderType HeaderType; 146 typedef __KeyType KeyType; 147 typedef __HashData HashData; 148 149 enum Result { 150 eResultKeyMatch = 0u, // The entry was found, key matched and "pair" was 151 // filled in successfully 152 eResultKeyMismatch = 153 1u, // Bucket hash data collision, but key didn't match 154 eResultEndOfHashData = 2u, // The chain of items for this hash data in 155 // this bucket is terminated, search no more 156 eResultError = 3u // Status parsing the hash data, abort 157 }; 158 159 struct Pair { 160 KeyType key; 161 HashData value; 162 }; 163 MemoryTable(lldb_private::DataExtractor & data)164 MemoryTable(lldb_private::DataExtractor &data) 165 : m_header(), m_hash_indexes(nullptr), m_hash_values(nullptr), 166 m_hash_offsets(nullptr) { 167 lldb::offset_t offset = m_header.Read(data, 0); 168 if (offset != LLDB_INVALID_OFFSET && IsValid()) { 169 m_hash_indexes = (const uint32_t *)data.GetData( 170 &offset, m_header.bucket_count * sizeof(uint32_t)); 171 m_hash_values = (const uint32_t *)data.GetData( 172 &offset, m_header.hashes_count * sizeof(uint32_t)); 173 m_hash_offsets = (const uint32_t *)data.GetData( 174 &offset, m_header.hashes_count * sizeof(uint32_t)); 175 } 176 } 177 178 virtual ~MemoryTable() = default; 179 IsValid()180 bool IsValid() const { 181 return m_header.version == 1 && 182 m_header.hash_function == eHashFunctionDJB && 183 m_header.bucket_count > 0; 184 } 185 GetHashIndex(uint32_t bucket_idx)186 uint32_t GetHashIndex(uint32_t bucket_idx) const { 187 uint32_t result = UINT32_MAX; 188 if (m_hash_indexes && bucket_idx < m_header.bucket_count) 189 memcpy(&result, m_hash_indexes + bucket_idx, sizeof(uint32_t)); 190 return result; 191 } 192 GetHashValue(uint32_t hash_idx)193 uint32_t GetHashValue(uint32_t hash_idx) const { 194 uint32_t result = UINT32_MAX; 195 if (m_hash_values && hash_idx < m_header.hashes_count) 196 memcpy(&result, m_hash_values + hash_idx, sizeof(uint32_t)); 197 return result; 198 } 199 GetHashDataOffset(uint32_t hash_idx)200 uint32_t GetHashDataOffset(uint32_t hash_idx) const { 201 uint32_t result = UINT32_MAX; 202 if (m_hash_offsets && hash_idx < m_header.hashes_count) 203 memcpy(&result, m_hash_offsets + hash_idx, sizeof(uint32_t)); 204 return result; 205 } 206 Find(llvm::StringRef name,Pair & pair)207 bool Find(llvm::StringRef name, Pair &pair) const { 208 if (name.empty()) 209 return false; 210 211 if (IsValid()) { 212 const uint32_t bucket_count = m_header.bucket_count; 213 const uint32_t hash_count = m_header.hashes_count; 214 const uint32_t hash_value = 215 MappedHash::HashString(m_header.hash_function, name); 216 const uint32_t bucket_idx = hash_value % bucket_count; 217 uint32_t hash_idx = GetHashIndex(bucket_idx); 218 if (hash_idx < hash_count) { 219 for (; hash_idx < hash_count; ++hash_idx) { 220 const uint32_t curr_hash_value = GetHashValue(hash_idx); 221 if (curr_hash_value == hash_value) { 222 lldb::offset_t hash_data_offset = GetHashDataOffset(hash_idx); 223 while (hash_data_offset != UINT32_MAX) { 224 const lldb::offset_t prev_hash_data_offset = hash_data_offset; 225 Result hash_result = 226 GetHashDataForName(name, &hash_data_offset, pair); 227 // Check the result of getting our hash data 228 switch (hash_result) { 229 case eResultKeyMatch: 230 return true; 231 232 case eResultKeyMismatch: 233 if (prev_hash_data_offset == hash_data_offset) 234 return false; 235 break; 236 237 case eResultEndOfHashData: 238 // The last HashData for this key has been reached, stop 239 // searching 240 return false; 241 case eResultError: 242 // Status parsing the hash data, abort 243 return false; 244 } 245 } 246 } 247 if ((curr_hash_value % bucket_count) != bucket_idx) 248 break; 249 } 250 } 251 } 252 return false; 253 } 254 255 // This method must be implemented in any subclasses. The KeyType is user 256 // specified and must somehow result in a string value. For example, the 257 // KeyType might be a string offset in a string table and subclasses can 258 // store their string table as a member of the subclass and return a valie 259 // "const char *" given a "key". The value could also be a C string 260 // pointer, in which case just returning "key" will suffice. 261 virtual const char *GetStringForKeyType(KeyType key) const = 0; 262 263 virtual bool ReadHashData(uint32_t hash_data_offset, 264 HashData &hash_data) const = 0; 265 266 // This method must be implemented in any subclasses and it must try to 267 // read one "Pair" at the offset pointed to by the "hash_data_offset_ptr" 268 // parameter. This offset should be updated as bytes are consumed and a 269 // value "Result" enum should be returned. If the "name" matches the full 270 // name for the "pair.key" (which must be filled in by this call), then the 271 // HashData in the pair ("pair.value") should be extracted and filled in 272 // and "eResultKeyMatch" should be returned. If "name" doesn't match this 273 // string for the key, then "eResultKeyMismatch" should be returned and all 274 // data for the current HashData must be consumed or skipped and the 275 // "hash_data_offset_ptr" offset needs to be updated to point to the next 276 // HashData. If the end of the HashData objects for a given hash value have 277 // been reached, then "eResultEndOfHashData" should be returned. If 278 // anything else goes wrong during parsing, return "eResultError" and the 279 // corresponding "Find()" function will be canceled and return false. 280 virtual Result GetHashDataForName(llvm::StringRef name, 281 lldb::offset_t *hash_data_offset_ptr, 282 Pair &pair) const = 0; 283 GetHeader()284 const HeaderType &GetHeader() { return m_header; } 285 ForEach(std::function<bool (const HashData & hash_data)> const & callback)286 void ForEach( 287 std::function<bool(const HashData &hash_data)> const &callback) const { 288 const size_t num_hash_offsets = m_header.hashes_count; 289 for (size_t i = 0; i < num_hash_offsets; ++i) { 290 uint32_t hash_data_offset = GetHashDataOffset(i); 291 if (hash_data_offset != UINT32_MAX) { 292 HashData hash_data; 293 if (ReadHashData(hash_data_offset, hash_data)) { 294 // If the callback returns false, then we are done and should stop 295 if (callback(hash_data) == false) 296 return; 297 } 298 } 299 } 300 } 301 302 protected: 303 // Implementation agnostic information 304 HeaderType m_header; 305 const uint32_t *m_hash_indexes; 306 const uint32_t *m_hash_values; 307 const uint32_t *m_hash_offsets; 308 }; 309 }; 310 311 #endif // liblldb_MappedHash_h_ 312