1 /* vi:set ts=8 sts=4 sw=4 noet: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 /* 11 * hashtab.c: Handling of a hashtable with Vim-specific properties. 12 * 13 * Each item in a hashtable has a NUL terminated string key. A key can appear 14 * only once in the table. 15 * 16 * A hash number is computed from the key for quick lookup. When the hashes 17 * of two different keys point to the same entry an algorithm is used to 18 * iterate over other entries in the table until the right one is found. 19 * To make the iteration work removed keys are different from entries where a 20 * key was never present. 21 * 22 * The mechanism has been partly based on how Python Dictionaries are 23 * implemented. The algorithm is from Knuth Vol. 3, Sec. 6.4. 24 * 25 * The hashtable grows to accommodate more entries when needed. At least 1/3 26 * of the entries is empty to keep the lookup efficient (at the cost of extra 27 * memory). 28 */ 29 30 #include "vim.h" 31 32 #if 0 33 # define HT_DEBUG /* extra checks for table consistency and statistics */ 34 35 static long hash_count_lookup = 0; /* count number of hashtab lookups */ 36 static long hash_count_perturb = 0; /* count number of "misses" */ 37 #endif 38 39 /* Magic value for algorithm that walks through the array. */ 40 #define PERTURB_SHIFT 5 41 42 static int hash_may_resize(hashtab_T *ht, int minitems); 43 44 #if 0 /* currently not used */ 45 /* 46 * Create an empty hash table. 47 * Returns NULL when out of memory. 48 */ 49 hashtab_T * 50 hash_create(void) 51 { 52 hashtab_T *ht; 53 54 ht = (hashtab_T *)alloc(sizeof(hashtab_T)); 55 if (ht != NULL) 56 hash_init(ht); 57 return ht; 58 } 59 #endif 60 61 /* 62 * Initialize an empty hash table. 63 */ 64 void 65 hash_init(hashtab_T *ht) 66 { 67 /* This zeroes all "ht_" entries and all the "hi_key" in "ht_smallarray". */ 68 vim_memset(ht, 0, sizeof(hashtab_T)); 69 ht->ht_array = ht->ht_smallarray; 70 ht->ht_mask = HT_INIT_SIZE - 1; 71 } 72 73 /* 74 * Free the array of a hash table. Does not free the items it contains! 75 * If "ht" is not freed then you should call hash_init() next! 76 */ 77 void 78 hash_clear(hashtab_T *ht) 79 { 80 if (ht->ht_array != ht->ht_smallarray) 81 vim_free(ht->ht_array); 82 } 83 84 /* 85 * Free the array of a hash table and all the keys it contains. The keys must 86 * have been allocated. "off" is the offset from the start of the allocate 87 * memory to the location of the key (it's always positive). 88 */ 89 void 90 hash_clear_all(hashtab_T *ht, int off) 91 { 92 long todo; 93 hashitem_T *hi; 94 95 todo = (long)ht->ht_used; 96 for (hi = ht->ht_array; todo > 0; ++hi) 97 { 98 if (!HASHITEM_EMPTY(hi)) 99 { 100 vim_free(hi->hi_key - off); 101 --todo; 102 } 103 } 104 hash_clear(ht); 105 } 106 107 /* 108 * Find "key" in hashtable "ht". "key" must not be NULL. 109 * Always returns a pointer to a hashitem. If the item was not found then 110 * HASHITEM_EMPTY() is TRUE. The pointer is then the place where the key 111 * would be added. 112 * WARNING: The returned pointer becomes invalid when the hashtable is changed 113 * (adding, setting or removing an item)! 114 */ 115 hashitem_T * 116 hash_find(hashtab_T *ht, char_u *key) 117 { 118 return hash_lookup(ht, key, hash_hash(key)); 119 } 120 121 /* 122 * Like hash_find(), but caller computes "hash". 123 */ 124 hashitem_T * 125 hash_lookup(hashtab_T *ht, char_u *key, hash_T hash) 126 { 127 hash_T perturb; 128 hashitem_T *freeitem; 129 hashitem_T *hi; 130 unsigned idx; 131 132 #ifdef HT_DEBUG 133 ++hash_count_lookup; 134 #endif 135 136 /* 137 * Quickly handle the most common situations: 138 * - return if there is no item at all 139 * - skip over a removed item 140 * - return if the item matches 141 */ 142 idx = (unsigned)(hash & ht->ht_mask); 143 hi = &ht->ht_array[idx]; 144 145 if (hi->hi_key == NULL) 146 return hi; 147 if (hi->hi_key == HI_KEY_REMOVED) 148 freeitem = hi; 149 else if (hi->hi_hash == hash && STRCMP(hi->hi_key, key) == 0) 150 return hi; 151 else 152 freeitem = NULL; 153 154 /* 155 * Need to search through the table to find the key. The algorithm 156 * to step through the table starts with large steps, gradually becoming 157 * smaller down to (1/4 table size + 1). This means it goes through all 158 * table entries in the end. 159 * When we run into a NULL key it's clear that the key isn't there. 160 * Return the first available slot found (can be a slot of a removed 161 * item). 162 */ 163 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) 164 { 165 #ifdef HT_DEBUG 166 ++hash_count_perturb; /* count a "miss" for hashtab lookup */ 167 #endif 168 idx = (unsigned)((idx << 2U) + idx + perturb + 1U); 169 hi = &ht->ht_array[idx & ht->ht_mask]; 170 if (hi->hi_key == NULL) 171 return freeitem == NULL ? hi : freeitem; 172 if (hi->hi_hash == hash 173 && hi->hi_key != HI_KEY_REMOVED 174 && STRCMP(hi->hi_key, key) == 0) 175 return hi; 176 if (hi->hi_key == HI_KEY_REMOVED && freeitem == NULL) 177 freeitem = hi; 178 } 179 } 180 181 /* 182 * Print the efficiency of hashtable lookups. 183 * Useful when trying different hash algorithms. 184 * Called when exiting. 185 */ 186 void 187 hash_debug_results(void) 188 { 189 #ifdef HT_DEBUG 190 fprintf(stderr, "\r\n\r\n\r\n\r\n"); 191 fprintf(stderr, "Number of hashtable lookups: %ld\r\n", hash_count_lookup); 192 fprintf(stderr, "Number of perturb loops: %ld\r\n", hash_count_perturb); 193 fprintf(stderr, "Percentage of perturb loops: %ld%%\r\n", 194 hash_count_perturb * 100 / hash_count_lookup); 195 #endif 196 } 197 198 /* 199 * Add item with key "key" to hashtable "ht". 200 * Returns FAIL when out of memory or the key is already present. 201 */ 202 int 203 hash_add(hashtab_T *ht, char_u *key) 204 { 205 hash_T hash = hash_hash(key); 206 hashitem_T *hi; 207 208 hi = hash_lookup(ht, key, hash); 209 if (!HASHITEM_EMPTY(hi)) 210 { 211 internal_error("hash_add()"); 212 return FAIL; 213 } 214 return hash_add_item(ht, hi, key, hash); 215 } 216 217 /* 218 * Add item "hi" with "key" to hashtable "ht". "key" must not be NULL and 219 * "hi" must have been obtained with hash_lookup() and point to an empty item. 220 * "hi" is invalid after this! 221 * Returns OK or FAIL (out of memory). 222 */ 223 int 224 hash_add_item( 225 hashtab_T *ht, 226 hashitem_T *hi, 227 char_u *key, 228 hash_T hash) 229 { 230 /* If resizing failed before and it fails again we can't add an item. */ 231 if (ht->ht_error && hash_may_resize(ht, 0) == FAIL) 232 return FAIL; 233 234 ++ht->ht_used; 235 if (hi->hi_key == NULL) 236 ++ht->ht_filled; 237 hi->hi_key = key; 238 hi->hi_hash = hash; 239 240 /* When the space gets low may resize the array. */ 241 return hash_may_resize(ht, 0); 242 } 243 244 #if 0 /* not used */ 245 /* 246 * Overwrite hashtable item "hi" with "key". "hi" must point to the item that 247 * is to be overwritten. Thus the number of items in the hashtable doesn't 248 * change. 249 * Although the key must be identical, the pointer may be different, thus it's 250 * set anyway (the key is part of an item with that key). 251 * The caller must take care of freeing the old item. 252 * "hi" is invalid after this! 253 */ 254 void 255 hash_set(hashitem_T *hi, char_u *key) 256 { 257 hi->hi_key = key; 258 } 259 #endif 260 261 /* 262 * Remove item "hi" from hashtable "ht". "hi" must have been obtained with 263 * hash_lookup(). 264 * The caller must take care of freeing the item itself. 265 */ 266 void 267 hash_remove(hashtab_T *ht, hashitem_T *hi) 268 { 269 --ht->ht_used; 270 hi->hi_key = HI_KEY_REMOVED; 271 hash_may_resize(ht, 0); 272 } 273 274 /* 275 * Lock a hashtable: prevent that ht_array changes. 276 * Don't use this when items are to be added! 277 * Must call hash_unlock() later. 278 */ 279 void 280 hash_lock(hashtab_T *ht) 281 { 282 ++ht->ht_locked; 283 } 284 285 #if 0 /* currently not used */ 286 /* 287 * Lock a hashtable at the specified number of entries. 288 * Caller must make sure no more than "size" entries will be added. 289 * Must call hash_unlock() later. 290 */ 291 void 292 hash_lock_size(hashtab_T *ht, int size) 293 { 294 (void)hash_may_resize(ht, size); 295 ++ht->ht_locked; 296 } 297 #endif 298 299 /* 300 * Unlock a hashtable: allow ht_array changes again. 301 * Table will be resized (shrink) when necessary. 302 * This must balance a call to hash_lock(). 303 */ 304 void 305 hash_unlock(hashtab_T *ht) 306 { 307 --ht->ht_locked; 308 (void)hash_may_resize(ht, 0); 309 } 310 311 /* 312 * Shrink a hashtable when there is too much empty space. 313 * Grow a hashtable when there is not enough empty space. 314 * Returns OK or FAIL (out of memory). 315 */ 316 static int 317 hash_may_resize( 318 hashtab_T *ht, 319 int minitems) /* minimal number of items */ 320 { 321 hashitem_T temparray[HT_INIT_SIZE]; 322 hashitem_T *oldarray, *newarray; 323 hashitem_T *olditem, *newitem; 324 unsigned newi; 325 int todo; 326 long_u oldsize, newsize; 327 long_u minsize; 328 long_u newmask; 329 hash_T perturb; 330 331 /* Don't resize a locked table. */ 332 if (ht->ht_locked > 0) 333 return OK; 334 335 #ifdef HT_DEBUG 336 if (ht->ht_used > ht->ht_filled) 337 EMSG("hash_may_resize(): more used than filled"); 338 if (ht->ht_filled >= ht->ht_mask + 1) 339 EMSG("hash_may_resize(): table completely filled"); 340 #endif 341 342 if (minitems == 0) 343 { 344 /* Return quickly for small tables with at least two NULL items. NULL 345 * items are required for the lookup to decide a key isn't there. */ 346 if (ht->ht_filled < HT_INIT_SIZE - 1 347 && ht->ht_array == ht->ht_smallarray) 348 return OK; 349 350 /* 351 * Grow or refill the array when it's more than 2/3 full (including 352 * removed items, so that they get cleaned up). 353 * Shrink the array when it's less than 1/5 full. When growing it is 354 * at least 1/4 full (avoids repeated grow-shrink operations) 355 */ 356 oldsize = ht->ht_mask + 1; 357 if (ht->ht_filled * 3 < oldsize * 2 && ht->ht_used > oldsize / 5) 358 return OK; 359 360 if (ht->ht_used > 1000) 361 minsize = ht->ht_used * 2; /* it's big, don't make too much room */ 362 else 363 minsize = ht->ht_used * 4; /* make plenty of room */ 364 } 365 else 366 { 367 /* Use specified size. */ 368 if ((long_u)minitems < ht->ht_used) /* just in case... */ 369 minitems = (int)ht->ht_used; 370 minsize = minitems * 3 / 2; /* array is up to 2/3 full */ 371 } 372 373 newsize = HT_INIT_SIZE; 374 while (newsize < minsize) 375 { 376 newsize <<= 1; /* make sure it's always a power of 2 */ 377 if (newsize == 0) 378 return FAIL; /* overflow */ 379 } 380 381 if (newsize == HT_INIT_SIZE) 382 { 383 /* Use the small array inside the hashdict structure. */ 384 newarray = ht->ht_smallarray; 385 if (ht->ht_array == newarray) 386 { 387 /* Moving from ht_smallarray to ht_smallarray! Happens when there 388 * are many removed items. Copy the items to be able to clean up 389 * removed items. */ 390 mch_memmove(temparray, newarray, sizeof(temparray)); 391 oldarray = temparray; 392 } 393 else 394 oldarray = ht->ht_array; 395 } 396 else 397 { 398 /* Allocate an array. */ 399 newarray = (hashitem_T *)alloc((unsigned) 400 (sizeof(hashitem_T) * newsize)); 401 if (newarray == NULL) 402 { 403 /* Out of memory. When there are NULL items still return OK. 404 * Otherwise set ht_error, because lookup may result in a hang if 405 * we add another item. */ 406 if (ht->ht_filled < ht->ht_mask) 407 return OK; 408 ht->ht_error = TRUE; 409 return FAIL; 410 } 411 oldarray = ht->ht_array; 412 } 413 vim_memset(newarray, 0, (size_t)(sizeof(hashitem_T) * newsize)); 414 415 /* 416 * Move all the items from the old array to the new one, placing them in 417 * the right spot. The new array won't have any removed items, thus this 418 * is also a cleanup action. 419 */ 420 newmask = newsize - 1; 421 todo = (int)ht->ht_used; 422 for (olditem = oldarray; todo > 0; ++olditem) 423 if (!HASHITEM_EMPTY(olditem)) 424 { 425 /* 426 * The algorithm to find the spot to add the item is identical to 427 * the algorithm to find an item in hash_lookup(). But we only 428 * need to search for a NULL key, thus it's simpler. 429 */ 430 newi = (unsigned)(olditem->hi_hash & newmask); 431 newitem = &newarray[newi]; 432 433 if (newitem->hi_key != NULL) 434 for (perturb = olditem->hi_hash; ; perturb >>= PERTURB_SHIFT) 435 { 436 newi = (unsigned)((newi << 2U) + newi + perturb + 1U); 437 newitem = &newarray[newi & newmask]; 438 if (newitem->hi_key == NULL) 439 break; 440 } 441 *newitem = *olditem; 442 --todo; 443 } 444 445 if (ht->ht_array != ht->ht_smallarray) 446 vim_free(ht->ht_array); 447 ht->ht_array = newarray; 448 ht->ht_mask = newmask; 449 ht->ht_filled = ht->ht_used; 450 ht->ht_error = FALSE; 451 452 return OK; 453 } 454 455 /* 456 * Get the hash number for a key. 457 * If you think you know a better hash function: Compile with HT_DEBUG set and 458 * run a script that uses hashtables a lot. Vim will then print statistics 459 * when exiting. Try that with the current hash algorithm and yours. The 460 * lower the percentage the better. 461 */ 462 hash_T 463 hash_hash(char_u *key) 464 { 465 hash_T hash; 466 char_u *p; 467 468 if ((hash = *key) == 0) 469 return (hash_T)0; 470 p = key + 1; 471 472 /* A simplistic algorithm that appears to do very well. 473 * Suggested by George Reilly. */ 474 while (*p != NUL) 475 hash = hash * 101 + *p++; 476 477 return hash; 478 } 479