1 /* vi:set ts=8 sts=4 sw=4: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10 /* 11 * hashtab.c: Handling of a hashtable with Vim-specific properties. 12 * 13 * Each item in a hashtable has a NUL terminated string key. A key can appear 14 * only once in the table. 15 * 16 * A hash number is computed from the key for quick lookup. When the hashes 17 * of two different keys point to the same entry an algorithm is used to 18 * iterate over other entries in the table until the right one is found. 19 * To make the iteration work removed keys are different from entries where a 20 * key was never present. 21 * 22 * The mechanism has been partly based on how Python Dictionaries are 23 * implemented. The algorithm is from Knuth Vol. 3, Sec. 6.4. 24 * 25 * The hashtable grows to accommodate more entries when needed. At least 1/3 26 * of the entries is empty to keep the lookup efficient (at the cost of extra 27 * memory). 28 */ 29 30 #include "vim.h" 31 32 #if defined(FEAT_EVAL) || defined(FEAT_SYN_HL) || defined(PROTO) 33 34 #if 0 35 # define HT_DEBUG /* extra checks for table consistency and statistics */ 36 37 static long hash_count_lookup = 0; /* count number of hashtab lookups */ 38 static long hash_count_perturb = 0; /* count number of "misses" */ 39 #endif 40 41 /* Magic value for algorithm that walks through the array. */ 42 #define PERTURB_SHIFT 5 43 44 static int hash_may_resize(hashtab_T *ht, int minitems); 45 46 #if 0 /* currently not used */ 47 /* 48 * Create an empty hash table. 49 * Returns NULL when out of memory. 50 */ 51 hashtab_T * 52 hash_create(void) 53 { 54 hashtab_T *ht; 55 56 ht = (hashtab_T *)alloc(sizeof(hashtab_T)); 57 if (ht != NULL) 58 hash_init(ht); 59 return ht; 60 } 61 #endif 62 63 /* 64 * Initialize an empty hash table. 65 */ 66 void 67 hash_init(hashtab_T *ht) 68 { 69 /* This zeroes all "ht_" entries and all the "hi_key" in "ht_smallarray". */ 70 vim_memset(ht, 0, sizeof(hashtab_T)); 71 ht->ht_array = ht->ht_smallarray; 72 ht->ht_mask = HT_INIT_SIZE - 1; 73 } 74 75 /* 76 * Free the array of a hash table. Does not free the items it contains! 77 * If "ht" is not freed then you should call hash_init() next! 78 */ 79 void 80 hash_clear(hashtab_T *ht) 81 { 82 if (ht->ht_array != ht->ht_smallarray) 83 vim_free(ht->ht_array); 84 } 85 86 /* 87 * Free the array of a hash table and all the keys it contains. The keys must 88 * have been allocated. "off" is the offset from the start of the allocate 89 * memory to the location of the key (it's always positive). 90 */ 91 void 92 hash_clear_all(hashtab_T *ht, int off) 93 { 94 long todo; 95 hashitem_T *hi; 96 97 todo = (long)ht->ht_used; 98 for (hi = ht->ht_array; todo > 0; ++hi) 99 { 100 if (!HASHITEM_EMPTY(hi)) 101 { 102 vim_free(hi->hi_key - off); 103 --todo; 104 } 105 } 106 hash_clear(ht); 107 } 108 109 /* 110 * Find "key" in hashtable "ht". "key" must not be NULL. 111 * Always returns a pointer to a hashitem. If the item was not found then 112 * HASHITEM_EMPTY() is TRUE. The pointer is then the place where the key 113 * would be added. 114 * WARNING: The returned pointer becomes invalid when the hashtable is changed 115 * (adding, setting or removing an item)! 116 */ 117 hashitem_T * 118 hash_find(hashtab_T *ht, char_u *key) 119 { 120 return hash_lookup(ht, key, hash_hash(key)); 121 } 122 123 /* 124 * Like hash_find(), but caller computes "hash". 125 */ 126 hashitem_T * 127 hash_lookup(hashtab_T *ht, char_u *key, hash_T hash) 128 { 129 hash_T perturb; 130 hashitem_T *freeitem; 131 hashitem_T *hi; 132 unsigned idx; 133 134 #ifdef HT_DEBUG 135 ++hash_count_lookup; 136 #endif 137 138 /* 139 * Quickly handle the most common situations: 140 * - return if there is no item at all 141 * - skip over a removed item 142 * - return if the item matches 143 */ 144 idx = (unsigned)(hash & ht->ht_mask); 145 hi = &ht->ht_array[idx]; 146 147 if (hi->hi_key == NULL) 148 return hi; 149 if (hi->hi_key == HI_KEY_REMOVED) 150 freeitem = hi; 151 else if (hi->hi_hash == hash && STRCMP(hi->hi_key, key) == 0) 152 return hi; 153 else 154 freeitem = NULL; 155 156 /* 157 * Need to search through the table to find the key. The algorithm 158 * to step through the table starts with large steps, gradually becoming 159 * smaller down to (1/4 table size + 1). This means it goes through all 160 * table entries in the end. 161 * When we run into a NULL key it's clear that the key isn't there. 162 * Return the first available slot found (can be a slot of a removed 163 * item). 164 */ 165 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) 166 { 167 #ifdef HT_DEBUG 168 ++hash_count_perturb; /* count a "miss" for hashtab lookup */ 169 #endif 170 idx = (unsigned)((idx << 2U) + idx + perturb + 1U); 171 hi = &ht->ht_array[idx & ht->ht_mask]; 172 if (hi->hi_key == NULL) 173 return freeitem == NULL ? hi : freeitem; 174 if (hi->hi_hash == hash 175 && hi->hi_key != HI_KEY_REMOVED 176 && STRCMP(hi->hi_key, key) == 0) 177 return hi; 178 if (hi->hi_key == HI_KEY_REMOVED && freeitem == NULL) 179 freeitem = hi; 180 } 181 } 182 183 /* 184 * Print the efficiency of hashtable lookups. 185 * Useful when trying different hash algorithms. 186 * Called when exiting. 187 */ 188 void 189 hash_debug_results(void) 190 { 191 #ifdef HT_DEBUG 192 fprintf(stderr, "\r\n\r\n\r\n\r\n"); 193 fprintf(stderr, "Number of hashtable lookups: %ld\r\n", hash_count_lookup); 194 fprintf(stderr, "Number of perturb loops: %ld\r\n", hash_count_perturb); 195 fprintf(stderr, "Percentage of perturb loops: %ld%%\r\n", 196 hash_count_perturb * 100 / hash_count_lookup); 197 #endif 198 } 199 200 /* 201 * Add item with key "key" to hashtable "ht". 202 * Returns FAIL when out of memory or the key is already present. 203 */ 204 int 205 hash_add(hashtab_T *ht, char_u *key) 206 { 207 hash_T hash = hash_hash(key); 208 hashitem_T *hi; 209 210 hi = hash_lookup(ht, key, hash); 211 if (!HASHITEM_EMPTY(hi)) 212 { 213 EMSG2(_(e_intern2), "hash_add()"); 214 return FAIL; 215 } 216 return hash_add_item(ht, hi, key, hash); 217 } 218 219 /* 220 * Add item "hi" with "key" to hashtable "ht". "key" must not be NULL and 221 * "hi" must have been obtained with hash_lookup() and point to an empty item. 222 * "hi" is invalid after this! 223 * Returns OK or FAIL (out of memory). 224 */ 225 int 226 hash_add_item( 227 hashtab_T *ht, 228 hashitem_T *hi, 229 char_u *key, 230 hash_T hash) 231 { 232 /* If resizing failed before and it fails again we can't add an item. */ 233 if (ht->ht_error && hash_may_resize(ht, 0) == FAIL) 234 return FAIL; 235 236 ++ht->ht_used; 237 if (hi->hi_key == NULL) 238 ++ht->ht_filled; 239 hi->hi_key = key; 240 hi->hi_hash = hash; 241 242 /* When the space gets low may resize the array. */ 243 return hash_may_resize(ht, 0); 244 } 245 246 #if 0 /* not used */ 247 /* 248 * Overwrite hashtable item "hi" with "key". "hi" must point to the item that 249 * is to be overwritten. Thus the number of items in the hashtable doesn't 250 * change. 251 * Although the key must be identical, the pointer may be different, thus it's 252 * set anyway (the key is part of an item with that key). 253 * The caller must take care of freeing the old item. 254 * "hi" is invalid after this! 255 */ 256 void 257 hash_set(hashitem_T *hi, char_u *key) 258 { 259 hi->hi_key = key; 260 } 261 #endif 262 263 /* 264 * Remove item "hi" from hashtable "ht". "hi" must have been obtained with 265 * hash_lookup(). 266 * The caller must take care of freeing the item itself. 267 */ 268 void 269 hash_remove(hashtab_T *ht, hashitem_T *hi) 270 { 271 --ht->ht_used; 272 hi->hi_key = HI_KEY_REMOVED; 273 hash_may_resize(ht, 0); 274 } 275 276 /* 277 * Lock a hashtable: prevent that ht_array changes. 278 * Don't use this when items are to be added! 279 * Must call hash_unlock() later. 280 */ 281 void 282 hash_lock(hashtab_T *ht) 283 { 284 ++ht->ht_locked; 285 } 286 287 #if 0 /* currently not used */ 288 /* 289 * Lock a hashtable at the specified number of entries. 290 * Caller must make sure no more than "size" entries will be added. 291 * Must call hash_unlock() later. 292 */ 293 void 294 hash_lock_size(hashtab_T *ht, int size) 295 { 296 (void)hash_may_resize(ht, size); 297 ++ht->ht_locked; 298 } 299 #endif 300 301 /* 302 * Unlock a hashtable: allow ht_array changes again. 303 * Table will be resized (shrink) when necessary. 304 * This must balance a call to hash_lock(). 305 */ 306 void 307 hash_unlock(hashtab_T *ht) 308 { 309 --ht->ht_locked; 310 (void)hash_may_resize(ht, 0); 311 } 312 313 /* 314 * Shrink a hashtable when there is too much empty space. 315 * Grow a hashtable when there is not enough empty space. 316 * Returns OK or FAIL (out of memory). 317 */ 318 static int 319 hash_may_resize( 320 hashtab_T *ht, 321 int minitems) /* minimal number of items */ 322 { 323 hashitem_T temparray[HT_INIT_SIZE]; 324 hashitem_T *oldarray, *newarray; 325 hashitem_T *olditem, *newitem; 326 unsigned newi; 327 int todo; 328 long_u oldsize, newsize; 329 long_u minsize; 330 long_u newmask; 331 hash_T perturb; 332 333 /* Don't resize a locked table. */ 334 if (ht->ht_locked > 0) 335 return OK; 336 337 #ifdef HT_DEBUG 338 if (ht->ht_used > ht->ht_filled) 339 EMSG("hash_may_resize(): more used than filled"); 340 if (ht->ht_filled >= ht->ht_mask + 1) 341 EMSG("hash_may_resize(): table completely filled"); 342 #endif 343 344 if (minitems == 0) 345 { 346 /* Return quickly for small tables with at least two NULL items. NULL 347 * items are required for the lookup to decide a key isn't there. */ 348 if (ht->ht_filled < HT_INIT_SIZE - 1 349 && ht->ht_array == ht->ht_smallarray) 350 return OK; 351 352 /* 353 * Grow or refill the array when it's more than 2/3 full (including 354 * removed items, so that they get cleaned up). 355 * Shrink the array when it's less than 1/5 full. When growing it is 356 * at least 1/4 full (avoids repeated grow-shrink operations) 357 */ 358 oldsize = ht->ht_mask + 1; 359 if (ht->ht_filled * 3 < oldsize * 2 && ht->ht_used > oldsize / 5) 360 return OK; 361 362 if (ht->ht_used > 1000) 363 minsize = ht->ht_used * 2; /* it's big, don't make too much room */ 364 else 365 minsize = ht->ht_used * 4; /* make plenty of room */ 366 } 367 else 368 { 369 /* Use specified size. */ 370 if ((long_u)minitems < ht->ht_used) /* just in case... */ 371 minitems = (int)ht->ht_used; 372 minsize = minitems * 3 / 2; /* array is up to 2/3 full */ 373 } 374 375 newsize = HT_INIT_SIZE; 376 while (newsize < minsize) 377 { 378 newsize <<= 1; /* make sure it's always a power of 2 */ 379 if (newsize == 0) 380 return FAIL; /* overflow */ 381 } 382 383 if (newsize == HT_INIT_SIZE) 384 { 385 /* Use the small array inside the hashdict structure. */ 386 newarray = ht->ht_smallarray; 387 if (ht->ht_array == newarray) 388 { 389 /* Moving from ht_smallarray to ht_smallarray! Happens when there 390 * are many removed items. Copy the items to be able to clean up 391 * removed items. */ 392 mch_memmove(temparray, newarray, sizeof(temparray)); 393 oldarray = temparray; 394 } 395 else 396 oldarray = ht->ht_array; 397 } 398 else 399 { 400 /* Allocate an array. */ 401 newarray = (hashitem_T *)alloc((unsigned) 402 (sizeof(hashitem_T) * newsize)); 403 if (newarray == NULL) 404 { 405 /* Out of memory. When there are NULL items still return OK. 406 * Otherwise set ht_error, because lookup may result in a hang if 407 * we add another item. */ 408 if (ht->ht_filled < ht->ht_mask) 409 return OK; 410 ht->ht_error = TRUE; 411 return FAIL; 412 } 413 oldarray = ht->ht_array; 414 } 415 vim_memset(newarray, 0, (size_t)(sizeof(hashitem_T) * newsize)); 416 417 /* 418 * Move all the items from the old array to the new one, placing them in 419 * the right spot. The new array won't have any removed items, thus this 420 * is also a cleanup action. 421 */ 422 newmask = newsize - 1; 423 todo = (int)ht->ht_used; 424 for (olditem = oldarray; todo > 0; ++olditem) 425 if (!HASHITEM_EMPTY(olditem)) 426 { 427 /* 428 * The algorithm to find the spot to add the item is identical to 429 * the algorithm to find an item in hash_lookup(). But we only 430 * need to search for a NULL key, thus it's simpler. 431 */ 432 newi = (unsigned)(olditem->hi_hash & newmask); 433 newitem = &newarray[newi]; 434 435 if (newitem->hi_key != NULL) 436 for (perturb = olditem->hi_hash; ; perturb >>= PERTURB_SHIFT) 437 { 438 newi = (unsigned)((newi << 2U) + newi + perturb + 1U); 439 newitem = &newarray[newi & newmask]; 440 if (newitem->hi_key == NULL) 441 break; 442 } 443 *newitem = *olditem; 444 --todo; 445 } 446 447 if (ht->ht_array != ht->ht_smallarray) 448 vim_free(ht->ht_array); 449 ht->ht_array = newarray; 450 ht->ht_mask = newmask; 451 ht->ht_filled = ht->ht_used; 452 ht->ht_error = FALSE; 453 454 return OK; 455 } 456 457 /* 458 * Get the hash number for a key. 459 * If you think you know a better hash function: Compile with HT_DEBUG set and 460 * run a script that uses hashtables a lot. Vim will then print statistics 461 * when exiting. Try that with the current hash algorithm and yours. The 462 * lower the percentage the better. 463 */ 464 hash_T 465 hash_hash(char_u *key) 466 { 467 hash_T hash; 468 char_u *p; 469 470 if ((hash = *key) == 0) 471 return (hash_T)0; /* Empty keys are not allowed, but we don't 472 want to crash if we get one. */ 473 p = key + 1; 474 475 /* A simplistic algorithm that appears to do very well. 476 * Suggested by George Reilly. */ 477 while (*p != NUL) 478 hash = hash * 101 + *p++; 479 480 return hash; 481 } 482 483 #endif 484