xref: /vim-8.2.3635/src/hashtab.c (revision cc7ff3fc)
1 /* vi:set ts=8 sts=4 sw=4:
2  *
3  * VIM - Vi IMproved	by Bram Moolenaar
4  *
5  * Do ":help uganda"  in Vim to read copying and usage conditions.
6  * Do ":help credits" in Vim to see a list of people who contributed.
7  * See README.txt for an overview of the Vim source code.
8  */
9 
10 /*
11  * hashtab.c: Handling of a hashtable with Vim-specific properties.
12  *
13  * Each item in a hashtable has a NUL terminated string key.  A key can appear
14  * only once in the table.
15  *
16  * A hash number is computed from the key for quick lookup.  When the hashes
17  * of two different keys point to the same entry an algorithm is used to
18  * iterate over other entries in the table until the right one is found.
19  * To make the iteration work removed keys are different from entries where a
20  * key was never present.
21  *
22  * The mechanism has been partly based on how Python Dictionaries are
23  * implemented.  The algorithm is from Knuth Vol. 3, Sec. 6.4.
24  *
25  * The hashtable grows to accommodate more entries when needed.  At least 1/3
26  * of the entries is empty to keep the lookup efficient (at the cost of extra
27  * memory).
28  */
29 
30 #include "vim.h"
31 
32 #if defined(FEAT_EVAL) || defined(FEAT_SYN_HL) || defined(PROTO)
33 
34 #if 0
35 # define HT_DEBUG	/* extra checks for table consistency  and statistics */
36 
37 static long hash_count_lookup = 0;	/* count number of hashtab lookups */
38 static long hash_count_perturb = 0;	/* count number of "misses" */
39 #endif
40 
41 /* Magic value for algorithm that walks through the array. */
42 #define PERTURB_SHIFT 5
43 
44 static int hash_may_resize __ARGS((hashtab_T *ht, int minitems));
45 
46 #if 0 /* currently not used */
47 /*
48  * Create an empty hash table.
49  * Returns NULL when out of memory.
50  */
51     hashtab_T *
52 hash_create()
53 {
54     hashtab_T *ht;
55 
56     ht = (hashtab_T *)alloc(sizeof(hashtab_T));
57     if (ht != NULL)
58 	hash_init(ht);
59     return ht;
60 }
61 #endif
62 
63 /*
64  * Initialize an empty hash table.
65  */
66     void
67 hash_init(ht)
68     hashtab_T *ht;
69 {
70     /* This zeroes all "ht_" entries and all the "hi_key" in "ht_smallarray". */
71     vim_memset(ht, 0, sizeof(hashtab_T));
72     ht->ht_array = ht->ht_smallarray;
73     ht->ht_mask = HT_INIT_SIZE - 1;
74 }
75 
76 /*
77  * Free the array of a hash table.  Does not free the items it contains!
78  * If "ht" is not freed then you should call hash_init() next!
79  */
80     void
81 hash_clear(ht)
82     hashtab_T *ht;
83 {
84     if (ht->ht_array != ht->ht_smallarray)
85 	vim_free(ht->ht_array);
86 }
87 
88 /*
89  * Free the array of a hash table and all the keys it contains.  The keys must
90  * have been allocated.  "off" is the offset from the start of the allocate
91  * memory to the location of the key (it's always positive).
92  */
93     void
94 hash_clear_all(ht, off)
95     hashtab_T	*ht;
96     int		off;
97 {
98     long	todo;
99     hashitem_T	*hi;
100 
101     todo = (long)ht->ht_used;
102     for (hi = ht->ht_array; todo > 0; ++hi)
103     {
104 	if (!HASHITEM_EMPTY(hi))
105 	{
106 	    vim_free(hi->hi_key - off);
107 	    --todo;
108 	}
109     }
110     hash_clear(ht);
111 }
112 
113 /*
114  * Find "key" in hashtable "ht".  "key" must not be NULL.
115  * Always returns a pointer to a hashitem.  If the item was not found then
116  * HASHITEM_EMPTY() is TRUE.  The pointer is then the place where the key
117  * would be added.
118  * WARNING: The returned pointer becomes invalid when the hashtable is changed
119  * (adding, setting or removing an item)!
120  */
121     hashitem_T *
122 hash_find(ht, key)
123     hashtab_T	*ht;
124     char_u	*key;
125 {
126     return hash_lookup(ht, key, hash_hash(key));
127 }
128 
129 /*
130  * Like hash_find(), but caller computes "hash".
131  */
132     hashitem_T *
133 hash_lookup(ht, key, hash)
134     hashtab_T	*ht;
135     char_u	*key;
136     hash_T	hash;
137 {
138     hash_T	perturb;
139     hashitem_T	*freeitem;
140     hashitem_T	*hi;
141     unsigned	idx;
142 
143 #ifdef HT_DEBUG
144     ++hash_count_lookup;
145 #endif
146 
147     /*
148      * Quickly handle the most common situations:
149      * - return if there is no item at all
150      * - skip over a removed item
151      * - return if the item matches
152      */
153     idx = (unsigned)(hash & ht->ht_mask);
154     hi = &ht->ht_array[idx];
155 
156     if (hi->hi_key == NULL)
157 	return hi;
158     if (hi->hi_key == HI_KEY_REMOVED)
159 	freeitem = hi;
160     else if (hi->hi_hash == hash && STRCMP(hi->hi_key, key) == 0)
161 	return hi;
162     else
163 	freeitem = NULL;
164 
165     /*
166      * Need to search through the table to find the key.  The algorithm
167      * to step through the table starts with large steps, gradually becoming
168      * smaller down to (1/4 table size + 1).  This means it goes through all
169      * table entries in the end.
170      * When we run into a NULL key it's clear that the key isn't there.
171      * Return the first available slot found (can be a slot of a removed
172      * item).
173      */
174     for (perturb = hash; ; perturb >>= PERTURB_SHIFT)
175     {
176 #ifdef HT_DEBUG
177 	++hash_count_perturb;	    /* count a "miss" for hashtab lookup */
178 #endif
179 	idx = (unsigned)((idx << 2U) + idx + perturb + 1U);
180 	hi = &ht->ht_array[idx & ht->ht_mask];
181 	if (hi->hi_key == NULL)
182 	    return freeitem == NULL ? hi : freeitem;
183 	if (hi->hi_hash == hash
184 		&& hi->hi_key != HI_KEY_REMOVED
185 		&& STRCMP(hi->hi_key, key) == 0)
186 	    return hi;
187 	if (hi->hi_key == HI_KEY_REMOVED && freeitem == NULL)
188 	    freeitem = hi;
189     }
190 }
191 
192 /*
193  * Print the efficiency of hashtable lookups.
194  * Useful when trying different hash algorithms.
195  * Called when exiting.
196  */
197     void
198 hash_debug_results()
199 {
200 #ifdef HT_DEBUG
201     fprintf(stderr, "\r\n\r\n\r\n\r\n");
202     fprintf(stderr, "Number of hashtable lookups: %ld\r\n", hash_count_lookup);
203     fprintf(stderr, "Number of perturb loops: %ld\r\n", hash_count_perturb);
204     fprintf(stderr, "Percentage of perturb loops: %ld%%\r\n",
205 				hash_count_perturb * 100 / hash_count_lookup);
206 #endif
207 }
208 
209 /*
210  * Add item with key "key" to hashtable "ht".
211  * Returns FAIL when out of memory or the key is already present.
212  */
213     int
214 hash_add(ht, key)
215     hashtab_T	*ht;
216     char_u	*key;
217 {
218     hash_T	hash = hash_hash(key);
219     hashitem_T	*hi;
220 
221     hi = hash_lookup(ht, key, hash);
222     if (!HASHITEM_EMPTY(hi))
223     {
224 	EMSG2(_(e_intern2), "hash_add()");
225 	return FAIL;
226     }
227     return hash_add_item(ht, hi, key, hash);
228 }
229 
230 /*
231  * Add item "hi" with "key" to hashtable "ht".  "key" must not be NULL and
232  * "hi" must have been obtained with hash_lookup() and point to an empty item.
233  * "hi" is invalid after this!
234  * Returns OK or FAIL (out of memory).
235  */
236     int
237 hash_add_item(ht, hi, key, hash)
238     hashtab_T	*ht;
239     hashitem_T	*hi;
240     char_u	*key;
241     hash_T	hash;
242 {
243     /* If resizing failed before and it fails again we can't add an item. */
244     if (ht->ht_error && hash_may_resize(ht, 0) == FAIL)
245 	return FAIL;
246 
247     ++ht->ht_used;
248     if (hi->hi_key == NULL)
249 	++ht->ht_filled;
250     hi->hi_key = key;
251     hi->hi_hash = hash;
252 
253     /* When the space gets low may resize the array. */
254     return hash_may_resize(ht, 0);
255 }
256 
257 #if 0  /* not used */
258 /*
259  * Overwrite hashtable item "hi" with "key".  "hi" must point to the item that
260  * is to be overwritten.  Thus the number of items in the hashtable doesn't
261  * change.
262  * Although the key must be identical, the pointer may be different, thus it's
263  * set anyway (the key is part of an item with that key).
264  * The caller must take care of freeing the old item.
265  * "hi" is invalid after this!
266  */
267     void
268 hash_set(hi, key)
269     hashitem_T	*hi;
270     char_u	*key;
271 {
272     hi->hi_key = key;
273 }
274 #endif
275 
276 /*
277  * Remove item "hi" from  hashtable "ht".  "hi" must have been obtained with
278  * hash_lookup().
279  * The caller must take care of freeing the item itself.
280  */
281     void
282 hash_remove(ht, hi)
283     hashtab_T	*ht;
284     hashitem_T	*hi;
285 {
286     --ht->ht_used;
287     hi->hi_key = HI_KEY_REMOVED;
288     hash_may_resize(ht, 0);
289 }
290 
291 /*
292  * Lock a hashtable: prevent that ht_array changes.
293  * Don't use this when items are to be added!
294  * Must call hash_unlock() later.
295  */
296     void
297 hash_lock(ht)
298     hashtab_T	*ht;
299 {
300     ++ht->ht_locked;
301 }
302 
303 #if 0	    /* currently not used */
304 /*
305  * Lock a hashtable at the specified number of entries.
306  * Caller must make sure no more than "size" entries will be added.
307  * Must call hash_unlock() later.
308  */
309     void
310 hash_lock_size(ht, size)
311     hashtab_T	*ht;
312     int		size;
313 {
314     (void)hash_may_resize(ht, size);
315     ++ht->ht_locked;
316 }
317 #endif
318 
319 /*
320  * Unlock a hashtable: allow ht_array changes again.
321  * Table will be resized (shrink) when necessary.
322  * This must balance a call to hash_lock().
323  */
324     void
325 hash_unlock(ht)
326     hashtab_T	*ht;
327 {
328     --ht->ht_locked;
329     (void)hash_may_resize(ht, 0);
330 }
331 
332 /*
333  * Shrink a hashtable when there is too much empty space.
334  * Grow a hashtable when there is not enough empty space.
335  * Returns OK or FAIL (out of memory).
336  */
337     static int
338 hash_may_resize(ht, minitems)
339     hashtab_T	*ht;
340     int		minitems;		/* minimal number of items */
341 {
342     hashitem_T	temparray[HT_INIT_SIZE];
343     hashitem_T	*oldarray, *newarray;
344     hashitem_T	*olditem, *newitem;
345     unsigned	newi;
346     int		todo;
347     long_u	oldsize, newsize;
348     long_u	minsize;
349     long_u	newmask;
350     hash_T	perturb;
351 
352     /* Don't resize a locked table. */
353     if (ht->ht_locked > 0)
354 	return OK;
355 
356 #ifdef HT_DEBUG
357     if (ht->ht_used > ht->ht_filled)
358 	EMSG("hash_may_resize(): more used than filled");
359     if (ht->ht_filled >= ht->ht_mask + 1)
360 	EMSG("hash_may_resize(): table completely filled");
361 #endif
362 
363     if (minitems == 0)
364     {
365 	/* Return quickly for small tables with at least two NULL items.  NULL
366 	 * items are required for the lookup to decide a key isn't there. */
367 	if (ht->ht_filled < HT_INIT_SIZE - 1
368 					 && ht->ht_array == ht->ht_smallarray)
369 	    return OK;
370 
371 	/*
372 	 * Grow or refill the array when it's more than 2/3 full (including
373 	 * removed items, so that they get cleaned up).
374 	 * Shrink the array when it's less than 1/5 full.  When growing it is
375 	 * at least 1/4 full (avoids repeated grow-shrink operations)
376 	 */
377 	oldsize = ht->ht_mask + 1;
378 	if (ht->ht_filled * 3 < oldsize * 2 && ht->ht_used > oldsize / 5)
379 	    return OK;
380 
381 	if (ht->ht_used > 1000)
382 	    minsize = ht->ht_used * 2;  /* it's big, don't make too much room */
383 	else
384 	    minsize = ht->ht_used * 4;  /* make plenty of room */
385     }
386     else
387     {
388 	/* Use specified size. */
389 	if ((long_u)minitems < ht->ht_used)	/* just in case... */
390 	    minitems = (int)ht->ht_used;
391 	minsize = minitems * 3 / 2;	/* array is up to 2/3 full */
392     }
393 
394     newsize = HT_INIT_SIZE;
395     while (newsize < minsize)
396     {
397 	newsize <<= 1;		/* make sure it's always a power of 2 */
398 	if (newsize == 0)
399 	    return FAIL;	/* overflow */
400     }
401 
402     if (newsize == HT_INIT_SIZE)
403     {
404 	/* Use the small array inside the hashdict structure. */
405 	newarray = ht->ht_smallarray;
406 	if (ht->ht_array == newarray)
407 	{
408 	    /* Moving from ht_smallarray to ht_smallarray!  Happens when there
409 	     * are many removed items.  Copy the items to be able to clean up
410 	     * removed items. */
411 	    mch_memmove(temparray, newarray, sizeof(temparray));
412 	    oldarray = temparray;
413 	}
414 	else
415 	    oldarray = ht->ht_array;
416     }
417     else
418     {
419 	/* Allocate an array. */
420 	newarray = (hashitem_T *)alloc((unsigned)
421 					      (sizeof(hashitem_T) * newsize));
422 	if (newarray == NULL)
423 	{
424 	    /* Out of memory.  When there are NULL items still return OK.
425 	     * Otherwise set ht_error, because lookup may result in a hang if
426 	     * we add another item. */
427 	    if (ht->ht_filled < ht->ht_mask)
428 		return OK;
429 	    ht->ht_error = TRUE;
430 	    return FAIL;
431 	}
432 	oldarray = ht->ht_array;
433     }
434     vim_memset(newarray, 0, (size_t)(sizeof(hashitem_T) * newsize));
435 
436     /*
437      * Move all the items from the old array to the new one, placing them in
438      * the right spot.  The new array won't have any removed items, thus this
439      * is also a cleanup action.
440      */
441     newmask = newsize - 1;
442     todo = (int)ht->ht_used;
443     for (olditem = oldarray; todo > 0; ++olditem)
444 	if (!HASHITEM_EMPTY(olditem))
445 	{
446 	    /*
447 	     * The algorithm to find the spot to add the item is identical to
448 	     * the algorithm to find an item in hash_lookup().  But we only
449 	     * need to search for a NULL key, thus it's simpler.
450 	     */
451 	    newi = (unsigned)(olditem->hi_hash & newmask);
452 	    newitem = &newarray[newi];
453 
454 	    if (newitem->hi_key != NULL)
455 		for (perturb = olditem->hi_hash; ; perturb >>= PERTURB_SHIFT)
456 		{
457 		    newi = (unsigned)((newi << 2U) + newi + perturb + 1U);
458 		    newitem = &newarray[newi & newmask];
459 		    if (newitem->hi_key == NULL)
460 			break;
461 		}
462 	    *newitem = *olditem;
463 	    --todo;
464 	}
465 
466     if (ht->ht_array != ht->ht_smallarray)
467 	vim_free(ht->ht_array);
468     ht->ht_array = newarray;
469     ht->ht_mask = newmask;
470     ht->ht_filled = ht->ht_used;
471     ht->ht_error = FALSE;
472 
473     return OK;
474 }
475 
476 /*
477  * Get the hash number for a key.
478  * If you think you know a better hash function: Compile with HT_DEBUG set and
479  * run a script that uses hashtables a lot.  Vim will then print statistics
480  * when exiting.  Try that with the current hash algorithm and yours.  The
481  * lower the percentage the better.
482  */
483     hash_T
484 hash_hash(key)
485     char_u	*key;
486 {
487     hash_T	hash;
488     char_u	*p;
489 
490     if ((hash = *key) == 0)
491 	return (hash_T)0;	/* Empty keys are not allowed, but we don't
492 				   want to crash if we get one. */
493     p = key + 1;
494 
495     /* A simplistic algorithm that appears to do very well.
496      * Suggested by George Reilly. */
497     while (*p != NUL)
498 	hash = hash * 101 + *p++;
499 
500     return hash;
501 }
502 
503 #endif
504