xref: /vim-8.2.3635/src/hashtab.c (revision fb094e14)
1 /* vi:set ts=8 sts=4 sw=4 noet:
2  *
3  * VIM - Vi IMproved	by Bram Moolenaar
4  *
5  * Do ":help uganda"  in Vim to read copying and usage conditions.
6  * Do ":help credits" in Vim to see a list of people who contributed.
7  * See README.txt for an overview of the Vim source code.
8  */
9 
10 /*
11  * hashtab.c: Handling of a hashtable with Vim-specific properties.
12  *
13  * Each item in a hashtable has a NUL terminated string key.  A key can appear
14  * only once in the table.
15  *
16  * A hash number is computed from the key for quick lookup.  When the hashes
17  * of two different keys point to the same entry an algorithm is used to
18  * iterate over other entries in the table until the right one is found.
19  * To make the iteration work removed keys are different from entries where a
20  * key was never present.
21  *
22  * The mechanism has been partly based on how Python Dictionaries are
23  * implemented.  The algorithm is from Knuth Vol. 3, Sec. 6.4.
24  *
25  * The hashtable grows to accommodate more entries when needed.  At least 1/3
26  * of the entries is empty to keep the lookup efficient (at the cost of extra
27  * memory).
28  */
29 
30 #include "vim.h"
31 
32 #if 0
33 # define HT_DEBUG	/* extra checks for table consistency  and statistics */
34 
35 static long hash_count_lookup = 0;	/* count number of hashtab lookups */
36 static long hash_count_perturb = 0;	/* count number of "misses" */
37 #endif
38 
39 /* Magic value for algorithm that walks through the array. */
40 #define PERTURB_SHIFT 5
41 
42 static int hash_may_resize(hashtab_T *ht, int minitems);
43 
44 #if 0 /* currently not used */
45 /*
46  * Create an empty hash table.
47  * Returns NULL when out of memory.
48  */
49     hashtab_T *
50 hash_create(void)
51 {
52     hashtab_T *ht;
53 
54     ht = (hashtab_T *)alloc(sizeof(hashtab_T));
55     if (ht != NULL)
56 	hash_init(ht);
57     return ht;
58 }
59 #endif
60 
61 /*
62  * Initialize an empty hash table.
63  */
64     void
65 hash_init(hashtab_T *ht)
66 {
67     /* This zeroes all "ht_" entries and all the "hi_key" in "ht_smallarray". */
68     vim_memset(ht, 0, sizeof(hashtab_T));
69     ht->ht_array = ht->ht_smallarray;
70     ht->ht_mask = HT_INIT_SIZE - 1;
71 }
72 
73 /*
74  * Free the array of a hash table.  Does not free the items it contains!
75  * If "ht" is not freed then you should call hash_init() next!
76  */
77     void
78 hash_clear(hashtab_T *ht)
79 {
80     if (ht->ht_array != ht->ht_smallarray)
81 	vim_free(ht->ht_array);
82 }
83 
84 /*
85  * Free the array of a hash table and all the keys it contains.  The keys must
86  * have been allocated.  "off" is the offset from the start of the allocate
87  * memory to the location of the key (it's always positive).
88  */
89     void
90 hash_clear_all(hashtab_T *ht, int off)
91 {
92     long	todo;
93     hashitem_T	*hi;
94 
95     todo = (long)ht->ht_used;
96     for (hi = ht->ht_array; todo > 0; ++hi)
97     {
98 	if (!HASHITEM_EMPTY(hi))
99 	{
100 	    vim_free(hi->hi_key - off);
101 	    --todo;
102 	}
103     }
104     hash_clear(ht);
105 }
106 
107 /*
108  * Find "key" in hashtable "ht".  "key" must not be NULL.
109  * Always returns a pointer to a hashitem.  If the item was not found then
110  * HASHITEM_EMPTY() is TRUE.  The pointer is then the place where the key
111  * would be added.
112  * WARNING: The returned pointer becomes invalid when the hashtable is changed
113  * (adding, setting or removing an item)!
114  */
115     hashitem_T *
116 hash_find(hashtab_T *ht, char_u *key)
117 {
118     return hash_lookup(ht, key, hash_hash(key));
119 }
120 
121 /*
122  * Like hash_find(), but caller computes "hash".
123  */
124     hashitem_T *
125 hash_lookup(hashtab_T *ht, char_u *key, hash_T hash)
126 {
127     hash_T	perturb;
128     hashitem_T	*freeitem;
129     hashitem_T	*hi;
130     unsigned	idx;
131 
132 #ifdef HT_DEBUG
133     ++hash_count_lookup;
134 #endif
135 
136     /*
137      * Quickly handle the most common situations:
138      * - return if there is no item at all
139      * - skip over a removed item
140      * - return if the item matches
141      */
142     idx = (unsigned)(hash & ht->ht_mask);
143     hi = &ht->ht_array[idx];
144 
145     if (hi->hi_key == NULL)
146 	return hi;
147     if (hi->hi_key == HI_KEY_REMOVED)
148 	freeitem = hi;
149     else if (hi->hi_hash == hash && STRCMP(hi->hi_key, key) == 0)
150 	return hi;
151     else
152 	freeitem = NULL;
153 
154     /*
155      * Need to search through the table to find the key.  The algorithm
156      * to step through the table starts with large steps, gradually becoming
157      * smaller down to (1/4 table size + 1).  This means it goes through all
158      * table entries in the end.
159      * When we run into a NULL key it's clear that the key isn't there.
160      * Return the first available slot found (can be a slot of a removed
161      * item).
162      */
163     for (perturb = hash; ; perturb >>= PERTURB_SHIFT)
164     {
165 #ifdef HT_DEBUG
166 	++hash_count_perturb;	    /* count a "miss" for hashtab lookup */
167 #endif
168 	idx = (unsigned)((idx << 2U) + idx + perturb + 1U);
169 	hi = &ht->ht_array[idx & ht->ht_mask];
170 	if (hi->hi_key == NULL)
171 	    return freeitem == NULL ? hi : freeitem;
172 	if (hi->hi_hash == hash
173 		&& hi->hi_key != HI_KEY_REMOVED
174 		&& STRCMP(hi->hi_key, key) == 0)
175 	    return hi;
176 	if (hi->hi_key == HI_KEY_REMOVED && freeitem == NULL)
177 	    freeitem = hi;
178     }
179 }
180 
181 /*
182  * Print the efficiency of hashtable lookups.
183  * Useful when trying different hash algorithms.
184  * Called when exiting.
185  */
186     void
187 hash_debug_results(void)
188 {
189 #ifdef HT_DEBUG
190     fprintf(stderr, "\r\n\r\n\r\n\r\n");
191     fprintf(stderr, "Number of hashtable lookups: %ld\r\n", hash_count_lookup);
192     fprintf(stderr, "Number of perturb loops: %ld\r\n", hash_count_perturb);
193     fprintf(stderr, "Percentage of perturb loops: %ld%%\r\n",
194 				hash_count_perturb * 100 / hash_count_lookup);
195 #endif
196 }
197 
198 /*
199  * Add item with key "key" to hashtable "ht".
200  * Returns FAIL when out of memory or the key is already present.
201  */
202     int
203 hash_add(hashtab_T *ht, char_u *key)
204 {
205     hash_T	hash = hash_hash(key);
206     hashitem_T	*hi;
207 
208     hi = hash_lookup(ht, key, hash);
209     if (!HASHITEM_EMPTY(hi))
210     {
211 	internal_error("hash_add()");
212 	return FAIL;
213     }
214     return hash_add_item(ht, hi, key, hash);
215 }
216 
217 /*
218  * Add item "hi" with "key" to hashtable "ht".  "key" must not be NULL and
219  * "hi" must have been obtained with hash_lookup() and point to an empty item.
220  * "hi" is invalid after this!
221  * Returns OK or FAIL (out of memory).
222  */
223     int
224 hash_add_item(
225     hashtab_T	*ht,
226     hashitem_T	*hi,
227     char_u	*key,
228     hash_T	hash)
229 {
230     /* If resizing failed before and it fails again we can't add an item. */
231     if (ht->ht_error && hash_may_resize(ht, 0) == FAIL)
232 	return FAIL;
233 
234     ++ht->ht_used;
235     if (hi->hi_key == NULL)
236 	++ht->ht_filled;
237     hi->hi_key = key;
238     hi->hi_hash = hash;
239 
240     /* When the space gets low may resize the array. */
241     return hash_may_resize(ht, 0);
242 }
243 
244 #if 0  /* not used */
245 /*
246  * Overwrite hashtable item "hi" with "key".  "hi" must point to the item that
247  * is to be overwritten.  Thus the number of items in the hashtable doesn't
248  * change.
249  * Although the key must be identical, the pointer may be different, thus it's
250  * set anyway (the key is part of an item with that key).
251  * The caller must take care of freeing the old item.
252  * "hi" is invalid after this!
253  */
254     void
255 hash_set(hashitem_T *hi, char_u *key)
256 {
257     hi->hi_key = key;
258 }
259 #endif
260 
261 /*
262  * Remove item "hi" from  hashtable "ht".  "hi" must have been obtained with
263  * hash_lookup().
264  * The caller must take care of freeing the item itself.
265  */
266     void
267 hash_remove(hashtab_T *ht, hashitem_T *hi)
268 {
269     --ht->ht_used;
270     hi->hi_key = HI_KEY_REMOVED;
271     hash_may_resize(ht, 0);
272 }
273 
274 /*
275  * Lock a hashtable: prevent that ht_array changes.
276  * Don't use this when items are to be added!
277  * Must call hash_unlock() later.
278  */
279     void
280 hash_lock(hashtab_T *ht)
281 {
282     ++ht->ht_locked;
283 }
284 
285 #if 0	    /* currently not used */
286 /*
287  * Lock a hashtable at the specified number of entries.
288  * Caller must make sure no more than "size" entries will be added.
289  * Must call hash_unlock() later.
290  */
291     void
292 hash_lock_size(hashtab_T *ht, int size)
293 {
294     (void)hash_may_resize(ht, size);
295     ++ht->ht_locked;
296 }
297 #endif
298 
299 /*
300  * Unlock a hashtable: allow ht_array changes again.
301  * Table will be resized (shrink) when necessary.
302  * This must balance a call to hash_lock().
303  */
304     void
305 hash_unlock(hashtab_T *ht)
306 {
307     --ht->ht_locked;
308     (void)hash_may_resize(ht, 0);
309 }
310 
311 /*
312  * Shrink a hashtable when there is too much empty space.
313  * Grow a hashtable when there is not enough empty space.
314  * Returns OK or FAIL (out of memory).
315  */
316     static int
317 hash_may_resize(
318     hashtab_T	*ht,
319     int		minitems)		/* minimal number of items */
320 {
321     hashitem_T	temparray[HT_INIT_SIZE];
322     hashitem_T	*oldarray, *newarray;
323     hashitem_T	*olditem, *newitem;
324     unsigned	newi;
325     int		todo;
326     long_u	oldsize, newsize;
327     long_u	minsize;
328     long_u	newmask;
329     hash_T	perturb;
330 
331     /* Don't resize a locked table. */
332     if (ht->ht_locked > 0)
333 	return OK;
334 
335 #ifdef HT_DEBUG
336     if (ht->ht_used > ht->ht_filled)
337 	EMSG("hash_may_resize(): more used than filled");
338     if (ht->ht_filled >= ht->ht_mask + 1)
339 	EMSG("hash_may_resize(): table completely filled");
340 #endif
341 
342     if (minitems == 0)
343     {
344 	/* Return quickly for small tables with at least two NULL items.  NULL
345 	 * items are required for the lookup to decide a key isn't there. */
346 	if (ht->ht_filled < HT_INIT_SIZE - 1
347 					 && ht->ht_array == ht->ht_smallarray)
348 	    return OK;
349 
350 	/*
351 	 * Grow or refill the array when it's more than 2/3 full (including
352 	 * removed items, so that they get cleaned up).
353 	 * Shrink the array when it's less than 1/5 full.  When growing it is
354 	 * at least 1/4 full (avoids repeated grow-shrink operations)
355 	 */
356 	oldsize = ht->ht_mask + 1;
357 	if (ht->ht_filled * 3 < oldsize * 2 && ht->ht_used > oldsize / 5)
358 	    return OK;
359 
360 	if (ht->ht_used > 1000)
361 	    minsize = ht->ht_used * 2;  /* it's big, don't make too much room */
362 	else
363 	    minsize = ht->ht_used * 4;  /* make plenty of room */
364     }
365     else
366     {
367 	/* Use specified size. */
368 	if ((long_u)minitems < ht->ht_used)	/* just in case... */
369 	    minitems = (int)ht->ht_used;
370 	minsize = minitems * 3 / 2;	/* array is up to 2/3 full */
371     }
372 
373     newsize = HT_INIT_SIZE;
374     while (newsize < minsize)
375     {
376 	newsize <<= 1;		/* make sure it's always a power of 2 */
377 	if (newsize == 0)
378 	    return FAIL;	/* overflow */
379     }
380 
381     if (newsize == HT_INIT_SIZE)
382     {
383 	/* Use the small array inside the hashdict structure. */
384 	newarray = ht->ht_smallarray;
385 	if (ht->ht_array == newarray)
386 	{
387 	    /* Moving from ht_smallarray to ht_smallarray!  Happens when there
388 	     * are many removed items.  Copy the items to be able to clean up
389 	     * removed items. */
390 	    mch_memmove(temparray, newarray, sizeof(temparray));
391 	    oldarray = temparray;
392 	}
393 	else
394 	    oldarray = ht->ht_array;
395     }
396     else
397     {
398 	/* Allocate an array. */
399 	newarray = (hashitem_T *)alloc((unsigned)
400 					      (sizeof(hashitem_T) * newsize));
401 	if (newarray == NULL)
402 	{
403 	    /* Out of memory.  When there are NULL items still return OK.
404 	     * Otherwise set ht_error, because lookup may result in a hang if
405 	     * we add another item. */
406 	    if (ht->ht_filled < ht->ht_mask)
407 		return OK;
408 	    ht->ht_error = TRUE;
409 	    return FAIL;
410 	}
411 	oldarray = ht->ht_array;
412     }
413     vim_memset(newarray, 0, (size_t)(sizeof(hashitem_T) * newsize));
414 
415     /*
416      * Move all the items from the old array to the new one, placing them in
417      * the right spot.  The new array won't have any removed items, thus this
418      * is also a cleanup action.
419      */
420     newmask = newsize - 1;
421     todo = (int)ht->ht_used;
422     for (olditem = oldarray; todo > 0; ++olditem)
423 	if (!HASHITEM_EMPTY(olditem))
424 	{
425 	    /*
426 	     * The algorithm to find the spot to add the item is identical to
427 	     * the algorithm to find an item in hash_lookup().  But we only
428 	     * need to search for a NULL key, thus it's simpler.
429 	     */
430 	    newi = (unsigned)(olditem->hi_hash & newmask);
431 	    newitem = &newarray[newi];
432 
433 	    if (newitem->hi_key != NULL)
434 		for (perturb = olditem->hi_hash; ; perturb >>= PERTURB_SHIFT)
435 		{
436 		    newi = (unsigned)((newi << 2U) + newi + perturb + 1U);
437 		    newitem = &newarray[newi & newmask];
438 		    if (newitem->hi_key == NULL)
439 			break;
440 		}
441 	    *newitem = *olditem;
442 	    --todo;
443 	}
444 
445     if (ht->ht_array != ht->ht_smallarray)
446 	vim_free(ht->ht_array);
447     ht->ht_array = newarray;
448     ht->ht_mask = newmask;
449     ht->ht_filled = ht->ht_used;
450     ht->ht_error = FALSE;
451 
452     return OK;
453 }
454 
455 /*
456  * Get the hash number for a key.
457  * If you think you know a better hash function: Compile with HT_DEBUG set and
458  * run a script that uses hashtables a lot.  Vim will then print statistics
459  * when exiting.  Try that with the current hash algorithm and yours.  The
460  * lower the percentage the better.
461  */
462     hash_T
463 hash_hash(char_u *key)
464 {
465     hash_T	hash;
466     char_u	*p;
467 
468     if ((hash = *key) == 0)
469 	return (hash_T)0;
470     p = key + 1;
471 
472     /* A simplistic algorithm that appears to do very well.
473      * Suggested by George Reilly. */
474     while (*p != NUL)
475 	hash = hash * 101 + *p++;
476 
477     return hash;
478 }
479