xref: /memcached-1.4.29/assoc.c (revision cb01d504)
1825db0f9SBrad Fitzpatrick /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2825db0f9SBrad Fitzpatrick /*
3825db0f9SBrad Fitzpatrick  * Hash table
4825db0f9SBrad Fitzpatrick  *
5825db0f9SBrad Fitzpatrick  * The hash function used here is by Bob Jenkins, 1996:
6825db0f9SBrad Fitzpatrick  *    <http://burtleburtle.net/bob/hash/doobs.html>
7825db0f9SBrad Fitzpatrick  *       "By Bob Jenkins, 1996.  [email protected].
8825db0f9SBrad Fitzpatrick  *       You may use this code any way you wish, private, educational,
9825db0f9SBrad Fitzpatrick  *       or commercial.  It's free."
10825db0f9SBrad Fitzpatrick  *
11825db0f9SBrad Fitzpatrick  * The rest of the file is licensed under the BSD license.  See LICENSE.
12825db0f9SBrad Fitzpatrick  */
1356b8339eSSteven Grimm 
1456b8339eSSteven Grimm #include "memcached.h"
15825db0f9SBrad Fitzpatrick #include <sys/stat.h>
16825db0f9SBrad Fitzpatrick #include <sys/socket.h>
17825db0f9SBrad Fitzpatrick #include <sys/resource.h>
18e688e97dSNatanael Copa #include <signal.h>
19825db0f9SBrad Fitzpatrick #include <fcntl.h>
20c6975ef4SPaul Lindner #include <netinet/in.h>
21c6975ef4SPaul Lindner #include <errno.h>
22825db0f9SBrad Fitzpatrick #include <stdlib.h>
23825db0f9SBrad Fitzpatrick #include <stdio.h>
24825db0f9SBrad Fitzpatrick #include <string.h>
2518a72ad2SBrad Fitzpatrick #include <assert.h>
267f09e20bSTrond Norbye #include <pthread.h>
277f09e20bSTrond Norbye 
287f09e20bSTrond Norbye static pthread_cond_t maintenance_cond = PTHREAD_COND_INITIALIZER;
296af7aa0bSdormando static pthread_mutex_t maintenance_lock = PTHREAD_MUTEX_INITIALIZER;
305d7dc88dSdormando static pthread_mutex_t hash_items_counter_lock = PTHREAD_MUTEX_INITIALIZER;
3186969ea4SBrad Fitzpatrick 
32217dcce0SSteven Grimm typedef  unsigned long  int  ub4;   /* unsigned 4-byte quantities */
33217dcce0SSteven Grimm typedef  unsigned       char ub1;   /* unsigned 1-byte quantities */
34217dcce0SSteven Grimm 
35217dcce0SSteven Grimm /* how many powers of 2's worth of buckets we use */
361c94e12cSdormando unsigned int hashpower = HASHPOWER_DEFAULT;
37217dcce0SSteven Grimm 
38217dcce0SSteven Grimm #define hashsize(n) ((ub4)1<<(n))
39217dcce0SSteven Grimm #define hashmask(n) (hashsize(n)-1)
40217dcce0SSteven Grimm 
41217dcce0SSteven Grimm /* Main hash table. This is where we look except during expansion. */
42217dcce0SSteven Grimm static item** primary_hashtable = 0;
43217dcce0SSteven Grimm 
44217dcce0SSteven Grimm /*
45217dcce0SSteven Grimm  * Previous hash table. During expansion, we look here for keys that haven't
46217dcce0SSteven Grimm  * been moved over to the primary yet.
47217dcce0SSteven Grimm  */
48217dcce0SSteven Grimm static item** old_hashtable = 0;
49217dcce0SSteven Grimm 
50217dcce0SSteven Grimm /* Number of items in the hash table. */
5144ef96eeSPaul Lindner static unsigned int hash_items = 0;
52217dcce0SSteven Grimm 
53217dcce0SSteven Grimm /* Flag: Are we in the middle of expanding now? */
5444ef96eeSPaul Lindner static bool expanding = false;
551c94e12cSdormando static bool started_expanding = false;
56217dcce0SSteven Grimm 
57217dcce0SSteven Grimm /*
58217dcce0SSteven Grimm  * During expansion we migrate values with bucket granularity; this is how
59217dcce0SSteven Grimm  * far we've gotten so far. Ranges from 0 .. hashsize(hashpower - 1) - 1.
60217dcce0SSteven Grimm  */
6144ef96eeSPaul Lindner static unsigned int expand_bucket = 0;
6286969ea4SBrad Fitzpatrick 
assoc_init(const int hashtable_init)631db1de38Sdormando void assoc_init(const int hashtable_init) {
641db1de38Sdormando     if (hashtable_init) {
651db1de38Sdormando         hashpower = hashtable_init;
661db1de38Sdormando     }
67984053cdSDustin Sallings     primary_hashtable = calloc(hashsize(hashpower), sizeof(void *));
68217dcce0SSteven Grimm     if (! primary_hashtable) {
69825db0f9SBrad Fitzpatrick         fprintf(stderr, "Failed to init hashtable.\n");
70c8425072SPaul Lindner         exit(EXIT_FAILURE);
71825db0f9SBrad Fitzpatrick     }
72108e2cd6Sdormando     STATS_LOCK();
73*cb01d504Sdormando     stats_state.hash_power_level = hashpower;
74*cb01d504Sdormando     stats_state.hash_bytes = hashsize(hashpower) * sizeof(void *);
75108e2cd6Sdormando     STATS_UNLOCK();
76825db0f9SBrad Fitzpatrick }
7786969ea4SBrad Fitzpatrick 
assoc_find(const char * key,const size_t nkey,const uint32_t hv)78bab9acd1Sdormando item *assoc_find(const char *key, const size_t nkey, const uint32_t hv) {
79217dcce0SSteven Grimm     item *it;
8044ef96eeSPaul Lindner     unsigned int oldbucket;
81217dcce0SSteven Grimm 
82217dcce0SSteven Grimm     if (expanding &&
83217dcce0SSteven Grimm         (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
84217dcce0SSteven Grimm     {
85217dcce0SSteven Grimm         it = old_hashtable[oldbucket];
86217dcce0SSteven Grimm     } else {
87217dcce0SSteven Grimm         it = primary_hashtable[hv & hashmask(hashpower)];
88217dcce0SSteven Grimm     }
8986969ea4SBrad Fitzpatrick 
9068957214STrond Norbye     item *ret = NULL;
9168957214STrond Norbye     int depth = 0;
92f6d334e0SBrad Fitzpatrick     while (it) {
9368957214STrond Norbye         if ((nkey == it->nkey) && (memcmp(key, ITEM_key(it), nkey) == 0)) {
9468957214STrond Norbye             ret = it;
9568957214STrond Norbye             break;
96217dcce0SSteven Grimm         }
97f6d334e0SBrad Fitzpatrick         it = it->h_next;
9868957214STrond Norbye         ++depth;
99825db0f9SBrad Fitzpatrick     }
10080ec0955STrond Norbye     MEMCACHED_ASSOC_FIND(key, nkey, depth);
10168957214STrond Norbye     return ret;
102825db0f9SBrad Fitzpatrick }
10386969ea4SBrad Fitzpatrick 
1047917af40SBrad Fitzpatrick /* returns the address of the item pointer before the key.  if *item == 0,
1057917af40SBrad Fitzpatrick    the item wasn't found */
10686969ea4SBrad Fitzpatrick 
_hashitem_before(const char * key,const size_t nkey,const uint32_t hv)107bab9acd1Sdormando static item** _hashitem_before (const char *key, const size_t nkey, const uint32_t hv) {
108217dcce0SSteven Grimm     item **pos;
10944ef96eeSPaul Lindner     unsigned int oldbucket;
11086969ea4SBrad Fitzpatrick 
111217dcce0SSteven Grimm     if (expanding &&
112217dcce0SSteven Grimm         (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
113217dcce0SSteven Grimm     {
114217dcce0SSteven Grimm         pos = &old_hashtable[oldbucket];
115217dcce0SSteven Grimm     } else {
116217dcce0SSteven Grimm         pos = &primary_hashtable[hv & hashmask(hashpower)];
117217dcce0SSteven Grimm     }
118217dcce0SSteven Grimm 
119217dcce0SSteven Grimm     while (*pos && ((nkey != (*pos)->nkey) || memcmp(key, ITEM_key(*pos), nkey))) {
120f6d334e0SBrad Fitzpatrick         pos = &(*pos)->h_next;
121825db0f9SBrad Fitzpatrick     }
122825db0f9SBrad Fitzpatrick     return pos;
123825db0f9SBrad Fitzpatrick }
12486969ea4SBrad Fitzpatrick 
125217dcce0SSteven Grimm /* grows the hashtable to the next power of 2. */
assoc_expand(void)126217dcce0SSteven Grimm static void assoc_expand(void) {
127217dcce0SSteven Grimm     old_hashtable = primary_hashtable;
128217dcce0SSteven Grimm 
129217dcce0SSteven Grimm     primary_hashtable = calloc(hashsize(hashpower + 1), sizeof(void *));
130217dcce0SSteven Grimm     if (primary_hashtable) {
131217dcce0SSteven Grimm         if (settings.verbose > 1)
132217dcce0SSteven Grimm             fprintf(stderr, "Hash table expansion starting\n");
133217dcce0SSteven Grimm         hashpower++;
13444ef96eeSPaul Lindner         expanding = true;
135217dcce0SSteven Grimm         expand_bucket = 0;
136108e2cd6Sdormando         STATS_LOCK();
137*cb01d504Sdormando         stats_state.hash_power_level = hashpower;
138*cb01d504Sdormando         stats_state.hash_bytes += hashsize(hashpower) * sizeof(void *);
139*cb01d504Sdormando         stats_state.hash_is_expanding = true;
140108e2cd6Sdormando         STATS_UNLOCK();
141217dcce0SSteven Grimm     } else {
142217dcce0SSteven Grimm         primary_hashtable = old_hashtable;
143217dcce0SSteven Grimm         /* Bad news, but we can keep running. */
144217dcce0SSteven Grimm     }
145217dcce0SSteven Grimm }
146217dcce0SSteven Grimm 
assoc_start_expand(void)1471c94e12cSdormando static void assoc_start_expand(void) {
1481c94e12cSdormando     if (started_expanding)
1491c94e12cSdormando         return;
150d2676b4aSJason CHAN 
1511c94e12cSdormando     started_expanding = true;
1521c94e12cSdormando     pthread_cond_signal(&maintenance_cond);
1531c94e12cSdormando }
1541c94e12cSdormando 
1557917af40SBrad Fitzpatrick /* Note: this isn't an assoc_update.  The key must not already exist to call this */
assoc_insert(item * it,const uint32_t hv)156bab9acd1Sdormando int assoc_insert(item *it, const uint32_t hv) {
15744ef96eeSPaul Lindner     unsigned int oldbucket;
158217dcce0SSteven Grimm 
159bab9acd1Sdormando //    assert(assoc_find(ITEM_key(it), it->nkey) == 0);  /* shouldn't have duplicately named things defined */
160217dcce0SSteven Grimm 
161217dcce0SSteven Grimm     if (expanding &&
162217dcce0SSteven Grimm         (oldbucket = (hv & hashmask(hashpower - 1))) >= expand_bucket)
163217dcce0SSteven Grimm     {
164217dcce0SSteven Grimm         it->h_next = old_hashtable[oldbucket];
165217dcce0SSteven Grimm         old_hashtable[oldbucket] = it;
166217dcce0SSteven Grimm     } else {
167217dcce0SSteven Grimm         it->h_next = primary_hashtable[hv & hashmask(hashpower)];
168217dcce0SSteven Grimm         primary_hashtable[hv & hashmask(hashpower)] = it;
169217dcce0SSteven Grimm     }
170217dcce0SSteven Grimm 
1715d7dc88dSdormando     pthread_mutex_lock(&hash_items_counter_lock);
172217dcce0SSteven Grimm     hash_items++;
173217dcce0SSteven Grimm     if (! expanding && hash_items > (hashsize(hashpower) * 3) / 2) {
1741c94e12cSdormando         assoc_start_expand();
175217dcce0SSteven Grimm     }
1765d7dc88dSdormando     pthread_mutex_unlock(&hash_items_counter_lock);
177217dcce0SSteven Grimm 
17880ec0955STrond Norbye     MEMCACHED_ASSOC_INSERT(ITEM_key(it), it->nkey, hash_items);
179825db0f9SBrad Fitzpatrick     return 1;
180825db0f9SBrad Fitzpatrick }
18186969ea4SBrad Fitzpatrick 
assoc_delete(const char * key,const size_t nkey,const uint32_t hv)182bab9acd1Sdormando void assoc_delete(const char *key, const size_t nkey, const uint32_t hv) {
183bab9acd1Sdormando     item **before = _hashitem_before(key, nkey, hv);
184217dcce0SSteven Grimm 
185825db0f9SBrad Fitzpatrick     if (*before) {
18668957214STrond Norbye         item *nxt;
1875d7dc88dSdormando         pthread_mutex_lock(&hash_items_counter_lock);
18868957214STrond Norbye         hash_items--;
1895d7dc88dSdormando         pthread_mutex_unlock(&hash_items_counter_lock);
19068957214STrond Norbye         /* The DTrace probe cannot be triggered as the last instruction
19168957214STrond Norbye          * due to possible tail-optimization by the compiler
19268957214STrond Norbye          */
19380ec0955STrond Norbye         MEMCACHED_ASSOC_DELETE(key, nkey, hash_items);
19468957214STrond Norbye         nxt = (*before)->h_next;
195f6d334e0SBrad Fitzpatrick         (*before)->h_next = 0;   /* probably pointless, but whatever. */
196f6d334e0SBrad Fitzpatrick         *before = nxt;
197f6d334e0SBrad Fitzpatrick         return;
198825db0f9SBrad Fitzpatrick     }
1997917af40SBrad Fitzpatrick     /* Note:  we never actually get here.  the callers don't delete things
200c08383afSBrad Fitzpatrick        they can't find. */
201c08383afSBrad Fitzpatrick     assert(*before != 0);
202825db0f9SBrad Fitzpatrick }
2037f09e20bSTrond Norbye 
2047f09e20bSTrond Norbye 
2057f09e20bSTrond Norbye static volatile int do_run_maintenance_thread = 1;
2067f09e20bSTrond Norbye 
2077f09e20bSTrond Norbye #define DEFAULT_HASH_BULK_MOVE 1
2087f09e20bSTrond Norbye int hash_bulk_move = DEFAULT_HASH_BULK_MOVE;
2097f09e20bSTrond Norbye 
assoc_maintenance_thread(void * arg)2107f09e20bSTrond Norbye static void *assoc_maintenance_thread(void *arg) {
2117f09e20bSTrond Norbye 
2126af7aa0bSdormando     mutex_lock(&maintenance_lock);
2137f09e20bSTrond Norbye     while (do_run_maintenance_thread) {
214dfc5130eSDustin Sallings         int ii = 0;
2157f09e20bSTrond Norbye 
2166af7aa0bSdormando         /* There is only one expansion thread, so no need to global lock. */
217dfc5130eSDustin Sallings         for (ii = 0; ii < hash_bulk_move && expanding; ++ii) {
2187f09e20bSTrond Norbye             item *it, *next;
2197f09e20bSTrond Norbye             int bucket;
220d2676b4aSJason CHAN             void *item_lock = NULL;
2217f09e20bSTrond Norbye 
222d2676b4aSJason CHAN             /* bucket = hv & hashmask(hashpower) =>the bucket of hash table
223d2676b4aSJason CHAN              * is the lowest N bits of the hv, and the bucket of item_locks is
224d2676b4aSJason CHAN              *  also the lowest M bits of hv, and N is greater than M.
225d2676b4aSJason CHAN              *  So we can process expanding with only one item_lock. cool! */
226d2676b4aSJason CHAN             if ((item_lock = item_trylock(expand_bucket))) {
2277f09e20bSTrond Norbye                     for (it = old_hashtable[expand_bucket]; NULL != it; it = next) {
2287f09e20bSTrond Norbye                         next = it->h_next;
22905ca809cSdormando                         bucket = hash(ITEM_key(it), it->nkey) & hashmask(hashpower);
2307f09e20bSTrond Norbye                         it->h_next = primary_hashtable[bucket];
2317f09e20bSTrond Norbye                         primary_hashtable[bucket] = it;
2327f09e20bSTrond Norbye                     }
2337f09e20bSTrond Norbye 
2347f09e20bSTrond Norbye                     old_hashtable[expand_bucket] = NULL;
2357f09e20bSTrond Norbye 
2367f09e20bSTrond Norbye                     expand_bucket++;
2377f09e20bSTrond Norbye                     if (expand_bucket == hashsize(hashpower - 1)) {
2387f09e20bSTrond Norbye                         expanding = false;
2397f09e20bSTrond Norbye                         free(old_hashtable);
240108e2cd6Sdormando                         STATS_LOCK();
241*cb01d504Sdormando                         stats_state.hash_bytes -= hashsize(hashpower - 1) * sizeof(void *);
242*cb01d504Sdormando                         stats_state.hash_is_expanding = false;
243108e2cd6Sdormando                         STATS_UNLOCK();
2447f09e20bSTrond Norbye                         if (settings.verbose > 1)
2457f09e20bSTrond Norbye                             fprintf(stderr, "Hash table expansion done\n");
2467f09e20bSTrond Norbye                     }
247d2676b4aSJason CHAN 
248d2676b4aSJason CHAN             } else {
2496af7aa0bSdormando                 usleep(10*1000);
2507f09e20bSTrond Norbye             }
2517f09e20bSTrond Norbye 
252d2676b4aSJason CHAN             if (item_lock) {
253d2676b4aSJason CHAN                 item_trylock_unlock(item_lock);
254d2676b4aSJason CHAN                 item_lock = NULL;
255d2676b4aSJason CHAN             }
256d2676b4aSJason CHAN         }
2571c94e12cSdormando 
2587f09e20bSTrond Norbye         if (!expanding) {
2597f09e20bSTrond Norbye             /* We are done expanding.. just wait for next invocation */
2608963836cSdormando             started_expanding = false;
2616af7aa0bSdormando             pthread_cond_wait(&maintenance_cond, &maintenance_lock);
2626af7aa0bSdormando             /* assoc_expand() swaps out the hash table entirely, so we need
2636af7aa0bSdormando              * all threads to not hold any references related to the hash
2646af7aa0bSdormando              * table while this happens.
2656af7aa0bSdormando              * This is instead of a more complex, possibly slower algorithm to
2666af7aa0bSdormando              * allow dynamic hash table expansion without causing significant
2676af7aa0bSdormando              * wait times.
2686af7aa0bSdormando              */
2696af7aa0bSdormando             pause_threads(PAUSE_ALL_THREADS);
2701c94e12cSdormando             assoc_expand();
2716af7aa0bSdormando             pause_threads(RESUME_ALL_THREADS);
2721c94e12cSdormando         }
2737f09e20bSTrond Norbye     }
2747f09e20bSTrond Norbye     return NULL;
2757f09e20bSTrond Norbye }
2767f09e20bSTrond Norbye 
2777f09e20bSTrond Norbye static pthread_t maintenance_tid;
2787f09e20bSTrond Norbye 
start_assoc_maintenance_thread()2797f09e20bSTrond Norbye int start_assoc_maintenance_thread() {
2807f09e20bSTrond Norbye     int ret;
2817f09e20bSTrond Norbye     char *env = getenv("MEMCACHED_HASH_BULK_MOVE");
2827f09e20bSTrond Norbye     if (env != NULL) {
2837f09e20bSTrond Norbye         hash_bulk_move = atoi(env);
2847f09e20bSTrond Norbye         if (hash_bulk_move == 0) {
2857f09e20bSTrond Norbye             hash_bulk_move = DEFAULT_HASH_BULK_MOVE;
2867f09e20bSTrond Norbye         }
2877f09e20bSTrond Norbye     }
2886af7aa0bSdormando     pthread_mutex_init(&maintenance_lock, NULL);
2897f09e20bSTrond Norbye     if ((ret = pthread_create(&maintenance_tid, NULL,
2907f09e20bSTrond Norbye                               assoc_maintenance_thread, NULL)) != 0) {
2917f09e20bSTrond Norbye         fprintf(stderr, "Can't create thread: %s\n", strerror(ret));
2927f09e20bSTrond Norbye         return -1;
2937f09e20bSTrond Norbye     }
2947f09e20bSTrond Norbye     return 0;
2957f09e20bSTrond Norbye }
2967f09e20bSTrond Norbye 
stop_assoc_maintenance_thread()2977f09e20bSTrond Norbye void stop_assoc_maintenance_thread() {
2986af7aa0bSdormando     mutex_lock(&maintenance_lock);
2997f09e20bSTrond Norbye     do_run_maintenance_thread = 0;
3007f09e20bSTrond Norbye     pthread_cond_signal(&maintenance_cond);
3016af7aa0bSdormando     mutex_unlock(&maintenance_lock);
3027f09e20bSTrond Norbye 
3037f09e20bSTrond Norbye     /* Wait for the maintenance thread to stop */
3047f09e20bSTrond Norbye     pthread_join(maintenance_tid, NULL);
3057f09e20bSTrond Norbye }
3067f09e20bSTrond Norbye 
307