xref: /memcached-1.4.29/slabs.c (revision ee461d11)
1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3  * Slabs memory allocation, based on powers-of-N. Slabs are up to 1MB in size
4  * and are divided into chunks. The chunk sizes start off at the size of the
5  * "item" structure plus space for a small key and value. They increase by
6  * a multiplier factor from there, up to half the maximum slab size. The last
7  * slab size is always 1MB, since that's the maximum item size allowed by the
8  * memcached protocol.
9  */
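/*
 * Rough illustration (a sketch, assuming the stock defaults: 48-byte minimum
 * chunk, growth factor 1.25, 1MB pages): the resulting chunk sizes run
 * roughly 96, 120, 152, 192, 240, ... bytes, each rounded up to an 8-byte
 * boundary, until the final 1MB class is reached.
 */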
10 #include "memcached.h"
11 #include <sys/stat.h>
12 #include <sys/socket.h>
13 #include <sys/resource.h>
14 #include <fcntl.h>
15 #include <netinet/in.h>
16 #include <errno.h>
17 #include <stdlib.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <signal.h>
21 #include <assert.h>
22 #include <pthread.h>
23 
24 //#define DEBUG_SLAB_MOVER
25 /* powers-of-N allocation structures */
26 
27 typedef struct {
28     unsigned int size;      /* sizes of items */
29     unsigned int perslab;   /* how many items per slab */
30 
31     void *slots;           /* list of item ptrs */
32     unsigned int sl_curr;   /* total free items in list */
33 
34     unsigned int slabs;     /* how many slabs were allocated for this class */
35 
36     void **slab_list;       /* array of slab pointers */
37     unsigned int list_size; /* size of prev array */
38 
39     size_t requested; /* The number of requested bytes */
40 } slabclass_t;
41 
42 static slabclass_t slabclass[MAX_NUMBER_OF_SLAB_CLASSES];
43 static size_t mem_limit = 0;
44 static size_t mem_malloced = 0;
45 /* True if the memory limit has been hit at least once. Used as a hint to
46  * decide when to early-wake the LRU maintenance thread */
47 static bool mem_limit_reached = false;
48 static int power_largest;
49 
50 static void *mem_base = NULL;
51 static void *mem_current = NULL;
52 static size_t mem_avail = 0;
53 
54 /**
55  * Access to the slab allocator is protected by this lock
56  */
57 static pthread_mutex_t slabs_lock = PTHREAD_MUTEX_INITIALIZER;
58 static pthread_mutex_t slabs_rebalance_lock = PTHREAD_MUTEX_INITIALIZER;
59 
60 /*
61  * Forward Declarations
62  */
63 static int do_slabs_newslab(const unsigned int id);
64 static void *memory_allocate(size_t size);
65 static void do_slabs_free(void *ptr, const size_t size, unsigned int id);
66 
67 /* Preallocate as many slab pages as possible (called from slabs_init)
68    on start-up, so users don't get confusing out-of-memory errors when
69    they do have free (in-slab) space, but no space to make new slabs.
70    if maxslabs is 18 (POWER_LARGEST - POWER_SMALLEST + 1), then all
71    slab types can be made.  if max memory is less than 18 MB, only the
72    smaller ones will be made.  */
73 static void slabs_preallocate (const unsigned int maxslabs);
74 
75 /*
76  * Figures out which slab class (chunk size) is required to store an item of
77  * a given size.
78  *
79  * Given an object size, return the id to use when allocating/freeing memory
80  * for that object. 0 means error: can't store such a large object.
81  */
82 
83 unsigned int slabs_clsid(const size_t size) {
84     int res = POWER_SMALLEST;
85 
86     if (size == 0 || size > settings.item_size_max)
87         return 0;
88     while (size > slabclass[res].size)
89         if (res++ == power_largest)     /* won't fit in the biggest slab */
90             return power_largest;
91     return res;
92 }
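/*
 * Example (assuming the default class sizes sketched above): a 100-byte
 * object skips the 96-byte class and maps to the 120-byte class (id 2);
 * anything larger than settings.item_size_max returns 0, and anything
 * larger than the biggest chunk size returns power_largest, where it is
 * stored as a chunked item.
 */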
93 
94 /**
95  * Determines the chunk sizes and initializes the slab class descriptors
96  * accordingly.
97  */
98 void slabs_init(const size_t limit, const double factor, const bool prealloc, const uint32_t *slab_sizes) {
99     int i = POWER_SMALLEST - 1;
100     unsigned int size = sizeof(item) + settings.chunk_size;
101 
102     mem_limit = limit;
103 
104     if (prealloc) {
105         /* Allocate everything in a big chunk with malloc */
106         mem_base = malloc(mem_limit);
107         if (mem_base != NULL) {
108             mem_current = mem_base;
109             mem_avail = mem_limit;
110         } else {
111             fprintf(stderr, "Warning: Failed to allocate requested memory in"
112                     " one large chunk.\nWill allocate in smaller chunks\n");
113         }
114     }
115 
116     memset(slabclass, 0, sizeof(slabclass));
117 
118     while (++i < MAX_NUMBER_OF_SLAB_CLASSES-1) {
119         if (slab_sizes != NULL) {
120             if (slab_sizes[i-1] == 0)
121                 break;
122             size = slab_sizes[i-1];
123         } else if (size >= settings.slab_chunk_size_max / factor) {
124             break;
125         }
126         /* Make sure items are always n-byte aligned */
127         if (size % CHUNK_ALIGN_BYTES)
128             size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
129 
130         slabclass[i].size = size;
131         slabclass[i].perslab = settings.slab_page_size / slabclass[i].size;
132         if (slab_sizes == NULL)
133             size *= factor;
134         if (settings.verbose > 1) {
135             fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n",
136                     i, slabclass[i].size, slabclass[i].perslab);
137         }
138     }
139 
140     power_largest = i;
141     slabclass[power_largest].size = settings.slab_chunk_size_max;
142     slabclass[power_largest].perslab = settings.slab_page_size / settings.slab_chunk_size_max;
143     if (settings.verbose > 1) {
144         fprintf(stderr, "slab class %3d: chunk size %9u perslab %7u\n",
145                 i, slabclass[i].size, slabclass[i].perslab);
146     }
147 
148     /* for the test suite:  faking of how much we've already malloc'd */
149     {
150         char *t_initial_malloc = getenv("T_MEMD_INITIAL_MALLOC");
151         if (t_initial_malloc) {
152             mem_malloced = (size_t)atol(t_initial_malloc);
153         }
154 
155     }
156 
157     if (prealloc) {
158         slabs_preallocate(power_largest);
159     }
160 }
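/*
 * Sketch of how the startup code is expected to call this (illustrative,
 * not verbatim; the actual call lives in memcached.c):
 *
 *   slabs_init(settings.maxbytes, settings.factor, preallocate,
 *              use_slab_sizes ? slab_sizes : NULL);
 *
 * Passing a zero-terminated slab_sizes array bypasses the factor-based
 * progression and uses the caller's explicit chunk sizes instead.
 */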
161 
162 static void slabs_preallocate (const unsigned int maxslabs) {
163     int i;
164     unsigned int prealloc = 0;
165 
166     /* pre-allocate a 1MB slab in every size class so people don't get
167        confused by non-intuitive "SERVER_ERROR out of memory"
168        messages.  this is the most common question on the mailing
169        list.  if you really don't want this, you can rebuild without
170        these three lines.  */
171 
172     for (i = POWER_SMALLEST; i < MAX_NUMBER_OF_SLAB_CLASSES; i++) {
173         if (++prealloc > maxslabs)
174             return;
175         if (do_slabs_newslab(i) == 0) {
176             fprintf(stderr, "Error while preallocating slab memory!\n"
177                 "If using -L or other prealloc options, max memory must be "
178                 "at least %d megabytes.\n", power_largest);
179             exit(1);
180         }
181     }
182 
183 }
184 
185 static int grow_slab_list (const unsigned int id) {
186     slabclass_t *p = &slabclass[id];
187     if (p->slabs == p->list_size) {
188         size_t new_size =  (p->list_size != 0) ? p->list_size * 2 : 16;
189         void *new_list = realloc(p->slab_list, new_size * sizeof(void *));
190         if (new_list == 0) return 0;
191         p->list_size = new_size;
192         p->slab_list = new_list;
193     }
194     return 1;
195 }
196 
197 static void split_slab_page_into_freelist(char *ptr, const unsigned int id) {
198     slabclass_t *p = &slabclass[id];
199     int x;
200     for (x = 0; x < p->perslab; x++) {
201         do_slabs_free(ptr, 0, id);
202         ptr += p->size;
203     }
204 }
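/* Note: do_slabs_free() is called with size 0 here, so p->requested is left
 * untouched while the fresh page is carved into p->perslab free chunks. */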
205 
206 /* Fast LIFO stack of spare pages (push/pop at the tail of slab_list) */
207 static void *get_page_from_global_pool(void) {
208     slabclass_t *p = &slabclass[SLAB_GLOBAL_PAGE_POOL];
209     if (p->slabs < 1) {
210         return NULL;
211     }
212     char *ret = p->slab_list[p->slabs - 1];
213     p->slabs--;
214     return ret;
215 }
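/* Pages land in the global pool (class SLAB_GLOBAL_PAGE_POOL) when the
 * rebalancer's destination is class 0, e.g. via a "slabs reassign <src> 0"
 * request or the automover; do_slabs_newslab() drains this pool before
 * falling back to memory_allocate(). */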
216 
217 static int do_slabs_newslab(const unsigned int id) {
218     slabclass_t *p = &slabclass[id];
219     slabclass_t *g = &slabclass[SLAB_GLOBAL_PAGE_POOL];
220     int len = settings.slab_reassign ? settings.item_size_max
221         : p->size * p->perslab;
222     char *ptr;
223 
224     if ((mem_limit && mem_malloced + len > mem_limit && p->slabs > 0
225          && g->slabs == 0)) {
226         mem_limit_reached = true;
227         MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
228         return 0;
229     }
230 
231     if ((grow_slab_list(id) == 0) ||
232         (((ptr = get_page_from_global_pool()) == NULL) &&
233         ((ptr = memory_allocate((size_t)len)) == 0))) {
234 
235         MEMCACHED_SLABS_SLABCLASS_ALLOCATE_FAILED(id);
236         return 0;
237     }
238 
239     memset(ptr, 0, (size_t)len);
240     split_slab_page_into_freelist(ptr, id);
241 
242     p->slab_list[p->slabs++] = ptr;
243     MEMCACHED_SLABS_SLABCLASS_ALLOCATE(id);
244 
245     return 1;
246 }
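/* When slab_reassign is enabled every page is item_size_max bytes rather
 * than size * perslab, so any page can later be handed to a different class
 * by the rebalancer without resizing it. */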
247 
248 /* This calculation ends up adding sizeof(void *) to the item size. */
249 static void *do_slabs_alloc_chunked(const size_t size, slabclass_t *p, unsigned int id) {
250     void *ret = NULL;
251     item *it = NULL;
252     int x;
253     int csize = p->size - sizeof(item_chunk);
254     unsigned int chunks_req = size / csize;
255     if (size % csize != 0)
256         chunks_req++;
257     while (p->sl_curr < chunks_req) {
258         if (do_slabs_newslab(id) == 0)
259             break;
260     }
261 
262     if (p->sl_curr >= chunks_req) {
263         item_chunk *chunk = NULL;
264 
265         /* Configure the head item in the chain. */
266         it = (item *)p->slots;
267         p->slots = it->next;
268         if (it->next) it->next->prev = 0;
269 
270         /* Squirrel away the "top chunk" into h_next for now */
271         it->h_next = (item *)p->slots;
272         assert(it->h_next != 0);
273         chunk = (item_chunk *) it->h_next;
274 
275         /* roll down the chunks, marking them as such. */
276         for (x = 0; x < chunks_req-1; x++) {
277             chunk->it_flags &= ~ITEM_SLABBED;
278             chunk->it_flags |= ITEM_CHUNK;
279             /* Chunks always have a direct reference to the head item */
280             chunk->head = it;
281             chunk->size = p->size - sizeof(item_chunk);
282             chunk->used = 0;
283             chunk = chunk->next;
284         }
285 
286         /* The final "next" is now the top of the slab freelist */
287         p->slots = chunk;
288         if (chunk && chunk->prev) {
289             /* Disconnect the final chunk from the chain */
290             chunk->prev->next = 0;
291             chunk->prev = 0;
292         }
293 
294         it->it_flags &= ~ITEM_SLABBED;
295         it->it_flags |= ITEM_CHUNKED;
296         it->refcount = 1;
297         p->sl_curr -= chunks_req;
298         ret = (void *)it;
299     } else {
300         ret = NULL;
301     }
302 
303     return ret;
304 }
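/*
 * Worked example (hypothetical numbers): with p->size == 16384 and
 * sizeof(item_chunk) == 48, csize is 16336, so a 40000-byte request needs
 * chunks_req == 3; the allocation consumes three freelist slots, returned
 * as a head item with ITEM_CHUNKED set plus chained ITEM_CHUNK chunks.
 */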
305 
306 /*@null@*/
307 static void *do_slabs_alloc(const size_t size, unsigned int id, uint64_t *total_bytes,
308         unsigned int flags) {
309     slabclass_t *p;
310     void *ret = NULL;
311     item *it = NULL;
312 
313     if (id < POWER_SMALLEST || id > power_largest) {
314         MEMCACHED_SLABS_ALLOCATE_FAILED(size, 0);
315         return NULL;
316     }
317     p = &slabclass[id];
318     assert(p->sl_curr == 0 || ((item *)p->slots)->slabs_clsid == 0);
319     if (total_bytes != NULL) {
320         *total_bytes = p->requested;
321     }
322 
323     if (size <= p->size) {
324         /* fail unless we have space at the end of a recently allocated page,
325            we have something on our freelist, or we could allocate a new page */
326         if (p->sl_curr == 0 && flags != SLABS_ALLOC_NO_NEWPAGE) {
327             do_slabs_newslab(id);
328         }
329 
330         if (p->sl_curr != 0) {
331             /* return off our freelist */
332             it = (item *)p->slots;
333             p->slots = it->next;
334             if (it->next) it->next->prev = 0;
335             /* Kill flag and initialize refcount here for lock safety in slab
336              * mover's freeness detection. */
337             it->it_flags &= ~ITEM_SLABBED;
338             it->refcount = 1;
339             p->sl_curr--;
340             ret = (void *)it;
341         } else {
342             ret = NULL;
343         }
344     } else {
345         /* Dealing with a chunked item. */
346         ret = do_slabs_alloc_chunked(size, p, id);
347     }
348 
349     if (ret) {
350         p->requested += size;
351         MEMCACHED_SLABS_ALLOCATE(size, id, p->size, ret);
352     } else {
353         MEMCACHED_SLABS_ALLOCATE_FAILED(size, id);
354     }
355 
356     return ret;
357 }
358 
359 static void do_slabs_free_chunked(item *it, const size_t size, unsigned int id,
360                                   slabclass_t *p) {
361     item_chunk *chunk = (item_chunk *) ITEM_data(it);
362     size_t realsize = size;
363     while (chunk) {
364         realsize += sizeof(item_chunk);
365         chunk = chunk->next;
366     }
367     chunk = (item_chunk *) ITEM_data(it);
368     unsigned int chunks_found = 1;
369 
370     it->it_flags = ITEM_SLABBED;
371     it->slabs_clsid = 0;
372     it->prev = 0;
373     it->next = (item *) chunk->next;
374     assert(it->next);
375     /* top chunk should already point back to head */
376     assert(it->next && (void*)it->next->prev == (void*)chunk);
377     chunk = chunk->next;
378     chunk->prev = (item_chunk *)it;
379 
380     while (chunk) {
381         assert(chunk->it_flags == ITEM_CHUNK);
382         chunk->it_flags = ITEM_SLABBED;
383         chunk->slabs_clsid = 0;
384         chunks_found++;
385         if (chunk->next) {
386             chunk = chunk->next;
387         } else {
388             break;
389         }
390     }
391     /* must have had nothing hanging off of the final chunk */
392     assert(chunk && chunk->next == 0);
393     /* Tail chunk, link the freelist here. */
394     chunk->next = p->slots;
395     if (chunk->next) chunk->next->prev = chunk;
396 
397     p->slots = it;
398     p->sl_curr += chunks_found;
399     p->requested -= size;
400 
401     return;
402 }
403 
404 
405 static void do_slabs_free(void *ptr, const size_t size, unsigned int id) {
406     slabclass_t *p;
407     item *it;
408 
409     assert(id >= POWER_SMALLEST && id <= power_largest);
410     if (id < POWER_SMALLEST || id > power_largest)
411         return;
412 
413     MEMCACHED_SLABS_FREE(size, id, ptr);
414     p = &slabclass[id];
415 
416     it = (item *)ptr;
417     if ((it->it_flags & ITEM_CHUNKED) == 0) {
418         it->it_flags = ITEM_SLABBED;
419         it->slabs_clsid = 0;
420         it->prev = 0;
421         it->next = p->slots;
422         if (it->next) it->next->prev = it;
423         p->slots = it;
424 
425         p->sl_curr++;
426         p->requested -= size;
427     } else {
428         do_slabs_free_chunked(it, size, id, p);
429     }
430     return;
431 }
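/* Free chunks are kept on a per-class doubly linked list threaded through
 * the item headers themselves (it->next / it->prev), with p->slots as the
 * list head and p->sl_curr tracking its length. */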
432 
433 static int nz_strcmp(int nzlength, const char *nz, const char *z) {
434     int zlength=strlen(z);
435     return (zlength == nzlength) && (strncmp(nz, z, zlength) == 0) ? 0 : -1;
436 }
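/* Example: nz_strcmp(5, "items", "items") == 0, while
 * nz_strcmp(5, "items", "item") == -1 because the lengths differ. */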
437 
438 bool get_stats(const char *stat_type, int nkey, ADD_STAT add_stats, void *c) {
439     bool ret = true;
440 
441     if (add_stats != NULL) {
442         if (!stat_type) {
443             /* prepare general statistics for the engine */
444             STATS_LOCK();
445             APPEND_STAT("bytes", "%llu", (unsigned long long)stats_state.curr_bytes);
446             APPEND_STAT("curr_items", "%llu", (unsigned long long)stats_state.curr_items);
447             APPEND_STAT("total_items", "%llu", (unsigned long long)stats.total_items);
448             STATS_UNLOCK();
449             if (settings.slab_automove > 0) {
450                 pthread_mutex_lock(&slabs_lock);
451                 APPEND_STAT("slab_global_page_pool", "%u", slabclass[SLAB_GLOBAL_PAGE_POOL].slabs);
452                 pthread_mutex_unlock(&slabs_lock);
453             }
454             item_stats_totals(add_stats, c);
455         } else if (nz_strcmp(nkey, stat_type, "items") == 0) {
456             item_stats(add_stats, c);
457         } else if (nz_strcmp(nkey, stat_type, "slabs") == 0) {
458             slabs_stats(add_stats, c);
459         } else if (nz_strcmp(nkey, stat_type, "sizes") == 0) {
460             item_stats_sizes(add_stats, c);
461         } else if (nz_strcmp(nkey, stat_type, "sizes_enable") == 0) {
462             item_stats_sizes_enable(add_stats, c);
463         } else if (nz_strcmp(nkey, stat_type, "sizes_disable") == 0) {
464             item_stats_sizes_disable(add_stats, c);
465         } else {
466             ret = false;
467         }
468     } else {
469         ret = false;
470     }
471 
472     return ret;
473 }
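/* These branches back the text protocol's "stats", "stats items",
 * "stats slabs", "stats sizes" (and sizes_enable/sizes_disable) commands;
 * stat_type is the word following "stats", or NULL for the bare command. */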
474 
475 /*@null@*/
476 static void do_slabs_stats(ADD_STAT add_stats, void *c) {
477     int i, total;
478     /* Get the per-thread stats which contain some interesting aggregates */
479     struct thread_stats thread_stats;
480     threadlocal_stats_aggregate(&thread_stats);
481 
482     total = 0;
483     for(i = POWER_SMALLEST; i <= power_largest; i++) {
484         slabclass_t *p = &slabclass[i];
485         if (p->slabs != 0) {
486             uint32_t perslab, slabs;
487             slabs = p->slabs;
488             perslab = p->perslab;
489 
490             char key_str[STAT_KEY_LEN];
491             char val_str[STAT_VAL_LEN];
492             int klen = 0, vlen = 0;
493 
494             APPEND_NUM_STAT(i, "chunk_size", "%u", p->size);
495             APPEND_NUM_STAT(i, "chunks_per_page", "%u", perslab);
496             APPEND_NUM_STAT(i, "total_pages", "%u", slabs);
497             APPEND_NUM_STAT(i, "total_chunks", "%u", slabs * perslab);
498             APPEND_NUM_STAT(i, "used_chunks", "%u",
499                             slabs*perslab - p->sl_curr);
500             APPEND_NUM_STAT(i, "free_chunks", "%u", p->sl_curr);
501             /* Stat is dead, but displaying zero instead of removing it. */
502             APPEND_NUM_STAT(i, "free_chunks_end", "%u", 0);
503             APPEND_NUM_STAT(i, "mem_requested", "%llu",
504                             (unsigned long long)p->requested);
505             APPEND_NUM_STAT(i, "get_hits", "%llu",
506                     (unsigned long long)thread_stats.slab_stats[i].get_hits);
507             APPEND_NUM_STAT(i, "cmd_set", "%llu",
508                     (unsigned long long)thread_stats.slab_stats[i].set_cmds);
509             APPEND_NUM_STAT(i, "delete_hits", "%llu",
510                     (unsigned long long)thread_stats.slab_stats[i].delete_hits);
511             APPEND_NUM_STAT(i, "incr_hits", "%llu",
512                     (unsigned long long)thread_stats.slab_stats[i].incr_hits);
513             APPEND_NUM_STAT(i, "decr_hits", "%llu",
514                     (unsigned long long)thread_stats.slab_stats[i].decr_hits);
515             APPEND_NUM_STAT(i, "cas_hits", "%llu",
516                     (unsigned long long)thread_stats.slab_stats[i].cas_hits);
517             APPEND_NUM_STAT(i, "cas_badval", "%llu",
518                     (unsigned long long)thread_stats.slab_stats[i].cas_badval);
519             APPEND_NUM_STAT(i, "touch_hits", "%llu",
520                     (unsigned long long)thread_stats.slab_stats[i].touch_hits);
521             total++;
522         }
523     }
524 
525     /* add overall slab stats and append terminator */
526 
527     APPEND_STAT("active_slabs", "%d", total);
528     APPEND_STAT("total_malloced", "%llu", (unsigned long long)mem_malloced);
529     add_stats(NULL, 0, NULL, 0, c);
530 }
531 
532 static void *memory_allocate(size_t size) {
533     void *ret;
534 
535     if (mem_base == NULL) {
536         /* We are not using a preallocated large memory chunk */
537         ret = malloc(size);
538     } else {
539         ret = mem_current;
540 
541         if (size > mem_avail) {
542             return NULL;
543         }
544 
545         /* mem_current pointer _must_ be aligned!!! */
546         if (size % CHUNK_ALIGN_BYTES) {
547             size += CHUNK_ALIGN_BYTES - (size % CHUNK_ALIGN_BYTES);
548         }
549 
550         mem_current = ((char*)mem_current) + size;
551         if (size < mem_avail) {
552             mem_avail -= size;
553         } else {
554             mem_avail = 0;
555         }
556     }
557     mem_malloced += size;
558 
559     return ret;
560 }
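/* In the preallocated (mem_base) path the request is bump-allocated out of
 * the big chunk and rounded up to CHUNK_ALIGN_BYTES; otherwise it is a plain
 * malloc(). mem_malloced grows by the (possibly rounded) size either way. */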
561 
562 /* Must only be used if all pages are item_size_max */
563 static void memory_release() {
564     void *p = NULL;
565     if (mem_base != NULL)
566         return;
567 
568     if (!settings.slab_reassign)
569         return;
570 
571     while (mem_malloced > mem_limit &&
572             (p = get_page_from_global_pool()) != NULL) {
573         free(p);
574         mem_malloced -= settings.item_size_max;
575     }
576 }
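/* Only whole pages sitting in the global pool are handed back to the OS,
 * and only when pages were malloc()ed individually (mem_base == NULL); the
 * accounting assumes each freed page is exactly item_size_max bytes, which
 * slab_reassign guarantees. */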
577 
578 void *slabs_alloc(size_t size, unsigned int id, uint64_t *total_bytes,
579         unsigned int flags) {
580     void *ret;
581 
582     pthread_mutex_lock(&slabs_lock);
583     ret = do_slabs_alloc(size, id, total_bytes, flags);
584     pthread_mutex_unlock(&slabs_lock);
585     return ret;
586 }
587 
588 void slabs_free(void *ptr, size_t size, unsigned int id) {
589     pthread_mutex_lock(&slabs_lock);
590     do_slabs_free(ptr, size, id);
591     pthread_mutex_unlock(&slabs_lock);
592 }
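/*
 * Typical usage from the item code (a sketch; the real callers live in
 * items.c and pass their own flags):
 *
 *   unsigned int id = slabs_clsid(ntotal);
 *   item *it = slabs_alloc(ntotal, id, NULL, 0);
 *   ...
 *   slabs_free(it, ntotal, id);
 */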
593 
594 void slabs_stats(ADD_STAT add_stats, void *c) {
595     pthread_mutex_lock(&slabs_lock);
596     do_slabs_stats(add_stats, c);
597     pthread_mutex_unlock(&slabs_lock);
598 }
599 
600 static bool do_slabs_adjust_mem_limit(size_t new_mem_limit) {
601     /* Cannot adjust memory limit at runtime if prealloc'ed */
602     if (mem_base != NULL)
603         return false;
604     settings.maxbytes = new_mem_limit;
605     mem_limit = new_mem_limit;
606     mem_limit_reached = false; /* Will reset on next alloc */
607     memory_release(); /* free what might already be in the global pool */
608     return true;
609 }
610 
611 bool slabs_adjust_mem_limit(size_t new_mem_limit) {
612     bool ret;
613     pthread_mutex_lock(&slabs_lock);
614     ret = do_slabs_adjust_mem_limit(new_mem_limit);
615     pthread_mutex_unlock(&slabs_lock);
616     return ret;
617 }
618 
619 void slabs_adjust_mem_requested(unsigned int id, size_t old, size_t ntotal)
620 {
621     pthread_mutex_lock(&slabs_lock);
622     slabclass_t *p;
623     if (id < POWER_SMALLEST || id > power_largest) {
624         fprintf(stderr, "Internal error! Invalid slab class\n");
625         abort();
626     }
627 
628     p = &slabclass[id];
629     p->requested = p->requested - old + ntotal;
630     pthread_mutex_unlock(&slabs_lock);
631 }
632 
633 unsigned int slabs_available_chunks(const unsigned int id, bool *mem_flag,
634         uint64_t *total_bytes, unsigned int *chunks_perslab) {
635     unsigned int ret;
636     slabclass_t *p;
637 
638     pthread_mutex_lock(&slabs_lock);
639     p = &slabclass[id];
640     ret = p->sl_curr;
641     if (mem_flag != NULL)
642         *mem_flag = mem_limit_reached;
643     if (total_bytes != NULL)
644         *total_bytes = p->requested;
645     if (chunks_perslab != NULL)
646         *chunks_perslab = p->perslab;
647     pthread_mutex_unlock(&slabs_lock);
648     return ret;
649 }
650 
651 static pthread_cond_t slab_rebalance_cond = PTHREAD_COND_INITIALIZER;
652 static volatile int do_run_slab_thread = 1;
653 static volatile int do_run_slab_rebalance_thread = 1;
654 
655 #define DEFAULT_SLAB_BULK_CHECK 1
656 int slab_bulk_check = DEFAULT_SLAB_BULK_CHECK;
657 
658 static int slab_rebalance_start(void) {
659     slabclass_t *s_cls;
660     int no_go = 0;
661 
662     pthread_mutex_lock(&slabs_lock);
663 
664     if (slab_rebal.s_clsid < POWER_SMALLEST ||
665         slab_rebal.s_clsid > power_largest  ||
666         slab_rebal.d_clsid < SLAB_GLOBAL_PAGE_POOL ||
667         slab_rebal.d_clsid > power_largest  ||
668         slab_rebal.s_clsid == slab_rebal.d_clsid)
669         no_go = -2;
670 
671     s_cls = &slabclass[slab_rebal.s_clsid];
672 
673     if (!grow_slab_list(slab_rebal.d_clsid)) {
674         no_go = -1;
675     }
676 
677     if (s_cls->slabs < 2)
678         no_go = -3;
679 
680     if (no_go != 0) {
681         pthread_mutex_unlock(&slabs_lock);
682         return no_go; /* Should use a wrapper function... */
683     }
684 
685     /* Always kill the first available slab page as it is most likely to
686      * contain the oldest items
687      */
688     slab_rebal.slab_start = s_cls->slab_list[0];
689     slab_rebal.slab_end   = (char *)slab_rebal.slab_start +
690         (s_cls->size * s_cls->perslab);
691     slab_rebal.slab_pos   = slab_rebal.slab_start;
692     slab_rebal.done       = 0;
693 
694     /* Also tells do_item_get to search for items in this slab */
695     slab_rebalance_signal = 2;
696 
697     if (settings.verbose > 1) {
698         fprintf(stderr, "Started a slab rebalance\n");
699     }
700 
701     pthread_mutex_unlock(&slabs_lock);
702 
703     STATS_LOCK();
704     stats_state.slab_reassign_running = true;
705     STATS_UNLOCK();
706 
707     return 0;
708 }
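/* slab_rebalance_signal values: 0 = idle, 1 = a move was requested by
 * do_slabs_reassign(), 2 = a source page has been picked and the mover
 * (plus do_item_get's in-range checks) is actively working on it. */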
709 
710 /* CALLED WITH slabs_lock HELD */
711 static void *slab_rebalance_alloc(const size_t size, unsigned int id) {
712     slabclass_t *s_cls;
713     s_cls = &slabclass[slab_rebal.s_clsid];
714     int x;
715     item *new_it = NULL;
716 
717     for (x = 0; x < s_cls->perslab; x++) {
718         new_it = do_slabs_alloc(size, id, NULL, SLABS_ALLOC_NO_NEWPAGE);
719         /* check that memory isn't within the range to clear */
720         if (new_it == NULL) {
721             break;
722         }
723         if ((void *)new_it >= slab_rebal.slab_start
724             && (void *)new_it < slab_rebal.slab_end) {
725             /* Pulled something we intend to free. Mark it as freed since
726              * we've already done the work of unlinking it from the freelist.
727              */
728             s_cls->requested -= size;
729             new_it->refcount = 0;
730             new_it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
731 #ifdef DEBUG_SLAB_MOVER
732             memcpy(ITEM_key(new_it), "deadbeef", 8);
733 #endif
734             new_it = NULL;
735             slab_rebal.inline_reclaim++;
736         } else {
737             break;
738         }
739     }
740     return new_it;
741 }
742 
743 /* CALLED WITH slabs_lock HELD */
744 /* detaches item/chunk from the freelist. */
745 static void slab_rebalance_cut_free(slabclass_t *s_cls, item *it) {
746     /* Ensure this was on the freelist and nothing else. */
747     assert(it->it_flags == ITEM_SLABBED);
748     if (s_cls->slots == it) {
749         s_cls->slots = it->next;
750     }
751     if (it->next) it->next->prev = it->prev;
752     if (it->prev) it->prev->next = it->next;
753     s_cls->sl_curr--;
754 }
755 
756 enum move_status {
757     MOVE_PASS=0, MOVE_FROM_SLAB, MOVE_FROM_LRU, MOVE_BUSY, MOVE_LOCKED
758 };
759 
760 /* refcount == 0 is safe since nobody can incr while item_lock is held.
761  * refcount != 0 is impossible since flags/etc can be modified in other
762  * threads. instead, note we found a busy one and bail. logic in do_item_get
763  * will prevent busy items from continuing to be busy
764  * NOTE: This is checking it_flags outside of an item lock. I believe this
765  * works since it_flags is 8 bits, and we're only ever comparing a single bit
766  * regardless. ITEM_SLABBED bit will always be correct since we're holding the
767  * lock which modifies that bit. ITEM_LINKED won't exist if we're between an
768  * item having ITEM_SLABBED removed, and the key hasn't been added to the item
769  * yet. The memory barrier from the slabs lock should order the key write and the
770  * flags to the item?
771  * If ITEM_LINKED did exist and was just removed, but we still see it, that's
772  * still safe since it will have a valid key, which we then lock, and then
773  * recheck everything.
774  * This may not be safe on all platforms; If not, slabs_alloc() will need to
775  * seed the item key while holding slabs_lock.
776  */
777 static int slab_rebalance_move(void) {
778     slabclass_t *s_cls;
779     int x;
780     int was_busy = 0;
781     int refcount = 0;
782     uint32_t hv;
783     void *hold_lock;
784     enum move_status status = MOVE_PASS;
785 
786     pthread_mutex_lock(&slabs_lock);
787 
788     s_cls = &slabclass[slab_rebal.s_clsid];
789 
790     for (x = 0; x < slab_bulk_check; x++) {
791         hv = 0;
792         hold_lock = NULL;
793         item *it = slab_rebal.slab_pos;
794         item_chunk *ch = NULL;
795         status = MOVE_PASS;
796         if (it->it_flags & ITEM_CHUNK) {
797             /* This chunk is a chained part of a larger item. */
798             ch = (item_chunk *) it;
799             /* Instead, we use the head chunk to find the item and effectively
800              * lock the entire structure. If a chunk has ITEM_CHUNK flag, its
801              * head cannot be slabbed, so the normal routine is safe. */
802             it = ch->head;
803             assert(it->it_flags & ITEM_CHUNKED);
804         }
805 
806         /* ITEM_FETCHED when ITEM_SLABBED is overloaded to mean we've cleared
807          * the chunk for move. Only these two flags should exist.
808          */
809         if (it->it_flags != (ITEM_SLABBED|ITEM_FETCHED)) {
810             /* ITEM_SLABBED can only be added/removed under the slabs_lock */
811             if (it->it_flags & ITEM_SLABBED) {
812                 assert(ch == NULL);
813                 slab_rebalance_cut_free(s_cls, it);
814                 status = MOVE_FROM_SLAB;
815             } else if ((it->it_flags & ITEM_LINKED) != 0) {
816                 /* If it doesn't have ITEM_SLABBED, the item could be in any
817                  * state on its way to being freed or written to. If no
818                  * ITEM_SLABBED, but it's had ITEM_LINKED, it must be active
819                  * and have the key written to it already.
820                  */
821                 hv = hash(ITEM_key(it), it->nkey);
822                 if ((hold_lock = item_trylock(hv)) == NULL) {
823                     status = MOVE_LOCKED;
824                 } else {
825                     refcount = refcount_incr(&it->refcount);
826                     if (refcount == 2) { /* item is linked but not busy */
827                         /* Double check ITEM_LINKED flag here, since we're
828                          * past a memory barrier from the mutex. */
829                         if ((it->it_flags & ITEM_LINKED) != 0) {
830                             status = MOVE_FROM_LRU;
831                         } else {
832                             /* refcount == 1 + !ITEM_LINKED means the item is being
833                              * uploaded to, or was just unlinked but hasn't been freed
834                              * yet. Let it bleed off on its own and try again later */
835                             status = MOVE_BUSY;
836                         }
837                     } else {
838                         if (settings.verbose > 2) {
839                             fprintf(stderr, "Slab reassign hit a busy item: refcount: %d (%d -> %d)\n",
840                                 it->refcount, slab_rebal.s_clsid, slab_rebal.d_clsid);
841                         }
842                         status = MOVE_BUSY;
843                     }
844                     /* Item lock must be held while modifying refcount */
845                     if (status == MOVE_BUSY) {
846                         refcount_decr(&it->refcount);
847                         item_trylock_unlock(hold_lock);
848                     }
849                 }
850             } else {
851                 /* See above comment. No ITEM_SLABBED or ITEM_LINKED. Mark
852                  * busy and wait for item to complete its upload. */
853                 status = MOVE_BUSY;
854             }
855         }
856 
857         int save_item = 0;
858         item *new_it = NULL;
859         size_t ntotal = 0;
860         switch (status) {
861             case MOVE_FROM_LRU:
862                 /* Lock order is LRU locks -> slabs_lock. unlink uses LRU lock.
863                  * We only need to hold the slabs_lock while initially looking
864                  * at an item, and at this point we have an exclusive refcount
865                  * (2) + the item is locked. Drop slabs lock, drop item to
866                  * refcount 1 (just our own), then fall through and wipe it.
867                  */
868                 /* Check if expired or flushed */
869                 ntotal = ITEM_ntotal(it);
870                 /* REQUIRES slabs_lock: CHECK FOR cls->sl_curr > 0 */
871                 if (ch == NULL && (it->it_flags & ITEM_CHUNKED)) {
872                     /* Chunked should be identical to non-chunked, except we need
873                      * to swap out ntotal for the head-chunk-total. */
874                     ntotal = s_cls->size;
875                 }
876                 if ((it->exptime != 0 && it->exptime < current_time)
877                     || item_is_flushed(it)) {
878                     /* Expired, don't save. */
879                     save_item = 0;
880                 } else if (ch == NULL &&
881                         (new_it = slab_rebalance_alloc(ntotal, slab_rebal.s_clsid)) == NULL) {
882                     /* Not a chunk of an item, and nomem. */
883                     save_item = 0;
884                     slab_rebal.evictions_nomem++;
885                 } else if (ch != NULL &&
886                         (new_it = slab_rebalance_alloc(s_cls->size, slab_rebal.s_clsid)) == NULL) {
887                     /* Is a chunk of an item, and nomem. */
888                     save_item = 0;
889                     slab_rebal.evictions_nomem++;
890                 } else {
891                     /* Was whatever it was, and we have memory for it. */
892                     save_item = 1;
893                 }
894                 pthread_mutex_unlock(&slabs_lock);
895                 unsigned int requested_adjust = 0;
896                 if (save_item) {
897                     if (ch == NULL) {
898                         assert((new_it->it_flags & ITEM_CHUNKED) == 0);
899                         /* if free memory, memcpy. clear prev/next/h_bucket */
900                         memcpy(new_it, it, ntotal);
901                         new_it->prev = 0;
902                         new_it->next = 0;
903                         new_it->h_next = 0;
904                         /* These are definitely required. else fails assert */
905                         new_it->it_flags &= ~ITEM_LINKED;
906                         new_it->refcount = 0;
907                         do_item_replace(it, new_it, hv);
908                         /* Need to walk the chunks and repoint head  */
909                         if (new_it->it_flags & ITEM_CHUNKED) {
910                             item_chunk *fch = (item_chunk *) ITEM_data(new_it);
911                             fch->next->prev = fch;
912                             while (fch) {
913                                 fch->head = new_it;
914                                 fch = fch->next;
915                             }
916                         }
917                         it->refcount = 0;
918                         it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
919 #ifdef DEBUG_SLAB_MOVER
920                         memcpy(ITEM_key(it), "deadbeef", 8);
921 #endif
922                         slab_rebal.rescues++;
923                         requested_adjust = ntotal;
924                     } else {
925                         item_chunk *nch = (item_chunk *) new_it;
926                         /* Chunks always have head chunk (the main it) */
927                         ch->prev->next = nch;
928                         if (ch->next)
929                             ch->next->prev = nch;
930                         memcpy(nch, ch, ch->used + sizeof(item_chunk));
931                         ch->refcount = 0;
932                         ch->it_flags = ITEM_SLABBED|ITEM_FETCHED;
933                         slab_rebal.chunk_rescues++;
934 #ifdef DEBUG_SLAB_MOVER
935                         memcpy(ITEM_key((item *)ch), "deadbeef", 8);
936 #endif
937                         refcount_decr(&it->refcount);
938                         requested_adjust = s_cls->size;
939                     }
940                 } else {
941                     /* restore ntotal in case we tried saving a head chunk. */
942                     ntotal = ITEM_ntotal(it);
943                     do_item_unlink(it, hv);
944                     slabs_free(it, ntotal, slab_rebal.s_clsid);
945                     /* Swing around again later to remove it from the freelist. */
946                     slab_rebal.busy_items++;
947                     was_busy++;
948                 }
949                 item_trylock_unlock(hold_lock);
950                 pthread_mutex_lock(&slabs_lock);
951                 /* Always remove the ntotal, as we added it in during
952                  * do_slabs_alloc() when copying the item.
953                  */
954                 s_cls->requested -= requested_adjust;
955                 break;
956             case MOVE_FROM_SLAB:
957                 it->refcount = 0;
958                 it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
959 #ifdef DEBUG_SLAB_MOVER
960                 memcpy(ITEM_key(it), "deadbeef", 8);
961 #endif
962                 break;
963             case MOVE_BUSY:
964             case MOVE_LOCKED:
965                 slab_rebal.busy_items++;
966                 was_busy++;
967                 break;
968             case MOVE_PASS:
969                 break;
970         }
971 
972         slab_rebal.slab_pos = (char *)slab_rebal.slab_pos + s_cls->size;
973         if (slab_rebal.slab_pos >= slab_rebal.slab_end)
974             break;
975     }
976 
977     if (slab_rebal.slab_pos >= slab_rebal.slab_end) {
978         /* Some items were busy, start again from the top */
979         if (slab_rebal.busy_items) {
980             slab_rebal.slab_pos = slab_rebal.slab_start;
981             STATS_LOCK();
982             stats.slab_reassign_busy_items += slab_rebal.busy_items;
983             STATS_UNLOCK();
984             slab_rebal.busy_items = 0;
985         } else {
986             slab_rebal.done++;
987         }
988     }
989 
990     pthread_mutex_unlock(&slabs_lock);
991 
992     return was_busy;
993 }
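/* Returns the number of busy encounters in this pass; the rebalance thread
 * uses a non-zero return to back off briefly before rescanning, and
 * slab_rebal.done is only set once a full sweep of the page finds nothing
 * busy. */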
994 
995 static void slab_rebalance_finish(void) {
996     slabclass_t *s_cls;
997     slabclass_t *d_cls;
998     int x;
999     uint32_t rescues;
1000     uint32_t evictions_nomem;
1001     uint32_t inline_reclaim;
1002     uint32_t chunk_rescues;
1003 
1004     pthread_mutex_lock(&slabs_lock);
1005 
1006     s_cls = &slabclass[slab_rebal.s_clsid];
1007     d_cls = &slabclass[slab_rebal.d_clsid];
1008 
1009 #ifdef DEBUG_SLAB_MOVER
1010     /* If the algorithm is broken, live items can sneak in. */
1011     slab_rebal.slab_pos = slab_rebal.slab_start;
1012     while (1) {
1013         item *it = slab_rebal.slab_pos;
1014         assert(it->it_flags == (ITEM_SLABBED|ITEM_FETCHED));
1015         assert(memcmp(ITEM_key(it), "deadbeef", 8) == 0);
1016         it->it_flags = ITEM_SLABBED|ITEM_FETCHED;
1017         slab_rebal.slab_pos = (char *)slab_rebal.slab_pos + s_cls->size;
1018         if (slab_rebal.slab_pos >= slab_rebal.slab_end)
1019             break;
1020     }
1021 #endif
1022 
1023     /* At this point the stolen slab is completely clear.
1024      * We always kill the "first"/"oldest" slab page in the slab_list, so
1025      * shuffle the page list backwards and decrement.
1026      */
1027     s_cls->slabs--;
1028     for (x = 0; x < s_cls->slabs; x++) {
1029         s_cls->slab_list[x] = s_cls->slab_list[x+1];
1030     }
1031 
1032     d_cls->slab_list[d_cls->slabs++] = slab_rebal.slab_start;
1033     /* Don't need to split the page into chunks if we're just storing it */
1034     if (slab_rebal.d_clsid > SLAB_GLOBAL_PAGE_POOL) {
1035         memset(slab_rebal.slab_start, 0, (size_t)settings.item_size_max);
1036         split_slab_page_into_freelist(slab_rebal.slab_start,
1037             slab_rebal.d_clsid);
1038     } else if (slab_rebal.d_clsid == SLAB_GLOBAL_PAGE_POOL) {
1039         /* mem_malloc'ed might be higher than mem_limit. */
1040         memory_release();
1041     }
1042 
1043     slab_rebal.done       = 0;
1044     slab_rebal.s_clsid    = 0;
1045     slab_rebal.d_clsid    = 0;
1046     slab_rebal.slab_start = NULL;
1047     slab_rebal.slab_end   = NULL;
1048     slab_rebal.slab_pos   = NULL;
1049     evictions_nomem    = slab_rebal.evictions_nomem;
1050     inline_reclaim = slab_rebal.inline_reclaim;
1051     rescues   = slab_rebal.rescues;
1052     chunk_rescues = slab_rebal.chunk_rescues;
1053     slab_rebal.evictions_nomem    = 0;
1054     slab_rebal.inline_reclaim = 0;
1055     slab_rebal.rescues  = 0;
1056 
1057     slab_rebalance_signal = 0;
1058 
1059     pthread_mutex_unlock(&slabs_lock);
1060 
1061     STATS_LOCK();
1062     stats.slabs_moved++;
1063     stats.slab_reassign_rescues += rescues;
1064     stats.slab_reassign_evictions_nomem += evictions_nomem;
1065     stats.slab_reassign_inline_reclaim += inline_reclaim;
1066     stats.slab_reassign_chunk_rescues += chunk_rescues;
1067     stats_state.slab_reassign_running = false;
1068     STATS_UNLOCK();
1069 
1070     if (settings.verbose > 1) {
1071         fprintf(stderr, "finished a slab move\n");
1072     }
1073 }
1074 
1075 /* Slab mover thread.
1076  * Sits waiting for a condition to jump off and shovel some memory about
1077  */
1078 static void *slab_rebalance_thread(void *arg) {
1079     int was_busy = 0;
1080     /* So we first pass into cond_wait with the mutex held */
1081     mutex_lock(&slabs_rebalance_lock);
1082 
1083     while (do_run_slab_rebalance_thread) {
1084         if (slab_rebalance_signal == 1) {
1085             if (slab_rebalance_start() < 0) {
1086                 /* Handle errors with more specificity as required. */
1087                 slab_rebalance_signal = 0;
1088             }
1089 
1090             was_busy = 0;
1091         } else if (slab_rebalance_signal && slab_rebal.slab_start != NULL) {
1092             was_busy = slab_rebalance_move();
1093         }
1094 
1095         if (slab_rebal.done) {
1096             slab_rebalance_finish();
1097         } else if (was_busy) {
1098             /* Stuck waiting for some items to unlock, so slow down a bit
1099              * to give them a chance to free up */
1100             usleep(50);
1101         }
1102 
1103         if (slab_rebalance_signal == 0) {
1104             /* always hold this lock while we're running */
1105             pthread_cond_wait(&slab_rebalance_cond, &slabs_rebalance_lock);
1106         }
1107     }
1108     return NULL;
1109 }
1110 
1111 /* Iterate at most once through the slab classes and pick a "random" source.
1112  * I like this better than calling rand() since rand() is slow enough that we
1113  * can just check all of the classes once instead.
1114  */
1115 static int slabs_reassign_pick_any(int dst) {
1116     static int cur = POWER_SMALLEST - 1;
1117     int tries = power_largest - POWER_SMALLEST + 1;
1118     for (; tries > 0; tries--) {
1119         cur++;
1120         if (cur > power_largest)
1121             cur = POWER_SMALLEST;
1122         if (cur == dst)
1123             continue;
1124         if (slabclass[cur].slabs > 1) {
1125             return cur;
1126         }
1127     }
1128     return -1;
1129 }
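/* Round-robin source picker: resumes from wherever the previous call
 * stopped, skips dst, and returns the first class holding at least two
 * pages (so one can be donated), or -1 if no class qualifies. */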
1130 
1131 static enum reassign_result_type do_slabs_reassign(int src, int dst) {
1132     if (slab_rebalance_signal != 0)
1133         return REASSIGN_RUNNING;
1134 
1135     if (src == dst)
1136         return REASSIGN_SRC_DST_SAME;
1137 
1138     /* Special indicator to choose ourselves. */
1139     if (src == -1) {
1140         src = slabs_reassign_pick_any(dst);
1141         /* TODO: If we end up back at -1, return a new error type */
1142     }
1143 
1144     if (src < POWER_SMALLEST        || src > power_largest ||
1145         dst < SLAB_GLOBAL_PAGE_POOL || dst > power_largest)
1146         return REASSIGN_BADCLASS;
1147 
1148     if (slabclass[src].slabs < 2)
1149         return REASSIGN_NOSPARE;
1150 
1151     slab_rebal.s_clsid = src;
1152     slab_rebal.d_clsid = dst;
1153 
1154     slab_rebalance_signal = 1;
1155     pthread_cond_signal(&slab_rebalance_cond);
1156 
1157     return REASSIGN_OK;
1158 }
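/* Driven by the "slabs reassign <src> <dst>" protocol command and by the
 * automover; src == -1 asks the picker above to choose a donor class, and
 * dst may be SLAB_GLOBAL_PAGE_POOL (0) to park the page for later reuse. */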
1159 
1160 enum reassign_result_type slabs_reassign(int src, int dst) {
1161     enum reassign_result_type ret;
1162     if (pthread_mutex_trylock(&slabs_rebalance_lock) != 0) {
1163         return REASSIGN_RUNNING;
1164     }
1165     ret = do_slabs_reassign(src, dst);
1166     pthread_mutex_unlock(&slabs_rebalance_lock);
1167     return ret;
1168 }
1169 
1170 /* If we hold this lock, rebalancer can't wake up or move */
1171 void slabs_rebalancer_pause(void) {
1172     pthread_mutex_lock(&slabs_rebalance_lock);
1173 }
1174 
1175 void slabs_rebalancer_resume(void) {
1176     pthread_mutex_unlock(&slabs_rebalance_lock);
1177 }
1178 
1179 static pthread_t rebalance_tid;
1180 
1181 int start_slab_maintenance_thread(void) {
1182     int ret;
1183     slab_rebalance_signal = 0;
1184     slab_rebal.slab_start = NULL;
1185     char *env = getenv("MEMCACHED_SLAB_BULK_CHECK");
1186     if (env != NULL) {
1187         slab_bulk_check = atoi(env);
1188         if (slab_bulk_check == 0) {
1189             slab_bulk_check = DEFAULT_SLAB_BULK_CHECK;
1190         }
1191     }
1192 
1193     if (pthread_cond_init(&slab_rebalance_cond, NULL) != 0) {
1194         fprintf(stderr, "Can't initialize rebalance condition\n");
1195         return -1;
1196     }
1197     pthread_mutex_init(&slabs_rebalance_lock, NULL);
1198 
1199     if ((ret = pthread_create(&rebalance_tid, NULL,
1200                               slab_rebalance_thread, NULL)) != 0) {
1201         fprintf(stderr, "Can't create rebal thread: %s\n", strerror(ret));
1202         return -1;
1203     }
1204     return 0;
1205 }
1206 
1207 /* The maintenance thread is on a sleep/loop cycle, so it should join after a
1208  * short wait */
1209 void stop_slab_maintenance_thread(void) {
1210     mutex_lock(&slabs_rebalance_lock);
1211     do_run_slab_thread = 0;
1212     do_run_slab_rebalance_thread = 0;
1213     pthread_cond_signal(&slab_rebalance_cond);
1214     pthread_mutex_unlock(&slabs_rebalance_lock);
1215 
1216     /* Wait for the maintenance thread to stop */
1217     pthread_join(rebalance_tid, NULL);
1218 }
1219