xref: /memcached-1.4.29/items.c (revision 690a9a9d)
1f6d334e0SBrad Fitzpatrick /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
256b8339eSSteven Grimm #include "memcached.h"
360d70942SAnatoly Vorobey #include <sys/stat.h>
460d70942SAnatoly Vorobey #include <sys/socket.h>
560d70942SAnatoly Vorobey #include <sys/resource.h>
660d70942SAnatoly Vorobey #include <fcntl.h>
7c6975ef4SPaul Lindner #include <netinet/in.h>
8c6975ef4SPaul Lindner #include <errno.h>
960d70942SAnatoly Vorobey #include <stdlib.h>
1060d70942SAnatoly Vorobey #include <stdio.h>
11e688e97dSNatanael Copa #include <signal.h>
1260d70942SAnatoly Vorobey #include <string.h>
131b533267SBrad Fitzpatrick #include <time.h>
1443202f2fSBrad Fitzpatrick #include <assert.h>
150d1f505cSdormando #include <unistd.h>
1686969ea4SBrad Fitzpatrick 
1777dde9f9SPaul Lindner /* Forward Declarations */
1877dde9f9SPaul Lindner static void item_link_q(item *it);
1977dde9f9SPaul Lindner static void item_unlink_q(item *it);
2077dde9f9SPaul Lindner 
219bce42f2Sdormando #define HOT_LRU 0
229bce42f2Sdormando #define WARM_LRU 64
239bce42f2Sdormando #define COLD_LRU 128
249bce42f2Sdormando #define NOEXP_LRU 192
25bbef930eSdormando static unsigned int lru_type_map[4] = {HOT_LRU, WARM_LRU, COLD_LRU, NOEXP_LRU};
269bce42f2Sdormando 
274de89c8cSdormando #define CLEAR_LRU(id) (id & ~(3<<6))
284de89c8cSdormando 
29fbdc57c8SDustin Sallings #define LARGEST_ID POWER_LARGEST
305e0bb476Sdormando typedef struct {
31b7ba2969Sdormando     uint64_t evicted;
32b7ba2969Sdormando     uint64_t evicted_nonzero;
33b7ba2969Sdormando     uint64_t reclaimed;
34b7ba2969Sdormando     uint64_t outofmemory;
35b7ba2969Sdormando     uint64_t tailrepairs;
36cb01d504Sdormando     uint64_t expired_unfetched; /* items reclaimed but never touched */
37cb01d504Sdormando     uint64_t evicted_unfetched; /* items evicted but never touched */
38649f7f01Sdormando     uint64_t crawler_reclaimed;
39c10feb9eSdormando     uint64_t crawler_items_checked;
402af4f6e6Sdormando     uint64_t lrutail_reflocked;
41f7bf26cbSdormando     uint64_t moves_to_cold;
42f7bf26cbSdormando     uint64_t moves_to_warm;
43f7bf26cbSdormando     uint64_t moves_within_lru;
44a0390847Sdormando     uint64_t direct_reclaims;
45f7bf26cbSdormando     rel_time_t evicted_time;
465e0bb476Sdormando } itemstats_t;
475e0bb476Sdormando 
48e708513aSdormando typedef struct {
49a66b28acSdormando     uint64_t histo[61];
50e708513aSdormando     uint64_t ttl_hourplus;
51e708513aSdormando     uint64_t noexp;
52e708513aSdormando     uint64_t reclaimed;
53e708513aSdormando     uint64_t seen;
54e708513aSdormando     rel_time_t start_time;
55e708513aSdormando     rel_time_t end_time;
56e708513aSdormando     bool run_complete;
57e708513aSdormando } crawlerstats_t;
58e708513aSdormando 
5960d70942SAnatoly Vorobey static item *heads[LARGEST_ID];
6060d70942SAnatoly Vorobey static item *tails[LARGEST_ID];
610d1f505cSdormando static crawler crawlers[LARGEST_ID];
625e0bb476Sdormando static itemstats_t itemstats[LARGEST_ID];
6377dde9f9SPaul Lindner static unsigned int sizes[LARGEST_ID];
64ee461d11Sdormando static uint64_t sizes_bytes[LARGEST_ID];
65ae6f4267Sdormando static unsigned int *stats_sizes_hist = NULL;
668d82383fSdormando static uint64_t stats_sizes_cas_min = 0;
67ae6f4267Sdormando static int stats_sizes_buckets = 0;
68e708513aSdormando static crawlerstats_t crawlerstats[MAX_NUMBER_OF_SLAB_CLASSES];
6986969ea4SBrad Fitzpatrick 
706be2b6c0Sdormando static int crawler_count = 0;
710d1f505cSdormando static volatile int do_run_lru_crawler_thread = 0;
72fb269897Sdormando static volatile int do_run_lru_maintainer_thread = 0;
736be2b6c0Sdormando static int lru_crawler_initialized = 0;
74fb269897Sdormando static int lru_maintainer_initialized = 0;
75fb269897Sdormando static int lru_maintainer_check_clsid = 0;
766be2b6c0Sdormando static pthread_mutex_t lru_crawler_lock = PTHREAD_MUTEX_INITIALIZER;
776be2b6c0Sdormando static pthread_cond_t  lru_crawler_cond = PTHREAD_COND_INITIALIZER;
78e708513aSdormando static pthread_mutex_t lru_crawler_stats_lock = PTHREAD_MUTEX_INITIALIZER;
79fb269897Sdormando static pthread_mutex_t lru_maintainer_lock = PTHREAD_MUTEX_INITIALIZER;
8069d1c699Sdormando static pthread_mutex_t cas_id_lock = PTHREAD_MUTEX_INITIALIZER;
81ae6f4267Sdormando static pthread_mutex_t stats_sizes_lock = PTHREAD_MUTEX_INITIALIZER;
820d1f505cSdormando 
item_stats_reset(void)8353180103STrond Norbye void item_stats_reset(void) {
849bce42f2Sdormando     int i;
859bce42f2Sdormando     for (i = 0; i < LARGEST_ID; i++) {
869bce42f2Sdormando         pthread_mutex_lock(&lru_locks[i]);
879bce42f2Sdormando         memset(&itemstats[i], 0, sizeof(itemstats_t));
889bce42f2Sdormando         pthread_mutex_unlock(&lru_locks[i]);
899bce42f2Sdormando     }
9053180103STrond Norbye }
9153180103STrond Norbye 
92a0390847Sdormando static int lru_pull_tail(const int orig_id, const int cur_lru,
93*690a9a9dSEiichi Tsukata         const uint64_t total_bytes, const bool do_evict);
9487ff9dc0Sdormando static int lru_crawler_start(uint32_t id, uint32_t remaining);
9553180103STrond Norbye 
96e4a45965SDustin Sallings /* Get the next CAS id for a new item. */
9769d1c699Sdormando /* TODO: refactor some atomics for this. */
get_cas_id(void)98df1b7e42STrond Norbye uint64_t get_cas_id(void) {
99579466f2Sdormando     static uint64_t cas_id = 0;
10069d1c699Sdormando     pthread_mutex_lock(&cas_id_lock);
10169d1c699Sdormando     uint64_t next_id = ++cas_id;
10269d1c699Sdormando     pthread_mutex_unlock(&cas_id_lock);
10369d1c699Sdormando     return next_id;
104e4a45965SDustin Sallings }
105e4a45965SDustin Sallings 
item_is_flushed(item * it)106004e2211Sdormando int item_is_flushed(item *it) {
10790593dcaSdormando     rel_time_t oldest_live = settings.oldest_live;
10890593dcaSdormando     uint64_t cas = ITEM_get_cas(it);
10990593dcaSdormando     uint64_t oldest_cas = settings.oldest_cas;
11090593dcaSdormando     if (oldest_live == 0 || oldest_live > current_time)
11190593dcaSdormando         return 0;
11290593dcaSdormando     if ((it->time <= oldest_live)
11390593dcaSdormando             || (oldest_cas != 0 && cas != 0 && cas < oldest_cas)) {
11490593dcaSdormando         return 1;
11590593dcaSdormando     }
11690593dcaSdormando     return 0;
11790593dcaSdormando }
11890593dcaSdormando 
noexp_lru_size(int slabs_clsid)1194de89c8cSdormando static unsigned int noexp_lru_size(int slabs_clsid) {
1204de89c8cSdormando     int id = CLEAR_LRU(slabs_clsid);
121d6e96467Sdormando     id |= NOEXP_LRU;
1224de89c8cSdormando     unsigned int ret;
1234de89c8cSdormando     pthread_mutex_lock(&lru_locks[id]);
124ee461d11Sdormando     ret = sizes_bytes[id];
1254de89c8cSdormando     pthread_mutex_unlock(&lru_locks[id]);
1264de89c8cSdormando     return ret;
1274de89c8cSdormando }
1284de89c8cSdormando 
12956b8339eSSteven Grimm /* Enable this for reference-count debugging. */
13056b8339eSSteven Grimm #if 0
13156b8339eSSteven Grimm # define DEBUG_REFCNT(it,op) \
13256b8339eSSteven Grimm                 fprintf(stderr, "item %x refcnt(%c) %d %c%c%c\n", \
13356b8339eSSteven Grimm                         it, op, it->refcount, \
13456b8339eSSteven Grimm                         (it->it_flags & ITEM_LINKED) ? 'L' : ' ', \
1355da8dbabSTrond Norbye                         (it->it_flags & ITEM_SLABBED) ? 'S' : ' ')
13656b8339eSSteven Grimm #else
13756b8339eSSteven Grimm # define DEBUG_REFCNT(it,op) while(0)
13856b8339eSSteven Grimm #endif
13986969ea4SBrad Fitzpatrick 
14044ef96eeSPaul Lindner /**
141c9607c6dSBrad Fitzpatrick  * Generates the variable-sized part of the header for an object.
142c9607c6dSBrad Fitzpatrick  *
143217dcce0SSteven Grimm  * key     - The key
144217dcce0SSteven Grimm  * nkey    - The length of the key
145217dcce0SSteven Grimm  * flags   - key flags
146217dcce0SSteven Grimm  * nbytes  - Number of bytes to hold value and addition CRLF terminator
147c9607c6dSBrad Fitzpatrick  * suffix  - Buffer for the "VALUE" line suffix (flags, size).
148c9607c6dSBrad Fitzpatrick  * nsuffix - The length of the suffix is stored here.
149c9607c6dSBrad Fitzpatrick  *
150c9607c6dSBrad Fitzpatrick  * Returns the total size of the header.
151c9607c6dSBrad Fitzpatrick  */
item_make_header(const uint8_t nkey,const unsigned int flags,const int nbytes,char * suffix,uint8_t * nsuffix)152181ef834Sdormando static size_t item_make_header(const uint8_t nkey, const unsigned int flags, const int nbytes,
15377dde9f9SPaul Lindner                      char *suffix, uint8_t *nsuffix) {
15477dde9f9SPaul Lindner     /* suffix is defined at 40 chars elsewhere.. */
155181ef834Sdormando     *nsuffix = (uint8_t) snprintf(suffix, 40, " %u %d\r\n", flags, nbytes - 2);
156217dcce0SSteven Grimm     return sizeof(item) + nkey + *nsuffix + nbytes;
157c9607c6dSBrad Fitzpatrick }
15886969ea4SBrad Fitzpatrick 
do_item_alloc(char * key,const size_t nkey,const unsigned int flags,const rel_time_t exptime,const int nbytes)159181ef834Sdormando item *do_item_alloc(char *key, const size_t nkey, const unsigned int flags,
160*690a9a9dSEiichi Tsukata                     const rel_time_t exptime, const int nbytes) {
161a0390847Sdormando     int i;
16277dde9f9SPaul Lindner     uint8_t nsuffix;
163d3807d06STrond Norbye     item *it = NULL;
164c9607c6dSBrad Fitzpatrick     char suffix[40];
16578955139STim Yardley     size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
166eda68b70STrond Norbye     if (settings.use_cas) {
167eda68b70STrond Norbye         ntotal += sizeof(uint64_t);
168eda68b70STrond Norbye     }
16986969ea4SBrad Fitzpatrick 
17078955139STim Yardley     unsigned int id = slabs_clsid(ntotal);
17160d70942SAnatoly Vorobey     if (id == 0)
17260d70942SAnatoly Vorobey         return 0;
17386969ea4SBrad Fitzpatrick 
174a0390847Sdormando     /* If no memory is available, attempt a direct LRU juggle/eviction */
175f5f8f1a0Sdormando     /* This is a race in order to simplify lru_pull_tail; in cases where
176a0390847Sdormando      * locked items are on the tail, you want them to fall out and cause
177a0390847Sdormando      * occasional OOM's, rather than internally work around them.
178f5f8f1a0Sdormando      * This also gives one fewer code path for slab alloc/free
179a0390847Sdormando      */
180ee461d11Sdormando     /* TODO: if power_largest, try a lot more times? or a number of times
18151a828b9Sdormando      * based on how many chunks the new object should take up?
18251a828b9Sdormando      * or based on the size of an object lru_pull_tail() says it evicted?
18351a828b9Sdormando      * This is a classical GC problem if "large items" are of too varying of
18451a828b9Sdormando      * sizes. This is actually okay here since the larger the data, the more
18551a828b9Sdormando      * bandwidth it takes, the more time we can loop in comparison to serving
18651a828b9Sdormando      * and replacing small items.
18751a828b9Sdormando      */
18851a828b9Sdormando     for (i = 0; i < 10; i++) {
189ee461d11Sdormando         uint64_t total_bytes;
190d9edfefdSdormando         /* Try to reclaim memory first */
191d9edfefdSdormando         if (!settings.lru_maintainer_thread) {
192*690a9a9dSEiichi Tsukata             lru_pull_tail(id, COLD_LRU, 0, false);
193d9edfefdSdormando         }
194ee461d11Sdormando         it = slabs_alloc(ntotal, id, &total_bytes, 0);
195ee461d11Sdormando 
1964de89c8cSdormando         if (settings.expirezero_does_not_evict)
197ee461d11Sdormando             total_bytes -= noexp_lru_size(id);
198ee461d11Sdormando 
199a0390847Sdormando         if (it == NULL) {
200d9edfefdSdormando             if (settings.lru_maintainer_thread) {
201*690a9a9dSEiichi Tsukata                 lru_pull_tail(id, HOT_LRU, total_bytes, false);
202*690a9a9dSEiichi Tsukata                 lru_pull_tail(id, WARM_LRU, total_bytes, false);
203*690a9a9dSEiichi Tsukata                 if (lru_pull_tail(id, COLD_LRU, total_bytes, true) <= 0)
20451a828b9Sdormando                     break;
205a0390847Sdormando             } else {
206*690a9a9dSEiichi Tsukata                 if (lru_pull_tail(id, COLD_LRU, 0, true) <= 0)
20751a828b9Sdormando                     break;
208d9edfefdSdormando             }
209d9edfefdSdormando         } else {
210a0390847Sdormando             break;
211a0390847Sdormando         }
212a0390847Sdormando     }
213a0390847Sdormando 
214a0390847Sdormando     if (i > 0) {
215a0390847Sdormando         pthread_mutex_lock(&lru_locks[id]);
216a0390847Sdormando         itemstats[id].direct_reclaims += i;
217a0390847Sdormando         pthread_mutex_unlock(&lru_locks[id]);
218a0390847Sdormando     }
2199bce42f2Sdormando 
2209bce42f2Sdormando     if (it == NULL) {
2219bce42f2Sdormando         pthread_mutex_lock(&lru_locks[id]);
2229bce42f2Sdormando         itemstats[id].outofmemory++;
2239bce42f2Sdormando         pthread_mutex_unlock(&lru_locks[id]);
2249bce42f2Sdormando         return NULL;
2259bce42f2Sdormando     }
2269bce42f2Sdormando 
2279bce42f2Sdormando     assert(it->slabs_clsid == 0);
2289bce42f2Sdormando     //assert(it != heads[id]);
2299bce42f2Sdormando 
23062415f16Sdormando     /* Refcount is seeded to 1 by slabs_alloc() */
2310567967aSdormando     it->next = it->prev = 0;
2320567967aSdormando 
2339bce42f2Sdormando     /* Items are initially loaded into the HOT_LRU. This is '0' but I want at
2349bce42f2Sdormando      * least a note here. Compiler (hopefully?) optimizes this out.
2359bce42f2Sdormando      */
236d9edfefdSdormando     if (settings.lru_maintainer_thread) {
2374de89c8cSdormando         if (exptime == 0 && settings.expirezero_does_not_evict) {
2384de89c8cSdormando             id |= NOEXP_LRU;
2394de89c8cSdormando         } else {
2409bce42f2Sdormando             id |= HOT_LRU;
2414de89c8cSdormando         }
242d9edfefdSdormando     } else {
243d9edfefdSdormando         /* There is only COLD in compat-mode */
244d9edfefdSdormando         id |= COLD_LRU;
245d9edfefdSdormando     }
2469bce42f2Sdormando     it->slabs_clsid = id;
2479bce42f2Sdormando 
2489bce42f2Sdormando     DEBUG_REFCNT(it, '*');
2490567967aSdormando     it->it_flags |= settings.use_cas ? ITEM_CAS : 0;
2509bce42f2Sdormando     it->nkey = nkey;
2519bce42f2Sdormando     it->nbytes = nbytes;
2529bce42f2Sdormando     memcpy(ITEM_key(it), key, nkey);
2539bce42f2Sdormando     it->exptime = exptime;
2549bce42f2Sdormando     memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
2559bce42f2Sdormando     it->nsuffix = nsuffix;
2560567967aSdormando 
2570567967aSdormando     /* Need to shuffle the pointer stored in h_next into it->data. */
2580567967aSdormando     if (it->it_flags & ITEM_CHUNKED) {
2590567967aSdormando         item_chunk *chunk = (item_chunk *) ITEM_data(it);
2600567967aSdormando 
2610567967aSdormando         chunk->next = (item_chunk *) it->h_next;
2620567967aSdormando         chunk->prev = 0;
2630567967aSdormando         chunk->head = it;
2640567967aSdormando         /* Need to chain back into the head's chunk */
2650567967aSdormando         chunk->next->prev = chunk;
2660567967aSdormando         chunk->size = chunk->next->size - ((char *)chunk - (char *)it);
2670567967aSdormando         chunk->used = 0;
2680567967aSdormando         assert(chunk->size > 0);
2690567967aSdormando     }
2700567967aSdormando     it->h_next = 0;
2710567967aSdormando 
2729bce42f2Sdormando     return it;
2739bce42f2Sdormando }
2749bce42f2Sdormando 
item_free(item * it)27560d70942SAnatoly Vorobey void item_free(item *it) {
27677dde9f9SPaul Lindner     size_t ntotal = ITEM_ntotal(it);
2778d3ac826Sdormando     unsigned int clsid;
27843202f2fSBrad Fitzpatrick     assert((it->it_flags & ITEM_LINKED) == 0);
2791baa8961SBrad Fitzpatrick     assert(it != heads[it->slabs_clsid]);
2801baa8961SBrad Fitzpatrick     assert(it != tails[it->slabs_clsid]);
281c08383afSBrad Fitzpatrick     assert(it->refcount == 0);
28286969ea4SBrad Fitzpatrick 
283b7b9d5f4SBrad Fitzpatrick     /* so slab size changer can tell later if item is already free or not */
2849bce42f2Sdormando     clsid = ITEM_clsid(it);
28556b8339eSSteven Grimm     DEBUG_REFCNT(it, 'F');
2868d3ac826Sdormando     slabs_free(it, ntotal, clsid);
28760d70942SAnatoly Vorobey }
28886969ea4SBrad Fitzpatrick 
28944ef96eeSPaul Lindner /**
290c9607c6dSBrad Fitzpatrick  * Returns true if an item will fit in the cache (its size does not exceed
291c9607c6dSBrad Fitzpatrick  * the maximum for a cache entry.)
292c9607c6dSBrad Fitzpatrick  */
item_size_ok(const size_t nkey,const int flags,const int nbytes)29310862f60SPaul Lindner bool item_size_ok(const size_t nkey, const int flags, const int nbytes) {
294c9607c6dSBrad Fitzpatrick     char prefix[40];
29577dde9f9SPaul Lindner     uint8_t nsuffix;
29686969ea4SBrad Fitzpatrick 
297911c9d0aSTrond Norbye     size_t ntotal = item_make_header(nkey + 1, flags, nbytes,
298911c9d0aSTrond Norbye                                      prefix, &nsuffix);
299911c9d0aSTrond Norbye     if (settings.use_cas) {
300911c9d0aSTrond Norbye         ntotal += sizeof(uint64_t);
301911c9d0aSTrond Norbye     }
302911c9d0aSTrond Norbye 
303911c9d0aSTrond Norbye     return slabs_clsid(ntotal) != 0;
304c9607c6dSBrad Fitzpatrick }
30586969ea4SBrad Fitzpatrick 
do_item_link_q(item * it)3069bce42f2Sdormando static void do_item_link_q(item *it) { /* item is the new head */
30760d70942SAnatoly Vorobey     item **head, **tail;
30854326f42SBrad Fitzpatrick     assert((it->it_flags & ITEM_SLABBED) == 0);
30986969ea4SBrad Fitzpatrick 
31060d70942SAnatoly Vorobey     head = &heads[it->slabs_clsid];
31160d70942SAnatoly Vorobey     tail = &tails[it->slabs_clsid];
31243202f2fSBrad Fitzpatrick     assert(it != *head);
31343202f2fSBrad Fitzpatrick     assert((*head && *tail) || (*head == 0 && *tail == 0));
31460d70942SAnatoly Vorobey     it->prev = 0;
31560d70942SAnatoly Vorobey     it->next = *head;
31660d70942SAnatoly Vorobey     if (it->next) it->next->prev = it;
31760d70942SAnatoly Vorobey     *head = it;
3186d74e134SBrad Fitzpatrick     if (*tail == 0) *tail = it;
31960d70942SAnatoly Vorobey     sizes[it->slabs_clsid]++;
320ee461d11Sdormando     sizes_bytes[it->slabs_clsid] += it->nbytes;
32160d70942SAnatoly Vorobey     return;
32260d70942SAnatoly Vorobey }
32386969ea4SBrad Fitzpatrick 
item_link_q(item * it)3249bce42f2Sdormando static void item_link_q(item *it) {
3259bce42f2Sdormando     pthread_mutex_lock(&lru_locks[it->slabs_clsid]);
3269bce42f2Sdormando     do_item_link_q(it);
3279bce42f2Sdormando     pthread_mutex_unlock(&lru_locks[it->slabs_clsid]);
3289bce42f2Sdormando }
3299bce42f2Sdormando 
do_item_unlink_q(item * it)3309bce42f2Sdormando static void do_item_unlink_q(item *it) {
33160d70942SAnatoly Vorobey     item **head, **tail;
33260d70942SAnatoly Vorobey     head = &heads[it->slabs_clsid];
33360d70942SAnatoly Vorobey     tail = &tails[it->slabs_clsid];
33486969ea4SBrad Fitzpatrick 
335c3b1a8c9SBrad Fitzpatrick     if (*head == it) {
336c3b1a8c9SBrad Fitzpatrick         assert(it->prev == 0);
337c3b1a8c9SBrad Fitzpatrick         *head = it->next;
338c3b1a8c9SBrad Fitzpatrick     }
339c3b1a8c9SBrad Fitzpatrick     if (*tail == it) {
340c3b1a8c9SBrad Fitzpatrick         assert(it->next == 0);
341c3b1a8c9SBrad Fitzpatrick         *tail = it->prev;
342c3b1a8c9SBrad Fitzpatrick     }
343c3b1a8c9SBrad Fitzpatrick     assert(it->next != it);
344c3b1a8c9SBrad Fitzpatrick     assert(it->prev != it);
34586969ea4SBrad Fitzpatrick 
34660d70942SAnatoly Vorobey     if (it->next) it->next->prev = it->prev;
34760d70942SAnatoly Vorobey     if (it->prev) it->prev->next = it->next;
34860d70942SAnatoly Vorobey     sizes[it->slabs_clsid]--;
349ee461d11Sdormando     sizes_bytes[it->slabs_clsid] -= it->nbytes;
35060d70942SAnatoly Vorobey     return;
35160d70942SAnatoly Vorobey }
35286969ea4SBrad Fitzpatrick 
item_unlink_q(item * it)3539bce42f2Sdormando static void item_unlink_q(item *it) {
3549bce42f2Sdormando     pthread_mutex_lock(&lru_locks[it->slabs_clsid]);
3559bce42f2Sdormando     do_item_unlink_q(it);
3569bce42f2Sdormando     pthread_mutex_unlock(&lru_locks[it->slabs_clsid]);
3579bce42f2Sdormando }
3589bce42f2Sdormando 
do_item_link(item * it,const uint32_t hv)359bab9acd1Sdormando int do_item_link(item *it, const uint32_t hv) {
36080ec0955STrond Norbye     MEMCACHED_ITEM_LINK(ITEM_key(it), it->nkey, it->nbytes);
36154326f42SBrad Fitzpatrick     assert((it->it_flags & (ITEM_LINKED|ITEM_SLABBED)) == 0);
36260d70942SAnatoly Vorobey     it->it_flags |= ITEM_LINKED;
363c9607c6dSBrad Fitzpatrick     it->time = current_time;
36486969ea4SBrad Fitzpatrick 
36556b8339eSSteven Grimm     STATS_LOCK();
366cb01d504Sdormando     stats_state.curr_bytes += ITEM_ntotal(it);
367cb01d504Sdormando     stats_state.curr_items += 1;
36860d70942SAnatoly Vorobey     stats.total_items += 1;
36956b8339eSSteven Grimm     STATS_UNLOCK();
37086969ea4SBrad Fitzpatrick 
371c2cdee22SDustin Sallings     /* Allocate a new CAS ID on link. */
372eda68b70STrond Norbye     ITEM_set_cas(it, (settings.use_cas) ? get_cas_id() : 0);
3738fe5bf1fSdormando     assoc_insert(it, hv);
37460d70942SAnatoly Vorobey     item_link_q(it);
37569d1c699Sdormando     refcount_incr(&it->refcount);
376ae6f4267Sdormando     item_stats_sizes_add(it);
37786969ea4SBrad Fitzpatrick 
37860d70942SAnatoly Vorobey     return 1;
37960d70942SAnatoly Vorobey }
38086969ea4SBrad Fitzpatrick 
do_item_unlink(item * it,const uint32_t hv)381bab9acd1Sdormando void do_item_unlink(item *it, const uint32_t hv) {
38280ec0955STrond Norbye     MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes);
38377dde9f9SPaul Lindner     if ((it->it_flags & ITEM_LINKED) != 0) {
38460d70942SAnatoly Vorobey         it->it_flags &= ~ITEM_LINKED;
38556b8339eSSteven Grimm         STATS_LOCK();
386cb01d504Sdormando         stats_state.curr_bytes -= ITEM_ntotal(it);
387cb01d504Sdormando         stats_state.curr_items -= 1;
38856b8339eSSteven Grimm         STATS_UNLOCK();
389ae6f4267Sdormando         item_stats_sizes_remove(it);
3908fe5bf1fSdormando         assoc_delete(ITEM_key(it), it->nkey, hv);
3918fe5bf1fSdormando         item_unlink_q(it);
392f4983b20Sdormando         do_item_remove(it);
3938fe5bf1fSdormando     }
3948fe5bf1fSdormando }
3958fe5bf1fSdormando 
39640b7b4b2Sdormando /* FIXME: Is it necessary to keep this copy/pasted code? */
do_item_unlink_nolock(item * it,const uint32_t hv)3978fe5bf1fSdormando void do_item_unlink_nolock(item *it, const uint32_t hv) {
3988fe5bf1fSdormando     MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes);
3998fe5bf1fSdormando     if ((it->it_flags & ITEM_LINKED) != 0) {
4008fe5bf1fSdormando         it->it_flags &= ~ITEM_LINKED;
4018fe5bf1fSdormando         STATS_LOCK();
402cb01d504Sdormando         stats_state.curr_bytes -= ITEM_ntotal(it);
403cb01d504Sdormando         stats_state.curr_items -= 1;
4048fe5bf1fSdormando         STATS_UNLOCK();
405ae6f4267Sdormando         item_stats_sizes_remove(it);
406bab9acd1Sdormando         assoc_delete(ITEM_key(it), it->nkey, hv);
4079bce42f2Sdormando         do_item_unlink_q(it);
408f4983b20Sdormando         do_item_remove(it);
40960d70942SAnatoly Vorobey     }
41056b8339eSSteven Grimm }
41186969ea4SBrad Fitzpatrick 
do_item_remove(item * it)41256b8339eSSteven Grimm void do_item_remove(item *it) {
41380ec0955STrond Norbye     MEMCACHED_ITEM_REMOVE(ITEM_key(it), it->nkey, it->nbytes);
41454326f42SBrad Fitzpatrick     assert((it->it_flags & ITEM_SLABBED) == 0);
415cb9c269bSdormando     assert(it->refcount > 0);
416193a653eSdormando 
4173b961388Sdormando     if (refcount_decr(&it->refcount) == 0) {
41860d70942SAnatoly Vorobey         item_free(it);
41960d70942SAnatoly Vorobey     }
42060d70942SAnatoly Vorobey }
42186969ea4SBrad Fitzpatrick 
422f2a4e5b4Sdormando /* Copy/paste to avoid adding two extra branches for all common calls, since
423f5f8f1a0Sdormando  * _nolock is only used in an uncommon case where we want to relink. */
do_item_update_nolock(item * it)424fb269897Sdormando void do_item_update_nolock(item *it) {
425f2a4e5b4Sdormando     MEMCACHED_ITEM_UPDATE(ITEM_key(it), it->nkey, it->nbytes);
426f2a4e5b4Sdormando     if (it->time < current_time - ITEM_UPDATE_INTERVAL) {
427f2a4e5b4Sdormando         assert((it->it_flags & ITEM_SLABBED) == 0);
428f2a4e5b4Sdormando 
429f2a4e5b4Sdormando         if ((it->it_flags & ITEM_LINKED) != 0) {
430fb269897Sdormando             do_item_unlink_q(it);
431f2a4e5b4Sdormando             it->time = current_time;
432fb269897Sdormando             do_item_link_q(it);
433f2a4e5b4Sdormando         }
434f2a4e5b4Sdormando     }
435f2a4e5b4Sdormando }
436f2a4e5b4Sdormando 
437f5f8f1a0Sdormando /* Bump the last accessed time, or relink if we're in compat mode */
do_item_update(item * it)43856b8339eSSteven Grimm void do_item_update(item *it) {
43980ec0955STrond Norbye     MEMCACHED_ITEM_UPDATE(ITEM_key(it), it->nkey, it->nbytes);
440217dcce0SSteven Grimm     if (it->time < current_time - ITEM_UPDATE_INTERVAL) {
44154326f42SBrad Fitzpatrick         assert((it->it_flags & ITEM_SLABBED) == 0);
44286969ea4SBrad Fitzpatrick 
443193a653eSdormando         if ((it->it_flags & ITEM_LINKED) != 0) {
444c9607c6dSBrad Fitzpatrick             it->time = current_time;
445d9edfefdSdormando             if (!settings.lru_maintainer_thread) {
446a0390847Sdormando                 item_unlink_q(it);
44760d70942SAnatoly Vorobey                 item_link_q(it);
448d9edfefdSdormando             }
449217dcce0SSteven Grimm         }
45056b8339eSSteven Grimm     }
451a0390847Sdormando }
45286969ea4SBrad Fitzpatrick 
do_item_replace(item * it,item * new_it,const uint32_t hv)453bab9acd1Sdormando int do_item_replace(item *it, item *new_it, const uint32_t hv) {
45480ec0955STrond Norbye     MEMCACHED_ITEM_REPLACE(ITEM_key(it), it->nkey, it->nbytes,
45580ec0955STrond Norbye                            ITEM_key(new_it), new_it->nkey, new_it->nbytes);
45654326f42SBrad Fitzpatrick     assert((it->it_flags & ITEM_SLABBED) == 0);
45786969ea4SBrad Fitzpatrick 
458bab9acd1Sdormando     do_item_unlink(it, hv);
459bab9acd1Sdormando     return do_item_link(new_it, hv);
46060d70942SAnatoly Vorobey }
46186969ea4SBrad Fitzpatrick 
46277dde9f9SPaul Lindner /*@null@*/
46307c632f7Sdormando /* This is walking the line of violating lock order, but I think it's safe.
46407c632f7Sdormando  * If the LRU lock is held, an item in the LRU cannot be wiped and freed.
46507c632f7Sdormando  * The data could possibly be overwritten, but this is only accessing the
46607c632f7Sdormando  * headers.
46707c632f7Sdormando  * It may not be the best idea to leave it like this, but for now it's safe.
468f5f8f1a0Sdormando  * FIXME: only dumps the hot LRU with the new LRU's.
46907c632f7Sdormando  */
item_cachedump(const unsigned int slabs_clsid,const unsigned int limit,unsigned int * bytes)4709bce42f2Sdormando char *item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes) {
47144ef96eeSPaul Lindner     unsigned int memlimit = 2 * 1024 * 1024;   /* 2MB max response size */
47260d70942SAnatoly Vorobey     char *buffer;
47377dde9f9SPaul Lindner     unsigned int bufcurr;
47460d70942SAnatoly Vorobey     item *it;
47544ef96eeSPaul Lindner     unsigned int len;
47644ef96eeSPaul Lindner     unsigned int shown = 0;
477ecdb0114SDustin Sallings     char key_temp[KEY_MAX_LENGTH + 1];
4788f5960e0SBrad Fitzpatrick     char temp[512];
479d9edfefdSdormando     unsigned int id = slabs_clsid;
480d9edfefdSdormando     if (!settings.lru_maintainer_thread)
481d9edfefdSdormando         id |= COLD_LRU;
48286969ea4SBrad Fitzpatrick 
483d9edfefdSdormando     pthread_mutex_lock(&lru_locks[id]);
484d9edfefdSdormando     it = heads[id];
48586969ea4SBrad Fitzpatrick 
48677dde9f9SPaul Lindner     buffer = malloc((size_t)memlimit);
4879bce42f2Sdormando     if (buffer == 0) {
4889bce42f2Sdormando         return NULL;
4899bce42f2Sdormando     }
49060d70942SAnatoly Vorobey     bufcurr = 0;
49186969ea4SBrad Fitzpatrick 
49277dde9f9SPaul Lindner     while (it != NULL && (limit == 0 || shown < limit)) {
493ecdb0114SDustin Sallings         assert(it->nkey <= KEY_MAX_LENGTH);
494b3bbd3b2Sdormando         if (it->nbytes == 0 && it->nkey == 0) {
495b3bbd3b2Sdormando             it = it->next;
496b3bbd3b2Sdormando             continue;
497b3bbd3b2Sdormando         }
498ecdb0114SDustin Sallings         /* Copy the key since it may not be null-terminated in the struct */
499bd2f3ab2SDustin Sallings         strncpy(key_temp, ITEM_key(it), it->nkey);
500bd2f3ab2SDustin Sallings         key_temp[it->nkey] = 0x00; /* terminate */
5017dcfcedcSDustin Sallings         len = snprintf(temp, sizeof(temp), "ITEM %s [%d b; %lu s]\r\n",
502ecdb0114SDustin Sallings                        key_temp, it->nbytes - 2,
5031b13a586Sdormando                        it->exptime == 0 ? 0 :
5041fdfb7e9STrond Norbye                        (unsigned long)it->exptime + process_started);
505d5d9ff0dSAndrei Nigmatulin         if (bufcurr + len + 6 > memlimit)  /* 6 is END\r\n\0 */
50660d70942SAnatoly Vorobey             break;
5077db0d07cSDustin Sallings         memcpy(buffer + bufcurr, temp, len);
50860d70942SAnatoly Vorobey         bufcurr += len;
50960d70942SAnatoly Vorobey         shown++;
51060d70942SAnatoly Vorobey         it = it->next;
51160d70942SAnatoly Vorobey     }
51286969ea4SBrad Fitzpatrick 
513c47ee897SPaul Lindner     memcpy(buffer + bufcurr, "END\r\n", 6);
51460d70942SAnatoly Vorobey     bufcurr += 5;
51586969ea4SBrad Fitzpatrick 
51660d70942SAnatoly Vorobey     *bytes = bufcurr;
517d9edfefdSdormando     pthread_mutex_unlock(&lru_locks[id]);
51860d70942SAnatoly Vorobey     return buffer;
51960d70942SAnatoly Vorobey }
52086969ea4SBrad Fitzpatrick 
item_stats_totals(ADD_STAT add_stats,void * c)5219bce42f2Sdormando void item_stats_totals(ADD_STAT add_stats, void *c) {
52267d7cfc1Sdormando     itemstats_t totals;
52367d7cfc1Sdormando     memset(&totals, 0, sizeof(itemstats_t));
524bbef930eSdormando     int n;
525bbef930eSdormando     for (n = 0; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
526bbef930eSdormando         int x;
52767d7cfc1Sdormando         int i;
528bbef930eSdormando         for (x = 0; x < 4; x++) {
529bbef930eSdormando             i = n | lru_type_map[x];
5309bce42f2Sdormando             pthread_mutex_lock(&lru_locks[i]);
53167d7cfc1Sdormando             totals.expired_unfetched += itemstats[i].expired_unfetched;
53267d7cfc1Sdormando             totals.evicted_unfetched += itemstats[i].evicted_unfetched;
53367d7cfc1Sdormando             totals.evicted += itemstats[i].evicted;
53467d7cfc1Sdormando             totals.reclaimed += itemstats[i].reclaimed;
535649f7f01Sdormando             totals.crawler_reclaimed += itemstats[i].crawler_reclaimed;
536c10feb9eSdormando             totals.crawler_items_checked += itemstats[i].crawler_items_checked;
5372af4f6e6Sdormando             totals.lrutail_reflocked += itemstats[i].lrutail_reflocked;
538f7bf26cbSdormando             totals.moves_to_cold += itemstats[i].moves_to_cold;
539f7bf26cbSdormando             totals.moves_to_warm += itemstats[i].moves_to_warm;
540f7bf26cbSdormando             totals.moves_within_lru += itemstats[i].moves_within_lru;
541a0390847Sdormando             totals.direct_reclaims += itemstats[i].direct_reclaims;
5429bce42f2Sdormando             pthread_mutex_unlock(&lru_locks[i]);
54367d7cfc1Sdormando         }
544bbef930eSdormando     }
54567d7cfc1Sdormando     APPEND_STAT("expired_unfetched", "%llu",
54667d7cfc1Sdormando                 (unsigned long long)totals.expired_unfetched);
54767d7cfc1Sdormando     APPEND_STAT("evicted_unfetched", "%llu",
54867d7cfc1Sdormando                 (unsigned long long)totals.evicted_unfetched);
54967d7cfc1Sdormando     APPEND_STAT("evictions", "%llu",
55067d7cfc1Sdormando                 (unsigned long long)totals.evicted);
55167d7cfc1Sdormando     APPEND_STAT("reclaimed", "%llu",
55267d7cfc1Sdormando                 (unsigned long long)totals.reclaimed);
553649f7f01Sdormando     APPEND_STAT("crawler_reclaimed", "%llu",
554649f7f01Sdormando                 (unsigned long long)totals.crawler_reclaimed);
555c10feb9eSdormando     APPEND_STAT("crawler_items_checked", "%llu",
556c10feb9eSdormando                 (unsigned long long)totals.crawler_items_checked);
5572af4f6e6Sdormando     APPEND_STAT("lrutail_reflocked", "%llu",
5582af4f6e6Sdormando                 (unsigned long long)totals.lrutail_reflocked);
559d9edfefdSdormando     if (settings.lru_maintainer_thread) {
560f7bf26cbSdormando         APPEND_STAT("moves_to_cold", "%llu",
561f7bf26cbSdormando                     (unsigned long long)totals.moves_to_cold);
562f7bf26cbSdormando         APPEND_STAT("moves_to_warm", "%llu",
563f7bf26cbSdormando                     (unsigned long long)totals.moves_to_warm);
564f7bf26cbSdormando         APPEND_STAT("moves_within_lru", "%llu",
565f7bf26cbSdormando                     (unsigned long long)totals.moves_within_lru);
566a0390847Sdormando         APPEND_STAT("direct_reclaims", "%llu",
567a0390847Sdormando                     (unsigned long long)totals.direct_reclaims);
56867d7cfc1Sdormando     }
569d9edfefdSdormando }
57067d7cfc1Sdormando 
item_stats(ADD_STAT add_stats,void * c)5719bce42f2Sdormando void item_stats(ADD_STAT add_stats, void *c) {
572bbef930eSdormando     itemstats_t totals;
573bbef930eSdormando     int n;
574bbef930eSdormando     for (n = 0; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
575bbef930eSdormando         memset(&totals, 0, sizeof(itemstats_t));
576bbef930eSdormando         int x;
57717df5c0eSTrond Norbye         int i;
578bbef930eSdormando         unsigned int size = 0;
579bbef930eSdormando         unsigned int age  = 0;
5808ab8a1dbSdormando         unsigned int lru_size_map[4];
581b89f7c7bSDustin Sallings         const char *fmt = "items:%d:%s";
58288a68689SDustin Sallings         char key_str[STAT_KEY_LEN];
58388a68689SDustin Sallings         char val_str[STAT_VAL_LEN];
584b89f7c7bSDustin Sallings         int klen = 0, vlen = 0;
585bbef930eSdormando         for (x = 0; x < 4; x++) {
586bbef930eSdormando             i = n | lru_type_map[x];
587bbef930eSdormando             pthread_mutex_lock(&lru_locks[i]);
588bbef930eSdormando             totals.evicted += itemstats[i].evicted;
589bbef930eSdormando             totals.evicted_nonzero += itemstats[i].evicted_nonzero;
590bbef930eSdormando             totals.outofmemory += itemstats[i].outofmemory;
591bbef930eSdormando             totals.tailrepairs += itemstats[i].tailrepairs;
592d9edfefdSdormando             totals.reclaimed += itemstats[i].reclaimed;
593bbef930eSdormando             totals.expired_unfetched += itemstats[i].expired_unfetched;
594bbef930eSdormando             totals.evicted_unfetched += itemstats[i].evicted_unfetched;
595bbef930eSdormando             totals.crawler_reclaimed += itemstats[i].crawler_reclaimed;
596c10feb9eSdormando             totals.crawler_items_checked += itemstats[i].crawler_items_checked;
597bbef930eSdormando             totals.lrutail_reflocked += itemstats[i].lrutail_reflocked;
598bbef930eSdormando             totals.moves_to_cold += itemstats[i].moves_to_cold;
599bbef930eSdormando             totals.moves_to_warm += itemstats[i].moves_to_warm;
600bbef930eSdormando             totals.moves_within_lru += itemstats[i].moves_within_lru;
601bbef930eSdormando             totals.direct_reclaims += itemstats[i].direct_reclaims;
602bbef930eSdormando             size += sizes[i];
6038ab8a1dbSdormando             lru_size_map[x] = sizes[i];
604bbef930eSdormando             if (lru_type_map[x] == COLD_LRU && tails[i] != NULL)
605bbef930eSdormando                 age = current_time - tails[i]->time;
6069bce42f2Sdormando             pthread_mutex_unlock(&lru_locks[i]);
607df8cf5e7SToru Maesaka         }
608bbef930eSdormando         if (size == 0)
609bbef930eSdormando             continue;
610bbef930eSdormando         APPEND_NUM_FMT_STAT(fmt, n, "number", "%u", size);
611d9edfefdSdormando         if (settings.lru_maintainer_thread) {
6128ab8a1dbSdormando             APPEND_NUM_FMT_STAT(fmt, n, "number_hot", "%u", lru_size_map[0]);
6138ab8a1dbSdormando             APPEND_NUM_FMT_STAT(fmt, n, "number_warm", "%u", lru_size_map[1]);
6148ab8a1dbSdormando             APPEND_NUM_FMT_STAT(fmt, n, "number_cold", "%u", lru_size_map[2]);
61585a0042bSMatt Fowles Kulukundis             if (settings.expirezero_does_not_evict) {
6164de89c8cSdormando                 APPEND_NUM_FMT_STAT(fmt, n, "number_noexp", "%u", lru_size_map[3]);
617d9edfefdSdormando             }
61885a0042bSMatt Fowles Kulukundis         }
619bbef930eSdormando         APPEND_NUM_FMT_STAT(fmt, n, "age", "%u", age);
620bbef930eSdormando         APPEND_NUM_FMT_STAT(fmt, n, "evicted",
621bbef930eSdormando                             "%llu", (unsigned long long)totals.evicted);
622bbef930eSdormando         APPEND_NUM_FMT_STAT(fmt, n, "evicted_nonzero",
623bbef930eSdormando                             "%llu", (unsigned long long)totals.evicted_nonzero);
624bbef930eSdormando         APPEND_NUM_FMT_STAT(fmt, n, "evicted_time",
625bbef930eSdormando                             "%u", totals.evicted_time);
626bbef930eSdormando         APPEND_NUM_FMT_STAT(fmt, n, "outofmemory",
627bbef930eSdormando                             "%llu", (unsigned long long)totals.outofmemory);
628bbef930eSdormando         APPEND_NUM_FMT_STAT(fmt, n, "tailrepairs",
629bbef930eSdormando                             "%llu", (unsigned long long)totals.tailrepairs);
630bbef930eSdormando         APPEND_NUM_FMT_STAT(fmt, n, "reclaimed",
631bbef930eSdormando                             "%llu", (unsigned long long)totals.reclaimed);
632bbef930eSdormando         APPEND_NUM_FMT_STAT(fmt, n, "expired_unfetched",
633bbef930eSdormando                             "%llu", (unsigned long long)totals.expired_unfetched);
634bbef930eSdormando         APPEND_NUM_FMT_STAT(fmt, n, "evicted_unfetched",
635bbef930eSdormando                             "%llu", (unsigned long long)totals.evicted_unfetched);
636bbef930eSdormando         APPEND_NUM_FMT_STAT(fmt, n, "crawler_reclaimed",
637bbef930eSdormando                             "%llu", (unsigned long long)totals.crawler_reclaimed);
638c10feb9eSdormando         APPEND_NUM_FMT_STAT(fmt, n, "crawler_items_checked",
639c10feb9eSdormando                             "%llu", (unsigned long long)totals.crawler_items_checked);
640bbef930eSdormando         APPEND_NUM_FMT_STAT(fmt, n, "lrutail_reflocked",
641bbef930eSdormando                             "%llu", (unsigned long long)totals.lrutail_reflocked);
642d9edfefdSdormando         if (settings.lru_maintainer_thread) {
643bbef930eSdormando             APPEND_NUM_FMT_STAT(fmt, n, "moves_to_cold",
644bbef930eSdormando                                 "%llu", (unsigned long long)totals.moves_to_cold);
645bbef930eSdormando             APPEND_NUM_FMT_STAT(fmt, n, "moves_to_warm",
646bbef930eSdormando                                 "%llu", (unsigned long long)totals.moves_to_warm);
647bbef930eSdormando             APPEND_NUM_FMT_STAT(fmt, n, "moves_within_lru",
648bbef930eSdormando                                 "%llu", (unsigned long long)totals.moves_within_lru);
649bbef930eSdormando             APPEND_NUM_FMT_STAT(fmt, n, "direct_reclaims",
650bbef930eSdormando                                 "%llu", (unsigned long long)totals.direct_reclaims);
651bbef930eSdormando         }
652d9edfefdSdormando     }
653df8cf5e7SToru Maesaka 
65448f45982SToru Maesaka     /* getting here means both ascii and binary terminators fit */
65517df5c0eSTrond Norbye     add_stats(NULL, 0, NULL, 0, c);
65660d70942SAnatoly Vorobey }
65786969ea4SBrad Fitzpatrick 
item_stats_sizes_status(void)6588d82383fSdormando bool item_stats_sizes_status(void) {
6598d82383fSdormando     bool ret = false;
660ae6f4267Sdormando     mutex_lock(&stats_sizes_lock);
6618d82383fSdormando     if (stats_sizes_hist != NULL)
6628d82383fSdormando         ret = true;
6638d82383fSdormando     mutex_unlock(&stats_sizes_lock);
6648d82383fSdormando     return ret;
6658d82383fSdormando }
6668d82383fSdormando 
item_stats_sizes_init(void)6678d82383fSdormando void item_stats_sizes_init(void) {
6688d82383fSdormando     if (stats_sizes_hist != NULL)
6698d82383fSdormando         return;
670ae6f4267Sdormando     stats_sizes_buckets = settings.item_size_max / 32 + 1;
671ae6f4267Sdormando     stats_sizes_hist = calloc(stats_sizes_buckets, sizeof(int));
6728d82383fSdormando     stats_sizes_cas_min = (settings.use_cas) ? get_cas_id() : 0;
6738d82383fSdormando }
6748d82383fSdormando 
item_stats_sizes_enable(ADD_STAT add_stats,void * c)6758d82383fSdormando void item_stats_sizes_enable(ADD_STAT add_stats, void *c) {
6768d82383fSdormando     mutex_lock(&stats_sizes_lock);
6778d82383fSdormando     if (!settings.use_cas) {
6788d82383fSdormando         APPEND_STAT("sizes_status", "error", "");
6798d82383fSdormando         APPEND_STAT("sizes_error", "cas_support_disabled", "");
6808d82383fSdormando     } else if (stats_sizes_hist == NULL) {
6818d82383fSdormando         item_stats_sizes_init();
682ae6f4267Sdormando         if (stats_sizes_hist != NULL) {
6838d82383fSdormando             APPEND_STAT("sizes_status", "enabled", "");
684ae6f4267Sdormando         } else {
6858d82383fSdormando             APPEND_STAT("sizes_status", "error", "");
686ae6f4267Sdormando             APPEND_STAT("sizes_error", "no_memory", "");
687ae6f4267Sdormando         }
688ae6f4267Sdormando     } else {
6898d82383fSdormando         APPEND_STAT("sizes_status", "enabled", "");
690ae6f4267Sdormando     }
691ae6f4267Sdormando     mutex_unlock(&stats_sizes_lock);
692ae6f4267Sdormando }
693ae6f4267Sdormando 
item_stats_sizes_disable(ADD_STAT add_stats,void * c)694ae6f4267Sdormando void item_stats_sizes_disable(ADD_STAT add_stats, void *c) {
695ae6f4267Sdormando     mutex_lock(&stats_sizes_lock);
696ae6f4267Sdormando     if (stats_sizes_hist != NULL) {
697ae6f4267Sdormando         free(stats_sizes_hist);
698ae6f4267Sdormando         stats_sizes_hist = NULL;
699ae6f4267Sdormando     }
7008d82383fSdormando     APPEND_STAT("sizes_status", "disabled", "");
701ae6f4267Sdormando     mutex_unlock(&stats_sizes_lock);
702ae6f4267Sdormando }
703ae6f4267Sdormando 
item_stats_sizes_add(item * it)7048d82383fSdormando void item_stats_sizes_add(item *it) {
7058d82383fSdormando     if (stats_sizes_hist == NULL || stats_sizes_cas_min > ITEM_get_cas(it))
706ae6f4267Sdormando         return;
707ae6f4267Sdormando     int ntotal = ITEM_ntotal(it);
708ae6f4267Sdormando     int bucket = ntotal / 32;
709ae6f4267Sdormando     if ((ntotal % 32) != 0) bucket++;
710ae6f4267Sdormando     if (bucket < stats_sizes_buckets) stats_sizes_hist[bucket]++;
711ae6f4267Sdormando }
712ae6f4267Sdormando 
713ae6f4267Sdormando /* I think there's no way for this to be accurate without using the CAS value.
714ae6f4267Sdormando  * Since items getting their time value bumped will pass this validation.
715ae6f4267Sdormando  */
item_stats_sizes_remove(item * it)7168d82383fSdormando void item_stats_sizes_remove(item *it) {
7178d82383fSdormando     if (stats_sizes_hist == NULL || stats_sizes_cas_min > ITEM_get_cas(it))
718ae6f4267Sdormando         return;
719ae6f4267Sdormando     int ntotal = ITEM_ntotal(it);
720ae6f4267Sdormando     int bucket = ntotal / 32;
721ae6f4267Sdormando     if ((ntotal % 32) != 0) bucket++;
722ae6f4267Sdormando     if (bucket < stats_sizes_buckets) stats_sizes_hist[bucket]--;
723ae6f4267Sdormando }
724ae6f4267Sdormando 
72544ef96eeSPaul Lindner /** dumps out a list of objects of each size, with granularity of 32 bytes */
72677dde9f9SPaul Lindner /*@null@*/
72707c632f7Sdormando /* Locks are correct based on a technicality. Holds LRU lock while doing the
72807c632f7Sdormando  * work, so items can't go invalid, and it's only looking at header sizes
72907c632f7Sdormando  * which don't change.
73007c632f7Sdormando  */
item_stats_sizes(ADD_STAT add_stats,void * c)7319bce42f2Sdormando void item_stats_sizes(ADD_STAT add_stats, void *c) {
732ae6f4267Sdormando     mutex_lock(&stats_sizes_lock);
733e2df488bSToru Maesaka 
734ae6f4267Sdormando     if (stats_sizes_hist != NULL) {
7356d74e134SBrad Fitzpatrick         int i;
736ae6f4267Sdormando         for (i = 0; i < stats_sizes_buckets; i++) {
737ae6f4267Sdormando             if (stats_sizes_hist[i] != 0) {
738f8770bf2SDustin Sallings                 char key[8];
739bb9d00feSDan McGee                 snprintf(key, sizeof(key), "%d", i * 32);
740ae6f4267Sdormando                 APPEND_STAT(key, "%u", stats_sizes_hist[i]);
7416d74e134SBrad Fitzpatrick             }
742e2df488bSToru Maesaka         }
743ae6f4267Sdormando     } else {
744ae6f4267Sdormando         APPEND_STAT("sizes_status", "disabled", "");
74517df5c0eSTrond Norbye     }
746ae6f4267Sdormando 
74717df5c0eSTrond Norbye     add_stats(NULL, 0, NULL, 0, c);
748ae6f4267Sdormando     mutex_unlock(&stats_sizes_lock);
7496d74e134SBrad Fitzpatrick }
750c5944dc2SSteven Grimm 
7515da8dbabSTrond Norbye /** wrapper around assoc_find which does the lazy expiration logic */
do_item_get(const char * key,const size_t nkey,const uint32_t hv,conn * c)7526895d23eSsergiocarlos item *do_item_get(const char *key, const size_t nkey, const uint32_t hv, conn *c) {
753bab9acd1Sdormando     item *it = assoc_find(key, nkey, hv);
754b3630e1aSdormando     if (it != NULL) {
7553b961388Sdormando         refcount_incr(&it->refcount);
756b3630e1aSdormando         /* Optimization for slab reassignment. prevents popular items from
757b3630e1aSdormando          * jamming in busy wait. Can only do this here to satisfy lock order
75869d1c699Sdormando          * of item_lock, slabs_lock. */
75969d1c699Sdormando         /* This was made unsafe by removal of the cache_lock:
76069d1c699Sdormando          * slab_rebalance_signal and slab_rebal.* are modified in a separate
76169d1c699Sdormando          * thread under slabs_lock. If slab_rebalance_signal = 1, slab_start =
76269d1c699Sdormando          * NULL (0), but slab_end is still equal to some value, this would end
76369d1c699Sdormando          * up unlinking every item fetched.
76469d1c699Sdormando          * This is either an acceptable loss, or if slab_rebalance_signal is
76569d1c699Sdormando          * true, slab_start/slab_end should be put behind the slabs_lock.
76669d1c699Sdormando          * Which would cause a huge potential slowdown.
76769d1c699Sdormando          * Could also use a specific lock for slab_rebal.* and
76869d1c699Sdormando          * slab_rebalance_signal (shorter lock?)
76969d1c699Sdormando          */
77069d1c699Sdormando         /*if (slab_rebalance_signal &&
771b3630e1aSdormando             ((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) {
772ecea7458SJason CHAN             do_item_unlink(it, hv);
773f4983b20Sdormando             do_item_remove(it);
774b3630e1aSdormando             it = NULL;
77569d1c699Sdormando         }*/
776b3630e1aSdormando     }
77791adade7Sdormando     int was_found = 0;
77891adade7Sdormando 
77991adade7Sdormando     if (settings.verbose > 2) {
780fbe823d9Sdormando         int ii;
78191adade7Sdormando         if (it == NULL) {
782fbe823d9Sdormando             fprintf(stderr, "> NOT FOUND ");
78391adade7Sdormando         } else {
784fbe823d9Sdormando             fprintf(stderr, "> FOUND KEY ");
78591adade7Sdormando         }
786fbe823d9Sdormando         for (ii = 0; ii < nkey; ++ii) {
787fbe823d9Sdormando             fprintf(stderr, "%c", key[ii]);
788fbe823d9Sdormando         }
78991adade7Sdormando     }
79091adade7Sdormando 
791eb75a0dfSPaul Lindner     if (it != NULL) {
792c7fbccebSdormando         was_found = 1;
793004e2211Sdormando         if (item_is_flushed(it)) {
794f4983b20Sdormando             do_item_unlink(it, hv);
795f4983b20Sdormando             do_item_remove(it);
79640b7b4b2Sdormando             it = NULL;
79780ce0108Sdormando             pthread_mutex_lock(&c->thread->stats.mutex);
79880ce0108Sdormando             c->thread->stats.get_flushed++;
79980ce0108Sdormando             pthread_mutex_unlock(&c->thread->stats.mutex);
800c7fbccebSdormando             if (settings.verbose > 2) {
80140b7b4b2Sdormando                 fprintf(stderr, " -nuked by flush");
80240b7b4b2Sdormando             }
803c7fbccebSdormando             was_found = 2;
80440b7b4b2Sdormando         } else if (it->exptime != 0 && it->exptime <= current_time) {
805f4983b20Sdormando             do_item_unlink(it, hv);
806f4983b20Sdormando             do_item_remove(it);
80740b7b4b2Sdormando             it = NULL;
8086895d23eSsergiocarlos             pthread_mutex_lock(&c->thread->stats.mutex);
8096895d23eSsergiocarlos             c->thread->stats.get_expired++;
8106895d23eSsergiocarlos             pthread_mutex_unlock(&c->thread->stats.mutex);
811c7fbccebSdormando             if (settings.verbose > 2) {
81240b7b4b2Sdormando                 fprintf(stderr, " -nuked by expire");
81340b7b4b2Sdormando             }
814c7fbccebSdormando             was_found = 3;
81540b7b4b2Sdormando         } else {
8169bce42f2Sdormando             it->it_flags |= ITEM_FETCHED|ITEM_ACTIVE;
81756b8339eSSteven Grimm             DEBUG_REFCNT(it, '+');
81856b8339eSSteven Grimm         }
81940b7b4b2Sdormando     }
82091adade7Sdormando 
82191adade7Sdormando     if (settings.verbose > 2)
82291adade7Sdormando         fprintf(stderr, "\n");
823c7fbccebSdormando     /* For now this is in addition to the above verbose logging. */
824c7fbccebSdormando     LOGGER_LOG(c->thread->l, LOG_FETCHERS, LOGGER_ITEM_GET, NULL, was_found, key, nkey);
82591adade7Sdormando 
82656b8339eSSteven Grimm     return it;
82756b8339eSSteven Grimm }
82856b8339eSSteven Grimm 
do_item_touch(const char * key,size_t nkey,uint32_t exptime,const uint32_t hv,conn * c)829bab9acd1Sdormando item *do_item_touch(const char *key, size_t nkey, uint32_t exptime,
8306895d23eSsergiocarlos                     const uint32_t hv, conn *c) {
8316895d23eSsergiocarlos     item *it = do_item_get(key, nkey, hv, c);
832d87f568aSdormando     if (it != NULL) {
833d87f568aSdormando         it->exptime = exptime;
834d87f568aSdormando     }
835d87f568aSdormando     return it;
836d87f568aSdormando }
837d87f568aSdormando 
838fb269897Sdormando /*** LRU MAINTENANCE THREAD ***/
839fb269897Sdormando 
840f5f8f1a0Sdormando /* Returns number of items remove, expired, or evicted.
841f5f8f1a0Sdormando  * Callable from worker threads or the LRU maintainer thread */
lru_pull_tail(const int orig_id,const int cur_lru,const uint64_t total_bytes,const bool do_evict)842fb269897Sdormando static int lru_pull_tail(const int orig_id, const int cur_lru,
843*690a9a9dSEiichi Tsukata         const uint64_t total_bytes, const bool do_evict) {
844fb269897Sdormando     item *it = NULL;
845fb269897Sdormando     int id = orig_id;
846fb269897Sdormando     int removed = 0;
847fb269897Sdormando     if (id == 0)
848fb269897Sdormando         return 0;
849fb269897Sdormando 
850fb269897Sdormando     int tries = 5;
851fb269897Sdormando     item *search;
852fb269897Sdormando     item *next_it;
853fb269897Sdormando     void *hold_lock = NULL;
854fb269897Sdormando     unsigned int move_to_lru = 0;
855ee461d11Sdormando     uint64_t limit = 0;
856fb269897Sdormando 
857fb269897Sdormando     id |= cur_lru;
858fb269897Sdormando     pthread_mutex_lock(&lru_locks[id]);
859fb269897Sdormando     search = tails[id];
860f5f8f1a0Sdormando     /* We walk up *only* for locked items, and if bottom is expired. */
861fb269897Sdormando     for (; tries > 0 && search != NULL; tries--, search=next_it) {
862fb269897Sdormando         /* we might relink search mid-loop, so search->prev isn't reliable */
863fb269897Sdormando         next_it = search->prev;
864fb269897Sdormando         if (search->nbytes == 0 && search->nkey == 0 && search->it_flags == 1) {
865fb269897Sdormando             /* We are a crawler, ignore it. */
866fb269897Sdormando             tries++;
867fb269897Sdormando             continue;
868fb269897Sdormando         }
869fb269897Sdormando         uint32_t hv = hash(ITEM_key(search), search->nkey);
870fb269897Sdormando         /* Attempt to hash item lock the "search" item. If locked, no
871f5f8f1a0Sdormando          * other callers can incr the refcount. Also skip ourselves. */
872*690a9a9dSEiichi Tsukata         if ((hold_lock = item_trylock(hv)) == NULL)
873fb269897Sdormando             continue;
874fb269897Sdormando         /* Now see if the item is refcount locked */
875fb269897Sdormando         if (refcount_incr(&search->refcount) != 2) {
876fb269897Sdormando             /* Note pathological case with ref'ed items in tail.
877fb269897Sdormando              * Can still unlink the item, but it won't be reusable yet */
878fb269897Sdormando             itemstats[id].lrutail_reflocked++;
879fb269897Sdormando             /* In case of refcount leaks, enable for quick workaround. */
880fb269897Sdormando             /* WARNING: This can cause terrible corruption */
881fb269897Sdormando             if (settings.tail_repair_time &&
882fb269897Sdormando                     search->time + settings.tail_repair_time < current_time) {
883fb269897Sdormando                 itemstats[id].tailrepairs++;
884fb269897Sdormando                 search->refcount = 1;
885fb269897Sdormando                 /* This will call item_remove -> item_free since refcnt is 1 */
886fb269897Sdormando                 do_item_unlink_nolock(search, hv);
887fb269897Sdormando                 item_trylock_unlock(hold_lock);
888fb269897Sdormando                 continue;
889fb269897Sdormando             }
890fb269897Sdormando         }
891fb269897Sdormando 
892fb269897Sdormando         /* Expired or flushed */
893fb269897Sdormando         if ((search->exptime != 0 && search->exptime < current_time)
894004e2211Sdormando             || item_is_flushed(search)) {
895fb269897Sdormando             itemstats[id].reclaimed++;
896fb269897Sdormando             if ((search->it_flags & ITEM_FETCHED) == 0) {
897fb269897Sdormando                 itemstats[id].expired_unfetched++;
898fb269897Sdormando             }
899fb269897Sdormando             /* refcnt 2 -> 1 */
900a0390847Sdormando             do_item_unlink_nolock(search, hv);
901fb269897Sdormando             /* refcnt 1 -> 0 -> item_free */
902a0390847Sdormando             do_item_remove(search);
903fb269897Sdormando             item_trylock_unlock(hold_lock);
904fb269897Sdormando             removed++;
905fb269897Sdormando 
906fb269897Sdormando             /* If all we're finding are expired, can keep going */
907fb269897Sdormando             continue;
908fb269897Sdormando         }
909fb269897Sdormando 
910fb269897Sdormando         /* If we're HOT_LRU or WARM_LRU and over size limit, send to COLD_LRU.
911fb269897Sdormando          * If we're COLD_LRU, send to WARM_LRU unless we need to evict
912fb269897Sdormando          */
913fb269897Sdormando         switch (cur_lru) {
914fb269897Sdormando             case HOT_LRU:
915ee461d11Sdormando                 limit = total_bytes * settings.hot_lru_pct / 100;
916fb269897Sdormando             case WARM_LRU:
917ee461d11Sdormando                 if (limit == 0)
918ee461d11Sdormando                     limit = total_bytes * settings.warm_lru_pct / 100;
919ee461d11Sdormando                 if (sizes_bytes[id] > limit) {
920f7bf26cbSdormando                     itemstats[id].moves_to_cold++;
921fb269897Sdormando                     move_to_lru = COLD_LRU;
922fb269897Sdormando                     do_item_unlink_q(search);
923fb269897Sdormando                     it = search;
924fb269897Sdormando                     removed++;
925fb269897Sdormando                     break;
926fb269897Sdormando                 } else if ((search->it_flags & ITEM_ACTIVE) != 0) {
927fb269897Sdormando                     /* Only allow ACTIVE relinking if we're not too large. */
928f7bf26cbSdormando                     itemstats[id].moves_within_lru++;
929fb269897Sdormando                     search->it_flags &= ~ITEM_ACTIVE;
930fb269897Sdormando                     do_item_update_nolock(search);
931fb269897Sdormando                     do_item_remove(search);
932fb269897Sdormando                     item_trylock_unlock(hold_lock);
933fb269897Sdormando                 } else {
934fb269897Sdormando                     /* Don't want to move to COLD, not active, bail out */
935fb269897Sdormando                     it = search;
936fb269897Sdormando                 }
937fb269897Sdormando                 break;
938fb269897Sdormando             case COLD_LRU:
939fb269897Sdormando                 it = search; /* No matter what, we're stopping */
940fb269897Sdormando                 if (do_evict) {
941fb269897Sdormando                     if (settings.evict_to_free == 0) {
942f7bf26cbSdormando                         /* Don't think we need a counter for this. It'll OOM.  */
943fb269897Sdormando                         break;
944fb269897Sdormando                     }
945fb269897Sdormando                     itemstats[id].evicted++;
946fb269897Sdormando                     itemstats[id].evicted_time = current_time - search->time;
947fb269897Sdormando                     if (search->exptime != 0)
948fb269897Sdormando                         itemstats[id].evicted_nonzero++;
949fb269897Sdormando                     if ((search->it_flags & ITEM_FETCHED) == 0) {
950fb269897Sdormando                         itemstats[id].evicted_unfetched++;
951fb269897Sdormando                     }
952cb8257e3Sdormando                     LOGGER_LOG(NULL, LOG_EVICTIONS, LOGGER_EVICTION, search);
953a0390847Sdormando                     do_item_unlink_nolock(search, hv);
954fb269897Sdormando                     removed++;
955826403ddSdormando                     if (settings.slab_automove == 2) {
956826403ddSdormando                         slabs_reassign(-1, orig_id);
957826403ddSdormando                     }
958d9edfefdSdormando                 } else if ((search->it_flags & ITEM_ACTIVE) != 0
959d9edfefdSdormando                         && settings.lru_maintainer_thread) {
960f7bf26cbSdormando                     itemstats[id].moves_to_warm++;
961fb269897Sdormando                     search->it_flags &= ~ITEM_ACTIVE;
962fb269897Sdormando                     move_to_lru = WARM_LRU;
963fb269897Sdormando                     do_item_unlink_q(search);
964a0390847Sdormando                     removed++;
965fb269897Sdormando                 }
966fb269897Sdormando                 break;
967fb269897Sdormando         }
968fb269897Sdormando         if (it != NULL)
969fb269897Sdormando             break;
970fb269897Sdormando     }
971fb269897Sdormando 
972fb269897Sdormando     pthread_mutex_unlock(&lru_locks[id]);
973fb269897Sdormando 
974fb269897Sdormando     if (it != NULL) {
975fb269897Sdormando         if (move_to_lru) {
9764de89c8cSdormando             it->slabs_clsid = ITEM_clsid(it);
977fb269897Sdormando             it->slabs_clsid |= move_to_lru;
978fb269897Sdormando             item_link_q(it);
979fb269897Sdormando         }
980fb269897Sdormando         do_item_remove(it);
981fb269897Sdormando         item_trylock_unlock(hold_lock);
982fb269897Sdormando     }
983fb269897Sdormando 
984fb269897Sdormando     return removed;
985fb269897Sdormando }
986fb269897Sdormando 
987fb269897Sdormando /* Loop up to N times:
988fb269897Sdormando  * If too many items are in HOT_LRU, push to COLD_LRU
989fb269897Sdormando  * If too many items are in WARM_LRU, push to COLD_LRU
990fb269897Sdormando  * If too many items are in COLD_LRU, poke COLD_LRU tail
991b689a627Sdormando  * 1000 loops with 1ms min sleep gives us under 1m items shifted/sec. The
992b689a627Sdormando  * locks can't handle much more than that. Leaving a TODO for how to
993b689a627Sdormando  * autoadjust in the future.
994fb269897Sdormando  */
lru_maintainer_juggle(const int slabs_clsid)995a0390847Sdormando static int lru_maintainer_juggle(const int slabs_clsid) {
996fb269897Sdormando     int i;
997a0390847Sdormando     int did_moves = 0;
9988ab8a1dbSdormando     bool mem_limit_reached = false;
999ee461d11Sdormando     uint64_t total_bytes = 0;
1000d6e96467Sdormando     unsigned int chunks_perslab = 0;
1001d6e96467Sdormando     unsigned int chunks_free = 0;
1002b689a627Sdormando     /* TODO: if free_chunks below high watermark, increase aggressiveness */
1003d6e96467Sdormando     chunks_free = slabs_available_chunks(slabs_clsid, &mem_limit_reached,
1004ee461d11Sdormando             &total_bytes, &chunks_perslab);
10054de89c8cSdormando     if (settings.expirezero_does_not_evict)
1006ee461d11Sdormando         total_bytes -= noexp_lru_size(slabs_clsid);
1007fb269897Sdormando 
1008d6e96467Sdormando     /* If slab automove is enabled on any level, and we have more than 2 pages
1009d6e96467Sdormando      * worth of chunks free in this class, ask (gently) to reassign a page
1010d6e96467Sdormando      * from this class back into the global pool (0)
1011d6e96467Sdormando      */
10121c3f0a3dSdormando     if (settings.slab_automove > 0 && chunks_free > (chunks_perslab * 2.5)) {
1013d6e96467Sdormando         slabs_reassign(slabs_clsid, SLAB_GLOBAL_PAGE_POOL);
1014d6e96467Sdormando     }
1015d6e96467Sdormando 
1016fb269897Sdormando     /* Juggle HOT/WARM up to N times */
1017b689a627Sdormando     for (i = 0; i < 1000; i++) {
1018f7bf26cbSdormando         int do_more = 0;
1019*690a9a9dSEiichi Tsukata         if (lru_pull_tail(slabs_clsid, HOT_LRU, total_bytes, false) ||
1020*690a9a9dSEiichi Tsukata             lru_pull_tail(slabs_clsid, WARM_LRU, total_bytes, false)) {
1021f7bf26cbSdormando             do_more++;
1022fb269897Sdormando         }
1023*690a9a9dSEiichi Tsukata         do_more += lru_pull_tail(slabs_clsid, COLD_LRU, total_bytes, false);
1024f7bf26cbSdormando         if (do_more == 0)
1025f7bf26cbSdormando             break;
1026a0390847Sdormando         did_moves++;
1027fb269897Sdormando     }
1028a0390847Sdormando     return did_moves;
1029fb269897Sdormando }
1030fb269897Sdormando 
1031e708513aSdormando /* Will crawl all slab classes a minimum of once per hour */
1032e708513aSdormando #define MAX_MAINTCRAWL_WAIT 60 * 60
1033e708513aSdormando 
1034e708513aSdormando /* Hoping user input will improve this function. This is all a wild guess.
1035e708513aSdormando  * Operation: Kicks crawler for each slab id. Crawlers take some statistics as
1036e708513aSdormando  * to items with nonzero expirations. It then buckets how many items will
1037e708513aSdormando  * expire per minute for the next hour.
1038e708513aSdormando  * This function checks the results of a run, and if it things more than 1% of
1039e708513aSdormando  * expirable objects are ready to go, kick the crawler again to reap.
1040e708513aSdormando  * It will also kick the crawler once per minute regardless, waiting a minute
1041e708513aSdormando  * longer for each time it has no work to do, up to an hour wait time.
1042e708513aSdormando  * The latter is to avoid newly started daemons from waiting too long before
1043e708513aSdormando  * retrying a crawl.
1044e708513aSdormando  */
lru_maintainer_crawler_check(void)1045e708513aSdormando static void lru_maintainer_crawler_check(void) {
1046e708513aSdormando     int i;
1047e708513aSdormando     static rel_time_t last_crawls[MAX_NUMBER_OF_SLAB_CLASSES];
1048e708513aSdormando     static rel_time_t next_crawl_wait[MAX_NUMBER_OF_SLAB_CLASSES];
1049e708513aSdormando     for (i = POWER_SMALLEST; i < MAX_NUMBER_OF_SLAB_CLASSES; i++) {
1050e708513aSdormando         crawlerstats_t *s = &crawlerstats[i];
1051e708513aSdormando         /* We've not successfully kicked off a crawl yet. */
1052e708513aSdormando         if (last_crawls[i] == 0) {
105387ff9dc0Sdormando             if (lru_crawler_start(i, 0) > 0) {
1054e708513aSdormando                 last_crawls[i] = current_time;
1055e708513aSdormando             }
1056e708513aSdormando         }
1057e708513aSdormando         pthread_mutex_lock(&lru_crawler_stats_lock);
1058e708513aSdormando         if (s->run_complete) {
1059e708513aSdormando             int x;
1060e708513aSdormando             /* Should we crawl again? */
1061e708513aSdormando             uint64_t possible_reclaims = s->seen - s->noexp;
1062e708513aSdormando             uint64_t available_reclaims = 0;
1063e708513aSdormando             /* Need to think we can free at least 1% of the items before
1064e708513aSdormando              * crawling. */
1065e708513aSdormando             /* FIXME: Configurable? */
1066e708513aSdormando             uint64_t low_watermark = (s->seen / 100) + 1;
1067e708513aSdormando             rel_time_t since_run = current_time - s->end_time;
1068e708513aSdormando             /* Don't bother if the payoff is too low. */
1069e708513aSdormando             if (settings.verbose > 1)
1070e708513aSdormando                 fprintf(stderr, "maint crawler: low_watermark: %llu, possible_reclaims: %llu, since_run: %u\n",
1071e708513aSdormando                         (unsigned long long)low_watermark, (unsigned long long)possible_reclaims,
1072e708513aSdormando                         (unsigned int)since_run);
1073e708513aSdormando             for (x = 0; x < 60; x++) {
1074e708513aSdormando                 if (since_run < (x * 60) + 60)
1075e708513aSdormando                     break;
1076e708513aSdormando                 available_reclaims += s->histo[x];
1077e708513aSdormando             }
1078e708513aSdormando             if (available_reclaims > low_watermark) {
1079e708513aSdormando                 last_crawls[i] = 0;
1080e708513aSdormando                 if (next_crawl_wait[i] > 60)
1081e708513aSdormando                     next_crawl_wait[i] -= 60;
1082e708513aSdormando             } else if (since_run > 5 && since_run > next_crawl_wait[i]) {
1083e708513aSdormando                 last_crawls[i] = 0;
1084e708513aSdormando                 if (next_crawl_wait[i] < MAX_MAINTCRAWL_WAIT)
1085e708513aSdormando                     next_crawl_wait[i] += 60;
1086e708513aSdormando             }
1087e708513aSdormando             if (settings.verbose > 1)
1088e708513aSdormando                 fprintf(stderr, "maint crawler: available reclaims: %llu, next_crawl: %u\n", (unsigned long long)available_reclaims, next_crawl_wait[i]);
1089e708513aSdormando         }
1090e708513aSdormando         pthread_mutex_unlock(&lru_crawler_stats_lock);
1091e708513aSdormando     }
1092e708513aSdormando }
1093e708513aSdormando 
1094fb269897Sdormando static pthread_t lru_maintainer_tid;
1095fb269897Sdormando 
1096a0390847Sdormando #define MAX_LRU_MAINTAINER_SLEEP 1000000
1097a0390847Sdormando #define MIN_LRU_MAINTAINER_SLEEP 1000
1098a0390847Sdormando 
lru_maintainer_thread(void * arg)1099fb269897Sdormando static void *lru_maintainer_thread(void *arg) {
1100fb269897Sdormando     int i;
1101a0390847Sdormando     useconds_t to_sleep = MIN_LRU_MAINTAINER_SLEEP;
1102e708513aSdormando     rel_time_t last_crawler_check = 0;
1103fb269897Sdormando 
1104fb269897Sdormando     pthread_mutex_lock(&lru_maintainer_lock);
1105fb269897Sdormando     if (settings.verbose > 2)
1106fb269897Sdormando         fprintf(stderr, "Starting LRU maintainer background thread\n");
1107fb269897Sdormando     while (do_run_lru_maintainer_thread) {
1108a0390847Sdormando         int did_moves = 0;
1109a0390847Sdormando         pthread_mutex_unlock(&lru_maintainer_lock);
1110a0390847Sdormando         usleep(to_sleep);
1111a0390847Sdormando         pthread_mutex_lock(&lru_maintainer_lock);
1112fb269897Sdormando 
11134de89c8cSdormando         STATS_LOCK();
11144de89c8cSdormando         stats.lru_maintainer_juggles++;
11154de89c8cSdormando         STATS_UNLOCK();
1116fb269897Sdormando         /* We were asked to immediately wake up and poke a particular slab
1117fb269897Sdormando          * class due to a low watermark being hit */
1118fb269897Sdormando         if (lru_maintainer_check_clsid != 0) {
1119a0390847Sdormando             did_moves = lru_maintainer_juggle(lru_maintainer_check_clsid);
1120fb269897Sdormando             lru_maintainer_check_clsid = 0;
1121fb269897Sdormando         } else {
1122fb269897Sdormando             for (i = POWER_SMALLEST; i < MAX_NUMBER_OF_SLAB_CLASSES; i++) {
1123a0390847Sdormando                 did_moves += lru_maintainer_juggle(i);
1124fb269897Sdormando             }
1125fb269897Sdormando         }
1126a0390847Sdormando         if (did_moves == 0) {
1127a0390847Sdormando             if (to_sleep < MAX_LRU_MAINTAINER_SLEEP)
1128a0390847Sdormando                 to_sleep += 1000;
1129a0390847Sdormando         } else {
1130b689a627Sdormando             to_sleep /= 2;
1131b689a627Sdormando             if (to_sleep < MIN_LRU_MAINTAINER_SLEEP)
1132a0390847Sdormando                 to_sleep = MIN_LRU_MAINTAINER_SLEEP;
1133a0390847Sdormando         }
1134e708513aSdormando         /* Once per second at most */
1135e708513aSdormando         if (settings.lru_crawler && last_crawler_check != current_time) {
1136e708513aSdormando             lru_maintainer_crawler_check();
1137e708513aSdormando             last_crawler_check = current_time;
1138e708513aSdormando         }
1139fb269897Sdormando     }
1140fb269897Sdormando     pthread_mutex_unlock(&lru_maintainer_lock);
1141fb269897Sdormando     if (settings.verbose > 2)
1142fb269897Sdormando         fprintf(stderr, "LRU maintainer thread stopping\n");
1143fb269897Sdormando 
1144fb269897Sdormando     return NULL;
1145fb269897Sdormando }
stop_lru_maintainer_thread(void)1146fb269897Sdormando int stop_lru_maintainer_thread(void) {
1147fb269897Sdormando     int ret;
1148fb269897Sdormando     pthread_mutex_lock(&lru_maintainer_lock);
1149a0390847Sdormando     /* LRU thread is a sleep loop, will die on its own */
1150fb269897Sdormando     do_run_lru_maintainer_thread = 0;
1151fb269897Sdormando     pthread_mutex_unlock(&lru_maintainer_lock);
1152fb269897Sdormando     if ((ret = pthread_join(lru_maintainer_tid, NULL)) != 0) {
1153fb269897Sdormando         fprintf(stderr, "Failed to stop LRU maintainer thread: %s\n", strerror(ret));
1154fb269897Sdormando         return -1;
1155fb269897Sdormando     }
1156f7bf26cbSdormando     settings.lru_maintainer_thread = false;
1157fb269897Sdormando     return 0;
1158fb269897Sdormando }
1159fb269897Sdormando 
start_lru_maintainer_thread(void)1160fb269897Sdormando int start_lru_maintainer_thread(void) {
1161fb269897Sdormando     int ret;
1162fb269897Sdormando 
1163fb269897Sdormando     pthread_mutex_lock(&lru_maintainer_lock);
1164fb269897Sdormando     do_run_lru_maintainer_thread = 1;
1165f7bf26cbSdormando     settings.lru_maintainer_thread = true;
1166fb269897Sdormando     if ((ret = pthread_create(&lru_maintainer_tid, NULL,
1167fb269897Sdormando         lru_maintainer_thread, NULL)) != 0) {
1168fb269897Sdormando         fprintf(stderr, "Can't create LRU maintainer thread: %s\n",
1169fb269897Sdormando             strerror(ret));
1170fb269897Sdormando         pthread_mutex_unlock(&lru_maintainer_lock);
1171fb269897Sdormando         return -1;
1172fb269897Sdormando     }
1173fb269897Sdormando     pthread_mutex_unlock(&lru_maintainer_lock);
1174fb269897Sdormando 
1175fb269897Sdormando     return 0;
1176fb269897Sdormando }
1177fb269897Sdormando 
1178fb269897Sdormando /* If we hold this lock, crawler can't wake up or move */
lru_maintainer_pause(void)1179fb269897Sdormando void lru_maintainer_pause(void) {
1180fb269897Sdormando     pthread_mutex_lock(&lru_maintainer_lock);
1181fb269897Sdormando }
1182fb269897Sdormando 
lru_maintainer_resume(void)1183fb269897Sdormando void lru_maintainer_resume(void) {
1184fb269897Sdormando     pthread_mutex_unlock(&lru_maintainer_lock);
1185fb269897Sdormando }
1186fb269897Sdormando 
init_lru_maintainer(void)1187fb269897Sdormando int init_lru_maintainer(void) {
1188fb269897Sdormando     if (lru_maintainer_initialized == 0) {
1189fb269897Sdormando         pthread_mutex_init(&lru_maintainer_lock, NULL);
1190fb269897Sdormando         lru_maintainer_initialized = 1;
1191fb269897Sdormando     }
1192fb269897Sdormando     return 0;
1193fb269897Sdormando }
1194fb269897Sdormando 
1195fb269897Sdormando /*** ITEM CRAWLER THREAD ***/
1196fb269897Sdormando 
crawler_link_q(item * it)11970d1f505cSdormando static void crawler_link_q(item *it) { /* item is the new tail */
11980d1f505cSdormando     item **head, **tail;
11995e05fca2Sdormando     assert(it->it_flags == 1);
12005e05fca2Sdormando     assert(it->nbytes == 0);
12010d1f505cSdormando 
12020d1f505cSdormando     head = &heads[it->slabs_clsid];
12030d1f505cSdormando     tail = &tails[it->slabs_clsid];
12040d1f505cSdormando     assert(*tail != 0);
12050d1f505cSdormando     assert(it != *tail);
12060d1f505cSdormando     assert((*head && *tail) || (*head == 0 && *tail == 0));
12070d1f505cSdormando     it->prev = *tail;
12080d1f505cSdormando     it->next = 0;
12090d1f505cSdormando     if (it->prev) {
12100d1f505cSdormando         assert(it->prev->next == 0);
12110d1f505cSdormando         it->prev->next = it;
12120d1f505cSdormando     }
12130d1f505cSdormando     *tail = it;
12140d1f505cSdormando     if (*head == 0) *head = it;
12150d1f505cSdormando     return;
12160d1f505cSdormando }
12170d1f505cSdormando 
crawler_unlink_q(item * it)12180d1f505cSdormando static void crawler_unlink_q(item *it) {
12190d1f505cSdormando     item **head, **tail;
12200d1f505cSdormando     head = &heads[it->slabs_clsid];
12210d1f505cSdormando     tail = &tails[it->slabs_clsid];
12220d1f505cSdormando 
12230d1f505cSdormando     if (*head == it) {
12240d1f505cSdormando         assert(it->prev == 0);
12250d1f505cSdormando         *head = it->next;
12260d1f505cSdormando     }
12270d1f505cSdormando     if (*tail == it) {
12280d1f505cSdormando         assert(it->next == 0);
12290d1f505cSdormando         *tail = it->prev;
12300d1f505cSdormando     }
12310d1f505cSdormando     assert(it->next != it);
12320d1f505cSdormando     assert(it->prev != it);
12330d1f505cSdormando 
12340d1f505cSdormando     if (it->next) it->next->prev = it->prev;
12350d1f505cSdormando     if (it->prev) it->prev->next = it->next;
12360d1f505cSdormando     return;
12370d1f505cSdormando }
12380d1f505cSdormando 
123945177c01Sdormando /* This is too convoluted, but it's a difficult shuffle. Try to rewrite it
124045177c01Sdormando  * more clearly. */
crawler_crawl_q(item * it)12412418511aSdormando static item *crawler_crawl_q(item *it) {
12420d1f505cSdormando     item **head, **tail;
12435e05fca2Sdormando     assert(it->it_flags == 1);
12445e05fca2Sdormando     assert(it->nbytes == 0);
12450d1f505cSdormando     head = &heads[it->slabs_clsid];
12460d1f505cSdormando     tail = &tails[it->slabs_clsid];
12470d1f505cSdormando 
12480d1f505cSdormando     /* We've hit the head, pop off */
12490d1f505cSdormando     if (it->prev == 0) {
12500d1f505cSdormando         assert(*head == it);
12510d1f505cSdormando         if (it->next) {
12520d1f505cSdormando             *head = it->next;
12530d1f505cSdormando             assert(it->next->prev == it);
12540d1f505cSdormando             it->next->prev = 0;
12550d1f505cSdormando         }
12562418511aSdormando         return NULL; /* Done */
12570d1f505cSdormando     }
12580d1f505cSdormando 
12590d1f505cSdormando     /* Swing ourselves in front of the next item */
12600d1f505cSdormando     /* NB: If there is a prev, we can't be the head */
12610d1f505cSdormando     assert(it->prev != it);
12620d1f505cSdormando     if (it->prev) {
12630d1f505cSdormando         if (*head == it->prev) {
12640d1f505cSdormando             /* Prev was the head, now we're the head */
12650d1f505cSdormando             *head = it;
12660d1f505cSdormando         }
12670d1f505cSdormando         if (*tail == it) {
12680d1f505cSdormando             /* We are the tail, now they are the tail */
12690d1f505cSdormando             *tail = it->prev;
12700d1f505cSdormando         }
12710d1f505cSdormando         assert(it->next != it);
12720d1f505cSdormando         if (it->next) {
12730d1f505cSdormando             assert(it->prev->next == it);
12740d1f505cSdormando             it->prev->next = it->next;
12750d1f505cSdormando             it->next->prev = it->prev;
12760d1f505cSdormando         } else {
12770d1f505cSdormando             /* Tail. Move this above? */
12780d1f505cSdormando             it->prev->next = 0;
12790d1f505cSdormando         }
12800d1f505cSdormando         /* prev->prev's next is it->prev */
12810d1f505cSdormando         it->next = it->prev;
12820d1f505cSdormando         it->prev = it->next->prev;
12830d1f505cSdormando         it->next->prev = it;
12840d1f505cSdormando         /* New it->prev now, if we're not at the head. */
12850d1f505cSdormando         if (it->prev) {
12860d1f505cSdormando             it->prev->next = it;
12870d1f505cSdormando         }
12880d1f505cSdormando     }
12890d1f505cSdormando     assert(it->next != it);
12900d1f505cSdormando     assert(it->prev != it);
12910d1f505cSdormando 
12922418511aSdormando     return it->next; /* success */
12930d1f505cSdormando }
12940d1f505cSdormando 
1295c58291ecSdormando /* I pulled this out to make the main thread clearer, but it reaches into the
1296c58291ecSdormando  * main thread's values too much. Should rethink again.
12970d1f505cSdormando  */
item_crawler_evaluate(item * search,uint32_t hv,int i)1298c58291ecSdormando static void item_crawler_evaluate(item *search, uint32_t hv, int i) {
12994de89c8cSdormando     int slab_id = CLEAR_LRU(i);
1300e708513aSdormando     crawlerstats_t *s = &crawlerstats[slab_id];
1301c10feb9eSdormando     itemstats[i].crawler_items_checked++;
130280ce0108Sdormando     int is_flushed = item_is_flushed(search);
1303c58291ecSdormando     if ((search->exptime != 0 && search->exptime < current_time)
130480ce0108Sdormando         || is_flushed) {
1305c58291ecSdormando         itemstats[i].crawler_reclaimed++;
1306e708513aSdormando         s->reclaimed++;
1307c58291ecSdormando 
1308c58291ecSdormando         if (settings.verbose > 1) {
1309c58291ecSdormando             int ii;
1310c58291ecSdormando             char *key = ITEM_key(search);
1311c58291ecSdormando             fprintf(stderr, "LRU crawler found an expired item (flags: %d, slab: %d): ",
1312c58291ecSdormando                 search->it_flags, search->slabs_clsid);
1313c58291ecSdormando             for (ii = 0; ii < search->nkey; ++ii) {
1314c58291ecSdormando                 fprintf(stderr, "%c", key[ii]);
1315c58291ecSdormando             }
1316c58291ecSdormando             fprintf(stderr, "\n");
1317c58291ecSdormando         }
131880ce0108Sdormando         if ((search->it_flags & ITEM_FETCHED) == 0 && !is_flushed) {
1319c58291ecSdormando             itemstats[i].expired_unfetched++;
1320c58291ecSdormando         }
1321c58291ecSdormando         do_item_unlink_nolock(search, hv);
132293d5637bSdormando         do_item_remove(search);
132393d5637bSdormando         assert(search->slabs_clsid == 0);
1324c58291ecSdormando     } else {
1325e708513aSdormando         s->seen++;
1326c58291ecSdormando         refcount_decr(&search->refcount);
1327e708513aSdormando         if (search->exptime == 0) {
1328e708513aSdormando             s->noexp++;
1329e708513aSdormando         } else if (search->exptime - current_time > 3599) {
1330e708513aSdormando             s->ttl_hourplus++;
1331e708513aSdormando         } else {
1332e708513aSdormando             rel_time_t ttl_remain = search->exptime - current_time;
1333e708513aSdormando             int bucket = ttl_remain / 60;
1334e708513aSdormando             s->histo[bucket]++;
1335e708513aSdormando         }
1336c58291ecSdormando     }
1337c58291ecSdormando }
1338c58291ecSdormando 
item_crawler_thread(void * arg)13390d1f505cSdormando static void *item_crawler_thread(void *arg) {
13400d1f505cSdormando     int i;
134187ff9dc0Sdormando     int crawls_persleep = settings.crawls_persleep;
13420d1f505cSdormando 
13439d635fa7Sdormando     pthread_mutex_lock(&lru_crawler_lock);
1344b6bf6df7Sdormando     pthread_cond_signal(&lru_crawler_cond);
1345b6bf6df7Sdormando     settings.lru_crawler = true;
13460d1f505cSdormando     if (settings.verbose > 2)
13470d1f505cSdormando         fprintf(stderr, "Starting LRU crawler background thread\n");
13480d1f505cSdormando     while (do_run_lru_crawler_thread) {
13496be2b6c0Sdormando     pthread_cond_wait(&lru_crawler_cond, &lru_crawler_lock);
13500d1f505cSdormando 
13510d1f505cSdormando     while (crawler_count) {
13520d1f505cSdormando         item *search = NULL;
13530d1f505cSdormando         void *hold_lock = NULL;
13540d1f505cSdormando 
13559bce42f2Sdormando         for (i = POWER_SMALLEST; i < LARGEST_ID; i++) {
1356c58291ecSdormando             if (crawlers[i].it_flags != 1) {
1357c58291ecSdormando                 continue;
1358c58291ecSdormando             }
13599bce42f2Sdormando             pthread_mutex_lock(&lru_locks[i]);
13602418511aSdormando             search = crawler_crawl_q((item *)&crawlers[i]);
1361e31a5912Sdormando             if (search == NULL ||
1362e31a5912Sdormando                 (crawlers[i].remaining && --crawlers[i].remaining < 1)) {
13630d1f505cSdormando                 if (settings.verbose > 2)
13640d1f505cSdormando                     fprintf(stderr, "Nothing left to crawl for %d\n", i);
13650d1f505cSdormando                 crawlers[i].it_flags = 0;
13660d1f505cSdormando                 crawler_count--;
13670d1f505cSdormando                 crawler_unlink_q((item *)&crawlers[i]);
13689bce42f2Sdormando                 pthread_mutex_unlock(&lru_locks[i]);
1369e708513aSdormando                 pthread_mutex_lock(&lru_crawler_stats_lock);
13704de89c8cSdormando                 crawlerstats[CLEAR_LRU(i)].end_time = current_time;
13714de89c8cSdormando                 crawlerstats[CLEAR_LRU(i)].run_complete = true;
1372e708513aSdormando                 pthread_mutex_unlock(&lru_crawler_stats_lock);
13730d1f505cSdormando                 continue;
13740d1f505cSdormando             }
1375cb9cafa6Sdormando             uint32_t hv = hash(ITEM_key(search), search->nkey);
13760d1f505cSdormando             /* Attempt to hash item lock the "search" item. If locked, no
13770d1f505cSdormando              * other callers can incr the refcount
13780d1f505cSdormando              */
13790d1f505cSdormando             if ((hold_lock = item_trylock(hv)) == NULL) {
13809bce42f2Sdormando                 pthread_mutex_unlock(&lru_locks[i]);
13810d1f505cSdormando                 continue;
13820d1f505cSdormando             }
13830d1f505cSdormando             /* Now see if the item is refcount locked */
13840d1f505cSdormando             if (refcount_incr(&search->refcount) != 2) {
13850d1f505cSdormando                 refcount_decr(&search->refcount);
13860d1f505cSdormando                 if (hold_lock)
13870d1f505cSdormando                     item_trylock_unlock(hold_lock);
13889bce42f2Sdormando                 pthread_mutex_unlock(&lru_locks[i]);
13890d1f505cSdormando                 continue;
13900d1f505cSdormando             }
13910d1f505cSdormando 
1392c58291ecSdormando             /* Frees the item or decrements the refcount. */
1393c58291ecSdormando             /* Interface for this could improve: do the free/decr here
1394c58291ecSdormando              * instead? */
1395e708513aSdormando             pthread_mutex_lock(&lru_crawler_stats_lock);
1396c58291ecSdormando             item_crawler_evaluate(search, hv, i);
1397e708513aSdormando             pthread_mutex_unlock(&lru_crawler_stats_lock);
13980d1f505cSdormando 
13990d1f505cSdormando             if (hold_lock)
14000d1f505cSdormando                 item_trylock_unlock(hold_lock);
14019bce42f2Sdormando             pthread_mutex_unlock(&lru_locks[i]);
1402c58291ecSdormando 
140387ff9dc0Sdormando             if (crawls_persleep <= 0 && settings.lru_crawler_sleep) {
140431d533f8Sdormando                 usleep(settings.lru_crawler_sleep);
140587ff9dc0Sdormando                 crawls_persleep = settings.crawls_persleep;
140687ff9dc0Sdormando             }
14070d1f505cSdormando         }
1408c58291ecSdormando     }
14090d1f505cSdormando     if (settings.verbose > 2)
14100d1f505cSdormando         fprintf(stderr, "LRU crawler thread sleeping\n");
14116be2b6c0Sdormando     STATS_LOCK();
1412cb01d504Sdormando     stats_state.lru_crawler_running = false;
14136be2b6c0Sdormando     STATS_UNLOCK();
14140d1f505cSdormando     }
14159d635fa7Sdormando     pthread_mutex_unlock(&lru_crawler_lock);
14160d1f505cSdormando     if (settings.verbose > 2)
14170d1f505cSdormando         fprintf(stderr, "LRU crawler thread stopping\n");
14180d1f505cSdormando 
14190d1f505cSdormando     return NULL;
14200d1f505cSdormando }
14210d1f505cSdormando 
14220d1f505cSdormando static pthread_t item_crawler_tid;
14230d1f505cSdormando 
stop_item_crawler_thread(void)14240d1f505cSdormando int stop_item_crawler_thread(void) {
14250d1f505cSdormando     int ret;
14266be2b6c0Sdormando     pthread_mutex_lock(&lru_crawler_lock);
14270d1f505cSdormando     do_run_lru_crawler_thread = 0;
14286be2b6c0Sdormando     pthread_cond_signal(&lru_crawler_cond);
14296be2b6c0Sdormando     pthread_mutex_unlock(&lru_crawler_lock);
14300d1f505cSdormando     if ((ret = pthread_join(item_crawler_tid, NULL)) != 0) {
14310d1f505cSdormando         fprintf(stderr, "Failed to stop LRU crawler thread: %s\n", strerror(ret));
14320d1f505cSdormando         return -1;
14330d1f505cSdormando     }
1434bb63a3edSdormando     settings.lru_crawler = false;
14350d1f505cSdormando     return 0;
14360d1f505cSdormando }
14370d1f505cSdormando 
1438b6bf6df7Sdormando /* Lock dance to "block" until thread is waiting on its condition:
1439b6bf6df7Sdormando  * caller locks mtx. caller spawns thread.
1440b6bf6df7Sdormando  * thread blocks on mutex.
1441b6bf6df7Sdormando  * caller waits on condition, releases lock.
1442b6bf6df7Sdormando  * thread gets lock, sends signal.
1443b6bf6df7Sdormando  * caller can't wait, as thread has lock.
1444b6bf6df7Sdormando  * thread waits on condition, releases lock
1445b6bf6df7Sdormando  * caller wakes on condition, gets lock.
1446b6bf6df7Sdormando  * caller immediately releases lock.
1447b6bf6df7Sdormando  * thread is now safely waiting on condition before the caller returns.
1448b6bf6df7Sdormando  */
start_item_crawler_thread(void)14490d1f505cSdormando int start_item_crawler_thread(void) {
14500d1f505cSdormando     int ret;
14516be2b6c0Sdormando 
1452bb63a3edSdormando     if (settings.lru_crawler)
1453bb63a3edSdormando         return -1;
14546be2b6c0Sdormando     pthread_mutex_lock(&lru_crawler_lock);
14550d1f505cSdormando     do_run_lru_crawler_thread = 1;
14560d1f505cSdormando     if ((ret = pthread_create(&item_crawler_tid, NULL,
14570d1f505cSdormando         item_crawler_thread, NULL)) != 0) {
14580d1f505cSdormando         fprintf(stderr, "Can't create LRU crawler thread: %s\n",
14590d1f505cSdormando             strerror(ret));
14606be2b6c0Sdormando         pthread_mutex_unlock(&lru_crawler_lock);
14610d1f505cSdormando         return -1;
14620d1f505cSdormando     }
1463b6bf6df7Sdormando     /* Avoid returning until the crawler has actually started */
1464b6bf6df7Sdormando     pthread_cond_wait(&lru_crawler_cond, &lru_crawler_lock);
14656be2b6c0Sdormando     pthread_mutex_unlock(&lru_crawler_lock);
14666be2b6c0Sdormando 
14676be2b6c0Sdormando     return 0;
14686be2b6c0Sdormando }
14696be2b6c0Sdormando 
147087ff9dc0Sdormando /* 'remaining' is passed in so the LRU maintainer thread can scrub the whole
147187ff9dc0Sdormando  * LRU every time.
147287ff9dc0Sdormando  */
do_lru_crawler_start(uint32_t id,uint32_t remaining)147387ff9dc0Sdormando static int do_lru_crawler_start(uint32_t id, uint32_t remaining) {
147487ff9dc0Sdormando     int i;
147587ff9dc0Sdormando     uint32_t sid;
147687ff9dc0Sdormando     uint32_t tocrawl[3];
147787ff9dc0Sdormando     int starts = 0;
147887ff9dc0Sdormando     tocrawl[0] = id | HOT_LRU;
147987ff9dc0Sdormando     tocrawl[1] = id | WARM_LRU;
148087ff9dc0Sdormando     tocrawl[2] = id | COLD_LRU;
148187ff9dc0Sdormando 
148287ff9dc0Sdormando     for (i = 0; i < 3; i++) {
148387ff9dc0Sdormando         sid = tocrawl[i];
148487ff9dc0Sdormando         pthread_mutex_lock(&lru_locks[sid]);
148587ff9dc0Sdormando         if (tails[sid] != NULL) {
148687ff9dc0Sdormando             if (settings.verbose > 2)
1487f85592dcSRyan McCullagh                 fprintf(stderr, "Kicking LRU crawler off for LRU %u\n", sid);
148887ff9dc0Sdormando             crawlers[sid].nbytes = 0;
148987ff9dc0Sdormando             crawlers[sid].nkey = 0;
149087ff9dc0Sdormando             crawlers[sid].it_flags = 1; /* For a crawler, this means enabled. */
149187ff9dc0Sdormando             crawlers[sid].next = 0;
149287ff9dc0Sdormando             crawlers[sid].prev = 0;
149387ff9dc0Sdormando             crawlers[sid].time = 0;
149487ff9dc0Sdormando             crawlers[sid].remaining = remaining;
149587ff9dc0Sdormando             crawlers[sid].slabs_clsid = sid;
149687ff9dc0Sdormando             crawler_link_q((item *)&crawlers[sid]);
149787ff9dc0Sdormando             crawler_count++;
149887ff9dc0Sdormando             starts++;
149987ff9dc0Sdormando         }
150087ff9dc0Sdormando         pthread_mutex_unlock(&lru_locks[sid]);
150187ff9dc0Sdormando     }
150287ff9dc0Sdormando     if (starts) {
1503c10feb9eSdormando         STATS_LOCK();
1504cb01d504Sdormando         stats_state.lru_crawler_running = true;
1505c10feb9eSdormando         stats.lru_crawler_starts++;
1506c10feb9eSdormando         STATS_UNLOCK();
150787ff9dc0Sdormando         pthread_mutex_lock(&lru_crawler_stats_lock);
150887ff9dc0Sdormando         memset(&crawlerstats[id], 0, sizeof(crawlerstats_t));
150987ff9dc0Sdormando         crawlerstats[id].start_time = current_time;
151087ff9dc0Sdormando         pthread_mutex_unlock(&lru_crawler_stats_lock);
151187ff9dc0Sdormando     }
151287ff9dc0Sdormando     return starts;
151387ff9dc0Sdormando }
151487ff9dc0Sdormando 
lru_crawler_start(uint32_t id,uint32_t remaining)151587ff9dc0Sdormando static int lru_crawler_start(uint32_t id, uint32_t remaining) {
151687ff9dc0Sdormando     int starts;
151787ff9dc0Sdormando     if (pthread_mutex_trylock(&lru_crawler_lock) != 0) {
151887ff9dc0Sdormando         return 0;
151987ff9dc0Sdormando     }
152087ff9dc0Sdormando     starts = do_lru_crawler_start(id, remaining);
1521c10feb9eSdormando     if (starts) {
1522c10feb9eSdormando         pthread_cond_signal(&lru_crawler_cond);
1523c10feb9eSdormando     }
152487ff9dc0Sdormando     pthread_mutex_unlock(&lru_crawler_lock);
152587ff9dc0Sdormando     return starts;
152687ff9dc0Sdormando }
152787ff9dc0Sdormando 
1528e708513aSdormando /* FIXME: Split this into two functions: one to kick a crawler for a sid, and one to
1529e708513aSdormando  * parse the string. LRU maintainer code is generating a string to set up a
1530e708513aSdormando  * sid.
1531e708513aSdormando  * Also only clear the crawlerstats once per sid.
1532e708513aSdormando  */
lru_crawler_crawl(char * slabs)1533e8711e1bSdormando enum crawler_result_type lru_crawler_crawl(char *slabs) {
1534e8711e1bSdormando     char *b = NULL;
1535e8711e1bSdormando     uint32_t sid = 0;
1536e708513aSdormando     int starts = 0;
153787ff9dc0Sdormando     uint8_t tocrawl[MAX_NUMBER_OF_SLAB_CLASSES];
15386be2b6c0Sdormando     if (pthread_mutex_trylock(&lru_crawler_lock) != 0) {
15396be2b6c0Sdormando         return CRAWLER_RUNNING;
15406be2b6c0Sdormando     }
1541e8711e1bSdormando 
154287ff9dc0Sdormando     /* FIXME: I added this while debugging. Don't think it's needed? */
154387ff9dc0Sdormando     memset(tocrawl, 0, sizeof(uint8_t) * MAX_NUMBER_OF_SLAB_CLASSES);
1544f89e8613Sdormando     if (strcmp(slabs, "all") == 0) {
154587ff9dc0Sdormando         for (sid = 0; sid < MAX_NUMBER_OF_SLAB_CLASSES; sid++) {
1546f89e8613Sdormando             tocrawl[sid] = 1;
1547f89e8613Sdormando         }
1548f89e8613Sdormando     } else {
1549e8711e1bSdormando         for (char *p = strtok_r(slabs, ",", &b);
1550e8711e1bSdormando              p != NULL;
1551e8711e1bSdormando              p = strtok_r(NULL, ",", &b)) {
1552e8711e1bSdormando 
1553e8711e1bSdormando             if (!safe_strtoul(p, &sid) || sid < POWER_SMALLEST
1554a2fc8e93Sdormando                     || sid >= MAX_NUMBER_OF_SLAB_CLASSES-1) {
1555e8711e1bSdormando                 pthread_mutex_unlock(&lru_crawler_lock);
15566be2b6c0Sdormando                 return CRAWLER_BADCLASS;
15576be2b6c0Sdormando             }
155887ff9dc0Sdormando             tocrawl[sid] = 1;
1559e8711e1bSdormando         }
1560f89e8613Sdormando     }
1561e8711e1bSdormando 
156287ff9dc0Sdormando     for (sid = POWER_SMALLEST; sid < MAX_NUMBER_OF_SLAB_CLASSES; sid++) {
156387ff9dc0Sdormando         if (tocrawl[sid])
156487ff9dc0Sdormando             starts += do_lru_crawler_start(sid, settings.lru_crawler_tocrawl);
156569d1c699Sdormando     }
1566e708513aSdormando     if (starts) {
15676be2b6c0Sdormando         pthread_cond_signal(&lru_crawler_cond);
15686be2b6c0Sdormando         pthread_mutex_unlock(&lru_crawler_lock);
15696be2b6c0Sdormando         return CRAWLER_OK;
1570e708513aSdormando     } else {
1571e708513aSdormando         pthread_mutex_unlock(&lru_crawler_lock);
1572e708513aSdormando         return CRAWLER_NOTSTARTED;
1573e708513aSdormando     }
15746be2b6c0Sdormando }
15756be2b6c0Sdormando 
15766af7aa0bSdormando /* If we hold this lock, crawler can't wake up or move */
lru_crawler_pause(void)15776af7aa0bSdormando void lru_crawler_pause(void) {
15786af7aa0bSdormando     pthread_mutex_lock(&lru_crawler_lock);
15796af7aa0bSdormando }
15806af7aa0bSdormando 
lru_crawler_resume(void)15816af7aa0bSdormando void lru_crawler_resume(void) {
15826af7aa0bSdormando     pthread_mutex_unlock(&lru_crawler_lock);
15836af7aa0bSdormando }
15846af7aa0bSdormando 
init_lru_crawler(void)15856be2b6c0Sdormando int init_lru_crawler(void) {
15866be2b6c0Sdormando     if (lru_crawler_initialized == 0) {
1587e708513aSdormando         memset(&crawlerstats, 0, sizeof(crawlerstats_t) * MAX_NUMBER_OF_SLAB_CLASSES);
15886be2b6c0Sdormando         if (pthread_cond_init(&lru_crawler_cond, NULL) != 0) {
15896be2b6c0Sdormando             fprintf(stderr, "Can't initialize lru crawler condition\n");
15906be2b6c0Sdormando             return -1;
15916be2b6c0Sdormando         }
15926be2b6c0Sdormando         pthread_mutex_init(&lru_crawler_lock, NULL);
15936be2b6c0Sdormando         lru_crawler_initialized = 1;
15946be2b6c0Sdormando     }
15950d1f505cSdormando     return 0;
15960d1f505cSdormando }
1597