1f6d334e0SBrad Fitzpatrick /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
256b8339eSSteven Grimm #include "memcached.h"
360d70942SAnatoly Vorobey #include <sys/stat.h>
460d70942SAnatoly Vorobey #include <sys/socket.h>
560d70942SAnatoly Vorobey #include <sys/resource.h>
660d70942SAnatoly Vorobey #include <fcntl.h>
7c6975ef4SPaul Lindner #include <netinet/in.h>
8c6975ef4SPaul Lindner #include <errno.h>
960d70942SAnatoly Vorobey #include <stdlib.h>
1060d70942SAnatoly Vorobey #include <stdio.h>
11e688e97dSNatanael Copa #include <signal.h>
1260d70942SAnatoly Vorobey #include <string.h>
131b533267SBrad Fitzpatrick #include <time.h>
1443202f2fSBrad Fitzpatrick #include <assert.h>
150d1f505cSdormando #include <unistd.h>
1686969ea4SBrad Fitzpatrick
1777dde9f9SPaul Lindner /* Forward Declarations */
1877dde9f9SPaul Lindner static void item_link_q(item *it);
1977dde9f9SPaul Lindner static void item_unlink_q(item *it);
2077dde9f9SPaul Lindner
219bce42f2Sdormando #define HOT_LRU 0
229bce42f2Sdormando #define WARM_LRU 64
239bce42f2Sdormando #define COLD_LRU 128
249bce42f2Sdormando #define NOEXP_LRU 192
25bbef930eSdormando static unsigned int lru_type_map[4] = {HOT_LRU, WARM_LRU, COLD_LRU, NOEXP_LRU};
269bce42f2Sdormando
274de89c8cSdormando #define CLEAR_LRU(id) (id & ~(3<<6))
284de89c8cSdormando
29fbdc57c8SDustin Sallings #define LARGEST_ID POWER_LARGEST
305e0bb476Sdormando typedef struct {
31b7ba2969Sdormando uint64_t evicted;
32b7ba2969Sdormando uint64_t evicted_nonzero;
33b7ba2969Sdormando uint64_t reclaimed;
34b7ba2969Sdormando uint64_t outofmemory;
35b7ba2969Sdormando uint64_t tailrepairs;
36cb01d504Sdormando uint64_t expired_unfetched; /* items reclaimed but never touched */
37cb01d504Sdormando uint64_t evicted_unfetched; /* items evicted but never touched */
38649f7f01Sdormando uint64_t crawler_reclaimed;
39c10feb9eSdormando uint64_t crawler_items_checked;
402af4f6e6Sdormando uint64_t lrutail_reflocked;
41f7bf26cbSdormando uint64_t moves_to_cold;
42f7bf26cbSdormando uint64_t moves_to_warm;
43f7bf26cbSdormando uint64_t moves_within_lru;
44a0390847Sdormando uint64_t direct_reclaims;
45f7bf26cbSdormando rel_time_t evicted_time;
465e0bb476Sdormando } itemstats_t;
475e0bb476Sdormando
48e708513aSdormando typedef struct {
49a66b28acSdormando uint64_t histo[61];
50e708513aSdormando uint64_t ttl_hourplus;
51e708513aSdormando uint64_t noexp;
52e708513aSdormando uint64_t reclaimed;
53e708513aSdormando uint64_t seen;
54e708513aSdormando rel_time_t start_time;
55e708513aSdormando rel_time_t end_time;
56e708513aSdormando bool run_complete;
57e708513aSdormando } crawlerstats_t;
58e708513aSdormando
5960d70942SAnatoly Vorobey static item *heads[LARGEST_ID];
6060d70942SAnatoly Vorobey static item *tails[LARGEST_ID];
610d1f505cSdormando static crawler crawlers[LARGEST_ID];
625e0bb476Sdormando static itemstats_t itemstats[LARGEST_ID];
6377dde9f9SPaul Lindner static unsigned int sizes[LARGEST_ID];
64ee461d11Sdormando static uint64_t sizes_bytes[LARGEST_ID];
65ae6f4267Sdormando static unsigned int *stats_sizes_hist = NULL;
668d82383fSdormando static uint64_t stats_sizes_cas_min = 0;
67ae6f4267Sdormando static int stats_sizes_buckets = 0;
68e708513aSdormando static crawlerstats_t crawlerstats[MAX_NUMBER_OF_SLAB_CLASSES];
6986969ea4SBrad Fitzpatrick
706be2b6c0Sdormando static int crawler_count = 0;
710d1f505cSdormando static volatile int do_run_lru_crawler_thread = 0;
72fb269897Sdormando static volatile int do_run_lru_maintainer_thread = 0;
736be2b6c0Sdormando static int lru_crawler_initialized = 0;
74fb269897Sdormando static int lru_maintainer_initialized = 0;
75fb269897Sdormando static int lru_maintainer_check_clsid = 0;
766be2b6c0Sdormando static pthread_mutex_t lru_crawler_lock = PTHREAD_MUTEX_INITIALIZER;
776be2b6c0Sdormando static pthread_cond_t lru_crawler_cond = PTHREAD_COND_INITIALIZER;
78e708513aSdormando static pthread_mutex_t lru_crawler_stats_lock = PTHREAD_MUTEX_INITIALIZER;
79fb269897Sdormando static pthread_mutex_t lru_maintainer_lock = PTHREAD_MUTEX_INITIALIZER;
8069d1c699Sdormando static pthread_mutex_t cas_id_lock = PTHREAD_MUTEX_INITIALIZER;
81ae6f4267Sdormando static pthread_mutex_t stats_sizes_lock = PTHREAD_MUTEX_INITIALIZER;
820d1f505cSdormando
item_stats_reset(void)8353180103STrond Norbye void item_stats_reset(void) {
849bce42f2Sdormando int i;
859bce42f2Sdormando for (i = 0; i < LARGEST_ID; i++) {
869bce42f2Sdormando pthread_mutex_lock(&lru_locks[i]);
879bce42f2Sdormando memset(&itemstats[i], 0, sizeof(itemstats_t));
889bce42f2Sdormando pthread_mutex_unlock(&lru_locks[i]);
899bce42f2Sdormando }
9053180103STrond Norbye }
9153180103STrond Norbye
92a0390847Sdormando static int lru_pull_tail(const int orig_id, const int cur_lru,
93*690a9a9dSEiichi Tsukata const uint64_t total_bytes, const bool do_evict);
9487ff9dc0Sdormando static int lru_crawler_start(uint32_t id, uint32_t remaining);
9553180103STrond Norbye
96e4a45965SDustin Sallings /* Get the next CAS id for a new item. */
9769d1c699Sdormando /* TODO: refactor some atomics for this. */
get_cas_id(void)98df1b7e42STrond Norbye uint64_t get_cas_id(void) {
99579466f2Sdormando static uint64_t cas_id = 0;
10069d1c699Sdormando pthread_mutex_lock(&cas_id_lock);
10169d1c699Sdormando uint64_t next_id = ++cas_id;
10269d1c699Sdormando pthread_mutex_unlock(&cas_id_lock);
10369d1c699Sdormando return next_id;
104e4a45965SDustin Sallings }
105e4a45965SDustin Sallings
item_is_flushed(item * it)106004e2211Sdormando int item_is_flushed(item *it) {
10790593dcaSdormando rel_time_t oldest_live = settings.oldest_live;
10890593dcaSdormando uint64_t cas = ITEM_get_cas(it);
10990593dcaSdormando uint64_t oldest_cas = settings.oldest_cas;
11090593dcaSdormando if (oldest_live == 0 || oldest_live > current_time)
11190593dcaSdormando return 0;
11290593dcaSdormando if ((it->time <= oldest_live)
11390593dcaSdormando || (oldest_cas != 0 && cas != 0 && cas < oldest_cas)) {
11490593dcaSdormando return 1;
11590593dcaSdormando }
11690593dcaSdormando return 0;
11790593dcaSdormando }
11890593dcaSdormando
noexp_lru_size(int slabs_clsid)1194de89c8cSdormando static unsigned int noexp_lru_size(int slabs_clsid) {
1204de89c8cSdormando int id = CLEAR_LRU(slabs_clsid);
121d6e96467Sdormando id |= NOEXP_LRU;
1224de89c8cSdormando unsigned int ret;
1234de89c8cSdormando pthread_mutex_lock(&lru_locks[id]);
124ee461d11Sdormando ret = sizes_bytes[id];
1254de89c8cSdormando pthread_mutex_unlock(&lru_locks[id]);
1264de89c8cSdormando return ret;
1274de89c8cSdormando }
1284de89c8cSdormando
12956b8339eSSteven Grimm /* Enable this for reference-count debugging. */
13056b8339eSSteven Grimm #if 0
13156b8339eSSteven Grimm # define DEBUG_REFCNT(it,op) \
13256b8339eSSteven Grimm fprintf(stderr, "item %x refcnt(%c) %d %c%c%c\n", \
13356b8339eSSteven Grimm it, op, it->refcount, \
13456b8339eSSteven Grimm (it->it_flags & ITEM_LINKED) ? 'L' : ' ', \
1355da8dbabSTrond Norbye (it->it_flags & ITEM_SLABBED) ? 'S' : ' ')
13656b8339eSSteven Grimm #else
13756b8339eSSteven Grimm # define DEBUG_REFCNT(it,op) while(0)
13856b8339eSSteven Grimm #endif
13986969ea4SBrad Fitzpatrick
14044ef96eeSPaul Lindner /**
141c9607c6dSBrad Fitzpatrick * Generates the variable-sized part of the header for an object.
142c9607c6dSBrad Fitzpatrick *
143217dcce0SSteven Grimm * key - The key
144217dcce0SSteven Grimm * nkey - The length of the key
145217dcce0SSteven Grimm * flags - key flags
146217dcce0SSteven Grimm * nbytes - Number of bytes to hold value and addition CRLF terminator
147c9607c6dSBrad Fitzpatrick * suffix - Buffer for the "VALUE" line suffix (flags, size).
148c9607c6dSBrad Fitzpatrick * nsuffix - The length of the suffix is stored here.
149c9607c6dSBrad Fitzpatrick *
150c9607c6dSBrad Fitzpatrick * Returns the total size of the header.
151c9607c6dSBrad Fitzpatrick */
item_make_header(const uint8_t nkey,const unsigned int flags,const int nbytes,char * suffix,uint8_t * nsuffix)152181ef834Sdormando static size_t item_make_header(const uint8_t nkey, const unsigned int flags, const int nbytes,
15377dde9f9SPaul Lindner char *suffix, uint8_t *nsuffix) {
15477dde9f9SPaul Lindner /* suffix is defined at 40 chars elsewhere.. */
155181ef834Sdormando *nsuffix = (uint8_t) snprintf(suffix, 40, " %u %d\r\n", flags, nbytes - 2);
156217dcce0SSteven Grimm return sizeof(item) + nkey + *nsuffix + nbytes;
157c9607c6dSBrad Fitzpatrick }
15886969ea4SBrad Fitzpatrick
do_item_alloc(char * key,const size_t nkey,const unsigned int flags,const rel_time_t exptime,const int nbytes)159181ef834Sdormando item *do_item_alloc(char *key, const size_t nkey, const unsigned int flags,
160*690a9a9dSEiichi Tsukata const rel_time_t exptime, const int nbytes) {
161a0390847Sdormando int i;
16277dde9f9SPaul Lindner uint8_t nsuffix;
163d3807d06STrond Norbye item *it = NULL;
164c9607c6dSBrad Fitzpatrick char suffix[40];
16578955139STim Yardley size_t ntotal = item_make_header(nkey + 1, flags, nbytes, suffix, &nsuffix);
166eda68b70STrond Norbye if (settings.use_cas) {
167eda68b70STrond Norbye ntotal += sizeof(uint64_t);
168eda68b70STrond Norbye }
16986969ea4SBrad Fitzpatrick
17078955139STim Yardley unsigned int id = slabs_clsid(ntotal);
17160d70942SAnatoly Vorobey if (id == 0)
17260d70942SAnatoly Vorobey return 0;
17386969ea4SBrad Fitzpatrick
174a0390847Sdormando /* If no memory is available, attempt a direct LRU juggle/eviction */
175f5f8f1a0Sdormando /* This is a race in order to simplify lru_pull_tail; in cases where
176a0390847Sdormando * locked items are on the tail, you want them to fall out and cause
177a0390847Sdormando * occasional OOM's, rather than internally work around them.
178f5f8f1a0Sdormando * This also gives one fewer code path for slab alloc/free
179a0390847Sdormando */
180ee461d11Sdormando /* TODO: if power_largest, try a lot more times? or a number of times
18151a828b9Sdormando * based on how many chunks the new object should take up?
18251a828b9Sdormando * or based on the size of an object lru_pull_tail() says it evicted?
18351a828b9Sdormando * This is a classical GC problem if "large items" are of too varying of
18451a828b9Sdormando * sizes. This is actually okay here since the larger the data, the more
18551a828b9Sdormando * bandwidth it takes, the more time we can loop in comparison to serving
18651a828b9Sdormando * and replacing small items.
18751a828b9Sdormando */
18851a828b9Sdormando for (i = 0; i < 10; i++) {
189ee461d11Sdormando uint64_t total_bytes;
190d9edfefdSdormando /* Try to reclaim memory first */
191d9edfefdSdormando if (!settings.lru_maintainer_thread) {
192*690a9a9dSEiichi Tsukata lru_pull_tail(id, COLD_LRU, 0, false);
193d9edfefdSdormando }
194ee461d11Sdormando it = slabs_alloc(ntotal, id, &total_bytes, 0);
195ee461d11Sdormando
1964de89c8cSdormando if (settings.expirezero_does_not_evict)
197ee461d11Sdormando total_bytes -= noexp_lru_size(id);
198ee461d11Sdormando
199a0390847Sdormando if (it == NULL) {
200d9edfefdSdormando if (settings.lru_maintainer_thread) {
201*690a9a9dSEiichi Tsukata lru_pull_tail(id, HOT_LRU, total_bytes, false);
202*690a9a9dSEiichi Tsukata lru_pull_tail(id, WARM_LRU, total_bytes, false);
203*690a9a9dSEiichi Tsukata if (lru_pull_tail(id, COLD_LRU, total_bytes, true) <= 0)
20451a828b9Sdormando break;
205a0390847Sdormando } else {
206*690a9a9dSEiichi Tsukata if (lru_pull_tail(id, COLD_LRU, 0, true) <= 0)
20751a828b9Sdormando break;
208d9edfefdSdormando }
209d9edfefdSdormando } else {
210a0390847Sdormando break;
211a0390847Sdormando }
212a0390847Sdormando }
213a0390847Sdormando
214a0390847Sdormando if (i > 0) {
215a0390847Sdormando pthread_mutex_lock(&lru_locks[id]);
216a0390847Sdormando itemstats[id].direct_reclaims += i;
217a0390847Sdormando pthread_mutex_unlock(&lru_locks[id]);
218a0390847Sdormando }
2199bce42f2Sdormando
2209bce42f2Sdormando if (it == NULL) {
2219bce42f2Sdormando pthread_mutex_lock(&lru_locks[id]);
2229bce42f2Sdormando itemstats[id].outofmemory++;
2239bce42f2Sdormando pthread_mutex_unlock(&lru_locks[id]);
2249bce42f2Sdormando return NULL;
2259bce42f2Sdormando }
2269bce42f2Sdormando
2279bce42f2Sdormando assert(it->slabs_clsid == 0);
2289bce42f2Sdormando //assert(it != heads[id]);
2299bce42f2Sdormando
23062415f16Sdormando /* Refcount is seeded to 1 by slabs_alloc() */
2310567967aSdormando it->next = it->prev = 0;
2320567967aSdormando
2339bce42f2Sdormando /* Items are initially loaded into the HOT_LRU. This is '0' but I want at
2349bce42f2Sdormando * least a note here. Compiler (hopefully?) optimizes this out.
2359bce42f2Sdormando */
236d9edfefdSdormando if (settings.lru_maintainer_thread) {
2374de89c8cSdormando if (exptime == 0 && settings.expirezero_does_not_evict) {
2384de89c8cSdormando id |= NOEXP_LRU;
2394de89c8cSdormando } else {
2409bce42f2Sdormando id |= HOT_LRU;
2414de89c8cSdormando }
242d9edfefdSdormando } else {
243d9edfefdSdormando /* There is only COLD in compat-mode */
244d9edfefdSdormando id |= COLD_LRU;
245d9edfefdSdormando }
2469bce42f2Sdormando it->slabs_clsid = id;
2479bce42f2Sdormando
2489bce42f2Sdormando DEBUG_REFCNT(it, '*');
2490567967aSdormando it->it_flags |= settings.use_cas ? ITEM_CAS : 0;
2509bce42f2Sdormando it->nkey = nkey;
2519bce42f2Sdormando it->nbytes = nbytes;
2529bce42f2Sdormando memcpy(ITEM_key(it), key, nkey);
2539bce42f2Sdormando it->exptime = exptime;
2549bce42f2Sdormando memcpy(ITEM_suffix(it), suffix, (size_t)nsuffix);
2559bce42f2Sdormando it->nsuffix = nsuffix;
2560567967aSdormando
2570567967aSdormando /* Need to shuffle the pointer stored in h_next into it->data. */
2580567967aSdormando if (it->it_flags & ITEM_CHUNKED) {
2590567967aSdormando item_chunk *chunk = (item_chunk *) ITEM_data(it);
2600567967aSdormando
2610567967aSdormando chunk->next = (item_chunk *) it->h_next;
2620567967aSdormando chunk->prev = 0;
2630567967aSdormando chunk->head = it;
2640567967aSdormando /* Need to chain back into the head's chunk */
2650567967aSdormando chunk->next->prev = chunk;
2660567967aSdormando chunk->size = chunk->next->size - ((char *)chunk - (char *)it);
2670567967aSdormando chunk->used = 0;
2680567967aSdormando assert(chunk->size > 0);
2690567967aSdormando }
2700567967aSdormando it->h_next = 0;
2710567967aSdormando
2729bce42f2Sdormando return it;
2739bce42f2Sdormando }
2749bce42f2Sdormando
item_free(item * it)27560d70942SAnatoly Vorobey void item_free(item *it) {
27677dde9f9SPaul Lindner size_t ntotal = ITEM_ntotal(it);
2778d3ac826Sdormando unsigned int clsid;
27843202f2fSBrad Fitzpatrick assert((it->it_flags & ITEM_LINKED) == 0);
2791baa8961SBrad Fitzpatrick assert(it != heads[it->slabs_clsid]);
2801baa8961SBrad Fitzpatrick assert(it != tails[it->slabs_clsid]);
281c08383afSBrad Fitzpatrick assert(it->refcount == 0);
28286969ea4SBrad Fitzpatrick
283b7b9d5f4SBrad Fitzpatrick /* so slab size changer can tell later if item is already free or not */
2849bce42f2Sdormando clsid = ITEM_clsid(it);
28556b8339eSSteven Grimm DEBUG_REFCNT(it, 'F');
2868d3ac826Sdormando slabs_free(it, ntotal, clsid);
28760d70942SAnatoly Vorobey }
28886969ea4SBrad Fitzpatrick
28944ef96eeSPaul Lindner /**
290c9607c6dSBrad Fitzpatrick * Returns true if an item will fit in the cache (its size does not exceed
291c9607c6dSBrad Fitzpatrick * the maximum for a cache entry.)
292c9607c6dSBrad Fitzpatrick */
item_size_ok(const size_t nkey,const int flags,const int nbytes)29310862f60SPaul Lindner bool item_size_ok(const size_t nkey, const int flags, const int nbytes) {
294c9607c6dSBrad Fitzpatrick char prefix[40];
29577dde9f9SPaul Lindner uint8_t nsuffix;
29686969ea4SBrad Fitzpatrick
297911c9d0aSTrond Norbye size_t ntotal = item_make_header(nkey + 1, flags, nbytes,
298911c9d0aSTrond Norbye prefix, &nsuffix);
299911c9d0aSTrond Norbye if (settings.use_cas) {
300911c9d0aSTrond Norbye ntotal += sizeof(uint64_t);
301911c9d0aSTrond Norbye }
302911c9d0aSTrond Norbye
303911c9d0aSTrond Norbye return slabs_clsid(ntotal) != 0;
304c9607c6dSBrad Fitzpatrick }
30586969ea4SBrad Fitzpatrick
do_item_link_q(item * it)3069bce42f2Sdormando static void do_item_link_q(item *it) { /* item is the new head */
30760d70942SAnatoly Vorobey item **head, **tail;
30854326f42SBrad Fitzpatrick assert((it->it_flags & ITEM_SLABBED) == 0);
30986969ea4SBrad Fitzpatrick
31060d70942SAnatoly Vorobey head = &heads[it->slabs_clsid];
31160d70942SAnatoly Vorobey tail = &tails[it->slabs_clsid];
31243202f2fSBrad Fitzpatrick assert(it != *head);
31343202f2fSBrad Fitzpatrick assert((*head && *tail) || (*head == 0 && *tail == 0));
31460d70942SAnatoly Vorobey it->prev = 0;
31560d70942SAnatoly Vorobey it->next = *head;
31660d70942SAnatoly Vorobey if (it->next) it->next->prev = it;
31760d70942SAnatoly Vorobey *head = it;
3186d74e134SBrad Fitzpatrick if (*tail == 0) *tail = it;
31960d70942SAnatoly Vorobey sizes[it->slabs_clsid]++;
320ee461d11Sdormando sizes_bytes[it->slabs_clsid] += it->nbytes;
32160d70942SAnatoly Vorobey return;
32260d70942SAnatoly Vorobey }
32386969ea4SBrad Fitzpatrick
item_link_q(item * it)3249bce42f2Sdormando static void item_link_q(item *it) {
3259bce42f2Sdormando pthread_mutex_lock(&lru_locks[it->slabs_clsid]);
3269bce42f2Sdormando do_item_link_q(it);
3279bce42f2Sdormando pthread_mutex_unlock(&lru_locks[it->slabs_clsid]);
3289bce42f2Sdormando }
3299bce42f2Sdormando
do_item_unlink_q(item * it)3309bce42f2Sdormando static void do_item_unlink_q(item *it) {
33160d70942SAnatoly Vorobey item **head, **tail;
33260d70942SAnatoly Vorobey head = &heads[it->slabs_clsid];
33360d70942SAnatoly Vorobey tail = &tails[it->slabs_clsid];
33486969ea4SBrad Fitzpatrick
335c3b1a8c9SBrad Fitzpatrick if (*head == it) {
336c3b1a8c9SBrad Fitzpatrick assert(it->prev == 0);
337c3b1a8c9SBrad Fitzpatrick *head = it->next;
338c3b1a8c9SBrad Fitzpatrick }
339c3b1a8c9SBrad Fitzpatrick if (*tail == it) {
340c3b1a8c9SBrad Fitzpatrick assert(it->next == 0);
341c3b1a8c9SBrad Fitzpatrick *tail = it->prev;
342c3b1a8c9SBrad Fitzpatrick }
343c3b1a8c9SBrad Fitzpatrick assert(it->next != it);
344c3b1a8c9SBrad Fitzpatrick assert(it->prev != it);
34586969ea4SBrad Fitzpatrick
34660d70942SAnatoly Vorobey if (it->next) it->next->prev = it->prev;
34760d70942SAnatoly Vorobey if (it->prev) it->prev->next = it->next;
34860d70942SAnatoly Vorobey sizes[it->slabs_clsid]--;
349ee461d11Sdormando sizes_bytes[it->slabs_clsid] -= it->nbytes;
35060d70942SAnatoly Vorobey return;
35160d70942SAnatoly Vorobey }
35286969ea4SBrad Fitzpatrick
item_unlink_q(item * it)3539bce42f2Sdormando static void item_unlink_q(item *it) {
3549bce42f2Sdormando pthread_mutex_lock(&lru_locks[it->slabs_clsid]);
3559bce42f2Sdormando do_item_unlink_q(it);
3569bce42f2Sdormando pthread_mutex_unlock(&lru_locks[it->slabs_clsid]);
3579bce42f2Sdormando }
3589bce42f2Sdormando
do_item_link(item * it,const uint32_t hv)359bab9acd1Sdormando int do_item_link(item *it, const uint32_t hv) {
36080ec0955STrond Norbye MEMCACHED_ITEM_LINK(ITEM_key(it), it->nkey, it->nbytes);
36154326f42SBrad Fitzpatrick assert((it->it_flags & (ITEM_LINKED|ITEM_SLABBED)) == 0);
36260d70942SAnatoly Vorobey it->it_flags |= ITEM_LINKED;
363c9607c6dSBrad Fitzpatrick it->time = current_time;
36486969ea4SBrad Fitzpatrick
36556b8339eSSteven Grimm STATS_LOCK();
366cb01d504Sdormando stats_state.curr_bytes += ITEM_ntotal(it);
367cb01d504Sdormando stats_state.curr_items += 1;
36860d70942SAnatoly Vorobey stats.total_items += 1;
36956b8339eSSteven Grimm STATS_UNLOCK();
37086969ea4SBrad Fitzpatrick
371c2cdee22SDustin Sallings /* Allocate a new CAS ID on link. */
372eda68b70STrond Norbye ITEM_set_cas(it, (settings.use_cas) ? get_cas_id() : 0);
3738fe5bf1fSdormando assoc_insert(it, hv);
37460d70942SAnatoly Vorobey item_link_q(it);
37569d1c699Sdormando refcount_incr(&it->refcount);
376ae6f4267Sdormando item_stats_sizes_add(it);
37786969ea4SBrad Fitzpatrick
37860d70942SAnatoly Vorobey return 1;
37960d70942SAnatoly Vorobey }
38086969ea4SBrad Fitzpatrick
do_item_unlink(item * it,const uint32_t hv)381bab9acd1Sdormando void do_item_unlink(item *it, const uint32_t hv) {
38280ec0955STrond Norbye MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes);
38377dde9f9SPaul Lindner if ((it->it_flags & ITEM_LINKED) != 0) {
38460d70942SAnatoly Vorobey it->it_flags &= ~ITEM_LINKED;
38556b8339eSSteven Grimm STATS_LOCK();
386cb01d504Sdormando stats_state.curr_bytes -= ITEM_ntotal(it);
387cb01d504Sdormando stats_state.curr_items -= 1;
38856b8339eSSteven Grimm STATS_UNLOCK();
389ae6f4267Sdormando item_stats_sizes_remove(it);
3908fe5bf1fSdormando assoc_delete(ITEM_key(it), it->nkey, hv);
3918fe5bf1fSdormando item_unlink_q(it);
392f4983b20Sdormando do_item_remove(it);
3938fe5bf1fSdormando }
3948fe5bf1fSdormando }
3958fe5bf1fSdormando
39640b7b4b2Sdormando /* FIXME: Is it necessary to keep this copy/pasted code? */
do_item_unlink_nolock(item * it,const uint32_t hv)3978fe5bf1fSdormando void do_item_unlink_nolock(item *it, const uint32_t hv) {
3988fe5bf1fSdormando MEMCACHED_ITEM_UNLINK(ITEM_key(it), it->nkey, it->nbytes);
3998fe5bf1fSdormando if ((it->it_flags & ITEM_LINKED) != 0) {
4008fe5bf1fSdormando it->it_flags &= ~ITEM_LINKED;
4018fe5bf1fSdormando STATS_LOCK();
402cb01d504Sdormando stats_state.curr_bytes -= ITEM_ntotal(it);
403cb01d504Sdormando stats_state.curr_items -= 1;
4048fe5bf1fSdormando STATS_UNLOCK();
405ae6f4267Sdormando item_stats_sizes_remove(it);
406bab9acd1Sdormando assoc_delete(ITEM_key(it), it->nkey, hv);
4079bce42f2Sdormando do_item_unlink_q(it);
408f4983b20Sdormando do_item_remove(it);
40960d70942SAnatoly Vorobey }
41056b8339eSSteven Grimm }
41186969ea4SBrad Fitzpatrick
do_item_remove(item * it)41256b8339eSSteven Grimm void do_item_remove(item *it) {
41380ec0955STrond Norbye MEMCACHED_ITEM_REMOVE(ITEM_key(it), it->nkey, it->nbytes);
41454326f42SBrad Fitzpatrick assert((it->it_flags & ITEM_SLABBED) == 0);
415cb9c269bSdormando assert(it->refcount > 0);
416193a653eSdormando
4173b961388Sdormando if (refcount_decr(&it->refcount) == 0) {
41860d70942SAnatoly Vorobey item_free(it);
41960d70942SAnatoly Vorobey }
42060d70942SAnatoly Vorobey }
42186969ea4SBrad Fitzpatrick
422f2a4e5b4Sdormando /* Copy/paste to avoid adding two extra branches for all common calls, since
423f5f8f1a0Sdormando * _nolock is only used in an uncommon case where we want to relink. */
do_item_update_nolock(item * it)424fb269897Sdormando void do_item_update_nolock(item *it) {
425f2a4e5b4Sdormando MEMCACHED_ITEM_UPDATE(ITEM_key(it), it->nkey, it->nbytes);
426f2a4e5b4Sdormando if (it->time < current_time - ITEM_UPDATE_INTERVAL) {
427f2a4e5b4Sdormando assert((it->it_flags & ITEM_SLABBED) == 0);
428f2a4e5b4Sdormando
429f2a4e5b4Sdormando if ((it->it_flags & ITEM_LINKED) != 0) {
430fb269897Sdormando do_item_unlink_q(it);
431f2a4e5b4Sdormando it->time = current_time;
432fb269897Sdormando do_item_link_q(it);
433f2a4e5b4Sdormando }
434f2a4e5b4Sdormando }
435f2a4e5b4Sdormando }
436f2a4e5b4Sdormando
437f5f8f1a0Sdormando /* Bump the last accessed time, or relink if we're in compat mode */
do_item_update(item * it)43856b8339eSSteven Grimm void do_item_update(item *it) {
43980ec0955STrond Norbye MEMCACHED_ITEM_UPDATE(ITEM_key(it), it->nkey, it->nbytes);
440217dcce0SSteven Grimm if (it->time < current_time - ITEM_UPDATE_INTERVAL) {
44154326f42SBrad Fitzpatrick assert((it->it_flags & ITEM_SLABBED) == 0);
44286969ea4SBrad Fitzpatrick
443193a653eSdormando if ((it->it_flags & ITEM_LINKED) != 0) {
444c9607c6dSBrad Fitzpatrick it->time = current_time;
445d9edfefdSdormando if (!settings.lru_maintainer_thread) {
446a0390847Sdormando item_unlink_q(it);
44760d70942SAnatoly Vorobey item_link_q(it);
448d9edfefdSdormando }
449217dcce0SSteven Grimm }
45056b8339eSSteven Grimm }
451a0390847Sdormando }
45286969ea4SBrad Fitzpatrick
do_item_replace(item * it,item * new_it,const uint32_t hv)453bab9acd1Sdormando int do_item_replace(item *it, item *new_it, const uint32_t hv) {
45480ec0955STrond Norbye MEMCACHED_ITEM_REPLACE(ITEM_key(it), it->nkey, it->nbytes,
45580ec0955STrond Norbye ITEM_key(new_it), new_it->nkey, new_it->nbytes);
45654326f42SBrad Fitzpatrick assert((it->it_flags & ITEM_SLABBED) == 0);
45786969ea4SBrad Fitzpatrick
458bab9acd1Sdormando do_item_unlink(it, hv);
459bab9acd1Sdormando return do_item_link(new_it, hv);
46060d70942SAnatoly Vorobey }
46186969ea4SBrad Fitzpatrick
46277dde9f9SPaul Lindner /*@null@*/
46307c632f7Sdormando /* This is walking the line of violating lock order, but I think it's safe.
46407c632f7Sdormando * If the LRU lock is held, an item in the LRU cannot be wiped and freed.
46507c632f7Sdormando * The data could possibly be overwritten, but this is only accessing the
46607c632f7Sdormando * headers.
46707c632f7Sdormando * It may not be the best idea to leave it like this, but for now it's safe.
468f5f8f1a0Sdormando * FIXME: only dumps the hot LRU with the new LRU's.
46907c632f7Sdormando */
item_cachedump(const unsigned int slabs_clsid,const unsigned int limit,unsigned int * bytes)4709bce42f2Sdormando char *item_cachedump(const unsigned int slabs_clsid, const unsigned int limit, unsigned int *bytes) {
47144ef96eeSPaul Lindner unsigned int memlimit = 2 * 1024 * 1024; /* 2MB max response size */
47260d70942SAnatoly Vorobey char *buffer;
47377dde9f9SPaul Lindner unsigned int bufcurr;
47460d70942SAnatoly Vorobey item *it;
47544ef96eeSPaul Lindner unsigned int len;
47644ef96eeSPaul Lindner unsigned int shown = 0;
477ecdb0114SDustin Sallings char key_temp[KEY_MAX_LENGTH + 1];
4788f5960e0SBrad Fitzpatrick char temp[512];
479d9edfefdSdormando unsigned int id = slabs_clsid;
480d9edfefdSdormando if (!settings.lru_maintainer_thread)
481d9edfefdSdormando id |= COLD_LRU;
48286969ea4SBrad Fitzpatrick
483d9edfefdSdormando pthread_mutex_lock(&lru_locks[id]);
484d9edfefdSdormando it = heads[id];
48586969ea4SBrad Fitzpatrick
48677dde9f9SPaul Lindner buffer = malloc((size_t)memlimit);
4879bce42f2Sdormando if (buffer == 0) {
4889bce42f2Sdormando return NULL;
4899bce42f2Sdormando }
49060d70942SAnatoly Vorobey bufcurr = 0;
49186969ea4SBrad Fitzpatrick
49277dde9f9SPaul Lindner while (it != NULL && (limit == 0 || shown < limit)) {
493ecdb0114SDustin Sallings assert(it->nkey <= KEY_MAX_LENGTH);
494b3bbd3b2Sdormando if (it->nbytes == 0 && it->nkey == 0) {
495b3bbd3b2Sdormando it = it->next;
496b3bbd3b2Sdormando continue;
497b3bbd3b2Sdormando }
498ecdb0114SDustin Sallings /* Copy the key since it may not be null-terminated in the struct */
499bd2f3ab2SDustin Sallings strncpy(key_temp, ITEM_key(it), it->nkey);
500bd2f3ab2SDustin Sallings key_temp[it->nkey] = 0x00; /* terminate */
5017dcfcedcSDustin Sallings len = snprintf(temp, sizeof(temp), "ITEM %s [%d b; %lu s]\r\n",
502ecdb0114SDustin Sallings key_temp, it->nbytes - 2,
5031b13a586Sdormando it->exptime == 0 ? 0 :
5041fdfb7e9STrond Norbye (unsigned long)it->exptime + process_started);
505d5d9ff0dSAndrei Nigmatulin if (bufcurr + len + 6 > memlimit) /* 6 is END\r\n\0 */
50660d70942SAnatoly Vorobey break;
5077db0d07cSDustin Sallings memcpy(buffer + bufcurr, temp, len);
50860d70942SAnatoly Vorobey bufcurr += len;
50960d70942SAnatoly Vorobey shown++;
51060d70942SAnatoly Vorobey it = it->next;
51160d70942SAnatoly Vorobey }
51286969ea4SBrad Fitzpatrick
513c47ee897SPaul Lindner memcpy(buffer + bufcurr, "END\r\n", 6);
51460d70942SAnatoly Vorobey bufcurr += 5;
51586969ea4SBrad Fitzpatrick
51660d70942SAnatoly Vorobey *bytes = bufcurr;
517d9edfefdSdormando pthread_mutex_unlock(&lru_locks[id]);
51860d70942SAnatoly Vorobey return buffer;
51960d70942SAnatoly Vorobey }
52086969ea4SBrad Fitzpatrick
item_stats_totals(ADD_STAT add_stats,void * c)5219bce42f2Sdormando void item_stats_totals(ADD_STAT add_stats, void *c) {
52267d7cfc1Sdormando itemstats_t totals;
52367d7cfc1Sdormando memset(&totals, 0, sizeof(itemstats_t));
524bbef930eSdormando int n;
525bbef930eSdormando for (n = 0; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
526bbef930eSdormando int x;
52767d7cfc1Sdormando int i;
528bbef930eSdormando for (x = 0; x < 4; x++) {
529bbef930eSdormando i = n | lru_type_map[x];
5309bce42f2Sdormando pthread_mutex_lock(&lru_locks[i]);
53167d7cfc1Sdormando totals.expired_unfetched += itemstats[i].expired_unfetched;
53267d7cfc1Sdormando totals.evicted_unfetched += itemstats[i].evicted_unfetched;
53367d7cfc1Sdormando totals.evicted += itemstats[i].evicted;
53467d7cfc1Sdormando totals.reclaimed += itemstats[i].reclaimed;
535649f7f01Sdormando totals.crawler_reclaimed += itemstats[i].crawler_reclaimed;
536c10feb9eSdormando totals.crawler_items_checked += itemstats[i].crawler_items_checked;
5372af4f6e6Sdormando totals.lrutail_reflocked += itemstats[i].lrutail_reflocked;
538f7bf26cbSdormando totals.moves_to_cold += itemstats[i].moves_to_cold;
539f7bf26cbSdormando totals.moves_to_warm += itemstats[i].moves_to_warm;
540f7bf26cbSdormando totals.moves_within_lru += itemstats[i].moves_within_lru;
541a0390847Sdormando totals.direct_reclaims += itemstats[i].direct_reclaims;
5429bce42f2Sdormando pthread_mutex_unlock(&lru_locks[i]);
54367d7cfc1Sdormando }
544bbef930eSdormando }
54567d7cfc1Sdormando APPEND_STAT("expired_unfetched", "%llu",
54667d7cfc1Sdormando (unsigned long long)totals.expired_unfetched);
54767d7cfc1Sdormando APPEND_STAT("evicted_unfetched", "%llu",
54867d7cfc1Sdormando (unsigned long long)totals.evicted_unfetched);
54967d7cfc1Sdormando APPEND_STAT("evictions", "%llu",
55067d7cfc1Sdormando (unsigned long long)totals.evicted);
55167d7cfc1Sdormando APPEND_STAT("reclaimed", "%llu",
55267d7cfc1Sdormando (unsigned long long)totals.reclaimed);
553649f7f01Sdormando APPEND_STAT("crawler_reclaimed", "%llu",
554649f7f01Sdormando (unsigned long long)totals.crawler_reclaimed);
555c10feb9eSdormando APPEND_STAT("crawler_items_checked", "%llu",
556c10feb9eSdormando (unsigned long long)totals.crawler_items_checked);
5572af4f6e6Sdormando APPEND_STAT("lrutail_reflocked", "%llu",
5582af4f6e6Sdormando (unsigned long long)totals.lrutail_reflocked);
559d9edfefdSdormando if (settings.lru_maintainer_thread) {
560f7bf26cbSdormando APPEND_STAT("moves_to_cold", "%llu",
561f7bf26cbSdormando (unsigned long long)totals.moves_to_cold);
562f7bf26cbSdormando APPEND_STAT("moves_to_warm", "%llu",
563f7bf26cbSdormando (unsigned long long)totals.moves_to_warm);
564f7bf26cbSdormando APPEND_STAT("moves_within_lru", "%llu",
565f7bf26cbSdormando (unsigned long long)totals.moves_within_lru);
566a0390847Sdormando APPEND_STAT("direct_reclaims", "%llu",
567a0390847Sdormando (unsigned long long)totals.direct_reclaims);
56867d7cfc1Sdormando }
569d9edfefdSdormando }
57067d7cfc1Sdormando
item_stats(ADD_STAT add_stats,void * c)5719bce42f2Sdormando void item_stats(ADD_STAT add_stats, void *c) {
572bbef930eSdormando itemstats_t totals;
573bbef930eSdormando int n;
574bbef930eSdormando for (n = 0; n < MAX_NUMBER_OF_SLAB_CLASSES; n++) {
575bbef930eSdormando memset(&totals, 0, sizeof(itemstats_t));
576bbef930eSdormando int x;
57717df5c0eSTrond Norbye int i;
578bbef930eSdormando unsigned int size = 0;
579bbef930eSdormando unsigned int age = 0;
5808ab8a1dbSdormando unsigned int lru_size_map[4];
581b89f7c7bSDustin Sallings const char *fmt = "items:%d:%s";
58288a68689SDustin Sallings char key_str[STAT_KEY_LEN];
58388a68689SDustin Sallings char val_str[STAT_VAL_LEN];
584b89f7c7bSDustin Sallings int klen = 0, vlen = 0;
585bbef930eSdormando for (x = 0; x < 4; x++) {
586bbef930eSdormando i = n | lru_type_map[x];
587bbef930eSdormando pthread_mutex_lock(&lru_locks[i]);
588bbef930eSdormando totals.evicted += itemstats[i].evicted;
589bbef930eSdormando totals.evicted_nonzero += itemstats[i].evicted_nonzero;
590bbef930eSdormando totals.outofmemory += itemstats[i].outofmemory;
591bbef930eSdormando totals.tailrepairs += itemstats[i].tailrepairs;
592d9edfefdSdormando totals.reclaimed += itemstats[i].reclaimed;
593bbef930eSdormando totals.expired_unfetched += itemstats[i].expired_unfetched;
594bbef930eSdormando totals.evicted_unfetched += itemstats[i].evicted_unfetched;
595bbef930eSdormando totals.crawler_reclaimed += itemstats[i].crawler_reclaimed;
596c10feb9eSdormando totals.crawler_items_checked += itemstats[i].crawler_items_checked;
597bbef930eSdormando totals.lrutail_reflocked += itemstats[i].lrutail_reflocked;
598bbef930eSdormando totals.moves_to_cold += itemstats[i].moves_to_cold;
599bbef930eSdormando totals.moves_to_warm += itemstats[i].moves_to_warm;
600bbef930eSdormando totals.moves_within_lru += itemstats[i].moves_within_lru;
601bbef930eSdormando totals.direct_reclaims += itemstats[i].direct_reclaims;
602bbef930eSdormando size += sizes[i];
6038ab8a1dbSdormando lru_size_map[x] = sizes[i];
604bbef930eSdormando if (lru_type_map[x] == COLD_LRU && tails[i] != NULL)
605bbef930eSdormando age = current_time - tails[i]->time;
6069bce42f2Sdormando pthread_mutex_unlock(&lru_locks[i]);
607df8cf5e7SToru Maesaka }
608bbef930eSdormando if (size == 0)
609bbef930eSdormando continue;
610bbef930eSdormando APPEND_NUM_FMT_STAT(fmt, n, "number", "%u", size);
611d9edfefdSdormando if (settings.lru_maintainer_thread) {
6128ab8a1dbSdormando APPEND_NUM_FMT_STAT(fmt, n, "number_hot", "%u", lru_size_map[0]);
6138ab8a1dbSdormando APPEND_NUM_FMT_STAT(fmt, n, "number_warm", "%u", lru_size_map[1]);
6148ab8a1dbSdormando APPEND_NUM_FMT_STAT(fmt, n, "number_cold", "%u", lru_size_map[2]);
61585a0042bSMatt Fowles Kulukundis if (settings.expirezero_does_not_evict) {
6164de89c8cSdormando APPEND_NUM_FMT_STAT(fmt, n, "number_noexp", "%u", lru_size_map[3]);
617d9edfefdSdormando }
61885a0042bSMatt Fowles Kulukundis }
619bbef930eSdormando APPEND_NUM_FMT_STAT(fmt, n, "age", "%u", age);
620bbef930eSdormando APPEND_NUM_FMT_STAT(fmt, n, "evicted",
621bbef930eSdormando "%llu", (unsigned long long)totals.evicted);
622bbef930eSdormando APPEND_NUM_FMT_STAT(fmt, n, "evicted_nonzero",
623bbef930eSdormando "%llu", (unsigned long long)totals.evicted_nonzero);
624bbef930eSdormando APPEND_NUM_FMT_STAT(fmt, n, "evicted_time",
625bbef930eSdormando "%u", totals.evicted_time);
626bbef930eSdormando APPEND_NUM_FMT_STAT(fmt, n, "outofmemory",
627bbef930eSdormando "%llu", (unsigned long long)totals.outofmemory);
628bbef930eSdormando APPEND_NUM_FMT_STAT(fmt, n, "tailrepairs",
629bbef930eSdormando "%llu", (unsigned long long)totals.tailrepairs);
630bbef930eSdormando APPEND_NUM_FMT_STAT(fmt, n, "reclaimed",
631bbef930eSdormando "%llu", (unsigned long long)totals.reclaimed);
632bbef930eSdormando APPEND_NUM_FMT_STAT(fmt, n, "expired_unfetched",
633bbef930eSdormando "%llu", (unsigned long long)totals.expired_unfetched);
634bbef930eSdormando APPEND_NUM_FMT_STAT(fmt, n, "evicted_unfetched",
635bbef930eSdormando "%llu", (unsigned long long)totals.evicted_unfetched);
636bbef930eSdormando APPEND_NUM_FMT_STAT(fmt, n, "crawler_reclaimed",
637bbef930eSdormando "%llu", (unsigned long long)totals.crawler_reclaimed);
638c10feb9eSdormando APPEND_NUM_FMT_STAT(fmt, n, "crawler_items_checked",
639c10feb9eSdormando "%llu", (unsigned long long)totals.crawler_items_checked);
640bbef930eSdormando APPEND_NUM_FMT_STAT(fmt, n, "lrutail_reflocked",
641bbef930eSdormando "%llu", (unsigned long long)totals.lrutail_reflocked);
642d9edfefdSdormando if (settings.lru_maintainer_thread) {
643bbef930eSdormando APPEND_NUM_FMT_STAT(fmt, n, "moves_to_cold",
644bbef930eSdormando "%llu", (unsigned long long)totals.moves_to_cold);
645bbef930eSdormando APPEND_NUM_FMT_STAT(fmt, n, "moves_to_warm",
646bbef930eSdormando "%llu", (unsigned long long)totals.moves_to_warm);
647bbef930eSdormando APPEND_NUM_FMT_STAT(fmt, n, "moves_within_lru",
648bbef930eSdormando "%llu", (unsigned long long)totals.moves_within_lru);
649bbef930eSdormando APPEND_NUM_FMT_STAT(fmt, n, "direct_reclaims",
650bbef930eSdormando "%llu", (unsigned long long)totals.direct_reclaims);
651bbef930eSdormando }
652d9edfefdSdormando }
653df8cf5e7SToru Maesaka
65448f45982SToru Maesaka /* getting here means both ascii and binary terminators fit */
65517df5c0eSTrond Norbye add_stats(NULL, 0, NULL, 0, c);
65660d70942SAnatoly Vorobey }
65786969ea4SBrad Fitzpatrick
item_stats_sizes_status(void)6588d82383fSdormando bool item_stats_sizes_status(void) {
6598d82383fSdormando bool ret = false;
660ae6f4267Sdormando mutex_lock(&stats_sizes_lock);
6618d82383fSdormando if (stats_sizes_hist != NULL)
6628d82383fSdormando ret = true;
6638d82383fSdormando mutex_unlock(&stats_sizes_lock);
6648d82383fSdormando return ret;
6658d82383fSdormando }
6668d82383fSdormando
item_stats_sizes_init(void)6678d82383fSdormando void item_stats_sizes_init(void) {
6688d82383fSdormando if (stats_sizes_hist != NULL)
6698d82383fSdormando return;
670ae6f4267Sdormando stats_sizes_buckets = settings.item_size_max / 32 + 1;
671ae6f4267Sdormando stats_sizes_hist = calloc(stats_sizes_buckets, sizeof(int));
6728d82383fSdormando stats_sizes_cas_min = (settings.use_cas) ? get_cas_id() : 0;
6738d82383fSdormando }
6748d82383fSdormando
item_stats_sizes_enable(ADD_STAT add_stats,void * c)6758d82383fSdormando void item_stats_sizes_enable(ADD_STAT add_stats, void *c) {
6768d82383fSdormando mutex_lock(&stats_sizes_lock);
6778d82383fSdormando if (!settings.use_cas) {
6788d82383fSdormando APPEND_STAT("sizes_status", "error", "");
6798d82383fSdormando APPEND_STAT("sizes_error", "cas_support_disabled", "");
6808d82383fSdormando } else if (stats_sizes_hist == NULL) {
6818d82383fSdormando item_stats_sizes_init();
682ae6f4267Sdormando if (stats_sizes_hist != NULL) {
6838d82383fSdormando APPEND_STAT("sizes_status", "enabled", "");
684ae6f4267Sdormando } else {
6858d82383fSdormando APPEND_STAT("sizes_status", "error", "");
686ae6f4267Sdormando APPEND_STAT("sizes_error", "no_memory", "");
687ae6f4267Sdormando }
688ae6f4267Sdormando } else {
6898d82383fSdormando APPEND_STAT("sizes_status", "enabled", "");
690ae6f4267Sdormando }
691ae6f4267Sdormando mutex_unlock(&stats_sizes_lock);
692ae6f4267Sdormando }
693ae6f4267Sdormando
item_stats_sizes_disable(ADD_STAT add_stats,void * c)694ae6f4267Sdormando void item_stats_sizes_disable(ADD_STAT add_stats, void *c) {
695ae6f4267Sdormando mutex_lock(&stats_sizes_lock);
696ae6f4267Sdormando if (stats_sizes_hist != NULL) {
697ae6f4267Sdormando free(stats_sizes_hist);
698ae6f4267Sdormando stats_sizes_hist = NULL;
699ae6f4267Sdormando }
7008d82383fSdormando APPEND_STAT("sizes_status", "disabled", "");
701ae6f4267Sdormando mutex_unlock(&stats_sizes_lock);
702ae6f4267Sdormando }
703ae6f4267Sdormando
item_stats_sizes_add(item * it)7048d82383fSdormando void item_stats_sizes_add(item *it) {
7058d82383fSdormando if (stats_sizes_hist == NULL || stats_sizes_cas_min > ITEM_get_cas(it))
706ae6f4267Sdormando return;
707ae6f4267Sdormando int ntotal = ITEM_ntotal(it);
708ae6f4267Sdormando int bucket = ntotal / 32;
709ae6f4267Sdormando if ((ntotal % 32) != 0) bucket++;
710ae6f4267Sdormando if (bucket < stats_sizes_buckets) stats_sizes_hist[bucket]++;
711ae6f4267Sdormando }
712ae6f4267Sdormando
713ae6f4267Sdormando /* I think there's no way for this to be accurate without using the CAS value.
714ae6f4267Sdormando * Since items getting their time value bumped will pass this validation.
715ae6f4267Sdormando */
item_stats_sizes_remove(item * it)7168d82383fSdormando void item_stats_sizes_remove(item *it) {
7178d82383fSdormando if (stats_sizes_hist == NULL || stats_sizes_cas_min > ITEM_get_cas(it))
718ae6f4267Sdormando return;
719ae6f4267Sdormando int ntotal = ITEM_ntotal(it);
720ae6f4267Sdormando int bucket = ntotal / 32;
721ae6f4267Sdormando if ((ntotal % 32) != 0) bucket++;
722ae6f4267Sdormando if (bucket < stats_sizes_buckets) stats_sizes_hist[bucket]--;
723ae6f4267Sdormando }
724ae6f4267Sdormando
72544ef96eeSPaul Lindner /** dumps out a list of objects of each size, with granularity of 32 bytes */
72677dde9f9SPaul Lindner /*@null@*/
72707c632f7Sdormando /* Locks are correct based on a technicality. Holds LRU lock while doing the
72807c632f7Sdormando * work, so items can't go invalid, and it's only looking at header sizes
72907c632f7Sdormando * which don't change.
73007c632f7Sdormando */
item_stats_sizes(ADD_STAT add_stats,void * c)7319bce42f2Sdormando void item_stats_sizes(ADD_STAT add_stats, void *c) {
732ae6f4267Sdormando mutex_lock(&stats_sizes_lock);
733e2df488bSToru Maesaka
734ae6f4267Sdormando if (stats_sizes_hist != NULL) {
7356d74e134SBrad Fitzpatrick int i;
736ae6f4267Sdormando for (i = 0; i < stats_sizes_buckets; i++) {
737ae6f4267Sdormando if (stats_sizes_hist[i] != 0) {
738f8770bf2SDustin Sallings char key[8];
739bb9d00feSDan McGee snprintf(key, sizeof(key), "%d", i * 32);
740ae6f4267Sdormando APPEND_STAT(key, "%u", stats_sizes_hist[i]);
7416d74e134SBrad Fitzpatrick }
742e2df488bSToru Maesaka }
743ae6f4267Sdormando } else {
744ae6f4267Sdormando APPEND_STAT("sizes_status", "disabled", "");
74517df5c0eSTrond Norbye }
746ae6f4267Sdormando
74717df5c0eSTrond Norbye add_stats(NULL, 0, NULL, 0, c);
748ae6f4267Sdormando mutex_unlock(&stats_sizes_lock);
7496d74e134SBrad Fitzpatrick }
750c5944dc2SSteven Grimm
7515da8dbabSTrond Norbye /** wrapper around assoc_find which does the lazy expiration logic */
do_item_get(const char * key,const size_t nkey,const uint32_t hv,conn * c)7526895d23eSsergiocarlos item *do_item_get(const char *key, const size_t nkey, const uint32_t hv, conn *c) {
753bab9acd1Sdormando item *it = assoc_find(key, nkey, hv);
754b3630e1aSdormando if (it != NULL) {
7553b961388Sdormando refcount_incr(&it->refcount);
756b3630e1aSdormando /* Optimization for slab reassignment. prevents popular items from
757b3630e1aSdormando * jamming in busy wait. Can only do this here to satisfy lock order
75869d1c699Sdormando * of item_lock, slabs_lock. */
75969d1c699Sdormando /* This was made unsafe by removal of the cache_lock:
76069d1c699Sdormando * slab_rebalance_signal and slab_rebal.* are modified in a separate
76169d1c699Sdormando * thread under slabs_lock. If slab_rebalance_signal = 1, slab_start =
76269d1c699Sdormando * NULL (0), but slab_end is still equal to some value, this would end
76369d1c699Sdormando * up unlinking every item fetched.
76469d1c699Sdormando * This is either an acceptable loss, or if slab_rebalance_signal is
76569d1c699Sdormando * true, slab_start/slab_end should be put behind the slabs_lock.
76669d1c699Sdormando * Which would cause a huge potential slowdown.
76769d1c699Sdormando * Could also use a specific lock for slab_rebal.* and
76869d1c699Sdormando * slab_rebalance_signal (shorter lock?)
76969d1c699Sdormando */
77069d1c699Sdormando /*if (slab_rebalance_signal &&
771b3630e1aSdormando ((void *)it >= slab_rebal.slab_start && (void *)it < slab_rebal.slab_end)) {
772ecea7458SJason CHAN do_item_unlink(it, hv);
773f4983b20Sdormando do_item_remove(it);
774b3630e1aSdormando it = NULL;
77569d1c699Sdormando }*/
776b3630e1aSdormando }
77791adade7Sdormando int was_found = 0;
77891adade7Sdormando
77991adade7Sdormando if (settings.verbose > 2) {
780fbe823d9Sdormando int ii;
78191adade7Sdormando if (it == NULL) {
782fbe823d9Sdormando fprintf(stderr, "> NOT FOUND ");
78391adade7Sdormando } else {
784fbe823d9Sdormando fprintf(stderr, "> FOUND KEY ");
78591adade7Sdormando }
786fbe823d9Sdormando for (ii = 0; ii < nkey; ++ii) {
787fbe823d9Sdormando fprintf(stderr, "%c", key[ii]);
788fbe823d9Sdormando }
78991adade7Sdormando }
79091adade7Sdormando
791eb75a0dfSPaul Lindner if (it != NULL) {
792c7fbccebSdormando was_found = 1;
793004e2211Sdormando if (item_is_flushed(it)) {
794f4983b20Sdormando do_item_unlink(it, hv);
795f4983b20Sdormando do_item_remove(it);
79640b7b4b2Sdormando it = NULL;
79780ce0108Sdormando pthread_mutex_lock(&c->thread->stats.mutex);
79880ce0108Sdormando c->thread->stats.get_flushed++;
79980ce0108Sdormando pthread_mutex_unlock(&c->thread->stats.mutex);
800c7fbccebSdormando if (settings.verbose > 2) {
80140b7b4b2Sdormando fprintf(stderr, " -nuked by flush");
80240b7b4b2Sdormando }
803c7fbccebSdormando was_found = 2;
80440b7b4b2Sdormando } else if (it->exptime != 0 && it->exptime <= current_time) {
805f4983b20Sdormando do_item_unlink(it, hv);
806f4983b20Sdormando do_item_remove(it);
80740b7b4b2Sdormando it = NULL;
8086895d23eSsergiocarlos pthread_mutex_lock(&c->thread->stats.mutex);
8096895d23eSsergiocarlos c->thread->stats.get_expired++;
8106895d23eSsergiocarlos pthread_mutex_unlock(&c->thread->stats.mutex);
811c7fbccebSdormando if (settings.verbose > 2) {
81240b7b4b2Sdormando fprintf(stderr, " -nuked by expire");
81340b7b4b2Sdormando }
814c7fbccebSdormando was_found = 3;
81540b7b4b2Sdormando } else {
8169bce42f2Sdormando it->it_flags |= ITEM_FETCHED|ITEM_ACTIVE;
81756b8339eSSteven Grimm DEBUG_REFCNT(it, '+');
81856b8339eSSteven Grimm }
81940b7b4b2Sdormando }
82091adade7Sdormando
82191adade7Sdormando if (settings.verbose > 2)
82291adade7Sdormando fprintf(stderr, "\n");
823c7fbccebSdormando /* For now this is in addition to the above verbose logging. */
824c7fbccebSdormando LOGGER_LOG(c->thread->l, LOG_FETCHERS, LOGGER_ITEM_GET, NULL, was_found, key, nkey);
82591adade7Sdormando
82656b8339eSSteven Grimm return it;
82756b8339eSSteven Grimm }
82856b8339eSSteven Grimm
do_item_touch(const char * key,size_t nkey,uint32_t exptime,const uint32_t hv,conn * c)829bab9acd1Sdormando item *do_item_touch(const char *key, size_t nkey, uint32_t exptime,
8306895d23eSsergiocarlos const uint32_t hv, conn *c) {
8316895d23eSsergiocarlos item *it = do_item_get(key, nkey, hv, c);
832d87f568aSdormando if (it != NULL) {
833d87f568aSdormando it->exptime = exptime;
834d87f568aSdormando }
835d87f568aSdormando return it;
836d87f568aSdormando }
837d87f568aSdormando
838fb269897Sdormando /*** LRU MAINTENANCE THREAD ***/
839fb269897Sdormando
840f5f8f1a0Sdormando /* Returns number of items remove, expired, or evicted.
841f5f8f1a0Sdormando * Callable from worker threads or the LRU maintainer thread */
lru_pull_tail(const int orig_id,const int cur_lru,const uint64_t total_bytes,const bool do_evict)842fb269897Sdormando static int lru_pull_tail(const int orig_id, const int cur_lru,
843*690a9a9dSEiichi Tsukata const uint64_t total_bytes, const bool do_evict) {
844fb269897Sdormando item *it = NULL;
845fb269897Sdormando int id = orig_id;
846fb269897Sdormando int removed = 0;
847fb269897Sdormando if (id == 0)
848fb269897Sdormando return 0;
849fb269897Sdormando
850fb269897Sdormando int tries = 5;
851fb269897Sdormando item *search;
852fb269897Sdormando item *next_it;
853fb269897Sdormando void *hold_lock = NULL;
854fb269897Sdormando unsigned int move_to_lru = 0;
855ee461d11Sdormando uint64_t limit = 0;
856fb269897Sdormando
857fb269897Sdormando id |= cur_lru;
858fb269897Sdormando pthread_mutex_lock(&lru_locks[id]);
859fb269897Sdormando search = tails[id];
860f5f8f1a0Sdormando /* We walk up *only* for locked items, and if bottom is expired. */
861fb269897Sdormando for (; tries > 0 && search != NULL; tries--, search=next_it) {
862fb269897Sdormando /* we might relink search mid-loop, so search->prev isn't reliable */
863fb269897Sdormando next_it = search->prev;
864fb269897Sdormando if (search->nbytes == 0 && search->nkey == 0 && search->it_flags == 1) {
865fb269897Sdormando /* We are a crawler, ignore it. */
866fb269897Sdormando tries++;
867fb269897Sdormando continue;
868fb269897Sdormando }
869fb269897Sdormando uint32_t hv = hash(ITEM_key(search), search->nkey);
870fb269897Sdormando /* Attempt to hash item lock the "search" item. If locked, no
871f5f8f1a0Sdormando * other callers can incr the refcount. Also skip ourselves. */
872*690a9a9dSEiichi Tsukata if ((hold_lock = item_trylock(hv)) == NULL)
873fb269897Sdormando continue;
874fb269897Sdormando /* Now see if the item is refcount locked */
875fb269897Sdormando if (refcount_incr(&search->refcount) != 2) {
876fb269897Sdormando /* Note pathological case with ref'ed items in tail.
877fb269897Sdormando * Can still unlink the item, but it won't be reusable yet */
878fb269897Sdormando itemstats[id].lrutail_reflocked++;
879fb269897Sdormando /* In case of refcount leaks, enable for quick workaround. */
880fb269897Sdormando /* WARNING: This can cause terrible corruption */
881fb269897Sdormando if (settings.tail_repair_time &&
882fb269897Sdormando search->time + settings.tail_repair_time < current_time) {
883fb269897Sdormando itemstats[id].tailrepairs++;
884fb269897Sdormando search->refcount = 1;
885fb269897Sdormando /* This will call item_remove -> item_free since refcnt is 1 */
886fb269897Sdormando do_item_unlink_nolock(search, hv);
887fb269897Sdormando item_trylock_unlock(hold_lock);
888fb269897Sdormando continue;
889fb269897Sdormando }
890fb269897Sdormando }
891fb269897Sdormando
892fb269897Sdormando /* Expired or flushed */
893fb269897Sdormando if ((search->exptime != 0 && search->exptime < current_time)
894004e2211Sdormando || item_is_flushed(search)) {
895fb269897Sdormando itemstats[id].reclaimed++;
896fb269897Sdormando if ((search->it_flags & ITEM_FETCHED) == 0) {
897fb269897Sdormando itemstats[id].expired_unfetched++;
898fb269897Sdormando }
899fb269897Sdormando /* refcnt 2 -> 1 */
900a0390847Sdormando do_item_unlink_nolock(search, hv);
901fb269897Sdormando /* refcnt 1 -> 0 -> item_free */
902a0390847Sdormando do_item_remove(search);
903fb269897Sdormando item_trylock_unlock(hold_lock);
904fb269897Sdormando removed++;
905fb269897Sdormando
906fb269897Sdormando /* If all we're finding are expired, can keep going */
907fb269897Sdormando continue;
908fb269897Sdormando }
909fb269897Sdormando
910fb269897Sdormando /* If we're HOT_LRU or WARM_LRU and over size limit, send to COLD_LRU.
911fb269897Sdormando * If we're COLD_LRU, send to WARM_LRU unless we need to evict
912fb269897Sdormando */
913fb269897Sdormando switch (cur_lru) {
914fb269897Sdormando case HOT_LRU:
915ee461d11Sdormando limit = total_bytes * settings.hot_lru_pct / 100;
916fb269897Sdormando case WARM_LRU:
917ee461d11Sdormando if (limit == 0)
918ee461d11Sdormando limit = total_bytes * settings.warm_lru_pct / 100;
919ee461d11Sdormando if (sizes_bytes[id] > limit) {
920f7bf26cbSdormando itemstats[id].moves_to_cold++;
921fb269897Sdormando move_to_lru = COLD_LRU;
922fb269897Sdormando do_item_unlink_q(search);
923fb269897Sdormando it = search;
924fb269897Sdormando removed++;
925fb269897Sdormando break;
926fb269897Sdormando } else if ((search->it_flags & ITEM_ACTIVE) != 0) {
927fb269897Sdormando /* Only allow ACTIVE relinking if we're not too large. */
928f7bf26cbSdormando itemstats[id].moves_within_lru++;
929fb269897Sdormando search->it_flags &= ~ITEM_ACTIVE;
930fb269897Sdormando do_item_update_nolock(search);
931fb269897Sdormando do_item_remove(search);
932fb269897Sdormando item_trylock_unlock(hold_lock);
933fb269897Sdormando } else {
934fb269897Sdormando /* Don't want to move to COLD, not active, bail out */
935fb269897Sdormando it = search;
936fb269897Sdormando }
937fb269897Sdormando break;
938fb269897Sdormando case COLD_LRU:
939fb269897Sdormando it = search; /* No matter what, we're stopping */
940fb269897Sdormando if (do_evict) {
941fb269897Sdormando if (settings.evict_to_free == 0) {
942f7bf26cbSdormando /* Don't think we need a counter for this. It'll OOM. */
943fb269897Sdormando break;
944fb269897Sdormando }
945fb269897Sdormando itemstats[id].evicted++;
946fb269897Sdormando itemstats[id].evicted_time = current_time - search->time;
947fb269897Sdormando if (search->exptime != 0)
948fb269897Sdormando itemstats[id].evicted_nonzero++;
949fb269897Sdormando if ((search->it_flags & ITEM_FETCHED) == 0) {
950fb269897Sdormando itemstats[id].evicted_unfetched++;
951fb269897Sdormando }
952cb8257e3Sdormando LOGGER_LOG(NULL, LOG_EVICTIONS, LOGGER_EVICTION, search);
953a0390847Sdormando do_item_unlink_nolock(search, hv);
954fb269897Sdormando removed++;
955826403ddSdormando if (settings.slab_automove == 2) {
956826403ddSdormando slabs_reassign(-1, orig_id);
957826403ddSdormando }
958d9edfefdSdormando } else if ((search->it_flags & ITEM_ACTIVE) != 0
959d9edfefdSdormando && settings.lru_maintainer_thread) {
960f7bf26cbSdormando itemstats[id].moves_to_warm++;
961fb269897Sdormando search->it_flags &= ~ITEM_ACTIVE;
962fb269897Sdormando move_to_lru = WARM_LRU;
963fb269897Sdormando do_item_unlink_q(search);
964a0390847Sdormando removed++;
965fb269897Sdormando }
966fb269897Sdormando break;
967fb269897Sdormando }
968fb269897Sdormando if (it != NULL)
969fb269897Sdormando break;
970fb269897Sdormando }
971fb269897Sdormando
972fb269897Sdormando pthread_mutex_unlock(&lru_locks[id]);
973fb269897Sdormando
974fb269897Sdormando if (it != NULL) {
975fb269897Sdormando if (move_to_lru) {
9764de89c8cSdormando it->slabs_clsid = ITEM_clsid(it);
977fb269897Sdormando it->slabs_clsid |= move_to_lru;
978fb269897Sdormando item_link_q(it);
979fb269897Sdormando }
980fb269897Sdormando do_item_remove(it);
981fb269897Sdormando item_trylock_unlock(hold_lock);
982fb269897Sdormando }
983fb269897Sdormando
984fb269897Sdormando return removed;
985fb269897Sdormando }
986fb269897Sdormando
987fb269897Sdormando /* Loop up to N times:
988fb269897Sdormando * If too many items are in HOT_LRU, push to COLD_LRU
989fb269897Sdormando * If too many items are in WARM_LRU, push to COLD_LRU
990fb269897Sdormando * If too many items are in COLD_LRU, poke COLD_LRU tail
991b689a627Sdormando * 1000 loops with 1ms min sleep gives us under 1m items shifted/sec. The
992b689a627Sdormando * locks can't handle much more than that. Leaving a TODO for how to
993b689a627Sdormando * autoadjust in the future.
994fb269897Sdormando */
lru_maintainer_juggle(const int slabs_clsid)995a0390847Sdormando static int lru_maintainer_juggle(const int slabs_clsid) {
996fb269897Sdormando int i;
997a0390847Sdormando int did_moves = 0;
9988ab8a1dbSdormando bool mem_limit_reached = false;
999ee461d11Sdormando uint64_t total_bytes = 0;
1000d6e96467Sdormando unsigned int chunks_perslab = 0;
1001d6e96467Sdormando unsigned int chunks_free = 0;
1002b689a627Sdormando /* TODO: if free_chunks below high watermark, increase aggressiveness */
1003d6e96467Sdormando chunks_free = slabs_available_chunks(slabs_clsid, &mem_limit_reached,
1004ee461d11Sdormando &total_bytes, &chunks_perslab);
10054de89c8cSdormando if (settings.expirezero_does_not_evict)
1006ee461d11Sdormando total_bytes -= noexp_lru_size(slabs_clsid);
1007fb269897Sdormando
1008d6e96467Sdormando /* If slab automove is enabled on any level, and we have more than 2 pages
1009d6e96467Sdormando * worth of chunks free in this class, ask (gently) to reassign a page
1010d6e96467Sdormando * from this class back into the global pool (0)
1011d6e96467Sdormando */
10121c3f0a3dSdormando if (settings.slab_automove > 0 && chunks_free > (chunks_perslab * 2.5)) {
1013d6e96467Sdormando slabs_reassign(slabs_clsid, SLAB_GLOBAL_PAGE_POOL);
1014d6e96467Sdormando }
1015d6e96467Sdormando
1016fb269897Sdormando /* Juggle HOT/WARM up to N times */
1017b689a627Sdormando for (i = 0; i < 1000; i++) {
1018f7bf26cbSdormando int do_more = 0;
1019*690a9a9dSEiichi Tsukata if (lru_pull_tail(slabs_clsid, HOT_LRU, total_bytes, false) ||
1020*690a9a9dSEiichi Tsukata lru_pull_tail(slabs_clsid, WARM_LRU, total_bytes, false)) {
1021f7bf26cbSdormando do_more++;
1022fb269897Sdormando }
1023*690a9a9dSEiichi Tsukata do_more += lru_pull_tail(slabs_clsid, COLD_LRU, total_bytes, false);
1024f7bf26cbSdormando if (do_more == 0)
1025f7bf26cbSdormando break;
1026a0390847Sdormando did_moves++;
1027fb269897Sdormando }
1028a0390847Sdormando return did_moves;
1029fb269897Sdormando }
1030fb269897Sdormando
1031e708513aSdormando /* Will crawl all slab classes a minimum of once per hour */
1032e708513aSdormando #define MAX_MAINTCRAWL_WAIT 60 * 60
1033e708513aSdormando
1034e708513aSdormando /* Hoping user input will improve this function. This is all a wild guess.
1035e708513aSdormando * Operation: Kicks crawler for each slab id. Crawlers take some statistics as
1036e708513aSdormando * to items with nonzero expirations. It then buckets how many items will
1037e708513aSdormando * expire per minute for the next hour.
1038e708513aSdormando * This function checks the results of a run, and if it things more than 1% of
1039e708513aSdormando * expirable objects are ready to go, kick the crawler again to reap.
1040e708513aSdormando * It will also kick the crawler once per minute regardless, waiting a minute
1041e708513aSdormando * longer for each time it has no work to do, up to an hour wait time.
1042e708513aSdormando * The latter is to avoid newly started daemons from waiting too long before
1043e708513aSdormando * retrying a crawl.
1044e708513aSdormando */
lru_maintainer_crawler_check(void)1045e708513aSdormando static void lru_maintainer_crawler_check(void) {
1046e708513aSdormando int i;
1047e708513aSdormando static rel_time_t last_crawls[MAX_NUMBER_OF_SLAB_CLASSES];
1048e708513aSdormando static rel_time_t next_crawl_wait[MAX_NUMBER_OF_SLAB_CLASSES];
1049e708513aSdormando for (i = POWER_SMALLEST; i < MAX_NUMBER_OF_SLAB_CLASSES; i++) {
1050e708513aSdormando crawlerstats_t *s = &crawlerstats[i];
1051e708513aSdormando /* We've not successfully kicked off a crawl yet. */
1052e708513aSdormando if (last_crawls[i] == 0) {
105387ff9dc0Sdormando if (lru_crawler_start(i, 0) > 0) {
1054e708513aSdormando last_crawls[i] = current_time;
1055e708513aSdormando }
1056e708513aSdormando }
1057e708513aSdormando pthread_mutex_lock(&lru_crawler_stats_lock);
1058e708513aSdormando if (s->run_complete) {
1059e708513aSdormando int x;
1060e708513aSdormando /* Should we crawl again? */
1061e708513aSdormando uint64_t possible_reclaims = s->seen - s->noexp;
1062e708513aSdormando uint64_t available_reclaims = 0;
1063e708513aSdormando /* Need to think we can free at least 1% of the items before
1064e708513aSdormando * crawling. */
1065e708513aSdormando /* FIXME: Configurable? */
1066e708513aSdormando uint64_t low_watermark = (s->seen / 100) + 1;
1067e708513aSdormando rel_time_t since_run = current_time - s->end_time;
1068e708513aSdormando /* Don't bother if the payoff is too low. */
1069e708513aSdormando if (settings.verbose > 1)
1070e708513aSdormando fprintf(stderr, "maint crawler: low_watermark: %llu, possible_reclaims: %llu, since_run: %u\n",
1071e708513aSdormando (unsigned long long)low_watermark, (unsigned long long)possible_reclaims,
1072e708513aSdormando (unsigned int)since_run);
1073e708513aSdormando for (x = 0; x < 60; x++) {
1074e708513aSdormando if (since_run < (x * 60) + 60)
1075e708513aSdormando break;
1076e708513aSdormando available_reclaims += s->histo[x];
1077e708513aSdormando }
1078e708513aSdormando if (available_reclaims > low_watermark) {
1079e708513aSdormando last_crawls[i] = 0;
1080e708513aSdormando if (next_crawl_wait[i] > 60)
1081e708513aSdormando next_crawl_wait[i] -= 60;
1082e708513aSdormando } else if (since_run > 5 && since_run > next_crawl_wait[i]) {
1083e708513aSdormando last_crawls[i] = 0;
1084e708513aSdormando if (next_crawl_wait[i] < MAX_MAINTCRAWL_WAIT)
1085e708513aSdormando next_crawl_wait[i] += 60;
1086e708513aSdormando }
1087e708513aSdormando if (settings.verbose > 1)
1088e708513aSdormando fprintf(stderr, "maint crawler: available reclaims: %llu, next_crawl: %u\n", (unsigned long long)available_reclaims, next_crawl_wait[i]);
1089e708513aSdormando }
1090e708513aSdormando pthread_mutex_unlock(&lru_crawler_stats_lock);
1091e708513aSdormando }
1092e708513aSdormando }
1093e708513aSdormando
1094fb269897Sdormando static pthread_t lru_maintainer_tid;
1095fb269897Sdormando
1096a0390847Sdormando #define MAX_LRU_MAINTAINER_SLEEP 1000000
1097a0390847Sdormando #define MIN_LRU_MAINTAINER_SLEEP 1000
1098a0390847Sdormando
lru_maintainer_thread(void * arg)1099fb269897Sdormando static void *lru_maintainer_thread(void *arg) {
1100fb269897Sdormando int i;
1101a0390847Sdormando useconds_t to_sleep = MIN_LRU_MAINTAINER_SLEEP;
1102e708513aSdormando rel_time_t last_crawler_check = 0;
1103fb269897Sdormando
1104fb269897Sdormando pthread_mutex_lock(&lru_maintainer_lock);
1105fb269897Sdormando if (settings.verbose > 2)
1106fb269897Sdormando fprintf(stderr, "Starting LRU maintainer background thread\n");
1107fb269897Sdormando while (do_run_lru_maintainer_thread) {
1108a0390847Sdormando int did_moves = 0;
1109a0390847Sdormando pthread_mutex_unlock(&lru_maintainer_lock);
1110a0390847Sdormando usleep(to_sleep);
1111a0390847Sdormando pthread_mutex_lock(&lru_maintainer_lock);
1112fb269897Sdormando
11134de89c8cSdormando STATS_LOCK();
11144de89c8cSdormando stats.lru_maintainer_juggles++;
11154de89c8cSdormando STATS_UNLOCK();
1116fb269897Sdormando /* We were asked to immediately wake up and poke a particular slab
1117fb269897Sdormando * class due to a low watermark being hit */
1118fb269897Sdormando if (lru_maintainer_check_clsid != 0) {
1119a0390847Sdormando did_moves = lru_maintainer_juggle(lru_maintainer_check_clsid);
1120fb269897Sdormando lru_maintainer_check_clsid = 0;
1121fb269897Sdormando } else {
1122fb269897Sdormando for (i = POWER_SMALLEST; i < MAX_NUMBER_OF_SLAB_CLASSES; i++) {
1123a0390847Sdormando did_moves += lru_maintainer_juggle(i);
1124fb269897Sdormando }
1125fb269897Sdormando }
1126a0390847Sdormando if (did_moves == 0) {
1127a0390847Sdormando if (to_sleep < MAX_LRU_MAINTAINER_SLEEP)
1128a0390847Sdormando to_sleep += 1000;
1129a0390847Sdormando } else {
1130b689a627Sdormando to_sleep /= 2;
1131b689a627Sdormando if (to_sleep < MIN_LRU_MAINTAINER_SLEEP)
1132a0390847Sdormando to_sleep = MIN_LRU_MAINTAINER_SLEEP;
1133a0390847Sdormando }
1134e708513aSdormando /* Once per second at most */
1135e708513aSdormando if (settings.lru_crawler && last_crawler_check != current_time) {
1136e708513aSdormando lru_maintainer_crawler_check();
1137e708513aSdormando last_crawler_check = current_time;
1138e708513aSdormando }
1139fb269897Sdormando }
1140fb269897Sdormando pthread_mutex_unlock(&lru_maintainer_lock);
1141fb269897Sdormando if (settings.verbose > 2)
1142fb269897Sdormando fprintf(stderr, "LRU maintainer thread stopping\n");
1143fb269897Sdormando
1144fb269897Sdormando return NULL;
1145fb269897Sdormando }
stop_lru_maintainer_thread(void)1146fb269897Sdormando int stop_lru_maintainer_thread(void) {
1147fb269897Sdormando int ret;
1148fb269897Sdormando pthread_mutex_lock(&lru_maintainer_lock);
1149a0390847Sdormando /* LRU thread is a sleep loop, will die on its own */
1150fb269897Sdormando do_run_lru_maintainer_thread = 0;
1151fb269897Sdormando pthread_mutex_unlock(&lru_maintainer_lock);
1152fb269897Sdormando if ((ret = pthread_join(lru_maintainer_tid, NULL)) != 0) {
1153fb269897Sdormando fprintf(stderr, "Failed to stop LRU maintainer thread: %s\n", strerror(ret));
1154fb269897Sdormando return -1;
1155fb269897Sdormando }
1156f7bf26cbSdormando settings.lru_maintainer_thread = false;
1157fb269897Sdormando return 0;
1158fb269897Sdormando }
1159fb269897Sdormando
start_lru_maintainer_thread(void)1160fb269897Sdormando int start_lru_maintainer_thread(void) {
1161fb269897Sdormando int ret;
1162fb269897Sdormando
1163fb269897Sdormando pthread_mutex_lock(&lru_maintainer_lock);
1164fb269897Sdormando do_run_lru_maintainer_thread = 1;
1165f7bf26cbSdormando settings.lru_maintainer_thread = true;
1166fb269897Sdormando if ((ret = pthread_create(&lru_maintainer_tid, NULL,
1167fb269897Sdormando lru_maintainer_thread, NULL)) != 0) {
1168fb269897Sdormando fprintf(stderr, "Can't create LRU maintainer thread: %s\n",
1169fb269897Sdormando strerror(ret));
1170fb269897Sdormando pthread_mutex_unlock(&lru_maintainer_lock);
1171fb269897Sdormando return -1;
1172fb269897Sdormando }
1173fb269897Sdormando pthread_mutex_unlock(&lru_maintainer_lock);
1174fb269897Sdormando
1175fb269897Sdormando return 0;
1176fb269897Sdormando }
1177fb269897Sdormando
1178fb269897Sdormando /* If we hold this lock, crawler can't wake up or move */
lru_maintainer_pause(void)1179fb269897Sdormando void lru_maintainer_pause(void) {
1180fb269897Sdormando pthread_mutex_lock(&lru_maintainer_lock);
1181fb269897Sdormando }
1182fb269897Sdormando
lru_maintainer_resume(void)1183fb269897Sdormando void lru_maintainer_resume(void) {
1184fb269897Sdormando pthread_mutex_unlock(&lru_maintainer_lock);
1185fb269897Sdormando }
1186fb269897Sdormando
init_lru_maintainer(void)1187fb269897Sdormando int init_lru_maintainer(void) {
1188fb269897Sdormando if (lru_maintainer_initialized == 0) {
1189fb269897Sdormando pthread_mutex_init(&lru_maintainer_lock, NULL);
1190fb269897Sdormando lru_maintainer_initialized = 1;
1191fb269897Sdormando }
1192fb269897Sdormando return 0;
1193fb269897Sdormando }
1194fb269897Sdormando
1195fb269897Sdormando /*** ITEM CRAWLER THREAD ***/
1196fb269897Sdormando
crawler_link_q(item * it)11970d1f505cSdormando static void crawler_link_q(item *it) { /* item is the new tail */
11980d1f505cSdormando item **head, **tail;
11995e05fca2Sdormando assert(it->it_flags == 1);
12005e05fca2Sdormando assert(it->nbytes == 0);
12010d1f505cSdormando
12020d1f505cSdormando head = &heads[it->slabs_clsid];
12030d1f505cSdormando tail = &tails[it->slabs_clsid];
12040d1f505cSdormando assert(*tail != 0);
12050d1f505cSdormando assert(it != *tail);
12060d1f505cSdormando assert((*head && *tail) || (*head == 0 && *tail == 0));
12070d1f505cSdormando it->prev = *tail;
12080d1f505cSdormando it->next = 0;
12090d1f505cSdormando if (it->prev) {
12100d1f505cSdormando assert(it->prev->next == 0);
12110d1f505cSdormando it->prev->next = it;
12120d1f505cSdormando }
12130d1f505cSdormando *tail = it;
12140d1f505cSdormando if (*head == 0) *head = it;
12150d1f505cSdormando return;
12160d1f505cSdormando }
12170d1f505cSdormando
crawler_unlink_q(item * it)12180d1f505cSdormando static void crawler_unlink_q(item *it) {
12190d1f505cSdormando item **head, **tail;
12200d1f505cSdormando head = &heads[it->slabs_clsid];
12210d1f505cSdormando tail = &tails[it->slabs_clsid];
12220d1f505cSdormando
12230d1f505cSdormando if (*head == it) {
12240d1f505cSdormando assert(it->prev == 0);
12250d1f505cSdormando *head = it->next;
12260d1f505cSdormando }
12270d1f505cSdormando if (*tail == it) {
12280d1f505cSdormando assert(it->next == 0);
12290d1f505cSdormando *tail = it->prev;
12300d1f505cSdormando }
12310d1f505cSdormando assert(it->next != it);
12320d1f505cSdormando assert(it->prev != it);
12330d1f505cSdormando
12340d1f505cSdormando if (it->next) it->next->prev = it->prev;
12350d1f505cSdormando if (it->prev) it->prev->next = it->next;
12360d1f505cSdormando return;
12370d1f505cSdormando }
12380d1f505cSdormando
123945177c01Sdormando /* This is too convoluted, but it's a difficult shuffle. Try to rewrite it
124045177c01Sdormando * more clearly. */
crawler_crawl_q(item * it)12412418511aSdormando static item *crawler_crawl_q(item *it) {
12420d1f505cSdormando item **head, **tail;
12435e05fca2Sdormando assert(it->it_flags == 1);
12445e05fca2Sdormando assert(it->nbytes == 0);
12450d1f505cSdormando head = &heads[it->slabs_clsid];
12460d1f505cSdormando tail = &tails[it->slabs_clsid];
12470d1f505cSdormando
12480d1f505cSdormando /* We've hit the head, pop off */
12490d1f505cSdormando if (it->prev == 0) {
12500d1f505cSdormando assert(*head == it);
12510d1f505cSdormando if (it->next) {
12520d1f505cSdormando *head = it->next;
12530d1f505cSdormando assert(it->next->prev == it);
12540d1f505cSdormando it->next->prev = 0;
12550d1f505cSdormando }
12562418511aSdormando return NULL; /* Done */
12570d1f505cSdormando }
12580d1f505cSdormando
12590d1f505cSdormando /* Swing ourselves in front of the next item */
12600d1f505cSdormando /* NB: If there is a prev, we can't be the head */
12610d1f505cSdormando assert(it->prev != it);
12620d1f505cSdormando if (it->prev) {
12630d1f505cSdormando if (*head == it->prev) {
12640d1f505cSdormando /* Prev was the head, now we're the head */
12650d1f505cSdormando *head = it;
12660d1f505cSdormando }
12670d1f505cSdormando if (*tail == it) {
12680d1f505cSdormando /* We are the tail, now they are the tail */
12690d1f505cSdormando *tail = it->prev;
12700d1f505cSdormando }
12710d1f505cSdormando assert(it->next != it);
12720d1f505cSdormando if (it->next) {
12730d1f505cSdormando assert(it->prev->next == it);
12740d1f505cSdormando it->prev->next = it->next;
12750d1f505cSdormando it->next->prev = it->prev;
12760d1f505cSdormando } else {
12770d1f505cSdormando /* Tail. Move this above? */
12780d1f505cSdormando it->prev->next = 0;
12790d1f505cSdormando }
12800d1f505cSdormando /* prev->prev's next is it->prev */
12810d1f505cSdormando it->next = it->prev;
12820d1f505cSdormando it->prev = it->next->prev;
12830d1f505cSdormando it->next->prev = it;
12840d1f505cSdormando /* New it->prev now, if we're not at the head. */
12850d1f505cSdormando if (it->prev) {
12860d1f505cSdormando it->prev->next = it;
12870d1f505cSdormando }
12880d1f505cSdormando }
12890d1f505cSdormando assert(it->next != it);
12900d1f505cSdormando assert(it->prev != it);
12910d1f505cSdormando
12922418511aSdormando return it->next; /* success */
12930d1f505cSdormando }
12940d1f505cSdormando
1295c58291ecSdormando /* I pulled this out to make the main thread clearer, but it reaches into the
1296c58291ecSdormando * main thread's values too much. Should rethink again.
12970d1f505cSdormando */
item_crawler_evaluate(item * search,uint32_t hv,int i)1298c58291ecSdormando static void item_crawler_evaluate(item *search, uint32_t hv, int i) {
12994de89c8cSdormando int slab_id = CLEAR_LRU(i);
1300e708513aSdormando crawlerstats_t *s = &crawlerstats[slab_id];
1301c10feb9eSdormando itemstats[i].crawler_items_checked++;
130280ce0108Sdormando int is_flushed = item_is_flushed(search);
1303c58291ecSdormando if ((search->exptime != 0 && search->exptime < current_time)
130480ce0108Sdormando || is_flushed) {
1305c58291ecSdormando itemstats[i].crawler_reclaimed++;
1306e708513aSdormando s->reclaimed++;
1307c58291ecSdormando
1308c58291ecSdormando if (settings.verbose > 1) {
1309c58291ecSdormando int ii;
1310c58291ecSdormando char *key = ITEM_key(search);
1311c58291ecSdormando fprintf(stderr, "LRU crawler found an expired item (flags: %d, slab: %d): ",
1312c58291ecSdormando search->it_flags, search->slabs_clsid);
1313c58291ecSdormando for (ii = 0; ii < search->nkey; ++ii) {
1314c58291ecSdormando fprintf(stderr, "%c", key[ii]);
1315c58291ecSdormando }
1316c58291ecSdormando fprintf(stderr, "\n");
1317c58291ecSdormando }
131880ce0108Sdormando if ((search->it_flags & ITEM_FETCHED) == 0 && !is_flushed) {
1319c58291ecSdormando itemstats[i].expired_unfetched++;
1320c58291ecSdormando }
1321c58291ecSdormando do_item_unlink_nolock(search, hv);
132293d5637bSdormando do_item_remove(search);
132393d5637bSdormando assert(search->slabs_clsid == 0);
1324c58291ecSdormando } else {
1325e708513aSdormando s->seen++;
1326c58291ecSdormando refcount_decr(&search->refcount);
1327e708513aSdormando if (search->exptime == 0) {
1328e708513aSdormando s->noexp++;
1329e708513aSdormando } else if (search->exptime - current_time > 3599) {
1330e708513aSdormando s->ttl_hourplus++;
1331e708513aSdormando } else {
1332e708513aSdormando rel_time_t ttl_remain = search->exptime - current_time;
1333e708513aSdormando int bucket = ttl_remain / 60;
1334e708513aSdormando s->histo[bucket]++;
1335e708513aSdormando }
1336c58291ecSdormando }
1337c58291ecSdormando }
1338c58291ecSdormando
item_crawler_thread(void * arg)13390d1f505cSdormando static void *item_crawler_thread(void *arg) {
13400d1f505cSdormando int i;
134187ff9dc0Sdormando int crawls_persleep = settings.crawls_persleep;
13420d1f505cSdormando
13439d635fa7Sdormando pthread_mutex_lock(&lru_crawler_lock);
1344b6bf6df7Sdormando pthread_cond_signal(&lru_crawler_cond);
1345b6bf6df7Sdormando settings.lru_crawler = true;
13460d1f505cSdormando if (settings.verbose > 2)
13470d1f505cSdormando fprintf(stderr, "Starting LRU crawler background thread\n");
13480d1f505cSdormando while (do_run_lru_crawler_thread) {
13496be2b6c0Sdormando pthread_cond_wait(&lru_crawler_cond, &lru_crawler_lock);
13500d1f505cSdormando
13510d1f505cSdormando while (crawler_count) {
13520d1f505cSdormando item *search = NULL;
13530d1f505cSdormando void *hold_lock = NULL;
13540d1f505cSdormando
13559bce42f2Sdormando for (i = POWER_SMALLEST; i < LARGEST_ID; i++) {
1356c58291ecSdormando if (crawlers[i].it_flags != 1) {
1357c58291ecSdormando continue;
1358c58291ecSdormando }
13599bce42f2Sdormando pthread_mutex_lock(&lru_locks[i]);
13602418511aSdormando search = crawler_crawl_q((item *)&crawlers[i]);
1361e31a5912Sdormando if (search == NULL ||
1362e31a5912Sdormando (crawlers[i].remaining && --crawlers[i].remaining < 1)) {
13630d1f505cSdormando if (settings.verbose > 2)
13640d1f505cSdormando fprintf(stderr, "Nothing left to crawl for %d\n", i);
13650d1f505cSdormando crawlers[i].it_flags = 0;
13660d1f505cSdormando crawler_count--;
13670d1f505cSdormando crawler_unlink_q((item *)&crawlers[i]);
13689bce42f2Sdormando pthread_mutex_unlock(&lru_locks[i]);
1369e708513aSdormando pthread_mutex_lock(&lru_crawler_stats_lock);
13704de89c8cSdormando crawlerstats[CLEAR_LRU(i)].end_time = current_time;
13714de89c8cSdormando crawlerstats[CLEAR_LRU(i)].run_complete = true;
1372e708513aSdormando pthread_mutex_unlock(&lru_crawler_stats_lock);
13730d1f505cSdormando continue;
13740d1f505cSdormando }
1375cb9cafa6Sdormando uint32_t hv = hash(ITEM_key(search), search->nkey);
13760d1f505cSdormando /* Attempt to hash item lock the "search" item. If locked, no
13770d1f505cSdormando * other callers can incr the refcount
13780d1f505cSdormando */
13790d1f505cSdormando if ((hold_lock = item_trylock(hv)) == NULL) {
13809bce42f2Sdormando pthread_mutex_unlock(&lru_locks[i]);
13810d1f505cSdormando continue;
13820d1f505cSdormando }
13830d1f505cSdormando /* Now see if the item is refcount locked */
13840d1f505cSdormando if (refcount_incr(&search->refcount) != 2) {
13850d1f505cSdormando refcount_decr(&search->refcount);
13860d1f505cSdormando if (hold_lock)
13870d1f505cSdormando item_trylock_unlock(hold_lock);
13889bce42f2Sdormando pthread_mutex_unlock(&lru_locks[i]);
13890d1f505cSdormando continue;
13900d1f505cSdormando }
13910d1f505cSdormando
1392c58291ecSdormando /* Frees the item or decrements the refcount. */
1393c58291ecSdormando /* Interface for this could improve: do the free/decr here
1394c58291ecSdormando * instead? */
1395e708513aSdormando pthread_mutex_lock(&lru_crawler_stats_lock);
1396c58291ecSdormando item_crawler_evaluate(search, hv, i);
1397e708513aSdormando pthread_mutex_unlock(&lru_crawler_stats_lock);
13980d1f505cSdormando
13990d1f505cSdormando if (hold_lock)
14000d1f505cSdormando item_trylock_unlock(hold_lock);
14019bce42f2Sdormando pthread_mutex_unlock(&lru_locks[i]);
1402c58291ecSdormando
140387ff9dc0Sdormando if (crawls_persleep <= 0 && settings.lru_crawler_sleep) {
140431d533f8Sdormando usleep(settings.lru_crawler_sleep);
140587ff9dc0Sdormando crawls_persleep = settings.crawls_persleep;
140687ff9dc0Sdormando }
14070d1f505cSdormando }
1408c58291ecSdormando }
14090d1f505cSdormando if (settings.verbose > 2)
14100d1f505cSdormando fprintf(stderr, "LRU crawler thread sleeping\n");
14116be2b6c0Sdormando STATS_LOCK();
1412cb01d504Sdormando stats_state.lru_crawler_running = false;
14136be2b6c0Sdormando STATS_UNLOCK();
14140d1f505cSdormando }
14159d635fa7Sdormando pthread_mutex_unlock(&lru_crawler_lock);
14160d1f505cSdormando if (settings.verbose > 2)
14170d1f505cSdormando fprintf(stderr, "LRU crawler thread stopping\n");
14180d1f505cSdormando
14190d1f505cSdormando return NULL;
14200d1f505cSdormando }
14210d1f505cSdormando
14220d1f505cSdormando static pthread_t item_crawler_tid;
14230d1f505cSdormando
stop_item_crawler_thread(void)14240d1f505cSdormando int stop_item_crawler_thread(void) {
14250d1f505cSdormando int ret;
14266be2b6c0Sdormando pthread_mutex_lock(&lru_crawler_lock);
14270d1f505cSdormando do_run_lru_crawler_thread = 0;
14286be2b6c0Sdormando pthread_cond_signal(&lru_crawler_cond);
14296be2b6c0Sdormando pthread_mutex_unlock(&lru_crawler_lock);
14300d1f505cSdormando if ((ret = pthread_join(item_crawler_tid, NULL)) != 0) {
14310d1f505cSdormando fprintf(stderr, "Failed to stop LRU crawler thread: %s\n", strerror(ret));
14320d1f505cSdormando return -1;
14330d1f505cSdormando }
1434bb63a3edSdormando settings.lru_crawler = false;
14350d1f505cSdormando return 0;
14360d1f505cSdormando }
14370d1f505cSdormando
1438b6bf6df7Sdormando /* Lock dance to "block" until thread is waiting on its condition:
1439b6bf6df7Sdormando * caller locks mtx. caller spawns thread.
1440b6bf6df7Sdormando * thread blocks on mutex.
1441b6bf6df7Sdormando * caller waits on condition, releases lock.
1442b6bf6df7Sdormando * thread gets lock, sends signal.
1443b6bf6df7Sdormando * caller can't wait, as thread has lock.
1444b6bf6df7Sdormando * thread waits on condition, releases lock
1445b6bf6df7Sdormando * caller wakes on condition, gets lock.
1446b6bf6df7Sdormando * caller immediately releases lock.
1447b6bf6df7Sdormando * thread is now safely waiting on condition before the caller returns.
1448b6bf6df7Sdormando */
start_item_crawler_thread(void)14490d1f505cSdormando int start_item_crawler_thread(void) {
14500d1f505cSdormando int ret;
14516be2b6c0Sdormando
1452bb63a3edSdormando if (settings.lru_crawler)
1453bb63a3edSdormando return -1;
14546be2b6c0Sdormando pthread_mutex_lock(&lru_crawler_lock);
14550d1f505cSdormando do_run_lru_crawler_thread = 1;
14560d1f505cSdormando if ((ret = pthread_create(&item_crawler_tid, NULL,
14570d1f505cSdormando item_crawler_thread, NULL)) != 0) {
14580d1f505cSdormando fprintf(stderr, "Can't create LRU crawler thread: %s\n",
14590d1f505cSdormando strerror(ret));
14606be2b6c0Sdormando pthread_mutex_unlock(&lru_crawler_lock);
14610d1f505cSdormando return -1;
14620d1f505cSdormando }
1463b6bf6df7Sdormando /* Avoid returning until the crawler has actually started */
1464b6bf6df7Sdormando pthread_cond_wait(&lru_crawler_cond, &lru_crawler_lock);
14656be2b6c0Sdormando pthread_mutex_unlock(&lru_crawler_lock);
14666be2b6c0Sdormando
14676be2b6c0Sdormando return 0;
14686be2b6c0Sdormando }
14696be2b6c0Sdormando
147087ff9dc0Sdormando /* 'remaining' is passed in so the LRU maintainer thread can scrub the whole
147187ff9dc0Sdormando * LRU every time.
147287ff9dc0Sdormando */
do_lru_crawler_start(uint32_t id,uint32_t remaining)147387ff9dc0Sdormando static int do_lru_crawler_start(uint32_t id, uint32_t remaining) {
147487ff9dc0Sdormando int i;
147587ff9dc0Sdormando uint32_t sid;
147687ff9dc0Sdormando uint32_t tocrawl[3];
147787ff9dc0Sdormando int starts = 0;
147887ff9dc0Sdormando tocrawl[0] = id | HOT_LRU;
147987ff9dc0Sdormando tocrawl[1] = id | WARM_LRU;
148087ff9dc0Sdormando tocrawl[2] = id | COLD_LRU;
148187ff9dc0Sdormando
148287ff9dc0Sdormando for (i = 0; i < 3; i++) {
148387ff9dc0Sdormando sid = tocrawl[i];
148487ff9dc0Sdormando pthread_mutex_lock(&lru_locks[sid]);
148587ff9dc0Sdormando if (tails[sid] != NULL) {
148687ff9dc0Sdormando if (settings.verbose > 2)
1487f85592dcSRyan McCullagh fprintf(stderr, "Kicking LRU crawler off for LRU %u\n", sid);
148887ff9dc0Sdormando crawlers[sid].nbytes = 0;
148987ff9dc0Sdormando crawlers[sid].nkey = 0;
149087ff9dc0Sdormando crawlers[sid].it_flags = 1; /* For a crawler, this means enabled. */
149187ff9dc0Sdormando crawlers[sid].next = 0;
149287ff9dc0Sdormando crawlers[sid].prev = 0;
149387ff9dc0Sdormando crawlers[sid].time = 0;
149487ff9dc0Sdormando crawlers[sid].remaining = remaining;
149587ff9dc0Sdormando crawlers[sid].slabs_clsid = sid;
149687ff9dc0Sdormando crawler_link_q((item *)&crawlers[sid]);
149787ff9dc0Sdormando crawler_count++;
149887ff9dc0Sdormando starts++;
149987ff9dc0Sdormando }
150087ff9dc0Sdormando pthread_mutex_unlock(&lru_locks[sid]);
150187ff9dc0Sdormando }
150287ff9dc0Sdormando if (starts) {
1503c10feb9eSdormando STATS_LOCK();
1504cb01d504Sdormando stats_state.lru_crawler_running = true;
1505c10feb9eSdormando stats.lru_crawler_starts++;
1506c10feb9eSdormando STATS_UNLOCK();
150787ff9dc0Sdormando pthread_mutex_lock(&lru_crawler_stats_lock);
150887ff9dc0Sdormando memset(&crawlerstats[id], 0, sizeof(crawlerstats_t));
150987ff9dc0Sdormando crawlerstats[id].start_time = current_time;
151087ff9dc0Sdormando pthread_mutex_unlock(&lru_crawler_stats_lock);
151187ff9dc0Sdormando }
151287ff9dc0Sdormando return starts;
151387ff9dc0Sdormando }
151487ff9dc0Sdormando
lru_crawler_start(uint32_t id,uint32_t remaining)151587ff9dc0Sdormando static int lru_crawler_start(uint32_t id, uint32_t remaining) {
151687ff9dc0Sdormando int starts;
151787ff9dc0Sdormando if (pthread_mutex_trylock(&lru_crawler_lock) != 0) {
151887ff9dc0Sdormando return 0;
151987ff9dc0Sdormando }
152087ff9dc0Sdormando starts = do_lru_crawler_start(id, remaining);
1521c10feb9eSdormando if (starts) {
1522c10feb9eSdormando pthread_cond_signal(&lru_crawler_cond);
1523c10feb9eSdormando }
152487ff9dc0Sdormando pthread_mutex_unlock(&lru_crawler_lock);
152587ff9dc0Sdormando return starts;
152687ff9dc0Sdormando }
152787ff9dc0Sdormando
1528e708513aSdormando /* FIXME: Split this into two functions: one to kick a crawler for a sid, and one to
1529e708513aSdormando * parse the string. LRU maintainer code is generating a string to set up a
1530e708513aSdormando * sid.
1531e708513aSdormando * Also only clear the crawlerstats once per sid.
1532e708513aSdormando */
lru_crawler_crawl(char * slabs)1533e8711e1bSdormando enum crawler_result_type lru_crawler_crawl(char *slabs) {
1534e8711e1bSdormando char *b = NULL;
1535e8711e1bSdormando uint32_t sid = 0;
1536e708513aSdormando int starts = 0;
153787ff9dc0Sdormando uint8_t tocrawl[MAX_NUMBER_OF_SLAB_CLASSES];
15386be2b6c0Sdormando if (pthread_mutex_trylock(&lru_crawler_lock) != 0) {
15396be2b6c0Sdormando return CRAWLER_RUNNING;
15406be2b6c0Sdormando }
1541e8711e1bSdormando
154287ff9dc0Sdormando /* FIXME: I added this while debugging. Don't think it's needed? */
154387ff9dc0Sdormando memset(tocrawl, 0, sizeof(uint8_t) * MAX_NUMBER_OF_SLAB_CLASSES);
1544f89e8613Sdormando if (strcmp(slabs, "all") == 0) {
154587ff9dc0Sdormando for (sid = 0; sid < MAX_NUMBER_OF_SLAB_CLASSES; sid++) {
1546f89e8613Sdormando tocrawl[sid] = 1;
1547f89e8613Sdormando }
1548f89e8613Sdormando } else {
1549e8711e1bSdormando for (char *p = strtok_r(slabs, ",", &b);
1550e8711e1bSdormando p != NULL;
1551e8711e1bSdormando p = strtok_r(NULL, ",", &b)) {
1552e8711e1bSdormando
1553e8711e1bSdormando if (!safe_strtoul(p, &sid) || sid < POWER_SMALLEST
1554a2fc8e93Sdormando || sid >= MAX_NUMBER_OF_SLAB_CLASSES-1) {
1555e8711e1bSdormando pthread_mutex_unlock(&lru_crawler_lock);
15566be2b6c0Sdormando return CRAWLER_BADCLASS;
15576be2b6c0Sdormando }
155887ff9dc0Sdormando tocrawl[sid] = 1;
1559e8711e1bSdormando }
1560f89e8613Sdormando }
1561e8711e1bSdormando
156287ff9dc0Sdormando for (sid = POWER_SMALLEST; sid < MAX_NUMBER_OF_SLAB_CLASSES; sid++) {
156387ff9dc0Sdormando if (tocrawl[sid])
156487ff9dc0Sdormando starts += do_lru_crawler_start(sid, settings.lru_crawler_tocrawl);
156569d1c699Sdormando }
1566e708513aSdormando if (starts) {
15676be2b6c0Sdormando pthread_cond_signal(&lru_crawler_cond);
15686be2b6c0Sdormando pthread_mutex_unlock(&lru_crawler_lock);
15696be2b6c0Sdormando return CRAWLER_OK;
1570e708513aSdormando } else {
1571e708513aSdormando pthread_mutex_unlock(&lru_crawler_lock);
1572e708513aSdormando return CRAWLER_NOTSTARTED;
1573e708513aSdormando }
15746be2b6c0Sdormando }
15756be2b6c0Sdormando
15766af7aa0bSdormando /* If we hold this lock, crawler can't wake up or move */
lru_crawler_pause(void)15776af7aa0bSdormando void lru_crawler_pause(void) {
15786af7aa0bSdormando pthread_mutex_lock(&lru_crawler_lock);
15796af7aa0bSdormando }
15806af7aa0bSdormando
lru_crawler_resume(void)15816af7aa0bSdormando void lru_crawler_resume(void) {
15826af7aa0bSdormando pthread_mutex_unlock(&lru_crawler_lock);
15836af7aa0bSdormando }
15846af7aa0bSdormando
init_lru_crawler(void)15856be2b6c0Sdormando int init_lru_crawler(void) {
15866be2b6c0Sdormando if (lru_crawler_initialized == 0) {
1587e708513aSdormando memset(&crawlerstats, 0, sizeof(crawlerstats_t) * MAX_NUMBER_OF_SLAB_CLASSES);
15886be2b6c0Sdormando if (pthread_cond_init(&lru_crawler_cond, NULL) != 0) {
15896be2b6c0Sdormando fprintf(stderr, "Can't initialize lru crawler condition\n");
15906be2b6c0Sdormando return -1;
15916be2b6c0Sdormando }
15926be2b6c0Sdormando pthread_mutex_init(&lru_crawler_lock, NULL);
15936be2b6c0Sdormando lru_crawler_initialized = 1;
15946be2b6c0Sdormando }
15950d1f505cSdormando return 0;
15960d1f505cSdormando }
1597