xref: /memcached-1.4.29/memcached.h (revision ee461d11)
/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */

/** \file
 * The main memcached header holding commonly used data
 * structures and function prototypes.
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <event.h>
#include <netdb.h>
#include <pthread.h>
#include <unistd.h>
#include <assert.h>
#include <stdint.h>

#include "protocol_binary.h"
#include "cache.h"
#include "logger.h"

#include "sasl_defs.h"

/** Maximum length of a key. */
#define KEY_MAX_LENGTH 250

/** Size of an incr buf. */
#define INCR_MAX_STORAGE_LEN 24

/* Fixed buffer / datagram sizing constants (their users live outside this
 * header). */
#define DATA_BUFFER_SIZE 2048
#define UDP_READ_BUFFER_SIZE 65536
#define UDP_MAX_PAYLOAD_SIZE 1400
#define UDP_HEADER_SIZE 8
#define MAX_SENDBUF_SIZE (256 * 1024 * 1024)
/* I'm told the max length of a 64-bit num converted to string is 20 bytes.
 * Plus a few for spaces, \r\n, \0 */
#define SUFFIX_SIZE 24

/** Initial size of list of items being returned by "get". */
#define ITEM_LIST_INITIAL 200

/** Initial size of list of CAS suffixes appended to "gets" lines. */
#define SUFFIX_LIST_INITIAL 20

/** Initial size of the sendmsg() scatter/gather array. */
#define IOV_LIST_INITIAL 400

/** Initial number of sendmsg() argument structures to allocate. */
#define MSG_LIST_INITIAL 10

/** High water marks for buffer shrinking */
#define READ_BUFFER_HIGHWAT 8192
#define ITEM_LIST_HIGHWAT 400
#define IOV_LIST_HIGHWAT 600
#define MSG_LIST_HIGHWAT 100

/* Binary protocol stuff */
#define MIN_BIN_PKT_LENGTH 16
/* MIN_BIN_PKT_LENGTH expressed in 32-bit words. */
#define BIN_PKT_HDR_WORDS (MIN_BIN_PKT_LENGTH/sizeof(uint32_t))

/* Initial power multiplier for the hash table */
#define HASHPOWER_DEFAULT 16

/*
 * We only reposition items in the LRU queue if they haven't been repositioned
 * in this many seconds. That saves us from churning on frequently-accessed
 * items.
 */
#define ITEM_UPDATE_INTERVAL 60

/* unistd.h is here */
/* NOTE(review): <unistd.h> is also included unconditionally near the top of
 * this header, so this guarded include looks redundant - confirm before
 * removing. */
#if HAVE_UNISTD_H
# include <unistd.h>
#endif

/* Slab sizing definitions. */
#define POWER_SMALLEST 1
#define POWER_LARGEST  256 /* actual cap is 255 */
#define SLAB_GLOBAL_PAGE_POOL 0 /* magic slab class for storing pages for reassignment */
#define CHUNK_ALIGN_BYTES 8
/* slab class max is a 6-bit number, -1. */
#define MAX_NUMBER_OF_SLAB_CLASSES (63 + 1)

/** How long an object can reasonably be assumed to be locked before
    harvesting it on a low memory condition. Default: disabled. */
#define TAIL_REPAIR_TIME_DEFAULT 0

/*
 * Item accessor macros.
 *
 * warning: don't use these macros with a function, as it evals its arg twice
 *
 * All offsets are computed from the address of (item)->data. When ITEM_CAS is
 * set in it_flags, sizeof(uint64_t) bytes are skipped before the key.
 */

/* CAS value of the item, or 0 when the item carries no CAS (ITEM_CAS unset). */
#define ITEM_get_cas(i) (((i)->it_flags & ITEM_CAS) ? \
        (i)->data->cas : (uint64_t)0)

/* Store v as the item's CAS value; does nothing when ITEM_CAS is unset.
 * Wrapped in do { } while (0) so the expansion is exactly one statement and
 * is safe as the body of an unbraced if/else; callers must terminate the
 * invocation with a semicolon. */
#define ITEM_set_cas(i,v) do { \
    if ((i)->it_flags & ITEM_CAS) { \
        (i)->data->cas = (v); \
    } \
} while (0)

/* Start of the key. */
#define ITEM_key(item) (((char*)&((item)->data)) \
         + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0))

/* Start of the suffix: nkey + 1 bytes past the key start. */
#define ITEM_suffix(item) ((char*) &((item)->data) + (item)->nkey + 1 \
         + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0))

/* Start of the value: nsuffix bytes past the suffix start. */
#define ITEM_data(item) ((char*) &((item)->data) + (item)->nkey + 1 \
         + (item)->nsuffix \
         + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0))

/* Total size: header + key (+1) + suffix + value (+ CAS when present). */
#define ITEM_ntotal(item) (sizeof(struct _stritem) + (item)->nkey + 1 \
         + (item)->nsuffix + (item)->nbytes \
         + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0))

/* slabs_clsid with its two highest bits (6 and 7) masked off. */
#define ITEM_clsid(item) ((item)->slabs_clsid & ~(3<<6))

/* Sizes passed to snprintf() for the key/value scratch buffers used by
 * APPEND_NUM_FMT_STAT below. */
#define STAT_KEY_LEN 128
#define STAT_VAL_LEN 128

/*
 * NOTE: the APPEND_* macros below are not hygienic. They reference names that
 * must already be in scope at the call site (add_stats and c; the numbered
 * variants also key_str, val_str, klen and vlen), and their expansions
 * already end in ';'. APPEND_NUM_FMT_STAT expands to three statements, so
 * never use it as the body of an unbraced if/else/loop.
 */

/** Append a simple stat with a stat name, value format and value */
#define APPEND_STAT(name, fmt, val) \
    append_stat(name, add_stats, c, fmt, val);

/** Append an indexed stat with a stat name (with format), value format
    and value */
#define APPEND_NUM_FMT_STAT(name_fmt, num, name, fmt, val)          \
    klen = snprintf(key_str, STAT_KEY_LEN, name_fmt, num, name);    \
    vlen = snprintf(val_str, STAT_VAL_LEN, fmt, val);               \
    add_stats(key_str, klen, val_str, vlen, c);

/** Common APPEND_NUM_FMT_STAT format. */
#define APPEND_NUM_STAT(num, name, fmt, val) \
    APPEND_NUM_FMT_STAT("%d:%s", num, name, fmt, val)

/**
 * Callback for any function producing stats.
 *
 * @param key the stat's key
 * @param klen length of the key
 * @param val the stat's value in an ascii form (e.g. text form of a number)
 * @param vlen length of the value
 * @param cookie magic callback cookie
 */
typedef void (*ADD_STAT)(const char *key, const uint16_t klen,
                         const char *val, const uint32_t vlen,
                         const void *cookie);

/*
 * NOTE: If you modify this table you _MUST_ update the function state_text
 */
/**
 * Possible states of a connection.
 */
enum conn_states {
    conn_listening,  /**< the socket which listens for connections */
    conn_new_cmd,    /**< Prepare connection for next command */
    conn_waiting,    /**< waiting for a readable socket */
    conn_read,       /**< reading in a command line */
    conn_parse_cmd,  /**< try to parse a command from the input buffer */
    conn_write,      /**< writing out a simple response */
    conn_nread,      /**< reading in a fixed number of bytes */
    conn_swallow,    /**< swallowing unnecessary bytes w/o storing */
    conn_closing,    /**< closing this connection */
    conn_mwrite,     /**< writing out many items sequentially */
    conn_closed,     /**< connection is closed */
    conn_watch,      /**< held by the logger thread as a watcher */
    conn_max_state   /**< Max state value (used for assertion) */
};

/**
 * Sub-states of a connection (see the substate member of struct conn).
 */
enum bin_substates {
    bin_no_state,
    bin_reading_set_header,
    bin_reading_cas_header,
    bin_read_set_value,
    bin_reading_get_key,
    bin_reading_stat,
    bin_reading_del_header,
    bin_reading_incr_header,
    bin_read_flush_exptime,
    bin_reading_sasl_auth,
    bin_reading_sasl_auth_data,
    bin_reading_touch_key,
};

/** Wire protocol spoken on a connection. */
enum protocol {
    ascii_prot = 3, /* arbitrary value. */
    binary_prot,
    negotiating_prot /* Discovering the protocol */
};

/** Transport a connection runs over. */
enum network_transport {
    local_transport, /* Unix sockets*/
    tcp_transport,
    udp_transport
};

/** Pause/resume request kinds (consumers are outside this header). */
enum pause_thread_types {
    PAUSE_WORKER_THREADS = 0,
    PAUSE_ALL_THREADS,
    RESUME_ALL_THREADS,
    RESUME_WORKER_THREADS
};

/* Transport predicates. The argument is parenthesized so that compound
 * expressions (e.g. a ?: expression) compare as a whole. */
#define IS_TCP(x) ((x) == tcp_transport)
#define IS_UDP(x) ((x) == udp_transport)

/* NREAD_* command codes, one per storage-style command name. */
#define NREAD_ADD 1
#define NREAD_SET 2
#define NREAD_REPLACE 3
#define NREAD_APPEND 4
#define NREAD_PREPEND 5
#define NREAD_CAS 6

/** Result codes for storing an item. */
enum store_item_type {
    NOT_STORED=0, STORED, EXISTS, NOT_FOUND, TOO_LARGE, NO_MEMORY
};

/** Result codes for a delta operation. */
enum delta_result_type {
    OK, NON_NUMERIC, EOM, DELTA_ITEM_NOT_FOUND, DELTA_ITEM_CAS_MISMATCH
};

/** Time relative to server start. Smaller than time_t on 64-bit systems. */
typedef unsigned int rel_time_t;

/** Use X macros to avoid iterating over the stats fields during reset and
 * aggregation. No longer have to add new stats in 3+ places.
 */

#define SLAB_STATS_FIELDS \
    X(set_cmds) \
    X(get_hits) \
    X(touch_hits) \
    X(delete_hits) \
    X(cas_hits) \
    X(cas_badval) \
    X(incr_hits) \
    X(decr_hits)

/** Stats stored per slab (and per thread). */
struct slab_stats {
/* One uint64_t counter per SLAB_STATS_FIELDS entry. */
#define X(name) uint64_t    name;
    SLAB_STATS_FIELDS
#undef X
};

24768c64594Sdormando #define THREAD_STATS_FIELDS \
24868c64594Sdormando     X(get_cmds) \
24968c64594Sdormando     X(get_misses) \
25068c64594Sdormando     X(get_expired) \
25168c64594Sdormando     X(get_flushed) \
25268c64594Sdormando     X(touch_cmds) \
25368c64594Sdormando     X(touch_misses) \
25468c64594Sdormando     X(delete_misses) \
25568c64594Sdormando     X(incr_misses) \
25668c64594Sdormando     X(decr_misses) \
25768c64594Sdormando     X(cas_misses) \
25868c64594Sdormando     X(bytes_read) \
25968c64594Sdormando     X(bytes_written) \
26068c64594Sdormando     X(flush_cmds) \
26168c64594Sdormando     X(conn_yields) /* # of yields for connections (-R option)*/ \
26268c64594Sdormando     X(auth_cmds) \
26368c64594Sdormando     X(auth_errors) \
26468c64594Sdormando     X(idle_kicks) /* idle connections killed */
26568c64594Sdormando 
266cf1b7559SDustin Sallings /**
267cf1b7559SDustin Sallings  * Stats stored per-thread.
268cf1b7559SDustin Sallings  */
2691fdfb7e9STrond Norbye struct thread_stats {
2701fdfb7e9STrond Norbye     pthread_mutex_t   mutex;
27168c64594Sdormando #define X(name) uint64_t    name;
27268c64594Sdormando     THREAD_STATS_FIELDS
27368c64594Sdormando #undef X
27425b5189cSDustin Sallings     struct slab_stats slab_stats[MAX_NUMBER_OF_SLAB_CLASSES];
2751fdfb7e9STrond Norbye };
2761fdfb7e9STrond Norbye 
/**
 * Global stats. Only resettable stats should go into this structure.
 */
struct stats {
    uint64_t      total_items;
    uint64_t      total_conns;
    uint64_t      rejected_conns;
    uint64_t      malloc_fails;
    uint64_t      listen_disabled_num;
    uint64_t      slabs_moved;       /* times slabs were moved around */
    uint64_t      slab_reassign_rescues; /* items rescued during slab move */
    uint64_t      slab_reassign_evictions_nomem; /* valid items lost during slab move */
    /* NOTE(review): the comment below duplicates the one on the previous
     * field; confirm what inline_reclaim actually counts. */
    uint64_t      slab_reassign_inline_reclaim; /* valid items lost during slab move */
    uint64_t      slab_reassign_chunk_rescues; /* chunked-item chunks recovered */
    uint64_t      slab_reassign_busy_items; /* valid temporarily unmovable */
    uint64_t      lru_crawler_starts; /* Number of item crawlers kicked off */
    uint64_t      lru_maintainer_juggles; /* number of LRU bg pokes */
    uint64_t      time_in_listen_disabled_us;  /* elapsed time in microseconds while server unable to process new connections */
    uint64_t      log_worker_dropped; /* logs dropped by worker threads */
    uint64_t      log_worker_written; /* logs written by worker threads */
    uint64_t      log_watcher_skipped; /* logs watchers missed */
    uint64_t      log_watcher_sent; /* logs sent to watcher buffers */
    struct timeval maxconns_entered;  /* last time maxconns entered */
};

/**
 * Global "state" stats. Reflects state that shouldn't be wiped ever.
 * Ordered for some cache line locality for commonly updated counters.
 */
struct stats_state {
    uint64_t      curr_items;
    uint64_t      curr_bytes;
    uint64_t      curr_conns;
    uint64_t      hash_bytes;       /* size used for hash tables */
    unsigned int  conn_structs;
    unsigned int  reserved_fds;
    unsigned int  hash_power_level; /* Better hope it's not over 9000 */
    bool          hash_is_expanding; /* If the hash table is being expanded */
    bool          accepting_conns;  /* whether we are currently accepting */
    bool          slab_reassign_running; /* slab reassign in progress */
    bool          lru_crawler_running; /* crawl in progress */
};

/** Highest verbosity level. */
#define MAX_VERBOSITY_LEVEL 2

32252778791SDustin Sallings /* When adding a setting, be sure to update process_stat_settings */
323cf1b7559SDustin Sallings /**
324cf1b7559SDustin Sallings  * Globally accessible settings as derived from the commandline.
325cf1b7559SDustin Sallings  */
32660d70942SAnatoly Vorobey struct settings {
327c9607c6dSBrad Fitzpatrick     size_t maxbytes;
32860d70942SAnatoly Vorobey     int maxconns;
32960d70942SAnatoly Vorobey     int port;
330c9607c6dSBrad Fitzpatrick     int udpport;
331a61a6900SBrian Aker     char *inter;
33260d70942SAnatoly Vorobey     int verbose;
333c9607c6dSBrad Fitzpatrick     rel_time_t oldest_live; /* ignore existing items older than this */
33490593dcaSdormando     uint64_t oldest_cas; /* ignore existing items with CAS values lower than this */
335841811e9SJason Titus     int evict_to_free;
336c9607c6dSBrad Fitzpatrick     char *socketpath;   /* path to unix socket if using local socket */
33740c76cedSDavid Bremner     int access;  /* access mask (a la chmod) for unix domain socket */
338c9607c6dSBrad Fitzpatrick     double factor;          /* chunk size growth factor */
339c9607c6dSBrad Fitzpatrick     int chunk_size;
3402fe44f1cSDmitry Isaykin     int num_threads;        /* number of worker (without dispatcher) libevent threads to run */
341c60ca35bSTrond Norbye     int num_threads_per_udp; /* number of worker threads serving each udp socket */
34256b8339eSSteven Grimm     char prefix_delimiter;  /* character that marks a key prefix (for stats) */
34356b8339eSSteven Grimm     int detail_enabled;     /* nonzero if we're collecting detailed stats */
344ca90710fSdormando     int reqs_per_event;     /* Maximum number of io to process on each
345ca90710fSdormando                                io-event. */
346eda68b70STrond Norbye     bool use_cas;
347a155b044SDustin Sallings     enum protocol binding_protocol;
3487d010a85SChris Goffinet     int backlog;
3490567967aSdormando     int item_size_max;        /* Maximum item size */
3500567967aSdormando     int slab_chunk_size_max;  /* Upper end for chunks within slab pages. */
3510567967aSdormando     int slab_page_size;     /* Slab's page units. */
352f1307c4dSDustin Sallings     bool sasl;              /* SASL on/off */
353d1f9d992Sdormando     bool maxconns_fast;     /* Whether or not to early close connections */
354d425b35bSdormando     bool lru_crawler;        /* Whether or not to enable the autocrawler thread */
355f7bf26cbSdormando     bool lru_maintainer_thread; /* LRU maintainer background thread */
35610698baeSdormando     bool slab_reassign;     /* Whether or not slab reassignment is allowed */
35763bf748aSdormando     int slab_automove;     /* Whether or not to automatically move slabs */
3581db1de38Sdormando     int hashpower_init;     /* Starting hash power level */
359d11dc0eaSBrian Aker     bool shutdown_command; /* allow shutdown command */
360058af0d8SKeyur     int tail_repair_time;   /* LRU tail refcount leak repair time */
361a2f5ca50SDaniel Pañeda     bool flush_enabled;     /* flush_all enabled */
36205ca809cSdormando     char *hash_algorithm;     /* Hash algorithm in use */
36331d533f8Sdormando     int lru_crawler_sleep;  /* Microsecond sleep between items */
364e31a5912Sdormando     uint32_t lru_crawler_tocrawl; /* Number of items to crawl per run */
3658d6bf78aSdormando     int hot_lru_pct; /* percentage of slab space for HOT_LRU */
3668d6bf78aSdormando     int warm_lru_pct; /* percentage of slab space for WARM_LRU */
36787ff9dc0Sdormando     int crawls_persleep; /* Number of LRU crawls to run before sleeping */
3684de89c8cSdormando     bool expirezero_does_not_evict; /* exptime == 0 goes into NOEXP_LRU */
36983ba6bd9SJay Grizzard     int idle_timeout;       /* Number of seconds to let connections idle */
370d704f2c0Sdormando     unsigned int logger_watcher_buf_size; /* size of logger's per-watcher buffer */
371d704f2c0Sdormando     unsigned int logger_buf_size; /* size of per-thread logger buffer */
37260d70942SAnatoly Vorobey };
37386969ea4SBrad Fitzpatrick 
/* Process-wide globals; declared here, defined outside this header. */
extern struct stats stats;
extern struct stats_state stats_state;
extern time_t process_started;
extern struct settings settings;

/* Bit flags stored in an item's it_flags member. */
#define ITEM_LINKED 1
#define ITEM_CAS 2

/* temp */
#define ITEM_SLABBED 4

/* Item was fetched at least once in its lifetime */
#define ITEM_FETCHED 8
/* Appended on fetch, removed on LRU shuffling */
#define ITEM_ACTIVE 16
/* If an item's storage are chained chunks. */
#define ITEM_CHUNKED 32
#define ITEM_CHUNK 64

393cf1b7559SDustin Sallings /**
394cf1b7559SDustin Sallings  * Structure for storing items within memcached.
395cf1b7559SDustin Sallings  */
39660d70942SAnatoly Vorobey typedef struct _stritem {
39769d1c699Sdormando     /* Protected by LRU locks */
39860d70942SAnatoly Vorobey     struct _stritem *next;
39960d70942SAnatoly Vorobey     struct _stritem *prev;
40069d1c699Sdormando     /* Rest are protected by an item lock */
401f6d334e0SBrad Fitzpatrick     struct _stritem *h_next;    /* hash chain next */
402c9607c6dSBrad Fitzpatrick     rel_time_t      time;       /* least recent access */
403c9607c6dSBrad Fitzpatrick     rel_time_t      exptime;    /* expire time */
40460d70942SAnatoly Vorobey     int             nbytes;     /* size of data */
405c9607c6dSBrad Fitzpatrick     unsigned short  refcount;
40677dde9f9SPaul Lindner     uint8_t         nsuffix;    /* length of flags-and-length string */
40777dde9f9SPaul Lindner     uint8_t         it_flags;   /* ITEM_* above */
40877dde9f9SPaul Lindner     uint8_t         slabs_clsid;/* which slab class we're in */
40977dde9f9SPaul Lindner     uint8_t         nkey;       /* key length, w/terminating null and padding */
410df158875SDan McGee     /* this odd type prevents type-punning issues when we do
411df158875SDan McGee      * the little shuffle to save space when not using CAS. */
412df158875SDan McGee     union {
413df158875SDan McGee         uint64_t cas;
414df158875SDan McGee         char end;
415df158875SDan McGee     } data[];
416eda68b70STrond Norbye     /* if it_flags & ITEM_CAS we have 8 bytes CAS */
417c9607c6dSBrad Fitzpatrick     /* then null-terminated key */
418c9607c6dSBrad Fitzpatrick     /* then " flags length\r\n" (no terminating null) */
419c9607c6dSBrad Fitzpatrick     /* then data with terminating \r\n (no terminating null; it's binary!) */
42060d70942SAnatoly Vorobey } item;
42186969ea4SBrad Fitzpatrick 
4221fdfb7e9STrond Norbye typedef struct {
4230d1f505cSdormando     struct _stritem *next;
4240d1f505cSdormando     struct _stritem *prev;
4250d1f505cSdormando     struct _stritem *h_next;    /* hash chain next */
4260d1f505cSdormando     rel_time_t      time;       /* least recent access */
4270d1f505cSdormando     rel_time_t      exptime;    /* expire time */
4280d1f505cSdormando     int             nbytes;     /* size of data */
4290d1f505cSdormando     unsigned short  refcount;
4300d1f505cSdormando     uint8_t         nsuffix;    /* length of flags-and-length string */
4310d1f505cSdormando     uint8_t         it_flags;   /* ITEM_* above */
4320d1f505cSdormando     uint8_t         slabs_clsid;/* which slab class we're in */
4330d1f505cSdormando     uint8_t         nkey;       /* key length, w/terminating null and padding */
434e31a5912Sdormando     uint32_t        remaining;  /* Max keys to crawl per slab per invocation */
4350d1f505cSdormando } crawler;
4360d1f505cSdormando 
/* Header when an item is actually a chunk of another item. */
typedef struct _strchunk {
    struct _strchunk *next;     /* points within its own chain. */
    struct _strchunk *prev;     /* can potentially point to the head. */
    struct _stritem  *head;     /* always points to the owner chunk */
    int              size;      /* available chunk space in bytes */
    int              used;      /* chunk space used */
    int              nbytes;    /* used. */
    unsigned short   refcount;  /* used? */
    uint8_t          nsuffix;   /* unused */
    uint8_t          it_flags;  /* ITEM_* above. */
    uint8_t          slabs_clsid; /* Same as above. */
    char data[];                /* chunk payload follows the header */
} item_chunk;

4520d1f505cSdormando typedef struct {
4531fdfb7e9STrond Norbye     pthread_t thread_id;        /* unique ID of this thread */
4541fdfb7e9STrond Norbye     struct event_base *base;    /* libevent handle this thread uses */
4551fdfb7e9STrond Norbye     struct event notify_event;  /* listen event for notify pipe */
4561fdfb7e9STrond Norbye     int notify_receive_fd;      /* receiving end of notify pipe */
4571fdfb7e9STrond Norbye     int notify_send_fd;         /* sending end of notify pipe */
4581fdfb7e9STrond Norbye     struct thread_stats stats;  /* Stats generated by this thread */
4591fdfb7e9STrond Norbye     struct conn_queue *new_conn_queue; /* queue of new connections to handle */
4604c86fa59STrond Norbye     cache_t *suffix_cache;      /* suffix cache */
461916fff36Sdormando     logger *l;                  /* logger buffer */
4621fdfb7e9STrond Norbye } LIBEVENT_THREAD;
4631fdfb7e9STrond Norbye 
/** Dispatcher thread state: thread id plus its libevent handle. */
typedef struct {
    pthread_t thread_id;        /* unique ID of this thread */
    struct event_base *base;    /* libevent handle this thread uses */
} LIBEVENT_DISPATCHER_THREAD;

469cf1b7559SDustin Sallings /**
470cf1b7559SDustin Sallings  * The structure representing a connection into memcached.
471cf1b7559SDustin Sallings  */
4729150c85bSBrian Aker typedef struct conn conn;
4739150c85bSBrian Aker struct conn {
47460d70942SAnatoly Vorobey     int    sfd;
475f1307c4dSDustin Sallings     sasl_conn_t *sasl_conn;
47687c1cf0fS伊藤洋也     bool authenticated;
4773ed60cddSDustin Sallings     enum conn_states  state;
478d86881eaSDustin Sallings     enum bin_substates substate;
47970c1b5f6SSteven Grimm     rel_time_t last_cmd_time;
48060d70942SAnatoly Vorobey     struct event event;
48160d70942SAnatoly Vorobey     short  ev_flags;
482516e7dc2SPaul Lindner     short  which;   /** which events were just triggered */
48386969ea4SBrad Fitzpatrick 
484516e7dc2SPaul Lindner     char   *rbuf;   /** buffer to read commands into */
485516e7dc2SPaul Lindner     char   *rcurr;  /** but if we parsed some already, this is where we stopped */
486516e7dc2SPaul Lindner     int    rsize;   /** total allocated size of rbuf */
487516e7dc2SPaul Lindner     int    rbytes;  /** how much data, starting from rcur, do we have unparsed */
48886969ea4SBrad Fitzpatrick 
48960d70942SAnatoly Vorobey     char   *wbuf;
49060d70942SAnatoly Vorobey     char   *wcurr;
49160d70942SAnatoly Vorobey     int    wsize;
49260d70942SAnatoly Vorobey     int    wbytes;
493a564b426SDustin Sallings     /** which state to go into after finishing current write */
494a564b426SDustin Sallings     enum conn_states  write_and_go;
495516e7dc2SPaul Lindner     void   *write_and_free; /** free this memory after finishing writing */
49686969ea4SBrad Fitzpatrick 
497516e7dc2SPaul Lindner     char   *ritem;  /** when we read in an item's value, it goes here */
49860d70942SAnatoly Vorobey     int    rlbytes;
49986969ea4SBrad Fitzpatrick 
50060d70942SAnatoly Vorobey     /* data for the nread state */
50186969ea4SBrad Fitzpatrick 
502516e7dc2SPaul Lindner     /**
50360d70942SAnatoly Vorobey      * item is used to hold an item structure created after reading the command
50460d70942SAnatoly Vorobey      * line of set/add/replace commands, but before we finished reading the actual
5057917af40SBrad Fitzpatrick      * data. The data is read into ITEM_data(item) to avoid extra copying.
50660d70942SAnatoly Vorobey      */
50786969ea4SBrad Fitzpatrick 
50860d70942SAnatoly Vorobey     void   *item;     /* for commands set/add/replace  */
50986969ea4SBrad Fitzpatrick 
51060d70942SAnatoly Vorobey     /* data for the swallow state */
51160d70942SAnatoly Vorobey     int    sbytes;    /* how many bytes to swallow */
51286969ea4SBrad Fitzpatrick 
51360d70942SAnatoly Vorobey     /* data for the mwrite state */
514c9607c6dSBrad Fitzpatrick     struct iovec *iov;
515c9607c6dSBrad Fitzpatrick     int    iovsize;   /* number of elements allocated in iov[] */
516c9607c6dSBrad Fitzpatrick     int    iovused;   /* number of elements used in iov[] */
51786969ea4SBrad Fitzpatrick 
518c9607c6dSBrad Fitzpatrick     struct msghdr *msglist;
519c9607c6dSBrad Fitzpatrick     int    msgsize;   /* number of elements allocated in msglist[] */
520c9607c6dSBrad Fitzpatrick     int    msgused;   /* number of elements used in msglist[] */
521c9607c6dSBrad Fitzpatrick     int    msgcurr;   /* element in msglist[] being transmitted now */
522c9607c6dSBrad Fitzpatrick     int    msgbytes;  /* number of bytes in current msg */
52386969ea4SBrad Fitzpatrick 
52460d70942SAnatoly Vorobey     item   **ilist;   /* list of items to write out */
52560d70942SAnatoly Vorobey     int    isize;
52660d70942SAnatoly Vorobey     item   **icurr;
52760d70942SAnatoly Vorobey     int    ileft;
52886969ea4SBrad Fitzpatrick 
529e61c0a86Sdormando     char   **suffixlist;
530e61c0a86Sdormando     int    suffixsize;
531e61c0a86Sdormando     char   **suffixcurr;
532e61c0a86Sdormando     int    suffixleft;
533e61c0a86Sdormando 
534e1407b25SDustin Sallings     enum protocol protocol;   /* which protocol this connection speaks */
53515ace4b5SEric Lambert     enum network_transport transport; /* what transport is used by this connection */
5362cdde3e4SDustin Sallings 
537c9607c6dSBrad Fitzpatrick     /* data for UDP clients */
538c9607c6dSBrad Fitzpatrick     int    request_id; /* Incoming UDP request ID, if this is a UDP "connection" */
539c6a700a3SAlex Leone     struct sockaddr_in6 request_addr; /* udp: Who sent the most recent request */
540c9607c6dSBrad Fitzpatrick     socklen_t request_addr_size;
541c9607c6dSBrad Fitzpatrick     unsigned char *hdrbuf; /* udp packet headers */
542c9607c6dSBrad Fitzpatrick     int    hdrsize;   /* number of headers' worth of space is allocated */
54386969ea4SBrad Fitzpatrick 
544d9ece780STomash Brechko     bool   noreply;   /* True if the reply should not be sent. */
54517df5c0eSTrond Norbye     /* current stats command */
54617df5c0eSTrond Norbye     struct {
54717df5c0eSTrond Norbye         char *buffer;
54817df5c0eSTrond Norbye         size_t size;
54917df5c0eSTrond Norbye         size_t offset;
55017df5c0eSTrond Norbye     } stats;
55117df5c0eSTrond Norbye 
5526aafe58eSDustin Sallings     /* Binary protocol stuff */
5536aafe58eSDustin Sallings     /* This is where the binary header goes */
554a85a6e15STrond Norbye     protocol_binary_request_header binary_header;
555a85a6e15STrond Norbye     uint64_t cas; /* the cas to return */
5560a77fdfaSDustin Sallings     short cmd; /* current command being processed */
5576aafe58eSDustin Sallings     int opaque;
5586aafe58eSDustin Sallings     int keylen;
5599150c85bSBrian Aker     conn   *next;     /* Used for generating a list of conn structures */
5601fdfb7e9STrond Norbye     LIBEVENT_THREAD *thread; /* Pointer to the thread object serving this connection */
5619150c85bSBrian Aker };
5626aafe58eSDustin Sallings 
56370c1b5f6SSteven Grimm /* array of conn structures, indexed by file descriptor */
56470c1b5f6SSteven Grimm extern conn **conns;
56586969ea4SBrad Fitzpatrick 
566c9607c6dSBrad Fitzpatrick /* current time of day (updated periodically) */
567c9607c6dSBrad Fitzpatrick extern volatile rel_time_t current_time;
56886969ea4SBrad Fitzpatrick 
56910698baeSdormando /* TODO: Move to slabs.h? */
57010698baeSdormando extern volatile int slab_rebalance_signal;
57110698baeSdormando 
/**
 * State record for a slab rebalance.
 *
 * NOTE(review): the per-field notes below are inferred from the member names
 * only; confirm them against the code that fills this structure in.
 */
struct slab_rebalance {
    /* Presumably start, end and current position within the region being
     * worked on -- TODO confirm. */
    void *slab_start, *slab_end, *slab_pos;

    /* Presumably source and destination slab class ids -- TODO confirm. */
    int s_clsid, d_clsid;

    /* 32-bit counters. */
    uint32_t busy_items;
    uint32_t rescues;
    uint32_t evictions_nomem;
    uint32_t inline_reclaim;
    uint32_t chunk_rescues;

    /* Single-byte completion marker. */
    uint8_t done;
};

/** The one shared instance of the rebalance state (defined elsewhere). */
extern struct slab_rebalance slab_rebal;
58710698baeSdormando 
58860d70942SAnatoly Vorobey /*
58960d70942SAnatoly Vorobey  * Functions
59060d70942SAnatoly Vorobey  */
591a0e4a756Sdormando void do_accept_new_conns(const bool do_accept);
592cbcd3872Sdormando enum delta_result_type do_add_delta(conn *c, const char *key,
593cbcd3872Sdormando                                     const size_t nkey, const bool incr,
594ea2d42a5Sdormando                                     const int64_t delta, char *buf,
595bab9acd1Sdormando                                     uint64_t *cas, const uint32_t hv);
596bab9acd1Sdormando enum store_item_type do_store_item(item *item, int comm, conn* c, const uint32_t hv);
59715ace4b5SEric Lambert conn *conn_new(const int sfd, const enum conn_states init_state, const int event_flags, const int read_buffer_size, enum network_transport transport, struct event_base *base);
59808c14e4eSTrond Norbye extern int daemonize(int nochdir, int noclose);
59956b8339eSSteven Grimm 
/*
 * Thin aliases for the pthread mutex primitives. Each expands to the
 * corresponding pthread call, so `x` must be a pthread_mutex_t pointer and
 * the result is that call's return value.
 */
#define mutex_lock(x)   pthread_mutex_lock(x)
#define mutex_unlock(x) pthread_mutex_unlock(x)
60256b8339eSSteven Grimm 
60356b8339eSSteven Grimm #include "stats.h"
60477dde9f9SPaul Lindner #include "slabs.h"
60577dde9f9SPaul Lindner #include "assoc.h"
60677dde9f9SPaul Lindner #include "items.h"
607e440813dSTrond Norbye #include "trace.h"
6081a070652STrond Norbye #include "hash.h"
609420aa2d9SBrad Fitzpatrick #include "util.h"
61056b8339eSSteven Grimm 
61156b8339eSSteven Grimm /*
61256b8339eSSteven Grimm  * Functions such as the libevent-related calls that need to do cross-thread
61356b8339eSSteven Grimm  * communication in multithreaded mode (rather than actually doing the work
61456b8339eSSteven Grimm  * in the current thread) are called via "dispatch_" frontends, which are
61556b8339eSSteven Grimm  * also #define-d to directly call the underlying code in singlethreaded mode.
61656b8339eSSteven Grimm  */
61756b8339eSSteven Grimm 
618434c7cc5Sdormando void memcached_thread_init(int nthreads, struct event_base *main_base);
61956b8339eSSteven Grimm int  dispatch_event_add(int thread, conn *c);
62015ace4b5SEric Lambert void dispatch_conn_new(int sfd, enum conn_states init_state, int event_flags, int read_buffer_size, enum network_transport transport);
621916fff36Sdormando void sidethread_conn_close(conn *c);
62256b8339eSSteven Grimm 
62356b8339eSSteven Grimm /* Lock wrappers for cache functions that are called from main loop. */
624cbcd3872Sdormando enum delta_result_type add_delta(conn *c, const char *key,
625cbcd3872Sdormando                                  const size_t nkey, const int incr,
626ea2d42a5Sdormando                                  const int64_t delta, char *buf,
627ea2d42a5Sdormando                                  uint64_t *cas);
628a0e4a756Sdormando void accept_new_conns(const bool do_accept);
629a9dcd9acSToru Maesaka conn *conn_from_freelist(void);
630a9dcd9acSToru Maesaka bool  conn_add_to_freelist(conn *c);
63183ba6bd9SJay Grizzard void  conn_close_idle(conn *c);
632a9dcd9acSToru Maesaka int   is_listen_thread(void);
633a9dcd9acSToru Maesaka item *item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes);
6346895d23eSsergiocarlos item *item_get(const char *key, const size_t nkey, conn *c);
6356895d23eSsergiocarlos item *item_touch(const char *key, const size_t nkey, uint32_t exptime, conn *c);
636a9dcd9acSToru Maesaka int   item_link(item *it);
637a9dcd9acSToru Maesaka void  item_remove(item *it);
638bab9acd1Sdormando int   item_replace(item *it, item *new_it, const uint32_t hv);
639a9dcd9acSToru Maesaka void  item_unlink(item *it);
640a9dcd9acSToru Maesaka void  item_update(item *it);
6411fdfb7e9STrond Norbye 
/*
 * Item locking, keyed by a 32-bit value `hv` (presumably the key's hash --
 * TODO confirm).
 *
 * item_trylock() returns an opaque pointer; item_trylock_unlock() takes an
 * opaque pointer, presumably the one item_trylock() returned -- confirm at
 * the definitions.
 */
void item_lock(uint32_t hv);
void item_unlock(uint32_t hv);
void *item_trylock(uint32_t hv);
void item_trylock_unlock(void *arg);

void pause_threads(enum pause_thread_types type);

/* Reference-count helpers; each takes the counter by address and returns an
 * unsigned short. */
unsigned short refcount_incr(unsigned short *refcount);
unsigned short refcount_decr(unsigned short *refcount);

/* Statistics locking. */
void STATS_LOCK(void);
void STATS_UNLOCK(void);

/* Per-thread statistics: reset, and aggregation into caller-supplied
 * structures. */
void threadlocal_stats_reset(void);
void threadlocal_stats_aggregate(struct thread_stats *stats);
void slab_stats_aggregate(struct thread_stats *stats, struct slab_stats *out);
6541fdfb7e9STrond Norbye 
655dd713869SDustin Sallings /* Stat processing functions */
65617df5c0eSTrond Norbye void append_stat(const char *name, ADD_STAT add_stats, conn *c,
657dd713869SDustin Sallings                  const char *fmt, ...);
658dd713869SDustin Sallings 
659e5d053c3SDustin Sallings enum store_item_type store_item(item *item, int comm, conn *c);
66056b8339eSSteven Grimm 
/*
 * drop_privileges(): when the build configuration sets HAVE_DROP_PRIVILEGES
 * the real function (defined elsewhere) is declared; otherwise calls compile
 * away to nothing.
 *
 * The fallback expands to a void expression rather than to an empty token
 * sequence, so a call stays a well-formed expression wherever it appears
 * (plain statement, unbraced if/else body, comma expression).
 */
#if HAVE_DROP_PRIVILEGES
extern void drop_privileges(void);
#else
#define drop_privileges() ((void)0)
#endif
66669aa5427STrond Norbye 
/*
 * Branch-prediction hints.
 *
 * Compilers that are not GCC-compatible, and GCC releases before 2.96, get a
 * pass-through definition of __builtin_expect so the hint macros below still
 * compile.
 */
#if !defined(__GNUC__) || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
#define __builtin_expect(x, expected_value) (x)
#endif

/*
 * likely(x) / unlikely(x): mark a condition as expected to be true / false.
 *
 * The operand is normalised with !! before it reaches __builtin_expect,
 * whose parameters have type long. This lets pointer operands be used
 * without an implicit pointer-to-integer conversion, keeps operands wider
 * than long from being truncated, and makes likely() compare against the
 * value 1 it names as expected. Both macros therefore yield 0 or 1.
 */
#define likely(x)       __builtin_expect(!!(x), 1)
#define unlikely(x)     __builtin_expect(!!(x), 0)
674