1f6d334e0SBrad Fitzpatrick /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */ 2c6975ef4SPaul Lindner 3cf1b7559SDustin Sallings /** \file 4cf1b7559SDustin Sallings * The main memcached header holding commonly used data 5cf1b7559SDustin Sallings * structures and function prototypes. 6cf1b7559SDustin Sallings */ 7cf1b7559SDustin Sallings 870960eb3SPaul Lindner #ifdef HAVE_CONFIG_H 956b8339eSSteven Grimm #include "config.h" 1070960eb3SPaul Lindner #endif 1170960eb3SPaul Lindner 1256b8339eSSteven Grimm #include <sys/types.h> 13b65aa2abSPaul Lindner #include <sys/socket.h> 1456b8339eSSteven Grimm #include <sys/time.h> 1556b8339eSSteven Grimm #include <netinet/in.h> 1656b8339eSSteven Grimm #include <event.h> 17a61a6900SBrian Aker #include <netdb.h> 181fdfb7e9STrond Norbye #include <pthread.h> 19f1307c4dSDustin Sallings #include <unistd.h> 20916fff36Sdormando #include <assert.h> 2156b8339eSSteven Grimm 22a85a6e15STrond Norbye #include "protocol_binary.h" 234c86fa59STrond Norbye #include "cache.h" 24916fff36Sdormando #include "logger.h" 25a85a6e15STrond Norbye 26f1307c4dSDustin Sallings #include "sasl_defs.h" 27f1307c4dSDustin Sallings 28cf1b7559SDustin Sallings /** Maximum length of a key. */ 29ecdb0114SDustin Sallings #define KEY_MAX_LENGTH 250 30ecdb0114SDustin Sallings 317b7bc241SDustin Sallings /** Size of an incr buf. */ 327b7bc241SDustin Sallings #define INCR_MAX_STORAGE_LEN 24 337b7bc241SDustin Sallings 3460d70942SAnatoly Vorobey #define DATA_BUFFER_SIZE 2048 35c9607c6dSBrad Fitzpatrick #define UDP_READ_BUFFER_SIZE 65536 36c9607c6dSBrad Fitzpatrick #define UDP_MAX_PAYLOAD_SIZE 1400 37c9607c6dSBrad Fitzpatrick #define UDP_HEADER_SIZE 8 38c9607c6dSBrad Fitzpatrick #define MAX_SENDBUF_SIZE (256 * 1024 * 1024) 39420aa2d9SBrad Fitzpatrick /* I'm told the max length of a 64-bit num converted to string is 20 bytes. 40b17a0c8bSdormando * Plus a few for spaces, \r\n, \0 */ 41b17a0c8bSdormando #define SUFFIX_SIZE 24 4286969ea4SBrad Fitzpatrick 43516e7dc2SPaul Lindner /** Initial size of list of items being returned by "get". */ 44c9607c6dSBrad Fitzpatrick #define ITEM_LIST_INITIAL 200 4586969ea4SBrad Fitzpatrick 46e61c0a86Sdormando /** Initial size of list of CAS suffixes appended to "gets" lines. */ 47e61c0a86Sdormando #define SUFFIX_LIST_INITIAL 20 48e61c0a86Sdormando 49516e7dc2SPaul Lindner /** Initial size of the sendmsg() scatter/gather array. */ 50c9607c6dSBrad Fitzpatrick #define IOV_LIST_INITIAL 400 5186969ea4SBrad Fitzpatrick 52516e7dc2SPaul Lindner /** Initial number of sendmsg() argument structures to allocate. */ 53c9607c6dSBrad Fitzpatrick #define MSG_LIST_INITIAL 10 5486969ea4SBrad Fitzpatrick 55516e7dc2SPaul Lindner /** High water marks for buffer shrinking */ 56c9607c6dSBrad Fitzpatrick #define READ_BUFFER_HIGHWAT 8192 57c9607c6dSBrad Fitzpatrick #define ITEM_LIST_HIGHWAT 400 58c9607c6dSBrad Fitzpatrick #define IOV_LIST_HIGHWAT 600 59c9607c6dSBrad Fitzpatrick #define MSG_LIST_HIGHWAT 100 6086969ea4SBrad Fitzpatrick 616aafe58eSDustin Sallings /* Binary protocol stuff */ 627cfd3438SDustin Sallings #define MIN_BIN_PKT_LENGTH 16 636aafe58eSDustin Sallings #define BIN_PKT_HDR_WORDS (MIN_BIN_PKT_LENGTH/sizeof(uint32_t)) 646aafe58eSDustin Sallings 651db1de38Sdormando /* Initial power multiplier for the hash table */ 661db1de38Sdormando #define HASHPOWER_DEFAULT 16 671db1de38Sdormando 68d7324b0bSdormando /* 69d7324b0bSdormando * We only reposition items in the LRU queue if they haven't been repositioned 70d7324b0bSdormando * in this many seconds. That saves us from churning on frequently-accessed 71d7324b0bSdormando * items. 72d7324b0bSdormando */ 73d7324b0bSdormando #define ITEM_UPDATE_INTERVAL 60 74d7324b0bSdormando 75d9b97d80SPaul Lindner /* unistd.h is here */ 76d9b97d80SPaul Lindner #if HAVE_UNISTD_H 77d9b97d80SPaul Lindner # include <unistd.h> 78d9b97d80SPaul Lindner #endif 79d9b97d80SPaul Lindner 80067102a4SDustin Sallings /* Slab sizing definitions. */ 81067102a4SDustin Sallings #define POWER_SMALLEST 1 82369845f0Sdormando #define POWER_LARGEST 256 /* actual cap is 255 */ 83d6e96467Sdormando #define SLAB_GLOBAL_PAGE_POOL 0 /* magic slab class for storing pages for reassignment */ 84067102a4SDustin Sallings #define CHUNK_ALIGN_BYTES 8 859bce42f2Sdormando /* slab class max is a 6-bit number, -1. */ 86a2fc8e93Sdormando #define MAX_NUMBER_OF_SLAB_CLASSES (63 + 1) 87067102a4SDustin Sallings 88cf1b7559SDustin Sallings /** How long an object can reasonably be assumed to be locked before 8959bd02ceSdormando harvesting it on a low memory condition. Default: disabled. */ 9059bd02ceSdormando #define TAIL_REPAIR_TIME_DEFAULT 0 91b8d997e5SDustin Sallings 92fcc2c98fSDustin Sallings /* warning: don't use these macros with a function, as it evals its arg twice */ 93df158875SDan McGee #define ITEM_get_cas(i) (((i)->it_flags & ITEM_CAS) ? \ 94df158875SDan McGee (i)->data->cas : (uint64_t)0) 95fcc2c98fSDustin Sallings 96df158875SDan McGee #define ITEM_set_cas(i,v) { \ 97df158875SDan McGee if ((i)->it_flags & ITEM_CAS) { \ 98df158875SDan McGee (i)->data->cas = v; \ 99df158875SDan McGee } \ 100df158875SDan McGee } 101df158875SDan McGee 102df158875SDan McGee #define ITEM_key(item) (((char*)&((item)->data)) \ 103fcc2c98fSDustin Sallings + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0)) 104fcc2c98fSDustin Sallings 105df158875SDan McGee #define ITEM_suffix(item) ((char*) &((item)->data) + (item)->nkey + 1 \ 106fcc2c98fSDustin Sallings + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0)) 107fcc2c98fSDustin Sallings 108df158875SDan McGee #define ITEM_data(item) ((char*) &((item)->data) + (item)->nkey + 1 \ 109fcc2c98fSDustin Sallings + (item)->nsuffix \ 110fcc2c98fSDustin Sallings + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0)) 111fcc2c98fSDustin Sallings 112fcc2c98fSDustin Sallings #define ITEM_ntotal(item) (sizeof(struct _stritem) + (item)->nkey + 1 \ 113fcc2c98fSDustin Sallings + (item)->nsuffix + (item)->nbytes \ 114fcc2c98fSDustin Sallings + (((item)->it_flags & ITEM_CAS) ? sizeof(uint64_t) : 0)) 115fcc2c98fSDustin Sallings 1169bce42f2Sdormando #define ITEM_clsid(item) ((item)->slabs_clsid & ~(3<<6)) 1179bce42f2Sdormando 11888a68689SDustin Sallings #define STAT_KEY_LEN 128 11988a68689SDustin Sallings #define STAT_VAL_LEN 128 12088a68689SDustin Sallings 121fcc2c98fSDustin Sallings /** Append a simple stat with a stat name, value format and value */ 122fcc2c98fSDustin Sallings #define APPEND_STAT(name, fmt, val) \ 123fcc2c98fSDustin Sallings append_stat(name, add_stats, c, fmt, val); 124fcc2c98fSDustin Sallings 125fcc2c98fSDustin Sallings /** Append an indexed stat with a stat name (with format), value format 126fcc2c98fSDustin Sallings and value */ 127fcc2c98fSDustin Sallings #define APPEND_NUM_FMT_STAT(name_fmt, num, name, fmt, val) \ 12888a68689SDustin Sallings klen = snprintf(key_str, STAT_KEY_LEN, name_fmt, num, name); \ 12988a68689SDustin Sallings vlen = snprintf(val_str, STAT_VAL_LEN, fmt, val); \ 130fcc2c98fSDustin Sallings add_stats(key_str, klen, val_str, vlen, c); 131fcc2c98fSDustin Sallings 132fcc2c98fSDustin Sallings /** Common APPEND_NUM_FMT_STAT format. */ 133fcc2c98fSDustin Sallings #define APPEND_NUM_STAT(num, name, fmt, val) \ 134fcc2c98fSDustin Sallings APPEND_NUM_FMT_STAT("%d:%s", num, name, fmt, val) 135fcc2c98fSDustin Sallings 136fcc2c98fSDustin Sallings /** 137fcc2c98fSDustin Sallings * Callback for any function producing stats. 138fcc2c98fSDustin Sallings * 139fcc2c98fSDustin Sallings * @param key the stat's key 140fcc2c98fSDustin Sallings * @param klen length of the key 141fcc2c98fSDustin Sallings * @param val the stat's value in an ascii form (e.g. text form of a number) 142fcc2c98fSDustin Sallings * @param vlen length of the value 143fcc2c98fSDustin Sallings * @parm cookie magic callback cookie 144fcc2c98fSDustin Sallings */ 145fcc2c98fSDustin Sallings typedef void (*ADD_STAT)(const char *key, const uint16_t klen, 146fcc2c98fSDustin Sallings const char *val, const uint32_t vlen, 147fcc2c98fSDustin Sallings const void *cookie); 148fcc2c98fSDustin Sallings 149fcc2c98fSDustin Sallings /* 150fcc2c98fSDustin Sallings * NOTE: If you modify this table you _MUST_ update the function state_text 151fcc2c98fSDustin Sallings */ 152fcc2c98fSDustin Sallings /** 153fcc2c98fSDustin Sallings * Possible states of a connection. 154fcc2c98fSDustin Sallings */ 155fcc2c98fSDustin Sallings enum conn_states { 156fcc2c98fSDustin Sallings conn_listening, /**< the socket which listens for connections */ 157fcc2c98fSDustin Sallings conn_new_cmd, /**< Prepare connection for next command */ 158fcc2c98fSDustin Sallings conn_waiting, /**< waiting for a readable socket */ 159fcc2c98fSDustin Sallings conn_read, /**< reading in a command line */ 160fcc2c98fSDustin Sallings conn_parse_cmd, /**< try to parse a command from the input buffer */ 161fcc2c98fSDustin Sallings conn_write, /**< writing out a simple response */ 162fcc2c98fSDustin Sallings conn_nread, /**< reading in a fixed number of bytes */ 163fcc2c98fSDustin Sallings conn_swallow, /**< swallowing unnecessary bytes w/o storing */ 164fcc2c98fSDustin Sallings conn_closing, /**< closing this connection */ 165fcc2c98fSDustin Sallings conn_mwrite, /**< writing out many items sequentially */ 16670c1b5f6SSteven Grimm conn_closed, /**< connection is closed */ 167916fff36Sdormando conn_watch, /**< held by the logger thread as a watcher */ 168fcc2c98fSDustin Sallings conn_max_state /**< Max state value (used for assertion) */ 169fcc2c98fSDustin Sallings }; 170fcc2c98fSDustin Sallings 171fcc2c98fSDustin Sallings enum bin_substates { 172fcc2c98fSDustin Sallings bin_no_state, 173fcc2c98fSDustin Sallings bin_reading_set_header, 174fcc2c98fSDustin Sallings bin_reading_cas_header, 175fcc2c98fSDustin Sallings bin_read_set_value, 176fcc2c98fSDustin Sallings bin_reading_get_key, 177fcc2c98fSDustin Sallings bin_reading_stat, 178fcc2c98fSDustin Sallings bin_reading_del_header, 179fcc2c98fSDustin Sallings bin_reading_incr_header, 180f1307c4dSDustin Sallings bin_read_flush_exptime, 181f1307c4dSDustin Sallings bin_reading_sasl_auth, 182d87f568aSdormando bin_reading_sasl_auth_data, 183d87f568aSdormando bin_reading_touch_key, 184fcc2c98fSDustin Sallings }; 185fcc2c98fSDustin Sallings 186fcc2c98fSDustin Sallings enum protocol { 187fcc2c98fSDustin Sallings ascii_prot = 3, /* arbitrary value. */ 188fcc2c98fSDustin Sallings binary_prot, 189fcc2c98fSDustin Sallings negotiating_prot /* Discovering the protocol */ 190fcc2c98fSDustin Sallings }; 191fcc2c98fSDustin Sallings 192fcc2c98fSDustin Sallings enum network_transport { 193fcc2c98fSDustin Sallings local_transport, /* Unix sockets*/ 194fcc2c98fSDustin Sallings tcp_transport, 195fcc2c98fSDustin Sallings udp_transport 196fcc2c98fSDustin Sallings }; 197fcc2c98fSDustin Sallings 1986af7aa0bSdormando enum pause_thread_types { 1996af7aa0bSdormando PAUSE_WORKER_THREADS = 0, 2006af7aa0bSdormando PAUSE_ALL_THREADS, 2016af7aa0bSdormando RESUME_ALL_THREADS, 2026af7aa0bSdormando RESUME_WORKER_THREADS 2031c94e12cSdormando }; 2041c94e12cSdormando 20583ba6bd9SJay Grizzard #define IS_TCP(x) (x == tcp_transport) 206fcc2c98fSDustin Sallings #define IS_UDP(x) (x == udp_transport) 207fcc2c98fSDustin Sallings 208fcc2c98fSDustin Sallings #define NREAD_ADD 1 209fcc2c98fSDustin Sallings #define NREAD_SET 2 210fcc2c98fSDustin Sallings #define NREAD_REPLACE 3 211fcc2c98fSDustin Sallings #define NREAD_APPEND 4 212fcc2c98fSDustin Sallings #define NREAD_PREPEND 5 213fcc2c98fSDustin Sallings #define NREAD_CAS 6 214fcc2c98fSDustin Sallings 215fcc2c98fSDustin Sallings enum store_item_type { 216c7fbccebSdormando NOT_STORED=0, STORED, EXISTS, NOT_FOUND, TOO_LARGE, NO_MEMORY 217fcc2c98fSDustin Sallings }; 218fcc2c98fSDustin Sallings 219d044acb2SDustin Sallings enum delta_result_type { 220ea2d42a5Sdormando OK, NON_NUMERIC, EOM, DELTA_ITEM_NOT_FOUND, DELTA_ITEM_CAS_MISMATCH 221d044acb2SDustin Sallings }; 222fcc2c98fSDustin Sallings 223516e7dc2SPaul Lindner /** Time relative to server start. Smaller than time_t on 64-bit systems. */ 224c9607c6dSBrad Fitzpatrick typedef unsigned int rel_time_t; 22586969ea4SBrad Fitzpatrick 22668c64594Sdormando /** Use X macros to avoid iterating over the stats fields during reset and 22768c64594Sdormando * aggregation. No longer have to add new stats in 3+ places. 22868c64594Sdormando */ 22968c64594Sdormando 23068c64594Sdormando #define SLAB_STATS_FIELDS \ 23168c64594Sdormando X(set_cmds) \ 23268c64594Sdormando X(get_hits) \ 23368c64594Sdormando X(touch_hits) \ 23468c64594Sdormando X(delete_hits) \ 23568c64594Sdormando X(cas_hits) \ 23668c64594Sdormando X(cas_badval) \ 23768c64594Sdormando X(incr_hits) \ 23868c64594Sdormando X(decr_hits) 23968c64594Sdormando 240cf1b7559SDustin Sallings /** Stats stored per slab (and per thread). */ 24125b5189cSDustin Sallings struct slab_stats { 24268c64594Sdormando #define X(name) uint64_t name; 24368c64594Sdormando SLAB_STATS_FIELDS 24468c64594Sdormando #undef X 24525b5189cSDustin Sallings }; 24625b5189cSDustin Sallings 24768c64594Sdormando #define THREAD_STATS_FIELDS \ 24868c64594Sdormando X(get_cmds) \ 24968c64594Sdormando X(get_misses) \ 25068c64594Sdormando X(get_expired) \ 25168c64594Sdormando X(get_flushed) \ 25268c64594Sdormando X(touch_cmds) \ 25368c64594Sdormando X(touch_misses) \ 25468c64594Sdormando X(delete_misses) \ 25568c64594Sdormando X(incr_misses) \ 25668c64594Sdormando X(decr_misses) \ 25768c64594Sdormando X(cas_misses) \ 25868c64594Sdormando X(bytes_read) \ 25968c64594Sdormando X(bytes_written) \ 26068c64594Sdormando X(flush_cmds) \ 26168c64594Sdormando X(conn_yields) /* # of yields for connections (-R option)*/ \ 26268c64594Sdormando X(auth_cmds) \ 26368c64594Sdormando X(auth_errors) \ 26468c64594Sdormando X(idle_kicks) /* idle connections killed */ 26568c64594Sdormando 266cf1b7559SDustin Sallings /** 267cf1b7559SDustin Sallings * Stats stored per-thread. 268cf1b7559SDustin Sallings */ 2691fdfb7e9STrond Norbye struct thread_stats { 2701fdfb7e9STrond Norbye pthread_mutex_t mutex; 27168c64594Sdormando #define X(name) uint64_t name; 27268c64594Sdormando THREAD_STATS_FIELDS 27368c64594Sdormando #undef X 27425b5189cSDustin Sallings struct slab_stats slab_stats[MAX_NUMBER_OF_SLAB_CLASSES]; 2751fdfb7e9STrond Norbye }; 2761fdfb7e9STrond Norbye 277cf1b7559SDustin Sallings /** 278cb01d504Sdormando * Global stats. Only resettable stats should go into this structure. 279cf1b7559SDustin Sallings */ 28060d70942SAnatoly Vorobey struct stats { 2819517c656Sdormando uint64_t total_items; 2829517c656Sdormando uint64_t total_conns; 283d1f9d992Sdormando uint64_t rejected_conns; 284de021a9cSTrond Norbye uint64_t malloc_fails; 2853d540bdbSdormando uint64_t listen_disabled_num; 28610698baeSdormando uint64_t slabs_moved; /* times slabs were moved around */ 287004e2211Sdormando uint64_t slab_reassign_rescues; /* items rescued during slab move */ 2888fa54f7eSdormando uint64_t slab_reassign_evictions_nomem; /* valid items lost during slab move */ 289b1debc4cSdormando uint64_t slab_reassign_inline_reclaim; /* valid items lost during slab move */ 290*ee461d11Sdormando uint64_t slab_reassign_chunk_rescues; /* chunked-item chunks recovered */ 291004e2211Sdormando uint64_t slab_reassign_busy_items; /* valid temporarily unmovable */ 292c10feb9eSdormando uint64_t lru_crawler_starts; /* Number of item crawlers kicked off */ 293f7bf26cbSdormando uint64_t lru_maintainer_juggles; /* number of LRU bg pokes */ 294a1f269eeSIan Miell uint64_t time_in_listen_disabled_us; /* elapsed time in microseconds while server unable to process new connections */ 2950503b5e2Sdormando uint64_t log_worker_dropped; /* logs dropped by worker threads */ 2960503b5e2Sdormando uint64_t log_worker_written; /* logs written by worker threads */ 2970503b5e2Sdormando uint64_t log_watcher_skipped; /* logs watchers missed */ 2980503b5e2Sdormando uint64_t log_watcher_sent; /* logs sent to watcher buffers */ 299a1f269eeSIan Miell struct timeval maxconns_entered; /* last time maxconns entered */ 30060d70942SAnatoly Vorobey }; 30186969ea4SBrad Fitzpatrick 302cb01d504Sdormando /** 303cb01d504Sdormando * Global "state" stats. Reflects state that shouldn't be wiped ever. 304cb01d504Sdormando * Ordered for some cache line locality for commonly updated counters. 305cb01d504Sdormando */ 306cb01d504Sdormando struct stats_state { 307cb01d504Sdormando uint64_t curr_items; 308cb01d504Sdormando uint64_t curr_bytes; 309cb01d504Sdormando uint64_t curr_conns; 310cb01d504Sdormando uint64_t hash_bytes; /* size used for hash tables */ 311cb01d504Sdormando unsigned int conn_structs; 312cb01d504Sdormando unsigned int reserved_fds; 313cb01d504Sdormando unsigned int hash_power_level; /* Better hope it's not over 9000 */ 314cb01d504Sdormando bool hash_is_expanding; /* If the hash table is being expanded */ 315cb01d504Sdormando bool accepting_conns; /* whether we are currently accepting */ 316cb01d504Sdormando bool slab_reassign_running; /* slab reassign in progress */ 317cb01d504Sdormando bool lru_crawler_running; /* crawl in progress */ 318cb01d504Sdormando }; 319cb01d504Sdormando 320eca55c9aSPaul Lindner #define MAX_VERBOSITY_LEVEL 2 321eca55c9aSPaul Lindner 32252778791SDustin Sallings /* When adding a setting, be sure to update process_stat_settings */ 323cf1b7559SDustin Sallings /** 324cf1b7559SDustin Sallings * Globally accessible settings as derived from the commandline. 325cf1b7559SDustin Sallings */ 32660d70942SAnatoly Vorobey struct settings { 327c9607c6dSBrad Fitzpatrick size_t maxbytes; 32860d70942SAnatoly Vorobey int maxconns; 32960d70942SAnatoly Vorobey int port; 330c9607c6dSBrad Fitzpatrick int udpport; 331a61a6900SBrian Aker char *inter; 33260d70942SAnatoly Vorobey int verbose; 333c9607c6dSBrad Fitzpatrick rel_time_t oldest_live; /* ignore existing items older than this */ 33490593dcaSdormando uint64_t oldest_cas; /* ignore existing items with CAS values lower than this */ 335841811e9SJason Titus int evict_to_free; 336c9607c6dSBrad Fitzpatrick char *socketpath; /* path to unix socket if using local socket */ 33740c76cedSDavid Bremner int access; /* access mask (a la chmod) for unix domain socket */ 338c9607c6dSBrad Fitzpatrick double factor; /* chunk size growth factor */ 339c9607c6dSBrad Fitzpatrick int chunk_size; 3402fe44f1cSDmitry Isaykin int num_threads; /* number of worker (without dispatcher) libevent threads to run */ 341c60ca35bSTrond Norbye int num_threads_per_udp; /* number of worker threads serving each udp socket */ 34256b8339eSSteven Grimm char prefix_delimiter; /* character that marks a key prefix (for stats) */ 34356b8339eSSteven Grimm int detail_enabled; /* nonzero if we're collecting detailed stats */ 344ca90710fSdormando int reqs_per_event; /* Maximum number of io to process on each 345ca90710fSdormando io-event. */ 346eda68b70STrond Norbye bool use_cas; 347a155b044SDustin Sallings enum protocol binding_protocol; 3487d010a85SChris Goffinet int backlog; 3490567967aSdormando int item_size_max; /* Maximum item size */ 3500567967aSdormando int slab_chunk_size_max; /* Upper end for chunks within slab pages. */ 3510567967aSdormando int slab_page_size; /* Slab's page units. */ 352f1307c4dSDustin Sallings bool sasl; /* SASL on/off */ 353d1f9d992Sdormando bool maxconns_fast; /* Whether or not to early close connections */ 354d425b35bSdormando bool lru_crawler; /* Whether or not to enable the autocrawler thread */ 355f7bf26cbSdormando bool lru_maintainer_thread; /* LRU maintainer background thread */ 35610698baeSdormando bool slab_reassign; /* Whether or not slab reassignment is allowed */ 35763bf748aSdormando int slab_automove; /* Whether or not to automatically move slabs */ 3581db1de38Sdormando int hashpower_init; /* Starting hash power level */ 359d11dc0eaSBrian Aker bool shutdown_command; /* allow shutdown command */ 360058af0d8SKeyur int tail_repair_time; /* LRU tail refcount leak repair time */ 361a2f5ca50SDaniel Pañeda bool flush_enabled; /* flush_all enabled */ 36205ca809cSdormando char *hash_algorithm; /* Hash algorithm in use */ 36331d533f8Sdormando int lru_crawler_sleep; /* Microsecond sleep between items */ 364e31a5912Sdormando uint32_t lru_crawler_tocrawl; /* Number of items to crawl per run */ 3658d6bf78aSdormando int hot_lru_pct; /* percentage of slab space for HOT_LRU */ 3668d6bf78aSdormando int warm_lru_pct; /* percentage of slab space for WARM_LRU */ 36787ff9dc0Sdormando int crawls_persleep; /* Number of LRU crawls to run before sleeping */ 3684de89c8cSdormando bool expirezero_does_not_evict; /* exptime == 0 goes into NOEXP_LRU */ 36983ba6bd9SJay Grizzard int idle_timeout; /* Number of seconds to let connections idle */ 370d704f2c0Sdormando unsigned int logger_watcher_buf_size; /* size of logger's per-watcher buffer */ 371d704f2c0Sdormando unsigned int logger_buf_size; /* size of per-thread logger buffer */ 37260d70942SAnatoly Vorobey }; 37386969ea4SBrad Fitzpatrick 37460d70942SAnatoly Vorobey extern struct stats stats; 375cb01d504Sdormando extern struct stats_state stats_state; 3761fdfb7e9STrond Norbye extern time_t process_started; 37760d70942SAnatoly Vorobey extern struct settings settings; 37886969ea4SBrad Fitzpatrick 37960d70942SAnatoly Vorobey #define ITEM_LINKED 1 380eda68b70STrond Norbye #define ITEM_CAS 2 38186969ea4SBrad Fitzpatrick 38254326f42SBrad Fitzpatrick /* temp */ 38354326f42SBrad Fitzpatrick #define ITEM_SLABBED 4 38486969ea4SBrad Fitzpatrick 3859bce42f2Sdormando /* Item was fetched at least once in its lifetime */ 386839949a9Sdormando #define ITEM_FETCHED 8 3879bce42f2Sdormando /* Appended on fetch, removed on LRU shuffling */ 3889bce42f2Sdormando #define ITEM_ACTIVE 16 3890567967aSdormando /* If an item's storage are chained chunks. */ 3900567967aSdormando #define ITEM_CHUNKED 32 3910567967aSdormando #define ITEM_CHUNK 64 392839949a9Sdormando 393cf1b7559SDustin Sallings /** 394cf1b7559SDustin Sallings * Structure for storing items within memcached. 395cf1b7559SDustin Sallings */ 39660d70942SAnatoly Vorobey typedef struct _stritem { 39769d1c699Sdormando /* Protected by LRU locks */ 39860d70942SAnatoly Vorobey struct _stritem *next; 39960d70942SAnatoly Vorobey struct _stritem *prev; 40069d1c699Sdormando /* Rest are protected by an item lock */ 401f6d334e0SBrad Fitzpatrick struct _stritem *h_next; /* hash chain next */ 402c9607c6dSBrad Fitzpatrick rel_time_t time; /* least recent access */ 403c9607c6dSBrad Fitzpatrick rel_time_t exptime; /* expire time */ 40460d70942SAnatoly Vorobey int nbytes; /* size of data */ 405c9607c6dSBrad Fitzpatrick unsigned short refcount; 40677dde9f9SPaul Lindner uint8_t nsuffix; /* length of flags-and-length string */ 40777dde9f9SPaul Lindner uint8_t it_flags; /* ITEM_* above */ 40877dde9f9SPaul Lindner uint8_t slabs_clsid;/* which slab class we're in */ 40977dde9f9SPaul Lindner uint8_t nkey; /* key length, w/terminating null and padding */ 410df158875SDan McGee /* this odd type prevents type-punning issues when we do 411df158875SDan McGee * the little shuffle to save space when not using CAS. */ 412df158875SDan McGee union { 413df158875SDan McGee uint64_t cas; 414df158875SDan McGee char end; 415df158875SDan McGee } data[]; 416eda68b70STrond Norbye /* if it_flags & ITEM_CAS we have 8 bytes CAS */ 417c9607c6dSBrad Fitzpatrick /* then null-terminated key */ 418c9607c6dSBrad Fitzpatrick /* then " flags length\r\n" (no terminating null) */ 419c9607c6dSBrad Fitzpatrick /* then data with terminating \r\n (no terminating null; it's binary!) */ 42060d70942SAnatoly Vorobey } item; 42186969ea4SBrad Fitzpatrick 4221fdfb7e9STrond Norbye typedef struct { 4230d1f505cSdormando struct _stritem *next; 4240d1f505cSdormando struct _stritem *prev; 4250d1f505cSdormando struct _stritem *h_next; /* hash chain next */ 4260d1f505cSdormando rel_time_t time; /* least recent access */ 4270d1f505cSdormando rel_time_t exptime; /* expire time */ 4280d1f505cSdormando int nbytes; /* size of data */ 4290d1f505cSdormando unsigned short refcount; 4300d1f505cSdormando uint8_t nsuffix; /* length of flags-and-length string */ 4310d1f505cSdormando uint8_t it_flags; /* ITEM_* above */ 4320d1f505cSdormando uint8_t slabs_clsid;/* which slab class we're in */ 4330d1f505cSdormando uint8_t nkey; /* key length, w/terminating null and padding */ 434e31a5912Sdormando uint32_t remaining; /* Max keys to crawl per slab per invocation */ 4350d1f505cSdormando } crawler; 4360d1f505cSdormando 4370567967aSdormando /* Header when an item is actually a chunk of another item. */ 4380567967aSdormando typedef struct _strchunk { 4390567967aSdormando struct _strchunk *next; /* points within its own chain. */ 4400567967aSdormando struct _strchunk *prev; /* can potentially point to the head. */ 4410567967aSdormando struct _stritem *head; /* always points to the owner chunk */ 4420567967aSdormando int size; /* available chunk space in bytes */ 4430567967aSdormando int used; /* chunk space used */ 4440567967aSdormando int nbytes; /* used. */ 4450567967aSdormando unsigned short refcount; /* used? */ 4460567967aSdormando uint8_t nsuffix; /* unused */ 4470567967aSdormando uint8_t it_flags; /* ITEM_* above. */ 448b05653f9Sdormando uint8_t slabs_clsid; /* Same as above. */ 4490567967aSdormando char data[]; 4500567967aSdormando } item_chunk; 4510567967aSdormando 4520d1f505cSdormando typedef struct { 4531fdfb7e9STrond Norbye pthread_t thread_id; /* unique ID of this thread */ 4541fdfb7e9STrond Norbye struct event_base *base; /* libevent handle this thread uses */ 4551fdfb7e9STrond Norbye struct event notify_event; /* listen event for notify pipe */ 4561fdfb7e9STrond Norbye int notify_receive_fd; /* receiving end of notify pipe */ 4571fdfb7e9STrond Norbye int notify_send_fd; /* sending end of notify pipe */ 4581fdfb7e9STrond Norbye struct thread_stats stats; /* Stats generated by this thread */ 4591fdfb7e9STrond Norbye struct conn_queue *new_conn_queue; /* queue of new connections to handle */ 4604c86fa59STrond Norbye cache_t *suffix_cache; /* suffix cache */ 461916fff36Sdormando logger *l; /* logger buffer */ 4621fdfb7e9STrond Norbye } LIBEVENT_THREAD; 4631fdfb7e9STrond Norbye 4642fe44f1cSDmitry Isaykin typedef struct { 4652fe44f1cSDmitry Isaykin pthread_t thread_id; /* unique ID of this thread */ 4662fe44f1cSDmitry Isaykin struct event_base *base; /* libevent handle this thread uses */ 4672fe44f1cSDmitry Isaykin } LIBEVENT_DISPATCHER_THREAD; 4682fe44f1cSDmitry Isaykin 469cf1b7559SDustin Sallings /** 470cf1b7559SDustin Sallings * The structure representing a connection into memcached. 471cf1b7559SDustin Sallings */ 4729150c85bSBrian Aker typedef struct conn conn; 4739150c85bSBrian Aker struct conn { 47460d70942SAnatoly Vorobey int sfd; 475f1307c4dSDustin Sallings sasl_conn_t *sasl_conn; 47687c1cf0fS伊藤洋也 bool authenticated; 4773ed60cddSDustin Sallings enum conn_states state; 478d86881eaSDustin Sallings enum bin_substates substate; 47970c1b5f6SSteven Grimm rel_time_t last_cmd_time; 48060d70942SAnatoly Vorobey struct event event; 48160d70942SAnatoly Vorobey short ev_flags; 482516e7dc2SPaul Lindner short which; /** which events were just triggered */ 48386969ea4SBrad Fitzpatrick 484516e7dc2SPaul Lindner char *rbuf; /** buffer to read commands into */ 485516e7dc2SPaul Lindner char *rcurr; /** but if we parsed some already, this is where we stopped */ 486516e7dc2SPaul Lindner int rsize; /** total allocated size of rbuf */ 487516e7dc2SPaul Lindner int rbytes; /** how much data, starting from rcur, do we have unparsed */ 48886969ea4SBrad Fitzpatrick 48960d70942SAnatoly Vorobey char *wbuf; 49060d70942SAnatoly Vorobey char *wcurr; 49160d70942SAnatoly Vorobey int wsize; 49260d70942SAnatoly Vorobey int wbytes; 493a564b426SDustin Sallings /** which state to go into after finishing current write */ 494a564b426SDustin Sallings enum conn_states write_and_go; 495516e7dc2SPaul Lindner void *write_and_free; /** free this memory after finishing writing */ 49686969ea4SBrad Fitzpatrick 497516e7dc2SPaul Lindner char *ritem; /** when we read in an item's value, it goes here */ 49860d70942SAnatoly Vorobey int rlbytes; 49986969ea4SBrad Fitzpatrick 50060d70942SAnatoly Vorobey /* data for the nread state */ 50186969ea4SBrad Fitzpatrick 502516e7dc2SPaul Lindner /** 50360d70942SAnatoly Vorobey * item is used to hold an item structure created after reading the command 50460d70942SAnatoly Vorobey * line of set/add/replace commands, but before we finished reading the actual 5057917af40SBrad Fitzpatrick * data. The data is read into ITEM_data(item) to avoid extra copying. 50660d70942SAnatoly Vorobey */ 50786969ea4SBrad Fitzpatrick 50860d70942SAnatoly Vorobey void *item; /* for commands set/add/replace */ 50986969ea4SBrad Fitzpatrick 51060d70942SAnatoly Vorobey /* data for the swallow state */ 51160d70942SAnatoly Vorobey int sbytes; /* how many bytes to swallow */ 51286969ea4SBrad Fitzpatrick 51360d70942SAnatoly Vorobey /* data for the mwrite state */ 514c9607c6dSBrad Fitzpatrick struct iovec *iov; 515c9607c6dSBrad Fitzpatrick int iovsize; /* number of elements allocated in iov[] */ 516c9607c6dSBrad Fitzpatrick int iovused; /* number of elements used in iov[] */ 51786969ea4SBrad Fitzpatrick 518c9607c6dSBrad Fitzpatrick struct msghdr *msglist; 519c9607c6dSBrad Fitzpatrick int msgsize; /* number of elements allocated in msglist[] */ 520c9607c6dSBrad Fitzpatrick int msgused; /* number of elements used in msglist[] */ 521c9607c6dSBrad Fitzpatrick int msgcurr; /* element in msglist[] being transmitted now */ 522c9607c6dSBrad Fitzpatrick int msgbytes; /* number of bytes in current msg */ 52386969ea4SBrad Fitzpatrick 52460d70942SAnatoly Vorobey item **ilist; /* list of items to write out */ 52560d70942SAnatoly Vorobey int isize; 52660d70942SAnatoly Vorobey item **icurr; 52760d70942SAnatoly Vorobey int ileft; 52886969ea4SBrad Fitzpatrick 529e61c0a86Sdormando char **suffixlist; 530e61c0a86Sdormando int suffixsize; 531e61c0a86Sdormando char **suffixcurr; 532e61c0a86Sdormando int suffixleft; 533e61c0a86Sdormando 534e1407b25SDustin Sallings enum protocol protocol; /* which protocol this connection speaks */ 53515ace4b5SEric Lambert enum network_transport transport; /* what transport is used by this connection */ 5362cdde3e4SDustin Sallings 537c9607c6dSBrad Fitzpatrick /* data for UDP clients */ 538c9607c6dSBrad Fitzpatrick int request_id; /* Incoming UDP request ID, if this is a UDP "connection" */ 539c6a700a3SAlex Leone struct sockaddr_in6 request_addr; /* udp: Who sent the most recent request */ 540c9607c6dSBrad Fitzpatrick socklen_t request_addr_size; 541c9607c6dSBrad Fitzpatrick unsigned char *hdrbuf; /* udp packet headers */ 542c9607c6dSBrad Fitzpatrick int hdrsize; /* number of headers' worth of space is allocated */ 54386969ea4SBrad Fitzpatrick 544d9ece780STomash Brechko bool noreply; /* True if the reply should not be sent. */ 54517df5c0eSTrond Norbye /* current stats command */ 54617df5c0eSTrond Norbye struct { 54717df5c0eSTrond Norbye char *buffer; 54817df5c0eSTrond Norbye size_t size; 54917df5c0eSTrond Norbye size_t offset; 55017df5c0eSTrond Norbye } stats; 55117df5c0eSTrond Norbye 5526aafe58eSDustin Sallings /* Binary protocol stuff */ 5536aafe58eSDustin Sallings /* This is where the binary header goes */ 554a85a6e15STrond Norbye protocol_binary_request_header binary_header; 555a85a6e15STrond Norbye uint64_t cas; /* the cas to return */ 5560a77fdfaSDustin Sallings short cmd; /* current command being processed */ 5576aafe58eSDustin Sallings int opaque; 5586aafe58eSDustin Sallings int keylen; 5599150c85bSBrian Aker conn *next; /* Used for generating a list of conn structures */ 5601fdfb7e9STrond Norbye LIBEVENT_THREAD *thread; /* Pointer to the thread object serving this connection */ 5619150c85bSBrian Aker }; 5626aafe58eSDustin Sallings 56370c1b5f6SSteven Grimm /* array of conn structures, indexed by file descriptor */ 56470c1b5f6SSteven Grimm extern conn **conns; 56586969ea4SBrad Fitzpatrick 566c9607c6dSBrad Fitzpatrick /* current time of day (updated periodically) */ 567c9607c6dSBrad Fitzpatrick extern volatile rel_time_t current_time; 56886969ea4SBrad Fitzpatrick 56910698baeSdormando /* TODO: Move to slabs.h? */ 57010698baeSdormando extern volatile int slab_rebalance_signal; 57110698baeSdormando 57210698baeSdormando struct slab_rebalance { 57310698baeSdormando void *slab_start; 57410698baeSdormando void *slab_end; 57510698baeSdormando void *slab_pos; 57610698baeSdormando int s_clsid; 57710698baeSdormando int d_clsid; 5786ee8daefSdormando uint32_t busy_items; 5796ee8daefSdormando uint32_t rescues; 5808fa54f7eSdormando uint32_t evictions_nomem; 581b1debc4cSdormando uint32_t inline_reclaim; 582*ee461d11Sdormando uint32_t chunk_rescues; 58310698baeSdormando uint8_t done; 58410698baeSdormando }; 58510698baeSdormando 58610698baeSdormando extern struct slab_rebalance slab_rebal; 58710698baeSdormando 58860d70942SAnatoly Vorobey /* 58960d70942SAnatoly Vorobey * Functions 59060d70942SAnatoly Vorobey */ 591a0e4a756Sdormando void do_accept_new_conns(const bool do_accept); 592cbcd3872Sdormando enum delta_result_type do_add_delta(conn *c, const char *key, 593cbcd3872Sdormando const size_t nkey, const bool incr, 594ea2d42a5Sdormando const int64_t delta, char *buf, 595bab9acd1Sdormando uint64_t *cas, const uint32_t hv); 596bab9acd1Sdormando enum store_item_type do_store_item(item *item, int comm, conn* c, const uint32_t hv); 59715ace4b5SEric Lambert conn *conn_new(const int sfd, const enum conn_states init_state, const int event_flags, const int read_buffer_size, enum network_transport transport, struct event_base *base); 59808c14e4eSTrond Norbye extern int daemonize(int nochdir, int noclose); 59956b8339eSSteven Grimm 6000aa1a82aSdormando #define mutex_lock(x) pthread_mutex_lock(x) 60145e0e950Sdormando #define mutex_unlock(x) pthread_mutex_unlock(x) 60256b8339eSSteven Grimm 60356b8339eSSteven Grimm #include "stats.h" 60477dde9f9SPaul Lindner #include "slabs.h" 60577dde9f9SPaul Lindner #include "assoc.h" 60677dde9f9SPaul Lindner #include "items.h" 607e440813dSTrond Norbye #include "trace.h" 6081a070652STrond Norbye #include "hash.h" 609420aa2d9SBrad Fitzpatrick #include "util.h" 61056b8339eSSteven Grimm 61156b8339eSSteven Grimm /* 61256b8339eSSteven Grimm * Functions such as the libevent-related calls that need to do cross-thread 61356b8339eSSteven Grimm * communication in multithreaded mode (rather than actually doing the work 61456b8339eSSteven Grimm * in the current thread) are called via "dispatch_" frontends, which are 61556b8339eSSteven Grimm * also #define-d to directly call the underlying code in singlethreaded mode. 61656b8339eSSteven Grimm */ 61756b8339eSSteven Grimm 618434c7cc5Sdormando void memcached_thread_init(int nthreads, struct event_base *main_base); 61956b8339eSSteven Grimm int dispatch_event_add(int thread, conn *c); 62015ace4b5SEric Lambert void dispatch_conn_new(int sfd, enum conn_states init_state, int event_flags, int read_buffer_size, enum network_transport transport); 621916fff36Sdormando void sidethread_conn_close(conn *c); 62256b8339eSSteven Grimm 62356b8339eSSteven Grimm /* Lock wrappers for cache functions that are called from main loop. */ 624cbcd3872Sdormando enum delta_result_type add_delta(conn *c, const char *key, 625cbcd3872Sdormando const size_t nkey, const int incr, 626ea2d42a5Sdormando const int64_t delta, char *buf, 627ea2d42a5Sdormando uint64_t *cas); 628a0e4a756Sdormando void accept_new_conns(const bool do_accept); 629a9dcd9acSToru Maesaka conn *conn_from_freelist(void); 630a9dcd9acSToru Maesaka bool conn_add_to_freelist(conn *c); 63183ba6bd9SJay Grizzard void conn_close_idle(conn *c); 632a9dcd9acSToru Maesaka int is_listen_thread(void); 633a9dcd9acSToru Maesaka item *item_alloc(char *key, size_t nkey, int flags, rel_time_t exptime, int nbytes); 6346895d23eSsergiocarlos item *item_get(const char *key, const size_t nkey, conn *c); 6356895d23eSsergiocarlos item *item_touch(const char *key, const size_t nkey, uint32_t exptime, conn *c); 636a9dcd9acSToru Maesaka int item_link(item *it); 637a9dcd9acSToru Maesaka void item_remove(item *it); 638bab9acd1Sdormando int item_replace(item *it, item *new_it, const uint32_t hv); 639a9dcd9acSToru Maesaka void item_unlink(item *it); 640a9dcd9acSToru Maesaka void item_update(item *it); 6411fdfb7e9STrond Norbye 6428fe5bf1fSdormando void item_lock(uint32_t hv); 6431c94e12cSdormando void *item_trylock(uint32_t hv); 6441c94e12cSdormando void item_trylock_unlock(void *arg); 6458fe5bf1fSdormando void item_unlock(uint32_t hv); 6466af7aa0bSdormando void pause_threads(enum pause_thread_types type); 6473b961388Sdormando unsigned short refcount_incr(unsigned short *refcount); 6483b961388Sdormando unsigned short refcount_decr(unsigned short *refcount); 649a9dcd9acSToru Maesaka void STATS_LOCK(void); 650a9dcd9acSToru Maesaka void STATS_UNLOCK(void); 6511fdfb7e9STrond Norbye void threadlocal_stats_reset(void); 6521fdfb7e9STrond Norbye void threadlocal_stats_aggregate(struct thread_stats *stats); 65325b5189cSDustin Sallings void slab_stats_aggregate(struct thread_stats *stats, struct slab_stats *out); 6541fdfb7e9STrond Norbye 655dd713869SDustin Sallings /* Stat processing functions */ 65617df5c0eSTrond Norbye void append_stat(const char *name, ADD_STAT add_stats, conn *c, 657dd713869SDustin Sallings const char *fmt, ...); 658dd713869SDustin Sallings 659e5d053c3SDustin Sallings enum store_item_type store_item(item *item, int comm, conn *c); 66056b8339eSSteven Grimm 66169aa5427STrond Norbye #if HAVE_DROP_PRIVILEGES 6623fa31371STrond Norbye extern void drop_privileges(void); 66369aa5427STrond Norbye #else 66469aa5427STrond Norbye #define drop_privileges() 66569aa5427STrond Norbye #endif 66669aa5427STrond Norbye 667c12ebb2bSBrian Aker /* If supported, give compiler hints for branch prediction. */ 668c12ebb2bSBrian Aker #if !defined(__GNUC__) || (__GNUC__ == 2 && __GNUC_MINOR__ < 96) 669c12ebb2bSBrian Aker #define __builtin_expect(x, expected_value) (x) 670c12ebb2bSBrian Aker #endif 67156b8339eSSteven Grimm 672c12ebb2bSBrian Aker #define likely(x) __builtin_expect((x),1) 673c12ebb2bSBrian Aker #define unlikely(x) __builtin_expect((x),0) 674