xref: /redis-3.2.3/src/server.c (revision 0a45fbc3)
1cef054e8Santirez /*
2cef054e8Santirez  * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
3cef054e8Santirez  * All rights reserved.
4cef054e8Santirez  *
5cef054e8Santirez  * Redistribution and use in source and binary forms, with or without
6cef054e8Santirez  * modification, are permitted provided that the following conditions are met:
7cef054e8Santirez  *
8cef054e8Santirez  *   * Redistributions of source code must retain the above copyright notice,
9cef054e8Santirez  *     this list of conditions and the following disclaimer.
10cef054e8Santirez  *   * Redistributions in binary form must reproduce the above copyright
11cef054e8Santirez  *     notice, this list of conditions and the following disclaimer in the
12cef054e8Santirez  *     documentation and/or other materials provided with the distribution.
13cef054e8Santirez  *   * Neither the name of Redis nor the names of its contributors may be used
14cef054e8Santirez  *     to endorse or promote products derived from this software without
15cef054e8Santirez  *     specific prior written permission.
16cef054e8Santirez  *
17cef054e8Santirez  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18cef054e8Santirez  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19cef054e8Santirez  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20cef054e8Santirez  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21cef054e8Santirez  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22cef054e8Santirez  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23cef054e8Santirez  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24cef054e8Santirez  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25cef054e8Santirez  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26cef054e8Santirez  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27cef054e8Santirez  * POSSIBILITY OF SUCH DAMAGE.
28cef054e8Santirez  */
29cef054e8Santirez 
30cef054e8Santirez #include "server.h"
31cef054e8Santirez #include "cluster.h"
32cef054e8Santirez #include "slowlog.h"
33cef054e8Santirez #include "bio.h"
34cef054e8Santirez #include "latency.h"
35cef054e8Santirez 
36cef054e8Santirez #include <time.h>
37cef054e8Santirez #include <signal.h>
38cef054e8Santirez #include <sys/wait.h>
39cef054e8Santirez #include <errno.h>
40cef054e8Santirez #include <assert.h>
41cef054e8Santirez #include <ctype.h>
42cef054e8Santirez #include <stdarg.h>
43cef054e8Santirez #include <arpa/inet.h>
44cef054e8Santirez #include <sys/stat.h>
45cef054e8Santirez #include <fcntl.h>
46cef054e8Santirez #include <sys/time.h>
47cef054e8Santirez #include <sys/resource.h>
48cef054e8Santirez #include <sys/uio.h>
49cef054e8Santirez #include <sys/un.h>
50cef054e8Santirez #include <limits.h>
51cef054e8Santirez #include <float.h>
52cef054e8Santirez #include <math.h>
53cef054e8Santirez #include <sys/resource.h>
54cef054e8Santirez #include <sys/utsname.h>
55cef054e8Santirez #include <locale.h>
56cef054e8Santirez #include <sys/socket.h>
57cef054e8Santirez 
58cef054e8Santirez /* Our shared "common" objects */
59cef054e8Santirez 
60cef054e8Santirez struct sharedObjectsStruct shared;
61cef054e8Santirez 
62cef054e8Santirez /* Global vars that are actually used as constants. The following double
63cef054e8Santirez  * values are used for double on-disk serialization, and are initialized
64cef054e8Santirez  * at runtime to avoid strange compiler optimizations. */
65cef054e8Santirez 
66cef054e8Santirez double R_Zero, R_PosInf, R_NegInf, R_Nan;
67cef054e8Santirez 
68cef054e8Santirez /*================================= Globals ================================= */
69cef054e8Santirez 
70cef054e8Santirez /* Global vars */
71cef054e8Santirez struct redisServer server; /* server global state */
72cef054e8Santirez 
73cef054e8Santirez /* Our command table.
74cef054e8Santirez  *
75cef054e8Santirez  * Every entry is composed of the following fields:
76cef054e8Santirez  *
77cef054e8Santirez  * name: a string representing the command name.
78cef054e8Santirez  * function: pointer to the C function implementing the command.
79cef054e8Santirez  * arity: number of arguments, it is possible to use -N to say >= N
80cef054e8Santirez  * sflags: command flags as string. See below for a table of flags.
81cef054e8Santirez  * flags: flags as bitmask. Computed by Redis using the 'sflags' field.
82cef054e8Santirez  * get_keys_proc: an optional function to get key arguments from a command.
83cef054e8Santirez  *                This is only used when the following three fields are not
84cef054e8Santirez  *                enough to specify what arguments are keys.
85cef054e8Santirez  * first_key_index: first argument that is a key
86cef054e8Santirez  * last_key_index: last argument that is a key
87cef054e8Santirez  * key_step: step to get all the keys from first to last argument. For instance
88cef054e8Santirez  *           in MSET the step is two since arguments are key,val,key,val,...
89cef054e8Santirez  * microseconds: microseconds of total execution time for this command.
90cef054e8Santirez  * calls: total number of calls of this command.
91cef054e8Santirez  *
92cef054e8Santirez  * The flags, microseconds and calls fields are computed by Redis and should
93cef054e8Santirez  * always be set to zero.
94cef054e8Santirez  *
95cef054e8Santirez  * Command flags are expressed using strings where every character represents
96cef054e8Santirez  * a flag. Later the populateCommandTable() function will take care of
97cef054e8Santirez  * populating the real 'flags' field using this characters.
98cef054e8Santirez  *
99cef054e8Santirez  * This is the meaning of the flags:
100cef054e8Santirez  *
101cef054e8Santirez  * w: write command (may modify the key space).
102cef054e8Santirez  * r: read command  (will never modify the key space).
103cef054e8Santirez  * m: may increase memory usage once called. Don't allow if out of memory.
104cef054e8Santirez  * a: admin command, like SAVE or SHUTDOWN.
105cef054e8Santirez  * p: Pub/Sub related command.
106cef054e8Santirez  * f: force replication of this command, regardless of server.dirty.
107cef054e8Santirez  * s: command not allowed in scripts.
108cef054e8Santirez  * R: random command. Command is not deterministic, that is, the same command
109cef054e8Santirez  *    with the same arguments, with the same key space, may have different
110cef054e8Santirez  *    results. For instance SPOP and RANDOMKEY are two random commands.
111cef054e8Santirez  * S: Sort command output array if called from script, so that the output
112cef054e8Santirez  *    is deterministic.
113cef054e8Santirez  * l: Allow command while loading the database.
114cef054e8Santirez  * t: Allow command while a slave has stale data but is not allowed to
115cef054e8Santirez  *    server this data. Normally no command is accepted in this condition
116cef054e8Santirez  *    but just a few.
117cef054e8Santirez  * M: Do not automatically propagate the command on MONITOR.
118cef054e8Santirez  * k: Perform an implicit ASKING for this command, so the command will be
119cef054e8Santirez  *    accepted in cluster mode if the slot is marked as 'importing'.
120cef054e8Santirez  * F: Fast command: O(1) or O(log(N)) command that should never delay
121cef054e8Santirez  *    its execution as long as the kernel scheduler is giving us time.
122cef054e8Santirez  *    Note that commands that may trigger a DEL as a side effect (like SET)
123cef054e8Santirez  *    are not fast commands.
124cef054e8Santirez  */
125cef054e8Santirez struct redisCommand redisCommandTable[] = {
126cef054e8Santirez     {"get",getCommand,2,"rF",0,NULL,1,1,1,0,0},
127cef054e8Santirez     {"set",setCommand,-3,"wm",0,NULL,1,1,1,0,0},
128cef054e8Santirez     {"setnx",setnxCommand,3,"wmF",0,NULL,1,1,1,0,0},
129cef054e8Santirez     {"setex",setexCommand,4,"wm",0,NULL,1,1,1,0,0},
130cef054e8Santirez     {"psetex",psetexCommand,4,"wm",0,NULL,1,1,1,0,0},
131cef054e8Santirez     {"append",appendCommand,3,"wm",0,NULL,1,1,1,0,0},
132cef054e8Santirez     {"strlen",strlenCommand,2,"rF",0,NULL,1,1,1,0,0},
133cef054e8Santirez     {"del",delCommand,-2,"w",0,NULL,1,-1,1,0,0},
134cef054e8Santirez     {"exists",existsCommand,-2,"rF",0,NULL,1,-1,1,0,0},
135cef054e8Santirez     {"setbit",setbitCommand,4,"wm",0,NULL,1,1,1,0,0},
136cef054e8Santirez     {"getbit",getbitCommand,3,"rF",0,NULL,1,1,1,0,0},
137761a7728Santirez     {"bitfield",bitfieldCommand,-2,"wm",0,NULL,1,1,1,0,0},
138cef054e8Santirez     {"setrange",setrangeCommand,4,"wm",0,NULL,1,1,1,0,0},
139cef054e8Santirez     {"getrange",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
140cef054e8Santirez     {"substr",getrangeCommand,4,"r",0,NULL,1,1,1,0,0},
141cef054e8Santirez     {"incr",incrCommand,2,"wmF",0,NULL,1,1,1,0,0},
142cef054e8Santirez     {"decr",decrCommand,2,"wmF",0,NULL,1,1,1,0,0},
143cef054e8Santirez     {"mget",mgetCommand,-2,"r",0,NULL,1,-1,1,0,0},
144cef054e8Santirez     {"rpush",rpushCommand,-3,"wmF",0,NULL,1,1,1,0,0},
145cef054e8Santirez     {"lpush",lpushCommand,-3,"wmF",0,NULL,1,1,1,0,0},
146cef054e8Santirez     {"rpushx",rpushxCommand,3,"wmF",0,NULL,1,1,1,0,0},
147cef054e8Santirez     {"lpushx",lpushxCommand,3,"wmF",0,NULL,1,1,1,0,0},
148cef054e8Santirez     {"linsert",linsertCommand,5,"wm",0,NULL,1,1,1,0,0},
149cef054e8Santirez     {"rpop",rpopCommand,2,"wF",0,NULL,1,1,1,0,0},
150cef054e8Santirez     {"lpop",lpopCommand,2,"wF",0,NULL,1,1,1,0,0},
151cef054e8Santirez     {"brpop",brpopCommand,-3,"ws",0,NULL,1,1,1,0,0},
152cef054e8Santirez     {"brpoplpush",brpoplpushCommand,4,"wms",0,NULL,1,2,1,0,0},
153cef054e8Santirez     {"blpop",blpopCommand,-3,"ws",0,NULL,1,-2,1,0,0},
154cef054e8Santirez     {"llen",llenCommand,2,"rF",0,NULL,1,1,1,0,0},
155cef054e8Santirez     {"lindex",lindexCommand,3,"r",0,NULL,1,1,1,0,0},
156cef054e8Santirez     {"lset",lsetCommand,4,"wm",0,NULL,1,1,1,0,0},
157cef054e8Santirez     {"lrange",lrangeCommand,4,"r",0,NULL,1,1,1,0,0},
158cef054e8Santirez     {"ltrim",ltrimCommand,4,"w",0,NULL,1,1,1,0,0},
159cef054e8Santirez     {"lrem",lremCommand,4,"w",0,NULL,1,1,1,0,0},
160cef054e8Santirez     {"rpoplpush",rpoplpushCommand,3,"wm",0,NULL,1,2,1,0,0},
161cef054e8Santirez     {"sadd",saddCommand,-3,"wmF",0,NULL,1,1,1,0,0},
162cef054e8Santirez     {"srem",sremCommand,-3,"wF",0,NULL,1,1,1,0,0},
163cef054e8Santirez     {"smove",smoveCommand,4,"wF",0,NULL,1,2,1,0,0},
164cef054e8Santirez     {"sismember",sismemberCommand,3,"rF",0,NULL,1,1,1,0,0},
165cef054e8Santirez     {"scard",scardCommand,2,"rF",0,NULL,1,1,1,0,0},
166620783e3SItamar Haber     {"spop",spopCommand,-2,"wRF",0,NULL,1,1,1,0,0},
167cef054e8Santirez     {"srandmember",srandmemberCommand,-2,"rR",0,NULL,1,1,1,0,0},
168cef054e8Santirez     {"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0},
169cef054e8Santirez     {"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
170cef054e8Santirez     {"sunion",sunionCommand,-2,"rS",0,NULL,1,-1,1,0,0},
171cef054e8Santirez     {"sunionstore",sunionstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
172cef054e8Santirez     {"sdiff",sdiffCommand,-2,"rS",0,NULL,1,-1,1,0,0},
173cef054e8Santirez     {"sdiffstore",sdiffstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0},
174cef054e8Santirez     {"smembers",sinterCommand,2,"rS",0,NULL,1,1,1,0,0},
175cef054e8Santirez     {"sscan",sscanCommand,-3,"rR",0,NULL,1,1,1,0,0},
176cef054e8Santirez     {"zadd",zaddCommand,-4,"wmF",0,NULL,1,1,1,0,0},
177cef054e8Santirez     {"zincrby",zincrbyCommand,4,"wmF",0,NULL,1,1,1,0,0},
178cef054e8Santirez     {"zrem",zremCommand,-3,"wF",0,NULL,1,1,1,0,0},
179cef054e8Santirez     {"zremrangebyscore",zremrangebyscoreCommand,4,"w",0,NULL,1,1,1,0,0},
180cef054e8Santirez     {"zremrangebyrank",zremrangebyrankCommand,4,"w",0,NULL,1,1,1,0,0},
181cef054e8Santirez     {"zremrangebylex",zremrangebylexCommand,4,"w",0,NULL,1,1,1,0,0},
182cef054e8Santirez     {"zunionstore",zunionstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0},
183cef054e8Santirez     {"zinterstore",zinterstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0},
184cef054e8Santirez     {"zrange",zrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
185cef054e8Santirez     {"zrangebyscore",zrangebyscoreCommand,-4,"r",0,NULL,1,1,1,0,0},
186cef054e8Santirez     {"zrevrangebyscore",zrevrangebyscoreCommand,-4,"r",0,NULL,1,1,1,0,0},
187cef054e8Santirez     {"zrangebylex",zrangebylexCommand,-4,"r",0,NULL,1,1,1,0,0},
188cef054e8Santirez     {"zrevrangebylex",zrevrangebylexCommand,-4,"r",0,NULL,1,1,1,0,0},
189cef054e8Santirez     {"zcount",zcountCommand,4,"rF",0,NULL,1,1,1,0,0},
190cef054e8Santirez     {"zlexcount",zlexcountCommand,4,"rF",0,NULL,1,1,1,0,0},
191cef054e8Santirez     {"zrevrange",zrevrangeCommand,-4,"r",0,NULL,1,1,1,0,0},
192cef054e8Santirez     {"zcard",zcardCommand,2,"rF",0,NULL,1,1,1,0,0},
193cef054e8Santirez     {"zscore",zscoreCommand,3,"rF",0,NULL,1,1,1,0,0},
194cef054e8Santirez     {"zrank",zrankCommand,3,"rF",0,NULL,1,1,1,0,0},
195cef054e8Santirez     {"zrevrank",zrevrankCommand,3,"rF",0,NULL,1,1,1,0,0},
196cef054e8Santirez     {"zscan",zscanCommand,-3,"rR",0,NULL,1,1,1,0,0},
197cef054e8Santirez     {"hset",hsetCommand,4,"wmF",0,NULL,1,1,1,0,0},
198cef054e8Santirez     {"hsetnx",hsetnxCommand,4,"wmF",0,NULL,1,1,1,0,0},
199cef054e8Santirez     {"hget",hgetCommand,3,"rF",0,NULL,1,1,1,0,0},
200cef054e8Santirez     {"hmset",hmsetCommand,-4,"wm",0,NULL,1,1,1,0,0},
201cef054e8Santirez     {"hmget",hmgetCommand,-3,"r",0,NULL,1,1,1,0,0},
202cef054e8Santirez     {"hincrby",hincrbyCommand,4,"wmF",0,NULL,1,1,1,0,0},
203cef054e8Santirez     {"hincrbyfloat",hincrbyfloatCommand,4,"wmF",0,NULL,1,1,1,0,0},
204cef054e8Santirez     {"hdel",hdelCommand,-3,"wF",0,NULL,1,1,1,0,0},
205cef054e8Santirez     {"hlen",hlenCommand,2,"rF",0,NULL,1,1,1,0,0},
206cef054e8Santirez     {"hstrlen",hstrlenCommand,3,"rF",0,NULL,1,1,1,0,0},
207cef054e8Santirez     {"hkeys",hkeysCommand,2,"rS",0,NULL,1,1,1,0,0},
208cef054e8Santirez     {"hvals",hvalsCommand,2,"rS",0,NULL,1,1,1,0,0},
209cef054e8Santirez     {"hgetall",hgetallCommand,2,"r",0,NULL,1,1,1,0,0},
210cef054e8Santirez     {"hexists",hexistsCommand,3,"rF",0,NULL,1,1,1,0,0},
211cef054e8Santirez     {"hscan",hscanCommand,-3,"rR",0,NULL,1,1,1,0,0},
212cef054e8Santirez     {"incrby",incrbyCommand,3,"wmF",0,NULL,1,1,1,0,0},
213cef054e8Santirez     {"decrby",decrbyCommand,3,"wmF",0,NULL,1,1,1,0,0},
214cef054e8Santirez     {"incrbyfloat",incrbyfloatCommand,3,"wmF",0,NULL,1,1,1,0,0},
215cef054e8Santirez     {"getset",getsetCommand,3,"wm",0,NULL,1,1,1,0,0},
216cef054e8Santirez     {"mset",msetCommand,-3,"wm",0,NULL,1,-1,2,0,0},
217cef054e8Santirez     {"msetnx",msetnxCommand,-3,"wm",0,NULL,1,-1,2,0,0},
218cef054e8Santirez     {"randomkey",randomkeyCommand,1,"rR",0,NULL,0,0,0,0,0},
2193ff8f57eSantirez     {"select",selectCommand,2,"lF",0,NULL,0,0,0,0,0},
220cef054e8Santirez     {"move",moveCommand,3,"wF",0,NULL,1,1,1,0,0},
221cef054e8Santirez     {"rename",renameCommand,3,"w",0,NULL,1,2,1,0,0},
222cef054e8Santirez     {"renamenx",renamenxCommand,3,"wF",0,NULL,1,2,1,0,0},
223cef054e8Santirez     {"expire",expireCommand,3,"wF",0,NULL,1,1,1,0,0},
224cef054e8Santirez     {"expireat",expireatCommand,3,"wF",0,NULL,1,1,1,0,0},
225cef054e8Santirez     {"pexpire",pexpireCommand,3,"wF",0,NULL,1,1,1,0,0},
226cef054e8Santirez     {"pexpireat",pexpireatCommand,3,"wF",0,NULL,1,1,1,0,0},
227cef054e8Santirez     {"keys",keysCommand,2,"rS",0,NULL,0,0,0,0,0},
228cef054e8Santirez     {"scan",scanCommand,-2,"rR",0,NULL,0,0,0,0,0},
229cef054e8Santirez     {"dbsize",dbsizeCommand,1,"rF",0,NULL,0,0,0,0,0},
2303ff8f57eSantirez     {"auth",authCommand,2,"sltF",0,NULL,0,0,0,0,0},
2313ff8f57eSantirez     {"ping",pingCommand,-1,"tF",0,NULL,0,0,0,0,0},
2323ff8f57eSantirez     {"echo",echoCommand,2,"F",0,NULL,0,0,0,0,0},
2333ff8f57eSantirez     {"save",saveCommand,1,"as",0,NULL,0,0,0,0,0},
23421cffc26Santirez     {"bgsave",bgsaveCommand,-1,"a",0,NULL,0,0,0,0,0},
2353ff8f57eSantirez     {"bgrewriteaof",bgrewriteaofCommand,1,"a",0,NULL,0,0,0,0,0},
2363ff8f57eSantirez     {"shutdown",shutdownCommand,-1,"alt",0,NULL,0,0,0,0,0},
2373ff8f57eSantirez     {"lastsave",lastsaveCommand,1,"RF",0,NULL,0,0,0,0,0},
238cef054e8Santirez     {"type",typeCommand,2,"rF",0,NULL,1,1,1,0,0},
2393ff8f57eSantirez     {"multi",multiCommand,1,"sF",0,NULL,0,0,0,0,0},
240cef054e8Santirez     {"exec",execCommand,1,"sM",0,NULL,0,0,0,0,0},
2413ff8f57eSantirez     {"discard",discardCommand,1,"sF",0,NULL,0,0,0,0,0},
242cef054e8Santirez     {"sync",syncCommand,1,"ars",0,NULL,0,0,0,0,0},
243cef054e8Santirez     {"psync",syncCommand,3,"ars",0,NULL,0,0,0,0,0},
2443ff8f57eSantirez     {"replconf",replconfCommand,-1,"aslt",0,NULL,0,0,0,0,0},
245cef054e8Santirez     {"flushdb",flushdbCommand,1,"w",0,NULL,0,0,0,0,0},
246cef054e8Santirez     {"flushall",flushallCommand,1,"w",0,NULL,0,0,0,0,0},
247cef054e8Santirez     {"sort",sortCommand,-2,"wm",0,sortGetKeys,1,1,1,0,0},
2483ff8f57eSantirez     {"info",infoCommand,-1,"lt",0,NULL,0,0,0,0,0},
2493ff8f57eSantirez     {"monitor",monitorCommand,1,"as",0,NULL,0,0,0,0,0},
250cef054e8Santirez     {"ttl",ttlCommand,2,"rF",0,NULL,1,1,1,0,0},
251b23aa670Santirez     {"touch",touchCommand,-2,"rF",0,NULL,1,1,1,0,0},
252cef054e8Santirez     {"pttl",pttlCommand,2,"rF",0,NULL,1,1,1,0,0},
253cef054e8Santirez     {"persist",persistCommand,2,"wF",0,NULL,1,1,1,0,0},
254cef054e8Santirez     {"slaveof",slaveofCommand,3,"ast",0,NULL,0,0,0,0,0},
255cef054e8Santirez     {"role",roleCommand,1,"lst",0,NULL,0,0,0,0,0},
25607b852d2Santirez     {"debug",debugCommand,-1,"as",0,NULL,0,0,0,0,0},
257ae95de93Santirez     {"config",configCommand,-2,"lat",0,NULL,0,0,0,0,0},
2583ff8f57eSantirez     {"subscribe",subscribeCommand,-2,"pslt",0,NULL,0,0,0,0,0},
2593ff8f57eSantirez     {"unsubscribe",unsubscribeCommand,-1,"pslt",0,NULL,0,0,0,0,0},
2603ff8f57eSantirez     {"psubscribe",psubscribeCommand,-2,"pslt",0,NULL,0,0,0,0,0},
2613ff8f57eSantirez     {"punsubscribe",punsubscribeCommand,-1,"pslt",0,NULL,0,0,0,0,0},
2623ff8f57eSantirez     {"publish",publishCommand,3,"pltF",0,NULL,0,0,0,0,0},
2633ff8f57eSantirez     {"pubsub",pubsubCommand,-2,"pltR",0,NULL,0,0,0,0,0},
2643ff8f57eSantirez     {"watch",watchCommand,-2,"sF",0,NULL,1,-1,1,0,0},
2653ff8f57eSantirez     {"unwatch",unwatchCommand,1,"sF",0,NULL,0,0,0,0,0},
2663ff8f57eSantirez     {"cluster",clusterCommand,-2,"a",0,NULL,0,0,0,0,0},
267cef054e8Santirez     {"restore",restoreCommand,-4,"wm",0,NULL,1,1,1,0,0},
268cef054e8Santirez     {"restore-asking",restoreCommand,-4,"wmk",0,NULL,1,1,1,0,0},
26900353f99Santirez     {"migrate",migrateCommand,-6,"w",0,migrateGetKeys,0,0,0,0,0},
2703ff8f57eSantirez     {"asking",askingCommand,1,"F",0,NULL,0,0,0,0,0},
2713ff8f57eSantirez     {"readonly",readonlyCommand,1,"F",0,NULL,0,0,0,0,0},
2723ff8f57eSantirez     {"readwrite",readwriteCommand,1,"F",0,NULL,0,0,0,0,0},
273cef054e8Santirez     {"dump",dumpCommand,2,"r",0,NULL,1,1,1,0,0},
274cef054e8Santirez     {"object",objectCommand,3,"r",0,NULL,2,2,2,0,0},
2753ff8f57eSantirez     {"client",clientCommand,-2,"as",0,NULL,0,0,0,0,0},
276cef054e8Santirez     {"eval",evalCommand,-3,"s",0,evalGetKeys,0,0,0,0,0},
277cef054e8Santirez     {"evalsha",evalShaCommand,-3,"s",0,evalGetKeys,0,0,0,0,0},
2783ff8f57eSantirez     {"slowlog",slowlogCommand,-2,"a",0,NULL,0,0,0,0,0},
2793ff8f57eSantirez     {"script",scriptCommand,-2,"s",0,NULL,0,0,0,0,0},
2803ff8f57eSantirez     {"time",timeCommand,1,"RF",0,NULL,0,0,0,0,0},
281cef054e8Santirez     {"bitop",bitopCommand,-4,"wm",0,NULL,2,-1,1,0,0},
282cef054e8Santirez     {"bitcount",bitcountCommand,-2,"r",0,NULL,1,1,1,0,0},
283cef054e8Santirez     {"bitpos",bitposCommand,-3,"r",0,NULL,1,1,1,0,0},
2843ff8f57eSantirez     {"wait",waitCommand,3,"s",0,NULL,0,0,0,0,0},
285f01a2714Santirez     {"command",commandCommand,0,"lt",0,NULL,0,0,0,0,0},
286cef054e8Santirez     {"geoadd",geoaddCommand,-5,"wm",0,NULL,1,1,1,0,0},
2870b6daf5aSantirez     {"georadius",georadiusCommand,-6,"w",0,NULL,1,1,1,0,0},
2880b6daf5aSantirez     {"georadiusbymember",georadiusByMemberCommand,-5,"w",0,NULL,1,1,1,0,0},
289cef054e8Santirez     {"geohash",geohashCommand,-2,"r",0,NULL,1,1,1,0,0},
290cef054e8Santirez     {"geopos",geoposCommand,-2,"r",0,NULL,1,1,1,0,0},
291cef054e8Santirez     {"geodist",geodistCommand,-4,"r",0,NULL,1,1,1,0,0},
2923ff8f57eSantirez     {"pfselftest",pfselftestCommand,1,"a",0,NULL,0,0,0,0,0},
293cef054e8Santirez     {"pfadd",pfaddCommand,-2,"wmF",0,NULL,1,1,1,0,0},
294cef054e8Santirez     {"pfcount",pfcountCommand,-2,"r",0,NULL,1,-1,1,0,0},
295cef054e8Santirez     {"pfmerge",pfmergeCommand,-2,"wm",0,NULL,1,-1,1,0,0},
296cef054e8Santirez     {"pfdebug",pfdebugCommand,-3,"w",0,NULL,0,0,0,0,0},
2973ff8f57eSantirez     {"latency",latencyCommand,-2,"aslt",0,NULL,0,0,0,0,0}
298cef054e8Santirez };
299cef054e8Santirez 
/* Forward declaration: lets code in this file reference evictionPoolAlloc()
 * ahead of wherever it is defined. */
struct evictionPoolEntry *evictionPoolAlloc(void);
301cef054e8Santirez 
302cef054e8Santirez /*============================ Utility functions ============================ */
303cef054e8Santirez 
304cef054e8Santirez /* Low level logging. To use only for very big messages, otherwise
305424fe9afSantirez  * serverLog() is to prefer. */
serverLogRaw(int level,const char * msg)306424fe9afSantirez void serverLogRaw(int level, const char *msg) {
307cef054e8Santirez     const int syslogLevelMap[] = { LOG_DEBUG, LOG_INFO, LOG_NOTICE, LOG_WARNING };
308cef054e8Santirez     const char *c = ".-*#";
309cef054e8Santirez     FILE *fp;
310cef054e8Santirez     char buf[64];
31132f80e2fSantirez     int rawmode = (level & LL_RAW);
312cef054e8Santirez     int log_to_stdout = server.logfile[0] == '\0';
313cef054e8Santirez 
314cef054e8Santirez     level &= 0xff; /* clear flags */
315cef054e8Santirez     if (level < server.verbosity) return;
316cef054e8Santirez 
317cef054e8Santirez     fp = log_to_stdout ? stdout : fopen(server.logfile,"a");
318cef054e8Santirez     if (!fp) return;
319cef054e8Santirez 
320cef054e8Santirez     if (rawmode) {
321cef054e8Santirez         fprintf(fp,"%s",msg);
322cef054e8Santirez     } else {
323cef054e8Santirez         int off;
324cef054e8Santirez         struct timeval tv;
325cef054e8Santirez         int role_char;
326cef054e8Santirez         pid_t pid = getpid();
327cef054e8Santirez 
328cef054e8Santirez         gettimeofday(&tv,NULL);
329cef054e8Santirez         off = strftime(buf,sizeof(buf),"%d %b %H:%M:%S.",localtime(&tv.tv_sec));
330cef054e8Santirez         snprintf(buf+off,sizeof(buf)-off,"%03d",(int)tv.tv_usec/1000);
331cef054e8Santirez         if (server.sentinel_mode) {
332cef054e8Santirez             role_char = 'X'; /* Sentinel. */
333cef054e8Santirez         } else if (pid != server.pid) {
334cef054e8Santirez             role_char = 'C'; /* RDB / AOF writing child. */
335cef054e8Santirez         } else {
336cef054e8Santirez             role_char = (server.masterhost ? 'S':'M'); /* Slave or Master. */
337cef054e8Santirez         }
338cef054e8Santirez         fprintf(fp,"%d:%c %s %c %s\n",
339cef054e8Santirez             (int)getpid(),role_char, buf,c[level],msg);
340cef054e8Santirez     }
341cef054e8Santirez     fflush(fp);
342cef054e8Santirez 
343cef054e8Santirez     if (!log_to_stdout) fclose(fp);
344cef054e8Santirez     if (server.syslog_enabled) syslog(syslogLevelMap[level], "%s", msg);
345cef054e8Santirez }
346cef054e8Santirez 
347424fe9afSantirez /* Like serverLogRaw() but with printf-alike support. This is the function that
348cef054e8Santirez  * is used across the code. The raw version is only used in order to dump
349cef054e8Santirez  * the INFO output on crash. */
serverLog(int level,const char * fmt,...)350424fe9afSantirez void serverLog(int level, const char *fmt, ...) {
351cef054e8Santirez     va_list ap;
35232f80e2fSantirez     char msg[LOG_MAX_LEN];
353cef054e8Santirez 
354cef054e8Santirez     if ((level&0xff) < server.verbosity) return;
355cef054e8Santirez 
356cef054e8Santirez     va_start(ap, fmt);
357cef054e8Santirez     vsnprintf(msg, sizeof(msg), fmt, ap);
358cef054e8Santirez     va_end(ap);
359cef054e8Santirez 
360424fe9afSantirez     serverLogRaw(level,msg);
361cef054e8Santirez }
362cef054e8Santirez 
363cef054e8Santirez /* Log a fixed message without printf-alike capabilities, in a way that is
364cef054e8Santirez  * safe to call from a signal handler.
365cef054e8Santirez  *
366cef054e8Santirez  * We actually use this only for signals that are not fatal from the point
367cef054e8Santirez  * of view of Redis. Signals that are going to kill the server anyway and
368424fe9afSantirez  * where we need printf-alike features are served by serverLog(). */
serverLogFromHandler(int level,const char * msg)369424fe9afSantirez void serverLogFromHandler(int level, const char *msg) {
370cef054e8Santirez     int fd;
371cef054e8Santirez     int log_to_stdout = server.logfile[0] == '\0';
372cef054e8Santirez     char buf[64];
373cef054e8Santirez 
374cef054e8Santirez     if ((level&0xff) < server.verbosity || (log_to_stdout && server.daemonize))
375cef054e8Santirez         return;
376cef054e8Santirez     fd = log_to_stdout ? STDOUT_FILENO :
377cef054e8Santirez                          open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644);
378cef054e8Santirez     if (fd == -1) return;
379cef054e8Santirez     ll2string(buf,sizeof(buf),getpid());
380cef054e8Santirez     if (write(fd,buf,strlen(buf)) == -1) goto err;
381cef054e8Santirez     if (write(fd,":signal-handler (",17) == -1) goto err;
382cef054e8Santirez     ll2string(buf,sizeof(buf),time(NULL));
383cef054e8Santirez     if (write(fd,buf,strlen(buf)) == -1) goto err;
384cef054e8Santirez     if (write(fd,") ",2) == -1) goto err;
385cef054e8Santirez     if (write(fd,msg,strlen(msg)) == -1) goto err;
386cef054e8Santirez     if (write(fd,"\n",1) == -1) goto err;
387cef054e8Santirez err:
388cef054e8Santirez     if (!log_to_stdout) close(fd);
389cef054e8Santirez }
390cef054e8Santirez 
/* Current UNIX time expressed in microseconds, as reported by
 * gettimeofday(). */
long long ustime(void) {
    struct timeval now;

    gettimeofday(&now, NULL);
    /* Widen the seconds before scaling so the product is computed as a
     * long long. */
    return ((long long)now.tv_sec)*1000000 + now.tv_usec;
}
401cef054e8Santirez 
402cef054e8Santirez /* Return the UNIX time in milliseconds */
mstime(void)403a83e79b1Santirez mstime_t mstime(void) {
404cef054e8Santirez     return ustime()/1000;
405cef054e8Santirez }
406cef054e8Santirez 
/* Terminate a child process after an RDB dump or AOF rewrite.
 *
 * _exit() is used instead of exit() because the latter may interact with the
 * same file objects used by the parent process. Builds made with
 * COVERAGE_TEST defined use the normal exit() instead, in order to obtain
 * the right coverage information. */
void exitFromChild(int retcode) {
#ifndef COVERAGE_TEST
    _exit(retcode);
#else
    exit(retcode);
#endif
}
418cef054e8Santirez 
419cef054e8Santirez /*====================== Hash table type implementation  ==================== */
420cef054e8Santirez 
421cef054e8Santirez /* This is a hash table type that uses the SDS dynamic strings library as
422cef054e8Santirez  * keys and redis objects as values (objects can hold SDS strings,
423cef054e8Santirez  * lists, sets). */
424cef054e8Santirez 
/* Dict destructor callback that releases 'val' with zfree().
 * 'privdata' is not used. */
void dictVanillaFree(void *privdata, void *val) {
    DICT_NOTUSED(privdata);

    zfree(val);
}
430cef054e8Santirez 
dictListDestructor(void * privdata,void * val)431cef054e8Santirez void dictListDestructor(void *privdata, void *val)
432cef054e8Santirez {
433cef054e8Santirez     DICT_NOTUSED(privdata);
434cef054e8Santirez     listRelease((list*)val);
435cef054e8Santirez }
436cef054e8Santirez 
dictSdsKeyCompare(void * privdata,const void * key1,const void * key2)437cef054e8Santirez int dictSdsKeyCompare(void *privdata, const void *key1,
438cef054e8Santirez         const void *key2)
439cef054e8Santirez {
440cef054e8Santirez     int l1,l2;
441cef054e8Santirez     DICT_NOTUSED(privdata);
442cef054e8Santirez 
443cef054e8Santirez     l1 = sdslen((sds)key1);
444cef054e8Santirez     l2 = sdslen((sds)key2);
445cef054e8Santirez     if (l1 != l2) return 0;
446cef054e8Santirez     return memcmp(key1, key2, l1) == 0;
447cef054e8Santirez }
448cef054e8Santirez 
/* Case insensitive key comparison, used for the command lookup table and
 * other places where a case insensitive, non binary-safe comparison is
 * needed. Returns 1 when strcasecmp() considers the keys equal, else 0.
 * 'privdata' is not used. */
int dictSdsKeyCaseCompare(void *privdata, const void *key1,
        const void *key2) {
    DICT_NOTUSED(privdata);

    if (strcasecmp(key1, key2) != 0) return 0;
    return 1;
}
458cef054e8Santirez 
/* Dict destructor callback for values that are reference counted objects:
 * drops one reference with decrRefCount(). 'privdata' is not used. */
void dictObjectDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);

    /* Values of swapped out keys are set to NULL: nothing to release. */
    if (val != NULL) decrRefCount(val);
}
466cef054e8Santirez 
/* Dict destructor callback for SDS strings: releases 'val' with sdsfree().
 * 'privdata' is not used. */
void dictSdsDestructor(void *privdata, void *val) {
    DICT_NOTUSED(privdata);
    sdsfree(val);
}
473cef054e8Santirez 
dictObjKeyCompare(void * privdata,const void * key1,const void * key2)474cef054e8Santirez int dictObjKeyCompare(void *privdata, const void *key1,
475cef054e8Santirez         const void *key2)
476cef054e8Santirez {
477cef054e8Santirez     const robj *o1 = key1, *o2 = key2;
478cef054e8Santirez     return dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
479cef054e8Santirez }
480cef054e8Santirez 
dictObjHash(const void * key)481cef054e8Santirez unsigned int dictObjHash(const void *key) {
482cef054e8Santirez     const robj *o = key;
483cef054e8Santirez     return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
484cef054e8Santirez }
485cef054e8Santirez 
/* Hash function for keys that are SDS strings: hashes all the bytes of
 * the string, using the length reported by sdslen(). */
unsigned int dictSdsHash(const void *key) {
    const unsigned char *bytes = (unsigned char*)key;

    return dictGenHashFunction(bytes, sdslen((char*)key));
}
489cef054e8Santirez 
/* Like dictSdsHash() but the bytes are fed to dictGenCaseHashFunction(),
 * the counterpart used together with the case insensitive key compare. */
unsigned int dictSdsCaseHash(const void *key) {
    const unsigned char *bytes = (unsigned char*)key;

    return dictGenCaseHashFunction(bytes, sdslen((char*)key));
}
493cef054e8Santirez 
dictEncObjKeyCompare(void * privdata,const void * key1,const void * key2)494cef054e8Santirez int dictEncObjKeyCompare(void *privdata, const void *key1,
495cef054e8Santirez         const void *key2)
496cef054e8Santirez {
497cef054e8Santirez     robj *o1 = (robj*) key1, *o2 = (robj*) key2;
498cef054e8Santirez     int cmp;
499cef054e8Santirez 
50014ff5724Santirez     if (o1->encoding == OBJ_ENCODING_INT &&
50114ff5724Santirez         o2->encoding == OBJ_ENCODING_INT)
502cef054e8Santirez             return o1->ptr == o2->ptr;
503cef054e8Santirez 
504cef054e8Santirez     o1 = getDecodedObject(o1);
505cef054e8Santirez     o2 = getDecodedObject(o2);
506cef054e8Santirez     cmp = dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
507cef054e8Santirez     decrRefCount(o1);
508cef054e8Santirez     decrRefCount(o2);
509cef054e8Santirez     return cmp;
510cef054e8Santirez }
511cef054e8Santirez 
dictEncObjHash(const void * key)512cef054e8Santirez unsigned int dictEncObjHash(const void *key) {
513cef054e8Santirez     robj *o = (robj*) key;
514cef054e8Santirez 
515cef054e8Santirez     if (sdsEncodedObject(o)) {
516cef054e8Santirez         return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
517cef054e8Santirez     } else {
51814ff5724Santirez         if (o->encoding == OBJ_ENCODING_INT) {
519cef054e8Santirez             char buf[32];
520cef054e8Santirez             int len;
521cef054e8Santirez 
522cef054e8Santirez             len = ll2string(buf,32,(long)o->ptr);
523cef054e8Santirez             return dictGenHashFunction((unsigned char*)buf, len);
524cef054e8Santirez         } else {
525cef054e8Santirez             unsigned int hash;
526cef054e8Santirez 
527cef054e8Santirez             o = getDecodedObject(o);
528cef054e8Santirez             hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
529cef054e8Santirez             decrRefCount(o);
530cef054e8Santirez             return hash;
531cef054e8Santirez         }
532cef054e8Santirez     }
533cef054e8Santirez }
534cef054e8Santirez 
535cef054e8Santirez /* Sets type hash table */
536cef054e8Santirez dictType setDictType = {
537cef054e8Santirez     dictEncObjHash,            /* hash function */
538cef054e8Santirez     NULL,                      /* key dup */
539cef054e8Santirez     NULL,                      /* val dup */
540cef054e8Santirez     dictEncObjKeyCompare,      /* key compare */
5415cfb7927Santirez     dictObjectDestructor, /* key destructor */
542cef054e8Santirez     NULL                       /* val destructor */
543cef054e8Santirez };
544cef054e8Santirez 
545cef054e8Santirez /* Sorted sets hash (note: a skiplist is used in addition to the hash table) */
546cef054e8Santirez dictType zsetDictType = {
547cef054e8Santirez     dictEncObjHash,            /* hash function */
548cef054e8Santirez     NULL,                      /* key dup */
549cef054e8Santirez     NULL,                      /* val dup */
550cef054e8Santirez     dictEncObjKeyCompare,      /* key compare */
5515cfb7927Santirez     dictObjectDestructor, /* key destructor */
552cef054e8Santirez     NULL                       /* val destructor */
553cef054e8Santirez };
554cef054e8Santirez 
555cef054e8Santirez /* Db->dict, keys are sds strings, vals are Redis objects. */
556cef054e8Santirez dictType dbDictType = {
557cef054e8Santirez     dictSdsHash,                /* hash function */
558cef054e8Santirez     NULL,                       /* key dup */
559cef054e8Santirez     NULL,                       /* val dup */
560cef054e8Santirez     dictSdsKeyCompare,          /* key compare */
561cef054e8Santirez     dictSdsDestructor,          /* key destructor */
5625cfb7927Santirez     dictObjectDestructor   /* val destructor */
563cef054e8Santirez };
564cef054e8Santirez 
565cef054e8Santirez /* server.lua_scripts sha (as sds string) -> scripts (as robj) cache. */
566cef054e8Santirez dictType shaScriptObjectDictType = {
567cef054e8Santirez     dictSdsCaseHash,            /* hash function */
568cef054e8Santirez     NULL,                       /* key dup */
569cef054e8Santirez     NULL,                       /* val dup */
570cef054e8Santirez     dictSdsKeyCaseCompare,      /* key compare */
571cef054e8Santirez     dictSdsDestructor,          /* key destructor */
5725cfb7927Santirez     dictObjectDestructor   /* val destructor */
573cef054e8Santirez };
574cef054e8Santirez 
575cef054e8Santirez /* Db->expires */
576cef054e8Santirez dictType keyptrDictType = {
577cef054e8Santirez     dictSdsHash,               /* hash function */
578cef054e8Santirez     NULL,                      /* key dup */
579cef054e8Santirez     NULL,                      /* val dup */
580cef054e8Santirez     dictSdsKeyCompare,         /* key compare */
581cef054e8Santirez     NULL,                      /* key destructor */
582cef054e8Santirez     NULL                       /* val destructor */
583cef054e8Santirez };
584cef054e8Santirez 
585cef054e8Santirez /* Command table. sds string -> command struct pointer. */
586cef054e8Santirez dictType commandTableDictType = {
587cef054e8Santirez     dictSdsCaseHash,           /* hash function */
588cef054e8Santirez     NULL,                      /* key dup */
589cef054e8Santirez     NULL,                      /* val dup */
590cef054e8Santirez     dictSdsKeyCaseCompare,     /* key compare */
591cef054e8Santirez     dictSdsDestructor,         /* key destructor */
592cef054e8Santirez     NULL                       /* val destructor */
593cef054e8Santirez };
594cef054e8Santirez 
595cef054e8Santirez /* Hash type hash table (note that small hashes are represented with ziplists) */
596cef054e8Santirez dictType hashDictType = {
597cef054e8Santirez     dictEncObjHash,             /* hash function */
598cef054e8Santirez     NULL,                       /* key dup */
599cef054e8Santirez     NULL,                       /* val dup */
600cef054e8Santirez     dictEncObjKeyCompare,       /* key compare */
6015cfb7927Santirez     dictObjectDestructor,  /* key destructor */
6025cfb7927Santirez     dictObjectDestructor   /* val destructor */
603cef054e8Santirez };
604cef054e8Santirez 
605cef054e8Santirez /* Keylist hash table type has unencoded redis objects as keys and
606cef054e8Santirez  * lists as values. It's used for blocking operations (BLPOP) and to
607cef054e8Santirez  * map swapped keys to a list of clients waiting for this keys to be loaded. */
608cef054e8Santirez dictType keylistDictType = {
609cef054e8Santirez     dictObjHash,                /* hash function */
610cef054e8Santirez     NULL,                       /* key dup */
611cef054e8Santirez     NULL,                       /* val dup */
612cef054e8Santirez     dictObjKeyCompare,          /* key compare */
6135cfb7927Santirez     dictObjectDestructor,  /* key destructor */
614cef054e8Santirez     dictListDestructor          /* val destructor */
615cef054e8Santirez };
616cef054e8Santirez 
617cef054e8Santirez /* Cluster nodes hash table, mapping nodes addresses 1.2.3.4:6379 to
618cef054e8Santirez  * clusterNode structures. */
619cef054e8Santirez dictType clusterNodesDictType = {
620cef054e8Santirez     dictSdsHash,                /* hash function */
621cef054e8Santirez     NULL,                       /* key dup */
622cef054e8Santirez     NULL,                       /* val dup */
623cef054e8Santirez     dictSdsKeyCompare,          /* key compare */
624cef054e8Santirez     dictSdsDestructor,          /* key destructor */
625cef054e8Santirez     NULL                        /* val destructor */
626cef054e8Santirez };
627cef054e8Santirez 
628cef054e8Santirez /* Cluster re-addition blacklist. This maps node IDs to the time
629cef054e8Santirez  * we can re-add this node. The goal is to avoid readding a removed
630cef054e8Santirez  * node for some time. */
631cef054e8Santirez dictType clusterNodesBlackListDictType = {
632cef054e8Santirez     dictSdsCaseHash,            /* hash function */
633cef054e8Santirez     NULL,                       /* key dup */
634cef054e8Santirez     NULL,                       /* val dup */
635cef054e8Santirez     dictSdsKeyCaseCompare,      /* key compare */
636cef054e8Santirez     dictSdsDestructor,          /* key destructor */
637cef054e8Santirez     NULL                        /* val destructor */
638cef054e8Santirez };
639cef054e8Santirez 
640cef054e8Santirez /* Migrate cache dict type. */
641cef054e8Santirez dictType migrateCacheDictType = {
642cef054e8Santirez     dictSdsHash,                /* hash function */
643cef054e8Santirez     NULL,                       /* key dup */
644cef054e8Santirez     NULL,                       /* val dup */
645cef054e8Santirez     dictSdsKeyCompare,          /* key compare */
646cef054e8Santirez     dictSdsDestructor,          /* key destructor */
647cef054e8Santirez     NULL                        /* val destructor */
648cef054e8Santirez };
649cef054e8Santirez 
650cef054e8Santirez /* Replication cached script dict (server.repl_scriptcache_dict).
651cef054e8Santirez  * Keys are sds SHA1 strings, while values are not used at all in the current
652cef054e8Santirez  * implementation. */
653cef054e8Santirez dictType replScriptCacheDictType = {
654cef054e8Santirez     dictSdsCaseHash,            /* hash function */
655cef054e8Santirez     NULL,                       /* key dup */
656cef054e8Santirez     NULL,                       /* val dup */
657cef054e8Santirez     dictSdsKeyCaseCompare,      /* key compare */
658cef054e8Santirez     dictSdsDestructor,          /* key destructor */
659cef054e8Santirez     NULL                        /* val destructor */
660cef054e8Santirez };
661cef054e8Santirez 
/* Return 1 if the hash table has more than DICT_HT_INITIAL_SIZE slots and
 * the percentage of used slots is below HASHTABLE_MIN_FILL, otherwise
 * return 0. */
int htNeedsResize(dict *dict) {
    long long slots = dictSlots(dict);
    long long entries = dictSize(dict);

    /* Tables at (or below) the initial size are never reported. This
     * also guarantees a non zero divisor below. */
    if (slots <= DICT_HT_INITIAL_SIZE) return 0;
    return (entries*100/slots < HASHTABLE_MIN_FILL);
}
670cef054e8Santirez 
67132f80e2fSantirez /* If the percentage of used slots in the HT reaches HASHTABLE_MIN_FILL
672cef054e8Santirez  * we resize the hash table to save memory */
tryResizeHashTables(int dbid)673cef054e8Santirez void tryResizeHashTables(int dbid) {
674cef054e8Santirez     if (htNeedsResize(server.db[dbid].dict))
675cef054e8Santirez         dictResize(server.db[dbid].dict);
676cef054e8Santirez     if (htNeedsResize(server.db[dbid].expires))
677cef054e8Santirez         dictResize(server.db[dbid].expires);
678cef054e8Santirez }
679cef054e8Santirez 
680cef054e8Santirez /* Our hash table implementation performs rehashing incrementally while
681cef054e8Santirez  * we write/read from the hash table. Still if the server is idle, the hash
682cef054e8Santirez  * table will use two tables for a long time. So we try to use 1 millisecond
683cef054e8Santirez  * of CPU time at every call of this function to perform some rehahsing.
684cef054e8Santirez  *
685cef054e8Santirez  * The function returns 1 if some rehashing was performed, otherwise 0
686cef054e8Santirez  * is returned. */
incrementallyRehash(int dbid)687cef054e8Santirez int incrementallyRehash(int dbid) {
688cef054e8Santirez     /* Keys dictionary */
689cef054e8Santirez     if (dictIsRehashing(server.db[dbid].dict)) {
690cef054e8Santirez         dictRehashMilliseconds(server.db[dbid].dict,1);
691cef054e8Santirez         return 1; /* already used our millisecond for this loop... */
692cef054e8Santirez     }
693cef054e8Santirez     /* Expires */
694cef054e8Santirez     if (dictIsRehashing(server.db[dbid].expires)) {
695cef054e8Santirez         dictRehashMilliseconds(server.db[dbid].expires,1);
696cef054e8Santirez         return 1; /* already used our millisecond for this loop... */
697cef054e8Santirez     }
698cef054e8Santirez     return 0;
699cef054e8Santirez }
700cef054e8Santirez 
701cef054e8Santirez /* This function is called once a background process of some kind terminates,
702cef054e8Santirez  * as we want to avoid resizing the hash tables when there is a child in order
703cef054e8Santirez  * to play well with copy-on-write (otherwise when a resize happens lots of
704cef054e8Santirez  * memory pages are copied). The goal of this function is to update the ability
705cef054e8Santirez  * for dict.c to resize the hash tables accordingly to the fact we have o not
706cef054e8Santirez  * running childs. */
updateDictResizePolicy(void)707cef054e8Santirez void updateDictResizePolicy(void) {
708cef054e8Santirez     if (server.rdb_child_pid == -1 && server.aof_child_pid == -1)
709cef054e8Santirez         dictEnableResize();
710cef054e8Santirez     else
711cef054e8Santirez         dictDisableResize();
712cef054e8Santirez }
713cef054e8Santirez 
/* ======================= Cron: called every 100 ms ======================== */
715cef054e8Santirez 
716cef054e8Santirez /* Helper function for the activeExpireCycle() function.
717cef054e8Santirez  * This function will try to expire the key that is stored in the hash table
718cef054e8Santirez  * entry 'de' of the 'expires' hash table of a Redis database.
719cef054e8Santirez  *
720cef054e8Santirez  * If the key is found to be expired, it is removed from the database and
721cef054e8Santirez  * 1 is returned. Otherwise no operation is performed and 0 is returned.
722cef054e8Santirez  *
723cef054e8Santirez  * When a key is expired, server.stat_expiredkeys is incremented.
724cef054e8Santirez  *
725cef054e8Santirez  * The parameter 'now' is the current time in milliseconds as is passed
726cef054e8Santirez  * to the function to avoid too many gettimeofday() syscalls. */
activeExpireCycleTryExpire(redisDb * db,dictEntry * de,long long now)727cef054e8Santirez int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {
728cef054e8Santirez     long long t = dictGetSignedIntegerVal(de);
729cef054e8Santirez     if (now > t) {
730cef054e8Santirez         sds key = dictGetKey(de);
731cef054e8Santirez         robj *keyobj = createStringObject(key,sdslen(key));
732cef054e8Santirez 
733cef054e8Santirez         propagateExpire(db,keyobj);
734cef054e8Santirez         dbDelete(db,keyobj);
73532f80e2fSantirez         notifyKeyspaceEvent(NOTIFY_EXPIRED,
736cef054e8Santirez             "expired",keyobj,db->id);
737cef054e8Santirez         decrRefCount(keyobj);
738cef054e8Santirez         server.stat_expiredkeys++;
739cef054e8Santirez         return 1;
740cef054e8Santirez     } else {
741cef054e8Santirez         return 0;
742cef054e8Santirez     }
743cef054e8Santirez }
744cef054e8Santirez 
745cef054e8Santirez /* Try to expire a few timed out keys. The algorithm used is adaptive and
746cef054e8Santirez  * will use few CPU cycles if there are few expiring keys, otherwise
747cef054e8Santirez  * it will get more aggressive to avoid that too much memory is used by
748cef054e8Santirez  * keys that can be removed from the keyspace.
749cef054e8Santirez  *
75032f80e2fSantirez  * No more than CRON_DBS_PER_CALL databases are tested at every
751cef054e8Santirez  * iteration.
752cef054e8Santirez  *
753cef054e8Santirez  * This kind of call is used when Redis detects that timelimit_exit is
754cef054e8Santirez  * true, so there is more work to do, and we do it more incrementally from
755cef054e8Santirez  * the beforeSleep() function of the event loop.
756cef054e8Santirez  *
757cef054e8Santirez  * Expire cycle type:
758cef054e8Santirez  *
759cef054e8Santirez  * If type is ACTIVE_EXPIRE_CYCLE_FAST the function will try to run a
760cef054e8Santirez  * "fast" expire cycle that takes no longer than EXPIRE_FAST_CYCLE_DURATION
761cef054e8Santirez  * microseconds, and is not repeated again before the same amount of time.
762cef054e8Santirez  *
763cef054e8Santirez  * If type is ACTIVE_EXPIRE_CYCLE_SLOW, that normal expire cycle is
764cef054e8Santirez  * executed, where the time limit is a percentage of the REDIS_HZ period
765cef054e8Santirez  * as specified by the REDIS_EXPIRELOOKUPS_TIME_PERC define. */
766cef054e8Santirez 
activeExpireCycle(int type)767cef054e8Santirez void activeExpireCycle(int type) {
768cef054e8Santirez     /* This function has some global state in order to continue the work
769cef054e8Santirez      * incrementally across calls. */
770cef054e8Santirez     static unsigned int current_db = 0; /* Last DB tested. */
771cef054e8Santirez     static int timelimit_exit = 0;      /* Time limit hit in previous call? */
772cef054e8Santirez     static long long last_fast_cycle = 0; /* When last fast cycle ran. */
773cef054e8Santirez 
774cef054e8Santirez     int j, iteration = 0;
77532f80e2fSantirez     int dbs_per_call = CRON_DBS_PER_CALL;
776cef054e8Santirez     long long start = ustime(), timelimit;
777cef054e8Santirez 
778cef054e8Santirez     if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
779cef054e8Santirez         /* Don't start a fast cycle if the previous cycle did not exited
780cef054e8Santirez          * for time limt. Also don't repeat a fast cycle for the same period
781cef054e8Santirez          * as the fast cycle total duration itself. */
782cef054e8Santirez         if (!timelimit_exit) return;
783cef054e8Santirez         if (start < last_fast_cycle + ACTIVE_EXPIRE_CYCLE_FAST_DURATION*2) return;
784cef054e8Santirez         last_fast_cycle = start;
785cef054e8Santirez     }
786cef054e8Santirez 
78732f80e2fSantirez     /* We usually should test CRON_DBS_PER_CALL per iteration, with
788cef054e8Santirez      * two exceptions:
789cef054e8Santirez      *
790cef054e8Santirez      * 1) Don't test more DBs than we have.
791cef054e8Santirez      * 2) If last time we hit the time limit, we want to scan all DBs
792cef054e8Santirez      * in this iteration, as there is work to do in some DB and we don't want
793cef054e8Santirez      * expired keys to use memory for too much time. */
794cef054e8Santirez     if (dbs_per_call > server.dbnum || timelimit_exit)
795cef054e8Santirez         dbs_per_call = server.dbnum;
796cef054e8Santirez 
797cef054e8Santirez     /* We can use at max ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC percentage of CPU time
798cef054e8Santirez      * per iteration. Since this function gets called with a frequency of
799cef054e8Santirez      * server.hz times per second, the following is the max amount of
800cef054e8Santirez      * microseconds we can spend in this function. */
801cef054e8Santirez     timelimit = 1000000*ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC/server.hz/100;
802cef054e8Santirez     timelimit_exit = 0;
803cef054e8Santirez     if (timelimit <= 0) timelimit = 1;
804cef054e8Santirez 
805cef054e8Santirez     if (type == ACTIVE_EXPIRE_CYCLE_FAST)
806cef054e8Santirez         timelimit = ACTIVE_EXPIRE_CYCLE_FAST_DURATION; /* in microseconds. */
807cef054e8Santirez 
808cef054e8Santirez     for (j = 0; j < dbs_per_call; j++) {
809cef054e8Santirez         int expired;
810cef054e8Santirez         redisDb *db = server.db+(current_db % server.dbnum);
811cef054e8Santirez 
812cef054e8Santirez         /* Increment the DB now so we are sure if we run out of time
813cef054e8Santirez          * in the current DB we'll restart from the next. This allows to
814cef054e8Santirez          * distribute the time evenly across DBs. */
815cef054e8Santirez         current_db++;
816cef054e8Santirez 
817cef054e8Santirez         /* Continue to expire if at the end of the cycle more than 25%
818cef054e8Santirez          * of the keys were expired. */
819cef054e8Santirez         do {
820cef054e8Santirez             unsigned long num, slots;
821cef054e8Santirez             long long now, ttl_sum;
822cef054e8Santirez             int ttl_samples;
823cef054e8Santirez 
824cef054e8Santirez             /* If there is nothing to expire try next DB ASAP. */
825cef054e8Santirez             if ((num = dictSize(db->expires)) == 0) {
826cef054e8Santirez                 db->avg_ttl = 0;
827cef054e8Santirez                 break;
828cef054e8Santirez             }
829cef054e8Santirez             slots = dictSlots(db->expires);
830cef054e8Santirez             now = mstime();
831cef054e8Santirez 
832cef054e8Santirez             /* When there are less than 1% filled slots getting random
833cef054e8Santirez              * keys is expensive, so stop here waiting for better times...
834cef054e8Santirez              * The dictionary will be resized asap. */
835cef054e8Santirez             if (num && slots > DICT_HT_INITIAL_SIZE &&
836cef054e8Santirez                 (num*100/slots < 1)) break;
837cef054e8Santirez 
838cef054e8Santirez             /* The main collection cycle. Sample random keys among keys
839cef054e8Santirez              * with an expire set, checking for expired ones. */
840cef054e8Santirez             expired = 0;
841cef054e8Santirez             ttl_sum = 0;
842cef054e8Santirez             ttl_samples = 0;
843cef054e8Santirez 
844cef054e8Santirez             if (num > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP)
845cef054e8Santirez                 num = ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP;
846cef054e8Santirez 
847cef054e8Santirez             while (num--) {
848cef054e8Santirez                 dictEntry *de;
849cef054e8Santirez                 long long ttl;
850cef054e8Santirez 
851cef054e8Santirez                 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
852cef054e8Santirez                 ttl = dictGetSignedIntegerVal(de)-now;
853cef054e8Santirez                 if (activeExpireCycleTryExpire(db,de,now)) expired++;
854b81fb9cdSantirez                 if (ttl > 0) {
855b81fb9cdSantirez                     /* We want the average TTL of keys yet not expired. */
856cef054e8Santirez                     ttl_sum += ttl;
857cef054e8Santirez                     ttl_samples++;
858cef054e8Santirez                 }
859b81fb9cdSantirez             }
860cef054e8Santirez 
861cef054e8Santirez             /* Update the average TTL stats for this database. */
862cef054e8Santirez             if (ttl_samples) {
863cef054e8Santirez                 long long avg_ttl = ttl_sum/ttl_samples;
864cef054e8Santirez 
865b81fb9cdSantirez                 /* Do a simple running average with a few samples.
866b81fb9cdSantirez                  * We just use the current estimate with a weight of 2%
867b81fb9cdSantirez                  * and the previous estimate with a weight of 98%. */
868cef054e8Santirez                 if (db->avg_ttl == 0) db->avg_ttl = avg_ttl;
869b81fb9cdSantirez                 db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50);
870cef054e8Santirez             }
871cef054e8Santirez 
872cef054e8Santirez             /* We can't block forever here even if there are many keys to
873cef054e8Santirez              * expire. So after a given amount of milliseconds return to the
874cef054e8Santirez              * caller waiting for the other active expire cycle. */
875cef054e8Santirez             iteration++;
876cef054e8Santirez             if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */
877cef054e8Santirez                 long long elapsed = ustime()-start;
878cef054e8Santirez 
879cef054e8Santirez                 latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);
880cef054e8Santirez                 if (elapsed > timelimit) timelimit_exit = 1;
881cef054e8Santirez             }
882cef054e8Santirez             if (timelimit_exit) return;
883cef054e8Santirez             /* We don't repeat the cycle if there are less than 25% of keys
884cef054e8Santirez              * found expired in the current DB. */
885cef054e8Santirez         } while (expired > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP/4);
886cef054e8Santirez     }
887cef054e8Santirez }
888cef054e8Santirez 
getLRUClock(void)889cef054e8Santirez unsigned int getLRUClock(void) {
89032f80e2fSantirez     return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX;
891cef054e8Santirez }
892cef054e8Santirez 
893cef054e8Santirez /* Add a sample to the operations per second array of samples. */
trackInstantaneousMetric(int metric,long long current_reading)894cef054e8Santirez void trackInstantaneousMetric(int metric, long long current_reading) {
895cef054e8Santirez     long long t = mstime() - server.inst_metric[metric].last_sample_time;
896cef054e8Santirez     long long ops = current_reading -
897cef054e8Santirez                     server.inst_metric[metric].last_sample_count;
898cef054e8Santirez     long long ops_sec;
899cef054e8Santirez 
900cef054e8Santirez     ops_sec = t > 0 ? (ops*1000/t) : 0;
901cef054e8Santirez 
902cef054e8Santirez     server.inst_metric[metric].samples[server.inst_metric[metric].idx] =
903cef054e8Santirez         ops_sec;
904cef054e8Santirez     server.inst_metric[metric].idx++;
90532f80e2fSantirez     server.inst_metric[metric].idx %= STATS_METRIC_SAMPLES;
906cef054e8Santirez     server.inst_metric[metric].last_sample_time = mstime();
907cef054e8Santirez     server.inst_metric[metric].last_sample_count = current_reading;
908cef054e8Santirez }
909cef054e8Santirez 
910cef054e8Santirez /* Return the mean of all the samples. */
getInstantaneousMetric(int metric)911cef054e8Santirez long long getInstantaneousMetric(int metric) {
912cef054e8Santirez     int j;
913cef054e8Santirez     long long sum = 0;
914cef054e8Santirez 
91532f80e2fSantirez     for (j = 0; j < STATS_METRIC_SAMPLES; j++)
916cef054e8Santirez         sum += server.inst_metric[metric].samples[j];
91732f80e2fSantirez     return sum / STATS_METRIC_SAMPLES;
918cef054e8Santirez }
919cef054e8Santirez 
920cef054e8Santirez /* Check for timeouts. Returns non-zero if the client was terminated.
921cef054e8Santirez  * The function gets the current time in milliseconds as argument since
922cef054e8Santirez  * it gets called multiple times in a loop, so calling gettimeofday() for
923cef054e8Santirez  * each iteration would be costly without any actual gain. */
clientsCronHandleTimeout(client * c,mstime_t now_ms)924554bd0e7Santirez int clientsCronHandleTimeout(client *c, mstime_t now_ms) {
925cef054e8Santirez     time_t now = now_ms/1000;
926cef054e8Santirez 
927cef054e8Santirez     if (server.maxidletime &&
92832f80e2fSantirez         !(c->flags & CLIENT_SLAVE) &&    /* no timeout for slaves */
92932f80e2fSantirez         !(c->flags & CLIENT_MASTER) &&   /* no timeout for masters */
93032f80e2fSantirez         !(c->flags & CLIENT_BLOCKED) &&  /* no timeout for BLPOP */
93132f80e2fSantirez         !(c->flags & CLIENT_PUBSUB) &&   /* no timeout for Pub/Sub clients */
932cef054e8Santirez         (now - c->lastinteraction > server.maxidletime))
933cef054e8Santirez     {
93432f80e2fSantirez         serverLog(LL_VERBOSE,"Closing idle client");
935cef054e8Santirez         freeClient(c);
936cef054e8Santirez         return 1;
93732f80e2fSantirez     } else if (c->flags & CLIENT_BLOCKED) {
938cef054e8Santirez         /* Blocked OPS timeout is handled with milliseconds resolution.
939cef054e8Santirez          * However note that the actual resolution is limited by
940cef054e8Santirez          * server.hz. */
941cef054e8Santirez 
942cef054e8Santirez         if (c->bpop.timeout != 0 && c->bpop.timeout < now_ms) {
943cef054e8Santirez             /* Handle blocking operation specific timeout. */
944cef054e8Santirez             replyToBlockedClientTimedOut(c);
945cef054e8Santirez             unblockClient(c);
946cef054e8Santirez         } else if (server.cluster_enabled) {
947cef054e8Santirez             /* Cluster: handle unblock & redirect of clients blocked
948cef054e8Santirez              * into keys no longer served by this server. */
949cef054e8Santirez             if (clusterRedirectBlockedClientIfNeeded(c))
950cef054e8Santirez                 unblockClient(c);
951cef054e8Santirez         }
952cef054e8Santirez     }
953cef054e8Santirez     return 0;
954cef054e8Santirez }
955cef054e8Santirez 
956cef054e8Santirez /* The client query buffer is an sds.c string that can end with a lot of
957cef054e8Santirez  * free space not used, this function reclaims space if needed.
958cef054e8Santirez  *
959cef054e8Santirez  * The function always returns 0 as it never terminates the client. */
clientsCronResizeQueryBuffer(client * c)960554bd0e7Santirez int clientsCronResizeQueryBuffer(client *c) {
961cef054e8Santirez     size_t querybuf_size = sdsAllocSize(c->querybuf);
962cef054e8Santirez     time_t idletime = server.unixtime - c->lastinteraction;
963cef054e8Santirez 
964cef054e8Santirez     /* There are two conditions to resize the query buffer:
965cef054e8Santirez      * 1) Query buffer is > BIG_ARG and too big for latest peak.
966cef054e8Santirez      * 2) Client is inactive and the buffer is bigger than 1k. */
96732f80e2fSantirez     if (((querybuf_size > PROTO_MBULK_BIG_ARG) &&
968cef054e8Santirez          (querybuf_size/(c->querybuf_peak+1)) > 2) ||
969cef054e8Santirez          (querybuf_size > 1024 && idletime > 2))
970cef054e8Santirez     {
971cef054e8Santirez         /* Only resize the query buffer if it is actually wasting space. */
972cef054e8Santirez         if (sdsavail(c->querybuf) > 1024) {
973cef054e8Santirez             c->querybuf = sdsRemoveFreeSpace(c->querybuf);
974cef054e8Santirez         }
975cef054e8Santirez     }
976cef054e8Santirez     /* Reset the peak again to capture the peak memory usage in the next
977cef054e8Santirez      * cycle. */
978cef054e8Santirez     c->querybuf_peak = 0;
979cef054e8Santirez     return 0;
980cef054e8Santirez }
981cef054e8Santirez 
982cef054e8Santirez #define CLIENTS_CRON_MIN_ITERATIONS 5
clientsCron(void)983cef054e8Santirez void clientsCron(void) {
984cef054e8Santirez     /* Make sure to process at least numclients/server.hz of clients
985cef054e8Santirez      * per call. Since this function is called server.hz times per second
986cef054e8Santirez      * we are sure that in the worst case we process all the clients in 1
987cef054e8Santirez      * second. */
988cef054e8Santirez     int numclients = listLength(server.clients);
989cef054e8Santirez     int iterations = numclients/server.hz;
990cef054e8Santirez     mstime_t now = mstime();
991cef054e8Santirez 
992cef054e8Santirez     /* Process at least a few clients while we are at it, even if we need
993cef054e8Santirez      * to process less than CLIENTS_CRON_MIN_ITERATIONS to meet our contract
994cef054e8Santirez      * of processing each client once per second. */
995cef054e8Santirez     if (iterations < CLIENTS_CRON_MIN_ITERATIONS)
996cef054e8Santirez         iterations = (numclients < CLIENTS_CRON_MIN_ITERATIONS) ?
997cef054e8Santirez                      numclients : CLIENTS_CRON_MIN_ITERATIONS;
998cef054e8Santirez 
999cef054e8Santirez     while(listLength(server.clients) && iterations--) {
1000554bd0e7Santirez         client *c;
1001cef054e8Santirez         listNode *head;
1002cef054e8Santirez 
1003cef054e8Santirez         /* Rotate the list, take the current head, process.
1004cef054e8Santirez          * This way if the client must be removed from the list it's the
1005cef054e8Santirez          * first element and we don't incur into O(N) computation. */
1006cef054e8Santirez         listRotate(server.clients);
1007cef054e8Santirez         head = listFirst(server.clients);
1008cef054e8Santirez         c = listNodeValue(head);
1009cef054e8Santirez         /* The following functions do different service checks on the client.
1010cef054e8Santirez          * The protocol is that they return non-zero if the client was
1011cef054e8Santirez          * terminated. */
1012cef054e8Santirez         if (clientsCronHandleTimeout(c,now)) continue;
1013cef054e8Santirez         if (clientsCronResizeQueryBuffer(c)) continue;
1014cef054e8Santirez     }
1015cef054e8Santirez }
1016cef054e8Santirez 
1017cef054e8Santirez /* This function handles 'background' operations we are required to do
1018cef054e8Santirez  * incrementally in Redis databases, such as active key expiring, resizing,
1019cef054e8Santirez  * rehashing. */
databasesCron(void)1020cef054e8Santirez void databasesCron(void) {
1021cef054e8Santirez     /* Expire keys by random sampling. Not required for slaves
1022cef054e8Santirez      * as master will synthesize DELs for us. */
1023cef054e8Santirez     if (server.active_expire_enabled && server.masterhost == NULL)
1024cef054e8Santirez         activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW);
1025cef054e8Santirez 
1026cef054e8Santirez     /* Perform hash tables rehashing if needed, but only if there are no
1027cef054e8Santirez      * other processes saving the DB on disk. Otherwise rehashing is bad
1028cef054e8Santirez      * as will cause a lot of copy-on-write of memory pages. */
1029cef054e8Santirez     if (server.rdb_child_pid == -1 && server.aof_child_pid == -1) {
1030cef054e8Santirez         /* We use global counters so if we stop the computation at a given
1031cef054e8Santirez          * DB we'll be able to start from the successive in the next
1032cef054e8Santirez          * cron loop iteration. */
1033cef054e8Santirez         static unsigned int resize_db = 0;
1034cef054e8Santirez         static unsigned int rehash_db = 0;
103532f80e2fSantirez         int dbs_per_call = CRON_DBS_PER_CALL;
1036cef054e8Santirez         int j;
1037cef054e8Santirez 
1038cef054e8Santirez         /* Don't test more DBs than we have. */
1039cef054e8Santirez         if (dbs_per_call > server.dbnum) dbs_per_call = server.dbnum;
1040cef054e8Santirez 
1041cef054e8Santirez         /* Resize */
1042cef054e8Santirez         for (j = 0; j < dbs_per_call; j++) {
1043cef054e8Santirez             tryResizeHashTables(resize_db % server.dbnum);
1044cef054e8Santirez             resize_db++;
1045cef054e8Santirez         }
1046cef054e8Santirez 
1047cef054e8Santirez         /* Rehash */
1048cef054e8Santirez         if (server.activerehashing) {
1049cef054e8Santirez             for (j = 0; j < dbs_per_call; j++) {
1050cef054e8Santirez                 int work_done = incrementallyRehash(rehash_db % server.dbnum);
1051cef054e8Santirez                 rehash_db++;
1052cef054e8Santirez                 if (work_done) {
1053cef054e8Santirez                     /* If the function did some work, stop here, we'll do
1054cef054e8Santirez                      * more at the next cron loop. */
1055cef054e8Santirez                     break;
1056cef054e8Santirez                 }
1057cef054e8Santirez             }
1058cef054e8Santirez         }
1059cef054e8Santirez     }
1060cef054e8Santirez }
1061cef054e8Santirez 
1062cef054e8Santirez /* We take a cached value of the unix time in the global state because with
1063cef054e8Santirez  * virtual memory and aging there is to store the current time in objects at
1064cef054e8Santirez  * every object access, and accuracy is not needed. To access a global var is
1065cef054e8Santirez  * a lot faster than calling time(NULL) */
updateCachedTime(void)1066cef054e8Santirez void updateCachedTime(void) {
1067cef054e8Santirez     server.unixtime = time(NULL);
1068cef054e8Santirez     server.mstime = mstime();
1069cef054e8Santirez }
1070cef054e8Santirez 
1071cef054e8Santirez /* This is our timer interrupt, called server.hz times per second.
1072cef054e8Santirez  * Here is where we do a number of things that need to be done asynchronously.
1073cef054e8Santirez  * For instance:
1074cef054e8Santirez  *
1075cef054e8Santirez  * - Active expired keys collection (it is also performed in a lazy way on
1076cef054e8Santirez  *   lookup).
1077cef054e8Santirez  * - Software watchdog.
1078cef054e8Santirez  * - Update some statistic.
1079cef054e8Santirez  * - Incremental rehashing of the DBs hash tables.
1080cef054e8Santirez  * - Triggering BGSAVE / AOF rewrite, and handling of terminated children.
1081cef054e8Santirez  * - Clients timeout of different kinds.
1082cef054e8Santirez  * - Replication reconnection.
1083cef054e8Santirez  * - Many more...
1084cef054e8Santirez  *
1085cef054e8Santirez  * Everything directly called here will be called server.hz times per second,
1086cef054e8Santirez  * so in order to throttle execution of things we want to do less frequently
1087cef054e8Santirez  * a macro is used: run_with_period(milliseconds) { .... }
1088cef054e8Santirez  */
1089cef054e8Santirez 
serverCron(struct aeEventLoop * eventLoop,long long id,void * clientData)1090cef054e8Santirez int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
1091cef054e8Santirez     int j;
109232f80e2fSantirez     UNUSED(eventLoop);
109332f80e2fSantirez     UNUSED(id);
109432f80e2fSantirez     UNUSED(clientData);
1095cef054e8Santirez 
1096cef054e8Santirez     /* Software watchdog: deliver the SIGALRM that will reach the signal
1097cef054e8Santirez      * handler if we don't return here fast enough. */
1098cef054e8Santirez     if (server.watchdog_period) watchdogScheduleSignal(server.watchdog_period);
1099cef054e8Santirez 
1100cef054e8Santirez     /* Update the time cache. */
1101cef054e8Santirez     updateCachedTime();
1102cef054e8Santirez 
1103cef054e8Santirez     run_with_period(100) {
110432f80e2fSantirez         trackInstantaneousMetric(STATS_METRIC_COMMAND,server.stat_numcommands);
110532f80e2fSantirez         trackInstantaneousMetric(STATS_METRIC_NET_INPUT,
1106cef054e8Santirez                 server.stat_net_input_bytes);
110732f80e2fSantirez         trackInstantaneousMetric(STATS_METRIC_NET_OUTPUT,
1108cef054e8Santirez                 server.stat_net_output_bytes);
1109cef054e8Santirez     }
1110cef054e8Santirez 
111132f80e2fSantirez     /* We have just LRU_BITS bits per object for LRU information.
1112cef054e8Santirez      * So we use an (eventually wrapping) LRU clock.
1113cef054e8Santirez      *
1114cef054e8Santirez      * Note that even if the counter wraps it's not a big problem,
1115cef054e8Santirez      * everything will still work but some object will appear younger
1116cef054e8Santirez      * to Redis. However for this to happen a given object should never be
1117cef054e8Santirez      * touched for all the time needed to the counter to wrap, which is
1118cef054e8Santirez      * not likely.
1119cef054e8Santirez      *
1120cef054e8Santirez      * Note that you can change the resolution altering the
112132f80e2fSantirez      * LRU_CLOCK_RESOLUTION define. */
1122cef054e8Santirez     server.lruclock = getLRUClock();
1123cef054e8Santirez 
1124cef054e8Santirez     /* Record the max memory used since the server was started. */
1125cef054e8Santirez     if (zmalloc_used_memory() > server.stat_peak_memory)
1126cef054e8Santirez         server.stat_peak_memory = zmalloc_used_memory();
1127cef054e8Santirez 
1128cef054e8Santirez     /* Sample the RSS here since this is a relatively slow call. */
1129cef054e8Santirez     server.resident_set_size = zmalloc_get_rss();
1130cef054e8Santirez 
1131cef054e8Santirez     /* We received a SIGTERM, shutting down here in a safe way, as it is
1132cef054e8Santirez      * not ok doing so inside the signal handler. */
1133cef054e8Santirez     if (server.shutdown_asap) {
1134813ff7fdSantirez         if (prepareForShutdown(SHUTDOWN_NOFLAGS) == C_OK) exit(0);
113532f80e2fSantirez         serverLog(LL_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information");
1136cef054e8Santirez         server.shutdown_asap = 0;
1137cef054e8Santirez     }
1138cef054e8Santirez 
1139cef054e8Santirez     /* Show some info about non-empty databases */
1140cef054e8Santirez     run_with_period(5000) {
1141cef054e8Santirez         for (j = 0; j < server.dbnum; j++) {
1142cef054e8Santirez             long long size, used, vkeys;
1143cef054e8Santirez 
1144cef054e8Santirez             size = dictSlots(server.db[j].dict);
1145cef054e8Santirez             used = dictSize(server.db[j].dict);
1146cef054e8Santirez             vkeys = dictSize(server.db[j].expires);
1147cef054e8Santirez             if (used || vkeys) {
114832f80e2fSantirez                 serverLog(LL_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size);
1149cef054e8Santirez                 /* dictPrintStats(server.dict); */
1150cef054e8Santirez             }
1151cef054e8Santirez         }
1152cef054e8Santirez     }
1153cef054e8Santirez 
1154cef054e8Santirez     /* Show information about connected clients */
1155cef054e8Santirez     if (!server.sentinel_mode) {
1156cef054e8Santirez         run_with_period(5000) {
115732f80e2fSantirez             serverLog(LL_VERBOSE,
1158cef054e8Santirez                 "%lu clients connected (%lu slaves), %zu bytes in use",
1159cef054e8Santirez                 listLength(server.clients)-listLength(server.slaves),
1160cef054e8Santirez                 listLength(server.slaves),
1161cef054e8Santirez                 zmalloc_used_memory());
1162cef054e8Santirez         }
1163cef054e8Santirez     }
1164cef054e8Santirez 
1165cef054e8Santirez     /* We need to do a few operations on clients asynchronously. */
1166cef054e8Santirez     clientsCron();
1167cef054e8Santirez 
1168cef054e8Santirez     /* Handle background operations on Redis databases. */
1169cef054e8Santirez     databasesCron();
1170cef054e8Santirez 
1171cef054e8Santirez     /* Start a scheduled AOF rewrite if this was requested by the user while
1172cef054e8Santirez      * a BGSAVE was in progress. */
1173cef054e8Santirez     if (server.rdb_child_pid == -1 && server.aof_child_pid == -1 &&
1174cef054e8Santirez         server.aof_rewrite_scheduled)
1175cef054e8Santirez     {
1176cef054e8Santirez         rewriteAppendOnlyFileBackground();
1177cef054e8Santirez     }
1178cef054e8Santirez 
1179cef054e8Santirez     /* Check if a background saving or AOF rewrite in progress terminated. */
1180c912df9aSantirez     if (server.rdb_child_pid != -1 || server.aof_child_pid != -1 ||
1181c912df9aSantirez         ldbPendingChildren())
1182c912df9aSantirez     {
1183cef054e8Santirez         int statloc;
1184cef054e8Santirez         pid_t pid;
1185cef054e8Santirez 
1186cef054e8Santirez         if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) {
1187cef054e8Santirez             int exitcode = WEXITSTATUS(statloc);
1188cef054e8Santirez             int bysignal = 0;
1189cef054e8Santirez 
1190cef054e8Santirez             if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc);
1191cef054e8Santirez 
11921cc7a454Santirez             if (pid == -1) {
11931cc7a454Santirez                 serverLog(LL_WARNING,"wait3() returned an error: %s. "
11941cc7a454Santirez                     "rdb_child_pid = %d, aof_child_pid = %d",
11951cc7a454Santirez                     strerror(errno),
11961cc7a454Santirez                     (int) server.rdb_child_pid,
11971cc7a454Santirez                     (int) server.aof_child_pid);
11981cc7a454Santirez             } else if (pid == server.rdb_child_pid) {
1199cef054e8Santirez                 backgroundSaveDoneHandler(exitcode,bysignal);
1200cef054e8Santirez             } else if (pid == server.aof_child_pid) {
1201cef054e8Santirez                 backgroundRewriteDoneHandler(exitcode,bysignal);
1202cef054e8Santirez             } else {
1203cdb92412Santirez                 if (!ldbRemoveChild(pid)) {
120432f80e2fSantirez                     serverLog(LL_WARNING,
1205cdb92412Santirez                         "Warning, detected child with unmatched pid: %ld",
1206cef054e8Santirez                         (long)pid);
1207cef054e8Santirez                 }
1208cdb92412Santirez             }
1209cef054e8Santirez             updateDictResizePolicy();
1210cef054e8Santirez         }
1211cef054e8Santirez     } else {
1212cef054e8Santirez         /* If there is not a background saving/rewrite in progress check if
1213cef054e8Santirez          * we have to save/rewrite now */
1214cef054e8Santirez          for (j = 0; j < server.saveparamslen; j++) {
1215cef054e8Santirez             struct saveparam *sp = server.saveparams+j;
1216cef054e8Santirez 
1217cef054e8Santirez             /* Save if we reached the given amount of changes,
1218cef054e8Santirez              * the given amount of seconds, and if the latest bgsave was
1219cef054e8Santirez              * successful or if, in case of an error, at least
122032f80e2fSantirez              * CONFIG_BGSAVE_RETRY_DELAY seconds already elapsed. */
1221cef054e8Santirez             if (server.dirty >= sp->changes &&
1222cef054e8Santirez                 server.unixtime-server.lastsave > sp->seconds &&
1223cef054e8Santirez                 (server.unixtime-server.lastbgsave_try >
122432f80e2fSantirez                  CONFIG_BGSAVE_RETRY_DELAY ||
122540eb548aSantirez                  server.lastbgsave_status == C_OK))
1226cef054e8Santirez             {
122732f80e2fSantirez                 serverLog(LL_NOTICE,"%d changes in %d seconds. Saving...",
1228cef054e8Santirez                     sp->changes, (int)sp->seconds);
1229cef054e8Santirez                 rdbSaveBackground(server.rdb_filename);
1230cef054e8Santirez                 break;
1231cef054e8Santirez             }
1232cef054e8Santirez          }
1233cef054e8Santirez 
1234cef054e8Santirez          /* Trigger an AOF rewrite if needed */
1235cef054e8Santirez          if (server.rdb_child_pid == -1 &&
1236cef054e8Santirez              server.aof_child_pid == -1 &&
1237cef054e8Santirez              server.aof_rewrite_perc &&
1238cef054e8Santirez              server.aof_current_size > server.aof_rewrite_min_size)
1239cef054e8Santirez          {
1240cef054e8Santirez             long long base = server.aof_rewrite_base_size ?
1241cef054e8Santirez                             server.aof_rewrite_base_size : 1;
1242cef054e8Santirez             long long growth = (server.aof_current_size*100/base) - 100;
1243cef054e8Santirez             if (growth >= server.aof_rewrite_perc) {
124432f80e2fSantirez                 serverLog(LL_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
1245cef054e8Santirez                 rewriteAppendOnlyFileBackground();
1246cef054e8Santirez             }
1247cef054e8Santirez          }
1248cef054e8Santirez     }
1249cef054e8Santirez 
1250cef054e8Santirez 
1251cef054e8Santirez     /* AOF postponed flush: Try at every cron cycle if the slow fsync
1252cef054e8Santirez      * completed. */
1253cef054e8Santirez     if (server.aof_flush_postponed_start) flushAppendOnlyFile(0);
1254cef054e8Santirez 
1255cef054e8Santirez     /* AOF write errors: in this case we have a buffer to flush as well and
1256cef054e8Santirez      * clear the AOF error in case of success to make the DB writable again,
1257cef054e8Santirez      * however to try every second is enough in case of 'hz' is set to
1258cef054e8Santirez      * an higher frequency. */
1259cef054e8Santirez     run_with_period(1000) {
126040eb548aSantirez         if (server.aof_last_write_status == C_ERR)
1261cef054e8Santirez             flushAppendOnlyFile(0);
1262cef054e8Santirez     }
1263cef054e8Santirez 
1264cef054e8Santirez     /* Close clients that need to be closed asynchronous */
1265cef054e8Santirez     freeClientsInAsyncFreeQueue();
1266cef054e8Santirez 
1267cef054e8Santirez     /* Clear the paused clients flag if needed. */
1268cef054e8Santirez     clientsArePaused(); /* Don't check return value, just use the side effect. */
1269cef054e8Santirez 
127021cffc26Santirez     /* Replication cron function -- used to reconnect to master,
127121cffc26Santirez      * detect transfer failures, start background RDB transfers and so forth. */
1272cef054e8Santirez     run_with_period(1000) replicationCron();
1273cef054e8Santirez 
1274cef054e8Santirez     /* Run the Redis Cluster cron. */
1275cef054e8Santirez     run_with_period(100) {
1276cef054e8Santirez         if (server.cluster_enabled) clusterCron();
1277cef054e8Santirez     }
1278cef054e8Santirez 
1279cef054e8Santirez     /* Run the Sentinel timer if we are in sentinel mode. */
1280cef054e8Santirez     run_with_period(100) {
1281cef054e8Santirez         if (server.sentinel_mode) sentinelTimer();
1282cef054e8Santirez     }
1283cef054e8Santirez 
1284cef054e8Santirez     /* Cleanup expired MIGRATE cached sockets. */
1285cef054e8Santirez     run_with_period(1000) {
1286cef054e8Santirez         migrateCloseTimedoutSockets();
1287cef054e8Santirez     }
1288cef054e8Santirez 
128921cffc26Santirez     /* Start a scheduled BGSAVE if the corresponding flag is set. This is
129021cffc26Santirez      * useful when we are forced to postpone a BGSAVE because an AOF
129121cffc26Santirez      * rewrite is in progress.
129221cffc26Santirez      *
129321cffc26Santirez      * Note: this code must be after the replicationCron() call above so
129421cffc26Santirez      * make sure when refactoring this file to keep this order. This is useful
129521cffc26Santirez      * because we want to give priority to RDB savings for replication. */
129621cffc26Santirez     if (server.rdb_child_pid == -1 && server.aof_child_pid == -1 &&
129721cffc26Santirez         server.rdb_bgsave_scheduled &&
129821cffc26Santirez         (server.unixtime-server.lastbgsave_try > CONFIG_BGSAVE_RETRY_DELAY ||
129921cffc26Santirez          server.lastbgsave_status == C_OK))
130021cffc26Santirez     {
130121cffc26Santirez         if (rdbSaveBackground(server.rdb_filename) == C_OK)
130221cffc26Santirez             server.rdb_bgsave_scheduled = 0;
130321cffc26Santirez     }
130421cffc26Santirez 
1305cef054e8Santirez     server.cronloops++;
1306cef054e8Santirez     return 1000/server.hz;
1307cef054e8Santirez }
1308cef054e8Santirez 
1309cef054e8Santirez /* This function gets called every time Redis is entering the
1310cef054e8Santirez  * main loop of the event driven library, that is, before to sleep
1311cef054e8Santirez  * for ready file descriptors. */
beforeSleep(struct aeEventLoop * eventLoop)1312cef054e8Santirez void beforeSleep(struct aeEventLoop *eventLoop) {
131332f80e2fSantirez     UNUSED(eventLoop);
1314cef054e8Santirez 
1315cef054e8Santirez     /* Call the Redis Cluster before sleep function. Note that this function
1316cef054e8Santirez      * may change the state of Redis Cluster (from ok to fail or vice versa),
1317cef054e8Santirez      * so it's a good idea to call it before serving the unblocked clients
1318cef054e8Santirez      * later in this function. */
1319cef054e8Santirez     if (server.cluster_enabled) clusterBeforeSleep();
1320cef054e8Santirez 
1321cef054e8Santirez     /* Run a fast expire cycle (the called function will return
1322cef054e8Santirez      * ASAP if a fast cycle is not needed). */
1323cef054e8Santirez     if (server.active_expire_enabled && server.masterhost == NULL)
1324cef054e8Santirez         activeExpireCycle(ACTIVE_EXPIRE_CYCLE_FAST);
1325cef054e8Santirez 
1326cef054e8Santirez     /* Send all the slaves an ACK request if at least one client blocked
1327cef054e8Santirez      * during the previous event loop iteration. */
1328cef054e8Santirez     if (server.get_ack_from_slaves) {
1329cef054e8Santirez         robj *argv[3];
1330cef054e8Santirez 
1331cef054e8Santirez         argv[0] = createStringObject("REPLCONF",8);
1332cef054e8Santirez         argv[1] = createStringObject("GETACK",6);
1333cef054e8Santirez         argv[2] = createStringObject("*",1); /* Not used argument. */
1334cef054e8Santirez         replicationFeedSlaves(server.slaves, server.slaveseldb, argv, 3);
1335cef054e8Santirez         decrRefCount(argv[0]);
1336cef054e8Santirez         decrRefCount(argv[1]);
1337cef054e8Santirez         decrRefCount(argv[2]);
1338cef054e8Santirez         server.get_ack_from_slaves = 0;
1339cef054e8Santirez     }
1340cef054e8Santirez 
1341cef054e8Santirez     /* Unblock all the clients blocked for synchronous replication
1342cef054e8Santirez      * in WAIT. */
1343cef054e8Santirez     if (listLength(server.clients_waiting_acks))
1344cef054e8Santirez         processClientsWaitingReplicas();
1345cef054e8Santirez 
1346cef054e8Santirez     /* Try to process pending commands for clients that were just unblocked. */
1347cef054e8Santirez     if (listLength(server.unblocked_clients))
1348cef054e8Santirez         processUnblockedClients();
1349cef054e8Santirez 
1350cef054e8Santirez     /* Write the AOF buffer on disk */
1351cef054e8Santirez     flushAppendOnlyFile(0);
135223e7710cSantirez 
135323e7710cSantirez     /* Handle writes with pending output buffers. */
135423e7710cSantirez     handleClientsWithPendingWrites();
1355cef054e8Santirez }
1356cef054e8Santirez 
1357cef054e8Santirez /* =========================== Server initialization ======================== */
1358cef054e8Santirez 
createSharedObjects(void)1359cef054e8Santirez void createSharedObjects(void) {
1360cef054e8Santirez     int j;
1361cef054e8Santirez 
136214ff5724Santirez     shared.crlf = createObject(OBJ_STRING,sdsnew("\r\n"));
136314ff5724Santirez     shared.ok = createObject(OBJ_STRING,sdsnew("+OK\r\n"));
136414ff5724Santirez     shared.err = createObject(OBJ_STRING,sdsnew("-ERR\r\n"));
136514ff5724Santirez     shared.emptybulk = createObject(OBJ_STRING,sdsnew("$0\r\n\r\n"));
136614ff5724Santirez     shared.czero = createObject(OBJ_STRING,sdsnew(":0\r\n"));
136714ff5724Santirez     shared.cone = createObject(OBJ_STRING,sdsnew(":1\r\n"));
136814ff5724Santirez     shared.cnegone = createObject(OBJ_STRING,sdsnew(":-1\r\n"));
136914ff5724Santirez     shared.nullbulk = createObject(OBJ_STRING,sdsnew("$-1\r\n"));
137014ff5724Santirez     shared.nullmultibulk = createObject(OBJ_STRING,sdsnew("*-1\r\n"));
137114ff5724Santirez     shared.emptymultibulk = createObject(OBJ_STRING,sdsnew("*0\r\n"));
137214ff5724Santirez     shared.pong = createObject(OBJ_STRING,sdsnew("+PONG\r\n"));
137314ff5724Santirez     shared.queued = createObject(OBJ_STRING,sdsnew("+QUEUED\r\n"));
137414ff5724Santirez     shared.emptyscan = createObject(OBJ_STRING,sdsnew("*2\r\n$1\r\n0\r\n*0\r\n"));
137514ff5724Santirez     shared.wrongtypeerr = createObject(OBJ_STRING,sdsnew(
1376cef054e8Santirez         "-WRONGTYPE Operation against a key holding the wrong kind of value\r\n"));
137714ff5724Santirez     shared.nokeyerr = createObject(OBJ_STRING,sdsnew(
1378cef054e8Santirez         "-ERR no such key\r\n"));
137914ff5724Santirez     shared.syntaxerr = createObject(OBJ_STRING,sdsnew(
1380cef054e8Santirez         "-ERR syntax error\r\n"));
138114ff5724Santirez     shared.sameobjecterr = createObject(OBJ_STRING,sdsnew(
1382cef054e8Santirez         "-ERR source and destination objects are the same\r\n"));
138314ff5724Santirez     shared.outofrangeerr = createObject(OBJ_STRING,sdsnew(
1384cef054e8Santirez         "-ERR index out of range\r\n"));
138514ff5724Santirez     shared.noscripterr = createObject(OBJ_STRING,sdsnew(
1386cef054e8Santirez         "-NOSCRIPT No matching script. Please use EVAL.\r\n"));
138714ff5724Santirez     shared.loadingerr = createObject(OBJ_STRING,sdsnew(
1388cef054e8Santirez         "-LOADING Redis is loading the dataset in memory\r\n"));
138914ff5724Santirez     shared.slowscripterr = createObject(OBJ_STRING,sdsnew(
1390cef054e8Santirez         "-BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n"));
139114ff5724Santirez     shared.masterdownerr = createObject(OBJ_STRING,sdsnew(
1392cef054e8Santirez         "-MASTERDOWN Link with MASTER is down and slave-serve-stale-data is set to 'no'.\r\n"));
139314ff5724Santirez     shared.bgsaveerr = createObject(OBJ_STRING,sdsnew(
1394cef054e8Santirez         "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Commands that may modify the data set are disabled. Please check Redis logs for details about the error.\r\n"));
139514ff5724Santirez     shared.roslaveerr = createObject(OBJ_STRING,sdsnew(
1396cef054e8Santirez         "-READONLY You can't write against a read only slave.\r\n"));
139714ff5724Santirez     shared.noautherr = createObject(OBJ_STRING,sdsnew(
1398cef054e8Santirez         "-NOAUTH Authentication required.\r\n"));
139914ff5724Santirez     shared.oomerr = createObject(OBJ_STRING,sdsnew(
1400cef054e8Santirez         "-OOM command not allowed when used memory > 'maxmemory'.\r\n"));
140114ff5724Santirez     shared.execaborterr = createObject(OBJ_STRING,sdsnew(
1402cef054e8Santirez         "-EXECABORT Transaction discarded because of previous errors.\r\n"));
140314ff5724Santirez     shared.noreplicaserr = createObject(OBJ_STRING,sdsnew(
1404cef054e8Santirez         "-NOREPLICAS Not enough good slaves to write.\r\n"));
140514ff5724Santirez     shared.busykeyerr = createObject(OBJ_STRING,sdsnew(
1406cef054e8Santirez         "-BUSYKEY Target key name already exists.\r\n"));
140714ff5724Santirez     shared.space = createObject(OBJ_STRING,sdsnew(" "));
140814ff5724Santirez     shared.colon = createObject(OBJ_STRING,sdsnew(":"));
140914ff5724Santirez     shared.plus = createObject(OBJ_STRING,sdsnew("+"));
1410cef054e8Santirez 
141132f80e2fSantirez     for (j = 0; j < PROTO_SHARED_SELECT_CMDS; j++) {
1412cef054e8Santirez         char dictid_str[64];
1413cef054e8Santirez         int dictid_len;
1414cef054e8Santirez 
1415cef054e8Santirez         dictid_len = ll2string(dictid_str,sizeof(dictid_str),j);
141614ff5724Santirez         shared.select[j] = createObject(OBJ_STRING,
1417cef054e8Santirez             sdscatprintf(sdsempty(),
1418cef054e8Santirez                 "*2\r\n$6\r\nSELECT\r\n$%d\r\n%s\r\n",
1419cef054e8Santirez                 dictid_len, dictid_str));
1420cef054e8Santirez     }
1421cef054e8Santirez     shared.messagebulk = createStringObject("$7\r\nmessage\r\n",13);
1422cef054e8Santirez     shared.pmessagebulk = createStringObject("$8\r\npmessage\r\n",14);
1423cef054e8Santirez     shared.subscribebulk = createStringObject("$9\r\nsubscribe\r\n",15);
1424cef054e8Santirez     shared.unsubscribebulk = createStringObject("$11\r\nunsubscribe\r\n",18);
1425cef054e8Santirez     shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17);
1426cef054e8Santirez     shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19);
1427cef054e8Santirez     shared.del = createStringObject("DEL",3);
1428cef054e8Santirez     shared.rpop = createStringObject("RPOP",4);
1429cef054e8Santirez     shared.lpop = createStringObject("LPOP",4);
1430cef054e8Santirez     shared.lpush = createStringObject("LPUSH",5);
143132f80e2fSantirez     for (j = 0; j < OBJ_SHARED_INTEGERS; j++) {
143214ff5724Santirez         shared.integers[j] = createObject(OBJ_STRING,(void*)(long)j);
143314ff5724Santirez         shared.integers[j]->encoding = OBJ_ENCODING_INT;
1434cef054e8Santirez     }
143532f80e2fSantirez     for (j = 0; j < OBJ_SHARED_BULKHDR_LEN; j++) {
143614ff5724Santirez         shared.mbulkhdr[j] = createObject(OBJ_STRING,
1437cef054e8Santirez             sdscatprintf(sdsempty(),"*%d\r\n",j));
143814ff5724Santirez         shared.bulkhdr[j] = createObject(OBJ_STRING,
1439cef054e8Santirez             sdscatprintf(sdsempty(),"$%d\r\n",j));
1440cef054e8Santirez     }
1441cef054e8Santirez     /* The following two shared objects, minstring and maxstrings, are not
1442cef054e8Santirez      * actually used for their value but as a special object meaning
1443cef054e8Santirez      * respectively the minimum possible string and the maximum possible
1444cef054e8Santirez      * string in string comparisons for the ZRANGEBYLEX command. */
1445cef054e8Santirez     shared.minstring = createStringObject("minstring",9);
1446cef054e8Santirez     shared.maxstring = createStringObject("maxstring",9);
1447cef054e8Santirez }
1448cef054e8Santirez 
initServerConfig(void)1449cef054e8Santirez void initServerConfig(void) {
1450cef054e8Santirez     int j;
1451cef054e8Santirez 
145232f80e2fSantirez     getRandomHexChars(server.runid,CONFIG_RUN_ID_SIZE);
1453cef054e8Santirez     server.configfile = NULL;
14541db84c21Santirez     server.executable = NULL;
1455cef054e8Santirez     server.hz = CONFIG_DEFAULT_HZ;
145632f80e2fSantirez     server.runid[CONFIG_RUN_ID_SIZE] = '\0';
1457cef054e8Santirez     server.arch_bits = (sizeof(long) == 8) ? 64 : 32;
145832f80e2fSantirez     server.port = CONFIG_DEFAULT_SERVER_PORT;
145932f80e2fSantirez     server.tcp_backlog = CONFIG_DEFAULT_TCP_BACKLOG;
1460cef054e8Santirez     server.bindaddr_count = 0;
1461cef054e8Santirez     server.unixsocket = NULL;
1462cef054e8Santirez     server.unixsocketperm = CONFIG_DEFAULT_UNIX_SOCKET_PERM;
1463cef054e8Santirez     server.ipfd_count = 0;
1464cef054e8Santirez     server.sofd = -1;
1465273c49e7Santirez     server.protected_mode = CONFIG_DEFAULT_PROTECTED_MODE;
1466cef054e8Santirez     server.dbnum = CONFIG_DEFAULT_DBNUM;
1467cef054e8Santirez     server.verbosity = CONFIG_DEFAULT_VERBOSITY;
146832f80e2fSantirez     server.maxidletime = CONFIG_DEFAULT_CLIENT_TIMEOUT;
1469cef054e8Santirez     server.tcpkeepalive = CONFIG_DEFAULT_TCP_KEEPALIVE;
1470cef054e8Santirez     server.active_expire_enabled = 1;
147132f80e2fSantirez     server.client_max_querybuf_len = PROTO_MAX_QUERYBUF_LEN;
1472cef054e8Santirez     server.saveparams = NULL;
1473cef054e8Santirez     server.loading = 0;
1474cef054e8Santirez     server.logfile = zstrdup(CONFIG_DEFAULT_LOGFILE);
1475cef054e8Santirez     server.syslog_enabled = CONFIG_DEFAULT_SYSLOG_ENABLED;
1476cef054e8Santirez     server.syslog_ident = zstrdup(CONFIG_DEFAULT_SYSLOG_IDENT);
1477cef054e8Santirez     server.syslog_facility = LOG_LOCAL0;
1478cef054e8Santirez     server.daemonize = CONFIG_DEFAULT_DAEMONIZE;
1479cef054e8Santirez     server.supervised = 0;
148032f80e2fSantirez     server.supervised_mode = SUPERVISED_NONE;
148132f80e2fSantirez     server.aof_state = AOF_OFF;
1482cef054e8Santirez     server.aof_fsync = CONFIG_DEFAULT_AOF_FSYNC;
1483cef054e8Santirez     server.aof_no_fsync_on_rewrite = CONFIG_DEFAULT_AOF_NO_FSYNC_ON_REWRITE;
148432f80e2fSantirez     server.aof_rewrite_perc = AOF_REWRITE_PERC;
148532f80e2fSantirez     server.aof_rewrite_min_size = AOF_REWRITE_MIN_SIZE;
1486cef054e8Santirez     server.aof_rewrite_base_size = 0;
1487cef054e8Santirez     server.aof_rewrite_scheduled = 0;
1488cef054e8Santirez     server.aof_last_fsync = time(NULL);
1489cef054e8Santirez     server.aof_rewrite_time_last = -1;
1490cef054e8Santirez     server.aof_rewrite_time_start = -1;
149140eb548aSantirez     server.aof_lastbgrewrite_status = C_OK;
1492cef054e8Santirez     server.aof_delayed_fsync = 0;
1493cef054e8Santirez     server.aof_fd = -1;
1494cef054e8Santirez     server.aof_selected_db = -1; /* Make sure the first time will not match */
1495cef054e8Santirez     server.aof_flush_postponed_start = 0;
1496cef054e8Santirez     server.aof_rewrite_incremental_fsync = CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC;
1497cef054e8Santirez     server.aof_load_truncated = CONFIG_DEFAULT_AOF_LOAD_TRUNCATED;
1498cef054e8Santirez     server.pidfile = NULL;
1499cef054e8Santirez     server.rdb_filename = zstrdup(CONFIG_DEFAULT_RDB_FILENAME);
1500cef054e8Santirez     server.aof_filename = zstrdup(CONFIG_DEFAULT_AOF_FILENAME);
1501cef054e8Santirez     server.requirepass = NULL;
1502cef054e8Santirez     server.rdb_compression = CONFIG_DEFAULT_RDB_COMPRESSION;
1503cef054e8Santirez     server.rdb_checksum = CONFIG_DEFAULT_RDB_CHECKSUM;
1504cef054e8Santirez     server.stop_writes_on_bgsave_err = CONFIG_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR;
1505cef054e8Santirez     server.activerehashing = CONFIG_DEFAULT_ACTIVE_REHASHING;
1506cef054e8Santirez     server.notify_keyspace_events = 0;
1507cef054e8Santirez     server.maxclients = CONFIG_DEFAULT_MAX_CLIENTS;
1508cef054e8Santirez     server.bpop_blocked_clients = 0;
1509cef054e8Santirez     server.maxmemory = CONFIG_DEFAULT_MAXMEMORY;
1510cef054e8Santirez     server.maxmemory_policy = CONFIG_DEFAULT_MAXMEMORY_POLICY;
1511cef054e8Santirez     server.maxmemory_samples = CONFIG_DEFAULT_MAXMEMORY_SAMPLES;
151214ff5724Santirez     server.hash_max_ziplist_entries = OBJ_HASH_MAX_ZIPLIST_ENTRIES;
151314ff5724Santirez     server.hash_max_ziplist_value = OBJ_HASH_MAX_ZIPLIST_VALUE;
151414ff5724Santirez     server.list_max_ziplist_size = OBJ_LIST_MAX_ZIPLIST_SIZE;
151514ff5724Santirez     server.list_compress_depth = OBJ_LIST_COMPRESS_DEPTH;
151614ff5724Santirez     server.set_max_intset_entries = OBJ_SET_MAX_INTSET_ENTRIES;
151714ff5724Santirez     server.zset_max_ziplist_entries = OBJ_ZSET_MAX_ZIPLIST_ENTRIES;
151814ff5724Santirez     server.zset_max_ziplist_value = OBJ_ZSET_MAX_ZIPLIST_VALUE;
1519cef054e8Santirez     server.hll_sparse_max_bytes = CONFIG_DEFAULT_HLL_SPARSE_MAX_BYTES;
1520cef054e8Santirez     server.shutdown_asap = 0;
152132f80e2fSantirez     server.repl_ping_slave_period = CONFIG_DEFAULT_REPL_PING_SLAVE_PERIOD;
152232f80e2fSantirez     server.repl_timeout = CONFIG_DEFAULT_REPL_TIMEOUT;
1523cef054e8Santirez     server.repl_min_slaves_to_write = CONFIG_DEFAULT_MIN_SLAVES_TO_WRITE;
1524cef054e8Santirez     server.repl_min_slaves_max_lag = CONFIG_DEFAULT_MIN_SLAVES_MAX_LAG;
1525cef054e8Santirez     server.cluster_enabled = 0;
15263325a9b1Santirez     server.cluster_node_timeout = CLUSTER_DEFAULT_NODE_TIMEOUT;
15273325a9b1Santirez     server.cluster_migration_barrier = CLUSTER_DEFAULT_MIGRATION_BARRIER;
15283325a9b1Santirez     server.cluster_slave_validity_factor = CLUSTER_DEFAULT_SLAVE_VALIDITY;
15293325a9b1Santirez     server.cluster_require_full_coverage = CLUSTER_DEFAULT_REQUIRE_FULL_COVERAGE;
1530cef054e8Santirez     server.cluster_configfile = zstrdup(CONFIG_DEFAULT_CLUSTER_CONFIG_FILE);
1531cef054e8Santirez     server.migrate_cached_sockets = dictCreate(&migrateCacheDictType,NULL);
1532cef054e8Santirez     server.next_client_id = 1; /* Client IDs, start from 1 .*/
1533cef054e8Santirez     server.loading_process_events_interval_bytes = (1024*1024*2);
1534cef054e8Santirez 
1535cef054e8Santirez     server.lruclock = getLRUClock();
1536cef054e8Santirez     resetServerSaveParams();
1537cef054e8Santirez 
1538cef054e8Santirez     appendServerSaveParams(60*60,1);  /* save after 1 hour and 1 change */
1539cef054e8Santirez     appendServerSaveParams(300,100);  /* save after 5 minutes and 100 changes */
1540cef054e8Santirez     appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */
1541cef054e8Santirez     /* Replication related */
1542cef054e8Santirez     server.masterauth = NULL;
1543cef054e8Santirez     server.masterhost = NULL;
1544cef054e8Santirez     server.masterport = 6379;
1545cef054e8Santirez     server.master = NULL;
1546cef054e8Santirez     server.cached_master = NULL;
1547cef054e8Santirez     server.repl_master_initial_offset = -1;
154832f80e2fSantirez     server.repl_state = REPL_STATE_NONE;
154932f80e2fSantirez     server.repl_syncio_timeout = CONFIG_REPL_SYNCIO_TIMEOUT;
1550cef054e8Santirez     server.repl_serve_stale_data = CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA;
1551cef054e8Santirez     server.repl_slave_ro = CONFIG_DEFAULT_SLAVE_READ_ONLY;
1552cef054e8Santirez     server.repl_down_since = 0; /* Never connected, repl is down since EVER. */
1553cef054e8Santirez     server.repl_disable_tcp_nodelay = CONFIG_DEFAULT_REPL_DISABLE_TCP_NODELAY;
1554cef054e8Santirez     server.repl_diskless_sync = CONFIG_DEFAULT_REPL_DISKLESS_SYNC;
1555cef054e8Santirez     server.repl_diskless_sync_delay = CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY;
1556cef054e8Santirez     server.slave_priority = CONFIG_DEFAULT_SLAVE_PRIORITY;
1557*0a45fbc3Santirez     server.slave_announce_ip = CONFIG_DEFAULT_SLAVE_ANNOUNCE_IP;
1558*0a45fbc3Santirez     server.slave_announce_port = CONFIG_DEFAULT_SLAVE_ANNOUNCE_PORT;
1559cef054e8Santirez     server.master_repl_offset = 0;
1560cef054e8Santirez 
1561cef054e8Santirez     /* Replication partial resync backlog */
1562cef054e8Santirez     server.repl_backlog = NULL;
1563cef054e8Santirez     server.repl_backlog_size = CONFIG_DEFAULT_REPL_BACKLOG_SIZE;
1564cef054e8Santirez     server.repl_backlog_histlen = 0;
1565cef054e8Santirez     server.repl_backlog_idx = 0;
1566cef054e8Santirez     server.repl_backlog_off = 0;
1567cef054e8Santirez     server.repl_backlog_time_limit = CONFIG_DEFAULT_REPL_BACKLOG_TIME_LIMIT;
1568cef054e8Santirez     server.repl_no_slaves_since = time(NULL);
1569cef054e8Santirez 
1570cef054e8Santirez     /* Client output buffer limits */
1571e6f39338Santirez     for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++)
1572cef054e8Santirez         server.client_obuf_limits[j] = clientBufferLimitsDefaults[j];
1573cef054e8Santirez 
1574cef054e8Santirez     /* Double constants initialization */
1575cef054e8Santirez     R_Zero = 0.0;
1576cef054e8Santirez     R_PosInf = 1.0/R_Zero;
1577cef054e8Santirez     R_NegInf = -1.0/R_Zero;
1578cef054e8Santirez     R_Nan = R_Zero/R_Zero;
1579cef054e8Santirez 
1580cef054e8Santirez     /* Command table -- we initialize it here as it is part of the
1581cef054e8Santirez      * initial configuration, since command names may be changed via
1582cef054e8Santirez      * redis.conf using the rename-command directive. */
1583cef054e8Santirez     server.commands = dictCreate(&commandTableDictType,NULL);
1584cef054e8Santirez     server.orig_commands = dictCreate(&commandTableDictType,NULL);
1585cef054e8Santirez     populateCommandTable();
1586cef054e8Santirez     server.delCommand = lookupCommandByCString("del");
1587cef054e8Santirez     server.multiCommand = lookupCommandByCString("multi");
1588cef054e8Santirez     server.lpushCommand = lookupCommandByCString("lpush");
1589cef054e8Santirez     server.lpopCommand = lookupCommandByCString("lpop");
1590cef054e8Santirez     server.rpopCommand = lookupCommandByCString("rpop");
1591cef054e8Santirez     server.sremCommand = lookupCommandByCString("srem");
1592cbbfaf6aSantirez     server.execCommand = lookupCommandByCString("exec");
1593cef054e8Santirez 
1594cef054e8Santirez     /* Slow log */
159532f80e2fSantirez     server.slowlog_log_slower_than = CONFIG_DEFAULT_SLOWLOG_LOG_SLOWER_THAN;
159632f80e2fSantirez     server.slowlog_max_len = CONFIG_DEFAULT_SLOWLOG_MAX_LEN;
1597cef054e8Santirez 
1598cef054e8Santirez     /* Latency monitor */
1599cef054e8Santirez     server.latency_monitor_threshold = CONFIG_DEFAULT_LATENCY_MONITOR_THRESHOLD;
1600cef054e8Santirez 
1601cef054e8Santirez     /* Debugging */
1602cef054e8Santirez     server.assert_failed = "<no assertion failed>";
1603cef054e8Santirez     server.assert_file = "<no file>";
1604cef054e8Santirez     server.assert_line = 0;
1605cef054e8Santirez     server.bug_report_start = 0;
1606cef054e8Santirez     server.watchdog_period = 0;
1607cef054e8Santirez }
1608cef054e8Santirez 
16091db84c21Santirez extern char **environ;
16101db84c21Santirez 
16111db84c21Santirez /* Restart the server, executing the same executable that started this
16121db84c21Santirez  * instance, with the same arguments and configuration file.
16131db84c21Santirez  *
1614fcb3aef7Santirez  * The function is designed to directly call execve() so that the new
1615fcb3aef7Santirez  * server instance will retain the PID of the previous one.
1616fcb3aef7Santirez  *
16171db84c21Santirez  * The list of flags, that may be bitwise ORed together, alter the
16181db84c21Santirez  * behavior of this function:
16191db84c21Santirez  *
16201db84c21Santirez  * RESTART_SERVER_NONE              No flags.
16211db84c21Santirez  * RESTART_SERVER_GRACEFULLY        Do a proper shutdown before restarting.
16221db84c21Santirez  * RESTART_SERVER_CONFIG_REWRITE    Rewrite the config file before restarting.
16231db84c21Santirez  *
16241db84c21Santirez  * On success the function does not return, because the process turns into
16251db84c21Santirez  * a different process. On error C_ERR is returned. */
int restartServer(int flags, mstime_t delay) {
    int j;

    /* Check that this process can still execute the binary that started
     * the instance. Every refusal below is logged, so the caller's C_ERR
     * can be traced back to a cause. */
    if (access(server.executable,X_OK) == -1) {
        serverLog(LL_WARNING,"Can't restart: unable to execute %s: %s",
            server.executable, strerror(errno));
        return C_ERR;
    }

    /* Config rewriting: only when requested and a config file is in use. */
    if (flags & RESTART_SERVER_CONFIG_REWRITE &&
        server.configfile &&
        rewriteConfig(server.configfile) == -1)
    {
        serverLog(LL_WARNING,
            "Can't restart: configuration rewrite process failed");
        return C_ERR;
    }

    /* Perform a proper shutdown. */
    if (flags & RESTART_SERVER_GRACEFULLY &&
        prepareForShutdown(SHUTDOWN_NOFLAGS) != C_OK)
    {
        serverLog(LL_WARNING,"Can't restart: error preparing for shutdown");
        return C_ERR;
    }

    /* Close all file descriptors, with the exception of stdin, stdout, stderr
     * which are useful if we restart a Redis server which is not daemonized.
     * The upper bound is maxclients plus 1024 extra descriptors. */
    for (j = 3; j < (int)server.maxclients + 1024; j++) close(j);

    /* Execute the server with the original command line. 'delay' is in
     * milliseconds, usleep() wants microseconds. */
    if (delay) usleep(delay*1000);
    execve(server.executable,server.exec_argv,environ);

    /* If an error occurred here, there is nothing we can do, but exit:
     * the descriptors are already closed. */
    _exit(1);

    return C_ERR; /* Never reached. */
}
16551db84c21Santirez 
1656cef054e8Santirez /* This function will try to raise the max number of open files according to
1657cef054e8Santirez  * the configured max number of clients. It also reserves a number of file
165832f80e2fSantirez  * descriptors (CONFIG_MIN_RESERVED_FDS) for extra operations of
1659cef054e8Santirez  * persistence, listening sockets, log files and so forth.
1660cef054e8Santirez  *
1661cef054e8Santirez  * If it is not possible to set the limit according to the configured
1662cef054e8Santirez  * max number of clients, the function will do the reverse, setting
1663cef054e8Santirez  * server.maxclients to the value that we can actually handle. */
adjustOpenFilesLimit(void)1664cef054e8Santirez void adjustOpenFilesLimit(void) {
166532f80e2fSantirez     rlim_t maxfiles = server.maxclients+CONFIG_MIN_RESERVED_FDS;
1666cef054e8Santirez     struct rlimit limit;
1667cef054e8Santirez 
1668cef054e8Santirez     if (getrlimit(RLIMIT_NOFILE,&limit) == -1) {
166932f80e2fSantirez         serverLog(LL_WARNING,"Unable to obtain the current NOFILE limit (%s), assuming 1024 and setting the max clients configuration accordingly.",
1670cef054e8Santirez             strerror(errno));
167132f80e2fSantirez         server.maxclients = 1024-CONFIG_MIN_RESERVED_FDS;
1672cef054e8Santirez     } else {
1673cef054e8Santirez         rlim_t oldlimit = limit.rlim_cur;
1674cef054e8Santirez 
1675cef054e8Santirez         /* Set the max number of files if the current limit is not enough
1676cef054e8Santirez          * for our needs. */
1677cef054e8Santirez         if (oldlimit < maxfiles) {
1678cef054e8Santirez             rlim_t bestlimit;
1679cef054e8Santirez             int setrlimit_error = 0;
1680cef054e8Santirez 
1681cef054e8Santirez             /* Try to set the file limit to match 'maxfiles' or at least
1682cef054e8Santirez              * to the higher value supported less than maxfiles. */
1683cef054e8Santirez             bestlimit = maxfiles;
1684cef054e8Santirez             while(bestlimit > oldlimit) {
1685cef054e8Santirez                 rlim_t decr_step = 16;
1686cef054e8Santirez 
1687cef054e8Santirez                 limit.rlim_cur = bestlimit;
1688cef054e8Santirez                 limit.rlim_max = bestlimit;
1689cef054e8Santirez                 if (setrlimit(RLIMIT_NOFILE,&limit) != -1) break;
1690cef054e8Santirez                 setrlimit_error = errno;
1691cef054e8Santirez 
1692cef054e8Santirez                 /* We failed to set file limit to 'bestlimit'. Try with a
1693cef054e8Santirez                  * smaller limit decrementing by a few FDs per iteration. */
1694cef054e8Santirez                 if (bestlimit < decr_step) break;
1695cef054e8Santirez                 bestlimit -= decr_step;
1696cef054e8Santirez             }
1697cef054e8Santirez 
1698cef054e8Santirez             /* Assume that the limit we get initially is still valid if
1699cef054e8Santirez              * our last try was even lower. */
1700cef054e8Santirez             if (bestlimit < oldlimit) bestlimit = oldlimit;
1701cef054e8Santirez 
1702cef054e8Santirez             if (bestlimit < maxfiles) {
1703cef054e8Santirez                 int old_maxclients = server.maxclients;
170432f80e2fSantirez                 server.maxclients = bestlimit-CONFIG_MIN_RESERVED_FDS;
1705cef054e8Santirez                 if (server.maxclients < 1) {
170632f80e2fSantirez                     serverLog(LL_WARNING,"Your current 'ulimit -n' "
170754c71f2dSantirez                         "of %llu is not enough for the server to start. "
1708cef054e8Santirez                         "Please increase your open file limit to at least "
1709cef054e8Santirez                         "%llu. Exiting.",
1710cef054e8Santirez                         (unsigned long long) oldlimit,
1711cef054e8Santirez                         (unsigned long long) maxfiles);
1712cef054e8Santirez                     exit(1);
1713cef054e8Santirez                 }
171432f80e2fSantirez                 serverLog(LL_WARNING,"You requested maxclients of %d "
1715cef054e8Santirez                     "requiring at least %llu max file descriptors.",
1716cef054e8Santirez                     old_maxclients,
1717cef054e8Santirez                     (unsigned long long) maxfiles);
171854c71f2dSantirez                 serverLog(LL_WARNING,"Server can't set maximum open files "
1719cef054e8Santirez                     "to %llu because of OS error: %s.",
1720cef054e8Santirez                     (unsigned long long) maxfiles, strerror(setrlimit_error));
172132f80e2fSantirez                 serverLog(LL_WARNING,"Current maximum open files is %llu. "
1722cef054e8Santirez                     "maxclients has been reduced to %d to compensate for "
1723cef054e8Santirez                     "low ulimit. "
1724cef054e8Santirez                     "If you need higher maxclients increase 'ulimit -n'.",
1725cef054e8Santirez                     (unsigned long long) bestlimit, server.maxclients);
1726cef054e8Santirez             } else {
172732f80e2fSantirez                 serverLog(LL_NOTICE,"Increased maximum number of open files "
1728cef054e8Santirez                     "to %llu (it was originally set to %llu).",
1729cef054e8Santirez                     (unsigned long long) maxfiles,
1730cef054e8Santirez                     (unsigned long long) oldlimit);
1731cef054e8Santirez             }
1732cef054e8Santirez         }
1733cef054e8Santirez     }
1734cef054e8Santirez }
1735cef054e8Santirez 
1736cef054e8Santirez /* Check that server.tcp_backlog can be actually enforced in Linux according
1737cef054e8Santirez  * to the value of /proc/sys/net/core/somaxconn, or warn about it. */
void checkTcpBacklogSettings(void) {
#ifdef HAVE_PROC_SOMAXCONN
    char line[1024];
    int kernel_limit = 0; /* Stays 0 (no warning) if nothing can be read. */
    FILE *procfile = fopen("/proc/sys/net/core/somaxconn","r");

    /* Silently give up if the proc entry cannot be opened. */
    if (procfile == NULL) return;

    if (fgets(line,sizeof(line),procfile) != NULL) kernel_limit = atoi(line);
    fclose(procfile);

    /* Warn only for a positive value that is lower than the configured
     * backlog. */
    if (kernel_limit > 0 && kernel_limit < server.tcp_backlog) {
        serverLog(LL_WARNING,
            "WARNING: The TCP backlog setting of %d cannot be enforced "
            "because /proc/sys/net/core/somaxconn is set to the lower "
            "value of %d.", server.tcp_backlog, kernel_limit);
    }
#endif
}
1752cef054e8Santirez 
1753cef054e8Santirez /* Initialize a set of file descriptors to listen to the specified 'port'
1754cef054e8Santirez  * binding the addresses specified in the Redis server configuration.
1755cef054e8Santirez  *
1756cef054e8Santirez  * The listening file descriptors are stored in the integer array 'fds'
1757cef054e8Santirez  * and their number is set in '*count'.
1758cef054e8Santirez  *
1759cef054e8Santirez  * The addresses to bind are specified in the global server.bindaddr array
1760cef054e8Santirez  * and their number is server.bindaddr_count. If the server configuration
1761cef054e8Santirez  * contains no specific addresses to bind, this function will try to
1762cef054e8Santirez  * bind * (all addresses) for both the IPv4 and IPv6 protocols.
1763cef054e8Santirez  *
176440eb548aSantirez  * On success the function returns C_OK.
1765cef054e8Santirez  *
176640eb548aSantirez  * On error the function returns C_ERR. For the function to be on
1767cef054e8Santirez  * error, at least one of the server.bindaddr addresses was
1768cef054e8Santirez  * impossible to bind, or no bind addresses were specified in the server
1769cef054e8Santirez  * configuration but the function is not able to bind * for at least
1770cef054e8Santirez  * one of the IPv4 or IPv6 protocols. */
listenToPort(int port,int * fds,int * count)1771cef054e8Santirez int listenToPort(int port, int *fds, int *count) {
1772cef054e8Santirez     int j;
1773cef054e8Santirez 
1774cef054e8Santirez     /* Force binding of 0.0.0.0 if no bind address is specified, always
1775cef054e8Santirez      * entering the loop if j == 0. */
1776cef054e8Santirez     if (server.bindaddr_count == 0) server.bindaddr[0] = NULL;
1777cef054e8Santirez     for (j = 0; j < server.bindaddr_count || j == 0; j++) {
1778cef054e8Santirez         if (server.bindaddr[j] == NULL) {
1779cef054e8Santirez             /* Bind * for both IPv6 and IPv4, we enter here only if
1780cef054e8Santirez              * server.bindaddr_count == 0. */
1781cef054e8Santirez             fds[*count] = anetTcp6Server(server.neterr,port,NULL,
1782cef054e8Santirez                 server.tcp_backlog);
1783cef054e8Santirez             if (fds[*count] != ANET_ERR) {
1784cef054e8Santirez                 anetNonBlock(NULL,fds[*count]);
1785cef054e8Santirez                 (*count)++;
17860fda0622Santirez 
17870fda0622Santirez                 /* Bind the IPv4 address as well. */
1788cef054e8Santirez                 fds[*count] = anetTcpServer(server.neterr,port,NULL,
1789cef054e8Santirez                     server.tcp_backlog);
1790cef054e8Santirez                 if (fds[*count] != ANET_ERR) {
1791cef054e8Santirez                     anetNonBlock(NULL,fds[*count]);
1792cef054e8Santirez                     (*count)++;
1793cef054e8Santirez                 }
17940fda0622Santirez             }
17950fda0622Santirez             /* Exit the loop if we were able to bind * on IPv4 and IPv6,
1796cef054e8Santirez              * otherwise fds[*count] will be ANET_ERR and we'll print an
1797cef054e8Santirez              * error and return to the caller with an error. */
17980fda0622Santirez             if (*count == 2) break;
1799cef054e8Santirez         } else if (strchr(server.bindaddr[j],':')) {
1800cef054e8Santirez             /* Bind IPv6 address. */
1801cef054e8Santirez             fds[*count] = anetTcp6Server(server.neterr,port,server.bindaddr[j],
1802cef054e8Santirez                 server.tcp_backlog);
1803cef054e8Santirez         } else {
1804cef054e8Santirez             /* Bind IPv4 address. */
1805cef054e8Santirez             fds[*count] = anetTcpServer(server.neterr,port,server.bindaddr[j],
1806cef054e8Santirez                 server.tcp_backlog);
1807cef054e8Santirez         }
1808cef054e8Santirez         if (fds[*count] == ANET_ERR) {
180932f80e2fSantirez             serverLog(LL_WARNING,
1810cef054e8Santirez                 "Creating Server TCP listening socket %s:%d: %s",
1811cef054e8Santirez                 server.bindaddr[j] ? server.bindaddr[j] : "*",
1812cef054e8Santirez                 port, server.neterr);
181340eb548aSantirez             return C_ERR;
1814cef054e8Santirez         }
1815cef054e8Santirez         anetNonBlock(NULL,fds[*count]);
1816cef054e8Santirez         (*count)++;
1817cef054e8Santirez     }
181840eb548aSantirez     return C_OK;
1819cef054e8Santirez }
1820cef054e8Santirez 
1821cef054e8Santirez /* Resets the stats that we expose via INFO or other means that we want
1822cef054e8Santirez  * to reset via CONFIG RESETSTAT. The function is also used in order to
1823cef054e8Santirez  * initialize these fields in initServer() at server startup. */
resetServerStats(void)1824cef054e8Santirez void resetServerStats(void) {
1825cef054e8Santirez     int j;
1826cef054e8Santirez 
1827cef054e8Santirez     server.stat_numcommands = 0;
1828cef054e8Santirez     server.stat_numconnections = 0;
1829cef054e8Santirez     server.stat_expiredkeys = 0;
1830cef054e8Santirez     server.stat_evictedkeys = 0;
1831cef054e8Santirez     server.stat_keyspace_misses = 0;
1832cef054e8Santirez     server.stat_keyspace_hits = 0;
1833cef054e8Santirez     server.stat_fork_time = 0;
1834cef054e8Santirez     server.stat_fork_rate = 0;
1835cef054e8Santirez     server.stat_rejected_conn = 0;
1836cef054e8Santirez     server.stat_sync_full = 0;
1837cef054e8Santirez     server.stat_sync_partial_ok = 0;
1838cef054e8Santirez     server.stat_sync_partial_err = 0;
183932f80e2fSantirez     for (j = 0; j < STATS_METRIC_COUNT; j++) {
1840cef054e8Santirez         server.inst_metric[j].idx = 0;
1841cef054e8Santirez         server.inst_metric[j].last_sample_time = mstime();
1842cef054e8Santirez         server.inst_metric[j].last_sample_count = 0;
1843cef054e8Santirez         memset(server.inst_metric[j].samples,0,
1844cef054e8Santirez             sizeof(server.inst_metric[j].samples));
1845cef054e8Santirez     }
1846cef054e8Santirez     server.stat_net_input_bytes = 0;
1847cef054e8Santirez     server.stat_net_output_bytes = 0;
1848cef054e8Santirez     server.aof_delayed_fsync = 0;
1849cef054e8Santirez }
1850cef054e8Santirez 
initServer(void)1851cef054e8Santirez void initServer(void) {
1852cef054e8Santirez     int j;
1853cef054e8Santirez 
1854cef054e8Santirez     signal(SIGHUP, SIG_IGN);
1855cef054e8Santirez     signal(SIGPIPE, SIG_IGN);
1856cef054e8Santirez     setupSignalHandlers();
1857cef054e8Santirez 
1858cef054e8Santirez     if (server.syslog_enabled) {
1859cef054e8Santirez         openlog(server.syslog_ident, LOG_PID | LOG_NDELAY | LOG_NOWAIT,
1860cef054e8Santirez             server.syslog_facility);
1861cef054e8Santirez     }
1862cef054e8Santirez 
1863cef054e8Santirez     server.pid = getpid();
1864cef054e8Santirez     server.current_client = NULL;
1865cef054e8Santirez     server.clients = listCreate();
1866cef054e8Santirez     server.clients_to_close = listCreate();
1867cef054e8Santirez     server.slaves = listCreate();
1868cef054e8Santirez     server.monitors = listCreate();
186923e7710cSantirez     server.clients_pending_write = listCreate();
1870cef054e8Santirez     server.slaveseldb = -1; /* Force to emit the first SELECT command. */
1871cef054e8Santirez     server.unblocked_clients = listCreate();
1872cef054e8Santirez     server.ready_keys = listCreate();
1873cef054e8Santirez     server.clients_waiting_acks = listCreate();
1874cef054e8Santirez     server.get_ack_from_slaves = 0;
1875cef054e8Santirez     server.clients_paused = 0;
1876cef054e8Santirez     server.system_memory_size = zmalloc_get_memory_size();
1877cef054e8Santirez 
1878cef054e8Santirez     createSharedObjects();
1879cef054e8Santirez     adjustOpenFilesLimit();
188032f80e2fSantirez     server.el = aeCreateEventLoop(server.maxclients+CONFIG_FDSET_INCR);
1881cef054e8Santirez     server.db = zmalloc(sizeof(redisDb)*server.dbnum);
1882cef054e8Santirez 
1883cef054e8Santirez     /* Open the TCP listening socket for the user commands. */
1884cef054e8Santirez     if (server.port != 0 &&
188540eb548aSantirez         listenToPort(server.port,server.ipfd,&server.ipfd_count) == C_ERR)
1886cef054e8Santirez         exit(1);
1887cef054e8Santirez 
1888cef054e8Santirez     /* Open the listening Unix domain socket. */
1889cef054e8Santirez     if (server.unixsocket != NULL) {
1890cef054e8Santirez         unlink(server.unixsocket); /* don't care if this fails */
1891cef054e8Santirez         server.sofd = anetUnixServer(server.neterr,server.unixsocket,
1892cef054e8Santirez             server.unixsocketperm, server.tcp_backlog);
1893cef054e8Santirez         if (server.sofd == ANET_ERR) {
189432f80e2fSantirez             serverLog(LL_WARNING, "Opening Unix socket: %s", server.neterr);
1895cef054e8Santirez             exit(1);
1896cef054e8Santirez         }
1897cef054e8Santirez         anetNonBlock(NULL,server.sofd);
1898cef054e8Santirez     }
1899cef054e8Santirez 
1900cef054e8Santirez     /* Abort if there are no listening sockets at all. */
1901cef054e8Santirez     if (server.ipfd_count == 0 && server.sofd < 0) {
190232f80e2fSantirez         serverLog(LL_WARNING, "Configured to not listen anywhere, exiting.");
1903cef054e8Santirez         exit(1);
1904cef054e8Santirez     }
1905cef054e8Santirez 
1906cef054e8Santirez     /* Create the Redis databases, and initialize other internal state. */
1907cef054e8Santirez     for (j = 0; j < server.dbnum; j++) {
1908cef054e8Santirez         server.db[j].dict = dictCreate(&dbDictType,NULL);
1909cef054e8Santirez         server.db[j].expires = dictCreate(&keyptrDictType,NULL);
1910cef054e8Santirez         server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL);
1911cef054e8Santirez         server.db[j].ready_keys = dictCreate(&setDictType,NULL);
1912cef054e8Santirez         server.db[j].watched_keys = dictCreate(&keylistDictType,NULL);
1913cef054e8Santirez         server.db[j].eviction_pool = evictionPoolAlloc();
1914cef054e8Santirez         server.db[j].id = j;
1915cef054e8Santirez         server.db[j].avg_ttl = 0;
1916cef054e8Santirez     }
1917cef054e8Santirez     server.pubsub_channels = dictCreate(&keylistDictType,NULL);
1918cef054e8Santirez     server.pubsub_patterns = listCreate();
1919cef054e8Santirez     listSetFreeMethod(server.pubsub_patterns,freePubsubPattern);
1920cef054e8Santirez     listSetMatchMethod(server.pubsub_patterns,listMatchPubsubPattern);
1921cef054e8Santirez     server.cronloops = 0;
1922cef054e8Santirez     server.rdb_child_pid = -1;
1923cef054e8Santirez     server.aof_child_pid = -1;
192432f80e2fSantirez     server.rdb_child_type = RDB_CHILD_TYPE_NONE;
192521cffc26Santirez     server.rdb_bgsave_scheduled = 0;
1926cef054e8Santirez     aofRewriteBufferReset();
1927cef054e8Santirez     server.aof_buf = sdsempty();
1928cef054e8Santirez     server.lastsave = time(NULL); /* At startup we consider the DB saved. */
1929cef054e8Santirez     server.lastbgsave_try = 0;    /* At startup we never tried to BGSAVE. */
1930cef054e8Santirez     server.rdb_save_time_last = -1;
1931cef054e8Santirez     server.rdb_save_time_start = -1;
1932cef054e8Santirez     server.dirty = 0;
1933cef054e8Santirez     resetServerStats();
1934cef054e8Santirez     /* A few stats we don't want to reset: server startup time, and peak mem. */
1935cef054e8Santirez     server.stat_starttime = time(NULL);
1936cef054e8Santirez     server.stat_peak_memory = 0;
1937cef054e8Santirez     server.resident_set_size = 0;
193840eb548aSantirez     server.lastbgsave_status = C_OK;
193940eb548aSantirez     server.aof_last_write_status = C_OK;
1940cef054e8Santirez     server.aof_last_write_errno = 0;
1941cef054e8Santirez     server.repl_good_slaves_count = 0;
1942cef054e8Santirez     updateCachedTime();
1943cef054e8Santirez 
1944cef054e8Santirez     /* Create the serverCron() time event, that's our main way to process
1945cef054e8Santirez      * background operations. */
1946cef054e8Santirez     if(aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL) == AE_ERR) {
194732f80e2fSantirez         serverPanic("Can't create the serverCron time event.");
1948cef054e8Santirez         exit(1);
1949cef054e8Santirez     }
1950cef054e8Santirez 
1951cef054e8Santirez     /* Create an event handler for accepting new connections in TCP and Unix
1952cef054e8Santirez      * domain sockets. */
1953cef054e8Santirez     for (j = 0; j < server.ipfd_count; j++) {
1954cef054e8Santirez         if (aeCreateFileEvent(server.el, server.ipfd[j], AE_READABLE,
1955cef054e8Santirez             acceptTcpHandler,NULL) == AE_ERR)
1956cef054e8Santirez             {
195732f80e2fSantirez                 serverPanic(
1958cef054e8Santirez                     "Unrecoverable error creating server.ipfd file event.");
1959cef054e8Santirez             }
1960cef054e8Santirez     }
1961cef054e8Santirez     if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE,
196232f80e2fSantirez         acceptUnixHandler,NULL) == AE_ERR) serverPanic("Unrecoverable error creating server.sofd file event.");
1963cef054e8Santirez 
1964cef054e8Santirez     /* Open the AOF file if needed. */
196532f80e2fSantirez     if (server.aof_state == AOF_ON) {
1966cef054e8Santirez         server.aof_fd = open(server.aof_filename,
1967cef054e8Santirez                                O_WRONLY|O_APPEND|O_CREAT,0644);
1968cef054e8Santirez         if (server.aof_fd == -1) {
196932f80e2fSantirez             serverLog(LL_WARNING, "Can't open the append-only file: %s",
1970cef054e8Santirez                 strerror(errno));
1971cef054e8Santirez             exit(1);
1972cef054e8Santirez         }
1973cef054e8Santirez     }
1974cef054e8Santirez 
1975cef054e8Santirez     /* 32 bit instances are limited to 4GB of address space, so if there is
1976cef054e8Santirez      * no explicit limit in the user provided configuration we set a limit
1977cef054e8Santirez      * at 3 GB using maxmemory with 'noeviction' policy'. This avoids
1978cef054e8Santirez      * useless crashes of the Redis instance for out of memory. */
1979cef054e8Santirez     if (server.arch_bits == 32 && server.maxmemory == 0) {
198032f80e2fSantirez         serverLog(LL_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3 GB maxmemory limit with 'noeviction' policy now.");
1981cef054e8Santirez         server.maxmemory = 3072LL*(1024*1024); /* 3 GB */
198232f80e2fSantirez         server.maxmemory_policy = MAXMEMORY_NO_EVICTION;
1983cef054e8Santirez     }
1984cef054e8Santirez 
1985cef054e8Santirez     if (server.cluster_enabled) clusterInit();
1986cef054e8Santirez     replicationScriptCacheInit();
1987d6c24ff6Santirez     scriptingInit(1);
1988cef054e8Santirez     slowlogInit();
1989cef054e8Santirez     latencyMonitorInit();
1990cef054e8Santirez     bioInit();
1991cef054e8Santirez }
1992cef054e8Santirez 
1993cef054e8Santirez /* Populates the Redis Command Table starting from the hard-coded list
1994cef054e8Santirez  * we have at the top of the server.c file. */
populateCommandTable(void)1995cef054e8Santirez void populateCommandTable(void) {
1996cef054e8Santirez     int j;
1997cef054e8Santirez     int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
1998cef054e8Santirez 
1999cef054e8Santirez     for (j = 0; j < numcommands; j++) {
2000cef054e8Santirez         struct redisCommand *c = redisCommandTable+j;
2001cef054e8Santirez         char *f = c->sflags;
2002cef054e8Santirez         int retval1, retval2;
2003cef054e8Santirez 
2004cef054e8Santirez         while(*f != '\0') {
2005cef054e8Santirez             switch(*f) {
200632f80e2fSantirez             case 'w': c->flags |= CMD_WRITE; break;
200732f80e2fSantirez             case 'r': c->flags |= CMD_READONLY; break;
200832f80e2fSantirez             case 'm': c->flags |= CMD_DENYOOM; break;
200932f80e2fSantirez             case 'a': c->flags |= CMD_ADMIN; break;
201032f80e2fSantirez             case 'p': c->flags |= CMD_PUBSUB; break;
201132f80e2fSantirez             case 's': c->flags |= CMD_NOSCRIPT; break;
201232f80e2fSantirez             case 'R': c->flags |= CMD_RANDOM; break;
201332f80e2fSantirez             case 'S': c->flags |= CMD_SORT_FOR_SCRIPT; break;
201432f80e2fSantirez             case 'l': c->flags |= CMD_LOADING; break;
201532f80e2fSantirez             case 't': c->flags |= CMD_STALE; break;
201632f80e2fSantirez             case 'M': c->flags |= CMD_SKIP_MONITOR; break;
201732f80e2fSantirez             case 'k': c->flags |= CMD_ASKING; break;
201832f80e2fSantirez             case 'F': c->flags |= CMD_FAST; break;
201932f80e2fSantirez             default: serverPanic("Unsupported command flag"); break;
2020cef054e8Santirez             }
2021cef054e8Santirez             f++;
2022cef054e8Santirez         }
2023cef054e8Santirez 
2024cef054e8Santirez         retval1 = dictAdd(server.commands, sdsnew(c->name), c);
2025cef054e8Santirez         /* Populate an additional dictionary that will be unaffected
2026cef054e8Santirez          * by rename-command statements in redis.conf. */
2027cef054e8Santirez         retval2 = dictAdd(server.orig_commands, sdsnew(c->name), c);
20282d9e3eb1Santirez         serverAssert(retval1 == DICT_OK && retval2 == DICT_OK);
2029cef054e8Santirez     }
2030cef054e8Santirez }
2031cef054e8Santirez 
resetCommandTableStats(void)2032cef054e8Santirez void resetCommandTableStats(void) {
2033cef054e8Santirez     int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
2034cef054e8Santirez     int j;
2035cef054e8Santirez 
2036cef054e8Santirez     for (j = 0; j < numcommands; j++) {
2037cef054e8Santirez         struct redisCommand *c = redisCommandTable+j;
2038cef054e8Santirez 
2039cef054e8Santirez         c->microseconds = 0;
2040cef054e8Santirez         c->calls = 0;
2041cef054e8Santirez     }
2042cef054e8Santirez }
2043cef054e8Santirez 
/* ========================== Redis OP Array API ============================ */
2045cef054e8Santirez 
/* Put 'oa' in the empty state: no backing storage and zero operations.
 * The previous content of the fields is overwritten, not released. */
void redisOpArrayInit(redisOpArray *oa) {
    oa->numops = 0;
    oa->ops = NULL;
}
2050cef054e8Santirez 
/* Append one operation to 'oa', growing the backing storage by exactly one
 * slot. The 'argv' pointer is stored as it is (the vector is not copied).
 *
 * Returns the number of operations in the array after the append. */
int redisOpArrayAppend(redisOpArray *oa, struct redisCommand *cmd, int dbid,
                       robj **argv, int argc, int target)
{
    int idx = oa->numops;
    redisOp *slot;

    oa->ops = zrealloc(oa->ops,sizeof(redisOp)*(idx+1));
    slot = &oa->ops[idx];
    slot->target = target;
    slot->argc = argc;
    slot->argv = argv;
    slot->dbid = dbid;
    slot->cmd = cmd;
    oa->numops = idx+1;
    return oa->numops;
}
2066cef054e8Santirez 
/* Release every operation stored in 'oa': for each operation the reference
 * count of all its arguments is decremented and its argument vector is
 * freed, then the operations storage itself is freed.
 *
 * On return the array is back in the same state redisOpArrayInit() sets
 * (numops == 0, ops == NULL), so it can be appended to or freed again
 * without touching already released memory. */
void redisOpArrayFree(redisOpArray *oa) {
    while(oa->numops) {
        int j;
        redisOp *op;

        /* Consume the operations from the last one to the first one. */
        oa->numops--;
        op = oa->ops+oa->numops;
        for (j = 0; j < op->argc; j++)
            decrRefCount(op->argv[j]);
        zfree(op->argv);
    }
    zfree(oa->ops);
    /* Do not leave a dangling pointer behind: redisOpArrayAppend() passes
     * oa->ops to zrealloc(), which must never see freed memory. */
    oa->ops = NULL;
}
2080cef054e8Santirez 
/* ====================== Commands lookup and execution ===================== */
2082cef054e8Santirez 
/* Return the command registered under 'name' in server.commands, or
 * whatever dictFetchValue() reports when the key is missing. */
struct redisCommand *lookupCommand(sds name) {
    struct redisCommand *cmd = dictFetchValue(server.commands, name);

    return cmd;
}
2086cef054e8Santirez 
lookupCommandByCString(char * s)2087cef054e8Santirez struct redisCommand *lookupCommandByCString(char *s) {
2088cef054e8Santirez     struct redisCommand *cmd;
2089cef054e8Santirez     sds name = sdsnew(s);
2090cef054e8Santirez 
2091cef054e8Santirez     cmd = dictFetchValue(server.commands, name);
2092cef054e8Santirez     sdsfree(name);
2093cef054e8Santirez     return cmd;
2094cef054e8Santirez }
2095cef054e8Santirez 
2096cef054e8Santirez /* Lookup the command in the current table, if not found also check in
2097cef054e8Santirez  * the original table containing the original command names unaffected by
2098cef054e8Santirez  * redis.conf rename-command statement.
2099cef054e8Santirez  *
2100cef054e8Santirez  * This is used by functions rewriting the argument vector such as
2101cef054e8Santirez  * rewriteClientCommandVector() in order to set client->cmd pointer
2102cef054e8Santirez  * correctly even if the command was renamed. */
lookupCommandOrOriginal(sds name)2103cef054e8Santirez struct redisCommand *lookupCommandOrOriginal(sds name) {
2104cef054e8Santirez     struct redisCommand *cmd = dictFetchValue(server.commands, name);
2105cef054e8Santirez 
2106cef054e8Santirez     if (!cmd) cmd = dictFetchValue(server.orig_commands,name);
2107cef054e8Santirez     return cmd;
2108cef054e8Santirez }
2109cef054e8Santirez 
2110cef054e8Santirez /* Propagate the specified command (in the context of the specified database id)
2111cef054e8Santirez  * to AOF and Slaves.
2112cef054e8Santirez  *
2113cef054e8Santirez  * flags are an xor between:
211432f80e2fSantirez  * + PROPAGATE_NONE (no propagation of command at all)
211532f80e2fSantirez  * + PROPAGATE_AOF (propagate into the AOF file if is enabled)
211632f80e2fSantirez  * + PROPAGATE_REPL (propagate into the replication link)
2117cef054e8Santirez  *
2118cef054e8Santirez  * This should not be used inside commands implementation. Use instead
2119cef054e8Santirez  * alsoPropagate(), preventCommandPropagation(), forceCommandPropagation().
2120cef054e8Santirez  */
propagate(struct redisCommand * cmd,int dbid,robj ** argv,int argc,int flags)2121cef054e8Santirez void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
2122cef054e8Santirez                int flags)
2123cef054e8Santirez {
212432f80e2fSantirez     if (server.aof_state != AOF_OFF && flags & PROPAGATE_AOF)
2125cef054e8Santirez         feedAppendOnlyFile(cmd,dbid,argv,argc);
212632f80e2fSantirez     if (flags & PROPAGATE_REPL)
2127cef054e8Santirez         replicationFeedSlaves(server.slaves,dbid,argv,argc);
2128cef054e8Santirez }
2129cef054e8Santirez 
2130cef054e8Santirez /* Used inside commands to schedule the propagation of additional commands
2131cef054e8Santirez  * after the current command is propagated to AOF / Replication.
2132cef054e8Santirez  *
2133cef054e8Santirez  * 'cmd' must be a pointer to the Redis command to replicate, dbid is the
2134cef054e8Santirez  * database ID the command should be propagated into.
2135cef054e8Santirez  * Arguments of the command to propagte are passed as an array of redis
2136cef054e8Santirez  * objects pointers of len 'argc', using the 'argv' vector.
2137cef054e8Santirez  *
2138cef054e8Santirez  * The function does not take a reference to the passed 'argv' vector,
2139cef054e8Santirez  * so it is up to the caller to release the passed argv (but it is usually
2140cef054e8Santirez  * stack allocated).  The function autoamtically increments ref count of
2141cef054e8Santirez  * passed objects, so the caller does not need to. */
alsoPropagate(struct redisCommand * cmd,int dbid,robj ** argv,int argc,int target)2142cef054e8Santirez void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc,
2143cef054e8Santirez                    int target)
2144cef054e8Santirez {
2145cef054e8Santirez     robj **argvcopy;
2146cef054e8Santirez     int j;
2147cef054e8Santirez 
2148cef054e8Santirez     if (server.loading) return; /* No propagation during loading. */
2149cef054e8Santirez 
2150cef054e8Santirez     argvcopy = zmalloc(sizeof(robj*)*argc);
2151cef054e8Santirez     for (j = 0; j < argc; j++) {
2152cef054e8Santirez         argvcopy[j] = argv[j];
2153cef054e8Santirez         incrRefCount(argv[j]);
2154cef054e8Santirez     }
2155cef054e8Santirez     redisOpArrayAppend(&server.also_propagate,cmd,dbid,argvcopy,argc,target);
2156cef054e8Santirez }
2157cef054e8Santirez 
2158cef054e8Santirez /* It is possible to call the function forceCommandPropagation() inside a
2159cef054e8Santirez  * Redis command implementation in order to to force the propagation of a
2160cef054e8Santirez  * specific command execution into AOF / Replication. */
forceCommandPropagation(client * c,int flags)2161554bd0e7Santirez void forceCommandPropagation(client *c, int flags) {
216232f80e2fSantirez     if (flags & PROPAGATE_REPL) c->flags |= CLIENT_FORCE_REPL;
216332f80e2fSantirez     if (flags & PROPAGATE_AOF) c->flags |= CLIENT_FORCE_AOF;
2164cef054e8Santirez }
2165cef054e8Santirez 
2166cef054e8Santirez /* Avoid that the executed command is propagated at all. This way we
2167cef054e8Santirez  * are free to just propagate what we want using the alsoPropagate()
2168cef054e8Santirez  * API. */
preventCommandPropagation(client * c)2169554bd0e7Santirez void preventCommandPropagation(client *c) {
217032f80e2fSantirez     c->flags |= CLIENT_PREVENT_PROP;
2171cef054e8Santirez }
2172cef054e8Santirez 
217324cf0f6dSantirez /* AOF specific version of preventCommandPropagation(). */
preventCommandAOF(client * c)217424cf0f6dSantirez void preventCommandAOF(client *c) {
217524cf0f6dSantirez     c->flags |= CLIENT_PREVENT_AOF_PROP;
217624cf0f6dSantirez }
217724cf0f6dSantirez 
217824cf0f6dSantirez /* Replication specific version of preventCommandPropagation(). */
preventCommandReplication(client * c)217924cf0f6dSantirez void preventCommandReplication(client *c) {
218024cf0f6dSantirez     c->flags |= CLIENT_PREVENT_REPL_PROP;
218124cf0f6dSantirez }
218224cf0f6dSantirez 
2183828a87baSantirez /* Call() is the core of Redis execution of a command.
2184828a87baSantirez  *
2185828a87baSantirez  * The following flags can be passed:
2186828a87baSantirez  * CMD_CALL_NONE        No flags.
2187828a87baSantirez  * CMD_CALL_SLOWLOG     Check command speed and log in the slow log if needed.
2188828a87baSantirez  * CMD_CALL_STATS       Populate command stats.
2189828a87baSantirez  * CMD_CALL_PROPAGATE_AOF   Append command to AOF if it modified the dataset
2190828a87baSantirez  *                          or if the client flags are forcing propagation.
2191828a87baSantirez  * CMD_CALL_PROPAGATE_REPL  Send command to salves if it modified the dataset
2192828a87baSantirez  *                          or if the client flags are forcing propagation.
2193828a87baSantirez  * CMD_CALL_PROPAGATE   Alias for PROPAGATE_AOF|PROPAGATE_REPL.
2194828a87baSantirez  * CMD_CALL_FULL        Alias for SLOWLOG|STATS|PROPAGATE.
2195828a87baSantirez  *
2196828a87baSantirez  * The exact propagation behavior depends on the client flags.
2197828a87baSantirez  * Specifically:
2198828a87baSantirez  *
2199828a87baSantirez  * 1. If the client flags CLIENT_FORCE_AOF or CLIENT_FORCE_REPL are set
2200828a87baSantirez  *    and assuming the corresponding CMD_CALL_PROPAGATE_AOF/REPL is set
2201828a87baSantirez  *    in the call flags, then the command is propagated even if the
2202828a87baSantirez  *    dataset was not affected by the command.
2203828a87baSantirez  * 2. If the client flags CLIENT_PREVENT_REPL_PROP or CLIENT_PREVENT_AOF_PROP
2204828a87baSantirez  *    are set, the propagation into AOF or to slaves is not performed even
2205828a87baSantirez  *    if the command modified the dataset.
2206828a87baSantirez  *
2207828a87baSantirez  * Note that regardless of the client flags, if CMD_CALL_PROPAGATE_AOF
2208828a87baSantirez  * or CMD_CALL_PROPAGATE_REPL are not set, then respectively AOF or
2209828a87baSantirez  * slaves propagation will never occur.
2210828a87baSantirez  *
2211828a87baSantirez  * Client flags are modified by the implementation of a given command
2212828a87baSantirez  * using the following API:
2213828a87baSantirez  *
2214828a87baSantirez  * forceCommandPropagation(client *c, int flags);
2215828a87baSantirez  * preventCommandPropagation(client *c);
2216828a87baSantirez  * preventCommandAOF(client *c);
2217828a87baSantirez  * preventCommandReplication(client *c);
2218828a87baSantirez  *
2219828a87baSantirez  */
call(client * c,int flags)2220554bd0e7Santirez void call(client *c, int flags) {
2221cef054e8Santirez     long long dirty, start, duration;
2222cef054e8Santirez     int client_old_flags = c->flags;
2223cef054e8Santirez 
2224cef054e8Santirez     /* Sent the command to clients in MONITOR mode, only if the commands are
2225cef054e8Santirez      * not generated from reading an AOF. */
2226cef054e8Santirez     if (listLength(server.monitors) &&
2227cef054e8Santirez         !server.loading &&
222832f80e2fSantirez         !(c->cmd->flags & (CMD_SKIP_MONITOR|CMD_ADMIN)))
2229cef054e8Santirez     {
2230cef054e8Santirez         replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
2231cef054e8Santirez     }
2232cef054e8Santirez 
22330259da8cSantirez     /* Initialization: clear the flags that must be set by the command on
22340259da8cSantirez      * demand, and initialize the array for additional commands propagation. */
22350259da8cSantirez     c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
2236cef054e8Santirez     redisOpArrayInit(&server.also_propagate);
22370259da8cSantirez 
22380259da8cSantirez     /* Call the command. */
2239cef054e8Santirez     dirty = server.dirty;
2240cef054e8Santirez     start = ustime();
2241cef054e8Santirez     c->cmd->proc(c);
2242cef054e8Santirez     duration = ustime()-start;
2243cef054e8Santirez     dirty = server.dirty-dirty;
2244cef054e8Santirez     if (dirty < 0) dirty = 0;
2245cef054e8Santirez 
2246cef054e8Santirez     /* When EVAL is called loading the AOF we don't want commands called
2247cef054e8Santirez      * from Lua to go into the slowlog or to populate statistics. */
224832f80e2fSantirez     if (server.loading && c->flags & CLIENT_LUA)
224932f80e2fSantirez         flags &= ~(CMD_CALL_SLOWLOG | CMD_CALL_STATS);
2250cef054e8Santirez 
2251cef054e8Santirez     /* If the caller is Lua, we want to force the EVAL caller to propagate
2252cef054e8Santirez      * the script if the command flag or client flag are forcing the
2253cef054e8Santirez      * propagation. */
225432f80e2fSantirez     if (c->flags & CLIENT_LUA && server.lua_caller) {
225532f80e2fSantirez         if (c->flags & CLIENT_FORCE_REPL)
225632f80e2fSantirez             server.lua_caller->flags |= CLIENT_FORCE_REPL;
225732f80e2fSantirez         if (c->flags & CLIENT_FORCE_AOF)
225832f80e2fSantirez             server.lua_caller->flags |= CLIENT_FORCE_AOF;
2259cef054e8Santirez     }
2260cef054e8Santirez 
2261cef054e8Santirez     /* Log the command into the Slow log if needed, and populate the
2262cef054e8Santirez      * per-command statistics that we show in INFO commandstats. */
226332f80e2fSantirez     if (flags & CMD_CALL_SLOWLOG && c->cmd->proc != execCommand) {
226432f80e2fSantirez         char *latency_event = (c->cmd->flags & CMD_FAST) ?
2265cef054e8Santirez                               "fast-command" : "command";
2266cef054e8Santirez         latencyAddSampleIfNeeded(latency_event,duration/1000);
2267cef054e8Santirez         slowlogPushEntryIfNeeded(c->argv,c->argc,duration);
2268cef054e8Santirez     }
226932f80e2fSantirez     if (flags & CMD_CALL_STATS) {
2270ba9154d7Santirez         c->lastcmd->microseconds += duration;
2271ba9154d7Santirez         c->lastcmd->calls++;
2272cef054e8Santirez     }
2273cef054e8Santirez 
2274cef054e8Santirez     /* Propagate the command into the AOF and replication link */
2275dfe7f797Santirez     if (flags & CMD_CALL_PROPAGATE &&
2276dfe7f797Santirez         (c->flags & CLIENT_PREVENT_PROP) != CLIENT_PREVENT_PROP)
2277dfe7f797Santirez     {
2278dfe7f797Santirez         int propagate_flags = PROPAGATE_NONE;
2279cef054e8Santirez 
228024cf0f6dSantirez         /* Check if the command operated changes in the data set. If so
228124cf0f6dSantirez          * set for replication / AOF propagation. */
2282dfe7f797Santirez         if (dirty) propagate_flags |= (PROPAGATE_AOF|PROPAGATE_REPL);
228324cf0f6dSantirez 
2284828a87baSantirez         /* If the client forced AOF / replication of the command, set
228524cf0f6dSantirez          * the flags regardless of the command effects on the data set. */
2286e9d2329cSantirez         if (c->flags & CLIENT_FORCE_REPL) propagate_flags |= PROPAGATE_REPL;
2287e9d2329cSantirez         if (c->flags & CLIENT_FORCE_AOF) propagate_flags |= PROPAGATE_AOF;
228824cf0f6dSantirez 
2289dfe7f797Santirez         /* However prevent AOF / replication propagation if the command
2290dfe7f797Santirez          * implementatino called preventCommandPropagation() or similar,
2291dfe7f797Santirez          * or if we don't have the call() flags to do so. */
2292dfe7f797Santirez         if (c->flags & CLIENT_PREVENT_REPL_PROP ||
2293dfe7f797Santirez             !(flags & CMD_CALL_PROPAGATE_REPL))
2294dfe7f797Santirez                 propagate_flags &= ~PROPAGATE_REPL;
2295dfe7f797Santirez         if (c->flags & CLIENT_PREVENT_AOF_PROP ||
2296dfe7f797Santirez             !(flags & CMD_CALL_PROPAGATE_AOF))
2297dfe7f797Santirez                 propagate_flags &= ~PROPAGATE_AOF;
229824cf0f6dSantirez 
229924cf0f6dSantirez         /* Call propagate() only if at least one of AOF / replication
230024cf0f6dSantirez          * propagation is needed. */
2301dfe7f797Santirez         if (propagate_flags != PROPAGATE_NONE)
2302dfe7f797Santirez             propagate(c->cmd,c->db->id,c->argv,c->argc,propagate_flags);
2303cef054e8Santirez     }
2304cef054e8Santirez 
23050259da8cSantirez     /* Restore the old replication flags, since call() can be executed
2306cef054e8Santirez      * recursively. */
230732f80e2fSantirez     c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
2308cef054e8Santirez     c->flags |= client_old_flags &
230932f80e2fSantirez         (CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
2310cef054e8Santirez 
2311cef054e8Santirez     /* Handle the alsoPropagate() API to handle commands that want to propagate
2312cef054e8Santirez      * multiple separated commands. Note that alsoPropagate() is not affected
231332f80e2fSantirez      * by CLIENT_PREVENT_PROP flag. */
2314cef054e8Santirez     if (server.also_propagate.numops) {
2315cef054e8Santirez         int j;
2316cef054e8Santirez         redisOp *rop;
2317cef054e8Santirez 
231832f80e2fSantirez         if (flags & CMD_CALL_PROPAGATE) {
2319cef054e8Santirez             for (j = 0; j < server.also_propagate.numops; j++) {
2320cef054e8Santirez                 rop = &server.also_propagate.ops[j];
2321dfe7f797Santirez                 int target = rop->target;
2322dfe7f797Santirez                 /* Whatever the command wish is, we honor the call() flags. */
2323dfe7f797Santirez                 if (!(flags&CMD_CALL_PROPAGATE_AOF)) target &= ~PROPAGATE_AOF;
2324dfe7f797Santirez                 if (!(flags&CMD_CALL_PROPAGATE_REPL)) target &= ~PROPAGATE_REPL;
2325dfe7f797Santirez                 if (target)
2326dfe7f797Santirez                     propagate(rop->cmd,rop->dbid,rop->argv,rop->argc,target);
2327cef054e8Santirez             }
2328cef054e8Santirez         }
2329cef054e8Santirez         redisOpArrayFree(&server.also_propagate);
2330cef054e8Santirez     }
2331cef054e8Santirez     server.stat_numcommands++;
2332cef054e8Santirez }
2333cef054e8Santirez 
2334cef054e8Santirez /* If this function gets called we already read a whole
2335cef054e8Santirez  * command, arguments are in the client argv/argc fields.
2336cef054e8Santirez  * processCommand() execute the command or prepare the
2337cef054e8Santirez  * server for a bulk read from the client.
2338cef054e8Santirez  *
2339f50dfff0Santirez  * If C_OK is returned the client is still alive and valid and
2340cef054e8Santirez  * other operations can be performed by the caller. Otherwise
2341f50dfff0Santirez  * if C_ERR is returned the client was destroyed (i.e. after QUIT). */
processCommand(client * c)2342554bd0e7Santirez int processCommand(client *c) {
2343cef054e8Santirez     /* The QUIT command is handled separately. Normal command procs will
2344cef054e8Santirez      * go through checking for replication and QUIT will cause trouble
2345cef054e8Santirez      * when FORCE_REPLICATION is enabled and would be implemented in
2346cef054e8Santirez      * a regular command proc. */
2347cef054e8Santirez     if (!strcasecmp(c->argv[0]->ptr,"quit")) {
2348cef054e8Santirez         addReply(c,shared.ok);
234932f80e2fSantirez         c->flags |= CLIENT_CLOSE_AFTER_REPLY;
235040eb548aSantirez         return C_ERR;
2351cef054e8Santirez     }
2352cef054e8Santirez 
2353cef054e8Santirez     /* Now lookup the command and check ASAP about trivial error conditions
2354cef054e8Santirez      * such as wrong arity, bad command name and so forth. */
2355cef054e8Santirez     c->cmd = c->lastcmd = lookupCommand(c->argv[0]->ptr);
2356cef054e8Santirez     if (!c->cmd) {
2357cef054e8Santirez         flagTransaction(c);
2358cef054e8Santirez         addReplyErrorFormat(c,"unknown command '%s'",
2359cef054e8Santirez             (char*)c->argv[0]->ptr);
236040eb548aSantirez         return C_OK;
2361cef054e8Santirez     } else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) ||
2362cef054e8Santirez                (c->argc < -c->cmd->arity)) {
2363cef054e8Santirez         flagTransaction(c);
2364cef054e8Santirez         addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
2365cef054e8Santirez             c->cmd->name);
236640eb548aSantirez         return C_OK;
2367cef054e8Santirez     }
2368cef054e8Santirez 
2369cef054e8Santirez     /* Check if the user is authenticated */
2370cef054e8Santirez     if (server.requirepass && !c->authenticated && c->cmd->proc != authCommand)
2371cef054e8Santirez     {
2372cef054e8Santirez         flagTransaction(c);
2373cef054e8Santirez         addReply(c,shared.noautherr);
237440eb548aSantirez         return C_OK;
2375cef054e8Santirez     }
2376cef054e8Santirez 
2377cef054e8Santirez     /* If cluster is enabled perform the cluster redirection here.
2378cef054e8Santirez      * However we don't perform the redirection if:
2379cef054e8Santirez      * 1) The sender of this command is our master.
2380cef054e8Santirez      * 2) The command has no key arguments. */
2381cef054e8Santirez     if (server.cluster_enabled &&
238232f80e2fSantirez         !(c->flags & CLIENT_MASTER) &&
238332f80e2fSantirez         !(c->flags & CLIENT_LUA &&
238432f80e2fSantirez           server.lua_caller->flags & CLIENT_MASTER) &&
23851f3ed652SChris Thunes         !(c->cmd->getkeys_proc == NULL && c->cmd->firstkey == 0 &&
23861f3ed652SChris Thunes           c->cmd->proc != execCommand))
2387cef054e8Santirez     {
2388cef054e8Santirez         int hashslot;
2389cef054e8Santirez         int error_code;
2390708f486cSantirez         clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc,
2391708f486cSantirez                                         &hashslot,&error_code);
2392cef054e8Santirez         if (n == NULL || n != server.cluster->myself) {
23931f3ed652SChris Thunes             if (c->cmd->proc == execCommand) {
23941f3ed652SChris Thunes                 discardTransaction(c);
23951f3ed652SChris Thunes             } else {
2396cef054e8Santirez                 flagTransaction(c);
23971f3ed652SChris Thunes             }
2398cef054e8Santirez             clusterRedirectClient(c,n,hashslot,error_code);
239940eb548aSantirez             return C_OK;
2400cef054e8Santirez         }
2401cef054e8Santirez     }
2402cef054e8Santirez 
2403cef054e8Santirez     /* Handle the maxmemory directive.
2404cef054e8Santirez      *
2405cef054e8Santirez      * First we try to free some memory if possible (if there are volatile
2406cef054e8Santirez      * keys in the dataset). If there are not the only thing we can do
2407cef054e8Santirez      * is returning an error. */
2408cef054e8Santirez     if (server.maxmemory) {
2409cef054e8Santirez         int retval = freeMemoryIfNeeded();
24107a7e46b2Santirez         /* freeMemoryIfNeeded may flush slave output buffers. This may result
24117a7e46b2Santirez          * into a slave, that may be the active client, to be freed. */
24127a7e46b2Santirez         if (server.current_client == NULL) return C_ERR;
24137a7e46b2Santirez 
24147a7e46b2Santirez         /* It was impossible to free enough memory, and the command the client
24157a7e46b2Santirez          * is trying to execute is denied during OOM conditions? Error. */
241632f80e2fSantirez         if ((c->cmd->flags & CMD_DENYOOM) && retval == C_ERR) {
2417cef054e8Santirez             flagTransaction(c);
2418cef054e8Santirez             addReply(c, shared.oomerr);
241940eb548aSantirez             return C_OK;
2420cef054e8Santirez         }
2421cef054e8Santirez     }
2422cef054e8Santirez 
2423cef054e8Santirez     /* Don't accept write commands if there are problems persisting on disk
2424cef054e8Santirez      * and if this is a master instance. */
2425cef054e8Santirez     if (((server.stop_writes_on_bgsave_err &&
2426cef054e8Santirez           server.saveparamslen > 0 &&
242740eb548aSantirez           server.lastbgsave_status == C_ERR) ||
242840eb548aSantirez           server.aof_last_write_status == C_ERR) &&
2429cef054e8Santirez         server.masterhost == NULL &&
243032f80e2fSantirez         (c->cmd->flags & CMD_WRITE ||
2431cef054e8Santirez          c->cmd->proc == pingCommand))
2432cef054e8Santirez     {
2433cef054e8Santirez         flagTransaction(c);
243440eb548aSantirez         if (server.aof_last_write_status == C_OK)
2435cef054e8Santirez             addReply(c, shared.bgsaveerr);
2436cef054e8Santirez         else
2437cef054e8Santirez             addReplySds(c,
2438cef054e8Santirez                 sdscatprintf(sdsempty(),
2439cef054e8Santirez                 "-MISCONF Errors writing to the AOF file: %s\r\n",
2440cef054e8Santirez                 strerror(server.aof_last_write_errno)));
244140eb548aSantirez         return C_OK;
2442cef054e8Santirez     }
2443cef054e8Santirez 
2444cef054e8Santirez     /* Don't accept write commands if there are not enough good slaves and
2445cef054e8Santirez      * user configured the min-slaves-to-write option. */
2446cef054e8Santirez     if (server.masterhost == NULL &&
2447cef054e8Santirez         server.repl_min_slaves_to_write &&
2448cef054e8Santirez         server.repl_min_slaves_max_lag &&
244932f80e2fSantirez         c->cmd->flags & CMD_WRITE &&
2450cef054e8Santirez         server.repl_good_slaves_count < server.repl_min_slaves_to_write)
2451cef054e8Santirez     {
2452cef054e8Santirez         flagTransaction(c);
2453cef054e8Santirez         addReply(c, shared.noreplicaserr);
245440eb548aSantirez         return C_OK;
2455cef054e8Santirez     }
2456cef054e8Santirez 
2457cef054e8Santirez     /* Don't accept write commands if this is a read only slave. But
2458cef054e8Santirez      * accept write commands if this is our master. */
2459cef054e8Santirez     if (server.masterhost && server.repl_slave_ro &&
246032f80e2fSantirez         !(c->flags & CLIENT_MASTER) &&
246132f80e2fSantirez         c->cmd->flags & CMD_WRITE)
2462cef054e8Santirez     {
2463cef054e8Santirez         addReply(c, shared.roslaveerr);
246440eb548aSantirez         return C_OK;
2465cef054e8Santirez     }
2466cef054e8Santirez 
2467cef054e8Santirez     /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
246832f80e2fSantirez     if (c->flags & CLIENT_PUBSUB &&
2469cef054e8Santirez         c->cmd->proc != pingCommand &&
2470cef054e8Santirez         c->cmd->proc != subscribeCommand &&
2471cef054e8Santirez         c->cmd->proc != unsubscribeCommand &&
2472cef054e8Santirez         c->cmd->proc != psubscribeCommand &&
2473cef054e8Santirez         c->cmd->proc != punsubscribeCommand) {
247437109d8aSantirez         addReplyError(c,"only (P)SUBSCRIBE / (P)UNSUBSCRIBE / PING / QUIT allowed in this context");
247540eb548aSantirez         return C_OK;
2476cef054e8Santirez     }
2477cef054e8Santirez 
2478cef054e8Santirez     /* Only allow INFO and SLAVEOF when slave-serve-stale-data is no and
2479cef054e8Santirez      * we are a slave with a broken link with master. */
248032f80e2fSantirez     if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED &&
2481cef054e8Santirez         server.repl_serve_stale_data == 0 &&
248232f80e2fSantirez         !(c->cmd->flags & CMD_STALE))
2483cef054e8Santirez     {
2484cef054e8Santirez         flagTransaction(c);
2485cef054e8Santirez         addReply(c, shared.masterdownerr);
248640eb548aSantirez         return C_OK;
2487cef054e8Santirez     }
2488cef054e8Santirez 
2489cef054e8Santirez     /* Loading DB? Return an error if the command has not the
249032f80e2fSantirez      * CMD_LOADING flag. */
249132f80e2fSantirez     if (server.loading && !(c->cmd->flags & CMD_LOADING)) {
2492cef054e8Santirez         addReply(c, shared.loadingerr);
249340eb548aSantirez         return C_OK;
2494cef054e8Santirez     }
2495cef054e8Santirez 
2496cef054e8Santirez     /* Lua script too slow? Only allow a limited number of commands. */
2497cef054e8Santirez     if (server.lua_timedout &&
2498cef054e8Santirez           c->cmd->proc != authCommand &&
2499cef054e8Santirez           c->cmd->proc != replconfCommand &&
2500cef054e8Santirez         !(c->cmd->proc == shutdownCommand &&
2501cef054e8Santirez           c->argc == 2 &&
2502cef054e8Santirez           tolower(((char*)c->argv[1]->ptr)[0]) == 'n') &&
2503cef054e8Santirez         !(c->cmd->proc == scriptCommand &&
2504cef054e8Santirez           c->argc == 2 &&
2505cef054e8Santirez           tolower(((char*)c->argv[1]->ptr)[0]) == 'k'))
2506cef054e8Santirez     {
2507cef054e8Santirez         flagTransaction(c);
2508cef054e8Santirez         addReply(c, shared.slowscripterr);
250940eb548aSantirez         return C_OK;
2510cef054e8Santirez     }
2511cef054e8Santirez 
2512cef054e8Santirez     /* Exec the command */
251332f80e2fSantirez     if (c->flags & CLIENT_MULTI &&
2514cef054e8Santirez         c->cmd->proc != execCommand && c->cmd->proc != discardCommand &&
2515cef054e8Santirez         c->cmd->proc != multiCommand && c->cmd->proc != watchCommand)
2516cef054e8Santirez     {
2517cef054e8Santirez         queueMultiCommand(c);
2518cef054e8Santirez         addReply(c,shared.queued);
2519cef054e8Santirez     } else {
252032f80e2fSantirez         call(c,CMD_CALL_FULL);
2521cef054e8Santirez         c->woff = server.master_repl_offset;
2522cef054e8Santirez         if (listLength(server.ready_keys))
2523cef054e8Santirez             handleClientsBlockedOnLists();
2524cef054e8Santirez     }
252540eb548aSantirez     return C_OK;
2526cef054e8Santirez }
2527cef054e8Santirez 
/*================================== Shutdown =============================== */
2529cef054e8Santirez 
2530cef054e8Santirez /* Close listening sockets. Also unlink the unix domain socket if
2531cef054e8Santirez  * unlink_unix_socket is non-zero. */
closeListeningSockets(int unlink_unix_socket)2532cef054e8Santirez void closeListeningSockets(int unlink_unix_socket) {
2533cef054e8Santirez     int j;
2534cef054e8Santirez 
2535cef054e8Santirez     for (j = 0; j < server.ipfd_count; j++) close(server.ipfd[j]);
2536cef054e8Santirez     if (server.sofd != -1) close(server.sofd);
2537cef054e8Santirez     if (server.cluster_enabled)
2538cef054e8Santirez         for (j = 0; j < server.cfd_count; j++) close(server.cfd[j]);
2539cef054e8Santirez     if (unlink_unix_socket && server.unixsocket) {
254032f80e2fSantirez         serverLog(LL_NOTICE,"Removing the unix socket file.");
2541cef054e8Santirez         unlink(server.unixsocket); /* don't care if this fails */
2542cef054e8Santirez     }
2543cef054e8Santirez }
2544cef054e8Santirez 
prepareForShutdown(int flags)2545cef054e8Santirez int prepareForShutdown(int flags) {
254632f80e2fSantirez     int save = flags & SHUTDOWN_SAVE;
254732f80e2fSantirez     int nosave = flags & SHUTDOWN_NOSAVE;
2548cef054e8Santirez 
254932f80e2fSantirez     serverLog(LL_WARNING,"User requested shutdown...");
2550f069589eSantirez 
2551cdb92412Santirez     /* Kill all the Lua debugger forked sessions. */
2552cdb92412Santirez     ldbKillForkedSessions();
2553cdb92412Santirez 
2554cef054e8Santirez     /* Kill the saving child if there is a background saving in progress.
2555cef054e8Santirez        We want to avoid race conditions, for instance our saving child may
2556cef054e8Santirez        overwrite the synchronous saving did by SHUTDOWN. */
2557cef054e8Santirez     if (server.rdb_child_pid != -1) {
255832f80e2fSantirez         serverLog(LL_WARNING,"There is a child saving an .rdb. Killing it!");
2559cef054e8Santirez         kill(server.rdb_child_pid,SIGUSR1);
2560cef054e8Santirez         rdbRemoveTempFile(server.rdb_child_pid);
2561cef054e8Santirez     }
2562f069589eSantirez 
256332f80e2fSantirez     if (server.aof_state != AOF_OFF) {
2564cef054e8Santirez         /* Kill the AOF saving child as the AOF we already have may be longer
2565cef054e8Santirez          * but contains the full dataset anyway. */
2566cef054e8Santirez         if (server.aof_child_pid != -1) {
2567cef054e8Santirez             /* If we have AOF enabled but haven't written the AOF yet, don't
2568cef054e8Santirez              * shutdown or else the dataset will be lost. */
256932f80e2fSantirez             if (server.aof_state == AOF_WAIT_REWRITE) {
257032f80e2fSantirez                 serverLog(LL_WARNING, "Writing initial AOF, can't exit.");
257140eb548aSantirez                 return C_ERR;
2572cef054e8Santirez             }
257332f80e2fSantirez             serverLog(LL_WARNING,
2574cef054e8Santirez                 "There is a child rewriting the AOF. Killing it!");
2575cef054e8Santirez             kill(server.aof_child_pid,SIGUSR1);
2576cef054e8Santirez         }
2577cef054e8Santirez         /* Append only file: fsync() the AOF and exit */
257832f80e2fSantirez         serverLog(LL_NOTICE,"Calling fsync() on the AOF file.");
2579cef054e8Santirez         aof_fsync(server.aof_fd);
2580cef054e8Santirez     }
2581f069589eSantirez 
2582f069589eSantirez     /* Create a new RDB file before exiting. */
2583cef054e8Santirez     if ((server.saveparamslen > 0 && !nosave) || save) {
258432f80e2fSantirez         serverLog(LL_NOTICE,"Saving the final RDB snapshot before exiting.");
2585cef054e8Santirez         /* Snapshotting. Perform a SYNC SAVE and exit */
258640eb548aSantirez         if (rdbSave(server.rdb_filename) != C_OK) {
2587cef054e8Santirez             /* Ooops.. error saving! The best we can do is to continue
2588cef054e8Santirez              * operating. Note that if there was a background saving process,
2589cef054e8Santirez              * in the next cron() Redis will be notified that the background
2590cef054e8Santirez              * saving aborted, handling special stuff like slaves pending for
2591cef054e8Santirez              * synchronization... */
259232f80e2fSantirez             serverLog(LL_WARNING,"Error trying to save the DB, can't exit.");
259340eb548aSantirez             return C_ERR;
2594cef054e8Santirez         }
2595cef054e8Santirez     }
2596f069589eSantirez 
2597f069589eSantirez     /* Remove the pid file if possible and needed. */
2598cef054e8Santirez     if (server.daemonize || server.pidfile) {
259932f80e2fSantirez         serverLog(LL_NOTICE,"Removing the pid file.");
2600cef054e8Santirez         unlink(server.pidfile);
2601cef054e8Santirez     }
2602f069589eSantirez 
2603f069589eSantirez     /* Best effort flush of slave output buffers, so that we hopefully
2604f069589eSantirez      * send them pending writes. */
2605f069589eSantirez     flushSlavesOutputBuffers();
2606f069589eSantirez 
2607cef054e8Santirez     /* Close the listening sockets. Apparently this allows faster restarts. */
2608cef054e8Santirez     closeListeningSockets(1);
260932f80e2fSantirez     serverLog(LL_WARNING,"%s is now ready to exit, bye bye...",
2610cef054e8Santirez         server.sentinel_mode ? "Sentinel" : "Redis");
261140eb548aSantirez     return C_OK;
2612cef054e8Santirez }
2613cef054e8Santirez 
2614cef054e8Santirez /*================================== Commands =============================== */
2615cef054e8Santirez 
/* Compare two C strings without letting the running time depend on where
 * (or whether) they differ. Returns zero when the strings are equal and a
 * non-zero value otherwise.
 *
 * Both inputs are copied into zero padded buffers of
 * CONFIG_AUTHPASS_MAX_LEN bytes and every byte of the buffers is compared,
 * so the amount of work in the comparison loop is fixed. Inputs longer than
 * the buffers are reported as different without being compared.
 */
int time_independent_strcmp(char *a, char *b) {
    char pad_a[CONFIG_AUTHPASS_MAX_LEN];
    char pad_b[CONFIG_AUTHPASS_MAX_LEN];
    /* Measuring the lengths costs len(a) + len(b) steps. One of the two is
     * our own fixed password, so the timing only reflects the length of the
     * string supplied by the user, which the user already knows. */
    unsigned int len_a = strlen(a);
    unsigned int len_b = strlen(b);
    unsigned int pos = 0;
    int acc = 0;

    /* Anything that does not fit in the padded buffers can't be compared.
     * A real password never reaches this size, so nothing is leaked. */
    if (!(len_a <= sizeof(pad_a) && len_b <= sizeof(pad_b))) return 1;

    /* Fixed size clears: constant time. */
    memset(pad_a,0,sizeof(pad_a));
    memset(pad_b,0,sizeof(pad_b));
    /* The copies are again proportional to len(a) + len(b). */
    memcpy(pad_a,a,len_a);
    memcpy(pad_b,b,len_b);

    /* Walk the whole buffers accumulating the differences, with no
     * data dependent branch. */
    while (pos < sizeof(pad_a)) {
        acc |= (pad_a[pos] ^ pad_b[pos]);
        pos++;
    }
    /* Equal strings must have equal lengths too. */
    acc |= len_a ^ len_b;
    return acc; /* Zero means the strings are the same. */
}
2657cef054e8Santirez 
/* AUTH <password>: mark the client as authenticated when the supplied
 * password matches server.requirepass, otherwise clear the flag and reply
 * with an error. An error is also returned when no password is configured. */
void authCommand(client *c) {
    int match;

    if (!server.requirepass) {
        addReplyError(c,"Client sent AUTH, but no password is set");
        return;
    }

    /* time_independent_strcmp() returns zero on equality. */
    match = (time_independent_strcmp(c->argv[1]->ptr, server.requirepass) == 0);
    c->authenticated = match ? 1 : 0;
    if (match)
        addReply(c,shared.ok);
    else
        addReplyError(c,"invalid password");
}
2669cef054e8Santirez 
2670cef054e8Santirez /* The PING command. It works in a different way if the client is in
2671cef054e8Santirez  * in Pub/Sub mode. */
pingCommand(client * c)2672554bd0e7Santirez void pingCommand(client *c) {
2673cef054e8Santirez     /* The command takes zero or one arguments. */
2674cef054e8Santirez     if (c->argc > 2) {
2675cef054e8Santirez         addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
2676cef054e8Santirez             c->cmd->name);
2677cef054e8Santirez         return;
2678cef054e8Santirez     }
2679cef054e8Santirez 
268032f80e2fSantirez     if (c->flags & CLIENT_PUBSUB) {
2681cef054e8Santirez         addReply(c,shared.mbulkhdr[2]);
2682cef054e8Santirez         addReplyBulkCBuffer(c,"pong",4);
2683cef054e8Santirez         if (c->argc == 1)
2684cef054e8Santirez             addReplyBulkCBuffer(c,"",0);
2685cef054e8Santirez         else
2686cef054e8Santirez             addReplyBulk(c,c->argv[1]);
2687cef054e8Santirez     } else {
2688cef054e8Santirez         if (c->argc == 1)
2689cef054e8Santirez             addReply(c,shared.pong);
2690cef054e8Santirez         else
2691cef054e8Santirez             addReplyBulk(c,c->argv[1]);
2692cef054e8Santirez     }
2693cef054e8Santirez }
2694cef054e8Santirez 
/* ECHO <message>: send the first argument back as a bulk reply. */
void echoCommand(client *c) {
    robj *message = c->argv[1];

    addReplyBulk(c,message);
}
2698cef054e8Santirez 
/* TIME: reply with a two element multi bulk holding the seconds and the
 * microseconds fields returned by gettimeofday(). */
void timeCommand(client *c) {
    struct timeval now;
    long long secs, usecs;

    /* No error check: gettimeofday() can only fail when handed a bad
     * address, and &now is always valid. */
    gettimeofday(&now,NULL);
    secs = now.tv_sec;
    usecs = now.tv_usec;

    addReplyMultiBulkLen(c,2);
    addReplyBulkLongLong(c,secs);
    addReplyBulkLongLong(c,usecs);
}
2709cef054e8Santirez 
2710cef054e8Santirez /* Helper function for addReplyCommand() to output flags. */
addReplyCommandFlag(client * c,struct redisCommand * cmd,int f,char * reply)2711554bd0e7Santirez int addReplyCommandFlag(client *c, struct redisCommand *cmd, int f, char *reply) {
2712cef054e8Santirez     if (cmd->flags & f) {
2713cef054e8Santirez         addReplyStatus(c, reply);
2714cef054e8Santirez         return 1;
2715cef054e8Santirez     }
2716cef054e8Santirez     return 0;
2717cef054e8Santirez }
2718cef054e8Santirez 
2719cef054e8Santirez /* Output the representation of a Redis command. Used by the COMMAND command. */
addReplyCommand(client * c,struct redisCommand * cmd)2720554bd0e7Santirez void addReplyCommand(client *c, struct redisCommand *cmd) {
2721cef054e8Santirez     if (!cmd) {
2722cef054e8Santirez         addReply(c, shared.nullbulk);
2723cef054e8Santirez     } else {
2724cef054e8Santirez         /* We are adding: command name, arg count, flags, first, last, offset */
2725cef054e8Santirez         addReplyMultiBulkLen(c, 6);
2726cef054e8Santirez         addReplyBulkCString(c, cmd->name);
2727cef054e8Santirez         addReplyLongLong(c, cmd->arity);
2728cef054e8Santirez 
2729cef054e8Santirez         int flagcount = 0;
2730cef054e8Santirez         void *flaglen = addDeferredMultiBulkLength(c);
273132f80e2fSantirez         flagcount += addReplyCommandFlag(c,cmd,CMD_WRITE, "write");
273232f80e2fSantirez         flagcount += addReplyCommandFlag(c,cmd,CMD_READONLY, "readonly");
273332f80e2fSantirez         flagcount += addReplyCommandFlag(c,cmd,CMD_DENYOOM, "denyoom");
273432f80e2fSantirez         flagcount += addReplyCommandFlag(c,cmd,CMD_ADMIN, "admin");
273532f80e2fSantirez         flagcount += addReplyCommandFlag(c,cmd,CMD_PUBSUB, "pubsub");
273632f80e2fSantirez         flagcount += addReplyCommandFlag(c,cmd,CMD_NOSCRIPT, "noscript");
273732f80e2fSantirez         flagcount += addReplyCommandFlag(c,cmd,CMD_RANDOM, "random");
273832f80e2fSantirez         flagcount += addReplyCommandFlag(c,cmd,CMD_SORT_FOR_SCRIPT,"sort_for_script");
273932f80e2fSantirez         flagcount += addReplyCommandFlag(c,cmd,CMD_LOADING, "loading");
274032f80e2fSantirez         flagcount += addReplyCommandFlag(c,cmd,CMD_STALE, "stale");
274132f80e2fSantirez         flagcount += addReplyCommandFlag(c,cmd,CMD_SKIP_MONITOR, "skip_monitor");
274232f80e2fSantirez         flagcount += addReplyCommandFlag(c,cmd,CMD_ASKING, "asking");
274332f80e2fSantirez         flagcount += addReplyCommandFlag(c,cmd,CMD_FAST, "fast");
2744cef054e8Santirez         if (cmd->getkeys_proc) {
2745cef054e8Santirez             addReplyStatus(c, "movablekeys");
2746cef054e8Santirez             flagcount += 1;
2747cef054e8Santirez         }
2748cef054e8Santirez         setDeferredMultiBulkLength(c, flaglen, flagcount);
2749cef054e8Santirez 
2750cef054e8Santirez         addReplyLongLong(c, cmd->firstkey);
2751cef054e8Santirez         addReplyLongLong(c, cmd->lastkey);
2752cef054e8Santirez         addReplyLongLong(c, cmd->keystep);
2753cef054e8Santirez     }
2754cef054e8Santirez }
2755cef054e8Santirez 
2756cef054e8Santirez /* COMMAND <subcommand> <args> */
commandCommand(client * c)2757554bd0e7Santirez void commandCommand(client *c) {
2758cef054e8Santirez     dictIterator *di;
2759cef054e8Santirez     dictEntry *de;
2760cef054e8Santirez 
2761cef054e8Santirez     if (c->argc == 1) {
2762cef054e8Santirez         addReplyMultiBulkLen(c, dictSize(server.commands));
2763cef054e8Santirez         di = dictGetIterator(server.commands);
2764cef054e8Santirez         while ((de = dictNext(di)) != NULL) {
2765cef054e8Santirez             addReplyCommand(c, dictGetVal(de));
2766cef054e8Santirez         }
2767cef054e8Santirez         dictReleaseIterator(di);
2768cef054e8Santirez     } else if (!strcasecmp(c->argv[1]->ptr, "info")) {
2769cef054e8Santirez         int i;
2770cef054e8Santirez         addReplyMultiBulkLen(c, c->argc-2);
2771cef054e8Santirez         for (i = 2; i < c->argc; i++) {
2772cef054e8Santirez             addReplyCommand(c, dictFetchValue(server.commands, c->argv[i]->ptr));
2773cef054e8Santirez         }
2774cef054e8Santirez     } else if (!strcasecmp(c->argv[1]->ptr, "count") && c->argc == 2) {
2775cef054e8Santirez         addReplyLongLong(c, dictSize(server.commands));
2776cef054e8Santirez     } else if (!strcasecmp(c->argv[1]->ptr,"getkeys") && c->argc >= 3) {
2777cef054e8Santirez         struct redisCommand *cmd = lookupCommand(c->argv[2]->ptr);
2778cef054e8Santirez         int *keys, numkeys, j;
2779cef054e8Santirez 
2780cef054e8Santirez         if (!cmd) {
2781cef054e8Santirez             addReplyErrorFormat(c,"Invalid command specified");
2782cef054e8Santirez             return;
2783cef054e8Santirez         } else if ((cmd->arity > 0 && cmd->arity != c->argc-2) ||
2784cef054e8Santirez                    ((c->argc-2) < -cmd->arity))
2785cef054e8Santirez         {
2786cef054e8Santirez             addReplyError(c,"Invalid number of arguments specified for command");
2787cef054e8Santirez             return;
2788cef054e8Santirez         }
2789cef054e8Santirez 
2790cef054e8Santirez         keys = getKeysFromCommand(cmd,c->argv+2,c->argc-2,&numkeys);
2791cef054e8Santirez         addReplyMultiBulkLen(c,numkeys);
2792cef054e8Santirez         for (j = 0; j < numkeys; j++) addReplyBulk(c,c->argv[keys[j]+2]);
2793cef054e8Santirez         getKeysFreeResult(keys);
2794cef054e8Santirez     } else {
2795cef054e8Santirez         addReplyError(c, "Unknown subcommand or wrong number of arguments.");
2796cef054e8Santirez         return;
2797cef054e8Santirez     }
2798cef054e8Santirez }
2799cef054e8Santirez 
/* Render the byte count 'n' into 's' in a human readable form such as
 * 100B, 4.00K, 100.00M or 2.00G.
 *
 * Values below 1024 are printed as an integer followed by 'B'. Larger
 * values are divided by the biggest power of 1024 not exceeding them and
 * printed with two decimals and the K, M, G, T or P suffix. Values of
 * 1024^6 and above fall back to the plain byte count.
 *
 * 's' must be large enough for the result: no bound is checked here. */
void bytesToHuman(char *s, unsigned long long n) {
    static const char suffixes[] = "KMGTP";
    unsigned long long unit = 1024;
    int idx;

    if (n < 1024) {
        /* Bytes */
        sprintf(s,"%lluB",n);
        return;
    }

    /* 'unit' is the divisor for the current suffix; the suffix applies
     * while n is below the next power of 1024. */
    for (idx = 0; suffixes[idx] != '\0'; idx++) {
        if (n < unit*1024) {
            sprintf(s,"%.2f%c",(double)n/unit,suffixes[idx]);
            return;
        }
        unit *= 1024;
    }

    /* Let's hope we never need this */
    sprintf(s,"%lluB",n);
}
2829cef054e8Santirez 
2830cef054e8Santirez /* Create the string returned by the INFO command. This is decoupled
2831cef054e8Santirez  * by the INFO command itself as we need to report the same information
2832cef054e8Santirez  * on memory corruption problems. */
genRedisInfoString(char * section)2833cef054e8Santirez sds genRedisInfoString(char *section) {
2834cef054e8Santirez     sds info = sdsempty();
2835cef054e8Santirez     time_t uptime = server.unixtime-server.stat_starttime;
2836cef054e8Santirez     int j, numcommands;
2837cef054e8Santirez     struct rusage self_ru, c_ru;
2838cef054e8Santirez     unsigned long lol, bib;
2839cef054e8Santirez     int allsections = 0, defsections = 0;
2840cef054e8Santirez     int sections = 0;
2841cef054e8Santirez 
2842cef054e8Santirez     if (section == NULL) section = "default";
2843cef054e8Santirez     allsections = strcasecmp(section,"all") == 0;
2844cef054e8Santirez     defsections = strcasecmp(section,"default") == 0;
2845cef054e8Santirez 
2846cef054e8Santirez     getrusage(RUSAGE_SELF, &self_ru);
2847cef054e8Santirez     getrusage(RUSAGE_CHILDREN, &c_ru);
2848cef054e8Santirez     getClientsMaxBuffers(&lol,&bib);
2849cef054e8Santirez 
2850cef054e8Santirez     /* Server */
2851cef054e8Santirez     if (allsections || defsections || !strcasecmp(section,"server")) {
2852cef054e8Santirez         static int call_uname = 1;
2853cef054e8Santirez         static struct utsname name;
2854cef054e8Santirez         char *mode;
2855cef054e8Santirez 
2856cef054e8Santirez         if (server.cluster_enabled) mode = "cluster";
2857cef054e8Santirez         else if (server.sentinel_mode) mode = "sentinel";
2858cef054e8Santirez         else mode = "standalone";
2859cef054e8Santirez 
2860cef054e8Santirez         if (sections++) info = sdscat(info,"\r\n");
2861cef054e8Santirez 
2862cef054e8Santirez         if (call_uname) {
2863cef054e8Santirez             /* Uname can be slow and is always the same output. Cache it. */
2864cef054e8Santirez             uname(&name);
2865cef054e8Santirez             call_uname = 0;
2866cef054e8Santirez         }
2867cef054e8Santirez 
2868cef054e8Santirez         info = sdscatprintf(info,
2869cef054e8Santirez             "# Server\r\n"
2870cef054e8Santirez             "redis_version:%s\r\n"
2871cef054e8Santirez             "redis_git_sha1:%s\r\n"
2872cef054e8Santirez             "redis_git_dirty:%d\r\n"
2873cef054e8Santirez             "redis_build_id:%llx\r\n"
2874cef054e8Santirez             "redis_mode:%s\r\n"
2875cef054e8Santirez             "os:%s %s %s\r\n"
2876cef054e8Santirez             "arch_bits:%d\r\n"
2877cef054e8Santirez             "multiplexing_api:%s\r\n"
2878cef054e8Santirez             "gcc_version:%d.%d.%d\r\n"
2879cef054e8Santirez             "process_id:%ld\r\n"
2880cef054e8Santirez             "run_id:%s\r\n"
2881cef054e8Santirez             "tcp_port:%d\r\n"
2882cef054e8Santirez             "uptime_in_seconds:%jd\r\n"
2883cef054e8Santirez             "uptime_in_days:%jd\r\n"
2884cef054e8Santirez             "hz:%d\r\n"
2885cef054e8Santirez             "lru_clock:%ld\r\n"
28861db84c21Santirez             "executable:%s\r\n"
2887cef054e8Santirez             "config_file:%s\r\n",
2888cef054e8Santirez             REDIS_VERSION,
2889cef054e8Santirez             redisGitSHA1(),
2890cef054e8Santirez             strtol(redisGitDirty(),NULL,10) > 0,
2891cef054e8Santirez             (unsigned long long) redisBuildId(),
2892cef054e8Santirez             mode,
2893cef054e8Santirez             name.sysname, name.release, name.machine,
2894cef054e8Santirez             server.arch_bits,
2895cef054e8Santirez             aeGetApiName(),
2896cef054e8Santirez #ifdef __GNUC__
2897cef054e8Santirez             __GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__,
2898cef054e8Santirez #else
2899cef054e8Santirez             0,0,0,
2900cef054e8Santirez #endif
2901cef054e8Santirez             (long) getpid(),
2902cef054e8Santirez             server.runid,
2903cef054e8Santirez             server.port,
2904cef054e8Santirez             (intmax_t)uptime,
2905cef054e8Santirez             (intmax_t)(uptime/(3600*24)),
2906cef054e8Santirez             server.hz,
2907cef054e8Santirez             (unsigned long) server.lruclock,
29081db84c21Santirez             server.executable ? server.executable : "",
2909cef054e8Santirez             server.configfile ? server.configfile : "");
2910cef054e8Santirez     }
2911cef054e8Santirez 
2912cef054e8Santirez     /* Clients */
2913cef054e8Santirez     if (allsections || defsections || !strcasecmp(section,"clients")) {
2914cef054e8Santirez         if (sections++) info = sdscat(info,"\r\n");
2915cef054e8Santirez         info = sdscatprintf(info,
2916cef054e8Santirez             "# Clients\r\n"
2917cef054e8Santirez             "connected_clients:%lu\r\n"
2918cef054e8Santirez             "client_longest_output_list:%lu\r\n"
2919cef054e8Santirez             "client_biggest_input_buf:%lu\r\n"
2920cef054e8Santirez             "blocked_clients:%d\r\n",
2921cef054e8Santirez             listLength(server.clients)-listLength(server.slaves),
2922cef054e8Santirez             lol, bib,
2923cef054e8Santirez             server.bpop_blocked_clients);
2924cef054e8Santirez     }
2925cef054e8Santirez 
2926cef054e8Santirez     /* Memory */
2927cef054e8Santirez     if (allsections || defsections || !strcasecmp(section,"memory")) {
2928cef054e8Santirez         char hmem[64];
2929cef054e8Santirez         char peak_hmem[64];
2930cef054e8Santirez         char total_system_hmem[64];
2931cef054e8Santirez         char used_memory_lua_hmem[64];
2932cef054e8Santirez         char used_memory_rss_hmem[64];
2933cef054e8Santirez         char maxmemory_hmem[64];
2934cef054e8Santirez         size_t zmalloc_used = zmalloc_used_memory();
2935cef054e8Santirez         size_t total_system_mem = server.system_memory_size;
29363a481067Stherealbill         const char *evict_policy = evictPolicyToString();
2937cef054e8Santirez         long long memory_lua = (long long)lua_gc(server.lua,LUA_GCCOUNT,0)*1024;
2938cef054e8Santirez 
2939cef054e8Santirez         /* Peak memory is updated from time to time by serverCron() so it
2940cef054e8Santirez          * may happen that the instantaneous value is slightly bigger than
2941cef054e8Santirez          * the peak value. This may confuse users, so we update the peak
2942cef054e8Santirez          * if found smaller than the current memory usage. */
2943cef054e8Santirez         if (zmalloc_used > server.stat_peak_memory)
2944cef054e8Santirez             server.stat_peak_memory = zmalloc_used;
2945cef054e8Santirez 
2946cef054e8Santirez         bytesToHuman(hmem,zmalloc_used);
2947cef054e8Santirez         bytesToHuman(peak_hmem,server.stat_peak_memory);
2948cef054e8Santirez         bytesToHuman(total_system_hmem,total_system_mem);
2949cef054e8Santirez         bytesToHuman(used_memory_lua_hmem,memory_lua);
2950cef054e8Santirez         bytesToHuman(used_memory_rss_hmem,server.resident_set_size);
2951cef054e8Santirez         bytesToHuman(maxmemory_hmem,server.maxmemory);
2952cef054e8Santirez 
2953cef054e8Santirez         if (sections++) info = sdscat(info,"\r\n");
2954cef054e8Santirez         info = sdscatprintf(info,
2955cef054e8Santirez             "# Memory\r\n"
2956cef054e8Santirez             "used_memory:%zu\r\n"
2957cef054e8Santirez             "used_memory_human:%s\r\n"
2958cef054e8Santirez             "used_memory_rss:%zu\r\n"
2959cef054e8Santirez             "used_memory_rss_human:%s\r\n"
2960cef054e8Santirez             "used_memory_peak:%zu\r\n"
2961cef054e8Santirez             "used_memory_peak_human:%s\r\n"
2962cef054e8Santirez             "total_system_memory:%lu\r\n"
2963cef054e8Santirez             "total_system_memory_human:%s\r\n"
2964cef054e8Santirez             "used_memory_lua:%lld\r\n"
2965cef054e8Santirez             "used_memory_lua_human:%s\r\n"
2966cef054e8Santirez             "maxmemory:%lld\r\n"
2967cef054e8Santirez             "maxmemory_human:%s\r\n"
2968cef054e8Santirez             "maxmemory_policy:%s\r\n"
2969cef054e8Santirez             "mem_fragmentation_ratio:%.2f\r\n"
2970cef054e8Santirez             "mem_allocator:%s\r\n",
2971cef054e8Santirez             zmalloc_used,
2972cef054e8Santirez             hmem,
2973cef054e8Santirez             server.resident_set_size,
2974cef054e8Santirez             used_memory_rss_hmem,
2975cef054e8Santirez             server.stat_peak_memory,
2976cef054e8Santirez             peak_hmem,
2977cef054e8Santirez             (unsigned long)total_system_mem,
2978cef054e8Santirez             total_system_hmem,
2979cef054e8Santirez             memory_lua,
2980cef054e8Santirez             used_memory_lua_hmem,
2981cef054e8Santirez             server.maxmemory,
2982cef054e8Santirez             maxmemory_hmem,
2983cef054e8Santirez             evict_policy,
2984cef054e8Santirez             zmalloc_get_fragmentation_ratio(server.resident_set_size),
2985cef054e8Santirez             ZMALLOC_LIB
2986cef054e8Santirez             );
2987cef054e8Santirez     }
2988cef054e8Santirez 
2989cef054e8Santirez     /* Persistence */
2990cef054e8Santirez     if (allsections || defsections || !strcasecmp(section,"persistence")) {
2991cef054e8Santirez         if (sections++) info = sdscat(info,"\r\n");
2992cef054e8Santirez         info = sdscatprintf(info,
2993cef054e8Santirez             "# Persistence\r\n"
2994cef054e8Santirez             "loading:%d\r\n"
2995cef054e8Santirez             "rdb_changes_since_last_save:%lld\r\n"
2996cef054e8Santirez             "rdb_bgsave_in_progress:%d\r\n"
2997cef054e8Santirez             "rdb_last_save_time:%jd\r\n"
2998cef054e8Santirez             "rdb_last_bgsave_status:%s\r\n"
2999cef054e8Santirez             "rdb_last_bgsave_time_sec:%jd\r\n"
3000cef054e8Santirez             "rdb_current_bgsave_time_sec:%jd\r\n"
3001cef054e8Santirez             "aof_enabled:%d\r\n"
3002cef054e8Santirez             "aof_rewrite_in_progress:%d\r\n"
3003cef054e8Santirez             "aof_rewrite_scheduled:%d\r\n"
3004cef054e8Santirez             "aof_last_rewrite_time_sec:%jd\r\n"
3005cef054e8Santirez             "aof_current_rewrite_time_sec:%jd\r\n"
3006cef054e8Santirez             "aof_last_bgrewrite_status:%s\r\n"
3007cef054e8Santirez             "aof_last_write_status:%s\r\n",
3008cef054e8Santirez             server.loading,
3009cef054e8Santirez             server.dirty,
3010cef054e8Santirez             server.rdb_child_pid != -1,
3011cef054e8Santirez             (intmax_t)server.lastsave,
301240eb548aSantirez             (server.lastbgsave_status == C_OK) ? "ok" : "err",
3013cef054e8Santirez             (intmax_t)server.rdb_save_time_last,
3014cef054e8Santirez             (intmax_t)((server.rdb_child_pid == -1) ?
3015cef054e8Santirez                 -1 : time(NULL)-server.rdb_save_time_start),
301632f80e2fSantirez             server.aof_state != AOF_OFF,
3017cef054e8Santirez             server.aof_child_pid != -1,
3018cef054e8Santirez             server.aof_rewrite_scheduled,
3019cef054e8Santirez             (intmax_t)server.aof_rewrite_time_last,
3020cef054e8Santirez             (intmax_t)((server.aof_child_pid == -1) ?
3021cef054e8Santirez                 -1 : time(NULL)-server.aof_rewrite_time_start),
302240eb548aSantirez             (server.aof_lastbgrewrite_status == C_OK) ? "ok" : "err",
302340eb548aSantirez             (server.aof_last_write_status == C_OK) ? "ok" : "err");
3024cef054e8Santirez 
302532f80e2fSantirez         if (server.aof_state != AOF_OFF) {
3026cef054e8Santirez             info = sdscatprintf(info,
3027cef054e8Santirez                 "aof_current_size:%lld\r\n"
3028cef054e8Santirez                 "aof_base_size:%lld\r\n"
3029cef054e8Santirez                 "aof_pending_rewrite:%d\r\n"
3030cef054e8Santirez                 "aof_buffer_length:%zu\r\n"
3031cef054e8Santirez                 "aof_rewrite_buffer_length:%lu\r\n"
3032cef054e8Santirez                 "aof_pending_bio_fsync:%llu\r\n"
3033cef054e8Santirez                 "aof_delayed_fsync:%lu\r\n",
3034cef054e8Santirez                 (long long) server.aof_current_size,
3035cef054e8Santirez                 (long long) server.aof_rewrite_base_size,
3036cef054e8Santirez                 server.aof_rewrite_scheduled,
3037cef054e8Santirez                 sdslen(server.aof_buf),
3038cef054e8Santirez                 aofRewriteBufferSize(),
30393325a9b1Santirez                 bioPendingJobsOfType(BIO_AOF_FSYNC),
3040cef054e8Santirez                 server.aof_delayed_fsync);
3041cef054e8Santirez         }
3042cef054e8Santirez 
3043cef054e8Santirez         if (server.loading) {
3044cef054e8Santirez             double perc;
3045cef054e8Santirez             time_t eta, elapsed;
3046cef054e8Santirez             off_t remaining_bytes = server.loading_total_bytes-
3047cef054e8Santirez                                     server.loading_loaded_bytes;
3048cef054e8Santirez 
3049cef054e8Santirez             perc = ((double)server.loading_loaded_bytes /
3050cef054e8Santirez                    (server.loading_total_bytes+1)) * 100;
3051cef054e8Santirez 
3052cef054e8Santirez             elapsed = time(NULL)-server.loading_start_time;
3053cef054e8Santirez             if (elapsed == 0) {
3054cef054e8Santirez                 eta = 1; /* A fake 1 second figure if we don't have
3055cef054e8Santirez                             enough info */
3056cef054e8Santirez             } else {
3057cef054e8Santirez                 eta = (elapsed*remaining_bytes)/(server.loading_loaded_bytes+1);
3058cef054e8Santirez             }
3059cef054e8Santirez 
3060cef054e8Santirez             info = sdscatprintf(info,
3061cef054e8Santirez                 "loading_start_time:%jd\r\n"
3062cef054e8Santirez                 "loading_total_bytes:%llu\r\n"
3063cef054e8Santirez                 "loading_loaded_bytes:%llu\r\n"
3064cef054e8Santirez                 "loading_loaded_perc:%.2f\r\n"
3065cef054e8Santirez                 "loading_eta_seconds:%jd\r\n",
3066cef054e8Santirez                 (intmax_t) server.loading_start_time,
3067cef054e8Santirez                 (unsigned long long) server.loading_total_bytes,
3068cef054e8Santirez                 (unsigned long long) server.loading_loaded_bytes,
3069cef054e8Santirez                 perc,
3070cef054e8Santirez                 (intmax_t)eta
3071cef054e8Santirez             );
3072cef054e8Santirez         }
3073cef054e8Santirez     }
3074cef054e8Santirez 
3075cef054e8Santirez     /* Stats */
3076cef054e8Santirez     if (allsections || defsections || !strcasecmp(section,"stats")) {
3077cef054e8Santirez         if (sections++) info = sdscat(info,"\r\n");
3078cef054e8Santirez         info = sdscatprintf(info,
3079cef054e8Santirez             "# Stats\r\n"
3080cef054e8Santirez             "total_connections_received:%lld\r\n"
3081cef054e8Santirez             "total_commands_processed:%lld\r\n"
3082cef054e8Santirez             "instantaneous_ops_per_sec:%lld\r\n"
3083cef054e8Santirez             "total_net_input_bytes:%lld\r\n"
3084cef054e8Santirez             "total_net_output_bytes:%lld\r\n"
3085cef054e8Santirez             "instantaneous_input_kbps:%.2f\r\n"
3086cef054e8Santirez             "instantaneous_output_kbps:%.2f\r\n"
3087cef054e8Santirez             "rejected_connections:%lld\r\n"
3088cef054e8Santirez             "sync_full:%lld\r\n"
3089cef054e8Santirez             "sync_partial_ok:%lld\r\n"
3090cef054e8Santirez             "sync_partial_err:%lld\r\n"
3091cef054e8Santirez             "expired_keys:%lld\r\n"
3092cef054e8Santirez             "evicted_keys:%lld\r\n"
3093cef054e8Santirez             "keyspace_hits:%lld\r\n"
3094cef054e8Santirez             "keyspace_misses:%lld\r\n"
3095cef054e8Santirez             "pubsub_channels:%ld\r\n"
3096cef054e8Santirez             "pubsub_patterns:%lu\r\n"
3097cef054e8Santirez             "latest_fork_usec:%lld\r\n"
3098cef054e8Santirez             "migrate_cached_sockets:%ld\r\n",
3099cef054e8Santirez             server.stat_numconnections,
3100cef054e8Santirez             server.stat_numcommands,
310132f80e2fSantirez             getInstantaneousMetric(STATS_METRIC_COMMAND),
3102cef054e8Santirez             server.stat_net_input_bytes,
3103cef054e8Santirez             server.stat_net_output_bytes,
310432f80e2fSantirez             (float)getInstantaneousMetric(STATS_METRIC_NET_INPUT)/1024,
310532f80e2fSantirez             (float)getInstantaneousMetric(STATS_METRIC_NET_OUTPUT)/1024,
3106cef054e8Santirez             server.stat_rejected_conn,
3107cef054e8Santirez             server.stat_sync_full,
3108cef054e8Santirez             server.stat_sync_partial_ok,
3109cef054e8Santirez             server.stat_sync_partial_err,
3110cef054e8Santirez             server.stat_expiredkeys,
3111cef054e8Santirez             server.stat_evictedkeys,
3112cef054e8Santirez             server.stat_keyspace_hits,
3113cef054e8Santirez             server.stat_keyspace_misses,
3114cef054e8Santirez             dictSize(server.pubsub_channels),
3115cef054e8Santirez             listLength(server.pubsub_patterns),
3116cef054e8Santirez             server.stat_fork_time,
3117cef054e8Santirez             dictSize(server.migrate_cached_sockets));
3118cef054e8Santirez     }
3119cef054e8Santirez 
3120cef054e8Santirez     /* Replication */
3121cef054e8Santirez     if (allsections || defsections || !strcasecmp(section,"replication")) {
3122cef054e8Santirez         if (sections++) info = sdscat(info,"\r\n");
3123cef054e8Santirez         info = sdscatprintf(info,
3124cef054e8Santirez             "# Replication\r\n"
3125cef054e8Santirez             "role:%s\r\n",
3126cef054e8Santirez             server.masterhost == NULL ? "master" : "slave");
3127cef054e8Santirez         if (server.masterhost) {
3128cef054e8Santirez             long long slave_repl_offset = 1;
3129cef054e8Santirez 
3130cef054e8Santirez             if (server.master)
3131cef054e8Santirez                 slave_repl_offset = server.master->reploff;
3132cef054e8Santirez             else if (server.cached_master)
3133cef054e8Santirez                 slave_repl_offset = server.cached_master->reploff;
3134cef054e8Santirez 
3135cef054e8Santirez             info = sdscatprintf(info,
3136cef054e8Santirez                 "master_host:%s\r\n"
3137cef054e8Santirez                 "master_port:%d\r\n"
3138cef054e8Santirez                 "master_link_status:%s\r\n"
3139cef054e8Santirez                 "master_last_io_seconds_ago:%d\r\n"
3140cef054e8Santirez                 "master_sync_in_progress:%d\r\n"
3141cef054e8Santirez                 "slave_repl_offset:%lld\r\n"
3142cef054e8Santirez                 ,server.masterhost,
3143cef054e8Santirez                 server.masterport,
314432f80e2fSantirez                 (server.repl_state == REPL_STATE_CONNECTED) ?
3145cef054e8Santirez                     "up" : "down",
3146cef054e8Santirez                 server.master ?
3147cef054e8Santirez                 ((int)(server.unixtime-server.master->lastinteraction)) : -1,
314832f80e2fSantirez                 server.repl_state == REPL_STATE_TRANSFER,
3149cef054e8Santirez                 slave_repl_offset
3150cef054e8Santirez             );
3151cef054e8Santirez 
315232f80e2fSantirez             if (server.repl_state == REPL_STATE_TRANSFER) {
3153cef054e8Santirez                 info = sdscatprintf(info,
3154cef054e8Santirez                     "master_sync_left_bytes:%lld\r\n"
3155cef054e8Santirez                     "master_sync_last_io_seconds_ago:%d\r\n"
3156cef054e8Santirez                     , (long long)
3157cef054e8Santirez                         (server.repl_transfer_size - server.repl_transfer_read),
3158cef054e8Santirez                     (int)(server.unixtime-server.repl_transfer_lastio)
3159cef054e8Santirez                 );
3160cef054e8Santirez             }
3161cef054e8Santirez 
316232f80e2fSantirez             if (server.repl_state != REPL_STATE_CONNECTED) {
3163cef054e8Santirez                 info = sdscatprintf(info,
3164cef054e8Santirez                     "master_link_down_since_seconds:%jd\r\n",
3165cef054e8Santirez                     (intmax_t)server.unixtime-server.repl_down_since);
3166cef054e8Santirez             }
3167cef054e8Santirez             info = sdscatprintf(info,
3168cef054e8Santirez                 "slave_priority:%d\r\n"
3169cef054e8Santirez                 "slave_read_only:%d\r\n",
3170cef054e8Santirez                 server.slave_priority,
3171cef054e8Santirez                 server.repl_slave_ro);
3172cef054e8Santirez         }
3173cef054e8Santirez 
3174cef054e8Santirez         info = sdscatprintf(info,
3175cef054e8Santirez             "connected_slaves:%lu\r\n",
3176cef054e8Santirez             listLength(server.slaves));
3177cef054e8Santirez 
3178cef054e8Santirez         /* If min-slaves-to-write is active, write the number of slaves
3179cef054e8Santirez          * currently considered 'good'. */
3180cef054e8Santirez         if (server.repl_min_slaves_to_write &&
3181cef054e8Santirez             server.repl_min_slaves_max_lag) {
3182cef054e8Santirez             info = sdscatprintf(info,
3183cef054e8Santirez                 "min_slaves_good_slaves:%d\r\n",
3184cef054e8Santirez                 server.repl_good_slaves_count);
3185cef054e8Santirez         }
3186cef054e8Santirez 
3187cef054e8Santirez         if (listLength(server.slaves)) {
3188cef054e8Santirez             int slaveid = 0;
3189cef054e8Santirez             listNode *ln;
3190cef054e8Santirez             listIter li;
3191cef054e8Santirez 
3192cef054e8Santirez             listRewind(server.slaves,&li);
3193cef054e8Santirez             while((ln = listNext(&li))) {
3194554bd0e7Santirez                 client *slave = listNodeValue(ln);
3195cef054e8Santirez                 char *state = NULL;
3196*0a45fbc3Santirez                 char ip[NET_IP_STR_LEN], *slaveip = slave->slave_ip;
3197cef054e8Santirez                 int port;
3198cef054e8Santirez                 long lag = 0;
3199cef054e8Santirez 
3200*0a45fbc3Santirez                 if (slaveip[0] == '\0') {
3201*0a45fbc3Santirez                     if (anetPeerToString(slave->fd,ip,sizeof(ip),&port) == -1)
3202*0a45fbc3Santirez                         continue;
3203*0a45fbc3Santirez                     slaveip = ip;
3204*0a45fbc3Santirez                 }
3205cef054e8Santirez                 switch(slave->replstate) {
320632f80e2fSantirez                 case SLAVE_STATE_WAIT_BGSAVE_START:
320732f80e2fSantirez                 case SLAVE_STATE_WAIT_BGSAVE_END:
3208cef054e8Santirez                     state = "wait_bgsave";
3209cef054e8Santirez                     break;
321032f80e2fSantirez                 case SLAVE_STATE_SEND_BULK:
3211cef054e8Santirez                     state = "send_bulk";
3212cef054e8Santirez                     break;
321332f80e2fSantirez                 case SLAVE_STATE_ONLINE:
3214cef054e8Santirez                     state = "online";
3215cef054e8Santirez                     break;
3216cef054e8Santirez                 }
3217cef054e8Santirez                 if (state == NULL) continue;
321832f80e2fSantirez                 if (slave->replstate == SLAVE_STATE_ONLINE)
3219cef054e8Santirez                     lag = time(NULL) - slave->repl_ack_time;
3220cef054e8Santirez 
3221cef054e8Santirez                 info = sdscatprintf(info,
3222cef054e8Santirez                     "slave%d:ip=%s,port=%d,state=%s,"
3223cef054e8Santirez                     "offset=%lld,lag=%ld\r\n",
3224*0a45fbc3Santirez                     slaveid,slaveip,slave->slave_listening_port,state,
3225cef054e8Santirez                     slave->repl_ack_off, lag);
3226cef054e8Santirez                 slaveid++;
3227cef054e8Santirez             }
3228cef054e8Santirez         }
3229cef054e8Santirez         info = sdscatprintf(info,
3230cef054e8Santirez             "master_repl_offset:%lld\r\n"
3231cef054e8Santirez             "repl_backlog_active:%d\r\n"
3232cef054e8Santirez             "repl_backlog_size:%lld\r\n"
3233cef054e8Santirez             "repl_backlog_first_byte_offset:%lld\r\n"
3234cef054e8Santirez             "repl_backlog_histlen:%lld\r\n",
3235cef054e8Santirez             server.master_repl_offset,
3236cef054e8Santirez             server.repl_backlog != NULL,
3237cef054e8Santirez             server.repl_backlog_size,
3238cef054e8Santirez             server.repl_backlog_off,
3239cef054e8Santirez             server.repl_backlog_histlen);
3240cef054e8Santirez     }
3241cef054e8Santirez 
3242cef054e8Santirez     /* CPU */
3243cef054e8Santirez     if (allsections || defsections || !strcasecmp(section,"cpu")) {
3244cef054e8Santirez         if (sections++) info = sdscat(info,"\r\n");
3245cef054e8Santirez         info = sdscatprintf(info,
3246cef054e8Santirez         "# CPU\r\n"
3247cef054e8Santirez         "used_cpu_sys:%.2f\r\n"
3248cef054e8Santirez         "used_cpu_user:%.2f\r\n"
3249cef054e8Santirez         "used_cpu_sys_children:%.2f\r\n"
3250cef054e8Santirez         "used_cpu_user_children:%.2f\r\n",
3251cef054e8Santirez         (float)self_ru.ru_stime.tv_sec+(float)self_ru.ru_stime.tv_usec/1000000,
3252cef054e8Santirez         (float)self_ru.ru_utime.tv_sec+(float)self_ru.ru_utime.tv_usec/1000000,
3253cef054e8Santirez         (float)c_ru.ru_stime.tv_sec+(float)c_ru.ru_stime.tv_usec/1000000,
3254cef054e8Santirez         (float)c_ru.ru_utime.tv_sec+(float)c_ru.ru_utime.tv_usec/1000000);
3255cef054e8Santirez     }
3256cef054e8Santirez 
3257cef054e8Santirez     /* cmdtime */
3258cef054e8Santirez     if (allsections || !strcasecmp(section,"commandstats")) {
3259cef054e8Santirez         if (sections++) info = sdscat(info,"\r\n");
3260cef054e8Santirez         info = sdscatprintf(info, "# Commandstats\r\n");
3261cef054e8Santirez         numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);
3262cef054e8Santirez         for (j = 0; j < numcommands; j++) {
3263cef054e8Santirez             struct redisCommand *c = redisCommandTable+j;
3264cef054e8Santirez 
3265cef054e8Santirez             if (!c->calls) continue;
3266cef054e8Santirez             info = sdscatprintf(info,
3267cef054e8Santirez                 "cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f\r\n",
3268cef054e8Santirez                 c->name, c->calls, c->microseconds,
3269cef054e8Santirez                 (c->calls == 0) ? 0 : ((float)c->microseconds/c->calls));
3270cef054e8Santirez         }
3271cef054e8Santirez     }
3272cef054e8Santirez 
3273cef054e8Santirez     /* Cluster */
3274cef054e8Santirez     if (allsections || defsections || !strcasecmp(section,"cluster")) {
3275cef054e8Santirez         if (sections++) info = sdscat(info,"\r\n");
3276cef054e8Santirez         info = sdscatprintf(info,
3277cef054e8Santirez         "# Cluster\r\n"
3278cef054e8Santirez         "cluster_enabled:%d\r\n",
3279cef054e8Santirez         server.cluster_enabled);
3280cef054e8Santirez     }
3281cef054e8Santirez 
3282cef054e8Santirez     /* Key space */
3283cef054e8Santirez     if (allsections || defsections || !strcasecmp(section,"keyspace")) {
3284cef054e8Santirez         if (sections++) info = sdscat(info,"\r\n");
3285cef054e8Santirez         info = sdscatprintf(info, "# Keyspace\r\n");
3286cef054e8Santirez         for (j = 0; j < server.dbnum; j++) {
3287cef054e8Santirez             long long keys, vkeys;
3288cef054e8Santirez 
3289cef054e8Santirez             keys = dictSize(server.db[j].dict);
3290cef054e8Santirez             vkeys = dictSize(server.db[j].expires);
3291cef054e8Santirez             if (keys || vkeys) {
3292cef054e8Santirez                 info = sdscatprintf(info,
3293cef054e8Santirez                     "db%d:keys=%lld,expires=%lld,avg_ttl=%lld\r\n",
3294cef054e8Santirez                     j, keys, vkeys, server.db[j].avg_ttl);
3295cef054e8Santirez             }
3296cef054e8Santirez         }
3297cef054e8Santirez     }
3298cef054e8Santirez     return info;
3299cef054e8Santirez }
3300cef054e8Santirez 
/* INFO [section]
 *
 * Replies with the string produced by genRedisInfoString() for the requested
 * section. With no section argument the "default" section is used; more than
 * one argument yields a syntax error reply. */
void infoCommand(client *c) {
    char *section = "default";

    /* Reject extra arguments before doing any work. */
    if (c->argc > 2) {
        addReply(c,shared.syntaxerr);
        return;
    }

    if (c->argc == 2) {
        section = c->argv[1]->ptr;
    }
    addReplyBulkSds(c, genRedisInfoString(section));
}
3310cef054e8Santirez 
/* MONITOR
 *
 * Flags the client as CLIENT_SLAVE|CLIENT_MONITOR, appends it to
 * server.monitors and replies with shared.ok. */
void monitorCommand(client *c) {
    /* Clients already flagged as slave (monitors carry that flag as well)
     * are left untouched and receive no reply. */
    if (!(c->flags & CLIENT_SLAVE)) {
        c->flags |= (CLIENT_SLAVE|CLIENT_MONITOR);
        listAddNodeTail(server.monitors,c);
        addReply(c,shared.ok);
    }
}
3319cef054e8Santirez 
3320cef054e8Santirez /* ============================ Maxmemory directive  ======================== */
3321cef054e8Santirez 
3322cef054e8Santirez /* freeMemoryIfNeeded() gets called when 'maxmemory' is set on the config
3323cef054e8Santirez  * file to limit the max memory used by the server, before processing a
3324cef054e8Santirez  * command.
3325cef054e8Santirez  *
3326cef054e8Santirez  * The goal of the function is to free enough memory to keep Redis under the
3327cef054e8Santirez  * configured memory limit.
3328cef054e8Santirez  *
3329cef054e8Santirez  * The function starts calculating how many bytes should be freed to keep
3330cef054e8Santirez  * Redis under the limit, and enters a loop selecting the best keys to
3331cef054e8Santirez  * evict accordingly to the configured policy.
3332cef054e8Santirez  *
3333cef054e8Santirez  * If all the bytes needed to return back under the limit were freed the
333440eb548aSantirez  * function returns C_OK, otherwise C_ERR is returned, and the caller
3335cef054e8Santirez  * should block the execution of commands that will result in more memory
3336cef054e8Santirez  * used by the server.
3337cef054e8Santirez  *
3338cef054e8Santirez  * ------------------------------------------------------------------------
3339cef054e8Santirez  *
3340cef054e8Santirez  * LRU approximation algorithm
3341cef054e8Santirez  *
 * Redis uses an approximation of the LRU algorithm that runs in constant
 * memory. Every time there is a key to expire, we sample N keys (with
 * N very small, usually around 5) to populate a pool of the M best keys
 * to evict (the pool size M is defined by MAXMEMORY_EVICTION_POOL_SIZE).
3346cef054e8Santirez  *
3347cef054e8Santirez  * The N keys sampled are added in the pool of good keys to expire (the one
3348cef054e8Santirez  * with an old access time) if they are better than one of the current keys
3349cef054e8Santirez  * in the pool.
3350cef054e8Santirez  *
3351cef054e8Santirez  * After the pool is populated, the best key we have in the pool is expired.
3352cef054e8Santirez  * However note that we don't remove keys from the pool when they are deleted
3353cef054e8Santirez  * so the pool may contain keys that no longer exist.
3354cef054e8Santirez  *
3355cef054e8Santirez  * When we try to evict a key, and all the entries in the pool don't exist
3356cef054e8Santirez  * we populate it again. This time we'll be sure that the pool has at least
3357cef054e8Santirez  * one key that can be evicted, if there is at least one key that can be
3358cef054e8Santirez  * evicted in the whole database. */
3359cef054e8Santirez 
3360cef054e8Santirez /* Create a new eviction pool. */
evictionPoolAlloc(void)3361cef054e8Santirez struct evictionPoolEntry *evictionPoolAlloc(void) {
3362cef054e8Santirez     struct evictionPoolEntry *ep;
3363cef054e8Santirez     int j;
3364cef054e8Santirez 
336532f80e2fSantirez     ep = zmalloc(sizeof(*ep)*MAXMEMORY_EVICTION_POOL_SIZE);
336632f80e2fSantirez     for (j = 0; j < MAXMEMORY_EVICTION_POOL_SIZE; j++) {
3367cef054e8Santirez         ep[j].idle = 0;
3368cef054e8Santirez         ep[j].key = NULL;
3369cef054e8Santirez     }
3370cef054e8Santirez     return ep;
3371cef054e8Santirez }
3372cef054e8Santirez 
3373cef054e8Santirez /* This is an helper function for freeMemoryIfNeeded(), it is used in order
3374cef054e8Santirez  * to populate the evictionPool with a few entries every time we want to
3375cef054e8Santirez  * expire a key. Keys with idle time smaller than one of the current
3376cef054e8Santirez  * keys are added. Keys are always added if there are free entries.
3377cef054e8Santirez  *
3378cef054e8Santirez  * We insert keys on place in ascending order, so keys with the smaller
3379cef054e8Santirez  * idle time are on the left, and keys with the higher idle time on the
3380cef054e8Santirez  * right. */
3381cef054e8Santirez 
3382cef054e8Santirez #define EVICTION_SAMPLES_ARRAY_SIZE 16
evictionPoolPopulate(dict * sampledict,dict * keydict,struct evictionPoolEntry * pool)3383cef054e8Santirez void evictionPoolPopulate(dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
3384cef054e8Santirez     int j, k, count;
3385cef054e8Santirez     dictEntry *_samples[EVICTION_SAMPLES_ARRAY_SIZE];
3386cef054e8Santirez     dictEntry **samples;
3387cef054e8Santirez 
3388cef054e8Santirez     /* Try to use a static buffer: this function is a big hit...
3389cef054e8Santirez      * Note: it was actually measured that this helps. */
3390cef054e8Santirez     if (server.maxmemory_samples <= EVICTION_SAMPLES_ARRAY_SIZE) {
3391cef054e8Santirez         samples = _samples;
3392cef054e8Santirez     } else {
3393cef054e8Santirez         samples = zmalloc(sizeof(samples[0])*server.maxmemory_samples);
3394cef054e8Santirez     }
3395cef054e8Santirez 
3396cef054e8Santirez     count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
3397cef054e8Santirez     for (j = 0; j < count; j++) {
3398cef054e8Santirez         unsigned long long idle;
3399cef054e8Santirez         sds key;
3400cef054e8Santirez         robj *o;
3401cef054e8Santirez         dictEntry *de;
3402cef054e8Santirez 
3403cef054e8Santirez         de = samples[j];
3404cef054e8Santirez         key = dictGetKey(de);
3405cef054e8Santirez         /* If the dictionary we are sampling from is not the main
3406cef054e8Santirez          * dictionary (but the expires one) we need to lookup the key
3407cef054e8Santirez          * again in the key dictionary to obtain the value object. */
3408cef054e8Santirez         if (sampledict != keydict) de = dictFind(keydict, key);
3409cef054e8Santirez         o = dictGetVal(de);
3410cef054e8Santirez         idle = estimateObjectIdleTime(o);
3411cef054e8Santirez 
3412cef054e8Santirez         /* Insert the element inside the pool.
3413cef054e8Santirez          * First, find the first empty bucket or the first populated
3414cef054e8Santirez          * bucket that has an idle time smaller than our idle time. */
3415cef054e8Santirez         k = 0;
341632f80e2fSantirez         while (k < MAXMEMORY_EVICTION_POOL_SIZE &&
3417cef054e8Santirez                pool[k].key &&
3418cef054e8Santirez                pool[k].idle < idle) k++;
341932f80e2fSantirez         if (k == 0 && pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key != NULL) {
3420cef054e8Santirez             /* Can't insert if the element is < the worst element we have
3421cef054e8Santirez              * and there are no empty buckets. */
3422cef054e8Santirez             continue;
342332f80e2fSantirez         } else if (k < MAXMEMORY_EVICTION_POOL_SIZE && pool[k].key == NULL) {
3424cef054e8Santirez             /* Inserting into empty position. No setup needed before insert. */
3425cef054e8Santirez         } else {
3426cef054e8Santirez             /* Inserting in the middle. Now k points to the first element
3427cef054e8Santirez              * greater than the element to insert.  */
342832f80e2fSantirez             if (pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key == NULL) {
3429cef054e8Santirez                 /* Free space on the right? Insert at k shifting
3430cef054e8Santirez                  * all the elements from k to end to the right. */
3431cef054e8Santirez                 memmove(pool+k+1,pool+k,
343232f80e2fSantirez                     sizeof(pool[0])*(MAXMEMORY_EVICTION_POOL_SIZE-k-1));
3433cef054e8Santirez             } else {
3434cef054e8Santirez                 /* No free space on right? Insert at k-1 */
3435cef054e8Santirez                 k--;
3436cef054e8Santirez                 /* Shift all elements on the left of k (included) to the
3437cef054e8Santirez                  * left, so we discard the element with smaller idle time. */
3438cef054e8Santirez                 sdsfree(pool[0].key);
3439cef054e8Santirez                 memmove(pool,pool+1,sizeof(pool[0])*k);
3440cef054e8Santirez             }
3441cef054e8Santirez         }
3442cef054e8Santirez         pool[k].key = sdsdup(key);
3443cef054e8Santirez         pool[k].idle = idle;
3444cef054e8Santirez     }
3445cef054e8Santirez     if (samples != _samples) zfree(samples);
3446cef054e8Santirez }
3447cef054e8Santirez 
freeMemoryIfNeeded(void)3448cef054e8Santirez int freeMemoryIfNeeded(void) {
3449cef054e8Santirez     size_t mem_used, mem_tofree, mem_freed;
3450cef054e8Santirez     int slaves = listLength(server.slaves);
3451cef054e8Santirez     mstime_t latency, eviction_latency;
3452cef054e8Santirez 
3453cef054e8Santirez     /* Remove the size of slaves output buffers and AOF buffer from the
3454cef054e8Santirez      * count of used memory. */
3455cef054e8Santirez     mem_used = zmalloc_used_memory();
3456cef054e8Santirez     if (slaves) {
3457cef054e8Santirez         listIter li;
3458cef054e8Santirez         listNode *ln;
3459cef054e8Santirez 
3460cef054e8Santirez         listRewind(server.slaves,&li);
3461cef054e8Santirez         while((ln = listNext(&li))) {
3462554bd0e7Santirez             client *slave = listNodeValue(ln);
3463cef054e8Santirez             unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave);
3464cef054e8Santirez             if (obuf_bytes > mem_used)
3465cef054e8Santirez                 mem_used = 0;
3466cef054e8Santirez             else
3467cef054e8Santirez                 mem_used -= obuf_bytes;
3468cef054e8Santirez         }
3469cef054e8Santirez     }
347032f80e2fSantirez     if (server.aof_state != AOF_OFF) {
3471cef054e8Santirez         mem_used -= sdslen(server.aof_buf);
3472cef054e8Santirez         mem_used -= aofRewriteBufferSize();
3473cef054e8Santirez     }
3474cef054e8Santirez 
3475cef054e8Santirez     /* Check if we are over the memory limit. */
347640eb548aSantirez     if (mem_used <= server.maxmemory) return C_OK;
3477cef054e8Santirez 
347832f80e2fSantirez     if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
347940eb548aSantirez         return C_ERR; /* We need to free memory, but policy forbids. */
3480cef054e8Santirez 
3481cef054e8Santirez     /* Compute how much memory we need to free. */
3482cef054e8Santirez     mem_tofree = mem_used - server.maxmemory;
3483cef054e8Santirez     mem_freed = 0;
3484cef054e8Santirez     latencyStartMonitor(latency);
3485cef054e8Santirez     while (mem_freed < mem_tofree) {
3486cef054e8Santirez         int j, k, keys_freed = 0;
3487cef054e8Santirez 
3488cef054e8Santirez         for (j = 0; j < server.dbnum; j++) {
3489cef054e8Santirez             long bestval = 0; /* just to prevent warning */
3490cef054e8Santirez             sds bestkey = NULL;
3491cef054e8Santirez             dictEntry *de;
3492cef054e8Santirez             redisDb *db = server.db+j;
3493cef054e8Santirez             dict *dict;
3494cef054e8Santirez 
349532f80e2fSantirez             if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU ||
349632f80e2fSantirez                 server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM)
3497cef054e8Santirez             {
3498cef054e8Santirez                 dict = server.db[j].dict;
3499cef054e8Santirez             } else {
3500cef054e8Santirez                 dict = server.db[j].expires;
3501cef054e8Santirez             }
3502cef054e8Santirez             if (dictSize(dict) == 0) continue;
3503cef054e8Santirez 
3504cef054e8Santirez             /* volatile-random and allkeys-random policy */
350532f80e2fSantirez             if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
350632f80e2fSantirez                 server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
3507cef054e8Santirez             {
3508cef054e8Santirez                 de = dictGetRandomKey(dict);
3509cef054e8Santirez                 bestkey = dictGetKey(de);
3510cef054e8Santirez             }
3511cef054e8Santirez 
3512cef054e8Santirez             /* volatile-lru and allkeys-lru policy */
351332f80e2fSantirez             else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU ||
351432f80e2fSantirez                 server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU)
3515cef054e8Santirez             {
3516cef054e8Santirez                 struct evictionPoolEntry *pool = db->eviction_pool;
3517cef054e8Santirez 
3518cef054e8Santirez                 while(bestkey == NULL) {
3519cef054e8Santirez                     evictionPoolPopulate(dict, db->dict, db->eviction_pool);
3520cef054e8Santirez                     /* Go backward from best to worst element to evict. */
352132f80e2fSantirez                     for (k = MAXMEMORY_EVICTION_POOL_SIZE-1; k >= 0; k--) {
3522cef054e8Santirez                         if (pool[k].key == NULL) continue;
3523cef054e8Santirez                         de = dictFind(dict,pool[k].key);
3524cef054e8Santirez 
3525cef054e8Santirez                         /* Remove the entry from the pool. */
3526cef054e8Santirez                         sdsfree(pool[k].key);
3527cef054e8Santirez                         /* Shift all elements on its right to left. */
3528cef054e8Santirez                         memmove(pool+k,pool+k+1,
352932f80e2fSantirez                             sizeof(pool[0])*(MAXMEMORY_EVICTION_POOL_SIZE-k-1));
3530cef054e8Santirez                         /* Clear the element on the right which is empty
3531cef054e8Santirez                          * since we shifted one position to the left.  */
353232f80e2fSantirez                         pool[MAXMEMORY_EVICTION_POOL_SIZE-1].key = NULL;
353332f80e2fSantirez                         pool[MAXMEMORY_EVICTION_POOL_SIZE-1].idle = 0;
3534cef054e8Santirez 
3535cef054e8Santirez                         /* If the key exists, is our pick. Otherwise it is
3536cef054e8Santirez                          * a ghost and we need to try the next element. */
3537cef054e8Santirez                         if (de) {
3538cef054e8Santirez                             bestkey = dictGetKey(de);
3539cef054e8Santirez                             break;
3540cef054e8Santirez                         } else {
3541cef054e8Santirez                             /* Ghost... */
3542cef054e8Santirez                             continue;
3543cef054e8Santirez                         }
3544cef054e8Santirez                     }
3545cef054e8Santirez                 }
3546cef054e8Santirez             }
3547cef054e8Santirez 
3548cef054e8Santirez             /* volatile-ttl */
354932f80e2fSantirez             else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
3550cef054e8Santirez                 for (k = 0; k < server.maxmemory_samples; k++) {
3551cef054e8Santirez                     sds thiskey;
3552cef054e8Santirez                     long thisval;
3553cef054e8Santirez 
3554cef054e8Santirez                     de = dictGetRandomKey(dict);
3555cef054e8Santirez                     thiskey = dictGetKey(de);
3556cef054e8Santirez                     thisval = (long) dictGetVal(de);
3557cef054e8Santirez 
3558cef054e8Santirez                     /* Expire sooner (minor expire unix timestamp) is better
3559cef054e8Santirez                      * candidate for deletion */
3560cef054e8Santirez                     if (bestkey == NULL || thisval < bestval) {
3561cef054e8Santirez                         bestkey = thiskey;
3562cef054e8Santirez                         bestval = thisval;
3563cef054e8Santirez                     }
3564cef054e8Santirez                 }
3565cef054e8Santirez             }
3566cef054e8Santirez 
3567cef054e8Santirez             /* Finally remove the selected key. */
3568cef054e8Santirez             if (bestkey) {
3569cef054e8Santirez                 long long delta;
3570cef054e8Santirez 
3571cef054e8Santirez                 robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
3572cef054e8Santirez                 propagateExpire(db,keyobj);
3573cef054e8Santirez                 /* We compute the amount of memory freed by dbDelete() alone.
3574cef054e8Santirez                  * It is possible that actually the memory needed to propagate
3575cef054e8Santirez                  * the DEL in AOF and replication link is greater than the one
3576cef054e8Santirez                  * we are freeing removing the key, but we can't account for
3577cef054e8Santirez                  * that otherwise we would never exit the loop.
3578cef054e8Santirez                  *
3579cef054e8Santirez                  * AOF and Output buffer memory will be freed eventually so
3580cef054e8Santirez                  * we only care about memory used by the key space. */
3581cef054e8Santirez                 delta = (long long) zmalloc_used_memory();
3582cef054e8Santirez                 latencyStartMonitor(eviction_latency);
3583cef054e8Santirez                 dbDelete(db,keyobj);
3584cef054e8Santirez                 latencyEndMonitor(eviction_latency);
3585cef054e8Santirez                 latencyAddSampleIfNeeded("eviction-del",eviction_latency);
3586cef054e8Santirez                 latencyRemoveNestedEvent(latency,eviction_latency);
3587cef054e8Santirez                 delta -= (long long) zmalloc_used_memory();
3588cef054e8Santirez                 mem_freed += delta;
3589cef054e8Santirez                 server.stat_evictedkeys++;
359032f80e2fSantirez                 notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
3591cef054e8Santirez                     keyobj, db->id);
3592cef054e8Santirez                 decrRefCount(keyobj);
3593cef054e8Santirez                 keys_freed++;
3594cef054e8Santirez 
3595cef054e8Santirez                 /* When the memory to free starts to be big enough, we may
3596cef054e8Santirez                  * start spending so much time here that is impossible to
3597cef054e8Santirez                  * deliver data to the slaves fast enough, so we force the
3598cef054e8Santirez                  * transmission here inside the loop. */
3599cef054e8Santirez                 if (slaves) flushSlavesOutputBuffers();
3600cef054e8Santirez             }
3601cef054e8Santirez         }
3602cef054e8Santirez         if (!keys_freed) {
3603cef054e8Santirez             latencyEndMonitor(latency);
3604cef054e8Santirez             latencyAddSampleIfNeeded("eviction-cycle",latency);
360540eb548aSantirez             return C_ERR; /* nothing to free... */
3606cef054e8Santirez         }
3607cef054e8Santirez     }
3608cef054e8Santirez     latencyEndMonitor(latency);
3609cef054e8Santirez     latencyAddSampleIfNeeded("eviction-cycle",latency);
361040eb548aSantirez     return C_OK;
3611cef054e8Santirez }
3612cef054e8Santirez 
3613cef054e8Santirez /* =================================== Main! ================================ */
3614cef054e8Santirez 
3615cef054e8Santirez #ifdef __linux__
/* Read the integer stored in /proc/sys/vm/overcommit_memory.
 *
 * Returns the parsed value, or -1 when the file cannot be opened, cannot be
 * read, or does not start with a number. Unparsable content must not be
 * reported as 0, since 0 is the value callers warn about. */
int linuxOvercommitMemoryValue(void) {
    FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r");
    char buf[64];
    char *end;
    long value;

    if (!fp) return -1;
    if (fgets(buf,sizeof(buf),fp) == NULL) {
        fclose(fp);
        return -1;
    }
    fclose(fp);

    /* strtol() leaves 'end' equal to 'buf' when no digits were consumed,
     * which atoi() cannot distinguish from a genuine 0. */
    value = strtol(buf,&end,10);
    if (end == buf) return -1;
    return (int)value;
}
3629cef054e8Santirez 
linuxMemoryWarnings(void)3630cef054e8Santirez void linuxMemoryWarnings(void) {
3631cef054e8Santirez     if (linuxOvercommitMemoryValue() == 0) {
363232f80e2fSantirez         serverLog(LL_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect.");
3633cef054e8Santirez     }
3634cef054e8Santirez     if (THPIsEnabled()) {
363532f80e2fSantirez         serverLog(LL_WARNING,"WARNING you have Transparent Huge Pages (THP) support enabled in your kernel. This will create latency and memory usage issues with Redis. To fix this issue run the command 'echo never > /sys/kernel/mm/transparent_hugepage/enabled' as root, and add it to your /etc/rc.local in order to retain the setting after a reboot. Redis must be restarted after THP is disabled.");
3636cef054e8Santirez     }
3637cef054e8Santirez }
3638cef054e8Santirez #endif /* __linux__ */
3639cef054e8Santirez 
createPidFile(void)3640cef054e8Santirez void createPidFile(void) {
3641cef054e8Santirez     /* If pidfile requested, but no pidfile defined, use
3642cef054e8Santirez      * default pidfile path */
3643cef054e8Santirez     if (!server.pidfile) server.pidfile = zstrdup(CONFIG_DEFAULT_PID_FILE);
3644cef054e8Santirez 
3645cef054e8Santirez     /* Try to write the pid file in a best-effort way. */
3646cef054e8Santirez     FILE *fp = fopen(server.pidfile,"w");
3647cef054e8Santirez     if (fp) {
3648cef054e8Santirez         fprintf(fp,"%d\n",(int)getpid());
3649cef054e8Santirez         fclose(fp);
3650cef054e8Santirez     }
3651cef054e8Santirez }
3652cef054e8Santirez 
/* Detach the process from the caller: fork, let the parent exit, start a
 * new session in the child, and point stdin/stdout/stderr at /dev/null.
 *
 * Since all standard output goes to /dev/null, a daemonized Redis whose
 * 'logfile' is set to 'stdout' will not log at all. */
void daemonize(void) {
    static const int std_fds[] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };
    size_t i;
    int devnull;

    /* Only the child (fork() == 0) continues. NOTE(review): a failed
     * fork() returns -1 and therefore also takes the exit(0) path. */
    if (fork() != 0) exit(0);
    setsid(); /* create a new session */

    devnull = open("/dev/null", O_RDWR, 0);
    if (devnull == -1) return;

    for (i = 0; i < sizeof(std_fds)/sizeof(std_fds[0]); i++)
        dup2(devnull, std_fds[i]);

    /* Keep the descriptor if open() happened to return one of 0..2. */
    if (devnull > STDERR_FILENO) close(devnull);
}
3669cef054e8Santirez 
version(void)3670cef054e8Santirez void version(void) {
3671cef054e8Santirez     printf("Redis server v=%s sha=%s:%d malloc=%s bits=%d build=%llx\n",
3672cef054e8Santirez         REDIS_VERSION,
3673cef054e8Santirez         redisGitSHA1(),
3674cef054e8Santirez         atoi(redisGitDirty()) > 0,
3675cef054e8Santirez         ZMALLOC_LIB,
3676cef054e8Santirez         sizeof(long) == 4 ? 32 : 64,
3677cef054e8Santirez         (unsigned long long) redisBuildId());
3678cef054e8Santirez     exit(0);
3679cef054e8Santirez }
3680cef054e8Santirez 
/* Print the command line help to stderr and exit with status 1.
 * This function does not return. */
void usage(void) {
    static const char *const help_lines[] = {
        "Usage: ./redis-server [/path/to/redis.conf] [options]\n",
        "       ./redis-server - (read config from stdin)\n",
        "       ./redis-server -v or --version\n",
        "       ./redis-server -h or --help\n",
        "       ./redis-server --test-memory <megabytes>\n\n",
        "Examples:\n",
        "       ./redis-server (run the server with default conf)\n",
        "       ./redis-server /etc/redis/6379.conf\n",
        "       ./redis-server --port 7777\n",
        "       ./redis-server --port 7777 --slaveof 127.0.0.1 8888\n",
        "       ./redis-server /etc/myredis.conf --loglevel verbose\n\n",
        "Sentinel mode:\n",
        "       ./redis-server /etc/sentinel.conf --sentinel\n",
    };
    size_t i;

    for (i = 0; i < sizeof(help_lines)/sizeof(help_lines[0]); i++)
        fputs(help_lines[i], stderr);
    exit(1);
}
3697cef054e8Santirez 
redisAsciiArt(void)3698cef054e8Santirez void redisAsciiArt(void) {
3699cef054e8Santirez #include "asciilogo.h"
3700cef054e8Santirez     char *buf = zmalloc(1024*16);
3701cef054e8Santirez     char *mode;
3702cef054e8Santirez 
3703cef054e8Santirez     if (server.cluster_enabled) mode = "cluster";
3704cef054e8Santirez     else if (server.sentinel_mode) mode = "sentinel";
3705cef054e8Santirez     else mode = "standalone";
3706cef054e8Santirez 
3707cef054e8Santirez     if (server.syslog_enabled) {
370832f80e2fSantirez         serverLog(LL_NOTICE,
3709cef054e8Santirez             "Redis %s (%s/%d) %s bit, %s mode, port %d, pid %ld ready to start.",
3710cef054e8Santirez             REDIS_VERSION,
3711cef054e8Santirez             redisGitSHA1(),
3712cef054e8Santirez             strtol(redisGitDirty(),NULL,10) > 0,
3713cef054e8Santirez             (sizeof(long) == 8) ? "64" : "32",
3714cef054e8Santirez             mode, server.port,
3715cef054e8Santirez             (long) getpid()
3716cef054e8Santirez         );
3717cef054e8Santirez     } else {
3718cef054e8Santirez         snprintf(buf,1024*16,ascii_logo,
3719cef054e8Santirez             REDIS_VERSION,
3720cef054e8Santirez             redisGitSHA1(),
3721cef054e8Santirez             strtol(redisGitDirty(),NULL,10) > 0,
3722cef054e8Santirez             (sizeof(long) == 8) ? "64" : "32",
3723cef054e8Santirez             mode, server.port,
3724cef054e8Santirez             (long) getpid()
3725cef054e8Santirez         );
372632f80e2fSantirez         serverLogRaw(LL_NOTICE|LL_RAW,buf);
3727cef054e8Santirez     }
3728cef054e8Santirez     zfree(buf);
3729cef054e8Santirez }
3730cef054e8Santirez 
sigShutdownHandler(int sig)3731cef054e8Santirez static void sigShutdownHandler(int sig) {
3732cef054e8Santirez     char *msg;
3733cef054e8Santirez 
3734cef054e8Santirez     switch (sig) {
3735cef054e8Santirez     case SIGINT:
3736cef054e8Santirez         msg = "Received SIGINT scheduling shutdown...";
3737cef054e8Santirez         break;
3738cef054e8Santirez     case SIGTERM:
3739cef054e8Santirez         msg = "Received SIGTERM scheduling shutdown...";
3740cef054e8Santirez         break;
3741cef054e8Santirez     default:
3742cef054e8Santirez         msg = "Received shutdown signal, scheduling shutdown...";
3743cef054e8Santirez     };
3744cef054e8Santirez 
3745cef054e8Santirez     /* SIGINT is often delivered via Ctrl+C in an interactive session.
3746cef054e8Santirez      * If we receive the signal the second time, we interpret this as
3747cef054e8Santirez      * the user really wanting to quit ASAP without waiting to persist
3748cef054e8Santirez      * on disk. */
3749cef054e8Santirez     if (server.shutdown_asap && sig == SIGINT) {
375032f80e2fSantirez         serverLogFromHandler(LL_WARNING, "You insist... exiting now.");
3751cef054e8Santirez         rdbRemoveTempFile(getpid());
3752cef054e8Santirez         exit(1); /* Exit with an error since this was not a clean shutdown. */
3753cef054e8Santirez     } else if (server.loading) {
3754cef054e8Santirez         exit(0);
3755cef054e8Santirez     }
3756cef054e8Santirez 
375732f80e2fSantirez     serverLogFromHandler(LL_WARNING, msg);
3758cef054e8Santirez     server.shutdown_asap = 1;
3759cef054e8Santirez }
3760cef054e8Santirez 
setupSignalHandlers(void)3761cef054e8Santirez void setupSignalHandlers(void) {
3762cef054e8Santirez     struct sigaction act;
3763cef054e8Santirez 
3764cef054e8Santirez     /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction is used.
3765cef054e8Santirez      * Otherwise, sa_handler is used. */
3766cef054e8Santirez     sigemptyset(&act.sa_mask);
3767cef054e8Santirez     act.sa_flags = 0;
3768cef054e8Santirez     act.sa_handler = sigShutdownHandler;
3769cef054e8Santirez     sigaction(SIGTERM, &act, NULL);
3770cef054e8Santirez     sigaction(SIGINT, &act, NULL);
3771cef054e8Santirez 
3772cef054e8Santirez #ifdef HAVE_BACKTRACE
3773cef054e8Santirez     sigemptyset(&act.sa_mask);
3774cef054e8Santirez     act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO;
3775cef054e8Santirez     act.sa_sigaction = sigsegvHandler;
3776cef054e8Santirez     sigaction(SIGSEGV, &act, NULL);
3777cef054e8Santirez     sigaction(SIGBUS, &act, NULL);
3778cef054e8Santirez     sigaction(SIGFPE, &act, NULL);
3779cef054e8Santirez     sigaction(SIGILL, &act, NULL);
3780cef054e8Santirez #endif
3781cef054e8Santirez     return;
3782cef054e8Santirez }
3783cef054e8Santirez 
3784cef054e8Santirez void memtest(size_t megabytes, int passes);
3785cef054e8Santirez 
/* Returns 1 if argv[0] contains the substring "redis-sentinel" or if any
 * argument after argv[0] is exactly "--sentinel". Returns 0 otherwise. */
int checkForSentinelMode(int argc, char **argv) {
    int found = strstr(argv[0],"redis-sentinel") != NULL;
    int i = 1;

    while (!found && i < argc) {
        found = strcmp(argv[i],"--sentinel") == 0;
        i++;
    }
    return found;
}
3796cef054e8Santirez 
3797cef054e8Santirez /* Function called at startup to load RDB or AOF file in memory. */
loadDataFromDisk(void)3798cef054e8Santirez void loadDataFromDisk(void) {
3799cef054e8Santirez     long long start = ustime();
380032f80e2fSantirez     if (server.aof_state == AOF_ON) {
380140eb548aSantirez         if (loadAppendOnlyFile(server.aof_filename) == C_OK)
380232f80e2fSantirez             serverLog(LL_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000);
3803cef054e8Santirez     } else {
380440eb548aSantirez         if (rdbLoad(server.rdb_filename) == C_OK) {
380532f80e2fSantirez             serverLog(LL_NOTICE,"DB loaded from disk: %.3f seconds",
3806cef054e8Santirez                 (float)(ustime()-start)/1000000);
3807cef054e8Santirez         } else if (errno != ENOENT) {
380832f80e2fSantirez             serverLog(LL_WARNING,"Fatal error loading the DB: %s. Exiting.",strerror(errno));
3809cef054e8Santirez             exit(1);
3810cef054e8Santirez         }
3811cef054e8Santirez     }
3812cef054e8Santirez }
3813cef054e8Santirez 
redisOutOfMemoryHandler(size_t allocation_size)3814cef054e8Santirez void redisOutOfMemoryHandler(size_t allocation_size) {
381532f80e2fSantirez     serverLog(LL_WARNING,"Out Of Memory allocating %zu bytes!",
3816cef054e8Santirez         allocation_size);
381732f80e2fSantirez     serverPanic("Redis aborting for OUT OF MEMORY");
3818cef054e8Santirez }
3819cef054e8Santirez 
/* Set the process title to "<title> <addr>:<port><mode>", where <addr> is
 * the first bind address (or "*" when none is configured) and <mode> is
 * " [cluster]", " [sentinel]" or empty. This is a no-op when
 * USE_SETPROCTITLE is not defined. */
void redisSetProcTitle(char *title) {
#ifdef USE_SETPROCTITLE
    char *addr = server.bindaddr_count ? server.bindaddr[0] : "*";
    char *mode_suffix;

    if (server.cluster_enabled) mode_suffix = " [cluster]";
    else if (server.sentinel_mode) mode_suffix = " [sentinel]";
    else mode_suffix = "";

    setproctitle("%s %s:%d%s", title, addr, server.port, mode_suffix);
#else
    UNUSED(title);
#endif
}
3835cef054e8Santirez 
3836cef054e8Santirez /*
3837cef054e8Santirez  * Check whether systemd or upstart have been used to start redis.
3838cef054e8Santirez  */
3839cef054e8Santirez 
redisSupervisedUpstart(void)3840cef054e8Santirez int redisSupervisedUpstart(void) {
3841cef054e8Santirez     const char *upstart_job = getenv("UPSTART_JOB");
3842cef054e8Santirez 
3843cef054e8Santirez     if (!upstart_job) {
384432f80e2fSantirez         serverLog(LL_WARNING,
3845cef054e8Santirez                 "upstart supervision requested, but UPSTART_JOB not found");
3846cef054e8Santirez         return 0;
3847cef054e8Santirez     }
3848cef054e8Santirez 
3849cd9f7c69SItamar Haber     serverLog(LL_NOTICE, "supervised by upstart, will stop to signal readiness");
3850cef054e8Santirez     raise(SIGSTOP);
3851cef054e8Santirez     unsetenv("UPSTART_JOB");
3852cef054e8Santirez     return 1;
3853cef054e8Santirez }
3854cef054e8Santirez 
redisSupervisedSystemd(void)3855cef054e8Santirez int redisSupervisedSystemd(void) {
3856cef054e8Santirez     const char *notify_socket = getenv("NOTIFY_SOCKET");
3857cef054e8Santirez     int fd = 1;
3858cef054e8Santirez     struct sockaddr_un su;
3859cef054e8Santirez     struct iovec iov;
3860cef054e8Santirez     struct msghdr hdr;
3861cef054e8Santirez     int sendto_flags = 0;
3862cef054e8Santirez 
3863cef054e8Santirez     if (!notify_socket) {
386432f80e2fSantirez         serverLog(LL_WARNING,
3865cef054e8Santirez                 "systemd supervision requested, but NOTIFY_SOCKET not found");
3866cef054e8Santirez         return 0;
3867cef054e8Santirez     }
3868cef054e8Santirez 
3869cef054e8Santirez     if ((strchr("@/", notify_socket[0])) == NULL || strlen(notify_socket) < 2) {
3870cef054e8Santirez         return 0;
3871cef054e8Santirez     }
3872cef054e8Santirez 
3873cd9f7c69SItamar Haber     serverLog(LL_NOTICE, "supervised by systemd, will signal readiness");
3874cef054e8Santirez     if ((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) == -1) {
387532f80e2fSantirez         serverLog(LL_WARNING,
3876cef054e8Santirez                 "Can't connect to systemd socket %s", notify_socket);
3877cef054e8Santirez         return 0;
3878cef054e8Santirez     }
3879cef054e8Santirez 
3880cef054e8Santirez     memset(&su, 0, sizeof(su));
3881cef054e8Santirez     su.sun_family = AF_UNIX;
3882cef054e8Santirez     strncpy (su.sun_path, notify_socket, sizeof(su.sun_path) -1);
3883cef054e8Santirez     su.sun_path[sizeof(su.sun_path) - 1] = '\0';
3884cef054e8Santirez 
3885cef054e8Santirez     if (notify_socket[0] == '@')
3886cef054e8Santirez         su.sun_path[0] = '\0';
3887cef054e8Santirez 
3888cef054e8Santirez     memset(&iov, 0, sizeof(iov));
3889cef054e8Santirez     iov.iov_base = "READY=1";
3890cef054e8Santirez     iov.iov_len = strlen("READY=1");
3891cef054e8Santirez 
3892cef054e8Santirez     memset(&hdr, 0, sizeof(hdr));
3893cef054e8Santirez     hdr.msg_name = &su;
3894cef054e8Santirez     hdr.msg_namelen = offsetof(struct sockaddr_un, sun_path) +
3895cef054e8Santirez         strlen(notify_socket);
3896cef054e8Santirez     hdr.msg_iov = &iov;
3897cef054e8Santirez     hdr.msg_iovlen = 1;
3898cef054e8Santirez 
3899cef054e8Santirez     unsetenv("NOTIFY_SOCKET");
3900cef054e8Santirez #ifdef HAVE_MSG_NOSIGNAL
3901cef054e8Santirez     sendto_flags |= MSG_NOSIGNAL;
3902cef054e8Santirez #endif
3903cef054e8Santirez     if (sendmsg(fd, &hdr, sendto_flags) < 0) {
390432f80e2fSantirez         serverLog(LL_WARNING, "Can't send notification to systemd");
3905cef054e8Santirez         close(fd);
3906cef054e8Santirez         return 0;
3907cef054e8Santirez     }
3908cef054e8Santirez     close(fd);
3909cef054e8Santirez     return 1;
3910cef054e8Santirez }
3911cef054e8Santirez 
redisIsSupervised(int mode)3912cef054e8Santirez int redisIsSupervised(int mode) {
391332f80e2fSantirez     if (mode == SUPERVISED_AUTODETECT) {
3914cef054e8Santirez         const char *upstart_job = getenv("UPSTART_JOB");
3915cef054e8Santirez         const char *notify_socket = getenv("NOTIFY_SOCKET");
3916cef054e8Santirez 
3917cef054e8Santirez         if (upstart_job) {
3918cef054e8Santirez             redisSupervisedUpstart();
3919cef054e8Santirez         } else if (notify_socket) {
3920cef054e8Santirez             redisSupervisedSystemd();
3921cef054e8Santirez         }
392232f80e2fSantirez     } else if (mode == SUPERVISED_UPSTART) {
3923cef054e8Santirez         return redisSupervisedUpstart();
392432f80e2fSantirez     } else if (mode == SUPERVISED_SYSTEMD) {
3925cef054e8Santirez         return redisSupervisedSystemd();
3926cef054e8Santirez     }
3927cef054e8Santirez 
3928cef054e8Santirez     return 0;
3929cef054e8Santirez }
3930cef054e8Santirez 
3931cef054e8Santirez 
main(int argc,char ** argv)3932cef054e8Santirez int main(int argc, char **argv) {
3933cef054e8Santirez     struct timeval tv;
39341db84c21Santirez     int j;
3935cef054e8Santirez 
3936cef054e8Santirez #ifdef REDIS_TEST
3937cef054e8Santirez     if (argc == 3 && !strcasecmp(argv[1], "test")) {
3938cef054e8Santirez         if (!strcasecmp(argv[2], "ziplist")) {
3939cef054e8Santirez             return ziplistTest(argc, argv);
3940cef054e8Santirez         } else if (!strcasecmp(argv[2], "quicklist")) {
3941cef054e8Santirez             quicklistTest(argc, argv);
3942cef054e8Santirez         } else if (!strcasecmp(argv[2], "intset")) {
3943cef054e8Santirez             return intsetTest(argc, argv);
3944cef054e8Santirez         } else if (!strcasecmp(argv[2], "zipmap")) {
3945cef054e8Santirez             return zipmapTest(argc, argv);
3946cef054e8Santirez         } else if (!strcasecmp(argv[2], "sha1test")) {
3947cef054e8Santirez             return sha1Test(argc, argv);
3948cef054e8Santirez         } else if (!strcasecmp(argv[2], "util")) {
3949cef054e8Santirez             return utilTest(argc, argv);
3950cef054e8Santirez         } else if (!strcasecmp(argv[2], "sds")) {
3951cef054e8Santirez             return sdsTest(argc, argv);
3952cef054e8Santirez         } else if (!strcasecmp(argv[2], "endianconv")) {
3953cef054e8Santirez             return endianconvTest(argc, argv);
3954cef054e8Santirez         } else if (!strcasecmp(argv[2], "crc64")) {
3955cef054e8Santirez             return crc64Test(argc, argv);
3956cef054e8Santirez         }
3957cef054e8Santirez 
3958cef054e8Santirez         return -1; /* test not found */
3959cef054e8Santirez     }
3960cef054e8Santirez #endif
3961cef054e8Santirez 
3962cef054e8Santirez     /* We need to initialize our libraries, and the server configuration. */
3963cef054e8Santirez #ifdef INIT_SETPROCTITLE_REPLACEMENT
3964cef054e8Santirez     spt_init(argc, argv);
3965cef054e8Santirez #endif
3966cef054e8Santirez     setlocale(LC_COLLATE,"");
3967cef054e8Santirez     zmalloc_enable_thread_safeness();
3968cef054e8Santirez     zmalloc_set_oom_handler(redisOutOfMemoryHandler);
3969cef054e8Santirez     srand(time(NULL)^getpid());
3970cef054e8Santirez     gettimeofday(&tv,NULL);
3971cef054e8Santirez     dictSetHashFunctionSeed(tv.tv_sec^tv.tv_usec^getpid());
3972cef054e8Santirez     server.sentinel_mode = checkForSentinelMode(argc,argv);
3973cef054e8Santirez     initServerConfig();
3974cef054e8Santirez 
39751db84c21Santirez     /* Store the executable path and arguments in a safe place in order
39761db84c21Santirez      * to be able to restart the server later. */
39771db84c21Santirez     server.executable = getAbsolutePath(argv[0]);
39781db84c21Santirez     server.exec_argv = zmalloc(sizeof(char*)*(argc+1));
39791db84c21Santirez     server.exec_argv[argc] = NULL;
39801db84c21Santirez     for (j = 0; j < argc; j++) server.exec_argv[j] = zstrdup(argv[j]);
39811db84c21Santirez 
3982cef054e8Santirez     /* We need to init sentinel right now as parsing the configuration file
3983cef054e8Santirez      * in sentinel mode will have the effect of populating the sentinel
3984cef054e8Santirez      * data structures with master nodes to monitor. */
3985cef054e8Santirez     if (server.sentinel_mode) {
3986cef054e8Santirez         initSentinelConfig();
3987cef054e8Santirez         initSentinel();
3988cef054e8Santirez     }
3989cef054e8Santirez 
3990cef054e8Santirez     /* Check if we need to start in redis-check-rdb mode. We just execute
3991cef054e8Santirez      * the program main. However the program is part of the Redis executable
3992cef054e8Santirez      * so that we can easily execute an RDB check on loading errors. */
3993cef054e8Santirez     if (strstr(argv[0],"redis-check-rdb") != NULL)
399435b18bfbSantirez         redis_check_rdb_main(argc,argv);
3995cef054e8Santirez 
3996cef054e8Santirez     if (argc >= 2) {
39971db84c21Santirez         j = 1; /* First option to parse in argv[] */
3998cef054e8Santirez         sds options = sdsempty();
3999cef054e8Santirez         char *configfile = NULL;
4000cef054e8Santirez 
4001cef054e8Santirez         /* Handle special options --help and --version */
4002cef054e8Santirez         if (strcmp(argv[1], "-v") == 0 ||
4003cef054e8Santirez             strcmp(argv[1], "--version") == 0) version();
4004cef054e8Santirez         if (strcmp(argv[1], "--help") == 0 ||
4005cef054e8Santirez             strcmp(argv[1], "-h") == 0) usage();
4006cef054e8Santirez         if (strcmp(argv[1], "--test-memory") == 0) {
4007cef054e8Santirez             if (argc == 3) {
4008cef054e8Santirez                 memtest(atoi(argv[2]),50);
4009cef054e8Santirez                 exit(0);
4010cef054e8Santirez             } else {
4011cef054e8Santirez                 fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n");
4012cef054e8Santirez                 fprintf(stderr,"Example: ./redis-server --test-memory 4096\n\n");
4013cef054e8Santirez                 exit(1);
4014cef054e8Santirez             }
4015cef054e8Santirez         }
4016cef054e8Santirez 
4017cef054e8Santirez         /* First argument is the config file name? */
40181db84c21Santirez         if (argv[j][0] != '-' || argv[j][1] != '-') {
40191db84c21Santirez             configfile = argv[j];
40201db84c21Santirez             server.configfile = getAbsolutePath(configfile);
40211db84c21Santirez             /* Replace the config file in server.exec_argv with
40221db84c21Santirez              * its absoulte path. */
40231db84c21Santirez             zfree(server.exec_argv[j]);
40241db84c21Santirez             server.exec_argv[j] = zstrdup(server.configfile);
40251db84c21Santirez             j++;
40261db84c21Santirez         }
40271db84c21Santirez 
4028cef054e8Santirez         /* All the other options are parsed and conceptually appended to the
4029cef054e8Santirez          * configuration file. For instance --port 6380 will generate the
4030cef054e8Santirez          * string "port 6380\n" to be parsed after the actual file name
4031cef054e8Santirez          * is parsed, if any. */
4032cef054e8Santirez         while(j != argc) {
4033cef054e8Santirez             if (argv[j][0] == '-' && argv[j][1] == '-') {
4034cef054e8Santirez                 /* Option name */
4035cef054e8Santirez                 if (!strcmp(argv[j], "--check-rdb")) {
4036cef054e8Santirez                     /* Argument has no options, need to skip for parsing. */
4037cef054e8Santirez                     j++;
4038cef054e8Santirez                     continue;
4039cef054e8Santirez                 }
4040cef054e8Santirez                 if (sdslen(options)) options = sdscat(options,"\n");
4041cef054e8Santirez                 options = sdscat(options,argv[j]+2);
4042cef054e8Santirez                 options = sdscat(options," ");
4043cef054e8Santirez             } else {
4044cef054e8Santirez                 /* Option argument */
4045cef054e8Santirez                 options = sdscatrepr(options,argv[j],strlen(argv[j]));
4046cef054e8Santirez                 options = sdscat(options," ");
4047cef054e8Santirez             }
4048cef054e8Santirez             j++;
4049cef054e8Santirez         }
4050cef054e8Santirez         if (server.sentinel_mode && configfile && *configfile == '-') {
405132f80e2fSantirez             serverLog(LL_WARNING,
4052cef054e8Santirez                 "Sentinel config from STDIN not allowed.");
405332f80e2fSantirez             serverLog(LL_WARNING,
4054cef054e8Santirez                 "Sentinel needs config file on disk to save state.  Exiting...");
4055cef054e8Santirez             exit(1);
4056cef054e8Santirez         }
4057cef054e8Santirez         resetServerSaveParams();
4058cef054e8Santirez         loadServerConfig(configfile,options);
4059cef054e8Santirez         sdsfree(options);
4060cef054e8Santirez     } else {
406132f80e2fSantirez         serverLog(LL_WARNING, "Warning: no config file specified, using the default config. In order to specify a config file use %s /path/to/%s.conf", argv[0], server.sentinel_mode ? "sentinel" : "redis");
4062cef054e8Santirez     }
4063cef054e8Santirez 
4064cef054e8Santirez     server.supervised = redisIsSupervised(server.supervised_mode);
4065cef054e8Santirez     int background = server.daemonize && !server.supervised;
4066cef054e8Santirez     if (background) daemonize();
4067cef054e8Santirez 
4068cef054e8Santirez     initServer();
4069cef054e8Santirez     if (background || server.pidfile) createPidFile();
4070cef054e8Santirez     redisSetProcTitle(argv[0]);
4071cef054e8Santirez     redisAsciiArt();
40724bc42ca7Santirez     checkTcpBacklogSettings();
4073cef054e8Santirez 
4074cef054e8Santirez     if (!server.sentinel_mode) {
4075cef054e8Santirez         /* Things not needed when running in Sentinel mode. */
407632f80e2fSantirez         serverLog(LL_WARNING,"Server started, Redis version " REDIS_VERSION);
4077cef054e8Santirez     #ifdef __linux__
4078cef054e8Santirez         linuxMemoryWarnings();
4079cef054e8Santirez     #endif
4080cef054e8Santirez         loadDataFromDisk();
4081cef054e8Santirez         if (server.cluster_enabled) {
408240eb548aSantirez             if (verifyClusterConfigWithData() == C_ERR) {
408332f80e2fSantirez                 serverLog(LL_WARNING,
4084cef054e8Santirez                     "You can't have keys in a DB different than DB 0 when in "
4085cef054e8Santirez                     "Cluster mode. Exiting.");
4086cef054e8Santirez                 exit(1);
4087cef054e8Santirez             }
4088cef054e8Santirez         }
4089cef054e8Santirez         if (server.ipfd_count > 0)
409032f80e2fSantirez             serverLog(LL_NOTICE,"The server is now ready to accept connections on port %d", server.port);
4091cef054e8Santirez         if (server.sofd > 0)
409232f80e2fSantirez             serverLog(LL_NOTICE,"The server is now ready to accept connections at %s", server.unixsocket);
4093cef054e8Santirez     } else {
4094cef054e8Santirez         sentinelIsRunning();
4095cef054e8Santirez     }
4096cef054e8Santirez 
4097cef054e8Santirez     /* Warning the user about suspicious maxmemory setting. */
4098cef054e8Santirez     if (server.maxmemory > 0 && server.maxmemory < 1024*1024) {
409932f80e2fSantirez         serverLog(LL_WARNING,"WARNING: You specified a maxmemory value that is less than 1MB (current value is %llu bytes). Are you sure this is what you really want?", server.maxmemory);
4100cef054e8Santirez     }
4101cef054e8Santirez 
4102cef054e8Santirez     aeSetBeforeSleepProc(server.el,beforeSleep);
4103cef054e8Santirez     aeMain(server.el);
4104cef054e8Santirez     aeDeleteEventLoop(server.el);
4105cef054e8Santirez     return 0;
4106cef054e8Santirez }
4107cef054e8Santirez 
4108cef054e8Santirez /* The End */
4109