xref: /f-stack/app/redis-5.0.5/src/object.c (revision 572c4311)
1 /* Redis Object implementation.
2  *
3  * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  *   * Redistributions of source code must retain the above copyright notice,
10  *     this list of conditions and the following disclaimer.
11  *   * Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *   * Neither the name of Redis nor the names of its contributors may be used
15  *     to endorse or promote products derived from this software without
16  *     specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 #include "server.h"
32 #include <math.h>
33 #include <ctype.h>
34 
35 #ifdef __CYGWIN__
36 #define strtold(a,b) ((long double)strtod((a),(b)))
37 #endif
38 
39 /* ===================== Creation and parsing of objects ==================== */
40 
createObject(int type,void * ptr)41 robj *createObject(int type, void *ptr) {
42     robj *o = zmalloc(sizeof(*o));
43     o->type = type;
44     o->encoding = OBJ_ENCODING_RAW;
45     o->ptr = ptr;
46     o->refcount = 1;
47 
48     /* Set the LRU to the current lruclock (minutes resolution), or
49      * alternatively the LFU counter. */
50     if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
51         o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
52     } else {
53         o->lru = LRU_CLOCK();
54     }
55     return o;
56 }
57 
58 /* Set a special refcount in the object to make it "shared":
59  * incrRefCount and decrRefCount() will test for this special refcount
60  * and will not touch the object. This way it is free to access shared
61  * objects such as small integers from different threads without any
62  * mutex.
63  *
64  * A common patter to create shared objects:
65  *
66  * robj *myobject = makeObjectShared(createObject(...));
67  *
68  */
makeObjectShared(robj * o)69 robj *makeObjectShared(robj *o) {
70     serverAssert(o->refcount == 1);
71     o->refcount = OBJ_SHARED_REFCOUNT;
72     return o;
73 }
74 
75 /* Create a string object with encoding OBJ_ENCODING_RAW, that is a plain
76  * string object where o->ptr points to a proper sds string. */
createRawStringObject(const char * ptr,size_t len)77 robj *createRawStringObject(const char *ptr, size_t len) {
78     return createObject(OBJ_STRING, sdsnewlen(ptr,len));
79 }
80 
81 /* Create a string object with encoding OBJ_ENCODING_EMBSTR, that is
82  * an object where the sds string is actually an unmodifiable string
83  * allocated in the same chunk as the object itself. */
createEmbeddedStringObject(const char * ptr,size_t len)84 robj *createEmbeddedStringObject(const char *ptr, size_t len) {
85     robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr8)+len+1);
86     struct sdshdr8 *sh = (void*)(o+1);
87 
88     o->type = OBJ_STRING;
89     o->encoding = OBJ_ENCODING_EMBSTR;
90     o->ptr = sh+1;
91     o->refcount = 1;
92     if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
93         o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
94     } else {
95         o->lru = LRU_CLOCK();
96     }
97 
98     sh->len = len;
99     sh->alloc = len;
100     sh->flags = SDS_TYPE_8;
101     if (ptr == SDS_NOINIT)
102         sh->buf[len] = '\0';
103     else if (ptr) {
104         memcpy(sh->buf,ptr,len);
105         sh->buf[len] = '\0';
106     } else {
107         memset(sh->buf,0,len+1);
108     }
109     return o;
110 }
111 
112 /* Create a string object with EMBSTR encoding if it is smaller than
113  * OBJ_ENCODING_EMBSTR_SIZE_LIMIT, otherwise the RAW encoding is
114  * used.
115  *
116  * The current limit of 44 is chosen so that the biggest string object
117  * we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
118 #define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 44
createStringObject(const char * ptr,size_t len)119 robj *createStringObject(const char *ptr, size_t len) {
120     if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
121         return createEmbeddedStringObject(ptr,len);
122     else
123         return createRawStringObject(ptr,len);
124 }
125 
126 /* Create a string object from a long long value. When possible returns a
127  * shared integer object, or at least an integer encoded one.
128  *
129  * If valueobj is non zero, the function avoids returning a a shared
130  * integer, because the object is going to be used as value in the Redis key
131  * space (for instance when the INCR command is used), so we want LFU/LRU
132  * values specific for each key. */
createStringObjectFromLongLongWithOptions(long long value,int valueobj)133 robj *createStringObjectFromLongLongWithOptions(long long value, int valueobj) {
134     robj *o;
135 
136     if (server.maxmemory == 0 ||
137         !(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS))
138     {
139         /* If the maxmemory policy permits, we can still return shared integers
140          * even if valueobj is true. */
141         valueobj = 0;
142     }
143 
144     if (value >= 0 && value < OBJ_SHARED_INTEGERS && valueobj == 0) {
145         incrRefCount(shared.integers[value]);
146         o = shared.integers[value];
147     } else {
148         if (value >= LONG_MIN && value <= LONG_MAX) {
149             o = createObject(OBJ_STRING, NULL);
150             o->encoding = OBJ_ENCODING_INT;
151             o->ptr = (void*)((long)value);
152         } else {
153             o = createObject(OBJ_STRING,sdsfromlonglong(value));
154         }
155     }
156     return o;
157 }
158 
159 /* Wrapper for createStringObjectFromLongLongWithOptions() always demanding
160  * to create a shared object if possible. */
createStringObjectFromLongLong(long long value)161 robj *createStringObjectFromLongLong(long long value) {
162     return createStringObjectFromLongLongWithOptions(value,0);
163 }
164 
165 /* Wrapper for createStringObjectFromLongLongWithOptions() avoiding a shared
166  * object when LFU/LRU info are needed, that is, when the object is used
167  * as a value in the key space, and Redis is configured to evict based on
168  * LFU/LRU. */
createStringObjectFromLongLongForValue(long long value)169 robj *createStringObjectFromLongLongForValue(long long value) {
170     return createStringObjectFromLongLongWithOptions(value,1);
171 }
172 
173 /* Create a string object from a long double. If humanfriendly is non-zero
174  * it does not use exponential format and trims trailing zeroes at the end,
175  * however this results in loss of precision. Otherwise exp format is used
176  * and the output of snprintf() is not modified.
177  *
178  * The 'humanfriendly' option is used for INCRBYFLOAT and HINCRBYFLOAT. */
createStringObjectFromLongDouble(long double value,int humanfriendly)179 robj *createStringObjectFromLongDouble(long double value, int humanfriendly) {
180     char buf[MAX_LONG_DOUBLE_CHARS];
181     int len = ld2string(buf,sizeof(buf),value,humanfriendly);
182     return createStringObject(buf,len);
183 }
184 
185 /* Duplicate a string object, with the guarantee that the returned object
186  * has the same encoding as the original one.
187  *
188  * This function also guarantees that duplicating a small integer object
189  * (or a string object that contains a representation of a small integer)
190  * will always result in a fresh object that is unshared (refcount == 1).
191  *
192  * The resulting object always has refcount set to 1. */
dupStringObject(const robj * o)193 robj *dupStringObject(const robj *o) {
194     robj *d;
195 
196     serverAssert(o->type == OBJ_STRING);
197 
198     switch(o->encoding) {
199     case OBJ_ENCODING_RAW:
200         return createRawStringObject(o->ptr,sdslen(o->ptr));
201     case OBJ_ENCODING_EMBSTR:
202         return createEmbeddedStringObject(o->ptr,sdslen(o->ptr));
203     case OBJ_ENCODING_INT:
204         d = createObject(OBJ_STRING, NULL);
205         d->encoding = OBJ_ENCODING_INT;
206         d->ptr = o->ptr;
207         return d;
208     default:
209         serverPanic("Wrong encoding.");
210         break;
211     }
212 }
213 
createQuicklistObject(void)214 robj *createQuicklistObject(void) {
215     quicklist *l = quicklistCreate();
216     robj *o = createObject(OBJ_LIST,l);
217     o->encoding = OBJ_ENCODING_QUICKLIST;
218     return o;
219 }
220 
createZiplistObject(void)221 robj *createZiplistObject(void) {
222     unsigned char *zl = ziplistNew();
223     robj *o = createObject(OBJ_LIST,zl);
224     o->encoding = OBJ_ENCODING_ZIPLIST;
225     return o;
226 }
227 
createSetObject(void)228 robj *createSetObject(void) {
229     dict *d = dictCreate(&setDictType,NULL);
230     robj *o = createObject(OBJ_SET,d);
231     o->encoding = OBJ_ENCODING_HT;
232     return o;
233 }
234 
createIntsetObject(void)235 robj *createIntsetObject(void) {
236     intset *is = intsetNew();
237     robj *o = createObject(OBJ_SET,is);
238     o->encoding = OBJ_ENCODING_INTSET;
239     return o;
240 }
241 
createHashObject(void)242 robj *createHashObject(void) {
243     unsigned char *zl = ziplistNew();
244     robj *o = createObject(OBJ_HASH, zl);
245     o->encoding = OBJ_ENCODING_ZIPLIST;
246     return o;
247 }
248 
createZsetObject(void)249 robj *createZsetObject(void) {
250     zset *zs = zmalloc(sizeof(*zs));
251     robj *o;
252 
253     zs->dict = dictCreate(&zsetDictType,NULL);
254     zs->zsl = zslCreate();
255     o = createObject(OBJ_ZSET,zs);
256     o->encoding = OBJ_ENCODING_SKIPLIST;
257     return o;
258 }
259 
createZsetZiplistObject(void)260 robj *createZsetZiplistObject(void) {
261     unsigned char *zl = ziplistNew();
262     robj *o = createObject(OBJ_ZSET,zl);
263     o->encoding = OBJ_ENCODING_ZIPLIST;
264     return o;
265 }
266 
createStreamObject(void)267 robj *createStreamObject(void) {
268     stream *s = streamNew();
269     robj *o = createObject(OBJ_STREAM,s);
270     o->encoding = OBJ_ENCODING_STREAM;
271     return o;
272 }
273 
createModuleObject(moduleType * mt,void * value)274 robj *createModuleObject(moduleType *mt, void *value) {
275     moduleValue *mv = zmalloc(sizeof(*mv));
276     mv->type = mt;
277     mv->value = value;
278     return createObject(OBJ_MODULE,mv);
279 }
280 
/* Release the payload of a string object. Only RAW encoded strings have an
 * sds string to free here; for any other encoding this is a no-op. The
 * object itself is not freed. */
void freeStringObject(robj *o) {
    if (o->encoding != OBJ_ENCODING_RAW) return;
    sdsfree(o->ptr);
}
286 
/* Release the payload of a list object. Only the quicklist encoding is
 * supported: any other encoding triggers a panic. The object itself is not
 * freed. */
void freeListObject(robj *o) {
    if (o->encoding != OBJ_ENCODING_QUICKLIST) {
        serverPanic("Unknown list encoding type");
    } else {
        quicklistRelease(o->ptr);
    }
}
294 
/* Release the payload of a set object: the dict for the hash table
 * encoding, or the intset allocation for the intset encoding. Any other
 * encoding triggers a panic. The object itself is not freed. */
void freeSetObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_HT) {
        dictRelease((dict*) o->ptr);
    } else if (o->encoding == OBJ_ENCODING_INTSET) {
        zfree(o->ptr);
    } else {
        serverPanic("Unknown set encoding type");
    }
}
307 
/* Release the payload of a sorted set object. For the skiplist encoding
 * the dict, the skiplist and the zset structure holding them are released;
 * for the ziplist encoding the ziplist allocation is freed. Any other
 * encoding triggers a panic. The object itself is not freed. */
void freeZsetObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *z = o->ptr;

        dictRelease(z->dict);
        zslFree(z->zsl);
        zfree(z);
    } else if (o->encoding == OBJ_ENCODING_ZIPLIST) {
        zfree(o->ptr);
    } else {
        serverPanic("Unknown sorted set encoding");
    }
}
324 
/* Release the payload of a hash object: the dict for the hash table
 * encoding, or the ziplist allocation for the ziplist encoding. Any other
 * encoding triggers a panic. The object itself is not freed. */
void freeHashObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_HT) {
        dictRelease((dict*) o->ptr);
    } else if (o->encoding == OBJ_ENCODING_ZIPLIST) {
        zfree(o->ptr);
    } else {
        serverPanic("Unknown hash encoding type");
    }
}
338 
/* Release the payload of a module object: the wrapped value is handed to
 * the 'free' callback of its module type, then the moduleValue wrapper is
 * freed. The object itself is not freed. */
void freeModuleObject(robj *o) {
    moduleValue *wrapper = o->ptr;
    moduleType *mt = wrapper->type;

    mt->free(wrapper->value);
    zfree(wrapper);
}
344 
/* Release the payload of a stream object by passing it to freeStream().
 * The object itself is not freed. */
void freeStreamObject(robj *o) {
    stream *s = o->ptr;
    freeStream(s);
}
348 
/* Add one reference to the object. Objects whose reference count is the
 * special OBJ_SHARED_REFCOUNT value are left untouched. */
void incrRefCount(robj *o) {
    if (o->refcount == OBJ_SHARED_REFCOUNT) return;
    o->refcount++;
}
352 
/* Drop one reference from the object.
 *
 * When the last reference is dropped (refcount == 1) the payload is
 * released with the free function matching the object type, and then the
 * object itself is freed. Objects whose reference count is the special
 * OBJ_SHARED_REFCOUNT value are left untouched. Calling this on an object
 * with a reference count <= 0, or with an unknown type, triggers a panic. */
void decrRefCount(robj *o) {
    if (o->refcount != 1) {
        /* Not the last reference: just account for it. */
        if (o->refcount <= 0) serverPanic("decrRefCount against refcount <= 0");
        if (o->refcount != OBJ_SHARED_REFCOUNT) o->refcount--;
        return;
    }

    /* Last reference: release the type specific payload first. */
    switch(o->type) {
    case OBJ_STRING:
        freeStringObject(o);
        break;
    case OBJ_LIST:
        freeListObject(o);
        break;
    case OBJ_SET:
        freeSetObject(o);
        break;
    case OBJ_ZSET:
        freeZsetObject(o);
        break;
    case OBJ_HASH:
        freeHashObject(o);
        break;
    case OBJ_MODULE:
        freeModuleObject(o);
        break;
    case OBJ_STREAM:
        freeStreamObject(o);
        break;
    default:
        serverPanic("Unknown object type");
        break;
    }
    zfree(o);
}
371 
372 /* This variant of decrRefCount() gets its argument as void, and is useful
373  * as free method in data structures that expect a 'void free_object(void*)'
374  * prototype for the free method. */
decrRefCountVoid(void * o)375 void decrRefCountVoid(void *o) {
376     decrRefCount(o);
377 }
378 
379 /* This function set the ref count to zero without freeing the object.
380  * It is useful in order to pass a new object to functions incrementing
381  * the ref count of the received object. Example:
382  *
383  *    functionThatWillIncrementRefCount(resetRefCount(CreateObject(...)));
384  *
385  * Otherwise you need to resort to the less elegant pattern:
386  *
387  *    *obj = createObject(...);
388  *    functionThatWillIncrementRefCount(obj);
389  *    decrRefCount(obj);
390  */
resetRefCount(robj * obj)391 robj *resetRefCount(robj *obj) {
392     obj->refcount = 0;
393     return obj;
394 }
395 
/* Check that object 'o' has the expected 'type'. On a match 0 is returned.
 * On a mismatch the shared "wrong type" error is sent to client 'c' and 1
 * is returned. */
int checkType(client *c, robj *o, int type) {
    if (o->type == type) return 0;

    addReply(c,shared.wrongtypeerr);
    return 1;
}
403 
/* Return C_OK if string2ll() succeeds in converting the whole sds string
 * 's' (the value is then stored through 'llval'), C_ERR otherwise. */
int isSdsRepresentableAsLongLong(sds s, long long *llval) {
    if (string2ll(s,sdslen(s),llval)) return C_OK;
    return C_ERR;
}
407 
/* Return C_OK if the string object 'o' can be represented as a long long,
 * C_ERR otherwise. Integer encoded objects always qualify, and in that
 * case the value is stored in '*llval' only if 'llval' is not NULL. For
 * other encodings the check is delegated to isSdsRepresentableAsLongLong().
 * The object must be of type OBJ_STRING (asserted). */
int isObjectRepresentableAsLongLong(robj *o, long long *llval) {
    serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);

    if (o->encoding != OBJ_ENCODING_INT) {
        return isSdsRepresentableAsLongLong(o->ptr,llval);
    }
    /* The integer is stored directly inside the pointer field. */
    if (llval) *llval = (long) o->ptr;
    return C_OK;
}
417 
418 /* Optimize the SDS string inside the string object to require little space,
419  * in case there is more than 10% of free space at the end of the SDS
420  * string. This happens because SDS strings tend to overallocate to avoid
421  * wasting too much time in allocations when appending to the string. */
trimStringObjectIfNeeded(robj * o)422 void trimStringObjectIfNeeded(robj *o) {
423     if (o->encoding == OBJ_ENCODING_RAW &&
424         sdsavail(o->ptr) > sdslen(o->ptr)/10)
425     {
426         o->ptr = sdsRemoveFreeSpace(o->ptr);
427     }
428 }
429 
430 /* Try to encode a string object in order to save space */
tryObjectEncoding(robj * o)431 robj *tryObjectEncoding(robj *o) {
432     long value;
433     sds s = o->ptr;
434     size_t len;
435 
436     /* Make sure this is a string object, the only type we encode
437      * in this function. Other types use encoded memory efficient
438      * representations but are handled by the commands implementing
439      * the type. */
440     serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
441 
442     /* We try some specialized encoding only for objects that are
443      * RAW or EMBSTR encoded, in other words objects that are still
444      * in represented by an actually array of chars. */
445     if (!sdsEncodedObject(o)) return o;
446 
447     /* It's not safe to encode shared objects: shared objects can be shared
448      * everywhere in the "object space" of Redis and may end in places where
449      * they are not handled. We handle them only as values in the keyspace. */
450      if (o->refcount > 1) return o;
451 
452     /* Check if we can represent this string as a long integer.
453      * Note that we are sure that a string larger than 20 chars is not
454      * representable as a 32 nor 64 bit integer. */
455     len = sdslen(s);
456     if (len <= 20 && string2l(s,len,&value)) {
457         /* This object is encodable as a long. Try to use a shared object.
458          * Note that we avoid using shared integers when maxmemory is used
459          * because every object needs to have a private LRU field for the LRU
460          * algorithm to work well. */
461         if ((server.maxmemory == 0 ||
462             !(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS)) &&
463             value >= 0 &&
464             value < OBJ_SHARED_INTEGERS)
465         {
466             decrRefCount(o);
467             incrRefCount(shared.integers[value]);
468             return shared.integers[value];
469         } else {
470             if (o->encoding == OBJ_ENCODING_RAW) sdsfree(o->ptr);
471             o->encoding = OBJ_ENCODING_INT;
472             o->ptr = (void*) value;
473             return o;
474         }
475     }
476 
477     /* If the string is small and is still RAW encoded,
478      * try the EMBSTR encoding which is more efficient.
479      * In this representation the object and the SDS string are allocated
480      * in the same chunk of memory to save space and cache misses. */
481     if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT) {
482         robj *emb;
483 
484         if (o->encoding == OBJ_ENCODING_EMBSTR) return o;
485         emb = createEmbeddedStringObject(s,sdslen(s));
486         decrRefCount(o);
487         return emb;
488     }
489 
490     /* We can't encode the object...
491      *
492      * Do the last try, and at least optimize the SDS string inside
493      * the string object to require little space, in case there
494      * is more than 10% of free space at the end of the SDS string.
495      *
496      * We do that only for relatively large strings as this branch
497      * is only entered if the length of the string is greater than
498      * OBJ_ENCODING_EMBSTR_SIZE_LIMIT. */
499     trimStringObjectIfNeeded(o);
500 
501     /* Return the original object. */
502     return o;
503 }
504 
505 /* Get a decoded version of an encoded object (returned as a new object).
506  * If the object is already raw-encoded just increment the ref count. */
getDecodedObject(robj * o)507 robj *getDecodedObject(robj *o) {
508     robj *dec;
509 
510     if (sdsEncodedObject(o)) {
511         incrRefCount(o);
512         return o;
513     }
514     if (o->type == OBJ_STRING && o->encoding == OBJ_ENCODING_INT) {
515         char buf[32];
516 
517         ll2string(buf,32,(long)o->ptr);
518         dec = createStringObject(buf,strlen(buf));
519         return dec;
520     } else {
521         serverPanic("Unknown encoding type");
522     }
523 }
524 
525 /* Compare two string objects via strcmp() or strcoll() depending on flags.
526  * Note that the objects may be integer-encoded. In such a case we
527  * use ll2string() to get a string representation of the numbers on the stack
528  * and compare the strings, it's much faster than calling getDecodedObject().
529  *
530  * Important note: when REDIS_COMPARE_BINARY is used a binary-safe comparison
531  * is used. */
532 
533 #define REDIS_COMPARE_BINARY (1<<0)
534 #define REDIS_COMPARE_COLL (1<<1)
535 
compareStringObjectsWithFlags(robj * a,robj * b,int flags)536 int compareStringObjectsWithFlags(robj *a, robj *b, int flags) {
537     serverAssertWithInfo(NULL,a,a->type == OBJ_STRING && b->type == OBJ_STRING);
538     char bufa[128], bufb[128], *astr, *bstr;
539     size_t alen, blen, minlen;
540 
541     if (a == b) return 0;
542     if (sdsEncodedObject(a)) {
543         astr = a->ptr;
544         alen = sdslen(astr);
545     } else {
546         alen = ll2string(bufa,sizeof(bufa),(long) a->ptr);
547         astr = bufa;
548     }
549     if (sdsEncodedObject(b)) {
550         bstr = b->ptr;
551         blen = sdslen(bstr);
552     } else {
553         blen = ll2string(bufb,sizeof(bufb),(long) b->ptr);
554         bstr = bufb;
555     }
556     if (flags & REDIS_COMPARE_COLL) {
557         return strcoll(astr,bstr);
558     } else {
559         int cmp;
560 
561         minlen = (alen < blen) ? alen : blen;
562         cmp = memcmp(astr,bstr,minlen);
563         if (cmp == 0) return alen-blen;
564         return cmp;
565     }
566 }
567 
568 /* Wrapper for compareStringObjectsWithFlags() using binary comparison. */
compareStringObjects(robj * a,robj * b)569 int compareStringObjects(robj *a, robj *b) {
570     return compareStringObjectsWithFlags(a,b,REDIS_COMPARE_BINARY);
571 }
572 
573 /* Wrapper for compareStringObjectsWithFlags() using collation. */
collateStringObjects(robj * a,robj * b)574 int collateStringObjects(robj *a, robj *b) {
575     return compareStringObjectsWithFlags(a,b,REDIS_COMPARE_COLL);
576 }
577 
578 /* Equal string objects return 1 if the two objects are the same from the
579  * point of view of a string comparison, otherwise 0 is returned. Note that
580  * this function is faster then checking for (compareStringObject(a,b) == 0)
581  * because it can perform some more optimization. */
equalStringObjects(robj * a,robj * b)582 int equalStringObjects(robj *a, robj *b) {
583     if (a->encoding == OBJ_ENCODING_INT &&
584         b->encoding == OBJ_ENCODING_INT){
585         /* If both strings are integer encoded just check if the stored
586          * long is the same. */
587         return a->ptr == b->ptr;
588     } else {
589         return compareStringObjects(a,b) == 0;
590     }
591 }
592 
/* Return the length of a string object: the sds length for RAW and EMBSTR
 * encoded objects, or the value returned by sdigits10() for the stored
 * integer otherwise. The object must be of type OBJ_STRING (asserted). */
size_t stringObjectLen(robj *o) {
    serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);

    if (!sdsEncodedObject(o)) return sdigits10((long)o->ptr);
    return sdslen(o->ptr);
}
601 
getDoubleFromObject(const robj * o,double * target)602 int getDoubleFromObject(const robj *o, double *target) {
603     double value;
604     char *eptr;
605 
606     if (o == NULL) {
607         value = 0;
608     } else {
609         serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
610         if (sdsEncodedObject(o)) {
611             errno = 0;
612             value = strtod(o->ptr, &eptr);
613             if (sdslen(o->ptr) == 0 ||
614                 isspace(((const char*)o->ptr)[0]) ||
615                 (size_t)(eptr-(char*)o->ptr) != sdslen(o->ptr) ||
616                 (errno == ERANGE &&
617                     (value == HUGE_VAL || value == -HUGE_VAL || value == 0)) ||
618                 isnan(value))
619                 return C_ERR;
620         } else if (o->encoding == OBJ_ENCODING_INT) {
621             value = (long)o->ptr;
622         } else {
623             serverPanic("Unknown string encoding");
624         }
625     }
626     *target = value;
627     return C_OK;
628 }
629 
/* Like getDoubleFromObject(), but on failure an error is also sent to the
 * client 'c': 'msg' if it is not NULL, or a default message otherwise.
 * Returns C_OK and sets '*target' on success, C_ERR on failure. */
int getDoubleFromObjectOrReply(client *c, robj *o, double *target, const char *msg) {
    double parsed;

    if (getDoubleFromObject(o, &parsed) == C_OK) {
        *target = parsed;
        return C_OK;
    }

    addReplyError(c, msg != NULL ? (char*)msg : "value is not a valid float");
    return C_ERR;
}
643 
/* Convert a string object into a long double.
 *
 * A NULL object converts to 0. Integer encoded objects convert to the
 * stored integer. RAW and EMBSTR objects are parsed with strtold() and are
 * rejected when the string is empty, starts with a space character, is not
 * consumed entirely by the conversion, is out of range, or yields NaN.
 *
 * On success C_OK is returned and the value is stored in '*target'. On
 * failure C_ERR is returned and '*target' is left untouched. The function
 * panics if a non-NULL object has an unknown string encoding. */
int getLongDoubleFromObject(robj *o, long double *target) {
    long double value;
    char *eptr;

    if (o == NULL) {
        value = 0;
    } else {
        serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
        if (sdsEncodedObject(o)) {
            const char *str = o->ptr;
            size_t len = sdslen(o->ptr);

            errno = 0;
            value = strtold(str, &eptr);
            /* Notes:
             * - The character is converted to unsigned char before being
             *   handed to isspace(), since passing a negative value (other
             *   than EOF) to the <ctype.h> functions is undefined behavior.
             * - The overflow check uses HUGE_VALL, the long double
             *   constant that strtold() is specified to return on
             *   overflow, rather than the double constant HUGE_VAL. */
            if (len == 0 ||
                isspace((unsigned char)str[0]) ||
                (size_t)(eptr-str) != len ||
                (errno == ERANGE &&
                    (value == HUGE_VALL || value == -HUGE_VALL || value == 0)) ||
                isnan(value))
                return C_ERR;
        } else if (o->encoding == OBJ_ENCODING_INT) {
            value = (long)o->ptr;
        } else {
            serverPanic("Unknown string encoding");
        }
    }
    *target = value;
    return C_OK;
}
671 
/* Like getLongDoubleFromObject(), but on failure an error is also sent to
 * the client 'c': 'msg' if it is not NULL, or a default message otherwise.
 * Returns C_OK and sets '*target' on success, C_ERR on failure. */
int getLongDoubleFromObjectOrReply(client *c, robj *o, long double *target, const char *msg) {
    long double parsed;

    if (getLongDoubleFromObject(o, &parsed) == C_OK) {
        *target = parsed;
        return C_OK;
    }

    addReplyError(c, msg != NULL ? (char*)msg : "value is not a valid float");
    return C_ERR;
}
685 
/* Convert a string object into a long long.
 *
 * A NULL object converts to 0. Integer encoded objects convert to the
 * stored integer. RAW and EMBSTR objects are converted with string2ll().
 *
 * On success C_OK is returned and, if 'target' is not NULL, the value is
 * stored in '*target'. If string2ll() fails C_ERR is returned. The function
 * panics if a non-NULL object has an unknown string encoding. */
int getLongLongFromObject(robj *o, long long *target) {
    long long parsed = 0;

    if (o != NULL) {
        serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
        if (sdsEncodedObject(o)) {
            if (!string2ll(o->ptr,sdslen(o->ptr),&parsed)) return C_ERR;
        } else if (o->encoding == OBJ_ENCODING_INT) {
            /* The integer is stored directly inside the pointer field. */
            parsed = (long)o->ptr;
        } else {
            serverPanic("Unknown string encoding");
        }
    }

    if (target) *target = parsed;
    return C_OK;
}
704 
/* Like getLongLongFromObject(), but on failure an error is also sent to
 * the client 'c': 'msg' if it is not NULL, or a default message otherwise.
 * Returns C_OK and sets '*target' on success, C_ERR on failure. */
int getLongLongFromObjectOrReply(client *c, robj *o, long long *target, const char *msg) {
    long long parsed;

    if (getLongLongFromObject(o, &parsed) == C_OK) {
        *target = parsed;
        return C_OK;
    }

    addReplyError(c, msg != NULL ? (char*)msg
                                 : "value is not an integer or out of range");
    return C_ERR;
}
718 
/* Convert a string object into a long, replying to the client 'c' with an
 * error on failure.
 *
 * The conversion goes through getLongLongFromObjectOrReply(), which takes
 * care of the reply when the object is not an integer. If the resulting
 * value does not fit in a long, an error is sent here: 'msg' if it is not
 * NULL, or a default message otherwise.
 *
 * Returns C_OK and sets '*target' on success, C_ERR on failure. */
int getLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg) {
    long long wide;

    if (getLongLongFromObjectOrReply(c, o, &wide, msg) != C_OK) return C_ERR;

    if (wide >= LONG_MIN && wide <= LONG_MAX) {
        *target = wide;
        return C_OK;
    }

    addReplyError(c, msg != NULL ? (char*)msg : "value is out of range");
    return C_ERR;
}
734 
strEncoding(int encoding)735 char *strEncoding(int encoding) {
736     switch(encoding) {
737     case OBJ_ENCODING_RAW: return "raw";
738     case OBJ_ENCODING_INT: return "int";
739     case OBJ_ENCODING_HT: return "hashtable";
740     case OBJ_ENCODING_QUICKLIST: return "quicklist";
741     case OBJ_ENCODING_ZIPLIST: return "ziplist";
742     case OBJ_ENCODING_INTSET: return "intset";
743     case OBJ_ENCODING_SKIPLIST: return "skiplist";
744     case OBJ_ENCODING_EMBSTR: return "embstr";
745     default: return "unknown";
746     }
747 }
748 
749 /* =========================== Memory introspection ========================= */
750 
751 
752 /* This is an helper function with the goal of estimating the memory
753  * size of a radix tree that is used to store Stream IDs.
754  *
755  * Note: to guess the size of the radix tree is not trivial, so we
756  * approximate it considering 16 bytes of data overhead for each
757  * key (the ID), and then adding the number of bare nodes, plus some
758  * overhead due by the data and child pointers. This secret recipe
759  * was obtained by checking the average radix tree created by real
760  * workloads, and then adjusting the constants to get numbers that
761  * more or less match the real memory usage.
762  *
763  * Actually the number of nodes and keys may be different depending
764  * on the insertion speed and thus the ability of the radix tree
765  * to compress prefixes. */
streamRadixTreeMemoryUsage(rax * rax)766 size_t streamRadixTreeMemoryUsage(rax *rax) {
767     size_t size;
768     size = rax->numele * sizeof(streamID);
769     size += rax->numnodes * sizeof(raxNode);
770     /* Add a fixed overhead due to the aux data pointer, children, ... */
771     size += rax->numnodes * sizeof(long)*30;
772     return size;
773 }
774 
775 /* Returns the size in bytes consumed by the key's value in RAM.
776  * Note that the returned value is just an approximation, especially in the
777  * case of aggregated data types where only "sample_size" elements
778  * are checked and averaged to estimate the total size. */
779 #define OBJ_COMPUTE_SIZE_DEF_SAMPLES 5 /* Default sample size. */
objectComputeSize(robj * o,size_t sample_size)780 size_t objectComputeSize(robj *o, size_t sample_size) {
781     sds ele, ele2;
782     dict *d;
783     dictIterator *di;
784     struct dictEntry *de;
785     size_t asize = 0, elesize = 0, samples = 0;
786 
787     if (o->type == OBJ_STRING) {
788         if(o->encoding == OBJ_ENCODING_INT) {
789             asize = sizeof(*o);
790         } else if(o->encoding == OBJ_ENCODING_RAW) {
791             asize = sdsAllocSize(o->ptr)+sizeof(*o);
792         } else if(o->encoding == OBJ_ENCODING_EMBSTR) {
793             asize = sdslen(o->ptr)+2+sizeof(*o);
794         } else {
795             serverPanic("Unknown string encoding");
796         }
797     } else if (o->type == OBJ_LIST) {
798         if (o->encoding == OBJ_ENCODING_QUICKLIST) {
799             quicklist *ql = o->ptr;
800             quicklistNode *node = ql->head;
801             asize = sizeof(*o)+sizeof(quicklist);
802             do {
803                 elesize += sizeof(quicklistNode)+ziplistBlobLen(node->zl);
804                 samples++;
805             } while ((node = node->next) && samples < sample_size);
806             asize += (double)elesize/samples*ql->len;
807         } else if (o->encoding == OBJ_ENCODING_ZIPLIST) {
808             asize = sizeof(*o)+ziplistBlobLen(o->ptr);
809         } else {
810             serverPanic("Unknown list encoding");
811         }
812     } else if (o->type == OBJ_SET) {
813         if (o->encoding == OBJ_ENCODING_HT) {
814             d = o->ptr;
815             di = dictGetIterator(d);
816             asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
817             while((de = dictNext(di)) != NULL && samples < sample_size) {
818                 ele = dictGetKey(de);
819                 elesize += sizeof(struct dictEntry) + sdsAllocSize(ele);
820                 samples++;
821             }
822             dictReleaseIterator(di);
823             if (samples) asize += (double)elesize/samples*dictSize(d);
824         } else if (o->encoding == OBJ_ENCODING_INTSET) {
825             intset *is = o->ptr;
826             asize = sizeof(*o)+sizeof(*is)+is->encoding*is->length;
827         } else {
828             serverPanic("Unknown set encoding");
829         }
830     } else if (o->type == OBJ_ZSET) {
831         if (o->encoding == OBJ_ENCODING_ZIPLIST) {
832             asize = sizeof(*o)+(ziplistBlobLen(o->ptr));
833         } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
834             d = ((zset*)o->ptr)->dict;
835             zskiplist *zsl = ((zset*)o->ptr)->zsl;
836             zskiplistNode *znode = zsl->header->level[0].forward;
837             asize = sizeof(*o)+sizeof(zset)+(sizeof(struct dictEntry*)*dictSlots(d));
838             while(znode != NULL && samples < sample_size) {
839                 elesize += sdsAllocSize(znode->ele);
840                 elesize += sizeof(struct dictEntry) + zmalloc_size(znode);
841                 samples++;
842                 znode = znode->level[0].forward;
843             }
844             if (samples) asize += (double)elesize/samples*dictSize(d);
845         } else {
846             serverPanic("Unknown sorted set encoding");
847         }
848     } else if (o->type == OBJ_HASH) {
849         if (o->encoding == OBJ_ENCODING_ZIPLIST) {
850             asize = sizeof(*o)+(ziplistBlobLen(o->ptr));
851         } else if (o->encoding == OBJ_ENCODING_HT) {
852             d = o->ptr;
853             di = dictGetIterator(d);
854             asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
855             while((de = dictNext(di)) != NULL && samples < sample_size) {
856                 ele = dictGetKey(de);
857                 ele2 = dictGetVal(de);
858                 elesize += sdsAllocSize(ele) + sdsAllocSize(ele2);
859                 elesize += sizeof(struct dictEntry);
860                 samples++;
861             }
862             dictReleaseIterator(di);
863             if (samples) asize += (double)elesize/samples*dictSize(d);
864         } else {
865             serverPanic("Unknown hash encoding");
866         }
867     } else if (o->type == OBJ_STREAM) {
868         stream *s = o->ptr;
869         asize = sizeof(*o);
870         asize += streamRadixTreeMemoryUsage(s->rax);
871 
872         /* Now we have to add the listpacks. The last listpack is often non
873          * complete, so we estimate the size of the first N listpacks, and
874          * use the average to compute the size of the first N-1 listpacks, and
875          * finally add the real size of the last node. */
876         raxIterator ri;
877         raxStart(&ri,s->rax);
878         raxSeek(&ri,"^",NULL,0);
879         size_t lpsize = 0, samples = 0;
880         while(samples < sample_size && raxNext(&ri)) {
881             unsigned char *lp = ri.data;
882             lpsize += lpBytes(lp);
883             samples++;
884         }
885         if (s->rax->numele <= samples) {
886             asize += lpsize;
887         } else {
888             if (samples) lpsize /= samples; /* Compute the average. */
889             asize += lpsize * (s->rax->numele-1);
890             /* No need to check if seek succeeded, we enter this branch only
891              * if there are a few elements in the radix tree. */
892             raxSeek(&ri,"$",NULL,0);
893             raxNext(&ri);
894             asize += lpBytes(ri.data);
895         }
896         raxStop(&ri);
897 
898         /* Consumer groups also have a non trivial memory overhead if there
899          * are many consumers and many groups, let's count at least the
900          * overhead of the pending entries in the groups and consumers
901          * PELs. */
902         if (s->cgroups) {
903             raxStart(&ri,s->cgroups);
904             raxSeek(&ri,"^",NULL,0);
905             while(raxNext(&ri)) {
906                 streamCG *cg = ri.data;
907                 asize += sizeof(*cg);
908                 asize += streamRadixTreeMemoryUsage(cg->pel);
909                 asize += sizeof(streamNACK)*raxSize(cg->pel);
910 
911                 /* For each consumer we also need to add the basic data
912                  * structures and the PEL memory usage. */
913                 raxIterator cri;
914                 raxStart(&cri,cg->consumers);
915                 raxSeek(&cri,"^",NULL,0);
916                 while(raxNext(&cri)) {
917                     streamConsumer *consumer = cri.data;
918                     asize += sizeof(*consumer);
919                     asize += sdslen(consumer->name);
920                     asize += streamRadixTreeMemoryUsage(consumer->pel);
921                     /* Don't count NACKs again, they are shared with the
922                      * consumer group PEL. */
923                 }
924                 raxStop(&cri);
925             }
926             raxStop(&ri);
927         }
928     } else if (o->type == OBJ_MODULE) {
929         moduleValue *mv = o->ptr;
930         moduleType *mt = mv->type;
931         if (mt->mem_usage != NULL) {
932             asize = mt->mem_usage(mv->value);
933         } else {
934             asize = 0;
935         }
936     } else {
937         serverPanic("Unknown object type");
938     }
939     return asize;
940 }
941 
942 /* Release data obtained with getMemoryOverheadData(). */
freeMemoryOverheadData(struct redisMemOverhead * mh)943 void freeMemoryOverheadData(struct redisMemOverhead *mh) {
944     zfree(mh->db);
945     zfree(mh);
946 }
947 
948 /* Return a struct redisMemOverhead filled with memory overhead
949  * information used for the MEMORY OVERHEAD and INFO command. The returned
950  * structure pointer should be freed calling freeMemoryOverheadData(). */
getMemoryOverheadData(void)951 struct redisMemOverhead *getMemoryOverheadData(void) {
952     int j;
953     size_t mem_total = 0;
954     size_t mem = 0;
955     size_t zmalloc_used = zmalloc_used_memory();
956     struct redisMemOverhead *mh = zcalloc(sizeof(*mh));
957 
958     mh->total_allocated = zmalloc_used;
959     mh->startup_allocated = server.initial_memory_usage;
960     mh->peak_allocated = server.stat_peak_memory;
961     mh->total_frag =
962         (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.zmalloc_used;
963     mh->total_frag_bytes =
964         server.cron_malloc_stats.process_rss - server.cron_malloc_stats.zmalloc_used;
965     mh->allocator_frag =
966         (float)server.cron_malloc_stats.allocator_active / server.cron_malloc_stats.allocator_allocated;
967     mh->allocator_frag_bytes =
968         server.cron_malloc_stats.allocator_active - server.cron_malloc_stats.allocator_allocated;
969     mh->allocator_rss =
970         (float)server.cron_malloc_stats.allocator_resident / server.cron_malloc_stats.allocator_active;
971     mh->allocator_rss_bytes =
972         server.cron_malloc_stats.allocator_resident - server.cron_malloc_stats.allocator_active;
973     mh->rss_extra =
974         (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.allocator_resident;
975     mh->rss_extra_bytes =
976         server.cron_malloc_stats.process_rss - server.cron_malloc_stats.allocator_resident;
977 
978     mem_total += server.initial_memory_usage;
979 
980     mem = 0;
981     if (server.repl_backlog)
982         mem += zmalloc_size(server.repl_backlog);
983     mh->repl_backlog = mem;
984     mem_total += mem;
985 
986     mem = 0;
987     if (listLength(server.slaves)) {
988         listIter li;
989         listNode *ln;
990 
991         listRewind(server.slaves,&li);
992         while((ln = listNext(&li))) {
993             client *c = listNodeValue(ln);
994             mem += getClientOutputBufferMemoryUsage(c);
995             mem += sdsAllocSize(c->querybuf);
996             mem += sizeof(client);
997         }
998     }
999     mh->clients_slaves = mem;
1000     mem_total+=mem;
1001 
1002     mem = 0;
1003     if (listLength(server.clients)) {
1004         listIter li;
1005         listNode *ln;
1006 
1007         listRewind(server.clients,&li);
1008         while((ln = listNext(&li))) {
1009             client *c = listNodeValue(ln);
1010             if (c->flags & CLIENT_SLAVE && !(c->flags & CLIENT_MONITOR))
1011                 continue;
1012             mem += getClientOutputBufferMemoryUsage(c);
1013             mem += sdsAllocSize(c->querybuf);
1014             mem += sizeof(client);
1015         }
1016     }
1017     mh->clients_normal = mem;
1018     mem_total+=mem;
1019 
1020     mem = 0;
1021     if (server.aof_state != AOF_OFF) {
1022         mem += sdsalloc(server.aof_buf);
1023         mem += aofRewriteBufferSize();
1024     }
1025     mh->aof_buffer = mem;
1026     mem_total+=mem;
1027 
1028     mem = server.lua_scripts_mem;
1029     mem += dictSize(server.lua_scripts) * sizeof(dictEntry) +
1030         dictSlots(server.lua_scripts) * sizeof(dictEntry*);
1031     mem += dictSize(server.repl_scriptcache_dict) * sizeof(dictEntry) +
1032         dictSlots(server.repl_scriptcache_dict) * sizeof(dictEntry*);
1033     if (listLength(server.repl_scriptcache_fifo) > 0) {
1034         mem += listLength(server.repl_scriptcache_fifo) * (sizeof(listNode) +
1035             sdsZmallocSize(listNodeValue(listFirst(server.repl_scriptcache_fifo))));
1036     }
1037     mh->lua_caches = mem;
1038     mem_total+=mem;
1039 
1040     for (j = 0; j < server.dbnum; j++) {
1041         redisDb *db = server.db+j;
1042         long long keyscount = dictSize(db->dict);
1043         if (keyscount==0) continue;
1044 
1045         mh->total_keys += keyscount;
1046         mh->db = zrealloc(mh->db,sizeof(mh->db[0])*(mh->num_dbs+1));
1047         mh->db[mh->num_dbs].dbid = j;
1048 
1049         mem = dictSize(db->dict) * sizeof(dictEntry) +
1050               dictSlots(db->dict) * sizeof(dictEntry*) +
1051               dictSize(db->dict) * sizeof(robj);
1052         mh->db[mh->num_dbs].overhead_ht_main = mem;
1053         mem_total+=mem;
1054 
1055         mem = dictSize(db->expires) * sizeof(dictEntry) +
1056               dictSlots(db->expires) * sizeof(dictEntry*);
1057         mh->db[mh->num_dbs].overhead_ht_expires = mem;
1058         mem_total+=mem;
1059 
1060         mh->num_dbs++;
1061     }
1062 
1063     mh->overhead_total = mem_total;
1064     mh->dataset = zmalloc_used - mem_total;
1065     mh->peak_perc = (float)zmalloc_used*100/mh->peak_allocated;
1066 
1067     /* Metrics computed after subtracting the startup memory from
1068      * the total memory. */
1069     size_t net_usage = 1;
1070     if (zmalloc_used > mh->startup_allocated)
1071         net_usage = zmalloc_used - mh->startup_allocated;
1072     mh->dataset_perc = (float)mh->dataset*100/net_usage;
1073     mh->bytes_per_key = mh->total_keys ? (net_usage / mh->total_keys) : 0;
1074 
1075     return mh;
1076 }
1077 
1078 /* Helper for "MEMORY allocator-stats", used as a callback for the jemalloc
1079  * stats output. */
inputCatSds(void * result,const char * str)1080 void inputCatSds(void *result, const char *str) {
1081     /* result is actually a (sds *), so re-cast it here */
1082     sds *info = (sds *)result;
1083     *info = sdscat(*info, str);
1084 }
1085 
1086 /* This implements MEMORY DOCTOR. An human readable analysis of the Redis
1087  * memory condition. */
getMemoryDoctorReport(void)1088 sds getMemoryDoctorReport(void) {
1089     int empty = 0;          /* Instance is empty or almost empty. */
1090     int big_peak = 0;       /* Memory peak is much larger than used mem. */
1091     int high_frag = 0;      /* High fragmentation. */
1092     int high_alloc_frag = 0;/* High allocator fragmentation. */
1093     int high_proc_rss = 0;  /* High process rss overhead. */
1094     int high_alloc_rss = 0; /* High rss overhead. */
1095     int big_slave_buf = 0;  /* Slave buffers are too big. */
1096     int big_client_buf = 0; /* Client buffers are too big. */
1097     int many_scripts = 0;   /* Script cache has too many scripts. */
1098     int num_reports = 0;
1099     struct redisMemOverhead *mh = getMemoryOverheadData();
1100 
1101     if (mh->total_allocated < (1024*1024*5)) {
1102         empty = 1;
1103         num_reports++;
1104     } else {
1105         /* Peak is > 150% of current used memory? */
1106         if (((float)mh->peak_allocated / mh->total_allocated) > 1.5) {
1107             big_peak = 1;
1108             num_reports++;
1109         }
1110 
1111         /* Fragmentation is higher than 1.4 and 10MB ?*/
1112         if (mh->total_frag > 1.4 && mh->total_frag_bytes > 10<<20) {
1113             high_frag = 1;
1114             num_reports++;
1115         }
1116 
1117         /* External fragmentation is higher than 1.1 and 10MB? */
1118         if (mh->allocator_frag > 1.1 && mh->allocator_frag_bytes > 10<<20) {
1119             high_alloc_frag = 1;
1120             num_reports++;
1121         }
1122 
1123         /* Allocator fss is higher than 1.1 and 10MB ? */
1124         if (mh->allocator_rss > 1.1 && mh->allocator_rss_bytes > 10<<20) {
1125             high_alloc_rss = 1;
1126             num_reports++;
1127         }
1128 
1129         /* Non-Allocator fss is higher than 1.1 and 10MB ? */
1130         if (mh->rss_extra > 1.1 && mh->rss_extra_bytes > 10<<20) {
1131             high_proc_rss = 1;
1132             num_reports++;
1133         }
1134 
1135         /* Clients using more than 200k each average? */
1136         long numslaves = listLength(server.slaves);
1137         long numclients = listLength(server.clients)-numslaves;
1138         if (mh->clients_normal / numclients > (1024*200)) {
1139             big_client_buf = 1;
1140             num_reports++;
1141         }
1142 
1143         /* Slaves using more than 10 MB each? */
1144         if (numslaves > 0 && mh->clients_slaves / numslaves > (1024*1024*10)) {
1145             big_slave_buf = 1;
1146             num_reports++;
1147         }
1148 
1149         /* Too many scripts are cached? */
1150         if (dictSize(server.lua_scripts) > 1000) {
1151             many_scripts = 1;
1152             num_reports++;
1153         }
1154     }
1155 
1156     sds s;
1157     if (num_reports == 0) {
1158         s = sdsnew(
1159         "Hi Sam, I can't find any memory issue in your instance. "
1160         "I can only account for what occurs on this base.\n");
1161     } else if (empty == 1) {
1162         s = sdsnew(
1163         "Hi Sam, this instance is empty or is using very little memory, "
1164         "my issues detector can't be used in these conditions. "
1165         "Please, leave for your mission on Earth and fill it with some data. "
1166         "The new Sam and I will be back to our programming as soon as I "
1167         "finished rebooting.\n");
1168     } else {
1169         s = sdsnew("Sam, I detected a few issues in this Redis instance memory implants:\n\n");
1170         if (big_peak) {
1171             s = sdscat(s," * Peak memory: In the past this instance used more than 150% the memory that is currently using. The allocator is normally not able to release memory after a peak, so you can expect to see a big fragmentation ratio, however this is actually harmless and is only due to the memory peak, and if the Redis instance Resident Set Size (RSS) is currently bigger than expected, the memory will be used as soon as you fill the Redis instance with more data. If the memory peak was only occasional and you want to try to reclaim memory, please try the MEMORY PURGE command, otherwise the only other option is to shutdown and restart the instance.\n\n");
1172         }
1173         if (high_frag) {
1174             s = sdscatprintf(s," * High total RSS: This instance has a memory fragmentation and RSS overhead greater than 1.4 (this means that the Resident Set Size of the Redis process is much larger than the sum of the logical allocations Redis performed). This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. If the problem is a large peak memory, then there is no issue. Otherwise, make sure you are using the Jemalloc allocator and not the default libc malloc. Note: The currently used allocator is \"%s\".\n\n", ZMALLOC_LIB);
1175         }
1176         if (high_alloc_frag) {
1177             s = sdscatprintf(s," * High allocator fragmentation: This instance has an allocator external fragmentation greater than 1.1. This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. You can try enabling 'activedefrag' config option.\n\n");
1178         }
1179         if (high_alloc_rss) {
1180             s = sdscatprintf(s," * High allocator RSS overhead: This instance has an RSS memory overhead is greater than 1.1 (this means that the Resident Set Size of the allocator is much larger than the sum what the allocator actually holds). This problem is usually due to a large peak memory (check if there is a peak memory entry above in the report), you can try the MEMORY PURGE command to reclaim it.\n\n");
1181         }
1182         if (high_proc_rss) {
1183             s = sdscatprintf(s," * High process RSS overhead: This instance has non-allocator RSS memory overhead is greater than 1.1 (this means that the Resident Set Size of the Redis process is much larger than the RSS the allocator holds). This problem may be due to Lua scripts or Modules.\n\n");
1184         }
1185         if (big_slave_buf) {
1186             s = sdscat(s," * Big replica buffers: The replica output buffers in this instance are greater than 10MB for each replica (on average). This likely means that there is some replica instance that is struggling receiving data, either because it is too slow or because of networking issues. As a result, data piles on the master output buffers. Please try to identify what replica is not receiving data correctly and why. You can use the INFO output in order to check the replicas delays and the CLIENT LIST command to check the output buffers of each replica.\n\n");
1187         }
1188         if (big_client_buf) {
1189             s = sdscat(s," * Big client buffers: The clients output buffers in this instance are greater than 200K per client (on average). This may result from different causes, like Pub/Sub clients subscribed to channels bot not receiving data fast enough, so that data piles on the Redis instance output buffer, or clients sending commands with large replies or very large sequences of commands in the same pipeline. Please use the CLIENT LIST command in order to investigate the issue if it causes problems in your instance, or to understand better why certain clients are using a big amount of memory.\n\n");
1190         }
1191         if (many_scripts) {
1192             s = sdscat(s," * Many scripts: There seem to be many cached scripts in this instance (more than 1000). This may be because scripts are generated and `EVAL`ed, instead of being parameterized (with KEYS and ARGV), `SCRIPT LOAD`ed and `EVALSHA`ed. Unless `SCRIPT FLUSH` is called periodically, the scripts' caches may end up consuming most of your memory.\n\n");
1193         }
1194         s = sdscat(s,"I'm here to keep you safe, Sam. I want to help you.\n");
1195     }
1196     freeMemoryOverheadData(mh);
1197     return s;
1198 }
1199 
1200 /* Set the object LRU/LFU depending on server.maxmemory_policy.
1201  * The lfu_freq arg is only relevant if policy is MAXMEMORY_FLAG_LFU.
1202  * The lru_idle and lru_clock args are only relevant if policy
1203  * is MAXMEMORY_FLAG_LRU.
1204  * Either or both of them may be <0, in that case, nothing is set. */
objectSetLRUOrLFU(robj * val,long long lfu_freq,long long lru_idle,long long lru_clock)1205 void objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle,
1206                        long long lru_clock) {
1207     if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
1208         if (lfu_freq >= 0) {
1209             serverAssert(lfu_freq <= 255);
1210             val->lru = (LFUGetTimeInMinutes()<<8) | lfu_freq;
1211         }
1212     } else if (lru_idle >= 0) {
1213         /* Provided LRU idle time is in seconds. Scale
1214          * according to the LRU clock resolution this Redis
1215          * instance was compiled with (normally 1000 ms, so the
1216          * below statement will expand to lru_idle*1000/1000. */
1217         lru_idle = lru_idle*1000/LRU_CLOCK_RESOLUTION;
1218         long lru_abs = lru_clock - lru_idle; /* Absolute access time. */
1219         /* If the LRU field underflows (since LRU it is a wrapping
1220          * clock), the best we can do is to provide a large enough LRU
1221          * that is half-way in the circlular LRU clock we use: this way
1222          * the computed idle time for this object will stay high for quite
1223          * some time. */
1224         if (lru_abs < 0)
1225             lru_abs = (lru_clock+(LRU_CLOCK_MAX/2)) % LRU_CLOCK_MAX;
1226         val->lru = lru_abs;
1227     }
1228 }
1229 
1230 /* ======================= The OBJECT and MEMORY commands =================== */
1231 
1232 /* This is a helper function for the OBJECT command. We need to lookup keys
1233  * without any modification of LRU or other parameters. */
objectCommandLookup(client * c,robj * key)1234 robj *objectCommandLookup(client *c, robj *key) {
1235     dictEntry *de;
1236 
1237     if ((de = dictFind(c->db->dict,key->ptr)) == NULL) return NULL;
1238     return (robj*) dictGetVal(de);
1239 }
1240 
/* Like objectCommandLookup(), but when the key is missing 'reply' is sent
 * to the client before returning NULL. */
robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply) {
    robj *found = objectCommandLookup(c,key);

    if (found == NULL) addReply(c, reply);
    return found;
}
1247 
1248 /* Object command allows to inspect the internals of an Redis Object.
1249  * Usage: OBJECT <refcount|encoding|idletime|freq> <key> */
objectCommand(client * c)1250 void objectCommand(client *c) {
1251     robj *o;
1252 
1253     if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
1254         const char *help[] = {
1255 "ENCODING <key> -- Return the kind of internal representation used in order to store the value associated with a key.",
1256 "FREQ <key> -- Return the access frequency index of the key. The returned integer is proportional to the logarithm of the recent access frequency of the key.",
1257 "IDLETIME <key> -- Return the idle time of the key, that is the approximated number of seconds elapsed since the last access to the key.",
1258 "REFCOUNT <key> -- Return the number of references of the value associated with the specified key.",
1259 NULL
1260         };
1261         addReplyHelp(c, help);
1262     } else if (!strcasecmp(c->argv[1]->ptr,"refcount") && c->argc == 3) {
1263         if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
1264                 == NULL) return;
1265         addReplyLongLong(c,o->refcount);
1266     } else if (!strcasecmp(c->argv[1]->ptr,"encoding") && c->argc == 3) {
1267         if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
1268                 == NULL) return;
1269         addReplyBulkCString(c,strEncoding(o->encoding));
1270     } else if (!strcasecmp(c->argv[1]->ptr,"idletime") && c->argc == 3) {
1271         if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
1272                 == NULL) return;
1273         if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
1274             addReplyError(c,"An LFU maxmemory policy is selected, idle time not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust.");
1275             return;
1276         }
1277         addReplyLongLong(c,estimateObjectIdleTime(o)/1000);
1278     } else if (!strcasecmp(c->argv[1]->ptr,"freq") && c->argc == 3) {
1279         if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
1280                 == NULL) return;
1281         if (!(server.maxmemory_policy & MAXMEMORY_FLAG_LFU)) {
1282             addReplyError(c,"An LFU maxmemory policy is not selected, access frequency not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust.");
1283             return;
1284         }
1285         /* LFUDecrAndReturn should be called
1286          * in case of the key has not been accessed for a long time,
1287          * because we update the access time only
1288          * when the key is read or overwritten. */
1289         addReplyLongLong(c,LFUDecrAndReturn(o));
1290     } else {
1291         addReplySubcommandSyntaxError(c);
1292     }
1293 }
1294 
1295 /* The memory command will eventually be a complete interface for the
1296  * memory introspection capabilities of Redis.
1297  *
1298  * Usage: MEMORY usage <key> */
memoryCommand(client * c)1299 void memoryCommand(client *c) {
1300     if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) {
1301         const char *help[] = {
1302 "DOCTOR - Return memory problems reports.",
1303 "MALLOC-STATS -- Return internal statistics report from the memory allocator.",
1304 "PURGE -- Attempt to purge dirty pages for reclamation by the allocator.",
1305 "STATS -- Return information about the memory usage of the server.",
1306 "USAGE <key> [SAMPLES <count>] -- Return memory in bytes used by <key> and its value. Nested values are sampled up to <count> times (default: 5).",
1307 NULL
1308         };
1309         addReplyHelp(c, help);
1310     } else if (!strcasecmp(c->argv[1]->ptr,"usage") && c->argc >= 3) {
1311         dictEntry *de;
1312         long long samples = OBJ_COMPUTE_SIZE_DEF_SAMPLES;
1313         for (int j = 3; j < c->argc; j++) {
1314             if (!strcasecmp(c->argv[j]->ptr,"samples") &&
1315                 j+1 < c->argc)
1316             {
1317                 if (getLongLongFromObjectOrReply(c,c->argv[j+1],&samples,NULL)
1318                      == C_ERR) return;
1319                 if (samples < 0) {
1320                     addReply(c,shared.syntaxerr);
1321                     return;
1322                 }
1323                 if (samples == 0) samples = LLONG_MAX;;
1324                 j++; /* skip option argument. */
1325             } else {
1326                 addReply(c,shared.syntaxerr);
1327                 return;
1328             }
1329         }
1330         if ((de = dictFind(c->db->dict,c->argv[2]->ptr)) == NULL) {
1331             addReply(c, shared.nullbulk);
1332             return;
1333         }
1334         size_t usage = objectComputeSize(dictGetVal(de),samples);
1335         usage += sdsAllocSize(dictGetKey(de));
1336         usage += sizeof(dictEntry);
1337         addReplyLongLong(c,usage);
1338     } else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) {
1339         struct redisMemOverhead *mh = getMemoryOverheadData();
1340 
1341         addReplyMultiBulkLen(c,(25+mh->num_dbs)*2);
1342 
1343         addReplyBulkCString(c,"peak.allocated");
1344         addReplyLongLong(c,mh->peak_allocated);
1345 
1346         addReplyBulkCString(c,"total.allocated");
1347         addReplyLongLong(c,mh->total_allocated);
1348 
1349         addReplyBulkCString(c,"startup.allocated");
1350         addReplyLongLong(c,mh->startup_allocated);
1351 
1352         addReplyBulkCString(c,"replication.backlog");
1353         addReplyLongLong(c,mh->repl_backlog);
1354 
1355         addReplyBulkCString(c,"clients.slaves");
1356         addReplyLongLong(c,mh->clients_slaves);
1357 
1358         addReplyBulkCString(c,"clients.normal");
1359         addReplyLongLong(c,mh->clients_normal);
1360 
1361         addReplyBulkCString(c,"aof.buffer");
1362         addReplyLongLong(c,mh->aof_buffer);
1363 
1364         addReplyBulkCString(c,"lua.caches");
1365         addReplyLongLong(c,mh->lua_caches);
1366 
1367         for (size_t j = 0; j < mh->num_dbs; j++) {
1368             char dbname[32];
1369             snprintf(dbname,sizeof(dbname),"db.%zd",mh->db[j].dbid);
1370             addReplyBulkCString(c,dbname);
1371             addReplyMultiBulkLen(c,4);
1372 
1373             addReplyBulkCString(c,"overhead.hashtable.main");
1374             addReplyLongLong(c,mh->db[j].overhead_ht_main);
1375 
1376             addReplyBulkCString(c,"overhead.hashtable.expires");
1377             addReplyLongLong(c,mh->db[j].overhead_ht_expires);
1378         }
1379 
1380         addReplyBulkCString(c,"overhead.total");
1381         addReplyLongLong(c,mh->overhead_total);
1382 
1383         addReplyBulkCString(c,"keys.count");
1384         addReplyLongLong(c,mh->total_keys);
1385 
1386         addReplyBulkCString(c,"keys.bytes-per-key");
1387         addReplyLongLong(c,mh->bytes_per_key);
1388 
1389         addReplyBulkCString(c,"dataset.bytes");
1390         addReplyLongLong(c,mh->dataset);
1391 
1392         addReplyBulkCString(c,"dataset.percentage");
1393         addReplyDouble(c,mh->dataset_perc);
1394 
1395         addReplyBulkCString(c,"peak.percentage");
1396         addReplyDouble(c,mh->peak_perc);
1397 
1398         addReplyBulkCString(c,"allocator.allocated");
1399         addReplyLongLong(c,server.cron_malloc_stats.allocator_allocated);
1400 
1401         addReplyBulkCString(c,"allocator.active");
1402         addReplyLongLong(c,server.cron_malloc_stats.allocator_active);
1403 
1404         addReplyBulkCString(c,"allocator.resident");
1405         addReplyLongLong(c,server.cron_malloc_stats.allocator_resident);
1406 
1407         addReplyBulkCString(c,"allocator-fragmentation.ratio");
1408         addReplyDouble(c,mh->allocator_frag);
1409 
1410         addReplyBulkCString(c,"allocator-fragmentation.bytes");
1411         addReplyLongLong(c,mh->allocator_frag_bytes);
1412 
1413         addReplyBulkCString(c,"allocator-rss.ratio");
1414         addReplyDouble(c,mh->allocator_rss);
1415 
1416         addReplyBulkCString(c,"allocator-rss.bytes");
1417         addReplyLongLong(c,mh->allocator_rss_bytes);
1418 
1419         addReplyBulkCString(c,"rss-overhead.ratio");
1420         addReplyDouble(c,mh->rss_extra);
1421 
1422         addReplyBulkCString(c,"rss-overhead.bytes");
1423         addReplyLongLong(c,mh->rss_extra_bytes);
1424 
1425         addReplyBulkCString(c,"fragmentation"); /* this is the total RSS overhead, including fragmentation */
1426         addReplyDouble(c,mh->total_frag); /* it is kept here for backwards compatibility */
1427 
1428         addReplyBulkCString(c,"fragmentation.bytes");
1429         addReplyLongLong(c,mh->total_frag_bytes);
1430 
1431         freeMemoryOverheadData(mh);
1432     } else if (!strcasecmp(c->argv[1]->ptr,"malloc-stats") && c->argc == 2) {
1433 #if defined(USE_JEMALLOC)
1434         sds info = sdsempty();
1435         je_malloc_stats_print(inputCatSds, &info, NULL);
1436         addReplyBulkSds(c, info);
1437 #else
1438         addReplyBulkCString(c,"Stats not supported for the current allocator");
1439 #endif
1440     } else if (!strcasecmp(c->argv[1]->ptr,"doctor") && c->argc == 2) {
1441         sds report = getMemoryDoctorReport();
1442         addReplyBulkSds(c,report);
1443     } else if (!strcasecmp(c->argv[1]->ptr,"purge") && c->argc == 2) {
1444 #if defined(USE_JEMALLOC)
1445         char tmp[32];
1446         unsigned narenas = 0;
1447         size_t sz = sizeof(unsigned);
1448         if (!je_mallctl("arenas.narenas", &narenas, &sz, NULL, 0)) {
1449             sprintf(tmp, "arena.%d.purge", narenas);
1450             if (!je_mallctl(tmp, NULL, 0, NULL, 0)) {
1451                 addReply(c, shared.ok);
1452                 return;
1453             }
1454         }
1455         addReplyError(c, "Error purging dirty pages");
1456 #else
1457         addReply(c, shared.ok);
1458         /* Nothing to do for other allocators. */
1459 #endif
1460     } else {
1461         addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try MEMORY HELP", (char*)c->argv[1]->ptr);
1462     }
1463 }
1464