1 /* Redis Object implementation.
2 *
3 * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * * Neither the name of Redis nor the names of its contributors may be used
15 * to endorse or promote products derived from this software without
16 * specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include "server.h"
32 #include <math.h>
33 #include <ctype.h>
34
35 #ifdef __CYGWIN__
36 #define strtold(a,b) ((long double)strtod((a),(b)))
37 #endif
38
39 /* ===================== Creation and parsing of objects ==================== */
40
createObject(int type,void * ptr)41 robj *createObject(int type, void *ptr) {
42 robj *o = zmalloc(sizeof(*o));
43 o->type = type;
44 o->encoding = OBJ_ENCODING_RAW;
45 o->ptr = ptr;
46 o->refcount = 1;
47
48 /* Set the LRU to the current lruclock (minutes resolution), or
49 * alternatively the LFU counter. */
50 if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
51 o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
52 } else {
53 o->lru = LRU_CLOCK();
54 }
55 return o;
56 }
57
58 /* Set a special refcount in the object to make it "shared":
59 * incrRefCount and decrRefCount() will test for this special refcount
60 * and will not touch the object. This way it is free to access shared
61 * objects such as small integers from different threads without any
62 * mutex.
63 *
64 * A common patter to create shared objects:
65 *
66 * robj *myobject = makeObjectShared(createObject(...));
67 *
68 */
makeObjectShared(robj * o)69 robj *makeObjectShared(robj *o) {
70 serverAssert(o->refcount == 1);
71 o->refcount = OBJ_SHARED_REFCOUNT;
72 return o;
73 }
74
75 /* Create a string object with encoding OBJ_ENCODING_RAW, that is a plain
76 * string object where o->ptr points to a proper sds string. */
createRawStringObject(const char * ptr,size_t len)77 robj *createRawStringObject(const char *ptr, size_t len) {
78 return createObject(OBJ_STRING, sdsnewlen(ptr,len));
79 }
80
81 /* Create a string object with encoding OBJ_ENCODING_EMBSTR, that is
82 * an object where the sds string is actually an unmodifiable string
83 * allocated in the same chunk as the object itself. */
createEmbeddedStringObject(const char * ptr,size_t len)84 robj *createEmbeddedStringObject(const char *ptr, size_t len) {
85 robj *o = zmalloc(sizeof(robj)+sizeof(struct sdshdr8)+len+1);
86 struct sdshdr8 *sh = (void*)(o+1);
87
88 o->type = OBJ_STRING;
89 o->encoding = OBJ_ENCODING_EMBSTR;
90 o->ptr = sh+1;
91 o->refcount = 1;
92 if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
93 o->lru = (LFUGetTimeInMinutes()<<8) | LFU_INIT_VAL;
94 } else {
95 o->lru = LRU_CLOCK();
96 }
97
98 sh->len = len;
99 sh->alloc = len;
100 sh->flags = SDS_TYPE_8;
101 if (ptr == SDS_NOINIT)
102 sh->buf[len] = '\0';
103 else if (ptr) {
104 memcpy(sh->buf,ptr,len);
105 sh->buf[len] = '\0';
106 } else {
107 memset(sh->buf,0,len+1);
108 }
109 return o;
110 }
111
112 /* Create a string object with EMBSTR encoding if it is smaller than
113 * OBJ_ENCODING_EMBSTR_SIZE_LIMIT, otherwise the RAW encoding is
114 * used.
115 *
116 * The current limit of 44 is chosen so that the biggest string object
117 * we allocate as EMBSTR will still fit into the 64 byte arena of jemalloc. */
118 #define OBJ_ENCODING_EMBSTR_SIZE_LIMIT 44
createStringObject(const char * ptr,size_t len)119 robj *createStringObject(const char *ptr, size_t len) {
120 if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT)
121 return createEmbeddedStringObject(ptr,len);
122 else
123 return createRawStringObject(ptr,len);
124 }
125
126 /* Create a string object from a long long value. When possible returns a
127 * shared integer object, or at least an integer encoded one.
128 *
129 * If valueobj is non zero, the function avoids returning a a shared
130 * integer, because the object is going to be used as value in the Redis key
131 * space (for instance when the INCR command is used), so we want LFU/LRU
132 * values specific for each key. */
createStringObjectFromLongLongWithOptions(long long value,int valueobj)133 robj *createStringObjectFromLongLongWithOptions(long long value, int valueobj) {
134 robj *o;
135
136 if (server.maxmemory == 0 ||
137 !(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS))
138 {
139 /* If the maxmemory policy permits, we can still return shared integers
140 * even if valueobj is true. */
141 valueobj = 0;
142 }
143
144 if (value >= 0 && value < OBJ_SHARED_INTEGERS && valueobj == 0) {
145 incrRefCount(shared.integers[value]);
146 o = shared.integers[value];
147 } else {
148 if (value >= LONG_MIN && value <= LONG_MAX) {
149 o = createObject(OBJ_STRING, NULL);
150 o->encoding = OBJ_ENCODING_INT;
151 o->ptr = (void*)((long)value);
152 } else {
153 o = createObject(OBJ_STRING,sdsfromlonglong(value));
154 }
155 }
156 return o;
157 }
158
159 /* Wrapper for createStringObjectFromLongLongWithOptions() always demanding
160 * to create a shared object if possible. */
createStringObjectFromLongLong(long long value)161 robj *createStringObjectFromLongLong(long long value) {
162 return createStringObjectFromLongLongWithOptions(value,0);
163 }
164
165 /* Wrapper for createStringObjectFromLongLongWithOptions() avoiding a shared
166 * object when LFU/LRU info are needed, that is, when the object is used
167 * as a value in the key space, and Redis is configured to evict based on
168 * LFU/LRU. */
createStringObjectFromLongLongForValue(long long value)169 robj *createStringObjectFromLongLongForValue(long long value) {
170 return createStringObjectFromLongLongWithOptions(value,1);
171 }
172
173 /* Create a string object from a long double. If humanfriendly is non-zero
174 * it does not use exponential format and trims trailing zeroes at the end,
175 * however this results in loss of precision. Otherwise exp format is used
176 * and the output of snprintf() is not modified.
177 *
178 * The 'humanfriendly' option is used for INCRBYFLOAT and HINCRBYFLOAT. */
createStringObjectFromLongDouble(long double value,int humanfriendly)179 robj *createStringObjectFromLongDouble(long double value, int humanfriendly) {
180 char buf[MAX_LONG_DOUBLE_CHARS];
181 int len = ld2string(buf,sizeof(buf),value,humanfriendly);
182 return createStringObject(buf,len);
183 }
184
185 /* Duplicate a string object, with the guarantee that the returned object
186 * has the same encoding as the original one.
187 *
188 * This function also guarantees that duplicating a small integer object
189 * (or a string object that contains a representation of a small integer)
190 * will always result in a fresh object that is unshared (refcount == 1).
191 *
192 * The resulting object always has refcount set to 1. */
dupStringObject(const robj * o)193 robj *dupStringObject(const robj *o) {
194 robj *d;
195
196 serverAssert(o->type == OBJ_STRING);
197
198 switch(o->encoding) {
199 case OBJ_ENCODING_RAW:
200 return createRawStringObject(o->ptr,sdslen(o->ptr));
201 case OBJ_ENCODING_EMBSTR:
202 return createEmbeddedStringObject(o->ptr,sdslen(o->ptr));
203 case OBJ_ENCODING_INT:
204 d = createObject(OBJ_STRING, NULL);
205 d->encoding = OBJ_ENCODING_INT;
206 d->ptr = o->ptr;
207 return d;
208 default:
209 serverPanic("Wrong encoding.");
210 break;
211 }
212 }
213
createQuicklistObject(void)214 robj *createQuicklistObject(void) {
215 quicklist *l = quicklistCreate();
216 robj *o = createObject(OBJ_LIST,l);
217 o->encoding = OBJ_ENCODING_QUICKLIST;
218 return o;
219 }
220
createZiplistObject(void)221 robj *createZiplistObject(void) {
222 unsigned char *zl = ziplistNew();
223 robj *o = createObject(OBJ_LIST,zl);
224 o->encoding = OBJ_ENCODING_ZIPLIST;
225 return o;
226 }
227
createSetObject(void)228 robj *createSetObject(void) {
229 dict *d = dictCreate(&setDictType,NULL);
230 robj *o = createObject(OBJ_SET,d);
231 o->encoding = OBJ_ENCODING_HT;
232 return o;
233 }
234
createIntsetObject(void)235 robj *createIntsetObject(void) {
236 intset *is = intsetNew();
237 robj *o = createObject(OBJ_SET,is);
238 o->encoding = OBJ_ENCODING_INTSET;
239 return o;
240 }
241
createHashObject(void)242 robj *createHashObject(void) {
243 unsigned char *zl = ziplistNew();
244 robj *o = createObject(OBJ_HASH, zl);
245 o->encoding = OBJ_ENCODING_ZIPLIST;
246 return o;
247 }
248
createZsetObject(void)249 robj *createZsetObject(void) {
250 zset *zs = zmalloc(sizeof(*zs));
251 robj *o;
252
253 zs->dict = dictCreate(&zsetDictType,NULL);
254 zs->zsl = zslCreate();
255 o = createObject(OBJ_ZSET,zs);
256 o->encoding = OBJ_ENCODING_SKIPLIST;
257 return o;
258 }
259
createZsetZiplistObject(void)260 robj *createZsetZiplistObject(void) {
261 unsigned char *zl = ziplistNew();
262 robj *o = createObject(OBJ_ZSET,zl);
263 o->encoding = OBJ_ENCODING_ZIPLIST;
264 return o;
265 }
266
createStreamObject(void)267 robj *createStreamObject(void) {
268 stream *s = streamNew();
269 robj *o = createObject(OBJ_STREAM,s);
270 o->encoding = OBJ_ENCODING_STREAM;
271 return o;
272 }
273
createModuleObject(moduleType * mt,void * value)274 robj *createModuleObject(moduleType *mt, void *value) {
275 moduleValue *mv = zmalloc(sizeof(*mv));
276 mv->type = mt;
277 mv->value = value;
278 return createObject(OBJ_MODULE,mv);
279 }
280
/* Release the payload of a string object. Only the RAW encoding has an sds
 * string to release here; nothing is done for any other encoding. The
 * object header itself is not freed by this function. */
void freeStringObject(robj *o) {
    if (o->encoding != OBJ_ENCODING_RAW) return;
    sdsfree(o->ptr);
}
286
/* Release the payload of a list object. Only the quicklist encoding is
 * handled: any other encoding triggers a panic. The object header itself
 * is not freed by this function. */
void freeListObject(robj *o) {
    if (o->encoding != OBJ_ENCODING_QUICKLIST) {
        serverPanic("Unknown list encoding type");
    } else {
        quicklistRelease(o->ptr);
    }
}
294
/* Release the payload of a set object, which is either a hash table or an
 * intset. Panics on any other encoding. The object header itself is not
 * freed by this function. */
void freeSetObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_HT) {
        dictRelease((dict*) o->ptr);
    } else if (o->encoding == OBJ_ENCODING_INTSET) {
        /* The intset is released with a plain zfree(). */
        zfree(o->ptr);
    } else {
        serverPanic("Unknown set encoding type");
    }
}
307
/* Release the payload of a sorted set object. With the skiplist encoding
 * the dict, the skiplist and the zset container are all released; with the
 * ziplist encoding the ziplist is released with zfree(). Panics on any
 * other encoding. The object header itself is not freed by this function. */
void freeZsetObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = o->ptr;

        dictRelease(zs->dict);
        zslFree(zs->zsl);
        zfree(zs);
    } else if (o->encoding == OBJ_ENCODING_ZIPLIST) {
        zfree(o->ptr);
    } else {
        serverPanic("Unknown sorted set encoding");
    }
}
324
/* Release the payload of a hash object, which is either a hash table or a
 * ziplist. Panics on any other encoding. The object header itself is not
 * freed by this function. */
void freeHashObject(robj *o) {
    if (o->encoding == OBJ_ENCODING_HT) {
        dictRelease((dict*) o->ptr);
    } else if (o->encoding == OBJ_ENCODING_ZIPLIST) {
        zfree(o->ptr);
    } else {
        serverPanic("Unknown hash encoding type");
    }
}
338
/* Release the payload of a module object: the module supplied free()
 * callback is invoked on the module value, then the moduleValue wrapper is
 * released. The object header itself is not freed by this function. */
void freeModuleObject(robj *o) {
    moduleValue *wrapper = o->ptr;
    moduleType *mt = wrapper->type;

    mt->free(wrapper->value);
    zfree(wrapper);
}
344
/* Release the payload of a stream object via freeStream(). The object
 * header itself is not freed by this function. */
void freeStreamObject(robj *o) {
    stream *s = o->ptr;

    freeStream(s);
}
348
/* Take one more reference to 'o'. Objects whose refcount is
 * OBJ_SHARED_REFCOUNT are left untouched. */
void incrRefCount(robj *o) {
    if (o->refcount == OBJ_SHARED_REFCOUNT) return;
    o->refcount++;
}
352
/* Drop one reference to 'o'. When the last reference is dropped the type
 * specific payload is released and then the object itself is freed.
 * Objects whose refcount is OBJ_SHARED_REFCOUNT are left untouched.
 *
 * Panics if the refcount is already <= 0, or if the object type is not
 * known when the object has to be released. */
void decrRefCount(robj *o) {
    if (o->refcount != 1) {
        if (o->refcount <= 0) serverPanic("decrRefCount against refcount <= 0");
        if (o->refcount != OBJ_SHARED_REFCOUNT) o->refcount--;
        return;
    }

    /* This was the last reference: release payload and header. */
    switch(o->type) {
    case OBJ_STRING:
        freeStringObject(o);
        break;
    case OBJ_LIST:
        freeListObject(o);
        break;
    case OBJ_SET:
        freeSetObject(o);
        break;
    case OBJ_ZSET:
        freeZsetObject(o);
        break;
    case OBJ_HASH:
        freeHashObject(o);
        break;
    case OBJ_MODULE:
        freeModuleObject(o);
        break;
    case OBJ_STREAM:
        freeStreamObject(o);
        break;
    default:
        serverPanic("Unknown object type");
        break;
    }
    zfree(o);
}
371
372 /* This variant of decrRefCount() gets its argument as void, and is useful
373 * as free method in data structures that expect a 'void free_object(void*)'
374 * prototype for the free method. */
decrRefCountVoid(void * o)375 void decrRefCountVoid(void *o) {
376 decrRefCount(o);
377 }
378
379 /* This function set the ref count to zero without freeing the object.
380 * It is useful in order to pass a new object to functions incrementing
381 * the ref count of the received object. Example:
382 *
383 * functionThatWillIncrementRefCount(resetRefCount(CreateObject(...)));
384 *
385 * Otherwise you need to resort to the less elegant pattern:
386 *
387 * *obj = createObject(...);
388 * functionThatWillIncrementRefCount(obj);
389 * decrRefCount(obj);
390 */
resetRefCount(robj * obj)391 robj *resetRefCount(robj *obj) {
392 obj->refcount = 0;
393 return obj;
394 }
395
/* Verify that the object 'o' has the expected 'type'.
 *
 * Returns 0 when the type matches. On mismatch the shared "wrong type"
 * error is sent to the client 'c' and 1 is returned. */
int checkType(client *c, robj *o, int type) {
    if (o->type == type) return 0;

    addReply(c,shared.wrongtypeerr);
    return 1;
}
403
/* Check whether the sds string 's' can be parsed by string2ll().
 * Returns C_OK on success, with 'llval' passed through to string2ll()
 * to receive the parsed value, or C_ERR otherwise. */
int isSdsRepresentableAsLongLong(sds s, long long *llval) {
    if (string2ll(s,sdslen(s),llval)) return C_OK;
    return C_ERR;
}
407
/* Check whether the string object 'o' holds a value representable as a
 * long long. Returns C_OK or C_ERR.
 *
 * Integer encoded objects always succeed, and the value is stored in
 * '*llval' when 'llval' is not NULL. For the other encodings the check is
 * delegated to isSdsRepresentableAsLongLong(). */
int isObjectRepresentableAsLongLong(robj *o, long long *llval) {
    serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);

    if (o->encoding != OBJ_ENCODING_INT) {
        return isSdsRepresentableAsLongLong(o->ptr,llval);
    }
    if (llval) *llval = (long) o->ptr;
    return C_OK;
}
417
418 /* Optimize the SDS string inside the string object to require little space,
419 * in case there is more than 10% of free space at the end of the SDS
420 * string. This happens because SDS strings tend to overallocate to avoid
421 * wasting too much time in allocations when appending to the string. */
trimStringObjectIfNeeded(robj * o)422 void trimStringObjectIfNeeded(robj *o) {
423 if (o->encoding == OBJ_ENCODING_RAW &&
424 sdsavail(o->ptr) > sdslen(o->ptr)/10)
425 {
426 o->ptr = sdsRemoveFreeSpace(o->ptr);
427 }
428 }
429
430 /* Try to encode a string object in order to save space */
tryObjectEncoding(robj * o)431 robj *tryObjectEncoding(robj *o) {
432 long value;
433 sds s = o->ptr;
434 size_t len;
435
436 /* Make sure this is a string object, the only type we encode
437 * in this function. Other types use encoded memory efficient
438 * representations but are handled by the commands implementing
439 * the type. */
440 serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
441
442 /* We try some specialized encoding only for objects that are
443 * RAW or EMBSTR encoded, in other words objects that are still
444 * in represented by an actually array of chars. */
445 if (!sdsEncodedObject(o)) return o;
446
447 /* It's not safe to encode shared objects: shared objects can be shared
448 * everywhere in the "object space" of Redis and may end in places where
449 * they are not handled. We handle them only as values in the keyspace. */
450 if (o->refcount > 1) return o;
451
452 /* Check if we can represent this string as a long integer.
453 * Note that we are sure that a string larger than 20 chars is not
454 * representable as a 32 nor 64 bit integer. */
455 len = sdslen(s);
456 if (len <= 20 && string2l(s,len,&value)) {
457 /* This object is encodable as a long. Try to use a shared object.
458 * Note that we avoid using shared integers when maxmemory is used
459 * because every object needs to have a private LRU field for the LRU
460 * algorithm to work well. */
461 if ((server.maxmemory == 0 ||
462 !(server.maxmemory_policy & MAXMEMORY_FLAG_NO_SHARED_INTEGERS)) &&
463 value >= 0 &&
464 value < OBJ_SHARED_INTEGERS)
465 {
466 decrRefCount(o);
467 incrRefCount(shared.integers[value]);
468 return shared.integers[value];
469 } else {
470 if (o->encoding == OBJ_ENCODING_RAW) sdsfree(o->ptr);
471 o->encoding = OBJ_ENCODING_INT;
472 o->ptr = (void*) value;
473 return o;
474 }
475 }
476
477 /* If the string is small and is still RAW encoded,
478 * try the EMBSTR encoding which is more efficient.
479 * In this representation the object and the SDS string are allocated
480 * in the same chunk of memory to save space and cache misses. */
481 if (len <= OBJ_ENCODING_EMBSTR_SIZE_LIMIT) {
482 robj *emb;
483
484 if (o->encoding == OBJ_ENCODING_EMBSTR) return o;
485 emb = createEmbeddedStringObject(s,sdslen(s));
486 decrRefCount(o);
487 return emb;
488 }
489
490 /* We can't encode the object...
491 *
492 * Do the last try, and at least optimize the SDS string inside
493 * the string object to require little space, in case there
494 * is more than 10% of free space at the end of the SDS string.
495 *
496 * We do that only for relatively large strings as this branch
497 * is only entered if the length of the string is greater than
498 * OBJ_ENCODING_EMBSTR_SIZE_LIMIT. */
499 trimStringObjectIfNeeded(o);
500
501 /* Return the original object. */
502 return o;
503 }
504
505 /* Get a decoded version of an encoded object (returned as a new object).
506 * If the object is already raw-encoded just increment the ref count. */
getDecodedObject(robj * o)507 robj *getDecodedObject(robj *o) {
508 robj *dec;
509
510 if (sdsEncodedObject(o)) {
511 incrRefCount(o);
512 return o;
513 }
514 if (o->type == OBJ_STRING && o->encoding == OBJ_ENCODING_INT) {
515 char buf[32];
516
517 ll2string(buf,32,(long)o->ptr);
518 dec = createStringObject(buf,strlen(buf));
519 return dec;
520 } else {
521 serverPanic("Unknown encoding type");
522 }
523 }
524
525 /* Compare two string objects via strcmp() or strcoll() depending on flags.
526 * Note that the objects may be integer-encoded. In such a case we
527 * use ll2string() to get a string representation of the numbers on the stack
528 * and compare the strings, it's much faster than calling getDecodedObject().
529 *
530 * Important note: when REDIS_COMPARE_BINARY is used a binary-safe comparison
531 * is used. */
532
533 #define REDIS_COMPARE_BINARY (1<<0)
534 #define REDIS_COMPARE_COLL (1<<1)
535
compareStringObjectsWithFlags(robj * a,robj * b,int flags)536 int compareStringObjectsWithFlags(robj *a, robj *b, int flags) {
537 serverAssertWithInfo(NULL,a,a->type == OBJ_STRING && b->type == OBJ_STRING);
538 char bufa[128], bufb[128], *astr, *bstr;
539 size_t alen, blen, minlen;
540
541 if (a == b) return 0;
542 if (sdsEncodedObject(a)) {
543 astr = a->ptr;
544 alen = sdslen(astr);
545 } else {
546 alen = ll2string(bufa,sizeof(bufa),(long) a->ptr);
547 astr = bufa;
548 }
549 if (sdsEncodedObject(b)) {
550 bstr = b->ptr;
551 blen = sdslen(bstr);
552 } else {
553 blen = ll2string(bufb,sizeof(bufb),(long) b->ptr);
554 bstr = bufb;
555 }
556 if (flags & REDIS_COMPARE_COLL) {
557 return strcoll(astr,bstr);
558 } else {
559 int cmp;
560
561 minlen = (alen < blen) ? alen : blen;
562 cmp = memcmp(astr,bstr,minlen);
563 if (cmp == 0) return alen-blen;
564 return cmp;
565 }
566 }
567
568 /* Wrapper for compareStringObjectsWithFlags() using binary comparison. */
compareStringObjects(robj * a,robj * b)569 int compareStringObjects(robj *a, robj *b) {
570 return compareStringObjectsWithFlags(a,b,REDIS_COMPARE_BINARY);
571 }
572
573 /* Wrapper for compareStringObjectsWithFlags() using collation. */
collateStringObjects(robj * a,robj * b)574 int collateStringObjects(robj *a, robj *b) {
575 return compareStringObjectsWithFlags(a,b,REDIS_COMPARE_COLL);
576 }
577
578 /* Equal string objects return 1 if the two objects are the same from the
579 * point of view of a string comparison, otherwise 0 is returned. Note that
580 * this function is faster then checking for (compareStringObject(a,b) == 0)
581 * because it can perform some more optimization. */
equalStringObjects(robj * a,robj * b)582 int equalStringObjects(robj *a, robj *b) {
583 if (a->encoding == OBJ_ENCODING_INT &&
584 b->encoding == OBJ_ENCODING_INT){
585 /* If both strings are integer encoded just check if the stored
586 * long is the same. */
587 return a->ptr == b->ptr;
588 } else {
589 return compareStringObjects(a,b) == 0;
590 }
591 }
592
/* Return the length of the string held by the string object 'o'. For sds
 * backed objects this is the sds length; for integer encoded objects it is
 * the value computed by sdigits10() on the stored integer. */
size_t stringObjectLen(robj *o) {
    serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);

    if (!sdsEncodedObject(o)) {
        return sdigits10((long)o->ptr);
    }
    return sdslen(o->ptr);
}
601
getDoubleFromObject(const robj * o,double * target)602 int getDoubleFromObject(const robj *o, double *target) {
603 double value;
604 char *eptr;
605
606 if (o == NULL) {
607 value = 0;
608 } else {
609 serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
610 if (sdsEncodedObject(o)) {
611 errno = 0;
612 value = strtod(o->ptr, &eptr);
613 if (sdslen(o->ptr) == 0 ||
614 isspace(((const char*)o->ptr)[0]) ||
615 (size_t)(eptr-(char*)o->ptr) != sdslen(o->ptr) ||
616 (errno == ERANGE &&
617 (value == HUGE_VAL || value == -HUGE_VAL || value == 0)) ||
618 isnan(value))
619 return C_ERR;
620 } else if (o->encoding == OBJ_ENCODING_INT) {
621 value = (long)o->ptr;
622 } else {
623 serverPanic("Unknown string encoding");
624 }
625 }
626 *target = value;
627 return C_OK;
628 }
629
/* Like getDoubleFromObject(), but on failure an error is also sent to the
 * client 'c': the caller supplied 'msg' when it is not NULL, a default
 * message otherwise.
 *
 * Returns C_OK with the value stored in '*target', or C_ERR after
 * replying with the error, leaving '*target' untouched. */
int getDoubleFromObjectOrReply(client *c, robj *o, double *target, const char *msg) {
    double parsed;

    if (getDoubleFromObject(o, &parsed) == C_OK) {
        *target = parsed;
        return C_OK;
    }

    if (msg == NULL) {
        addReplyError(c,"value is not a valid float");
    } else {
        addReplyError(c,(char*)msg);
    }
    return C_ERR;
}
643
/* Parse the string object 'o' as a long double and store the result in
 * '*target'.
 *
 * A NULL object is treated as the value 0. Integer encoded objects are
 * converted directly. For sds backed objects the whole string must be a
 * valid number: the conversion fails if the string is empty, starts with
 * a whitespace character, has trailing bytes that strtold() did not
 * consume, overflows or underflows (ERANGE), or evaluates to NaN.
 *
 * Returns C_OK on success; C_ERR on failure, leaving '*target'
 * untouched. Panics on an unknown string encoding. */
int getLongDoubleFromObject(robj *o, long double *target) {
    long double value;
    char *eptr;

    if (o == NULL) {
        value = 0;
    } else {
        serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
        if (sdsEncodedObject(o)) {
            errno = 0;
            value = strtold(o->ptr, &eptr);
            /* The first byte is converted to unsigned char before calling
             * isspace(): passing a negative char value (any byte >= 0x80
             * where char is signed) is undefined behavior. */
            if (sdslen(o->ptr) == 0 ||
                isspace((unsigned char)((const char*)o->ptr)[0]) ||
                (size_t)(eptr-(char*)o->ptr) != sdslen(o->ptr) ||
                (errno == ERANGE &&
                    (value == HUGE_VAL || value == -HUGE_VAL || value == 0)) ||
                isnan(value))
                return C_ERR;
        } else if (o->encoding == OBJ_ENCODING_INT) {
            value = (long)o->ptr;
        } else {
            serverPanic("Unknown string encoding");
        }
    }
    *target = value;
    return C_OK;
}
671
/* Like getLongDoubleFromObject(), but on failure an error is also sent to
 * the client 'c': the caller supplied 'msg' when it is not NULL, a default
 * message otherwise.
 *
 * Returns C_OK with the value stored in '*target', or C_ERR after
 * replying with the error, leaving '*target' untouched. */
int getLongDoubleFromObjectOrReply(client *c, robj *o, long double *target, const char *msg) {
    long double parsed;

    if (getLongDoubleFromObject(o, &parsed) == C_OK) {
        *target = parsed;
        return C_OK;
    }

    if (msg == NULL) {
        addReplyError(c,"value is not a valid float");
    } else {
        addReplyError(c,(char*)msg);
    }
    return C_ERR;
}
685
/* Parse the string object 'o' as a long long.
 *
 * A NULL object is treated as the value 0. Integer encoded objects are
 * converted directly, sds backed ones are parsed with string2ll().
 *
 * Returns C_OK on success, storing the value in '*target' when 'target'
 * is not NULL; C_ERR if the string is not a valid long long. Panics on an
 * unknown string encoding. */
int getLongLongFromObject(robj *o, long long *target) {
    long long value;

    if (o != NULL) {
        serverAssertWithInfo(NULL,o,o->type == OBJ_STRING);
        if (sdsEncodedObject(o)) {
            if (!string2ll(o->ptr,sdslen(o->ptr),&value)) return C_ERR;
        } else if (o->encoding == OBJ_ENCODING_INT) {
            value = (long)o->ptr;
        } else {
            serverPanic("Unknown string encoding");
        }
    } else {
        value = 0;
    }

    if (target) *target = value;
    return C_OK;
}
704
/* Like getLongLongFromObject(), but on failure an error is also sent to
 * the client 'c': the caller supplied 'msg' when it is not NULL, a default
 * message otherwise.
 *
 * Returns C_OK with the value stored in '*target', or C_ERR after
 * replying with the error, leaving '*target' untouched. */
int getLongLongFromObjectOrReply(client *c, robj *o, long long *target, const char *msg) {
    long long parsed;

    if (getLongLongFromObject(o, &parsed) == C_OK) {
        *target = parsed;
        return C_OK;
    }

    if (msg == NULL) {
        addReplyError(c,"value is not an integer or out of range");
    } else {
        addReplyError(c,(char*)msg);
    }
    return C_ERR;
}
718
/* Parse the string object 'o' as a long, replying to the client 'c' with
 * an error on failure.
 *
 * The value is first obtained with getLongLongFromObjectOrReply(), which
 * takes care of the reply when the object is not a valid integer. If the
 * value does not fit in a long, 'msg' (or a default "out of range"
 * message when 'msg' is NULL) is sent instead.
 *
 * Returns C_OK with the value stored in '*target', or C_ERR leaving
 * '*target' untouched. */
int getLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg) {
    long long parsed;

    if (getLongLongFromObjectOrReply(c, o, &parsed, msg) != C_OK) return C_ERR;

    if (parsed >= LONG_MIN && parsed <= LONG_MAX) {
        *target = parsed;
        return C_OK;
    }

    if (msg == NULL) {
        addReplyError(c,"value is out of range");
    } else {
        addReplyError(c,(char*)msg);
    }
    return C_ERR;
}
734
strEncoding(int encoding)735 char *strEncoding(int encoding) {
736 switch(encoding) {
737 case OBJ_ENCODING_RAW: return "raw";
738 case OBJ_ENCODING_INT: return "int";
739 case OBJ_ENCODING_HT: return "hashtable";
740 case OBJ_ENCODING_QUICKLIST: return "quicklist";
741 case OBJ_ENCODING_ZIPLIST: return "ziplist";
742 case OBJ_ENCODING_INTSET: return "intset";
743 case OBJ_ENCODING_SKIPLIST: return "skiplist";
744 case OBJ_ENCODING_EMBSTR: return "embstr";
745 default: return "unknown";
746 }
747 }
748
749 /* =========================== Memory introspection ========================= */
750
751
752 /* This is an helper function with the goal of estimating the memory
753 * size of a radix tree that is used to store Stream IDs.
754 *
755 * Note: to guess the size of the radix tree is not trivial, so we
756 * approximate it considering 16 bytes of data overhead for each
757 * key (the ID), and then adding the number of bare nodes, plus some
758 * overhead due by the data and child pointers. This secret recipe
759 * was obtained by checking the average radix tree created by real
760 * workloads, and then adjusting the constants to get numbers that
761 * more or less match the real memory usage.
762 *
763 * Actually the number of nodes and keys may be different depending
764 * on the insertion speed and thus the ability of the radix tree
765 * to compress prefixes. */
streamRadixTreeMemoryUsage(rax * rax)766 size_t streamRadixTreeMemoryUsage(rax *rax) {
767 size_t size;
768 size = rax->numele * sizeof(streamID);
769 size += rax->numnodes * sizeof(raxNode);
770 /* Add a fixed overhead due to the aux data pointer, children, ... */
771 size += rax->numnodes * sizeof(long)*30;
772 return size;
773 }
774
775 /* Returns the size in bytes consumed by the key's value in RAM.
776 * Note that the returned value is just an approximation, especially in the
777 * case of aggregated data types where only "sample_size" elements
778 * are checked and averaged to estimate the total size. */
779 #define OBJ_COMPUTE_SIZE_DEF_SAMPLES 5 /* Default sample size. */
objectComputeSize(robj * o,size_t sample_size)780 size_t objectComputeSize(robj *o, size_t sample_size) {
781 sds ele, ele2;
782 dict *d;
783 dictIterator *di;
784 struct dictEntry *de;
785 size_t asize = 0, elesize = 0, samples = 0;
786
787 if (o->type == OBJ_STRING) {
788 if(o->encoding == OBJ_ENCODING_INT) {
789 asize = sizeof(*o);
790 } else if(o->encoding == OBJ_ENCODING_RAW) {
791 asize = sdsAllocSize(o->ptr)+sizeof(*o);
792 } else if(o->encoding == OBJ_ENCODING_EMBSTR) {
793 asize = sdslen(o->ptr)+2+sizeof(*o);
794 } else {
795 serverPanic("Unknown string encoding");
796 }
797 } else if (o->type == OBJ_LIST) {
798 if (o->encoding == OBJ_ENCODING_QUICKLIST) {
799 quicklist *ql = o->ptr;
800 quicklistNode *node = ql->head;
801 asize = sizeof(*o)+sizeof(quicklist);
802 do {
803 elesize += sizeof(quicklistNode)+ziplistBlobLen(node->zl);
804 samples++;
805 } while ((node = node->next) && samples < sample_size);
806 asize += (double)elesize/samples*ql->len;
807 } else if (o->encoding == OBJ_ENCODING_ZIPLIST) {
808 asize = sizeof(*o)+ziplistBlobLen(o->ptr);
809 } else {
810 serverPanic("Unknown list encoding");
811 }
812 } else if (o->type == OBJ_SET) {
813 if (o->encoding == OBJ_ENCODING_HT) {
814 d = o->ptr;
815 di = dictGetIterator(d);
816 asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
817 while((de = dictNext(di)) != NULL && samples < sample_size) {
818 ele = dictGetKey(de);
819 elesize += sizeof(struct dictEntry) + sdsAllocSize(ele);
820 samples++;
821 }
822 dictReleaseIterator(di);
823 if (samples) asize += (double)elesize/samples*dictSize(d);
824 } else if (o->encoding == OBJ_ENCODING_INTSET) {
825 intset *is = o->ptr;
826 asize = sizeof(*o)+sizeof(*is)+is->encoding*is->length;
827 } else {
828 serverPanic("Unknown set encoding");
829 }
830 } else if (o->type == OBJ_ZSET) {
831 if (o->encoding == OBJ_ENCODING_ZIPLIST) {
832 asize = sizeof(*o)+(ziplistBlobLen(o->ptr));
833 } else if (o->encoding == OBJ_ENCODING_SKIPLIST) {
834 d = ((zset*)o->ptr)->dict;
835 zskiplist *zsl = ((zset*)o->ptr)->zsl;
836 zskiplistNode *znode = zsl->header->level[0].forward;
837 asize = sizeof(*o)+sizeof(zset)+(sizeof(struct dictEntry*)*dictSlots(d));
838 while(znode != NULL && samples < sample_size) {
839 elesize += sdsAllocSize(znode->ele);
840 elesize += sizeof(struct dictEntry) + zmalloc_size(znode);
841 samples++;
842 znode = znode->level[0].forward;
843 }
844 if (samples) asize += (double)elesize/samples*dictSize(d);
845 } else {
846 serverPanic("Unknown sorted set encoding");
847 }
848 } else if (o->type == OBJ_HASH) {
849 if (o->encoding == OBJ_ENCODING_ZIPLIST) {
850 asize = sizeof(*o)+(ziplistBlobLen(o->ptr));
851 } else if (o->encoding == OBJ_ENCODING_HT) {
852 d = o->ptr;
853 di = dictGetIterator(d);
854 asize = sizeof(*o)+sizeof(dict)+(sizeof(struct dictEntry*)*dictSlots(d));
855 while((de = dictNext(di)) != NULL && samples < sample_size) {
856 ele = dictGetKey(de);
857 ele2 = dictGetVal(de);
858 elesize += sdsAllocSize(ele) + sdsAllocSize(ele2);
859 elesize += sizeof(struct dictEntry);
860 samples++;
861 }
862 dictReleaseIterator(di);
863 if (samples) asize += (double)elesize/samples*dictSize(d);
864 } else {
865 serverPanic("Unknown hash encoding");
866 }
867 } else if (o->type == OBJ_STREAM) {
868 stream *s = o->ptr;
869 asize = sizeof(*o);
870 asize += streamRadixTreeMemoryUsage(s->rax);
871
872 /* Now we have to add the listpacks. The last listpack is often non
873 * complete, so we estimate the size of the first N listpacks, and
874 * use the average to compute the size of the first N-1 listpacks, and
875 * finally add the real size of the last node. */
876 raxIterator ri;
877 raxStart(&ri,s->rax);
878 raxSeek(&ri,"^",NULL,0);
879 size_t lpsize = 0, samples = 0;
880 while(samples < sample_size && raxNext(&ri)) {
881 unsigned char *lp = ri.data;
882 lpsize += lpBytes(lp);
883 samples++;
884 }
885 if (s->rax->numele <= samples) {
886 asize += lpsize;
887 } else {
888 if (samples) lpsize /= samples; /* Compute the average. */
889 asize += lpsize * (s->rax->numele-1);
890 /* No need to check if seek succeeded, we enter this branch only
891 * if there are a few elements in the radix tree. */
892 raxSeek(&ri,"$",NULL,0);
893 raxNext(&ri);
894 asize += lpBytes(ri.data);
895 }
896 raxStop(&ri);
897
898 /* Consumer groups also have a non trivial memory overhead if there
899 * are many consumers and many groups, let's count at least the
900 * overhead of the pending entries in the groups and consumers
901 * PELs. */
902 if (s->cgroups) {
903 raxStart(&ri,s->cgroups);
904 raxSeek(&ri,"^",NULL,0);
905 while(raxNext(&ri)) {
906 streamCG *cg = ri.data;
907 asize += sizeof(*cg);
908 asize += streamRadixTreeMemoryUsage(cg->pel);
909 asize += sizeof(streamNACK)*raxSize(cg->pel);
910
911 /* For each consumer we also need to add the basic data
912 * structures and the PEL memory usage. */
913 raxIterator cri;
914 raxStart(&cri,cg->consumers);
915 raxSeek(&cri,"^",NULL,0);
916 while(raxNext(&cri)) {
917 streamConsumer *consumer = cri.data;
918 asize += sizeof(*consumer);
919 asize += sdslen(consumer->name);
920 asize += streamRadixTreeMemoryUsage(consumer->pel);
921 /* Don't count NACKs again, they are shared with the
922 * consumer group PEL. */
923 }
924 raxStop(&cri);
925 }
926 raxStop(&ri);
927 }
928 } else if (o->type == OBJ_MODULE) {
929 moduleValue *mv = o->ptr;
930 moduleType *mt = mv->type;
931 if (mt->mem_usage != NULL) {
932 asize = mt->mem_usage(mv->value);
933 } else {
934 asize = 0;
935 }
936 } else {
937 serverPanic("Unknown object type");
938 }
939 return asize;
940 }
941
942 /* Release data obtained with getMemoryOverheadData(). */
freeMemoryOverheadData(struct redisMemOverhead * mh)943 void freeMemoryOverheadData(struct redisMemOverhead *mh) {
944 zfree(mh->db);
945 zfree(mh);
946 }
947
948 /* Return a struct redisMemOverhead filled with memory overhead
949 * information used for the MEMORY OVERHEAD and INFO command. The returned
950 * structure pointer should be freed calling freeMemoryOverheadData(). */
getMemoryOverheadData(void)951 struct redisMemOverhead *getMemoryOverheadData(void) {
952 int j;
953 size_t mem_total = 0;
954 size_t mem = 0;
955 size_t zmalloc_used = zmalloc_used_memory();
956 struct redisMemOverhead *mh = zcalloc(sizeof(*mh));
957
958 mh->total_allocated = zmalloc_used;
959 mh->startup_allocated = server.initial_memory_usage;
960 mh->peak_allocated = server.stat_peak_memory;
961 mh->total_frag =
962 (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.zmalloc_used;
963 mh->total_frag_bytes =
964 server.cron_malloc_stats.process_rss - server.cron_malloc_stats.zmalloc_used;
965 mh->allocator_frag =
966 (float)server.cron_malloc_stats.allocator_active / server.cron_malloc_stats.allocator_allocated;
967 mh->allocator_frag_bytes =
968 server.cron_malloc_stats.allocator_active - server.cron_malloc_stats.allocator_allocated;
969 mh->allocator_rss =
970 (float)server.cron_malloc_stats.allocator_resident / server.cron_malloc_stats.allocator_active;
971 mh->allocator_rss_bytes =
972 server.cron_malloc_stats.allocator_resident - server.cron_malloc_stats.allocator_active;
973 mh->rss_extra =
974 (float)server.cron_malloc_stats.process_rss / server.cron_malloc_stats.allocator_resident;
975 mh->rss_extra_bytes =
976 server.cron_malloc_stats.process_rss - server.cron_malloc_stats.allocator_resident;
977
978 mem_total += server.initial_memory_usage;
979
980 mem = 0;
981 if (server.repl_backlog)
982 mem += zmalloc_size(server.repl_backlog);
983 mh->repl_backlog = mem;
984 mem_total += mem;
985
986 mem = 0;
987 if (listLength(server.slaves)) {
988 listIter li;
989 listNode *ln;
990
991 listRewind(server.slaves,&li);
992 while((ln = listNext(&li))) {
993 client *c = listNodeValue(ln);
994 mem += getClientOutputBufferMemoryUsage(c);
995 mem += sdsAllocSize(c->querybuf);
996 mem += sizeof(client);
997 }
998 }
999 mh->clients_slaves = mem;
1000 mem_total+=mem;
1001
1002 mem = 0;
1003 if (listLength(server.clients)) {
1004 listIter li;
1005 listNode *ln;
1006
1007 listRewind(server.clients,&li);
1008 while((ln = listNext(&li))) {
1009 client *c = listNodeValue(ln);
1010 if (c->flags & CLIENT_SLAVE && !(c->flags & CLIENT_MONITOR))
1011 continue;
1012 mem += getClientOutputBufferMemoryUsage(c);
1013 mem += sdsAllocSize(c->querybuf);
1014 mem += sizeof(client);
1015 }
1016 }
1017 mh->clients_normal = mem;
1018 mem_total+=mem;
1019
1020 mem = 0;
1021 if (server.aof_state != AOF_OFF) {
1022 mem += sdsalloc(server.aof_buf);
1023 mem += aofRewriteBufferSize();
1024 }
1025 mh->aof_buffer = mem;
1026 mem_total+=mem;
1027
1028 mem = server.lua_scripts_mem;
1029 mem += dictSize(server.lua_scripts) * sizeof(dictEntry) +
1030 dictSlots(server.lua_scripts) * sizeof(dictEntry*);
1031 mem += dictSize(server.repl_scriptcache_dict) * sizeof(dictEntry) +
1032 dictSlots(server.repl_scriptcache_dict) * sizeof(dictEntry*);
1033 if (listLength(server.repl_scriptcache_fifo) > 0) {
1034 mem += listLength(server.repl_scriptcache_fifo) * (sizeof(listNode) +
1035 sdsZmallocSize(listNodeValue(listFirst(server.repl_scriptcache_fifo))));
1036 }
1037 mh->lua_caches = mem;
1038 mem_total+=mem;
1039
1040 for (j = 0; j < server.dbnum; j++) {
1041 redisDb *db = server.db+j;
1042 long long keyscount = dictSize(db->dict);
1043 if (keyscount==0) continue;
1044
1045 mh->total_keys += keyscount;
1046 mh->db = zrealloc(mh->db,sizeof(mh->db[0])*(mh->num_dbs+1));
1047 mh->db[mh->num_dbs].dbid = j;
1048
1049 mem = dictSize(db->dict) * sizeof(dictEntry) +
1050 dictSlots(db->dict) * sizeof(dictEntry*) +
1051 dictSize(db->dict) * sizeof(robj);
1052 mh->db[mh->num_dbs].overhead_ht_main = mem;
1053 mem_total+=mem;
1054
1055 mem = dictSize(db->expires) * sizeof(dictEntry) +
1056 dictSlots(db->expires) * sizeof(dictEntry*);
1057 mh->db[mh->num_dbs].overhead_ht_expires = mem;
1058 mem_total+=mem;
1059
1060 mh->num_dbs++;
1061 }
1062
1063 mh->overhead_total = mem_total;
1064 mh->dataset = zmalloc_used - mem_total;
1065 mh->peak_perc = (float)zmalloc_used*100/mh->peak_allocated;
1066
1067 /* Metrics computed after subtracting the startup memory from
1068 * the total memory. */
1069 size_t net_usage = 1;
1070 if (zmalloc_used > mh->startup_allocated)
1071 net_usage = zmalloc_used - mh->startup_allocated;
1072 mh->dataset_perc = (float)mh->dataset*100/net_usage;
1073 mh->bytes_per_key = mh->total_keys ? (net_usage / mh->total_keys) : 0;
1074
1075 return mh;
1076 }
1077
1078 /* Helper for "MEMORY allocator-stats", used as a callback for the jemalloc
1079 * stats output. */
inputCatSds(void * result,const char * str)1080 void inputCatSds(void *result, const char *str) {
1081 /* result is actually a (sds *), so re-cast it here */
1082 sds *info = (sds *)result;
1083 *info = sdscat(*info, str);
1084 }
1085
1086 /* This implements MEMORY DOCTOR. An human readable analysis of the Redis
1087 * memory condition. */
getMemoryDoctorReport(void)1088 sds getMemoryDoctorReport(void) {
1089 int empty = 0; /* Instance is empty or almost empty. */
1090 int big_peak = 0; /* Memory peak is much larger than used mem. */
1091 int high_frag = 0; /* High fragmentation. */
1092 int high_alloc_frag = 0;/* High allocator fragmentation. */
1093 int high_proc_rss = 0; /* High process rss overhead. */
1094 int high_alloc_rss = 0; /* High rss overhead. */
1095 int big_slave_buf = 0; /* Slave buffers are too big. */
1096 int big_client_buf = 0; /* Client buffers are too big. */
1097 int many_scripts = 0; /* Script cache has too many scripts. */
1098 int num_reports = 0;
1099 struct redisMemOverhead *mh = getMemoryOverheadData();
1100
1101 if (mh->total_allocated < (1024*1024*5)) {
1102 empty = 1;
1103 num_reports++;
1104 } else {
1105 /* Peak is > 150% of current used memory? */
1106 if (((float)mh->peak_allocated / mh->total_allocated) > 1.5) {
1107 big_peak = 1;
1108 num_reports++;
1109 }
1110
1111 /* Fragmentation is higher than 1.4 and 10MB ?*/
1112 if (mh->total_frag > 1.4 && mh->total_frag_bytes > 10<<20) {
1113 high_frag = 1;
1114 num_reports++;
1115 }
1116
1117 /* External fragmentation is higher than 1.1 and 10MB? */
1118 if (mh->allocator_frag > 1.1 && mh->allocator_frag_bytes > 10<<20) {
1119 high_alloc_frag = 1;
1120 num_reports++;
1121 }
1122
1123 /* Allocator fss is higher than 1.1 and 10MB ? */
1124 if (mh->allocator_rss > 1.1 && mh->allocator_rss_bytes > 10<<20) {
1125 high_alloc_rss = 1;
1126 num_reports++;
1127 }
1128
1129 /* Non-Allocator fss is higher than 1.1 and 10MB ? */
1130 if (mh->rss_extra > 1.1 && mh->rss_extra_bytes > 10<<20) {
1131 high_proc_rss = 1;
1132 num_reports++;
1133 }
1134
1135 /* Clients using more than 200k each average? */
1136 long numslaves = listLength(server.slaves);
1137 long numclients = listLength(server.clients)-numslaves;
1138 if (mh->clients_normal / numclients > (1024*200)) {
1139 big_client_buf = 1;
1140 num_reports++;
1141 }
1142
1143 /* Slaves using more than 10 MB each? */
1144 if (numslaves > 0 && mh->clients_slaves / numslaves > (1024*1024*10)) {
1145 big_slave_buf = 1;
1146 num_reports++;
1147 }
1148
1149 /* Too many scripts are cached? */
1150 if (dictSize(server.lua_scripts) > 1000) {
1151 many_scripts = 1;
1152 num_reports++;
1153 }
1154 }
1155
1156 sds s;
1157 if (num_reports == 0) {
1158 s = sdsnew(
1159 "Hi Sam, I can't find any memory issue in your instance. "
1160 "I can only account for what occurs on this base.\n");
1161 } else if (empty == 1) {
1162 s = sdsnew(
1163 "Hi Sam, this instance is empty or is using very little memory, "
1164 "my issues detector can't be used in these conditions. "
1165 "Please, leave for your mission on Earth and fill it with some data. "
1166 "The new Sam and I will be back to our programming as soon as I "
1167 "finished rebooting.\n");
1168 } else {
1169 s = sdsnew("Sam, I detected a few issues in this Redis instance memory implants:\n\n");
1170 if (big_peak) {
1171 s = sdscat(s," * Peak memory: In the past this instance used more than 150% the memory that is currently using. The allocator is normally not able to release memory after a peak, so you can expect to see a big fragmentation ratio, however this is actually harmless and is only due to the memory peak, and if the Redis instance Resident Set Size (RSS) is currently bigger than expected, the memory will be used as soon as you fill the Redis instance with more data. If the memory peak was only occasional and you want to try to reclaim memory, please try the MEMORY PURGE command, otherwise the only other option is to shutdown and restart the instance.\n\n");
1172 }
1173 if (high_frag) {
1174 s = sdscatprintf(s," * High total RSS: This instance has a memory fragmentation and RSS overhead greater than 1.4 (this means that the Resident Set Size of the Redis process is much larger than the sum of the logical allocations Redis performed). This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. If the problem is a large peak memory, then there is no issue. Otherwise, make sure you are using the Jemalloc allocator and not the default libc malloc. Note: The currently used allocator is \"%s\".\n\n", ZMALLOC_LIB);
1175 }
1176 if (high_alloc_frag) {
1177 s = sdscatprintf(s," * High allocator fragmentation: This instance has an allocator external fragmentation greater than 1.1. This problem is usually due either to a large peak memory (check if there is a peak memory entry above in the report) or may result from a workload that causes the allocator to fragment memory a lot. You can try enabling 'activedefrag' config option.\n\n");
1178 }
1179 if (high_alloc_rss) {
1180 s = sdscatprintf(s," * High allocator RSS overhead: This instance has an RSS memory overhead is greater than 1.1 (this means that the Resident Set Size of the allocator is much larger than the sum what the allocator actually holds). This problem is usually due to a large peak memory (check if there is a peak memory entry above in the report), you can try the MEMORY PURGE command to reclaim it.\n\n");
1181 }
1182 if (high_proc_rss) {
1183 s = sdscatprintf(s," * High process RSS overhead: This instance has non-allocator RSS memory overhead is greater than 1.1 (this means that the Resident Set Size of the Redis process is much larger than the RSS the allocator holds). This problem may be due to Lua scripts or Modules.\n\n");
1184 }
1185 if (big_slave_buf) {
1186 s = sdscat(s," * Big replica buffers: The replica output buffers in this instance are greater than 10MB for each replica (on average). This likely means that there is some replica instance that is struggling receiving data, either because it is too slow or because of networking issues. As a result, data piles on the master output buffers. Please try to identify what replica is not receiving data correctly and why. You can use the INFO output in order to check the replicas delays and the CLIENT LIST command to check the output buffers of each replica.\n\n");
1187 }
1188 if (big_client_buf) {
1189 s = sdscat(s," * Big client buffers: The clients output buffers in this instance are greater than 200K per client (on average). This may result from different causes, like Pub/Sub clients subscribed to channels bot not receiving data fast enough, so that data piles on the Redis instance output buffer, or clients sending commands with large replies or very large sequences of commands in the same pipeline. Please use the CLIENT LIST command in order to investigate the issue if it causes problems in your instance, or to understand better why certain clients are using a big amount of memory.\n\n");
1190 }
1191 if (many_scripts) {
1192 s = sdscat(s," * Many scripts: There seem to be many cached scripts in this instance (more than 1000). This may be because scripts are generated and `EVAL`ed, instead of being parameterized (with KEYS and ARGV), `SCRIPT LOAD`ed and `EVALSHA`ed. Unless `SCRIPT FLUSH` is called periodically, the scripts' caches may end up consuming most of your memory.\n\n");
1193 }
1194 s = sdscat(s,"I'm here to keep you safe, Sam. I want to help you.\n");
1195 }
1196 freeMemoryOverheadData(mh);
1197 return s;
1198 }
1199
1200 /* Set the object LRU/LFU depending on server.maxmemory_policy.
1201 * The lfu_freq arg is only relevant if policy is MAXMEMORY_FLAG_LFU.
1202 * The lru_idle and lru_clock args are only relevant if policy
1203 * is MAXMEMORY_FLAG_LRU.
1204 * Either or both of them may be <0, in that case, nothing is set. */
objectSetLRUOrLFU(robj * val,long long lfu_freq,long long lru_idle,long long lru_clock)1205 void objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle,
1206 long long lru_clock) {
1207 if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
1208 if (lfu_freq >= 0) {
1209 serverAssert(lfu_freq <= 255);
1210 val->lru = (LFUGetTimeInMinutes()<<8) | lfu_freq;
1211 }
1212 } else if (lru_idle >= 0) {
1213 /* Provided LRU idle time is in seconds. Scale
1214 * according to the LRU clock resolution this Redis
1215 * instance was compiled with (normally 1000 ms, so the
1216 * below statement will expand to lru_idle*1000/1000. */
1217 lru_idle = lru_idle*1000/LRU_CLOCK_RESOLUTION;
1218 long lru_abs = lru_clock - lru_idle; /* Absolute access time. */
1219 /* If the LRU field underflows (since LRU it is a wrapping
1220 * clock), the best we can do is to provide a large enough LRU
1221 * that is half-way in the circlular LRU clock we use: this way
1222 * the computed idle time for this object will stay high for quite
1223 * some time. */
1224 if (lru_abs < 0)
1225 lru_abs = (lru_clock+(LRU_CLOCK_MAX/2)) % LRU_CLOCK_MAX;
1226 val->lru = lru_abs;
1227 }
1228 }
1229
1230 /* ======================= The OBJECT and MEMORY commands =================== */
1231
1232 /* This is a helper function for the OBJECT command. We need to lookup keys
1233 * without any modification of LRU or other parameters. */
objectCommandLookup(client * c,robj * key)1234 robj *objectCommandLookup(client *c, robj *key) {
1235 dictEntry *de;
1236
1237 if ((de = dictFind(c->db->dict,key->ptr)) == NULL) return NULL;
1238 return (robj*) dictGetVal(de);
1239 }
1240
/* Same as objectCommandLookup(), but when the key is missing 'reply' is
 * sent to the client before NULL is returned. */
robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply) {
    robj *found = objectCommandLookup(c,key);

    if (found == NULL) addReply(c, reply);
    return found;
}
1247
1248 /* Object command allows to inspect the internals of an Redis Object.
1249 * Usage: OBJECT <refcount|encoding|idletime|freq> <key> */
objectCommand(client * c)1250 void objectCommand(client *c) {
1251 robj *o;
1252
1253 if (c->argc == 2 && !strcasecmp(c->argv[1]->ptr,"help")) {
1254 const char *help[] = {
1255 "ENCODING <key> -- Return the kind of internal representation used in order to store the value associated with a key.",
1256 "FREQ <key> -- Return the access frequency index of the key. The returned integer is proportional to the logarithm of the recent access frequency of the key.",
1257 "IDLETIME <key> -- Return the idle time of the key, that is the approximated number of seconds elapsed since the last access to the key.",
1258 "REFCOUNT <key> -- Return the number of references of the value associated with the specified key.",
1259 NULL
1260 };
1261 addReplyHelp(c, help);
1262 } else if (!strcasecmp(c->argv[1]->ptr,"refcount") && c->argc == 3) {
1263 if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
1264 == NULL) return;
1265 addReplyLongLong(c,o->refcount);
1266 } else if (!strcasecmp(c->argv[1]->ptr,"encoding") && c->argc == 3) {
1267 if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
1268 == NULL) return;
1269 addReplyBulkCString(c,strEncoding(o->encoding));
1270 } else if (!strcasecmp(c->argv[1]->ptr,"idletime") && c->argc == 3) {
1271 if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
1272 == NULL) return;
1273 if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
1274 addReplyError(c,"An LFU maxmemory policy is selected, idle time not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust.");
1275 return;
1276 }
1277 addReplyLongLong(c,estimateObjectIdleTime(o)/1000);
1278 } else if (!strcasecmp(c->argv[1]->ptr,"freq") && c->argc == 3) {
1279 if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nullbulk))
1280 == NULL) return;
1281 if (!(server.maxmemory_policy & MAXMEMORY_FLAG_LFU)) {
1282 addReplyError(c,"An LFU maxmemory policy is not selected, access frequency not tracked. Please note that when switching between policies at runtime LRU and LFU data will take some time to adjust.");
1283 return;
1284 }
1285 /* LFUDecrAndReturn should be called
1286 * in case of the key has not been accessed for a long time,
1287 * because we update the access time only
1288 * when the key is read or overwritten. */
1289 addReplyLongLong(c,LFUDecrAndReturn(o));
1290 } else {
1291 addReplySubcommandSyntaxError(c);
1292 }
1293 }
1294
1295 /* The memory command will eventually be a complete interface for the
1296 * memory introspection capabilities of Redis.
1297 *
1298 * Usage: MEMORY usage <key> */
memoryCommand(client * c)1299 void memoryCommand(client *c) {
1300 if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc == 2) {
1301 const char *help[] = {
1302 "DOCTOR - Return memory problems reports.",
1303 "MALLOC-STATS -- Return internal statistics report from the memory allocator.",
1304 "PURGE -- Attempt to purge dirty pages for reclamation by the allocator.",
1305 "STATS -- Return information about the memory usage of the server.",
1306 "USAGE <key> [SAMPLES <count>] -- Return memory in bytes used by <key> and its value. Nested values are sampled up to <count> times (default: 5).",
1307 NULL
1308 };
1309 addReplyHelp(c, help);
1310 } else if (!strcasecmp(c->argv[1]->ptr,"usage") && c->argc >= 3) {
1311 dictEntry *de;
1312 long long samples = OBJ_COMPUTE_SIZE_DEF_SAMPLES;
1313 for (int j = 3; j < c->argc; j++) {
1314 if (!strcasecmp(c->argv[j]->ptr,"samples") &&
1315 j+1 < c->argc)
1316 {
1317 if (getLongLongFromObjectOrReply(c,c->argv[j+1],&samples,NULL)
1318 == C_ERR) return;
1319 if (samples < 0) {
1320 addReply(c,shared.syntaxerr);
1321 return;
1322 }
1323 if (samples == 0) samples = LLONG_MAX;;
1324 j++; /* skip option argument. */
1325 } else {
1326 addReply(c,shared.syntaxerr);
1327 return;
1328 }
1329 }
1330 if ((de = dictFind(c->db->dict,c->argv[2]->ptr)) == NULL) {
1331 addReply(c, shared.nullbulk);
1332 return;
1333 }
1334 size_t usage = objectComputeSize(dictGetVal(de),samples);
1335 usage += sdsAllocSize(dictGetKey(de));
1336 usage += sizeof(dictEntry);
1337 addReplyLongLong(c,usage);
1338 } else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) {
1339 struct redisMemOverhead *mh = getMemoryOverheadData();
1340
1341 addReplyMultiBulkLen(c,(25+mh->num_dbs)*2);
1342
1343 addReplyBulkCString(c,"peak.allocated");
1344 addReplyLongLong(c,mh->peak_allocated);
1345
1346 addReplyBulkCString(c,"total.allocated");
1347 addReplyLongLong(c,mh->total_allocated);
1348
1349 addReplyBulkCString(c,"startup.allocated");
1350 addReplyLongLong(c,mh->startup_allocated);
1351
1352 addReplyBulkCString(c,"replication.backlog");
1353 addReplyLongLong(c,mh->repl_backlog);
1354
1355 addReplyBulkCString(c,"clients.slaves");
1356 addReplyLongLong(c,mh->clients_slaves);
1357
1358 addReplyBulkCString(c,"clients.normal");
1359 addReplyLongLong(c,mh->clients_normal);
1360
1361 addReplyBulkCString(c,"aof.buffer");
1362 addReplyLongLong(c,mh->aof_buffer);
1363
1364 addReplyBulkCString(c,"lua.caches");
1365 addReplyLongLong(c,mh->lua_caches);
1366
1367 for (size_t j = 0; j < mh->num_dbs; j++) {
1368 char dbname[32];
1369 snprintf(dbname,sizeof(dbname),"db.%zd",mh->db[j].dbid);
1370 addReplyBulkCString(c,dbname);
1371 addReplyMultiBulkLen(c,4);
1372
1373 addReplyBulkCString(c,"overhead.hashtable.main");
1374 addReplyLongLong(c,mh->db[j].overhead_ht_main);
1375
1376 addReplyBulkCString(c,"overhead.hashtable.expires");
1377 addReplyLongLong(c,mh->db[j].overhead_ht_expires);
1378 }
1379
1380 addReplyBulkCString(c,"overhead.total");
1381 addReplyLongLong(c,mh->overhead_total);
1382
1383 addReplyBulkCString(c,"keys.count");
1384 addReplyLongLong(c,mh->total_keys);
1385
1386 addReplyBulkCString(c,"keys.bytes-per-key");
1387 addReplyLongLong(c,mh->bytes_per_key);
1388
1389 addReplyBulkCString(c,"dataset.bytes");
1390 addReplyLongLong(c,mh->dataset);
1391
1392 addReplyBulkCString(c,"dataset.percentage");
1393 addReplyDouble(c,mh->dataset_perc);
1394
1395 addReplyBulkCString(c,"peak.percentage");
1396 addReplyDouble(c,mh->peak_perc);
1397
1398 addReplyBulkCString(c,"allocator.allocated");
1399 addReplyLongLong(c,server.cron_malloc_stats.allocator_allocated);
1400
1401 addReplyBulkCString(c,"allocator.active");
1402 addReplyLongLong(c,server.cron_malloc_stats.allocator_active);
1403
1404 addReplyBulkCString(c,"allocator.resident");
1405 addReplyLongLong(c,server.cron_malloc_stats.allocator_resident);
1406
1407 addReplyBulkCString(c,"allocator-fragmentation.ratio");
1408 addReplyDouble(c,mh->allocator_frag);
1409
1410 addReplyBulkCString(c,"allocator-fragmentation.bytes");
1411 addReplyLongLong(c,mh->allocator_frag_bytes);
1412
1413 addReplyBulkCString(c,"allocator-rss.ratio");
1414 addReplyDouble(c,mh->allocator_rss);
1415
1416 addReplyBulkCString(c,"allocator-rss.bytes");
1417 addReplyLongLong(c,mh->allocator_rss_bytes);
1418
1419 addReplyBulkCString(c,"rss-overhead.ratio");
1420 addReplyDouble(c,mh->rss_extra);
1421
1422 addReplyBulkCString(c,"rss-overhead.bytes");
1423 addReplyLongLong(c,mh->rss_extra_bytes);
1424
1425 addReplyBulkCString(c,"fragmentation"); /* this is the total RSS overhead, including fragmentation */
1426 addReplyDouble(c,mh->total_frag); /* it is kept here for backwards compatibility */
1427
1428 addReplyBulkCString(c,"fragmentation.bytes");
1429 addReplyLongLong(c,mh->total_frag_bytes);
1430
1431 freeMemoryOverheadData(mh);
1432 } else if (!strcasecmp(c->argv[1]->ptr,"malloc-stats") && c->argc == 2) {
1433 #if defined(USE_JEMALLOC)
1434 sds info = sdsempty();
1435 je_malloc_stats_print(inputCatSds, &info, NULL);
1436 addReplyBulkSds(c, info);
1437 #else
1438 addReplyBulkCString(c,"Stats not supported for the current allocator");
1439 #endif
1440 } else if (!strcasecmp(c->argv[1]->ptr,"doctor") && c->argc == 2) {
1441 sds report = getMemoryDoctorReport();
1442 addReplyBulkSds(c,report);
1443 } else if (!strcasecmp(c->argv[1]->ptr,"purge") && c->argc == 2) {
1444 #if defined(USE_JEMALLOC)
1445 char tmp[32];
1446 unsigned narenas = 0;
1447 size_t sz = sizeof(unsigned);
1448 if (!je_mallctl("arenas.narenas", &narenas, &sz, NULL, 0)) {
1449 sprintf(tmp, "arena.%d.purge", narenas);
1450 if (!je_mallctl(tmp, NULL, 0, NULL, 0)) {
1451 addReply(c, shared.ok);
1452 return;
1453 }
1454 }
1455 addReplyError(c, "Error purging dirty pages");
1456 #else
1457 addReply(c, shared.ok);
1458 /* Nothing to do for other allocators. */
1459 #endif
1460 } else {
1461 addReplyErrorFormat(c, "Unknown subcommand or wrong number of arguments for '%s'. Try MEMORY HELP", (char*)c->argv[1]->ptr);
1462 }
1463 }
1464