1 /* Maxmemory directive handling (LRU eviction and other policies).
2 *
3 * ----------------------------------------------------------------------------
4 *
5 * Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright notice,
12 * this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * * Neither the name of Redis nor the names of its contributors may be used
17 * to endorse or promote products derived from this software without
18 * specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include "server.h"
34 #include "bio.h"
35 #include "atomicvar.h"
36
37 /* ----------------------------------------------------------------------------
38 * Data structures
39 * --------------------------------------------------------------------------*/
40
/* To improve the quality of the LRU approximation we take a set of keys
 * that are good candidates for eviction across freeMemoryIfNeeded() calls.
 *
 * Entries inside the eviction pool are taken ordered by idle time, putting
 * greater idle times to the right (ascending order).
 *
 * When an LFU policy is used instead, a reverse frequency indication is used
 * instead of the idle time, so that we still evict by larger value (larger
 * inverse frequency means to evict keys with the least frequent accesses).
 *
 * Empty entries have the key pointer set to NULL. */
#define EVPOOL_SIZE 16
#define EVPOOL_CACHED_SDS_SIZE 255
struct evictionPoolEntry {
    unsigned long long idle;    /* Object idle time (inverse frequency for LFU) */
    sds key;                    /* Key name. */
    sds cached;                 /* Cached SDS object for key name: reused to
                                 * avoid alloc/free churn for short keys. */
    int dbid;                   /* Key DB number. */
};

/* Singleton pool shared by all eviction cycles; allocated once by
 * evictionPoolAlloc() at startup. */
static struct evictionPoolEntry *EvictionPoolLRU;
62
63 /* ----------------------------------------------------------------------------
64 * Implementation of eviction, aging and LRU
65 * --------------------------------------------------------------------------*/
66
67 /* Return the LRU clock, based on the clock resolution. This is a time
68 * in a reduced-bits format that can be used to set and check the
69 * object->lru field of redisObject structures. */
getLRUClock(void)70 unsigned int getLRUClock(void) {
71 return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX;
72 }
73
74 /* This function is used to obtain the current LRU clock.
75 * If the current resolution is lower than the frequency we refresh the
76 * LRU clock (as it should be in production servers) we return the
77 * precomputed value, otherwise we need to resort to a system call. */
LRU_CLOCK(void)78 unsigned int LRU_CLOCK(void) {
79 unsigned int lruclock;
80 if (1000/server.hz <= LRU_CLOCK_RESOLUTION) {
81 atomicGet(server.lruclock,lruclock);
82 } else {
83 lruclock = getLRUClock();
84 }
85 return lruclock;
86 }
87
88 /* Given an object returns the min number of milliseconds the object was never
89 * requested, using an approximated LRU algorithm. */
estimateObjectIdleTime(robj * o)90 unsigned long long estimateObjectIdleTime(robj *o) {
91 unsigned long long lruclock = LRU_CLOCK();
92 if (lruclock >= o->lru) {
93 return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION;
94 } else {
95 return (lruclock + (LRU_CLOCK_MAX - o->lru)) *
96 LRU_CLOCK_RESOLUTION;
97 }
98 }
99
100 /* freeMemoryIfNeeded() gets called when 'maxmemory' is set on the config
101 * file to limit the max memory used by the server, before processing a
102 * command.
103 *
104 * The goal of the function is to free enough memory to keep Redis under the
105 * configured memory limit.
106 *
107 * The function starts calculating how many bytes should be freed to keep
108 * Redis under the limit, and enters a loop selecting the best keys to
109 * evict accordingly to the configured policy.
110 *
111 * If all the bytes needed to return back under the limit were freed the
112 * function returns C_OK, otherwise C_ERR is returned, and the caller
113 * should block the execution of commands that will result in more memory
114 * used by the server.
115 *
116 * ------------------------------------------------------------------------
117 *
118 * LRU approximation algorithm
119 *
120 * Redis uses an approximation of the LRU algorithm that runs in constant
121 * memory. Every time there is a key to expire, we sample N keys (with
122 * N very small, usually in around 5) to populate a pool of best keys to
123 * evict of M keys (the pool size is defined by EVPOOL_SIZE).
124 *
125 * The N keys sampled are added in the pool of good keys to expire (the one
126 * with an old access time) if they are better than one of the current keys
127 * in the pool.
128 *
129 * After the pool is populated, the best key we have in the pool is expired.
130 * However note that we don't remove keys from the pool when they are deleted
131 * so the pool may contain keys that no longer exist.
132 *
133 * When we try to evict a key, and all the entries in the pool don't exist
134 * we populate it again. This time we'll be sure that the pool has at least
135 * one key that can be evicted, if there is at least one key that can be
136 * evicted in the whole database. */
137
138 /* Create a new eviction pool. */
evictionPoolAlloc(void)139 void evictionPoolAlloc(void) {
140 struct evictionPoolEntry *ep;
141 int j;
142
143 ep = zmalloc(sizeof(*ep)*EVPOOL_SIZE);
144 for (j = 0; j < EVPOOL_SIZE; j++) {
145 ep[j].idle = 0;
146 ep[j].key = NULL;
147 ep[j].cached = sdsnewlen(NULL,EVPOOL_CACHED_SDS_SIZE);
148 ep[j].dbid = 0;
149 }
150 EvictionPoolLRU = ep;
151 }
152
/* This is a helper function for freeMemoryIfNeeded(), it is used in order
 * to populate the evictionPool with a few entries every time we want to
 * expire a key. Keys with idle time smaller than one of the current
 * keys are added. Keys are always added if there are free entries.
 *
 * We insert keys in place in ascending order, so keys with the smaller
 * idle time are on the left, and keys with the higher idle time on the
 * right.
 *
 * 'sampledict' is the dict keys are sampled from (main dict or expires,
 * depending on the policy); 'keydict' is always the main key dictionary,
 * used to fetch value objects when sampling from expires. */

void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
    int j, k, count;
    /* VLA sized by the configured sample count (usually small, ~5). */
    dictEntry *samples[server.maxmemory_samples];

    count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
    for (j = 0; j < count; j++) {
        unsigned long long idle;
        sds key;
        robj *o;
        dictEntry *de;

        de = samples[j];
        key = dictGetKey(de);

        /* If the dictionary we are sampling from is not the main
         * dictionary (but the expires one) we need to lookup the key
         * again in the key dictionary to obtain the value object.
         * NOTE: for MAXMEMORY_VOLATILE_TTL 'o' is deliberately left
         * unset: that policy's branch below reads the expire time from
         * 'de' and never dereferences 'o'. */
        if (server.maxmemory_policy != MAXMEMORY_VOLATILE_TTL) {
            if (sampledict != keydict) de = dictFind(keydict, key);
            o = dictGetVal(de);
        }

        /* Calculate the idle time according to the policy. This is called
         * idle just because the code initially handled LRU, but is in fact
         * just a score where a higher score means better candidate. */
        if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) {
            idle = estimateObjectIdleTime(o);
        } else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
            /* When we use an LRU policy, we sort the keys by idle time
             * so that we expire keys starting from greater idle time.
             * However when the policy is an LFU one, we have a frequency
             * estimation, and we want to evict keys with lower frequency
             * first. So inside the pool we put objects using the inverted
             * frequency subtracting the actual frequency to the maximum
             * frequency of 255. */
            idle = 255-LFUDecrAndReturn(o);
        } else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
            /* In this case the sooner the expire the better: invert the
             * expire time (stored as the entry value) into a score. */
            idle = ULLONG_MAX - (long)dictGetVal(de);
        } else {
            serverPanic("Unknown eviction policy in evictionPoolPopulate()");
        }

        /* Insert the element inside the pool.
         * First, find the first empty bucket or the first populated
         * bucket that has an idle time smaller than our idle time. */
        k = 0;
        while (k < EVPOOL_SIZE &&
               pool[k].key &&
               pool[k].idle < idle) k++;
        if (k == 0 && pool[EVPOOL_SIZE-1].key != NULL) {
            /* Can't insert if the element is < the worst element we have
             * and there are no empty buckets. */
            continue;
        } else if (k < EVPOOL_SIZE && pool[k].key == NULL) {
            /* Inserting into empty position. No setup needed before insert. */
        } else {
            /* Inserting in the middle. Now k points to the first element
             * greater than the element to insert. */
            if (pool[EVPOOL_SIZE-1].key == NULL) {
                /* Free space on the right? Insert at k shifting
                 * all the elements from k to end to the right. */

                /* Save SDS before overwriting: the memmove would otherwise
                 * duplicate the last entry's cached buffer pointer and leak
                 * the one at position k. */
                sds cached = pool[EVPOOL_SIZE-1].cached;
                memmove(pool+k+1,pool+k,
                    sizeof(pool[0])*(EVPOOL_SIZE-k-1));
                pool[k].cached = cached;
            } else {
                /* No free space on right? Insert at k-1 */
                k--;
                /* Shift all elements on the left of k (included) to the
                 * left, so we discard the element with smaller idle time. */
                sds cached = pool[0].cached; /* Save SDS before overwriting. */
                if (pool[0].key != pool[0].cached) sdsfree(pool[0].key);
                memmove(pool,pool+1,sizeof(pool[0])*k);
                pool[k].cached = cached;
            }
        }

        /* Try to reuse the cached SDS string allocated in the pool entry,
         * because allocating and deallocating this object is costly
         * (according to the profiler, not my fantasy. Remember:
         * premature optimizbla bla bla bla. */
        int klen = sdslen(key);
        if (klen > EVPOOL_CACHED_SDS_SIZE) {
            /* Key too long for the cached buffer: take a heap copy. */
            pool[k].key = sdsdup(key);
        } else {
            /* Copy including the trailing NUL and fix the SDS length. */
            memcpy(pool[k].cached,key,klen+1);
            sdssetlen(pool[k].cached,klen);
            pool[k].key = pool[k].cached;
        }
        pool[k].idle = idle;
        pool[k].dbid = dbid;
    }
}
258
259 /* ----------------------------------------------------------------------------
260 * LFU (Least Frequently Used) implementation.
261
262 * We have 24 total bits of space in each object in order to implement
263 * an LFU (Least Frequently Used) eviction policy, since we re-use the
264 * LRU field for this purpose.
265 *
266 * We split the 24 bits into two fields:
267 *
268 * 16 bits 8 bits
269 * +----------------+--------+
270 * + Last decr time | LOG_C |
271 * +----------------+--------+
272 *
273 * LOG_C is a logarithmic counter that provides an indication of the access
274 * frequency. However this field must also be decremented otherwise what used
275 * to be a frequently accessed key in the past, will remain ranked like that
276 * forever, while we want the algorithm to adapt to access pattern changes.
277 *
278 * So the remaining 16 bits are used in order to store the "decrement time",
279 * a reduced-precision Unix time (we take 16 bits of the time converted
280 * in minutes since we don't care about wrapping around) where the LOG_C
281 * counter is halved if it has an high value, or just decremented if it
282 * has a low value.
283 *
284 * New keys don't start at zero, in order to have the ability to collect
285 * some accesses before being trashed away, so they start at COUNTER_INIT_VAL.
286 * The logarithmic increment performed on LOG_C takes care of COUNTER_INIT_VAL
287 * when incrementing the key, so that keys starting at COUNTER_INIT_VAL
288 * (or having a smaller value) have a very high chance of being incremented
289 * on access.
290 *
291 * During decrement, the value of the logarithmic counter is halved if
292 * its current value is greater than two times the COUNTER_INIT_VAL, otherwise
293 * it is just decremented by one.
294 * --------------------------------------------------------------------------*/
295
296 /* Return the current time in minutes, just taking the least significant
297 * 16 bits. The returned time is suitable to be stored as LDT (last decrement
298 * time) for the LFU implementation. */
LFUGetTimeInMinutes(void)299 unsigned long LFUGetTimeInMinutes(void) {
300 return (server.unixtime/60) & 65535;
301 }
302
/* Given an object's last decrement time 'ldt', compute the minimum number
 * of minutes elapsed since then. If 'ldt' is greater than the current
 * 16-bit minutes clock, the clock is considered to have wrapped exactly
 * once. */
unsigned long LFUTimeElapsed(unsigned long ldt) {
    unsigned long now = LFUGetTimeInMinutes();
    return (now >= ldt) ? (now - ldt) : (65535 - ldt + now);
}
312
313 /* Logarithmically increment a counter. The greater is the current counter value
314 * the less likely is that it gets really implemented. Saturate it at 255. */
LFULogIncr(uint8_t counter)315 uint8_t LFULogIncr(uint8_t counter) {
316 if (counter == 255) return 255;
317 double r = (double)rand()/RAND_MAX;
318 double baseval = counter - LFU_INIT_VAL;
319 if (baseval < 0) baseval = 0;
320 double p = 1.0/(baseval*server.lfu_log_factor+1);
321 if (r < p) counter++;
322 return counter;
323 }
324
325 /* If the object decrement time is reached decrement the LFU counter but
326 * do not update LFU fields of the object, we update the access time
327 * and counter in an explicit way when the object is really accessed.
328 * And we will times halve the counter according to the times of
329 * elapsed time than server.lfu_decay_time.
330 * Return the object frequency counter.
331 *
332 * This function is used in order to scan the dataset for the best object
333 * to fit: as we check for the candidate, we incrementally decrement the
334 * counter of the scanned objects if needed. */
LFUDecrAndReturn(robj * o)335 unsigned long LFUDecrAndReturn(robj *o) {
336 unsigned long ldt = o->lru >> 8;
337 unsigned long counter = o->lru & 255;
338 unsigned long num_periods = server.lfu_decay_time ? LFUTimeElapsed(ldt) / server.lfu_decay_time : 0;
339 if (num_periods)
340 counter = (num_periods > counter) ? 0 : counter - num_periods;
341 return counter;
342 }
343
344 /* ----------------------------------------------------------------------------
 * The external API for eviction: freeMemoryIfNeeded() is called by the
346 * server when there is data to add in order to make space if needed.
347 * --------------------------------------------------------------------------*/
348
349 /* We don't want to count AOF buffers and slaves output buffers as
350 * used memory: the eviction should use mostly data size. This function
351 * returns the sum of AOF and slaves buffer. */
freeMemoryGetNotCountedMemory(void)352 size_t freeMemoryGetNotCountedMemory(void) {
353 size_t overhead = 0;
354 int slaves = listLength(server.slaves);
355
356 if (slaves) {
357 listIter li;
358 listNode *ln;
359
360 listRewind(server.slaves,&li);
361 while((ln = listNext(&li))) {
362 client *slave = listNodeValue(ln);
363 overhead += getClientOutputBufferMemoryUsage(slave);
364 }
365 }
366 if (server.aof_state != AOF_OFF) {
367 overhead += sdsalloc(server.aof_buf)+aofRewriteBufferSize();
368 }
369 return overhead;
370 }
371
372 /* Get the memory status from the point of view of the maxmemory directive:
373 * if the memory used is under the maxmemory setting then C_OK is returned.
374 * Otherwise, if we are over the memory limit, the function returns
375 * C_ERR.
376 *
377 * The function may return additional info via reference, only if the
378 * pointers to the respective arguments is not NULL. Certain fields are
379 * populated only when C_ERR is returned:
380 *
381 * 'total' total amount of bytes used.
382 * (Populated both for C_ERR and C_OK)
383 *
384 * 'logical' the amount of memory used minus the slaves/AOF buffers.
385 * (Populated when C_ERR is returned)
386 *
387 * 'tofree' the amount of memory that should be released
388 * in order to return back into the memory limits.
389 * (Populated when C_ERR is returned)
390 *
391 * 'level' this usually ranges from 0 to 1, and reports the amount of
392 * memory currently used. May be > 1 if we are over the memory
393 * limit.
394 * (Populated both for C_ERR and C_OK)
395 */
getMaxmemoryState(size_t * total,size_t * logical,size_t * tofree,float * level)396 int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level) {
397 size_t mem_reported, mem_used, mem_tofree;
398
399 /* Check if we are over the memory usage limit. If we are not, no need
400 * to subtract the slaves output buffers. We can just return ASAP. */
401 mem_reported = zmalloc_used_memory();
402 if (total) *total = mem_reported;
403
404 /* We may return ASAP if there is no need to compute the level. */
405 int return_ok_asap = !server.maxmemory || mem_reported <= server.maxmemory;
406 if (return_ok_asap && !level) return C_OK;
407
408 /* Remove the size of slaves output buffers and AOF buffer from the
409 * count of used memory. */
410 mem_used = mem_reported;
411 size_t overhead = freeMemoryGetNotCountedMemory();
412 mem_used = (mem_used > overhead) ? mem_used-overhead : 0;
413
414 /* Compute the ratio of memory usage. */
415 if (level) {
416 if (!server.maxmemory) {
417 *level = 0;
418 } else {
419 *level = (float)mem_used / (float)server.maxmemory;
420 }
421 }
422
423 if (return_ok_asap) return C_OK;
424
425 /* Check if we are still over the memory limit. */
426 if (mem_used <= server.maxmemory) return C_OK;
427
428 /* Compute how much memory we need to free. */
429 mem_tofree = mem_used - server.maxmemory;
430
431 if (logical) *logical = mem_used;
432 if (tofree) *tofree = mem_tofree;
433
434 return C_ERR;
435 }
436
/* This function is periodically called to see if there is memory to free
 * according to the current "maxmemory" settings. In case we are over the
 * memory limit, the function will try to free some memory to return back
 * under the limit.
 *
 * The function returns C_OK if we are under the memory limit or if we
 * were over the limit, but the attempt to free memory was successful.
 * Otherwise if we are over the memory limit, but not enough memory
 * was freed to return back under the limit, the function returns C_ERR. */
int freeMemoryIfNeeded(void) {
    /* By default replicas should ignore maxmemory
     * and just be masters exact copies. */
    if (server.masterhost && server.repl_slave_ignore_maxmemory) return C_OK;

    size_t mem_reported, mem_tofree, mem_freed;
    mstime_t latency, eviction_latency;
    long long delta;
    int slaves = listLength(server.slaves);

    /* When clients are paused the dataset should be static not just from the
     * POV of clients not being able to write, but also from the POV of
     * expires and evictions of keys not being performed. */
    if (clientsArePaused()) return C_OK;
    if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK)
        return C_OK;

    mem_freed = 0;

    if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
        goto cant_free; /* We need to free memory, but policy forbids. */

    latencyStartMonitor(latency);
    /* Main eviction loop: keep evicting one key per iteration until we
     * have freed at least 'mem_tofree' bytes or run out of candidates. */
    while (mem_freed < mem_tofree) {
        int j, k, i, keys_freed = 0;
        /* 'next_db' is static so successive calls keep rotating across
         * databases for the random policies. */
        static unsigned int next_db = 0;
        sds bestkey = NULL;
        int bestdbid;
        redisDb *db;
        dict *dict;
        dictEntry *de;

        if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU) ||
            server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL)
        {
            struct evictionPoolEntry *pool = EvictionPoolLRU;

            /* Loop until we pick a key: the pool may only contain
             * "ghosts" (keys deleted since they were sampled), in which
             * case we repopulate and retry. */
            while(bestkey == NULL) {
                unsigned long total_keys = 0, keys;

                /* We don't want to make local-db choices when expiring keys,
                 * so to start populate the eviction pool sampling keys from
                 * every DB. */
                for (i = 0; i < server.dbnum; i++) {
                    db = server.db+i;
                    dict = (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) ?
                            db->dict : db->expires;
                    if ((keys = dictSize(dict)) != 0) {
                        evictionPoolPopulate(i, dict, db->dict, pool);
                        total_keys += keys;
                    }
                }
                if (!total_keys) break; /* No keys to evict. */

                /* Go backward from best to worst element to evict. */
                for (k = EVPOOL_SIZE-1; k >= 0; k--) {
                    if (pool[k].key == NULL) continue;
                    bestdbid = pool[k].dbid;

                    if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) {
                        de = dictFind(server.db[pool[k].dbid].dict,
                            pool[k].key);
                    } else {
                        de = dictFind(server.db[pool[k].dbid].expires,
                            pool[k].key);
                    }

                    /* Remove the entry from the pool. Only free the key
                     * if it is a heap duplicate, not the cached buffer. */
                    if (pool[k].key != pool[k].cached)
                        sdsfree(pool[k].key);
                    pool[k].key = NULL;
                    pool[k].idle = 0;

                    /* If the key exists, is our pick. Otherwise it is
                     * a ghost and we need to try the next element. */
                    if (de) {
                        bestkey = dictGetKey(de);
                        break;
                    } else {
                        /* Ghost... Iterate again. */
                    }
                }
            }
        }

        /* volatile-random and allkeys-random policy */
        else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
                 server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
        {
            /* When evicting a random key, we try to evict a key for
             * each DB, so we use the static 'next_db' variable to
             * incrementally visit all DBs. */
            for (i = 0; i < server.dbnum; i++) {
                j = (++next_db) % server.dbnum;
                db = server.db+j;
                dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) ?
                        db->dict : db->expires;
                if (dictSize(dict) != 0) {
                    de = dictGetRandomKey(dict);
                    bestkey = dictGetKey(de);
                    bestdbid = j;
                    break;
                }
            }
        }

        /* Finally remove the selected key. */
        if (bestkey) {
            db = server.db+bestdbid;
            robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
            /* Propagate the eviction as a DEL (or UNLINK) to replicas/AOF. */
            propagateExpire(db,keyobj,server.lazyfree_lazy_eviction);
            /* We compute the amount of memory freed by db*Delete() alone.
             * It is possible that actually the memory needed to propagate
             * the DEL in AOF and replication link is greater than the one
             * we are freeing removing the key, but we can't account for
             * that otherwise we would never exit the loop.
             *
             * AOF and Output buffer memory will be freed eventually so
             * we only care about memory used by the key space. */
            delta = (long long) zmalloc_used_memory();
            latencyStartMonitor(eviction_latency);
            if (server.lazyfree_lazy_eviction)
                dbAsyncDelete(db,keyobj);
            else
                dbSyncDelete(db,keyobj);
            latencyEndMonitor(eviction_latency);
            latencyAddSampleIfNeeded("eviction-del",eviction_latency);
            /* Don't double-count the delete time inside the whole-cycle
             * latency event. */
            latencyRemoveNestedEvent(latency,eviction_latency);
            delta -= (long long) zmalloc_used_memory();
            mem_freed += delta;
            server.stat_evictedkeys++;
            notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
                keyobj, db->id);
            decrRefCount(keyobj);
            keys_freed++;

            /* When the memory to free starts to be big enough, we may
             * start spending so much time here that is impossible to
             * deliver data to the slaves fast enough, so we force the
             * transmission here inside the loop. */
            if (slaves) flushSlavesOutputBuffers();

            /* Normally our stop condition is the ability to release
             * a fixed, pre-computed amount of memory. However when we
             * are deleting objects in another thread, it's better to
             * check, from time to time, if we already reached our target
             * memory, since the "mem_freed" amount is computed only
             * across the dbAsyncDelete() call, while the thread can
             * release the memory all the time. */
            if (server.lazyfree_lazy_eviction && !(keys_freed % 16)) {
                if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) {
                    /* Let's satisfy our stop condition. */
                    mem_freed = mem_tofree;
                }
            }
        }

        if (!keys_freed) {
            latencyEndMonitor(latency);
            latencyAddSampleIfNeeded("eviction-cycle",latency);
            goto cant_free; /* nothing to free... */
        }
    }
    latencyEndMonitor(latency);
    latencyAddSampleIfNeeded("eviction-cycle",latency);
    return C_OK;

cant_free:
    /* We are here if we are not able to reclaim memory. There is only one
     * last thing we can try: check if the lazyfree thread has jobs in queue
     * and wait... */
    while(bioPendingJobsOfType(BIO_LAZY_FREE)) {
        if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree)
            break;
        usleep(1000);
    }
    return C_ERR;
}
624
625 /* This is a wrapper for freeMemoryIfNeeded() that only really calls the
626 * function if right now there are the conditions to do so safely:
627 *
628 * - There must be no script in timeout condition.
629 * - Nor we are loading data right now.
630 *
631 */
freeMemoryIfNeededAndSafe(void)632 int freeMemoryIfNeededAndSafe(void) {
633 if (server.lua_timedout || server.loading) return C_OK;
634 return freeMemoryIfNeeded();
635 }
636