xref: /f-stack/app/redis-5.0.5/src/evict.c (revision 572c4311)
1 /* Maxmemory directive handling (LRU eviction and other policies).
2  *
3  * ----------------------------------------------------------------------------
4  *
5  * Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  *   * Redistributions of source code must retain the above copyright notice,
12  *     this list of conditions and the following disclaimer.
13  *   * Redistributions in binary form must reproduce the above copyright
14  *     notice, this list of conditions and the following disclaimer in the
15  *     documentation and/or other materials provided with the distribution.
16  *   * Neither the name of Redis nor the names of its contributors may be used
17  *     to endorse or promote products derived from this software without
18  *     specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include "server.h"
34 #include "bio.h"
35 #include "atomicvar.h"
36 
37 /* ----------------------------------------------------------------------------
38  * Data structures
39  * --------------------------------------------------------------------------*/
40 
41 /* To improve the quality of the LRU approximation we take a set of keys
42  * that are good candidate for eviction across freeMemoryIfNeeded() calls.
43  *
 44  * Entries inside the eviction pool are taken ordered by idle time, putting
45  * greater idle times to the right (ascending order).
46  *
47  * When an LFU policy is used instead, a reverse frequency indication is used
48  * instead of the idle time, so that we still evict by larger value (larger
49  * inverse frequency means to evict keys with the least frequent accesses).
50  *
51  * Empty entries have the key pointer set to NULL. */
52 #define EVPOOL_SIZE 16
53 #define EVPOOL_CACHED_SDS_SIZE 255
54 struct evictionPoolEntry {
55     unsigned long long idle;    /* Object idle time (inverse frequency for LFU) */
56     sds key;                    /* Key name. */
57     sds cached;                 /* Cached SDS object for key name. */
58     int dbid;                   /* Key DB number. */
59 };
60 
61 static struct evictionPoolEntry *EvictionPoolLRU;
62 
63 /* ----------------------------------------------------------------------------
64  * Implementation of eviction, aging and LRU
65  * --------------------------------------------------------------------------*/
66 
67 /* Return the LRU clock, based on the clock resolution. This is a time
68  * in a reduced-bits format that can be used to set and check the
69  * object->lru field of redisObject structures. */
getLRUClock(void)70 unsigned int getLRUClock(void) {
71     return (mstime()/LRU_CLOCK_RESOLUTION) & LRU_CLOCK_MAX;
72 }
73 
74 /* This function is used to obtain the current LRU clock.
75  * If the current resolution is lower than the frequency we refresh the
76  * LRU clock (as it should be in production servers) we return the
77  * precomputed value, otherwise we need to resort to a system call. */
LRU_CLOCK(void)78 unsigned int LRU_CLOCK(void) {
79     unsigned int lruclock;
80     if (1000/server.hz <= LRU_CLOCK_RESOLUTION) {
81         atomicGet(server.lruclock,lruclock);
82     } else {
83         lruclock = getLRUClock();
84     }
85     return lruclock;
86 }
87 
88 /* Given an object returns the min number of milliseconds the object was never
89  * requested, using an approximated LRU algorithm. */
estimateObjectIdleTime(robj * o)90 unsigned long long estimateObjectIdleTime(robj *o) {
91     unsigned long long lruclock = LRU_CLOCK();
92     if (lruclock >= o->lru) {
93         return (lruclock - o->lru) * LRU_CLOCK_RESOLUTION;
94     } else {
95         return (lruclock + (LRU_CLOCK_MAX - o->lru)) *
96                     LRU_CLOCK_RESOLUTION;
97     }
98 }
99 
100 /* freeMemoryIfNeeded() gets called when 'maxmemory' is set on the config
101  * file to limit the max memory used by the server, before processing a
102  * command.
103  *
104  * The goal of the function is to free enough memory to keep Redis under the
105  * configured memory limit.
106  *
107  * The function starts calculating how many bytes should be freed to keep
108  * Redis under the limit, and enters a loop selecting the best keys to
109  * evict accordingly to the configured policy.
110  *
111  * If all the bytes needed to return back under the limit were freed the
112  * function returns C_OK, otherwise C_ERR is returned, and the caller
113  * should block the execution of commands that will result in more memory
114  * used by the server.
115  *
116  * ------------------------------------------------------------------------
117  *
118  * LRU approximation algorithm
119  *
120  * Redis uses an approximation of the LRU algorithm that runs in constant
121  * memory. Every time there is a key to expire, we sample N keys (with
122  * N very small, usually in around 5) to populate a pool of best keys to
123  * evict of M keys (the pool size is defined by EVPOOL_SIZE).
124  *
125  * The N keys sampled are added in the pool of good keys to expire (the one
126  * with an old access time) if they are better than one of the current keys
127  * in the pool.
128  *
129  * After the pool is populated, the best key we have in the pool is expired.
130  * However note that we don't remove keys from the pool when they are deleted
131  * so the pool may contain keys that no longer exist.
132  *
133  * When we try to evict a key, and all the entries in the pool don't exist
134  * we populate it again. This time we'll be sure that the pool has at least
135  * one key that can be evicted, if there is at least one key that can be
136  * evicted in the whole database. */
137 
138 /* Create a new eviction pool. */
evictionPoolAlloc(void)139 void evictionPoolAlloc(void) {
140     struct evictionPoolEntry *ep;
141     int j;
142 
143     ep = zmalloc(sizeof(*ep)*EVPOOL_SIZE);
144     for (j = 0; j < EVPOOL_SIZE; j++) {
145         ep[j].idle = 0;
146         ep[j].key = NULL;
147         ep[j].cached = sdsnewlen(NULL,EVPOOL_CACHED_SDS_SIZE);
148         ep[j].dbid = 0;
149     }
150     EvictionPoolLRU = ep;
151 }
152 
153 /* This is an helper function for freeMemoryIfNeeded(), it is used in order
154  * to populate the evictionPool with a few entries every time we want to
155  * expire a key. Keys with idle time smaller than one of the current
156  * keys are added. Keys are always added if there are free entries.
157  *
158  * We insert keys on place in ascending order, so keys with the smaller
159  * idle time are on the left, and keys with the higher idle time on the
160  * right. */
161 
evictionPoolPopulate(int dbid,dict * sampledict,dict * keydict,struct evictionPoolEntry * pool)162 void evictionPoolPopulate(int dbid, dict *sampledict, dict *keydict, struct evictionPoolEntry *pool) {
163     int j, k, count;
164     dictEntry *samples[server.maxmemory_samples];
165 
166     count = dictGetSomeKeys(sampledict,samples,server.maxmemory_samples);
167     for (j = 0; j < count; j++) {
168         unsigned long long idle;
169         sds key;
170         robj *o;
171         dictEntry *de;
172 
173         de = samples[j];
174         key = dictGetKey(de);
175 
176         /* If the dictionary we are sampling from is not the main
177          * dictionary (but the expires one) we need to lookup the key
178          * again in the key dictionary to obtain the value object. */
179         if (server.maxmemory_policy != MAXMEMORY_VOLATILE_TTL) {
180             if (sampledict != keydict) de = dictFind(keydict, key);
181             o = dictGetVal(de);
182         }
183 
184         /* Calculate the idle time according to the policy. This is called
185          * idle just because the code initially handled LRU, but is in fact
186          * just a score where an higher score means better candidate. */
187         if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) {
188             idle = estimateObjectIdleTime(o);
189         } else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
190             /* When we use an LRU policy, we sort the keys by idle time
191              * so that we expire keys starting from greater idle time.
192              * However when the policy is an LFU one, we have a frequency
193              * estimation, and we want to evict keys with lower frequency
194              * first. So inside the pool we put objects using the inverted
195              * frequency subtracting the actual frequency to the maximum
196              * frequency of 255. */
197             idle = 255-LFUDecrAndReturn(o);
198         } else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
199             /* In this case the sooner the expire the better. */
200             idle = ULLONG_MAX - (long)dictGetVal(de);
201         } else {
202             serverPanic("Unknown eviction policy in evictionPoolPopulate()");
203         }
204 
205         /* Insert the element inside the pool.
206          * First, find the first empty bucket or the first populated
207          * bucket that has an idle time smaller than our idle time. */
208         k = 0;
209         while (k < EVPOOL_SIZE &&
210                pool[k].key &&
211                pool[k].idle < idle) k++;
212         if (k == 0 && pool[EVPOOL_SIZE-1].key != NULL) {
213             /* Can't insert if the element is < the worst element we have
214              * and there are no empty buckets. */
215             continue;
216         } else if (k < EVPOOL_SIZE && pool[k].key == NULL) {
217             /* Inserting into empty position. No setup needed before insert. */
218         } else {
219             /* Inserting in the middle. Now k points to the first element
220              * greater than the element to insert.  */
221             if (pool[EVPOOL_SIZE-1].key == NULL) {
222                 /* Free space on the right? Insert at k shifting
223                  * all the elements from k to end to the right. */
224 
225                 /* Save SDS before overwriting. */
226                 sds cached = pool[EVPOOL_SIZE-1].cached;
227                 memmove(pool+k+1,pool+k,
228                     sizeof(pool[0])*(EVPOOL_SIZE-k-1));
229                 pool[k].cached = cached;
230             } else {
231                 /* No free space on right? Insert at k-1 */
232                 k--;
233                 /* Shift all elements on the left of k (included) to the
234                  * left, so we discard the element with smaller idle time. */
235                 sds cached = pool[0].cached; /* Save SDS before overwriting. */
236                 if (pool[0].key != pool[0].cached) sdsfree(pool[0].key);
237                 memmove(pool,pool+1,sizeof(pool[0])*k);
238                 pool[k].cached = cached;
239             }
240         }
241 
242         /* Try to reuse the cached SDS string allocated in the pool entry,
243          * because allocating and deallocating this object is costly
244          * (according to the profiler, not my fantasy. Remember:
245          * premature optimizbla bla bla bla. */
246         int klen = sdslen(key);
247         if (klen > EVPOOL_CACHED_SDS_SIZE) {
248             pool[k].key = sdsdup(key);
249         } else {
250             memcpy(pool[k].cached,key,klen+1);
251             sdssetlen(pool[k].cached,klen);
252             pool[k].key = pool[k].cached;
253         }
254         pool[k].idle = idle;
255         pool[k].dbid = dbid;
256     }
257 }
258 
259 /* ----------------------------------------------------------------------------
260  * LFU (Least Frequently Used) implementation.
261 
262  * We have 24 total bits of space in each object in order to implement
263  * an LFU (Least Frequently Used) eviction policy, since we re-use the
264  * LRU field for this purpose.
265  *
266  * We split the 24 bits into two fields:
267  *
268  *          16 bits      8 bits
269  *     +----------------+--------+
270  *     + Last decr time | LOG_C  |
271  *     +----------------+--------+
272  *
273  * LOG_C is a logarithmic counter that provides an indication of the access
274  * frequency. However this field must also be decremented otherwise what used
275  * to be a frequently accessed key in the past, will remain ranked like that
276  * forever, while we want the algorithm to adapt to access pattern changes.
277  *
278  * So the remaining 16 bits are used in order to store the "decrement time",
279  * a reduced-precision Unix time (we take 16 bits of the time converted
280  * in minutes since we don't care about wrapping around) where the LOG_C
281  * counter is halved if it has an high value, or just decremented if it
282  * has a low value.
283  *
284  * New keys don't start at zero, in order to have the ability to collect
285  * some accesses before being trashed away, so they start at COUNTER_INIT_VAL.
286  * The logarithmic increment performed on LOG_C takes care of COUNTER_INIT_VAL
287  * when incrementing the key, so that keys starting at COUNTER_INIT_VAL
288  * (or having a smaller value) have a very high chance of being incremented
289  * on access.
290  *
291  * During decrement, the value of the logarithmic counter is halved if
292  * its current value is greater than two times the COUNTER_INIT_VAL, otherwise
293  * it is just decremented by one.
294  * --------------------------------------------------------------------------*/
295 
296 /* Return the current time in minutes, just taking the least significant
297  * 16 bits. The returned time is suitable to be stored as LDT (last decrement
298  * time) for the LFU implementation. */
LFUGetTimeInMinutes(void)299 unsigned long LFUGetTimeInMinutes(void) {
300     return (server.unixtime/60) & 65535;
301 }
302 
/* Given an object last access time, compute the minimum number of minutes
 * that elapsed since the last access. Handle overflow (ldt greater than
 * the current 16 bits minutes time) considering the time as wrapping
 * exactly once. */
unsigned long LFUTimeElapsed(unsigned long ldt) {
    unsigned long now = LFUGetTimeInMinutes();
    if (now >= ldt) return now-ldt;
    /* 16-bit minutes clock wrapped: assume exactly one wrap. */
    return 65535-ldt+now;
}
312 
313 /* Logarithmically increment a counter. The greater is the current counter value
314  * the less likely is that it gets really implemented. Saturate it at 255. */
LFULogIncr(uint8_t counter)315 uint8_t LFULogIncr(uint8_t counter) {
316     if (counter == 255) return 255;
317     double r = (double)rand()/RAND_MAX;
318     double baseval = counter - LFU_INIT_VAL;
319     if (baseval < 0) baseval = 0;
320     double p = 1.0/(baseval*server.lfu_log_factor+1);
321     if (r < p) counter++;
322     return counter;
323 }
324 
325 /* If the object decrement time is reached decrement the LFU counter but
326  * do not update LFU fields of the object, we update the access time
327  * and counter in an explicit way when the object is really accessed.
328  * And we will times halve the counter according to the times of
329  * elapsed time than server.lfu_decay_time.
330  * Return the object frequency counter.
331  *
332  * This function is used in order to scan the dataset for the best object
333  * to fit: as we check for the candidate, we incrementally decrement the
334  * counter of the scanned objects if needed. */
LFUDecrAndReturn(robj * o)335 unsigned long LFUDecrAndReturn(robj *o) {
336     unsigned long ldt = o->lru >> 8;
337     unsigned long counter = o->lru & 255;
338     unsigned long num_periods = server.lfu_decay_time ? LFUTimeElapsed(ldt) / server.lfu_decay_time : 0;
339     if (num_periods)
340         counter = (num_periods > counter) ? 0 : counter - num_periods;
341     return counter;
342 }
343 
344 /* ----------------------------------------------------------------------------
 345  * The external API for eviction: freeMemoryIfNeeded() is called by the
346  * server when there is data to add in order to make space if needed.
347  * --------------------------------------------------------------------------*/
348 
349 /* We don't want to count AOF buffers and slaves output buffers as
350  * used memory: the eviction should use mostly data size. This function
351  * returns the sum of AOF and slaves buffer. */
freeMemoryGetNotCountedMemory(void)352 size_t freeMemoryGetNotCountedMemory(void) {
353     size_t overhead = 0;
354     int slaves = listLength(server.slaves);
355 
356     if (slaves) {
357         listIter li;
358         listNode *ln;
359 
360         listRewind(server.slaves,&li);
361         while((ln = listNext(&li))) {
362             client *slave = listNodeValue(ln);
363             overhead += getClientOutputBufferMemoryUsage(slave);
364         }
365     }
366     if (server.aof_state != AOF_OFF) {
367         overhead += sdsalloc(server.aof_buf)+aofRewriteBufferSize();
368     }
369     return overhead;
370 }
371 
372 /* Get the memory status from the point of view of the maxmemory directive:
373  * if the memory used is under the maxmemory setting then C_OK is returned.
374  * Otherwise, if we are over the memory limit, the function returns
375  * C_ERR.
376  *
377  * The function may return additional info via reference, only if the
378  * pointers to the respective arguments is not NULL. Certain fields are
379  * populated only when C_ERR is returned:
380  *
381  *  'total'     total amount of bytes used.
382  *              (Populated both for C_ERR and C_OK)
383  *
384  *  'logical'   the amount of memory used minus the slaves/AOF buffers.
385  *              (Populated when C_ERR is returned)
386  *
387  *  'tofree'    the amount of memory that should be released
388  *              in order to return back into the memory limits.
389  *              (Populated when C_ERR is returned)
390  *
391  *  'level'     this usually ranges from 0 to 1, and reports the amount of
392  *              memory currently used. May be > 1 if we are over the memory
393  *              limit.
394  *              (Populated both for C_ERR and C_OK)
395  */
getMaxmemoryState(size_t * total,size_t * logical,size_t * tofree,float * level)396 int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level) {
397     size_t mem_reported, mem_used, mem_tofree;
398 
399     /* Check if we are over the memory usage limit. If we are not, no need
400      * to subtract the slaves output buffers. We can just return ASAP. */
401     mem_reported = zmalloc_used_memory();
402     if (total) *total = mem_reported;
403 
404     /* We may return ASAP if there is no need to compute the level. */
405     int return_ok_asap = !server.maxmemory || mem_reported <= server.maxmemory;
406     if (return_ok_asap && !level) return C_OK;
407 
408     /* Remove the size of slaves output buffers and AOF buffer from the
409      * count of used memory. */
410     mem_used = mem_reported;
411     size_t overhead = freeMemoryGetNotCountedMemory();
412     mem_used = (mem_used > overhead) ? mem_used-overhead : 0;
413 
414     /* Compute the ratio of memory usage. */
415     if (level) {
416         if (!server.maxmemory) {
417             *level = 0;
418         } else {
419             *level = (float)mem_used / (float)server.maxmemory;
420         }
421     }
422 
423     if (return_ok_asap) return C_OK;
424 
425     /* Check if we are still over the memory limit. */
426     if (mem_used <= server.maxmemory) return C_OK;
427 
428     /* Compute how much memory we need to free. */
429     mem_tofree = mem_used - server.maxmemory;
430 
431     if (logical) *logical = mem_used;
432     if (tofree) *tofree = mem_tofree;
433 
434     return C_ERR;
435 }
436 
437 /* This function is periodically called to see if there is memory to free
438  * according to the current "maxmemory" settings. In case we are over the
439  * memory limit, the function will try to free some memory to return back
440  * under the limit.
441  *
442  * The function returns C_OK if we are under the memory limit or if we
443  * were over the limit, but the attempt to free memory was successful.
444  * Otehrwise if we are over the memory limit, but not enough memory
445  * was freed to return back under the limit, the function returns C_ERR. */
freeMemoryIfNeeded(void)446 int freeMemoryIfNeeded(void) {
447     /* By default replicas should ignore maxmemory
448      * and just be masters exact copies. */
449     if (server.masterhost && server.repl_slave_ignore_maxmemory) return C_OK;
450 
451     size_t mem_reported, mem_tofree, mem_freed;
452     mstime_t latency, eviction_latency;
453     long long delta;
454     int slaves = listLength(server.slaves);
455 
456     /* When clients are paused the dataset should be static not just from the
457      * POV of clients not being able to write, but also from the POV of
458      * expires and evictions of keys not being performed. */
459     if (clientsArePaused()) return C_OK;
460     if (getMaxmemoryState(&mem_reported,NULL,&mem_tofree,NULL) == C_OK)
461         return C_OK;
462 
463     mem_freed = 0;
464 
465     if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION)
466         goto cant_free; /* We need to free memory, but policy forbids. */
467 
468     latencyStartMonitor(latency);
469     while (mem_freed < mem_tofree) {
470         int j, k, i, keys_freed = 0;
471         static unsigned int next_db = 0;
472         sds bestkey = NULL;
473         int bestdbid;
474         redisDb *db;
475         dict *dict;
476         dictEntry *de;
477 
478         if (server.maxmemory_policy & (MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU) ||
479             server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL)
480         {
481             struct evictionPoolEntry *pool = EvictionPoolLRU;
482 
483             while(bestkey == NULL) {
484                 unsigned long total_keys = 0, keys;
485 
486                 /* We don't want to make local-db choices when expiring keys,
487                  * so to start populate the eviction pool sampling keys from
488                  * every DB. */
489                 for (i = 0; i < server.dbnum; i++) {
490                     db = server.db+i;
491                     dict = (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) ?
492                             db->dict : db->expires;
493                     if ((keys = dictSize(dict)) != 0) {
494                         evictionPoolPopulate(i, dict, db->dict, pool);
495                         total_keys += keys;
496                     }
497                 }
498                 if (!total_keys) break; /* No keys to evict. */
499 
500                 /* Go backward from best to worst element to evict. */
501                 for (k = EVPOOL_SIZE-1; k >= 0; k--) {
502                     if (pool[k].key == NULL) continue;
503                     bestdbid = pool[k].dbid;
504 
505                     if (server.maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) {
506                         de = dictFind(server.db[pool[k].dbid].dict,
507                             pool[k].key);
508                     } else {
509                         de = dictFind(server.db[pool[k].dbid].expires,
510                             pool[k].key);
511                     }
512 
513                     /* Remove the entry from the pool. */
514                     if (pool[k].key != pool[k].cached)
515                         sdsfree(pool[k].key);
516                     pool[k].key = NULL;
517                     pool[k].idle = 0;
518 
519                     /* If the key exists, is our pick. Otherwise it is
520                      * a ghost and we need to try the next element. */
521                     if (de) {
522                         bestkey = dictGetKey(de);
523                         break;
524                     } else {
525                         /* Ghost... Iterate again. */
526                     }
527                 }
528             }
529         }
530 
531         /* volatile-random and allkeys-random policy */
532         else if (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
533                  server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM)
534         {
535             /* When evicting a random key, we try to evict a key for
536              * each DB, so we use the static 'next_db' variable to
537              * incrementally visit all DBs. */
538             for (i = 0; i < server.dbnum; i++) {
539                 j = (++next_db) % server.dbnum;
540                 db = server.db+j;
541                 dict = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM) ?
542                         db->dict : db->expires;
543                 if (dictSize(dict) != 0) {
544                     de = dictGetRandomKey(dict);
545                     bestkey = dictGetKey(de);
546                     bestdbid = j;
547                     break;
548                 }
549             }
550         }
551 
552         /* Finally remove the selected key. */
553         if (bestkey) {
554             db = server.db+bestdbid;
555             robj *keyobj = createStringObject(bestkey,sdslen(bestkey));
556             propagateExpire(db,keyobj,server.lazyfree_lazy_eviction);
557             /* We compute the amount of memory freed by db*Delete() alone.
558              * It is possible that actually the memory needed to propagate
559              * the DEL in AOF and replication link is greater than the one
560              * we are freeing removing the key, but we can't account for
561              * that otherwise we would never exit the loop.
562              *
563              * AOF and Output buffer memory will be freed eventually so
564              * we only care about memory used by the key space. */
565             delta = (long long) zmalloc_used_memory();
566             latencyStartMonitor(eviction_latency);
567             if (server.lazyfree_lazy_eviction)
568                 dbAsyncDelete(db,keyobj);
569             else
570                 dbSyncDelete(db,keyobj);
571             latencyEndMonitor(eviction_latency);
572             latencyAddSampleIfNeeded("eviction-del",eviction_latency);
573             latencyRemoveNestedEvent(latency,eviction_latency);
574             delta -= (long long) zmalloc_used_memory();
575             mem_freed += delta;
576             server.stat_evictedkeys++;
577             notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted",
578                 keyobj, db->id);
579             decrRefCount(keyobj);
580             keys_freed++;
581 
582             /* When the memory to free starts to be big enough, we may
583              * start spending so much time here that is impossible to
584              * deliver data to the slaves fast enough, so we force the
585              * transmission here inside the loop. */
586             if (slaves) flushSlavesOutputBuffers();
587 
588             /* Normally our stop condition is the ability to release
589              * a fixed, pre-computed amount of memory. However when we
590              * are deleting objects in another thread, it's better to
591              * check, from time to time, if we already reached our target
592              * memory, since the "mem_freed" amount is computed only
593              * across the dbAsyncDelete() call, while the thread can
594              * release the memory all the time. */
595             if (server.lazyfree_lazy_eviction && !(keys_freed % 16)) {
596                 if (getMaxmemoryState(NULL,NULL,NULL,NULL) == C_OK) {
597                     /* Let's satisfy our stop condition. */
598                     mem_freed = mem_tofree;
599                 }
600             }
601         }
602 
603         if (!keys_freed) {
604             latencyEndMonitor(latency);
605             latencyAddSampleIfNeeded("eviction-cycle",latency);
606             goto cant_free; /* nothing to free... */
607         }
608     }
609     latencyEndMonitor(latency);
610     latencyAddSampleIfNeeded("eviction-cycle",latency);
611     return C_OK;
612 
613 cant_free:
614     /* We are here if we are not able to reclaim memory. There is only one
615      * last thing we can try: check if the lazyfree thread has jobs in queue
616      * and wait... */
617     while(bioPendingJobsOfType(BIO_LAZY_FREE)) {
618         if (((mem_reported - zmalloc_used_memory()) + mem_freed) >= mem_tofree)
619             break;
620         usleep(1000);
621     }
622     return C_ERR;
623 }
624 
625 /* This is a wrapper for freeMemoryIfNeeded() that only really calls the
626  * function if right now there are the conditions to do so safely:
627  *
628  * - There must be no script in timeout condition.
629  * - Nor we are loading data right now.
630  *
631  */
freeMemoryIfNeededAndSafe(void)632 int freeMemoryIfNeededAndSafe(void) {
633     if (server.lua_timedout || server.loading) return C_OK;
634     return freeMemoryIfNeeded();
635 }
636