xref: /f-stack/app/redis-5.0.5/src/expire.c (revision 572c4311)
1 /* Implementation of EXPIRE (keys with fixed time to live).
2  *
3  * ----------------------------------------------------------------------------
4  *
5  * Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  *   * Redistributions of source code must retain the above copyright notice,
12  *     this list of conditions and the following disclaimer.
13  *   * Redistributions in binary form must reproduce the above copyright
14  *     notice, this list of conditions and the following disclaimer in the
15  *     documentation and/or other materials provided with the distribution.
16  *   * Neither the name of Redis nor the names of its contributors may be used
17  *     to endorse or promote products derived from this software without
18  *     specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include "server.h"
34 
35 /*-----------------------------------------------------------------------------
36  * Incremental collection of expired keys.
37  *
38  * When keys are accessed they are expired on-access. However we need a
39  * mechanism in order to ensure keys are eventually removed when expired even
40  * if no access is performed on them.
41  *----------------------------------------------------------------------------*/
42 
43 /* Helper function for the activeExpireCycle() function.
44  * This function will try to expire the key that is stored in the hash table
45  * entry 'de' of the 'expires' hash table of a Redis database.
46  *
47  * If the key is found to be expired, it is removed from the database and
48  * 1 is returned. Otherwise no operation is performed and 0 is returned.
49  *
50  * When a key is expired, server.stat_expiredkeys is incremented.
51  *
52  * The parameter 'now' is the current time in milliseconds as is passed
53  * to the function to avoid too many gettimeofday() syscalls. */
activeExpireCycleTryExpire(redisDb * db,dictEntry * de,long long now)54 int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {
55     long long t = dictGetSignedIntegerVal(de);
56     if (now > t) {
57         sds key = dictGetKey(de);
58         robj *keyobj = createStringObject(key,sdslen(key));
59 
60         propagateExpire(db,keyobj,server.lazyfree_lazy_expire);
61         if (server.lazyfree_lazy_expire)
62             dbAsyncDelete(db,keyobj);
63         else
64             dbSyncDelete(db,keyobj);
65         notifyKeyspaceEvent(NOTIFY_EXPIRED,
66             "expired",keyobj,db->id);
67         decrRefCount(keyobj);
68         server.stat_expiredkeys++;
69         return 1;
70     } else {
71         return 0;
72     }
73 }
74 
75 /* Try to expire a few timed out keys. The algorithm used is adaptive and
76  * will use few CPU cycles if there are few expiring keys, otherwise
77  * it will get more aggressive to avoid that too much memory is used by
78  * keys that can be removed from the keyspace.
79  *
80  * No more than CRON_DBS_PER_CALL databases are tested at every
81  * iteration.
82  *
83  * This kind of call is used when Redis detects that timelimit_exit is
84  * true, so there is more work to do, and we do it more incrementally from
85  * the beforeSleep() function of the event loop.
86  *
87  * Expire cycle type:
88  *
89  * If type is ACTIVE_EXPIRE_CYCLE_FAST the function will try to run a
90  * "fast" expire cycle that takes no longer than EXPIRE_FAST_CYCLE_DURATION
91  * microseconds, and is not repeated again before the same amount of time.
92  *
93  * If type is ACTIVE_EXPIRE_CYCLE_SLOW, that normal expire cycle is
94  * executed, where the time limit is a percentage of the REDIS_HZ period
95  * as specified by the ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC define. */
96 
activeExpireCycle(int type)97 void activeExpireCycle(int type) {
98     /* This function has some global state in order to continue the work
99      * incrementally across calls. */
100     static unsigned int current_db = 0; /* Last DB tested. */
101     static int timelimit_exit = 0;      /* Time limit hit in previous call? */
102     static long long last_fast_cycle = 0; /* When last fast cycle ran. */
103 
104     int j, iteration = 0;
105     int dbs_per_call = CRON_DBS_PER_CALL;
106     long long start = ustime(), timelimit, elapsed;
107 
108     /* When clients are paused the dataset should be static not just from the
109      * POV of clients not being able to write, but also from the POV of
110      * expires and evictions of keys not being performed. */
111     if (clientsArePaused()) return;
112 
113     if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
114         /* Don't start a fast cycle if the previous cycle did not exit
115          * for time limit. Also don't repeat a fast cycle for the same period
116          * as the fast cycle total duration itself. */
117         if (!timelimit_exit) return;
118         if (start < last_fast_cycle + ACTIVE_EXPIRE_CYCLE_FAST_DURATION*2) return;
119         last_fast_cycle = start;
120     }
121 
122     /* We usually should test CRON_DBS_PER_CALL per iteration, with
123      * two exceptions:
124      *
125      * 1) Don't test more DBs than we have.
126      * 2) If last time we hit the time limit, we want to scan all DBs
127      * in this iteration, as there is work to do in some DB and we don't want
128      * expired keys to use memory for too much time. */
129     if (dbs_per_call > server.dbnum || timelimit_exit)
130         dbs_per_call = server.dbnum;
131 
132     /* We can use at max ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC percentage of CPU time
133      * per iteration. Since this function gets called with a frequency of
134      * server.hz times per second, the following is the max amount of
135      * microseconds we can spend in this function. */
136     timelimit = 1000000*ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC/server.hz/100;
137     timelimit_exit = 0;
138     if (timelimit <= 0) timelimit = 1;
139 
140     if (type == ACTIVE_EXPIRE_CYCLE_FAST)
141         timelimit = ACTIVE_EXPIRE_CYCLE_FAST_DURATION; /* in microseconds. */
142 
143     /* Accumulate some global stats as we expire keys, to have some idea
144      * about the number of keys that are already logically expired, but still
145      * existing inside the database. */
146     long total_sampled = 0;
147     long total_expired = 0;
148 
149     for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
150         int expired;
151         redisDb *db = server.db+(current_db % server.dbnum);
152 
153         /* Increment the DB now so we are sure if we run out of time
154          * in the current DB we'll restart from the next. This allows to
155          * distribute the time evenly across DBs. */
156         current_db++;
157 
158         /* Continue to expire if at the end of the cycle more than 25%
159          * of the keys were expired. */
160         do {
161             unsigned long num, slots;
162             long long now, ttl_sum;
163             int ttl_samples;
164             iteration++;
165 
166             /* If there is nothing to expire try next DB ASAP. */
167             if ((num = dictSize(db->expires)) == 0) {
168                 db->avg_ttl = 0;
169                 break;
170             }
171             slots = dictSlots(db->expires);
172             now = mstime();
173 
174             /* When there are less than 1% filled slots getting random
175              * keys is expensive, so stop here waiting for better times...
176              * The dictionary will be resized asap. */
177             if (num && slots > DICT_HT_INITIAL_SIZE &&
178                 (num*100/slots < 1)) break;
179 
180             /* The main collection cycle. Sample random keys among keys
181              * with an expire set, checking for expired ones. */
182             expired = 0;
183             ttl_sum = 0;
184             ttl_samples = 0;
185 
186             if (num > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP)
187                 num = ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP;
188 
189             while (num--) {
190                 dictEntry *de;
191                 long long ttl;
192 
193                 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
194                 ttl = dictGetSignedIntegerVal(de)-now;
195                 if (activeExpireCycleTryExpire(db,de,now)) expired++;
196                 if (ttl > 0) {
197                     /* We want the average TTL of keys yet not expired. */
198                     ttl_sum += ttl;
199                     ttl_samples++;
200                 }
201                 total_sampled++;
202             }
203             total_expired += expired;
204 
205             /* Update the average TTL stats for this database. */
206             if (ttl_samples) {
207                 long long avg_ttl = ttl_sum/ttl_samples;
208 
209                 /* Do a simple running average with a few samples.
210                  * We just use the current estimate with a weight of 2%
211                  * and the previous estimate with a weight of 98%. */
212                 if (db->avg_ttl == 0) db->avg_ttl = avg_ttl;
213                 db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50);
214             }
215 
216             /* We can't block forever here even if there are many keys to
217              * expire. So after a given amount of milliseconds return to the
218              * caller waiting for the other active expire cycle. */
219             if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */
220                 elapsed = ustime()-start;
221                 if (elapsed > timelimit) {
222                     timelimit_exit = 1;
223                     server.stat_expired_time_cap_reached_count++;
224                     break;
225                 }
226             }
227             /* We don't repeat the cycle if there are less than 25% of keys
228              * found expired in the current DB. */
229         } while (expired > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP/4);
230     }
231 
232     elapsed = ustime()-start;
233     latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);
234 
235     /* Update our estimate of keys existing but yet to be expired.
236      * Running average with this sample accounting for 5%. */
237     double current_perc;
238     if (total_sampled) {
239         current_perc = (double)total_expired/total_sampled;
240     } else
241         current_perc = 0;
242     server.stat_expired_stale_perc = (current_perc*0.05)+
243                                      (server.stat_expired_stale_perc*0.95);
244 }
245 
246 /*-----------------------------------------------------------------------------
247  * Expires of keys created in writable slaves
248  *
249  * Normally slaves do not process expires: they wait the masters to synthesize
250  * DEL operations in order to retain consistency. However writable slaves are
251  * an exception: if a key is created in the slave and an expire is assigned
252  * to it, we need a way to expire such a key, since the master does not know
253  * anything about such a key.
254  *
255  * In order to do so, we track keys created in the slave side with an expire
256  * set, and call the expireSlaveKeys() function from time to time in order to
257  * reclaim the keys if they already expired.
258  *
259  * Note that the use case we are trying to cover here, is a popular one where
260  * slaves are put in writable mode in order to compute slow operations in
261  * the slave side that are mostly useful to actually read data in a more
262  * processed way. Think at sets intersections in a tmp key, with an expire so
263  * that it is also used as a cache to avoid intersecting every time.
264  *
265  * This implementation is currently not perfect but a lot better than leaking
266  * the keys as implemented in 3.2.
267  *----------------------------------------------------------------------------*/
268 
269 /* The dictionary where we remember key names and database ID of keys we may
270  * want to expire from the slave. Since this function is not often used we
271  * don't even care to initialize the database at startup. We'll do it once
272  * the feature is used the first time, that is, when rememberSlaveKeyWithExpire()
273  * is called.
274  *
275  * The dictionary has an SDS string representing the key as the hash table
276  * key, while the value is a 64 bit unsigned integer with the bits corresponding
277  * to the DB where the keys may exist set to 1. Currently the keys created
278  * with a DB id > 63 are not expired, but a trivial fix is to set the bitmap
279  * to the max 64 bit unsigned value when we know there is a key with a DB
280  * ID greater than 63, and check all the configured DBs in such a case. */
281 dict *slaveKeysWithExpire = NULL;
282 
283 /* Check the set of keys created by the master with an expire set in order to
284  * check if they should be evicted. */
expireSlaveKeys(void)285 void expireSlaveKeys(void) {
286     if (slaveKeysWithExpire == NULL ||
287         dictSize(slaveKeysWithExpire) == 0) return;
288 
289     int cycles = 0, noexpire = 0;
290     mstime_t start = mstime();
291     while(1) {
292         dictEntry *de = dictGetRandomKey(slaveKeysWithExpire);
293         sds keyname = dictGetKey(de);
294         uint64_t dbids = dictGetUnsignedIntegerVal(de);
295         uint64_t new_dbids = 0;
296 
297         /* Check the key against every database corresponding to the
298          * bits set in the value bitmap. */
299         int dbid = 0;
300         while(dbids && dbid < server.dbnum) {
301             if ((dbids & 1) != 0) {
302                 redisDb *db = server.db+dbid;
303                 dictEntry *expire = dictFind(db->expires,keyname);
304                 int expired = 0;
305 
306                 if (expire &&
307                     activeExpireCycleTryExpire(server.db+dbid,expire,start))
308                 {
309                     expired = 1;
310                 }
311 
312                 /* If the key was not expired in this DB, we need to set the
313                  * corresponding bit in the new bitmap we set as value.
314                  * At the end of the loop if the bitmap is zero, it means we
315                  * no longer need to keep track of this key. */
316                 if (expire && !expired) {
317                     noexpire++;
318                     new_dbids |= (uint64_t)1 << dbid;
319                 }
320             }
321             dbid++;
322             dbids >>= 1;
323         }
324 
325         /* Set the new bitmap as value of the key, in the dictionary
326          * of keys with an expire set directly in the writable slave. Otherwise
327          * if the bitmap is zero, we no longer need to keep track of it. */
328         if (new_dbids)
329             dictSetUnsignedIntegerVal(de,new_dbids);
330         else
331             dictDelete(slaveKeysWithExpire,keyname);
332 
333         /* Stop conditions: found 3 keys we cna't expire in a row or
334          * time limit was reached. */
335         cycles++;
336         if (noexpire > 3) break;
337         if ((cycles % 64) == 0 && mstime()-start > 1) break;
338         if (dictSize(slaveKeysWithExpire) == 0) break;
339     }
340 }
341 
342 /* Track keys that received an EXPIRE or similar command in the context
343  * of a writable slave. */
rememberSlaveKeyWithExpire(redisDb * db,robj * key)344 void rememberSlaveKeyWithExpire(redisDb *db, robj *key) {
345     if (slaveKeysWithExpire == NULL) {
346         static dictType dt = {
347             dictSdsHash,                /* hash function */
348             NULL,                       /* key dup */
349             NULL,                       /* val dup */
350             dictSdsKeyCompare,          /* key compare */
351             dictSdsDestructor,          /* key destructor */
352             NULL                        /* val destructor */
353         };
354         slaveKeysWithExpire = dictCreate(&dt,NULL);
355     }
356     if (db->id > 63) return;
357 
358     dictEntry *de = dictAddOrFind(slaveKeysWithExpire,key->ptr);
359     /* If the entry was just created, set it to a copy of the SDS string
360      * representing the key: we don't want to need to take those keys
361      * in sync with the main DB. The keys will be removed by expireSlaveKeys()
362      * as it scans to find keys to remove. */
363     if (de->key == key->ptr) {
364         de->key = sdsdup(key->ptr);
365         dictSetUnsignedIntegerVal(de,0);
366     }
367 
368     uint64_t dbids = dictGetUnsignedIntegerVal(de);
369     dbids |= (uint64_t)1 << db->id;
370     dictSetUnsignedIntegerVal(de,dbids);
371 }
372 
373 /* Return the number of keys we are tracking. */
getSlaveKeyWithExpireCount(void)374 size_t getSlaveKeyWithExpireCount(void) {
375     if (slaveKeysWithExpire == NULL) return 0;
376     return dictSize(slaveKeysWithExpire);
377 }
378 
379 /* Remove the keys in the hash table. We need to do that when data is
380  * flushed from the server. We may receive new keys from the master with
381  * the same name/db and it is no longer a good idea to expire them.
382  *
383  * Note: technically we should handle the case of a single DB being flushed
384  * but it is not worth it since anyway race conditions using the same set
385  * of key names in a wriatable slave and in its master will lead to
386  * inconsistencies. This is just a best-effort thing we do. */
flushSlaveKeysWithExpireList(void)387 void flushSlaveKeysWithExpireList(void) {
388     if (slaveKeysWithExpire) {
389         dictRelease(slaveKeysWithExpire);
390         slaveKeysWithExpire = NULL;
391     }
392 }
393 
394 /*-----------------------------------------------------------------------------
395  * Expires Commands
396  *----------------------------------------------------------------------------*/
397 
398 /* This is the generic command implementation for EXPIRE, PEXPIRE, EXPIREAT
399  * and PEXPIREAT. Because the commad second argument may be relative or absolute
400  * the "basetime" argument is used to signal what the base time is (either 0
401  * for *AT variants of the command, or the current time for relative expires).
402  *
403  * unit is either UNIT_SECONDS or UNIT_MILLISECONDS, and is only used for
404  * the argv[2] parameter. The basetime is always specified in milliseconds. */
expireGenericCommand(client * c,long long basetime,int unit)405 void expireGenericCommand(client *c, long long basetime, int unit) {
406     robj *key = c->argv[1], *param = c->argv[2];
407     long long when; /* unix time in milliseconds when the key will expire. */
408 
409     if (getLongLongFromObjectOrReply(c, param, &when, NULL) != C_OK)
410         return;
411 
412     if (unit == UNIT_SECONDS) when *= 1000;
413     when += basetime;
414 
415     /* No key, return zero. */
416     if (lookupKeyWrite(c->db,key) == NULL) {
417         addReply(c,shared.czero);
418         return;
419     }
420 
421     /* EXPIRE with negative TTL, or EXPIREAT with a timestamp into the past
422      * should never be executed as a DEL when load the AOF or in the context
423      * of a slave instance.
424      *
425      * Instead we take the other branch of the IF statement setting an expire
426      * (possibly in the past) and wait for an explicit DEL from the master. */
427     if (when <= mstime() && !server.loading && !server.masterhost) {
428         robj *aux;
429 
430         int deleted = server.lazyfree_lazy_expire ? dbAsyncDelete(c->db,key) :
431                                                     dbSyncDelete(c->db,key);
432         serverAssertWithInfo(c,key,deleted);
433         server.dirty++;
434 
435         /* Replicate/AOF this as an explicit DEL or UNLINK. */
436         aux = server.lazyfree_lazy_expire ? shared.unlink : shared.del;
437         rewriteClientCommandVector(c,2,aux,key);
438         signalModifiedKey(c->db,key);
439         notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
440         addReply(c, shared.cone);
441         return;
442     } else {
443         setExpire(c,c->db,key,when);
444         addReply(c,shared.cone);
445         signalModifiedKey(c->db,key);
446         notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id);
447         server.dirty++;
448         return;
449     }
450 }
451 
452 /* EXPIRE key seconds */
expireCommand(client * c)453 void expireCommand(client *c) {
454     expireGenericCommand(c,mstime(),UNIT_SECONDS);
455 }
456 
457 /* EXPIREAT key time */
expireatCommand(client * c)458 void expireatCommand(client *c) {
459     expireGenericCommand(c,0,UNIT_SECONDS);
460 }
461 
462 /* PEXPIRE key milliseconds */
pexpireCommand(client * c)463 void pexpireCommand(client *c) {
464     expireGenericCommand(c,mstime(),UNIT_MILLISECONDS);
465 }
466 
467 /* PEXPIREAT key ms_time */
pexpireatCommand(client * c)468 void pexpireatCommand(client *c) {
469     expireGenericCommand(c,0,UNIT_MILLISECONDS);
470 }
471 
472 /* Implements TTL and PTTL */
ttlGenericCommand(client * c,int output_ms)473 void ttlGenericCommand(client *c, int output_ms) {
474     long long expire, ttl = -1;
475 
476     /* If the key does not exist at all, return -2 */
477     if (lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH) == NULL) {
478         addReplyLongLong(c,-2);
479         return;
480     }
481     /* The key exists. Return -1 if it has no expire, or the actual
482      * TTL value otherwise. */
483     expire = getExpire(c->db,c->argv[1]);
484     if (expire != -1) {
485         ttl = expire-mstime();
486         if (ttl < 0) ttl = 0;
487     }
488     if (ttl == -1) {
489         addReplyLongLong(c,-1);
490     } else {
491         addReplyLongLong(c,output_ms ? ttl : ((ttl+500)/1000));
492     }
493 }
494 
495 /* TTL key */
ttlCommand(client * c)496 void ttlCommand(client *c) {
497     ttlGenericCommand(c, 0);
498 }
499 
500 /* PTTL key */
pttlCommand(client * c)501 void pttlCommand(client *c) {
502     ttlGenericCommand(c, 1);
503 }
504 
505 /* PERSIST key */
persistCommand(client * c)506 void persistCommand(client *c) {
507     if (lookupKeyWrite(c->db,c->argv[1])) {
508         if (removeExpire(c->db,c->argv[1])) {
509             addReply(c,shared.cone);
510             server.dirty++;
511         } else {
512             addReply(c,shared.czero);
513         }
514     } else {
515         addReply(c,shared.czero);
516     }
517 }
518 
519 /* TOUCH key1 [key2 key3 ... keyN] */
touchCommand(client * c)520 void touchCommand(client *c) {
521     int touched = 0;
522     for (int j = 1; j < c->argc; j++)
523         if (lookupKeyRead(c->db,c->argv[j]) != NULL) touched++;
524     addReplyLongLong(c,touched);
525 }
526 
527