1 /* Implementation of EXPIRE (keys with fixed time to live).
2 *
3 * ----------------------------------------------------------------------------
4 *
5 * Copyright (c) 2009-2016, Salvatore Sanfilippo <antirez at gmail dot com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * * Redistributions of source code must retain the above copyright notice,
12 * this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * * Neither the name of Redis nor the names of its contributors may be used
17 * to endorse or promote products derived from this software without
18 * specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #include "server.h"
34
35 /*-----------------------------------------------------------------------------
36 * Incremental collection of expired keys.
37 *
38 * When keys are accessed they are expired on-access. However we need a
39 * mechanism in order to ensure keys are eventually removed when expired even
40 * if no access is performed on them.
41 *----------------------------------------------------------------------------*/
42
43 /* Helper function for the activeExpireCycle() function.
44 * This function will try to expire the key that is stored in the hash table
45 * entry 'de' of the 'expires' hash table of a Redis database.
46 *
47 * If the key is found to be expired, it is removed from the database and
48 * 1 is returned. Otherwise no operation is performed and 0 is returned.
49 *
50 * When a key is expired, server.stat_expiredkeys is incremented.
51 *
52 * The parameter 'now' is the current time in milliseconds as is passed
53 * to the function to avoid too many gettimeofday() syscalls. */
activeExpireCycleTryExpire(redisDb * db,dictEntry * de,long long now)54 int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {
55 long long t = dictGetSignedIntegerVal(de);
56 if (now > t) {
57 sds key = dictGetKey(de);
58 robj *keyobj = createStringObject(key,sdslen(key));
59
60 propagateExpire(db,keyobj,server.lazyfree_lazy_expire);
61 if (server.lazyfree_lazy_expire)
62 dbAsyncDelete(db,keyobj);
63 else
64 dbSyncDelete(db,keyobj);
65 notifyKeyspaceEvent(NOTIFY_EXPIRED,
66 "expired",keyobj,db->id);
67 decrRefCount(keyobj);
68 server.stat_expiredkeys++;
69 return 1;
70 } else {
71 return 0;
72 }
73 }
74
75 /* Try to expire a few timed out keys. The algorithm used is adaptive and
76 * will use few CPU cycles if there are few expiring keys, otherwise
77 * it will get more aggressive to avoid that too much memory is used by
78 * keys that can be removed from the keyspace.
79 *
80 * No more than CRON_DBS_PER_CALL databases are tested at every
81 * iteration.
82 *
83 * This kind of call is used when Redis detects that timelimit_exit is
84 * true, so there is more work to do, and we do it more incrementally from
85 * the beforeSleep() function of the event loop.
86 *
87 * Expire cycle type:
88 *
89 * If type is ACTIVE_EXPIRE_CYCLE_FAST the function will try to run a
90 * "fast" expire cycle that takes no longer than EXPIRE_FAST_CYCLE_DURATION
91 * microseconds, and is not repeated again before the same amount of time.
92 *
93 * If type is ACTIVE_EXPIRE_CYCLE_SLOW, that normal expire cycle is
94 * executed, where the time limit is a percentage of the REDIS_HZ period
95 * as specified by the ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC define. */
96
activeExpireCycle(int type)97 void activeExpireCycle(int type) {
98 /* This function has some global state in order to continue the work
99 * incrementally across calls. */
100 static unsigned int current_db = 0; /* Last DB tested. */
101 static int timelimit_exit = 0; /* Time limit hit in previous call? */
102 static long long last_fast_cycle = 0; /* When last fast cycle ran. */
103
104 int j, iteration = 0;
105 int dbs_per_call = CRON_DBS_PER_CALL;
106 long long start = ustime(), timelimit, elapsed;
107
108 /* When clients are paused the dataset should be static not just from the
109 * POV of clients not being able to write, but also from the POV of
110 * expires and evictions of keys not being performed. */
111 if (clientsArePaused()) return;
112
113 if (type == ACTIVE_EXPIRE_CYCLE_FAST) {
114 /* Don't start a fast cycle if the previous cycle did not exit
115 * for time limit. Also don't repeat a fast cycle for the same period
116 * as the fast cycle total duration itself. */
117 if (!timelimit_exit) return;
118 if (start < last_fast_cycle + ACTIVE_EXPIRE_CYCLE_FAST_DURATION*2) return;
119 last_fast_cycle = start;
120 }
121
122 /* We usually should test CRON_DBS_PER_CALL per iteration, with
123 * two exceptions:
124 *
125 * 1) Don't test more DBs than we have.
126 * 2) If last time we hit the time limit, we want to scan all DBs
127 * in this iteration, as there is work to do in some DB and we don't want
128 * expired keys to use memory for too much time. */
129 if (dbs_per_call > server.dbnum || timelimit_exit)
130 dbs_per_call = server.dbnum;
131
132 /* We can use at max ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC percentage of CPU time
133 * per iteration. Since this function gets called with a frequency of
134 * server.hz times per second, the following is the max amount of
135 * microseconds we can spend in this function. */
136 timelimit = 1000000*ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC/server.hz/100;
137 timelimit_exit = 0;
138 if (timelimit <= 0) timelimit = 1;
139
140 if (type == ACTIVE_EXPIRE_CYCLE_FAST)
141 timelimit = ACTIVE_EXPIRE_CYCLE_FAST_DURATION; /* in microseconds. */
142
143 /* Accumulate some global stats as we expire keys, to have some idea
144 * about the number of keys that are already logically expired, but still
145 * existing inside the database. */
146 long total_sampled = 0;
147 long total_expired = 0;
148
149 for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) {
150 int expired;
151 redisDb *db = server.db+(current_db % server.dbnum);
152
153 /* Increment the DB now so we are sure if we run out of time
154 * in the current DB we'll restart from the next. This allows to
155 * distribute the time evenly across DBs. */
156 current_db++;
157
158 /* Continue to expire if at the end of the cycle more than 25%
159 * of the keys were expired. */
160 do {
161 unsigned long num, slots;
162 long long now, ttl_sum;
163 int ttl_samples;
164 iteration++;
165
166 /* If there is nothing to expire try next DB ASAP. */
167 if ((num = dictSize(db->expires)) == 0) {
168 db->avg_ttl = 0;
169 break;
170 }
171 slots = dictSlots(db->expires);
172 now = mstime();
173
174 /* When there are less than 1% filled slots getting random
175 * keys is expensive, so stop here waiting for better times...
176 * The dictionary will be resized asap. */
177 if (num && slots > DICT_HT_INITIAL_SIZE &&
178 (num*100/slots < 1)) break;
179
180 /* The main collection cycle. Sample random keys among keys
181 * with an expire set, checking for expired ones. */
182 expired = 0;
183 ttl_sum = 0;
184 ttl_samples = 0;
185
186 if (num > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP)
187 num = ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP;
188
189 while (num--) {
190 dictEntry *de;
191 long long ttl;
192
193 if ((de = dictGetRandomKey(db->expires)) == NULL) break;
194 ttl = dictGetSignedIntegerVal(de)-now;
195 if (activeExpireCycleTryExpire(db,de,now)) expired++;
196 if (ttl > 0) {
197 /* We want the average TTL of keys yet not expired. */
198 ttl_sum += ttl;
199 ttl_samples++;
200 }
201 total_sampled++;
202 }
203 total_expired += expired;
204
205 /* Update the average TTL stats for this database. */
206 if (ttl_samples) {
207 long long avg_ttl = ttl_sum/ttl_samples;
208
209 /* Do a simple running average with a few samples.
210 * We just use the current estimate with a weight of 2%
211 * and the previous estimate with a weight of 98%. */
212 if (db->avg_ttl == 0) db->avg_ttl = avg_ttl;
213 db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50);
214 }
215
216 /* We can't block forever here even if there are many keys to
217 * expire. So after a given amount of milliseconds return to the
218 * caller waiting for the other active expire cycle. */
219 if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */
220 elapsed = ustime()-start;
221 if (elapsed > timelimit) {
222 timelimit_exit = 1;
223 server.stat_expired_time_cap_reached_count++;
224 break;
225 }
226 }
227 /* We don't repeat the cycle if there are less than 25% of keys
228 * found expired in the current DB. */
229 } while (expired > ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP/4);
230 }
231
232 elapsed = ustime()-start;
233 latencyAddSampleIfNeeded("expire-cycle",elapsed/1000);
234
235 /* Update our estimate of keys existing but yet to be expired.
236 * Running average with this sample accounting for 5%. */
237 double current_perc;
238 if (total_sampled) {
239 current_perc = (double)total_expired/total_sampled;
240 } else
241 current_perc = 0;
242 server.stat_expired_stale_perc = (current_perc*0.05)+
243 (server.stat_expired_stale_perc*0.95);
244 }
245
246 /*-----------------------------------------------------------------------------
247 * Expires of keys created in writable slaves
248 *
249 * Normally slaves do not process expires: they wait the masters to synthesize
250 * DEL operations in order to retain consistency. However writable slaves are
251 * an exception: if a key is created in the slave and an expire is assigned
252 * to it, we need a way to expire such a key, since the master does not know
253 * anything about such a key.
254 *
255 * In order to do so, we track keys created in the slave side with an expire
256 * set, and call the expireSlaveKeys() function from time to time in order to
257 * reclaim the keys if they already expired.
258 *
259 * Note that the use case we are trying to cover here, is a popular one where
260 * slaves are put in writable mode in order to compute slow operations in
261 * the slave side that are mostly useful to actually read data in a more
262 * processed way. Think at sets intersections in a tmp key, with an expire so
263 * that it is also used as a cache to avoid intersecting every time.
264 *
265 * This implementation is currently not perfect but a lot better than leaking
266 * the keys as implemented in 3.2.
267 *----------------------------------------------------------------------------*/
268
269 /* The dictionary where we remember key names and database ID of keys we may
270 * want to expire from the slave. Since this function is not often used we
271 * don't even care to initialize the database at startup. We'll do it once
272 * the feature is used the first time, that is, when rememberSlaveKeyWithExpire()
273 * is called.
274 *
275 * The dictionary has an SDS string representing the key as the hash table
276 * key, while the value is a 64 bit unsigned integer with the bits corresponding
277 * to the DB where the keys may exist set to 1. Currently the keys created
278 * with a DB id > 63 are not expired, but a trivial fix is to set the bitmap
279 * to the max 64 bit unsigned value when we know there is a key with a DB
280 * ID greater than 63, and check all the configured DBs in such a case. */
281 dict *slaveKeysWithExpire = NULL;
282
283 /* Check the set of keys created by the master with an expire set in order to
284 * check if they should be evicted. */
expireSlaveKeys(void)285 void expireSlaveKeys(void) {
286 if (slaveKeysWithExpire == NULL ||
287 dictSize(slaveKeysWithExpire) == 0) return;
288
289 int cycles = 0, noexpire = 0;
290 mstime_t start = mstime();
291 while(1) {
292 dictEntry *de = dictGetRandomKey(slaveKeysWithExpire);
293 sds keyname = dictGetKey(de);
294 uint64_t dbids = dictGetUnsignedIntegerVal(de);
295 uint64_t new_dbids = 0;
296
297 /* Check the key against every database corresponding to the
298 * bits set in the value bitmap. */
299 int dbid = 0;
300 while(dbids && dbid < server.dbnum) {
301 if ((dbids & 1) != 0) {
302 redisDb *db = server.db+dbid;
303 dictEntry *expire = dictFind(db->expires,keyname);
304 int expired = 0;
305
306 if (expire &&
307 activeExpireCycleTryExpire(server.db+dbid,expire,start))
308 {
309 expired = 1;
310 }
311
312 /* If the key was not expired in this DB, we need to set the
313 * corresponding bit in the new bitmap we set as value.
314 * At the end of the loop if the bitmap is zero, it means we
315 * no longer need to keep track of this key. */
316 if (expire && !expired) {
317 noexpire++;
318 new_dbids |= (uint64_t)1 << dbid;
319 }
320 }
321 dbid++;
322 dbids >>= 1;
323 }
324
325 /* Set the new bitmap as value of the key, in the dictionary
326 * of keys with an expire set directly in the writable slave. Otherwise
327 * if the bitmap is zero, we no longer need to keep track of it. */
328 if (new_dbids)
329 dictSetUnsignedIntegerVal(de,new_dbids);
330 else
331 dictDelete(slaveKeysWithExpire,keyname);
332
333 /* Stop conditions: found 3 keys we cna't expire in a row or
334 * time limit was reached. */
335 cycles++;
336 if (noexpire > 3) break;
337 if ((cycles % 64) == 0 && mstime()-start > 1) break;
338 if (dictSize(slaveKeysWithExpire) == 0) break;
339 }
340 }
341
342 /* Track keys that received an EXPIRE or similar command in the context
343 * of a writable slave. */
rememberSlaveKeyWithExpire(redisDb * db,robj * key)344 void rememberSlaveKeyWithExpire(redisDb *db, robj *key) {
345 if (slaveKeysWithExpire == NULL) {
346 static dictType dt = {
347 dictSdsHash, /* hash function */
348 NULL, /* key dup */
349 NULL, /* val dup */
350 dictSdsKeyCompare, /* key compare */
351 dictSdsDestructor, /* key destructor */
352 NULL /* val destructor */
353 };
354 slaveKeysWithExpire = dictCreate(&dt,NULL);
355 }
356 if (db->id > 63) return;
357
358 dictEntry *de = dictAddOrFind(slaveKeysWithExpire,key->ptr);
359 /* If the entry was just created, set it to a copy of the SDS string
360 * representing the key: we don't want to need to take those keys
361 * in sync with the main DB. The keys will be removed by expireSlaveKeys()
362 * as it scans to find keys to remove. */
363 if (de->key == key->ptr) {
364 de->key = sdsdup(key->ptr);
365 dictSetUnsignedIntegerVal(de,0);
366 }
367
368 uint64_t dbids = dictGetUnsignedIntegerVal(de);
369 dbids |= (uint64_t)1 << db->id;
370 dictSetUnsignedIntegerVal(de,dbids);
371 }
372
373 /* Return the number of keys we are tracking. */
getSlaveKeyWithExpireCount(void)374 size_t getSlaveKeyWithExpireCount(void) {
375 if (slaveKeysWithExpire == NULL) return 0;
376 return dictSize(slaveKeysWithExpire);
377 }
378
379 /* Remove the keys in the hash table. We need to do that when data is
380 * flushed from the server. We may receive new keys from the master with
381 * the same name/db and it is no longer a good idea to expire them.
382 *
383 * Note: technically we should handle the case of a single DB being flushed
384 * but it is not worth it since anyway race conditions using the same set
385 * of key names in a wriatable slave and in its master will lead to
386 * inconsistencies. This is just a best-effort thing we do. */
flushSlaveKeysWithExpireList(void)387 void flushSlaveKeysWithExpireList(void) {
388 if (slaveKeysWithExpire) {
389 dictRelease(slaveKeysWithExpire);
390 slaveKeysWithExpire = NULL;
391 }
392 }
393
394 /*-----------------------------------------------------------------------------
395 * Expires Commands
396 *----------------------------------------------------------------------------*/
397
398 /* This is the generic command implementation for EXPIRE, PEXPIRE, EXPIREAT
399 * and PEXPIREAT. Because the commad second argument may be relative or absolute
400 * the "basetime" argument is used to signal what the base time is (either 0
401 * for *AT variants of the command, or the current time for relative expires).
402 *
403 * unit is either UNIT_SECONDS or UNIT_MILLISECONDS, and is only used for
404 * the argv[2] parameter. The basetime is always specified in milliseconds. */
expireGenericCommand(client * c,long long basetime,int unit)405 void expireGenericCommand(client *c, long long basetime, int unit) {
406 robj *key = c->argv[1], *param = c->argv[2];
407 long long when; /* unix time in milliseconds when the key will expire. */
408
409 if (getLongLongFromObjectOrReply(c, param, &when, NULL) != C_OK)
410 return;
411
412 if (unit == UNIT_SECONDS) when *= 1000;
413 when += basetime;
414
415 /* No key, return zero. */
416 if (lookupKeyWrite(c->db,key) == NULL) {
417 addReply(c,shared.czero);
418 return;
419 }
420
421 /* EXPIRE with negative TTL, or EXPIREAT with a timestamp into the past
422 * should never be executed as a DEL when load the AOF or in the context
423 * of a slave instance.
424 *
425 * Instead we take the other branch of the IF statement setting an expire
426 * (possibly in the past) and wait for an explicit DEL from the master. */
427 if (when <= mstime() && !server.loading && !server.masterhost) {
428 robj *aux;
429
430 int deleted = server.lazyfree_lazy_expire ? dbAsyncDelete(c->db,key) :
431 dbSyncDelete(c->db,key);
432 serverAssertWithInfo(c,key,deleted);
433 server.dirty++;
434
435 /* Replicate/AOF this as an explicit DEL or UNLINK. */
436 aux = server.lazyfree_lazy_expire ? shared.unlink : shared.del;
437 rewriteClientCommandVector(c,2,aux,key);
438 signalModifiedKey(c->db,key);
439 notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
440 addReply(c, shared.cone);
441 return;
442 } else {
443 setExpire(c,c->db,key,when);
444 addReply(c,shared.cone);
445 signalModifiedKey(c->db,key);
446 notifyKeyspaceEvent(NOTIFY_GENERIC,"expire",key,c->db->id);
447 server.dirty++;
448 return;
449 }
450 }
451
452 /* EXPIRE key seconds */
expireCommand(client * c)453 void expireCommand(client *c) {
454 expireGenericCommand(c,mstime(),UNIT_SECONDS);
455 }
456
457 /* EXPIREAT key time */
expireatCommand(client * c)458 void expireatCommand(client *c) {
459 expireGenericCommand(c,0,UNIT_SECONDS);
460 }
461
462 /* PEXPIRE key milliseconds */
pexpireCommand(client * c)463 void pexpireCommand(client *c) {
464 expireGenericCommand(c,mstime(),UNIT_MILLISECONDS);
465 }
466
467 /* PEXPIREAT key ms_time */
pexpireatCommand(client * c)468 void pexpireatCommand(client *c) {
469 expireGenericCommand(c,0,UNIT_MILLISECONDS);
470 }
471
472 /* Implements TTL and PTTL */
ttlGenericCommand(client * c,int output_ms)473 void ttlGenericCommand(client *c, int output_ms) {
474 long long expire, ttl = -1;
475
476 /* If the key does not exist at all, return -2 */
477 if (lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH) == NULL) {
478 addReplyLongLong(c,-2);
479 return;
480 }
481 /* The key exists. Return -1 if it has no expire, or the actual
482 * TTL value otherwise. */
483 expire = getExpire(c->db,c->argv[1]);
484 if (expire != -1) {
485 ttl = expire-mstime();
486 if (ttl < 0) ttl = 0;
487 }
488 if (ttl == -1) {
489 addReplyLongLong(c,-1);
490 } else {
491 addReplyLongLong(c,output_ms ? ttl : ((ttl+500)/1000));
492 }
493 }
494
495 /* TTL key */
ttlCommand(client * c)496 void ttlCommand(client *c) {
497 ttlGenericCommand(c, 0);
498 }
499
500 /* PTTL key */
pttlCommand(client * c)501 void pttlCommand(client *c) {
502 ttlGenericCommand(c, 1);
503 }
504
505 /* PERSIST key */
persistCommand(client * c)506 void persistCommand(client *c) {
507 if (lookupKeyWrite(c->db,c->argv[1])) {
508 if (removeExpire(c->db,c->argv[1])) {
509 addReply(c,shared.cone);
510 server.dirty++;
511 } else {
512 addReply(c,shared.czero);
513 }
514 } else {
515 addReply(c,shared.czero);
516 }
517 }
518
519 /* TOUCH key1 [key2 key3 ... keyN] */
touchCommand(client * c)520 void touchCommand(client *c) {
521 int touched = 0;
522 for (int j = 1; j < c->argc; j++)
523 if (lookupKeyRead(c->db,c->argv[j]) != NULL) touched++;
524 addReplyLongLong(c,touched);
525 }
526
527