xref: /f-stack/app/redis-5.0.5/src/latency.c (revision 572c4311)
1 /* The latency monitor allows you to easily observe the sources of latency
2  * in a Redis instance using the LATENCY command. Different latency
3  * sources are monitored, like disk I/O, execution of commands, fork
4  * system call, and so forth.
5  *
6  * ----------------------------------------------------------------------------
7  *
8  * Copyright (c) 2014, Salvatore Sanfilippo <antirez at gmail dot com>
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions are met:
13  *
14  *   * Redistributions of source code must retain the above copyright notice,
15  *     this list of conditions and the following disclaimer.
16  *   * Redistributions in binary form must reproduce the above copyright
17  *     notice, this list of conditions and the following disclaimer in the
18  *     documentation and/or other materials provided with the distribution.
19  *   * Neither the name of Redis nor the names of its contributors may be used
20  *     to endorse or promote products derived from this software without
21  *     specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33  * POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 #include "server.h"
37 
38 /* Dictionary type for latency events. */
39 int dictStringKeyCompare(void *privdata, const void *key1, const void *key2) {
40     UNUSED(privdata);
41     return strcmp(key1,key2) == 0;
42 }
43 
44 uint64_t dictStringHash(const void *key) {
45     return dictGenHashFunction(key, strlen(key));
46 }
47 
48 void dictVanillaFree(void *privdata, void *val);
49 
50 dictType latencyTimeSeriesDictType = {
51     dictStringHash,             /* hash function */
52     NULL,                       /* key dup */
53     NULL,                       /* val dup */
54     dictStringKeyCompare,       /* key compare */
55     dictVanillaFree,            /* key destructor */
56     dictVanillaFree             /* val destructor */
57 };
58 
59 /* ------------------------- Utility functions ------------------------------ */
60 
61 #ifdef __linux__
62 /* Returns 1 if Transparent Huge Pages support is enabled in the kernel.
63  * Otherwise (or if we are unable to check) 0 is returned. */
64 int THPIsEnabled(void) {
65     char buf[1024];
66 
67     FILE *fp = fopen("/sys/kernel/mm/transparent_hugepage/enabled","r");
68     if (!fp) return 0;
69     if (fgets(buf,sizeof(buf),fp) == NULL) {
70         fclose(fp);
71         return 0;
72     }
73     fclose(fp);
74     return (strstr(buf,"[never]") == NULL) ? 1 : 0;
75 }
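/* For reference, the sysfs file read above normally contains a single line
 * such as "always madvise [never]" (the combination shown here is
 * illustrative), where the bracketed word is the policy currently in effect.
 * Checking for "[never]" therefore reports THP as enabled when either
 * "always" or "madvise" is the active policy. */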
76 #endif
77 
78 /* Report the amount of AnonHugePages in smap, in bytes. If the return
79  * value of the function is non-zero, the process is being targeted by
80  * THP support, and is likely to have memory usage / latency issues. */
81 int THPGetAnonHugePagesSize(void) {
82     return zmalloc_get_smap_bytes_by_field("AnonHugePages:",-1);
83 }
84 
85 /* ---------------------------- Latency API --------------------------------- */
86 
87 /* Latency monitor initialization. We just need to create the dictionary
88  * of time series, each time series is created on demand in order to avoid
89  * having a fixed list to maintain. */
90 void latencyMonitorInit(void) {
91     server.latency_events = dictCreate(&latencyTimeSeriesDictType,NULL);
92 }
93 
94 /* Add the specified sample to the specified time series "event".
95  * This function is usually called via latencyAddSampleIfNeeded(), which
96  * is a macro that only adds the sample if the latency is higher than
97  * server.latency_monitor_threshold. */
98 void latencyAddSample(char *event, mstime_t latency) {
99     struct latencyTimeSeries *ts = dictFetchValue(server.latency_events,event);
100     time_t now = time(NULL);
101     int prev;
102 
103     /* Create the time series if it does not exist. */
104     if (ts == NULL) {
105         ts = zmalloc(sizeof(*ts));
106         ts->idx = 0;
107         ts->max = 0;
108         memset(ts->samples,0,sizeof(ts->samples));
109         dictAdd(server.latency_events,zstrdup(event),ts);
110     }
111 
112     if (latency > ts->max) ts->max = latency;
113 
114     /* If the previous sample is in the same second, we update our old sample
115      * if this latency is greater than the old one, otherwise we just return. */
116     prev = (ts->idx + LATENCY_TS_LEN - 1) % LATENCY_TS_LEN;
117     if (ts->samples[prev].time == now) {
118         if (latency > ts->samples[prev].latency)
119             ts->samples[prev].latency = latency;
120         return;
121     }
122 
123     ts->samples[ts->idx].time = time(NULL);
124     ts->samples[ts->idx].latency = latency;
125 
126     ts->idx++;
127     if (ts->idx == LATENCY_TS_LEN) ts->idx = 0;
128 }
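/* A minimal sketch of how call sites typically produce these samples, using
 * the helper macros defined in latency.h (treat the snippet as illustrative
 * rather than a verbatim call site):
 *
 *     mstime_t latency;
 *     latencyStartMonitor(latency);           // snapshot mstime() if enabled
 *     ... potentially slow operation ...
 *     latencyEndMonitor(latency);             // latency = elapsed milliseconds
 *     latencyAddSampleIfNeeded("some-event",latency);  // logs only when the
 *                                             // configured threshold is crossed
 */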
129 
130 /* Reset data for the specified event, or all the events' data if 'event' is
131  * NULL.
132  *
133  * Note: this is O(N) even when event_to_reset is not NULL because it makes
134  * the code simpler and we have a small fixed max number of events. */
135 int latencyResetEvent(char *event_to_reset) {
136     dictIterator *di;
137     dictEntry *de;
138     int resets = 0;
139 
140     di = dictGetSafeIterator(server.latency_events);
141     while((de = dictNext(di)) != NULL) {
142         char *event = dictGetKey(de);
143 
144         if (event_to_reset == NULL || strcasecmp(event,event_to_reset) == 0) {
145             dictDelete(server.latency_events, event);
146             resets++;
147         }
148     }
149     dictReleaseIterator(di);
150     return resets;
151 }
152 
153 /* ------------------------ Latency reporting (doctor) ---------------------- */
154 
155 /* Analyze the samples available for a given event and return a structure
156  * populated with different metrics: average, MAD, min, max, and so forth.
157  * Check latency.h definition of struct latencyStats for more info.
158  * If the specified event has no elements the structure is populated with
159  * zero values. */
160 void analyzeLatencyForEvent(char *event, struct latencyStats *ls) {
161     struct latencyTimeSeries *ts = dictFetchValue(server.latency_events,event);
162     int j;
163     uint64_t sum;
164 
165     ls->all_time_high = ts ? ts->max : 0;
166     ls->avg = 0;
167     ls->min = 0;
168     ls->max = 0;
169     ls->mad = 0;
170     ls->samples = 0;
171     ls->period = 0;
172     if (!ts) return;
173 
174     /* First pass, populate everything but the MAD. */
175     sum = 0;
176     for (j = 0; j < LATENCY_TS_LEN; j++) {
177         if (ts->samples[j].time == 0) continue;
178         ls->samples++;
179         if (ls->samples == 1) {
180             ls->min = ls->max = ts->samples[j].latency;
181         } else {
182             if (ls->min > ts->samples[j].latency)
183                 ls->min = ts->samples[j].latency;
184             if (ls->max < ts->samples[j].latency)
185                 ls->max = ts->samples[j].latency;
186         }
187         sum += ts->samples[j].latency;
188 
189         /* Track the oldest event time in ls->period. */
190         if (ls->period == 0 || ts->samples[j].time < ls->period)
191             ls->period = ts->samples[j].time;
192     }
193 
194     /* So far avg is actually the sum of the latencies, and period is
195      * the oldest event time. We need to make the first an average and
196      * the second a range of seconds. */
197     if (ls->samples) {
198         ls->avg = sum / ls->samples;
199         ls->period = time(NULL) - ls->period;
200         if (ls->period == 0) ls->period = 1;
201     }
202 
203     /* Second pass, compute MAD. */
204     sum = 0;
205     for (j = 0; j < LATENCY_TS_LEN; j++) {
206         int64_t delta;
207 
208         if (ts->samples[j].time == 0) continue;
209         delta = (int64_t)ls->avg - ts->samples[j].latency;
210         if (delta < 0) delta = -delta;
211         sum += delta;
212     }
213     if (ls->samples) ls->mad = sum / ls->samples;
214 }
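/* A small worked example of the two passes above: with samples of 10, 20 and
 * 60 milliseconds, avg = (10+20+60)/3 = 30 ms, while the mean absolute
 * deviation is MAD = (|10-30|+|20-30|+|60-30|)/3 = (20+10+30)/3 = 20 ms. */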
215 
216 /* Create a human readable report of latency events for this Redis instance. */
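/* Each detected event contributes one line to the report, built with the
 * printf-style format used below; an illustrative (made up) example:
 *
 *     1. command: 5 latency spikes (average 300ms, mean deviation 120ms,
 *        period 73.40 sec). Worst all time event 500ms.
 *
 * followed by the advice selected through the advise_* flags. */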
217 sds createLatencyReport(void) {
218     sds report = sdsempty();
219     int advise_better_vm = 0;       /* Better virtual machines. */
220     int advise_slowlog_enabled = 0; /* Enable slowlog. */
221     int advise_slowlog_tuning = 0;  /* Reconfigure slowlog. */
222     int advise_slowlog_inspect = 0; /* Check your slowlog. */
223     int advise_disk_contention = 0; /* Try to lower disk contention. */
224     int advise_scheduler = 0;       /* Intrinsic latency. */
225     int advise_data_writeback = 0;  /* data=writeback. */
226     int advise_no_appendfsync = 0;  /* don't fsync during rewrites. */
227     int advise_local_disk = 0;      /* Avoid remote disks. */
228     int advise_ssd = 0;             /* Use an SSD drive. */
229     int advise_write_load_info = 0; /* Print info about AOF and write load. */
230     int advise_hz = 0;              /* Use higher HZ. */
231     int advise_large_objects = 0;   /* Deletion of large objects. */
232     int advise_mass_eviction = 0;   /* Avoid mass eviction of keys. */
233     int advise_relax_fsync_policy = 0; /* appendfsync always is slow. */
234     int advise_disable_thp = 0;     /* AnonHugePages detected. */
235     int advices = 0;
236 
237     /* Return ASAP if the latency engine is disabled and it looks like it
238      * was never enabled so far. */
239     if (dictSize(server.latency_events) == 0 &&
240         server.latency_monitor_threshold == 0)
241     {
242         report = sdscat(report,"I'm sorry, Dave, I can't do that. Latency monitoring is disabled in this Redis instance. You may use \"CONFIG SET latency-monitor-threshold <milliseconds>\" in order to enable it. If we weren't on a deep space mission I'd suggest taking a look at http://redis.io/topics/latency-monitor.\n");
243         return report;
244     }
245 
246     /* Show stats for all the events and add, for each event, some
247      * event-related comments depending on the values. */
248     dictIterator *di;
249     dictEntry *de;
250     int eventnum = 0;
251 
252     di = dictGetSafeIterator(server.latency_events);
253     while((de = dictNext(di)) != NULL) {
254         char *event = dictGetKey(de);
255         struct latencyTimeSeries *ts = dictGetVal(de);
256         struct latencyStats ls;
257 
258         if (ts == NULL) continue;
259         eventnum++;
260         if (eventnum == 1) {
261             report = sdscat(report,"Dave, I have observed latency spikes in this Redis instance. You don't mind talking about it, do you Dave?\n\n");
262         }
263         analyzeLatencyForEvent(event,&ls);
264 
265         report = sdscatprintf(report,
266             "%d. %s: %d latency spikes (average %lums, mean deviation %lums, period %.2f sec). Worst all time event %lums.",
267             eventnum, event,
268             ls.samples,
269             (unsigned long) ls.avg,
270             (unsigned long) ls.mad,
271             (double) ls.period/ls.samples,
272             (unsigned long) ts->max);
273 
274         /* Fork */
275         if (!strcasecmp(event,"fork")) {
276             char *fork_quality;
277             if (server.stat_fork_rate < 10) {
278                 fork_quality = "terrible";
279                 advise_better_vm = 1;
280                 advices++;
281             } else if (server.stat_fork_rate < 25) {
282                 fork_quality = "poor";
283                 advise_better_vm = 1;
284                 advices++;
285             } else if (server.stat_fork_rate < 100) {
286                 fork_quality = "good";
287             } else {
288                 fork_quality = "excellent";
289             }
290             report = sdscatprintf(report,
291                 " Fork rate is %.2f GB/sec (%s).", server.stat_fork_rate,
292                 fork_quality);
293         }
294 
295         /* Potentially slow commands. */
296         if (!strcasecmp(event,"command")) {
297             if (server.slowlog_log_slower_than < 0) {
298                 advise_slowlog_enabled = 1;
299                 advices++;
300             } else if (server.slowlog_log_slower_than/1000 >
301                        server.latency_monitor_threshold)
302             {
303                 advise_slowlog_tuning = 1;
304                 advices++;
305             }
306             advise_slowlog_inspect = 1;
307             advise_large_objects = 1;
308             advices += 2;
309         }
310 
311         /* fast-command. */
312         if (!strcasecmp(event,"fast-command")) {
313             advise_scheduler = 1;
314             advices++;
315         }
316 
317         /* AOF and I/O. */
318         if (!strcasecmp(event,"aof-write-pending-fsync")) {
319             advise_local_disk = 1;
320             advise_disk_contention = 1;
321             advise_ssd = 1;
322             advise_data_writeback = 1;
323             advices += 4;
324         }
325 
326         if (!strcasecmp(event,"aof-write-active-child")) {
327             advise_no_appendfsync = 1;
328             advise_data_writeback = 1;
329             advise_ssd = 1;
330             advices += 3;
331         }
332 
333         if (!strcasecmp(event,"aof-write-alone")) {
334             advise_local_disk = 1;
335             advise_data_writeback = 1;
336             advise_ssd = 1;
337             advices += 3;
338         }
339 
340         if (!strcasecmp(event,"aof-fsync-always")) {
341             advise_relax_fsync_policy = 1;
342             advices++;
343         }
344 
345         if (!strcasecmp(event,"aof-fstat") ||
346             !strcasecmp(event,"rdb-unlik-temp-file")) {
347             advise_disk_contention = 1;
348             advise_local_disk = 1;
349             advices += 2;
350         }
351 
352         if (!strcasecmp(event,"aof-rewrite-diff-write") ||
353             !strcasecmp(event,"aof-rename")) {
354             advise_write_load_info = 1;
355             advise_data_writeback = 1;
356             advise_ssd = 1;
357             advise_local_disk = 1;
358             advices += 4;
359         }
360 
361         /* Expire cycle. */
362         if (!strcasecmp(event,"expire-cycle")) {
363             advise_hz = 1;
364             advise_large_objects = 1;
365             advices += 2;
366         }
367 
368         /* Eviction cycle. */
369         if (!strcasecmp(event,"eviction-del")) {
370             advise_large_objects = 1;
371             advices++;
372         }
373 
374         if (!strcasecmp(event,"eviction-cycle")) {
375             advise_mass_eviction = 1;
376             advices++;
377         }
378 
379         report = sdscatlen(report,"\n",1);
380     }
381     dictReleaseIterator(di);
382 
383     /* Add advice that is not tied to a specific event. */
384     if (THPGetAnonHugePagesSize() > 0) {
385         advise_disable_thp = 1;
386         advices++;
387     }
388 
389     if (eventnum == 0 && advices == 0) {
390         report = sdscat(report,"Dave, no latency spike was observed during the lifetime of this Redis instance, not in the slightest bit. I honestly think you ought to sit down calmly, take a stress pill, and think things over.\n");
391     } else if (eventnum > 0 && advices == 0) {
392         report = sdscat(report,"\nWhile there are latency events logged, I'm not able to suggest any easy fix. Please use the Redis community to get some help, providing this report in your help request.\n");
393     } else {
394         /* Add all the suggestions accumulated so far. */
395 
396         /* Better VM. */
397         report = sdscat(report,"\nI have a few pieces of advice for you:\n\n");
398         if (advise_better_vm) {
399             report = sdscat(report,"- If you are using a virtual machine, consider upgrading to a faster one, using a hypervisor that provides lower latency during fork() calls. Xen is known to have poor fork() performance. Even in the context of the same VM provider, certain kinds of instances can execute fork faster than others.\n");
400         }
401 
402         /* Slow log. */
403         if (advise_slowlog_enabled) {
404             report = sdscatprintf(report,"- There are latency issues with potentially slow commands you are using. Try to enable the Slow Log Redis feature using the command 'CONFIG SET slowlog-log-slower-than %llu'. If the Slow Log is disabled Redis is not able to log slow command executions for you.\n", (unsigned long long)server.latency_monitor_threshold*1000);
405         }
406 
407         if (advise_slowlog_tuning) {
408             report = sdscatprintf(report,"- Your current Slow Log configuration only logs events slower than your Slow Log threshold, which is higher than your latency monitor threshold, so some slow commands will not show up in the Slow Log. Please use 'CONFIG SET slowlog-log-slower-than %llu'.\n", (unsigned long long)server.latency_monitor_threshold*1000);
409         }
410 
411         if (advise_slowlog_inspect) {
412             report = sdscat(report,"- Check your Slow Log to understand which of the commands you are running are too slow to execute. Please check http://redis.io/commands/slowlog for more information.\n");
413         }
414 
415         /* Intrinsic latency. */
416         if (advise_scheduler) {
417             report = sdscat(report,"- The system is slow to execute Redis code paths not containing system calls. This usually means the system does not give Redis enough CPU time to run for long periods. You should try to:\n"
418             "  1) Lower the system load.\n"
419             "  2) Use a computer / VM just for Redis if you are running other softawre in the same system.\n"
420             "  3) Check if you have a \"noisy neighbour\" problem.\n"
421             "  4) Check with 'redis-cli --intrinsic-latency 100' what is the intrinsic latency in your system.\n"
422             "  5) Check if the problem is allocator-related by recompiling Redis with MALLOC=libc, if you are using Jemalloc. However this may create fragmentation problems.\n");
423         }
424 
425         /* AOF / Disk latency. */
426         if (advise_local_disk) {
427             report = sdscat(report,"- It is strongly advised to use local disks for persistence, especially if you are using AOF. Remote disks provided by platform-as-a-service providers are known to be slow.\n");
428         }
429 
430         if (advise_ssd) {
431             report = sdscat(report,"- SSD disks are able to reduce fsync latency, and total time needed for snapshotting and AOF log rewriting (resulting in smaller memory usage and smaller final AOF rewrite buffer flushes). With extremely high write load SSD disks can be a good option. However Redis should perform reasonably with high load using normal disks. Use this advice as a last resort.\n");
432         }
433 
434         if (advise_data_writeback) {
435             report = sdscat(report,"- Mounting ext3/4 filesystems with data=writeback can provide a performance boost compared to data=ordered, however this mode of operation provides fewer guarantees, and after a hard crash the AOF file may end with a half-written command that will need to be repaired before Redis restarts.\n");
436         }
437 
438         if (advise_disk_contention) {
439             report = sdscat(report,"- Try to lower the disk contention. This is often caused by other disk-intensive processes running on the same computer (including other Redis instances).\n");
440         }
441 
442         if (advise_no_appendfsync) {
443             report = sdscat(report,"- If this is viable in your environment from the point of view of data safety, you could try to enable the 'no-appendfsync-on-rewrite' option, so that fsync will not be performed while there is a child rewriting the AOF file or producing an RDB file (the moment when there is high disk contention).\n");
444         }
445 
446         if (advise_relax_fsync_policy && server.aof_fsync == AOF_FSYNC_ALWAYS) {
447             report = sdscat(report,"- Your fsync policy is set to 'always'. It is very hard to get good performance with such a setup; if possible, try to relax the fsync policy to 'everysec'.\n");
448         }
449 
450         if (advise_write_load_info) {
451             report = sdscat(report,"- Latency during the AOF atomic rename operation, or when the final difference is flushed to the AOF file at the end of the rewrite, is sometimes caused by a very high write load that makes the AOF buffer very large. If possible try to send fewer commands to accomplish the same work, or use Lua scripts to group multiple operations into a single EVALSHA call.\n");
452         }
453 
454         if (advise_hz && server.hz < 100) {
455             report = sdscat(report,"- In order to make the Redis keys expiring process more incremental, try to set the 'hz' configuration parameter to 100 using 'CONFIG SET hz 100'.\n");
456         }
457 
458         if (advise_large_objects) {
459             report = sdscat(report,"- Deleting, expiring or evicting (because of maxmemory policy) large objects is a blocking operation. If you have very large objects that are often deleted, expired, or evicted, try to fragment those objects into multiple smaller objects.\n");
460         }
461 
462         if (advise_mass_eviction) {
463             report = sdscat(report,"- Sudden changes to the 'maxmemory' setting via 'CONFIG SET', or allocation of large objects via set or sorted set intersections, the STORE option of SORT, or Redis Cluster large key migrations (RESTORE command), may create sudden memory pressure forcing the server to block while trying to evict keys.\n");
464         }
465 
466         if (advise_disable_thp) {
467             report = sdscat(report,"- I detected a non-zero amount of anonymous huge pages used by your process. This creates very serious latency events in different conditions, especially when Redis is persisting on disk. To disable THP support use the command 'echo never > /sys/kernel/mm/transparent_hugepage/enabled', and make sure to also add it to /etc/rc.local so that the command will be executed again after a reboot. Note that even if you have already disabled THP, you still need to restart the Redis process to get rid of the huge pages already created.\n");
468         }
469     }
470 
471     return report;
472 }
473 
474 /* ---------------------- Latency command implementation -------------------- */
475 
476 /* latencyCommand() helper to produce a time-delay reply for all the samples
477  * in memory for the specified time series. */
478 void latencyCommandReplyWithSamples(client *c, struct latencyTimeSeries *ts) {
479     void *replylen = addDeferredMultiBulkLength(c);
480     int samples = 0, j;
481 
482     for (j = 0; j < LATENCY_TS_LEN; j++) {
483         int i = (ts->idx + j) % LATENCY_TS_LEN;
484 
485         if (ts->samples[i].time == 0) continue;
486         addReplyMultiBulkLen(c,2);
487         addReplyLongLong(c,ts->samples[i].time);
488         addReplyLongLong(c,ts->samples[i].latency);
489         samples++;
490     }
491     setDeferredMultiBulkLength(c,replylen,samples);
492 }
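/* The reply built above is an array of [unix-time, latency-in-ms] pairs,
 * oldest sample first. A hypothetical LATENCY HISTORY reply as rendered by
 * redis-cli could look like:
 *
 *     1) 1) (integer) 1405067822
 *        2) (integer) 251
 *     2) 1) (integer) 1405067941
 *        2) (integer) 1001
 */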
493 
494 /* latencyCommand() helper to produce the reply for the LATEST subcommand,
495  * listing the last latency sample for every event type registered so far. */
496 void latencyCommandReplyWithLatestEvents(client *c) {
497     dictIterator *di;
498     dictEntry *de;
499 
500     addReplyMultiBulkLen(c,dictSize(server.latency_events));
501     di = dictGetIterator(server.latency_events);
502     while((de = dictNext(di)) != NULL) {
503         char *event = dictGetKey(de);
504         struct latencyTimeSeries *ts = dictGetVal(de);
505         int last = (ts->idx + LATENCY_TS_LEN - 1) % LATENCY_TS_LEN;
506 
507         addReplyMultiBulkLen(c,4);
508         addReplyBulkCString(c,event);
509         addReplyLongLong(c,ts->samples[last].time);
510         addReplyLongLong(c,ts->samples[last].latency);
511         addReplyLongLong(c,ts->max);
512     }
513     dictReleaseIterator(di);
514 }
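/* Each LATENCY LATEST entry built above is a four element array: event name,
 * unix time of the latest sample, its latency in milliseconds, and the
 * all-time maximum latency for that event. Illustrative redis-cli output:
 *
 *     1) 1) "command"
 *        2) (integer) 1405067976
 *        3) (integer) 251
 *        4) (integer) 1001
 */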
515 
516 #define LATENCY_GRAPH_COLS 80
517 sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts) {
518     int j;
519     struct sequence *seq = createSparklineSequence();
520     sds graph = sdsempty();
521     uint32_t min = 0, max = 0;
522 
523     for (j = 0; j < LATENCY_TS_LEN; j++) {
524         int i = (ts->idx + j) % LATENCY_TS_LEN;
525         int elapsed;
526         char buf[64];
527 
528         if (ts->samples[i].time == 0) continue;
529         /* Update min and max. */
530         if (seq->length == 0) {
531             min = max = ts->samples[i].latency;
532         } else {
533             if (ts->samples[i].latency > max) max = ts->samples[i].latency;
534             if (ts->samples[i].latency < min) min = ts->samples[i].latency;
535         }
536         /* Use as label the number of seconds / minutes / hours / days
537          * ago the event happened. */
538         elapsed = time(NULL) - ts->samples[i].time;
539         if (elapsed < 60)
540             snprintf(buf,sizeof(buf),"%ds",elapsed);
541         else if (elapsed < 3600)
542             snprintf(buf,sizeof(buf),"%dm",elapsed/60);
543         else if (elapsed < 3600*24)
544             snprintf(buf,sizeof(buf),"%dh",elapsed/3600);
545         else
546             snprintf(buf,sizeof(buf),"%dd",elapsed/(3600*24));
547         sparklineSequenceAddSample(seq,ts->samples[i].latency,buf);
548     }
549 
550     graph = sdscatprintf(graph,
551         "%s - high %lu ms, low %lu ms (all time high %lu ms)\n", event,
552         (unsigned long) max, (unsigned long) min, (unsigned long) ts->max);
553     for (j = 0; j < LATENCY_GRAPH_COLS; j++)
554         graph = sdscatlen(graph,"-",1);
555     graph = sdscatlen(graph,"\n",1);
556     graph = sparklineRender(graph,seq,LATENCY_GRAPH_COLS,4,SPARKLINE_FILL);
557     freeSparklineSequence(seq);
558     return graph;
559 }
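/* The graph produced above starts with a header line built from the format
 * string just rendered, for example (numbers are illustrative):
 *
 *     command - high 500 ms, low 101 ms (all time high 500 ms)
 *
 * followed by an 80 column ruler of '-' characters and a 4 row ASCII
 * sparkline, one column per sample, labeled with how long ago each sample
 * happened (seconds, minutes, hours or days). */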
560 
561 /* LATENCY command implementations.
562  *
563  * LATENCY HISTORY: return time-latency samples for the specified event.
564  * LATENCY LATEST: return the latest latency for all the events classes.
565  * LATENCY DOCTOR: returns a human readable analysis of instance latency.
566  * LATENCY GRAPH: provide an ASCII graph of the latency of the specified event.
567  * LATENCY RESET: reset data of a specified event or all the data if no event provided.
568  */
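/* A short illustrative redis-cli session (event names and values are made up):
 *
 *     127.0.0.1:6379> LATENCY RESET command
 *     (integer) 1
 *     127.0.0.1:6379> LATENCY HISTORY event-with-no-samples
 *     (empty list or set)
 *     127.0.0.1:6379> LATENCY DOCTOR
 *     "Dave, I have observed latency spikes in this Redis instance. ..."
 */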
569 void latencyCommand(client *c) {
570     const char *help[] = {
571 "DOCTOR              -- Returns a human readable latency analysis report.",
572 "GRAPH   <event>     -- Returns an ASCII latency graph for the event class.",
573 "HISTORY <event>     -- Returns time-latency samples for the event class.",
574 "LATEST              -- Returns the latest latency samples for all events.",
575 "RESET   [event ...] -- Resets latency data of one or more event classes.",
576 "                       (default: reset all data for all event classes)",
577 "HELP                -- Prints this help.",
578 NULL
579     };
580     struct latencyTimeSeries *ts;
581 
582     if (!strcasecmp(c->argv[1]->ptr,"history") && c->argc == 3) {
583         /* LATENCY HISTORY <event> */
584         ts = dictFetchValue(server.latency_events,c->argv[2]->ptr);
585         if (ts == NULL) {
586             addReplyMultiBulkLen(c,0);
587         } else {
588             latencyCommandReplyWithSamples(c,ts);
589         }
590     } else if (!strcasecmp(c->argv[1]->ptr,"graph") && c->argc == 3) {
591         /* LATENCY GRAPH <event> */
592         sds graph;
593         dictEntry *de;
594         char *event;
595 
596         de = dictFind(server.latency_events,c->argv[2]->ptr);
597         if (de == NULL) goto nodataerr;
598         ts = dictGetVal(de);
599         event = dictGetKey(de);
600 
601         graph = latencyCommandGenSparkeline(event,ts);
602         addReplyBulkCString(c,graph);
603         sdsfree(graph);
604     } else if (!strcasecmp(c->argv[1]->ptr,"latest") && c->argc == 2) {
605         /* LATENCY LATEST */
606         latencyCommandReplyWithLatestEvents(c);
607     } else if (!strcasecmp(c->argv[1]->ptr,"doctor") && c->argc == 2) {
608         /* LATENCY DOCTOR */
609         sds report = createLatencyReport();
610 
611         addReplyBulkCBuffer(c,report,sdslen(report));
612         sdsfree(report);
613     } else if (!strcasecmp(c->argv[1]->ptr,"reset") && c->argc >= 2) {
614         /* LATENCY RESET */
615         if (c->argc == 2) {
616             addReplyLongLong(c,latencyResetEvent(NULL));
617         } else {
618             int j, resets = 0;
619 
620             for (j = 2; j < c->argc; j++)
621                 resets += latencyResetEvent(c->argv[j]->ptr);
622             addReplyLongLong(c,resets);
623         }
624     } else if (!strcasecmp(c->argv[1]->ptr,"help") && c->argc >= 2) {
625         addReplyHelp(c, help);
626     } else {
627         addReplySubcommandSyntaxError(c);
628     }
629     return;
630 
631 nodataerr:
632     /* Common error when the user asks for an event we have no latency
633      * information about. */
634     addReplyErrorFormat(c,
635         "No samples available for event '%s'", (char*) c->argv[2]->ptr);
636 }
637 
638