xref: /redis-3.2.3/src/latency.c (revision 32f80e2f)
/* The latency monitor makes it easy to observe the sources of latency
 * in a Redis instance using the LATENCY command. Different latency
 * sources are monitored, like disk I/O, execution of commands, fork
 * system call, and so forth.
 *
 * ----------------------------------------------------------------------------
 *
 * Copyright (c) 2014, Salvatore Sanfilippo <antirez at gmail dot com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Redis nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without
 *     specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "server.h"

/* Dictionary type for latency events. */
int dictStringKeyCompare(void *privdata, const void *key1, const void *key2) {
    UNUSED(privdata);
    return strcmp(key1,key2) == 0;
}

unsigned int dictStringHash(const void *key) {
    return dictGenHashFunction(key, strlen(key));
}

void dictVanillaFree(void *privdata, void *val);

dictType latencyTimeSeriesDictType = {
    dictStringHash,             /* hash function */
    NULL,                       /* key dup */
    NULL,                       /* val dup */
    dictStringKeyCompare,       /* key compare */
    dictVanillaFree,            /* key destructor */
    dictVanillaFree             /* val destructor */
};
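
/* Descriptive note (see latency.h for the authoritative definitions): every
 * monitored event name maps to a struct latencyTimeSeries, a fixed-size
 * circular buffer of LATENCY_TS_LEN (time,latency) samples plus the all-time
 * maximum latency observed for that event. */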

/* ------------------------- Utility functions ------------------------------ */

#ifdef __linux__
/* Returns 1 if Transparent Huge Pages support is enabled in the kernel.
 * Otherwise (or if we are unable to check) 0 is returned. */
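/* For reference (typical Linux layout, not guaranteed on every kernel): the
 * sysfs file read below usually contains a single line such as
 * "always madvise [never]", with the active policy enclosed in brackets,
 * which is why we look for the "[never]" token. */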
int THPIsEnabled(void) {
    char buf[1024];

    FILE *fp = fopen("/sys/kernel/mm/transparent_hugepage/enabled","r");
    if (!fp) return 0;
    if (fgets(buf,sizeof(buf),fp) == NULL) {
        fclose(fp);
        return 0;
    }
    fclose(fp);
    return (strstr(buf,"[never]") == NULL) ? 1 : 0;
}
#endif

/* Report the amount of AnonHugePages in smaps, in bytes. If the return
 * value of the function is non-zero, the process is being targeted by
 * THP support, and is likely to have memory usage / latency issues. */
int THPGetAnonHugePagesSize(void) {
    return zmalloc_get_smap_bytes_by_field("AnonHugePages:");
}
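
/* For illustration only (exact formatting varies by kernel): the field
 * summed by zmalloc_get_smap_bytes_by_field() appears in /proc/self/smaps
 * as lines like "AnonHugePages:      2048 kB". */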

/* ---------------------------- Latency API --------------------------------- */

/* Latency monitor initialization. We just need to create the dictionary
 * of time series; each time series is created on demand in order to avoid
 * having a fixed list to maintain. */
void latencyMonitorInit(void) {
    server.latency_events = dictCreate(&latencyTimeSeriesDictType,NULL);
}

/* Add the specified sample to the specified time series "event".
 * This function is usually called via latencyAddSampleIfNeeded(), a
 * macro that only adds the sample if the latency is higher than
 * server.latency_monitor_threshold. */
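/* For reference, the gating macro lives in latency.h and looks roughly like
 * the following sketch (check latency.h for the authoritative definition):
 *
 *   #define latencyAddSampleIfNeeded(event,var)                  \
 *       if (server.latency_monitor_threshold &&                  \
 *           (var) >= server.latency_monitor_threshold)           \
 *             latencyAddSample((event),(var));
 */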
void latencyAddSample(char *event, mstime_t latency) {
    struct latencyTimeSeries *ts = dictFetchValue(server.latency_events,event);
    time_t now = time(NULL);
    int prev;

    /* Create the time series if it does not exist. */
    if (ts == NULL) {
        ts = zmalloc(sizeof(*ts));
        ts->idx = 0;
        ts->max = 0;
        memset(ts->samples,0,sizeof(ts->samples));
        dictAdd(server.latency_events,zstrdup(event),ts);
    }

    /* If the previous sample is in the same second, update the old sample
     * if this latency is greater than the old one, otherwise just return. */
    prev = (ts->idx + LATENCY_TS_LEN - 1) % LATENCY_TS_LEN;
    if (ts->samples[prev].time == now) {
        if (latency > ts->samples[prev].latency)
            ts->samples[prev].latency = latency;
        return;
    }

    ts->samples[ts->idx].time = time(NULL);
    ts->samples[ts->idx].latency = latency;
    if (latency > ts->max) ts->max = latency;

    ts->idx++;
    if (ts->idx == LATENCY_TS_LEN) ts->idx = 0;
}

/* Reset data for the specified event, or all the events data if 'event' is
 * NULL.
 *
 * Note: this is O(N) even when event_to_reset is not NULL because it makes
 * the code simpler and we have a small fixed max number of events. */
int latencyResetEvent(char *event_to_reset) {
    dictIterator *di;
    dictEntry *de;
    int resets = 0;

    di = dictGetSafeIterator(server.latency_events);
    while((de = dictNext(di)) != NULL) {
        char *event = dictGetKey(de);

        if (event_to_reset == NULL || strcasecmp(event,event_to_reset) == 0) {
            dictDelete(server.latency_events, event);
            resets++;
        }
    }
    dictReleaseIterator(di);
    return resets;
}

/* ------------------------ Latency reporting (doctor) ---------------------- */

/* Analyze the samples available for a given event and return a structure
 * populated with different metrics: average, MAD, min, max, and so forth.
 * Check the latency.h definition of struct latencyStats for more info.
 * If the specified event has no elements the structure is populated with
 * zero values. */
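/* Descriptive note: the "MAD" reported here is the mean absolute deviation
 * from the average, i.e. mad = sum(|sample_i - avg|) / samples, computed in
 * the second pass below as a simple measure of spread around the average. */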
void analyzeLatencyForEvent(char *event, struct latencyStats *ls) {
    struct latencyTimeSeries *ts = dictFetchValue(server.latency_events,event);
    int j;
    uint64_t sum;

    ls->all_time_high = ts ? ts->max : 0;
    ls->avg = 0;
    ls->min = 0;
    ls->max = 0;
    ls->mad = 0;
    ls->samples = 0;
    ls->period = 0;
    if (!ts) return;

    /* First pass, populate everything but the MAD. */
    sum = 0;
    for (j = 0; j < LATENCY_TS_LEN; j++) {
        if (ts->samples[j].time == 0) continue;
        ls->samples++;
        if (ls->samples == 1) {
            ls->min = ls->max = ts->samples[j].latency;
        } else {
            if (ls->min > ts->samples[j].latency)
                ls->min = ts->samples[j].latency;
            if (ls->max < ts->samples[j].latency)
                ls->max = ts->samples[j].latency;
        }
        sum += ts->samples[j].latency;

        /* Track the oldest event time in ls->period. */
        if (ls->period == 0 || ts->samples[j].time < ls->period)
            ls->period = ts->samples[j].time;
    }

    /* So far 'sum' holds the total of the latencies, and 'period' is
     * the oldest event time. We need to turn the first into an average and
     * the second into a range of seconds. */
    if (ls->samples) {
        ls->avg = sum / ls->samples;
        ls->period = time(NULL) - ls->period;
        if (ls->period == 0) ls->period = 1;
    }

    /* Second pass, compute MAD. */
    sum = 0;
    for (j = 0; j < LATENCY_TS_LEN; j++) {
        int64_t delta;

        if (ts->samples[j].time == 0) continue;
        delta = (int64_t)ls->avg - ts->samples[j].latency;
        if (delta < 0) delta = -delta;
        sum += delta;
    }
    if (ls->samples) ls->mad = sum / ls->samples;
}

/* Create a human readable report of latency events for this Redis instance. */
sds createLatencyReport(void) {
    sds report = sdsempty();
    int advise_better_vm = 0;       /* Better virtual machines. */
    int advise_slowlog_enabled = 0; /* Enable slowlog. */
    int advise_slowlog_tuning = 0;  /* Reconfigure slowlog. */
    int advise_slowlog_inspect = 0; /* Check your slowlog. */
    int advise_disk_contention = 0; /* Try to lower disk contention. */
    int advise_scheduler = 0;       /* Intrinsic latency. */
    int advise_data_writeback = 0;  /* data=writeback. */
    int advise_no_appendfsync = 0;  /* don't fsync during rewrites. */
    int advise_local_disk = 0;      /* Avoid remote disks. */
    int advise_ssd = 0;             /* Use an SSD drive. */
    int advise_write_load_info = 0; /* Print info about AOF and write load. */
    int advise_hz = 0;              /* Use higher HZ. */
    int advise_large_objects = 0;   /* Deletion of large objects. */
    int advise_mass_eviction = 0;   /* Avoid mass eviction of keys. */
    int advise_relax_fsync_policy = 0; /* appendfsync always is slow. */
    int advise_disable_thp = 0;     /* AnonHugePages detected. */
    int advices = 0;

    /* Return ASAP if the latency engine is disabled and it looks like it
     * was never enabled so far. */
    if (dictSize(server.latency_events) == 0 &&
        server.latency_monitor_threshold == 0)
    {
        report = sdscat(report,"I'm sorry, Dave, I can't do that. Latency monitoring is disabled in this Redis instance. You may use \"CONFIG SET latency-monitor-threshold <milliseconds>\" in order to enable it. If we weren't in a deep space mission I'd suggest taking a look at http://redis.io/topics/latency-monitor.\n");
        return report;
    }

    /* Show all the events stats and add for each event some event-related
     * comment depending on the values. */
    dictIterator *di;
    dictEntry *de;
    int eventnum = 0;

    di = dictGetSafeIterator(server.latency_events);
    while((de = dictNext(di)) != NULL) {
        char *event = dictGetKey(de);
        struct latencyTimeSeries *ts = dictGetVal(de);
        struct latencyStats ls;

        if (ts == NULL) continue;
        eventnum++;
        if (eventnum == 1) {
            report = sdscat(report,"Dave, I have observed latency spikes in this Redis instance. You don't mind talking about it, do you Dave?\n\n");
        }
        analyzeLatencyForEvent(event,&ls);

        report = sdscatprintf(report,
            "%d. %s: %d latency spikes (average %lums, mean deviation %lums, period %.2f sec). Worst all time event %lums.",
            eventnum, event,
            ls.samples,
            (unsigned long) ls.avg,
            (unsigned long) ls.mad,
            (double) ls.period/ls.samples,
            (unsigned long) ts->max);

        /* Fork */
        if (!strcasecmp(event,"fork")) {
            char *fork_quality;
            if (server.stat_fork_rate < 10) {
                fork_quality = "terrible";
                advise_better_vm = 1;
                advices++;
            } else if (server.stat_fork_rate < 25) {
                fork_quality = "poor";
                advise_better_vm = 1;
                advices++;
            } else if (server.stat_fork_rate < 100) {
                fork_quality = "good";
            } else {
                fork_quality = "excellent";
            }
            report = sdscatprintf(report,
                " Fork rate is %.2f GB/sec (%s).", server.stat_fork_rate,
                fork_quality);
        }

        /* Potentially commands. */
        if (!strcasecmp(event,"command")) {
            if (server.slowlog_log_slower_than == 0) {
                advise_slowlog_enabled = 1;
                advices++;
            } else if (server.slowlog_log_slower_than/1000 >
                       server.latency_monitor_threshold)
            {
                advise_slowlog_tuning = 1;
                advices++;
            }
            advise_slowlog_inspect = 1;
            advise_large_objects = 1;
            advices += 2;
        }

        /* fast-command. */
        if (!strcasecmp(event,"fast-command")) {
            advise_scheduler = 1;
            advices++;
        }

        /* AOF and I/O. */
        if (!strcasecmp(event,"aof-write-pending-fsync")) {
            advise_local_disk = 1;
            advise_disk_contention = 1;
            advise_ssd = 1;
            advise_data_writeback = 1;
            advices += 4;
        }

        if (!strcasecmp(event,"aof-write-active-child")) {
            advise_no_appendfsync = 1;
            advise_data_writeback = 1;
            advise_ssd = 1;
            advices += 3;
        }

        if (!strcasecmp(event,"aof-write-alone")) {
            advise_local_disk = 1;
            advise_data_writeback = 1;
            advise_ssd = 1;
            advices += 3;
        }

        if (!strcasecmp(event,"aof-fsync-always")) {
            advise_relax_fsync_policy = 1;
            advices++;
        }

        if (!strcasecmp(event,"aof-fstat") ||
            !strcasecmp(event,"rdb-unlink-temp-file")) {
            advise_disk_contention = 1;
            advise_local_disk = 1;
            advices += 2;
        }

        if (!strcasecmp(event,"aof-rewrite-diff-write") ||
            !strcasecmp(event,"aof-rename")) {
            advise_write_load_info = 1;
            advise_data_writeback = 1;
            advise_ssd = 1;
            advise_local_disk = 1;
            advices += 4;
        }

        /* Expire cycle. */
        if (!strcasecmp(event,"expire-cycle")) {
            advise_hz = 1;
            advise_large_objects = 1;
            advices += 2;
        }

        /* Eviction cycle. */
        if (!strcasecmp(event,"eviction-del")) {
            advise_large_objects = 1;
            advices++;
        }

        if (!strcasecmp(event,"eviction-cycle")) {
            advise_mass_eviction = 1;
            advices++;
        }

        report = sdscatlen(report,"\n",1);
    }
    dictReleaseIterator(di);

    /* Add non event based advices. */
    if (THPGetAnonHugePagesSize() > 0) {
        advise_disable_thp = 1;
        advices++;
    }

    if (eventnum == 0 && advices == 0) {
        report = sdscat(report,"Dave, no latency spike was observed during the lifetime of this Redis instance, not in the slightest bit. I honestly think you ought to sit down calmly, take a stress pill, and think things over.\n");
    } else if (eventnum > 0 && advices == 0) {
        report = sdscat(report,"\nWhile there are latency events logged, I'm not able to suggest any easy fix. Please use the Redis community to get some help, providing this report in your help request.\n");
    } else {
        /* Add all the suggestions accumulated so far. */

        /* Better VM. */
        report = sdscat(report,"\nI have a few suggestions for you:\n\n");
        if (advise_better_vm) {
            report = sdscat(report,"- If you are using a virtual machine, consider upgrading to a faster one, using a hypervisor that provides less latency during fork() calls. Xen is known to have poor fork() performance. Even in the context of the same VM provider, certain kinds of instances can execute fork faster than others.\n");
        }

        /* Slow log. */
        if (advise_slowlog_enabled) {
            report = sdscatprintf(report,"- There are latency issues with potentially slow commands you are using. Try to enable the Slow Log Redis feature using the command 'CONFIG SET slowlog-log-slower-than %llu'. If the Slow Log is disabled Redis is not able to log slow command executions for you.\n", (unsigned long long)server.latency_monitor_threshold*1000);
        }

        if (advise_slowlog_tuning) {
            report = sdscatprintf(report,"- Your current Slow Log configuration only logs events that are slower than your configured latency monitor threshold. Please use 'CONFIG SET slowlog-log-slower-than %llu'.\n", (unsigned long long)server.latency_monitor_threshold*1000);
        }

        if (advise_slowlog_inspect) {
            report = sdscat(report,"- Check your Slow Log to understand which of the commands you are running are too slow to execute. Please check http://redis.io/commands/slowlog for more information.\n");
        }

        /* Intrinsic latency. */
        if (advise_scheduler) {
            report = sdscat(report,"- The system is slow to execute Redis code paths not containing system calls. This usually means the system does not give Redis enough CPU time to run for long periods. You should try to:\n"
            "  1) Lower the system load.\n"
            "  2) Use a computer / VM just for Redis if you are running other software in the same system.\n"
            "  3) Check if you have a \"noisy neighbour\" problem.\n"
            "  4) Check with 'redis-cli --intrinsic-latency 100' what is the intrinsic latency in your system.\n"
            "  5) Check if the problem is allocator-related by recompiling Redis with MALLOC=libc, if you are using Jemalloc. However this may create fragmentation problems.\n");
        }

        /* AOF / Disk latency. */
        if (advise_local_disk) {
            report = sdscat(report,"- It is strongly advised to use local disks for persistence, especially if you are using AOF. Remote disks provided by platform-as-a-service providers are known to be slow.\n");
        }

        if (advise_ssd) {
            report = sdscat(report,"- SSD disks are able to reduce fsync latency, and the total time needed for snapshotting and AOF log rewriting (resulting in smaller memory usage and smaller final AOF rewrite buffer flushes). With extremely high write load SSD disks can be a good option. However Redis should perform reasonably well with high load using normal disks. Use this advice as a last resort.\n");
        }

        if (advise_data_writeback) {
            report = sdscat(report,"- Mounting ext3/4 filesystems with data=writeback can provide a performance boost compared to data=ordered, however this mode of operation provides fewer guarantees, and after a hard crash the AOF file may end with a half-written command and need to be repaired before Redis can restart.\n");
        }

        if (advise_disk_contention) {
            report = sdscat(report,"- Try to lower the disk contention. This is often caused by other disk intensive processes running in the same computer (including other Redis instances).\n");
        }

        if (advise_no_appendfsync) {
            report = sdscat(report,"- Assuming this is viable in your environment from the point of view of data safety, you could try to enable the 'no-appendfsync-on-rewrite' option, so that fsync will not be performed while there is a child rewriting the AOF file or producing an RDB file (the moment when there is high disk contention).\n");
        }

        if (advise_relax_fsync_policy && server.aof_fsync == AOF_FSYNC_ALWAYS) {
            report = sdscat(report,"- Your fsync policy is set to 'always'. It is very hard to get good performance with such a setup; if possible try to relax the fsync policy to 'everysec'.\n");
        }

        if (advise_write_load_info) {
            report = sdscat(report,"- Latency during the AOF atomic rename operation, or when the final difference is flushed to the AOF file at the end of the rewrite, is sometimes caused by a very high write load, causing the AOF buffer to get very large. If possible try to send fewer commands to accomplish the same work, or use Lua scripts to group multiple operations into a single EVALSHA call.\n");
        }

        if (advise_hz && server.hz < 100) {
            report = sdscat(report,"- In order to make the Redis key expiration process more incremental, try to set the 'hz' configuration parameter to 100 using 'CONFIG SET hz 100'.\n");
        }

        if (advise_large_objects) {
            report = sdscat(report,"- Deleting, expiring or evicting (because of maxmemory policy) large objects is a blocking operation. If you have very large objects that are often deleted, expired, or evicted, try to fragment those objects into multiple smaller objects.\n");
        }

        if (advise_mass_eviction) {
            report = sdscat(report,"- Sudden changes to the 'maxmemory' setting via 'CONFIG SET', or allocation of large objects via set or sorted set intersections, the STORE option of SORT, or large key migrations in Redis Cluster (RESTORE command), may create sudden memory pressure forcing the server to block while trying to evict keys.\n");
        }

        if (advise_disable_thp) {
            report = sdscat(report,"- I detected a non-zero amount of anonymous huge pages used by your process. This creates very serious latency events in different conditions, especially when Redis is persisting on disk. To disable THP support use the command 'echo never > /sys/kernel/mm/transparent_hugepage/enabled', and make sure to also add it to /etc/rc.local so that the command will be executed again after a reboot. Note that even if you have already disabled THP, you still need to restart the Redis process to get rid of the huge pages already created.\n");
        }
    }

    return report;
}

/* ---------------------- Latency command implementation -------------------- */

/* latencyCommand() helper to produce a time-delay reply for all the samples
 * in memory for the specified time series. */
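/* Illustrative reply shape (values are made up): the client receives an
 * array with one [unix_time, latency_ms] pair per stored sample, e.g.
 *
 *   1) 1) (integer) 1439479413
 *      2) (integer) 381
 *   2) 1) (integer) 1439479421
 *      2) (integer) 254
 */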
void latencyCommandReplyWithSamples(client *c, struct latencyTimeSeries *ts) {
    void *replylen = addDeferredMultiBulkLength(c);
    int samples = 0, j;

    for (j = 0; j < LATENCY_TS_LEN; j++) {
        int i = (ts->idx + j) % LATENCY_TS_LEN;

        if (ts->samples[i].time == 0) continue;
        addReplyMultiBulkLen(c,2);
        addReplyLongLong(c,ts->samples[i].time);
        addReplyLongLong(c,ts->samples[i].latency);
        samples++;
    }
    setDeferredMultiBulkLength(c,replylen,samples);
}

/* latencyCommand() helper to produce the reply for the LATEST subcommand,
 * listing the last latency sample for every event type registered so far. */
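/* Illustrative reply shape (values are made up): one 4-element entry per
 * event, i.e. [event name, time of last sample, last sample latency,
 * all-time maximum latency], e.g.
 *
 *   1) 1) "command"
 *      2) (integer) 1439479413
 *      3) (integer) 381
 *      4) (integer) 6802
 */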
void latencyCommandReplyWithLatestEvents(client *c) {
    dictIterator *di;
    dictEntry *de;

    addReplyMultiBulkLen(c,dictSize(server.latency_events));
    di = dictGetIterator(server.latency_events);
    while((de = dictNext(di)) != NULL) {
        char *event = dictGetKey(de);
        struct latencyTimeSeries *ts = dictGetVal(de);
        int last = (ts->idx + LATENCY_TS_LEN - 1) % LATENCY_TS_LEN;

        addReplyMultiBulkLen(c,4);
        addReplyBulkCString(c,event);
        addReplyLongLong(c,ts->samples[last].time);
        addReplyLongLong(c,ts->samples[last].latency);
        addReplyLongLong(c,ts->max);
    }
    dictReleaseIterator(di);
}

#define LATENCY_GRAPH_COLS 80
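/* latencyCommand() helper for the GRAPH subcommand: render an ASCII
 * sparkline of the stored samples for 'event'. The output starts with a
 * header reporting the high / low / all-time-high latencies, followed by an
 * 80-column graph where each sample is labeled with how long ago it
 * happened (seconds, minutes, hours or days). */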
sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts) {
    int j;
    struct sequence *seq = createSparklineSequence();
    sds graph = sdsempty();
    uint32_t min = 0, max = 0;

    for (j = 0; j < LATENCY_TS_LEN; j++) {
        int i = (ts->idx + j) % LATENCY_TS_LEN;
        int elapsed;
        char buf[64];

        if (ts->samples[i].time == 0) continue;
        /* Update min and max. */
        if (seq->length == 0) {
            min = max = ts->samples[i].latency;
        } else {
            if (ts->samples[i].latency > max) max = ts->samples[i].latency;
            if (ts->samples[i].latency < min) min = ts->samples[i].latency;
        }
        /* Use as label the number of seconds / minutes / hours / days
         * ago the event happened. */
        elapsed = time(NULL) - ts->samples[i].time;
        if (elapsed < 60)
            snprintf(buf,sizeof(buf),"%ds",elapsed);
        else if (elapsed < 3600)
            snprintf(buf,sizeof(buf),"%dm",elapsed/60);
        else if (elapsed < 3600*24)
            snprintf(buf,sizeof(buf),"%dh",elapsed/3600);
        else
            snprintf(buf,sizeof(buf),"%dd",elapsed/(3600*24));
        sparklineSequenceAddSample(seq,ts->samples[i].latency,buf);
    }

    graph = sdscatprintf(graph,
        "%s - high %lu ms, low %lu ms (all time high %lu ms)\n", event,
        (unsigned long) max, (unsigned long) min, (unsigned long) ts->max);
    for (j = 0; j < LATENCY_GRAPH_COLS; j++)
        graph = sdscatlen(graph,"-",1);
    graph = sdscatlen(graph,"\n",1);
    graph = sparklineRender(graph,seq,LATENCY_GRAPH_COLS,4,SPARKLINE_FILL);
    freeSparklineSequence(seq);
    return graph;
}

/* LATENCY command implementations.
 *
 * LATENCY HISTORY: return time-latency samples for the specified event.
 * LATENCY LATEST: return the latest latency for all the event classes.
 * LATENCY DOCTOR: returns a human readable analysis of instance latency.
 * LATENCY GRAPH: provide an ASCII graph of the latency of the specified event.
 * LATENCY RESET: reset data of a specified event, or of all the events if no
 *                event is given.
 */
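/* Illustrative session (values are made up), assuming the monitor was
 * enabled with something like CONFIG SET latency-monitor-threshold 100:
 *
 *   > LATENCY HISTORY command
 *   1) 1) (integer) 1439479413
 *      2) (integer) 381
 *   > LATENCY RESET command fork
 *   (integer) 2
 */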
void latencyCommand(client *c) {
    struct latencyTimeSeries *ts;

    if (!strcasecmp(c->argv[1]->ptr,"history") && c->argc == 3) {
        /* LATENCY HISTORY <event> */
        ts = dictFetchValue(server.latency_events,c->argv[2]->ptr);
        if (ts == NULL) {
            addReplyMultiBulkLen(c,0);
        } else {
            latencyCommandReplyWithSamples(c,ts);
        }
    } else if (!strcasecmp(c->argv[1]->ptr,"graph") && c->argc == 3) {
        /* LATENCY GRAPH <event> */
        sds graph;
        dictEntry *de;
        char *event;

        de = dictFind(server.latency_events,c->argv[2]->ptr);
        if (de == NULL) goto nodataerr;
        ts = dictGetVal(de);
        event = dictGetKey(de);

        graph = latencyCommandGenSparkeline(event,ts);
        addReplyBulkCString(c,graph);
        sdsfree(graph);
    } else if (!strcasecmp(c->argv[1]->ptr,"latest") && c->argc == 2) {
        /* LATENCY LATEST */
        latencyCommandReplyWithLatestEvents(c);
    } else if (!strcasecmp(c->argv[1]->ptr,"doctor") && c->argc == 2) {
        /* LATENCY DOCTOR */
        sds report = createLatencyReport();

        addReplyBulkCBuffer(c,report,sdslen(report));
        sdsfree(report);
    } else if (!strcasecmp(c->argv[1]->ptr,"reset") && c->argc >= 2) {
        /* LATENCY RESET */
        if (c->argc == 2) {
            addReplyLongLong(c,latencyResetEvent(NULL));
        } else {
            int j, resets = 0;

            for (j = 2; j < c->argc; j++)
                resets += latencyResetEvent(c->argv[j]->ptr);
            addReplyLongLong(c,resets);
        }
    } else {
        addReply(c,shared.syntaxerr);
    }
    return;

nodataerr:
    /* Common error when the user asks for an event we have no latency
     * information about. */
    addReplyErrorFormat(c,
        "No samples available for event '%s'", (char*) c->argv[2]->ptr);
}