/* The latency monitor makes it easy to observe the sources of latency
 * in a Redis instance using the LATENCY command. Different latency
 * sources are monitored, like disk I/O, execution of commands, fork
 * system call, and so forth.
 *
 * ----------------------------------------------------------------------------
 *
 * Copyright (c) 2014, Salvatore Sanfilippo <antirez at gmail dot com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Redis nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without
 *     specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "server.h"

/* Dictionary type for latency events. */
int dictStringKeyCompare(void *privdata, const void *key1, const void *key2) {
    UNUSED(privdata);
    return strcmp(key1,key2) == 0;
}

unsigned int dictStringHash(const void *key) {
    return dictGenHashFunction(key, strlen(key));
}

void dictVanillaFree(void *privdata, void *val);

dictType latencyTimeSeriesDictType = {
    dictStringHash,             /* hash function */
    NULL,                       /* key dup */
    NULL,                       /* val dup */
    dictStringKeyCompare,       /* key compare */
    dictVanillaFree,            /* key destructor */
    dictVanillaFree             /* val destructor */
};

/* ------------------------- Utility functions ------------------------------ */

#ifdef __linux__
/* Returns 1 if Transparent Huge Pages support is enabled in the kernel.
 * Otherwise (or if we are unable to check) 0 is returned. */
int THPIsEnabled(void) {
    char buf[1024];

    FILE *fp = fopen("/sys/kernel/mm/transparent_hugepage/enabled","r");
    if (!fp) return 0;
    if (fgets(buf,sizeof(buf),fp) == NULL) {
        fclose(fp);
        return 0;
    }
    fclose(fp);
    return (strstr(buf,"[never]") == NULL) ? 1 : 0;
}
#endif

/* Report the amount of AnonHugePages in smaps, in bytes. If the return
 * value of the function is non-zero, the process is being targeted by
 * THP support, and is likely to have memory usage / latency issues. */
int THPGetAnonHugePagesSize(void) {
    return zmalloc_get_smap_bytes_by_field("AnonHugePages:");
}
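
/* Note: zmalloc_get_smap_bytes_by_field() (see zmalloc.c) sums the matching
 * field across the mappings of the current process. The entries it parses
 * look like the following line from /proc/self/smaps (the value shown here
 * is made up):
 *
 *   AnonHugePages:      2048 kB
 */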

/* ---------------------------- Latency API --------------------------------- */

/* Latency monitor initialization. We just need to create the dictionary
 * of time series: each time series is created on demand in order to avoid
 * having a fixed list to maintain. */
void latencyMonitorInit(void) {
    server.latency_events = dictCreate(&latencyTimeSeriesDictType,NULL);
}

/* Add the specified sample to the specified time series "event".
 * This function is usually called via latencyAddSampleIfNeeded(), a
 * macro that only adds the sample if the latency is higher than
 * server.latency_monitor_threshold. */
void latencyAddSample(char *event, mstime_t latency) {
    struct latencyTimeSeries *ts = dictFetchValue(server.latency_events,event);
    time_t now = time(NULL);
    int prev;

    /* Create the time series if it does not exist. */
    if (ts == NULL) {
        ts = zmalloc(sizeof(*ts));
        ts->idx = 0;
        ts->max = 0;
        memset(ts->samples,0,sizeof(ts->samples));
        dictAdd(server.latency_events,zstrdup(event),ts);
    }

    /* If the previous sample is in the same second, we update our old sample
     * if this latency is greater than the old one, or just return. */
    prev = (ts->idx + LATENCY_TS_LEN - 1) % LATENCY_TS_LEN;
    if (ts->samples[prev].time == now) {
        if (latency > ts->samples[prev].latency)
            ts->samples[prev].latency = latency;
        return;
    }

    ts->samples[ts->idx].time = time(NULL);
    ts->samples[ts->idx].latency = latency;
    if (latency > ts->max) ts->max = latency;

    ts->idx++;
    if (ts->idx == LATENCY_TS_LEN) ts->idx = 0;
}
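
/* Usage sketch (illustrative): callers normally do not invoke
 * latencyAddSample() directly, but wrap the potentially slow code path with
 * the helper macros defined in latency.h. The event name below is made up:
 *
 *     mstime_t latency;
 *     latencyStartMonitor(latency);
 *     ... potentially slow operation ...
 *     latencyEndMonitor(latency);
 *     latencyAddSampleIfNeeded("some-event",latency);
 *
 * latencyAddSampleIfNeeded() only calls this function when the measured
 * latency is >= server.latency_monitor_threshold. */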

/* Reset data for the specified event, or all the events' data if
 * 'event_to_reset' is NULL.
 *
 * Note: this is O(N) even when event_to_reset is not NULL because it makes
 * the code simpler and we have a small fixed max number of events. */
int latencyResetEvent(char *event_to_reset) {
    dictIterator *di;
    dictEntry *de;
    int resets = 0;

    di = dictGetSafeIterator(server.latency_events);
    while((de = dictNext(di)) != NULL) {
        char *event = dictGetKey(de);

        if (event_to_reset == NULL || strcasecmp(event,event_to_reset) == 0) {
            dictDelete(server.latency_events, event);
            resets++;
        }
    }
    dictReleaseIterator(di);
    return resets;
}

/* ------------------------ Latency reporting (doctor) ---------------------- */

/* Analyze the samples available for a given event and populate the provided
 * latencyStats structure with different metrics: average, MAD, min, max, and
 * so forth. Check the latency.h definition of struct latencyStats for more
 * info. If the specified event has no elements the structure is populated
 * with zero values. */
void analyzeLatencyForEvent(char *event, struct latencyStats *ls) {
    struct latencyTimeSeries *ts = dictFetchValue(server.latency_events,event);
    int j;
    uint64_t sum;

    ls->all_time_high = ts ? ts->max : 0;
    ls->avg = 0;
    ls->min = 0;
    ls->max = 0;
    ls->mad = 0;
    ls->samples = 0;
    ls->period = 0;
    if (!ts) return;

    /* First pass, populate everything but the MAD. */
    sum = 0;
    for (j = 0; j < LATENCY_TS_LEN; j++) {
        if (ts->samples[j].time == 0) continue;
        ls->samples++;
        if (ls->samples == 1) {
            ls->min = ls->max = ts->samples[j].latency;
        } else {
            if (ls->min > ts->samples[j].latency)
                ls->min = ts->samples[j].latency;
            if (ls->max < ts->samples[j].latency)
                ls->max = ts->samples[j].latency;
        }
        sum += ts->samples[j].latency;

        /* Track the oldest event time in ls->period. */
        if (ls->period == 0 || ts->samples[j].time < ls->period)
            ls->period = ts->samples[j].time;
    }

    /* So far sum holds the total of the latencies, and period is
     * the oldest event time. We need to turn the first into an average and
     * the second into a range of seconds. */
    if (ls->samples) {
        ls->avg = sum / ls->samples;
        ls->period = time(NULL) - ls->period;
        if (ls->period == 0) ls->period = 1;
    }

    /* Second pass, compute MAD. */
    sum = 0;
    for (j = 0; j < LATENCY_TS_LEN; j++) {
        int64_t delta;

        if (ts->samples[j].time == 0) continue;
        delta = (int64_t)ls->avg - ts->samples[j].latency;
        if (delta < 0) delta = -delta;
        sum += delta;
    }
    if (ls->samples) ls->mad = sum / ls->samples;
}
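
/* As a worked example (numbers are illustrative, not from a real run):
 * with samples of 2, 4 and 12 milliseconds the average is (2+4+12)/3 = 6 ms
 * and the mean absolute deviation is (|6-2| + |6-4| + |6-12|)/3 = 4 ms.
 * Since the computation uses integer math, results are truncated. */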

/* Create a human readable report of latency events for this Redis instance. */
sds createLatencyReport(void) {
    sds report = sdsempty();
    int advise_better_vm = 0;       /* Better virtual machines. */
    int advise_slowlog_enabled = 0; /* Enable slowlog. */
    int advise_slowlog_tuning = 0;  /* Reconfigure slowlog. */
    int advise_slowlog_inspect = 0; /* Check your slowlog. */
    int advise_disk_contention = 0; /* Try to lower disk contention. */
    int advise_scheduler = 0;       /* Intrinsic latency. */
    int advise_data_writeback = 0;  /* data=writeback. */
    int advise_no_appendfsync = 0;  /* don't fsync during rewrites. */
    int advise_local_disk = 0;      /* Avoid remote disks. */
    int advise_ssd = 0;             /* Use an SSD drive. */
    int advise_write_load_info = 0; /* Print info about AOF and write load. */
    int advise_hz = 0;              /* Use higher HZ. */
    int advise_large_objects = 0;   /* Deletion of large objects. */
    int advise_mass_eviction = 0;   /* Avoid mass eviction of keys. */
    int advise_relax_fsync_policy = 0; /* appendfsync always is slow. */
    int advise_disable_thp = 0;     /* AnonHugePages detected. */
    int advices = 0;

    /* Return ASAP if the latency engine is disabled and it looks like it
     * was never enabled so far. */
    if (dictSize(server.latency_events) == 0 &&
        server.latency_monitor_threshold == 0)
    {
        report = sdscat(report,"I'm sorry, Dave, I can't do that. Latency monitoring is disabled in this Redis instance. You may use \"CONFIG SET latency-monitor-threshold <milliseconds>\" in order to enable it. If we weren't in a deep space mission I'd suggest taking a look at http://redis.io/topics/latency-monitor.\n");
        return report;
    }

    /* Show the stats for all the events, adding for each event some related
     * comments depending on the observed values. */
    dictIterator *di;
    dictEntry *de;
    int eventnum = 0;

    di = dictGetSafeIterator(server.latency_events);
    while((de = dictNext(di)) != NULL) {
        char *event = dictGetKey(de);
        struct latencyTimeSeries *ts = dictGetVal(de);
        struct latencyStats ls;

        if (ts == NULL) continue;
        eventnum++;
        if (eventnum == 1) {
            report = sdscat(report,"Dave, I have observed latency spikes in this Redis instance. You don't mind talking about it, do you Dave?\n\n");
        }
        analyzeLatencyForEvent(event,&ls);

        report = sdscatprintf(report,
            "%d. %s: %d latency spikes (average %lums, mean deviation %lums, period %.2f sec). Worst all time event %lums.",
            eventnum, event,
            ls.samples,
            (unsigned long) ls.avg,
            (unsigned long) ls.mad,
            (double) ls.period/ls.samples,
            (unsigned long) ts->max);

        /* Fork */
        if (!strcasecmp(event,"fork")) {
            char *fork_quality;
            if (server.stat_fork_rate < 10) {
                fork_quality = "terrible";
                advise_better_vm = 1;
                advices++;
            } else if (server.stat_fork_rate < 25) {
                fork_quality = "poor";
                advise_better_vm = 1;
                advices++;
            } else if (server.stat_fork_rate < 100) {
                fork_quality = "good";
            } else {
                fork_quality = "excellent";
            }
            report = sdscatprintf(report,
                " Fork rate is %.2f GB/sec (%s).", server.stat_fork_rate,
                fork_quality);
        }

        /* Potentially slow commands. */
        if (!strcasecmp(event,"command")) {
            if (server.slowlog_log_slower_than == 0) {
                advise_slowlog_enabled = 1;
                advices++;
            } else if (server.slowlog_log_slower_than/1000 >
                       server.latency_monitor_threshold)
            {
                advise_slowlog_tuning = 1;
                advices++;
            }
            advise_slowlog_inspect = 1;
            advise_large_objects = 1;
            advices += 2;
        }

        /* fast-command. */
        if (!strcasecmp(event,"fast-command")) {
            advise_scheduler = 1;
            advices++;
        }

        /* AOF and I/O. */
        if (!strcasecmp(event,"aof-write-pending-fsync")) {
            advise_local_disk = 1;
            advise_disk_contention = 1;
            advise_ssd = 1;
            advise_data_writeback = 1;
            advices += 4;
        }

        if (!strcasecmp(event,"aof-write-active-child")) {
            advise_no_appendfsync = 1;
            advise_data_writeback = 1;
            advise_ssd = 1;
            advices += 3;
        }

        if (!strcasecmp(event,"aof-write-alone")) {
            advise_local_disk = 1;
            advise_data_writeback = 1;
            advise_ssd = 1;
            advices += 3;
        }

        if (!strcasecmp(event,"aof-fsync-always")) {
            advise_relax_fsync_policy = 1;
            advices++;
        }

        if (!strcasecmp(event,"aof-fstat") ||
            !strcasecmp(event,"rdb-unlink-temp-file")) {
            advise_disk_contention = 1;
            advise_local_disk = 1;
            advices += 2;
        }

        if (!strcasecmp(event,"aof-rewrite-diff-write") ||
            !strcasecmp(event,"aof-rename")) {
            advise_write_load_info = 1;
            advise_data_writeback = 1;
            advise_ssd = 1;
            advise_local_disk = 1;
            advices += 4;
        }

        /* Expire cycle. */
        if (!strcasecmp(event,"expire-cycle")) {
            advise_hz = 1;
            advise_large_objects = 1;
            advices += 2;
        }

        /* Eviction cycle. */
        if (!strcasecmp(event,"eviction-del")) {
            advise_large_objects = 1;
            advices++;
        }

        if (!strcasecmp(event,"eviction-cycle")) {
            advise_mass_eviction = 1;
            advices++;
        }

        report = sdscatlen(report,"\n",1);
    }
    dictReleaseIterator(di);

    /* Add advice that is not based on specific events. */
    if (THPGetAnonHugePagesSize() > 0) {
        advise_disable_thp = 1;
        advices++;
    }

    if (eventnum == 0 && advices == 0) {
        report = sdscat(report,"Dave, no latency spike was observed during the lifetime of this Redis instance, not in the slightest bit. I honestly think you ought to sit down calmly, take a stress pill, and think things over.\n");
    } else if (eventnum > 0 && advices == 0) {
        report = sdscat(report,"\nWhile there are latency events logged, I'm not able to suggest any easy fix. Please use the Redis community to get some help, providing this report in your help request.\n");
    } else {
        /* Add all the suggestions accumulated so far. */

        /* Better VM. */
        report = sdscat(report,"\nI have a few suggestions for you:\n\n");
        if (advise_better_vm) {
            report = sdscat(report,"- If you are using a virtual machine, consider upgrading to a faster one, using a hypervisor that provides less latency during fork() calls. Xen is known to have poor fork() performance. Even in the context of the same VM provider, certain kinds of instances can execute fork faster than others.\n");
        }

        /* Slow log. */
        if (advise_slowlog_enabled) {
            report = sdscatprintf(report,"- There are latency issues with potentially slow commands you are using. Try to enable the Slow Log Redis feature using the command 'CONFIG SET slowlog-log-slower-than %llu'. If the Slow Log is disabled Redis is not able to log slow command executions for you.\n", (unsigned long long)server.latency_monitor_threshold*1000);
        }

        if (advise_slowlog_tuning) {
            report = sdscatprintf(report,"- Your current Slow Log configuration only logs events that are slower than your configured latency monitor threshold. Please use 'CONFIG SET slowlog-log-slower-than %llu'.\n", (unsigned long long)server.latency_monitor_threshold*1000);
        }

        if (advise_slowlog_inspect) {
            report = sdscat(report,"- Check your Slow Log to understand which of the commands you are running are too slow to execute. Please check http://redis.io/commands/slowlog for more information.\n");
        }

        /* Intrinsic latency. */
        if (advise_scheduler) {
            report = sdscat(report,"- The system is slow to execute Redis code paths not containing system calls. This usually means the system does not provide Redis CPU time to run for long periods. You should try to:\n"
            " 1) Lower the system load.\n"
            " 2) Use a computer / VM just for Redis if you are running other software in the same system.\n"
            " 3) Check if you have a \"noisy neighbour\" problem.\n"
            " 4) Check with 'redis-cli --intrinsic-latency 100' what is the intrinsic latency in your system.\n"
            " 5) Check if the problem is allocator-related by recompiling Redis with MALLOC=libc, if you are using Jemalloc. However this may create fragmentation problems.\n");
        }

        /* AOF / Disk latency. */
        if (advise_local_disk) {
            report = sdscat(report,"- It is strongly advised to use local disks for persistence, especially if you are using AOF. Remote disks provided by platform-as-a-service providers are known to be slow.\n");
        }

        if (advise_ssd) {
            report = sdscat(report,"- SSD disks are able to reduce fsync latency, and total time needed for snapshotting and AOF log rewriting (resulting in smaller memory usage and smaller final AOF rewrite buffer flushes). With extremely high write load SSD disks can be a good option. However Redis should perform reasonably with high load using normal disks. Use this advice as a last resort.\n");
        }

        if (advise_data_writeback) {
            report = sdscat(report,"- Mounting ext3/4 filesystems with data=writeback can provide a performance boost compared to data=ordered, however this mode of operation provides fewer guarantees, and sometimes it can happen that after a hard crash the AOF file will have a half-written command at the end and will need to be repaired before Redis restarts.\n");
        }

        if (advise_disk_contention) {
            report = sdscat(report,"- Try to lower the disk contention. This is often caused by other disk intensive processes running in the same computer (including other Redis instances).\n");
        }

        if (advise_no_appendfsync) {
            report = sdscat(report,"- Assuming this is viable from the point of view of data safety in your environment, you could try to enable the 'no-appendfsync-on-rewrite' option, so that fsync will not be performed while there is a child rewriting the AOF file or producing an RDB file (the moment where there is high disk contention).\n");
        }

        if (advise_relax_fsync_policy && server.aof_fsync == AOF_FSYNC_ALWAYS) {
            report = sdscat(report,"- Your fsync policy is set to 'always'. It is very hard to get good performance with such a setup, if possible try to relax the fsync policy to 'everysec'.\n");
        }

        if (advise_write_load_info) {
            report = sdscat(report,"- Latency during the AOF atomic rename operation, or when the final difference is flushed to the AOF file at the end of the rewrite, is sometimes caused by a very high write load, causing the AOF buffer to get very large. If possible try to send fewer commands to accomplish the same work, or use Lua scripts to group multiple operations into a single EVALSHA call.\n");
        }

        if (advise_hz && server.hz < 100) {
            report = sdscat(report,"- In order to make the Redis key expiration process more incremental, try to set the 'hz' configuration parameter to 100 using 'CONFIG SET hz 100'.\n");
        }

        if (advise_large_objects) {
            report = sdscat(report,"- Deleting, expiring or evicting (because of maxmemory policy) large objects is a blocking operation. If you have very large objects that are often deleted, expired, or evicted, try to fragment those objects into multiple smaller objects.\n");
        }

        if (advise_mass_eviction) {
            report = sdscat(report,"- Sudden changes to the 'maxmemory' setting via 'CONFIG SET', or allocation of large objects via sets or sorted sets intersections, STORE option of SORT, Redis Cluster large keys migrations (RESTORE command), may create sudden memory pressure forcing the server to block trying to evict keys.\n");
        }

        if (advise_disable_thp) {
            report = sdscat(report,"- I detected a non-zero amount of anonymous huge pages used by your process. This creates very serious latency events in different conditions, especially when Redis is persisting on disk. To disable THP support use the command 'echo never > /sys/kernel/mm/transparent_hugepage/enabled', and make sure to also add it to /etc/rc.local so that the command will be executed again after a reboot. Note that even if you have already disabled THP, you still need to restart the Redis process to get rid of the huge pages already created.\n");
        }
    }

    return report;
}

/* ---------------------- Latency command implementation -------------------- */

/* latencyCommand() helper to produce a time-delay reply for all the samples
 * in memory for the specified time series. */
void latencyCommandReplyWithSamples(client *c, struct latencyTimeSeries *ts) {
    void *replylen = addDeferredMultiBulkLength(c);
    int samples = 0, j;

    for (j = 0; j < LATENCY_TS_LEN; j++) {
        int i = (ts->idx + j) % LATENCY_TS_LEN;

        if (ts->samples[i].time == 0) continue;
        addReplyMultiBulkLen(c,2);
        addReplyLongLong(c,ts->samples[i].time);
        addReplyLongLong(c,ts->samples[i].latency);
        samples++;
    }
    setDeferredMultiBulkLength(c,replylen,samples);
}
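
/* For instance, the LATENCY HISTORY reply built by the function above is an
 * array of (timestamp, latency-in-ms) pairs, from the oldest to the newest
 * sample still in memory, e.g. (values are made up):
 *
 *   1) 1) (integer) 1405067822
 *      2) (integer) 251
 *   2) 1) (integer) 1405067941
 *      2) (integer) 1001
 */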

/* latencyCommand() helper to produce the reply for the LATEST subcommand,
 * listing the last latency sample for every event type registered so far. */
void latencyCommandReplyWithLatestEvents(client *c) {
    dictIterator *di;
    dictEntry *de;

    addReplyMultiBulkLen(c,dictSize(server.latency_events));
    di = dictGetIterator(server.latency_events);
    while((de = dictNext(di)) != NULL) {
        char *event = dictGetKey(de);
        struct latencyTimeSeries *ts = dictGetVal(de);
        int last = (ts->idx + LATENCY_TS_LEN - 1) % LATENCY_TS_LEN;

        addReplyMultiBulkLen(c,4);
        addReplyBulkCString(c,event);
        addReplyLongLong(c,ts->samples[last].time);
        addReplyLongLong(c,ts->samples[last].latency);
        addReplyLongLong(c,ts->max);
    }
    dictReleaseIterator(di);
}
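
/* For instance, LATENCY LATEST replies with one four-element entry per event:
 * event name, unix time of the latest spike, latest latency and all-time
 * maximum latency, both in milliseconds, e.g. (values are made up):
 *
 *   1) 1) "command"
 *      2) (integer) 1405067976
 *      3) (integer) 251
 *      4) (integer) 1001
 */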

#define LATENCY_GRAPH_COLS 80
sds latencyCommandGenSparkeline(char *event, struct latencyTimeSeries *ts) {
    int j;
    struct sequence *seq = createSparklineSequence();
    sds graph = sdsempty();
    uint32_t min = 0, max = 0;

    for (j = 0; j < LATENCY_TS_LEN; j++) {
        int i = (ts->idx + j) % LATENCY_TS_LEN;
        int elapsed;
        char buf[64];

        if (ts->samples[i].time == 0) continue;
        /* Update min and max. */
        if (seq->length == 0) {
            min = max = ts->samples[i].latency;
        } else {
            if (ts->samples[i].latency > max) max = ts->samples[i].latency;
            if (ts->samples[i].latency < min) min = ts->samples[i].latency;
        }
        /* Use as label the number of seconds / minutes / hours / days
         * ago the event happened. */
        elapsed = time(NULL) - ts->samples[i].time;
        if (elapsed < 60)
            snprintf(buf,sizeof(buf),"%ds",elapsed);
        else if (elapsed < 3600)
            snprintf(buf,sizeof(buf),"%dm",elapsed/60);
        else if (elapsed < 3600*24)
            snprintf(buf,sizeof(buf),"%dh",elapsed/3600);
        else
            snprintf(buf,sizeof(buf),"%dd",elapsed/(3600*24));
        sparklineSequenceAddSample(seq,ts->samples[i].latency,buf);
    }

    graph = sdscatprintf(graph,
        "%s - high %lu ms, low %lu ms (all time high %lu ms)\n", event,
        (unsigned long) max, (unsigned long) min, (unsigned long) ts->max);
    for (j = 0; j < LATENCY_GRAPH_COLS; j++)
        graph = sdscatlen(graph,"-",1);
    graph = sdscatlen(graph,"\n",1);
    graph = sparklineRender(graph,seq,LATENCY_GRAPH_COLS,4,SPARKLINE_FILL);
    freeSparklineSequence(seq);
    return graph;
}
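
/* The resulting graph is a header line in the format built above, a row of
 * LATENCY_GRAPH_COLS dashes, and the sparkline rendered by sparklineRender()
 * over LATENCY_GRAPH_COLS columns and 4 rows. For example, the header may
 * look like the following (values are made up):
 *
 *   event - high 500 ms, low 1 ms (all time high 500 ms)
 */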

/* LATENCY command implementations.
 *
 * LATENCY HISTORY: return time-latency samples for the specified event.
 * LATENCY LATEST: return the latest latency for all the event classes.
 * LATENCY DOCTOR: returns a human readable analysis of instance latency.
 * LATENCY GRAPH: provide an ASCII graph of the latency of the specified event.
 * LATENCY RESET: reset the data of a specified event, or of all the events.
 */
void latencyCommand(client *c) {
    struct latencyTimeSeries *ts;

    if (!strcasecmp(c->argv[1]->ptr,"history") && c->argc == 3) {
        /* LATENCY HISTORY <event> */
        ts = dictFetchValue(server.latency_events,c->argv[2]->ptr);
        if (ts == NULL) {
            addReplyMultiBulkLen(c,0);
        } else {
            latencyCommandReplyWithSamples(c,ts);
        }
    } else if (!strcasecmp(c->argv[1]->ptr,"graph") && c->argc == 3) {
        /* LATENCY GRAPH <event> */
        sds graph;
        dictEntry *de;
        char *event;

        de = dictFind(server.latency_events,c->argv[2]->ptr);
        if (de == NULL) goto nodataerr;
        ts = dictGetVal(de);
        event = dictGetKey(de);

        graph = latencyCommandGenSparkeline(event,ts);
        addReplyBulkCString(c,graph);
        sdsfree(graph);
    } else if (!strcasecmp(c->argv[1]->ptr,"latest") && c->argc == 2) {
        /* LATENCY LATEST */
        latencyCommandReplyWithLatestEvents(c);
    } else if (!strcasecmp(c->argv[1]->ptr,"doctor") && c->argc == 2) {
        /* LATENCY DOCTOR */
        sds report = createLatencyReport();

        addReplyBulkCBuffer(c,report,sdslen(report));
        sdsfree(report);
    } else if (!strcasecmp(c->argv[1]->ptr,"reset") && c->argc >= 2) {
        /* LATENCY RESET */
        if (c->argc == 2) {
            addReplyLongLong(c,latencyResetEvent(NULL));
        } else {
            int j, resets = 0;

            for (j = 2; j < c->argc; j++)
                resets += latencyResetEvent(c->argv[j]->ptr);
            addReplyLongLong(c,resets);
        }
    } else {
        addReply(c,shared.syntaxerr);
    }
    return;

nodataerr:
    /* Common error when the user asks for an event we have no latency
     * information about. */
    addReplyErrorFormat(c,
        "No samples available for event '%s'", (char*) c->argv[2]->ptr);
}
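
/* Illustrative redis-cli session (timestamps and values are made up, and the
 * DOCTOR output is truncated):
 *
 *   127.0.0.1:6379> CONFIG SET latency-monitor-threshold 100
 *   OK
 *   127.0.0.1:6379> LATENCY RESET command
 *   (integer) 1
 *   127.0.0.1:6379> LATENCY DOCTOR
 *   "Dave, I have observed latency spikes in this Redis instance. ..."
 */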