xref: /redis-3.2.3/tests/instances.tcl (revision c476dbad)
1# Multi-instance test framework.
2# This is used in order to test Sentinel and Redis Cluster, and provides
3# basic capabilities for spawning and handling N parallel Redis / Sentinel
4# instances.
5#
6# Copyright (C) 2014 Salvatore Sanfilippo [email protected]
7# This software is released under the BSD License. See the COPYING file for
8# more information.
9
10package require Tcl 8.5
11
12set tcl_precision 17
13source ../support/redis.tcl
14source ../support/util.tcl
15source ../support/server.tcl
16source ../support/test.tcl
17
18set ::verbose 0
19set ::valgrind 0
20set ::pause_on_error 0
21set ::simulate_error 0
22set ::failed 0
23set ::sentinel_instances {}
24set ::redis_instances {}
25set ::sentinel_base_port 20000
26set ::redis_base_port 30000
27set ::pids {} ; # We kill everything at exit
28set ::dirs {} ; # We remove all the temp dirs at exit
29set ::run_matching {} ; # If non empty, only tests matching pattern are run.
30
31if {[catch {cd tmp}]} {
32    puts "tmp directory not found."
33    puts "Please run this test from the Redis source root."
34    exit 1
35}
36
37# Execute the specified instance of the server specified by 'type', using
38# the provided configuration file. Returns the PID of the process.
39proc exec_instance {type cfgfile} {
40    if {$type eq "redis"} {
41        set prgname redis-server
42    } elseif {$type eq "sentinel"} {
43        set prgname redis-sentinel
44    } else {
45        error "Unknown instance type."
46    }
47
48    if {$::valgrind} {
49        set pid [exec valgrind --track-origins=yes --suppressions=../../../src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full ../../../src/${prgname} $cfgfile &]
50    } else {
51        set pid [exec ../../../src/${prgname} $cfgfile &]
52    }
53    return $pid
54}
55
56# Spawn a redis or sentinel instance, depending on 'type'.
57proc spawn_instance {type base_port count {conf {}}} {
58    for {set j 0} {$j < $count} {incr j} {
59        set port [find_available_port $base_port]
60        incr base_port
61        puts "Starting $type #$j at port $port"
62
63        # Create a directory for this instance.
64        set dirname "${type}_${j}"
65        lappend ::dirs $dirname
66        catch {exec rm -rf $dirname}
67        file mkdir $dirname
68
69        # Write the instance config file.
70        set cfgfile [file join $dirname $type.conf]
71        set cfg [open $cfgfile w]
72        puts $cfg "port $port"
73        puts $cfg "dir ./$dirname"
74        puts $cfg "logfile log.txt"
75        # Add additional config files
76        foreach directive $conf {
77            puts $cfg $directive
78        }
79        close $cfg
80
81        # Finally exec it and remember the pid for later cleanup.
82        set pid [exec_instance $type $cfgfile]
83        lappend ::pids $pid
84
85        # Check availability
86        if {[server_is_up 127.0.0.1 $port 100] == 0} {
87            abort_sentinel_test "Problems starting $type #$j: ping timeout"
88        }
89
90        # Push the instance into the right list
91        set link [redis 127.0.0.1 $port]
92        $link reconnect 1
93        lappend ::${type}_instances [list \
94            pid $pid \
95            host 127.0.0.1 \
96            port $port \
97            link $link \
98        ]
99    }
100}
101
102proc log_crashes {} {
103    set start_pattern {*REDIS BUG REPORT START*}
104    set logs [glob */log.txt]
105    foreach log $logs {
106        set fd [open $log]
107        set found 0
108        while {[gets $fd line] >= 0} {
109            if {[string match $start_pattern $line]} {
110                puts "\n*** Crash report found in $log ***"
111                set found 1
112            }
113            if {$found} {puts $line}
114        }
115    }
116}
117
118proc cleanup {} {
119    puts "Cleaning up..."
120    log_crashes
121    foreach pid $::pids {
122        catch {exec kill -9 $pid}
123    }
124    foreach dir $::dirs {
125        catch {exec rm -rf $dir}
126    }
127}
128
129proc abort_sentinel_test msg {
130    incr ::failed
131    puts "WARNING: Aborting the test."
132    puts ">>>>>>>> $msg"
133    if {$::pause_on_error} pause_on_error
134    cleanup
135    exit 1
136}
137
138proc parse_options {} {
139    for {set j 0} {$j < [llength $::argv]} {incr j} {
140        set opt [lindex $::argv $j]
141        set val [lindex $::argv [expr $j+1]]
142        if {$opt eq "--single"} {
143            incr j
144            set ::run_matching "*${val}*"
145        } elseif {$opt eq "--pause-on-error"} {
146            set ::pause_on_error 1
147        } elseif {$opt eq "--fail"} {
148            set ::simulate_error 1
149        } elseif {$opt eq {--valgrind}} {
150            set ::valgrind 1
151        } elseif {$opt eq "--help"} {
152            puts "Hello, I'm sentinel.tcl and I run Sentinel unit tests."
153            puts "\nOptions:"
154            puts "--single <pattern>      Only runs tests specified by pattern."
155            puts "--pause-on-error        Pause for manual inspection on error."
156            puts "--fail                  Simulate a test failure."
157            puts "--valgrind              Run with valgrind."
158            puts "--help                  Shows this help."
159            exit 0
160        } else {
161            puts "Unknown option $opt"
162            exit 1
163        }
164    }
165}
166
167# If --pause-on-error option was passed at startup this function is called
168# on error in order to give the developer a chance to understand more about
169# the error condition while the instances are still running.
170proc pause_on_error {} {
171    puts ""
172    puts [colorstr yellow "*** Please inspect the error now ***"]
173    puts "\nType \"continue\" to resume the test, \"help\" for help screen.\n"
174    while 1 {
175        puts -nonewline "> "
176        flush stdout
177        set line [gets stdin]
178        set argv [split $line " "]
179        set cmd [lindex $argv 0]
180        if {$cmd eq {continue}} {
181            break
182        } elseif {$cmd eq {show-redis-logs}} {
183            set count 10
184            if {[lindex $argv 1] ne {}} {set count [lindex $argv 1]}
185            foreach_redis_id id {
186                puts "=== REDIS $id ===="
187                puts [exec tail -$count redis_$id/log.txt]
188                puts "---------------------\n"
189            }
190        } elseif {$cmd eq {show-sentinel-logs}} {
191            set count 10
192            if {[lindex $argv 1] ne {}} {set count [lindex $argv 1]}
193            foreach_sentinel_id id {
194                puts "=== SENTINEL $id ===="
195                puts [exec tail -$count sentinel_$id/log.txt]
196                puts "---------------------\n"
197            }
198        } elseif {$cmd eq {ls}} {
199            foreach_redis_id id {
200                puts -nonewline "Redis $id"
201                set errcode [catch {
202                    set str {}
203                    append str "@[RI $id tcp_port]: "
204                    append str "[RI $id role] "
205                    if {[RI $id role] eq {slave}} {
206                        append str "[RI $id master_host]:[RI $id master_port]"
207                    }
208                    set str
209                } retval]
210                if {$errcode} {
211                    puts " -- $retval"
212                } else {
213                    puts $retval
214                }
215            }
216            foreach_sentinel_id id {
217                puts -nonewline "Sentinel $id"
218                set errcode [catch {
219                    set str {}
220                    append str "@[SI $id tcp_port]: "
221                    append str "[join [S $id sentinel get-master-addr-by-name mymaster]]"
222                    set str
223                } retval]
224                if {$errcode} {
225                    puts " -- $retval"
226                } else {
227                    puts $retval
228                }
229            }
230        } elseif {$cmd eq {help}} {
231            puts "ls                     List Sentinel and Redis instances."
232            puts "show-sentinel-logs \[N\] Show latest N lines of logs."
233            puts "show-redis-logs \[N\]    Show latest N lines of logs."
234            puts "S <id> cmd ... arg     Call command in Sentinel <id>."
235            puts "R <id> cmd ... arg     Call command in Redis <id>."
236            puts "SI <id> <field>        Show Sentinel <id> INFO <field>."
237            puts "RI <id> <field>        Show Sentinel <id> INFO <field>."
238            puts "continue               Resume test."
239        } else {
240            set errcode [catch {eval $line} retval]
241            if {$retval ne {}} {puts "$retval"}
242        }
243    }
244}
245
246# We redefine 'test' as for Sentinel we don't use the server-client
247# architecture for the test, everything is sequential.
248proc test {descr code} {
249    set ts [clock format [clock seconds] -format %H:%M:%S]
250    puts -nonewline "$ts> $descr: "
251    flush stdout
252
253    if {[catch {set retval [uplevel 1 $code]} error]} {
254        incr ::failed
255        if {[string match "assertion:*" $error]} {
256            set msg [string range $error 10 end]
257            puts [colorstr red $msg]
258            if {$::pause_on_error} pause_on_error
259            puts "(Jumping to next unit after error)"
260            return -code continue
261        } else {
262            # Re-raise, let handler up the stack take care of this.
263            error $error $::errorInfo
264        }
265    } else {
266        puts [colorstr green OK]
267    }
268}
269
270# Check memory leaks when running on OSX using the "leaks" utility.
271proc check_leaks instance_types {
272    if {[string match {*Darwin*} [exec uname -a]]} {
273        puts -nonewline "Testing for memory leaks..."; flush stdout
274        foreach type $instance_types {
275            foreach_instance_id [set ::${type}_instances] id {
276                if {[instance_is_killed $type $id]} continue
277                set pid [get_instance_attrib $type $id pid]
278                set output {0 leaks}
279                catch {exec leaks $pid} output
280                if {[string match {*process does not exist*} $output] ||
281                    [string match {*cannot examine*} $output]} {
282                    # In a few tests we kill the server process.
283                    set output "0 leaks"
284                } else {
285                    puts -nonewline "$type/$pid "
286                    flush stdout
287                }
288                if {![string match {*0 leaks*} $output]} {
289                    puts [colorstr red "=== MEMORY LEAK DETECTED ==="]
290                    puts "Instance type $type, ID $id:"
291                    puts $output
292                    puts "==="
293                    incr ::failed
294                }
295            }
296        }
297        puts ""
298    }
299}
300
301# Execute all the units inside the 'tests' directory.
302proc run_tests {} {
303    set tests [lsort [glob ../tests/*]]
304    foreach test $tests {
305        if {$::run_matching ne {} && [string match $::run_matching $test] == 0} {
306            continue
307        }
308        if {[file isdirectory $test]} continue
309        puts [colorstr yellow "Testing unit: [lindex [file split $test] end]"]
310        source $test
311        check_leaks {redis sentinel}
312    }
313}
314
315# Print a message and exists with 0 / 1 according to zero or more failures.
316proc end_tests {} {
317    if {$::failed == 0} {
318        puts "GOOD! No errors."
319        exit 0
320    } else {
321        puts "WARNING $::failed tests faield."
322        exit 1
323    }
324}
325
326# The "S" command is used to interact with the N-th Sentinel.
327# The general form is:
328#
329# S <sentinel-id> command arg arg arg ...
330#
331# Example to ping the Sentinel 0 (first instance): S 0 PING
332proc S {n args} {
333    set s [lindex $::sentinel_instances $n]
334    [dict get $s link] {*}$args
335}
336
337# Like R but to chat with Redis instances.
338proc R {n args} {
339    set r [lindex $::redis_instances $n]
340    [dict get $r link] {*}$args
341}
342
343proc get_info_field {info field} {
344    set fl [string length $field]
345    append field :
346    foreach line [split $info "\n"] {
347        set line [string trim $line "\r\n "]
348        if {[string range $line 0 $fl] eq $field} {
349            return [string range $line [expr {$fl+1}] end]
350        }
351    }
352    return {}
353}
354
355proc SI {n field} {
356    get_info_field [S $n info] $field
357}
358
359proc RI {n field} {
360    get_info_field [R $n info] $field
361}
362
363# Iterate over IDs of sentinel or redis instances.
364proc foreach_instance_id {instances idvar code} {
365    upvar 1 $idvar id
366    for {set id 0} {$id < [llength $instances]} {incr id} {
367        set errcode [catch {uplevel 1 $code} result]
368        if {$errcode == 1} {
369            error $result $::errorInfo $::errorCode
370        } elseif {$errcode == 4} {
371            continue
372        } elseif {$errcode == 3} {
373            break
374        } elseif {$errcode != 0} {
375            return -code $errcode $result
376        }
377    }
378}
379
380proc foreach_sentinel_id {idvar code} {
381    set errcode [catch {uplevel 1 [list foreach_instance_id $::sentinel_instances $idvar $code]} result]
382    return -code $errcode $result
383}
384
385proc foreach_redis_id {idvar code} {
386    set errcode [catch {uplevel 1 [list foreach_instance_id $::redis_instances $idvar $code]} result]
387    return -code $errcode $result
388}
389
390# Get the specific attribute of the specified instance type, id.
391proc get_instance_attrib {type id attrib} {
392    dict get [lindex [set ::${type}_instances] $id] $attrib
393}
394
395# Set the specific attribute of the specified instance type, id.
396proc set_instance_attrib {type id attrib newval} {
397    set d [lindex [set ::${type}_instances] $id]
398    dict set d $attrib $newval
399    lset ::${type}_instances $id $d
400}
401
402# Create a master-slave cluster of the given number of total instances.
403# The first instance "0" is the master, all others are configured as
404# slaves.
405proc create_redis_master_slave_cluster n {
406    foreach_redis_id id {
407        if {$id == 0} {
408            # Our master.
409            R $id slaveof no one
410            R $id flushall
411        } elseif {$id < $n} {
412            R $id slaveof [get_instance_attrib redis 0 host] \
413                          [get_instance_attrib redis 0 port]
414        } else {
415            # Instances not part of the cluster.
416            R $id slaveof no one
417        }
418    }
419    # Wait for all the slaves to sync.
420    wait_for_condition 1000 50 {
421        [RI 0 connected_slaves] == ($n-1)
422    } else {
423        fail "Unable to create a master-slaves cluster."
424    }
425}
426
427proc get_instance_id_by_port {type port} {
428    foreach_${type}_id id {
429        if {[get_instance_attrib $type $id port] == $port} {
430            return $id
431        }
432    }
433    fail "Instance $type port $port not found."
434}
435
436# Kill an instance of the specified type/id with SIGKILL.
437# This function will mark the instance PID as -1 to remember that this instance
438# is no longer running and will remove its PID from the list of pids that
439# we kill at cleanup.
440#
441# The instance can be restarted with restart-instance.
442proc kill_instance {type id} {
443    set pid [get_instance_attrib $type $id pid]
444    set port [get_instance_attrib $type $id port]
445
446    if {$pid == -1} {
447        error "You tried to kill $type $id twice."
448    }
449
450    exec kill -9 $pid
451    set_instance_attrib $type $id pid -1
452    set_instance_attrib $type $id link you_tried_to_talk_with_killed_instance
453
454    # Remove the PID from the list of pids to kill at exit.
455    set ::pids [lsearch -all -inline -not -exact $::pids $pid]
456
457    # Wait for the port it was using to be available again, so that's not
458    # an issue to start a new server ASAP with the same port.
459    set retry 10
460    while {[incr retry -1]} {
461        set port_is_free [catch {set s [socket 127.0.01 $port]}]
462        if {$port_is_free} break
463        catch {close $s}
464        after 1000
465    }
466    if {$retry == 0} {
467        error "Port $port does not return available after killing instance."
468    }
469}
470
471# Return true of the instance of the specified type/id is killed.
472proc instance_is_killed {type id} {
473    set pid [get_instance_attrib $type $id pid]
474    expr {$pid == -1}
475}
476
477# Restart an instance previously killed by kill_instance
478proc restart_instance {type id} {
479    set dirname "${type}_${id}"
480    set cfgfile [file join $dirname $type.conf]
481    set port [get_instance_attrib $type $id port]
482
483    # Execute the instance with its old setup and append the new pid
484    # file for cleanup.
485    set pid [exec_instance $type $cfgfile]
486    set_instance_attrib $type $id pid $pid
487    lappend ::pids $pid
488
489    # Check that the instance is running
490    if {[server_is_up 127.0.0.1 $port 100] == 0} {
491        abort_sentinel_test "Problems starting $type #$id: ping timeout"
492    }
493
494    # Connect with it with a fresh link
495    set link [redis 127.0.0.1 $port]
496    $link reconnect 1
497    set_instance_attrib $type $id link $link
498
499    # Make sure the instance is not loading the dataset when this
500    # function returns.
501    while 1 {
502        catch {[$link ping]} retval
503        if {[string match {*LOADING*} $retval]} {
504            after 100
505            continue
506        } else {
507            break
508        }
509    }
510}
511
512