1# Multi-instance test framework. 2# This is used in order to test Sentinel and Redis Cluster, and provides 3# basic capabilities for spawning and handling N parallel Redis / Sentinel 4# instances. 5# 6# Copyright (C) 2014 Salvatore Sanfilippo [email protected] 7# This software is released under the BSD License. See the COPYING file for 8# more information. 9 10package require Tcl 8.5 11 12set tcl_precision 17 13source ../support/redis.tcl 14source ../support/util.tcl 15source ../support/server.tcl 16source ../support/test.tcl 17 18set ::verbose 0 19set ::valgrind 0 20set ::pause_on_error 0 21set ::simulate_error 0 22set ::failed 0 23set ::sentinel_instances {} 24set ::redis_instances {} 25set ::sentinel_base_port 20000 26set ::redis_base_port 30000 27set ::pids {} ; # We kill everything at exit 28set ::dirs {} ; # We remove all the temp dirs at exit 29set ::run_matching {} ; # If non empty, only tests matching pattern are run. 30 31if {[catch {cd tmp}]} { 32 puts "tmp directory not found." 33 puts "Please run this test from the Redis source root." 34 exit 1 35} 36 37# Execute the specified instance of the server specified by 'type', using 38# the provided configuration file. Returns the PID of the process. 39proc exec_instance {type cfgfile} { 40 if {$type eq "redis"} { 41 set prgname redis-server 42 } elseif {$type eq "sentinel"} { 43 set prgname redis-sentinel 44 } else { 45 error "Unknown instance type." 46 } 47 48 if {$::valgrind} { 49 set pid [exec valgrind --track-origins=yes --suppressions=../../../src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full ../../../src/${prgname} $cfgfile &] 50 } else { 51 set pid [exec ../../../src/${prgname} $cfgfile &] 52 } 53 return $pid 54} 55 56# Spawn a redis or sentinel instance, depending on 'type'. 57proc spawn_instance {type base_port count {conf {}}} { 58 for {set j 0} {$j < $count} {incr j} { 59 set port [find_available_port $base_port] 60 incr base_port 61 puts "Starting $type #$j at port $port" 62 63 # Create a directory for this instance. 64 set dirname "${type}_${j}" 65 lappend ::dirs $dirname 66 catch {exec rm -rf $dirname} 67 file mkdir $dirname 68 69 # Write the instance config file. 70 set cfgfile [file join $dirname $type.conf] 71 set cfg [open $cfgfile w] 72 puts $cfg "port $port" 73 puts $cfg "dir ./$dirname" 74 puts $cfg "logfile log.txt" 75 # Add additional config files 76 foreach directive $conf { 77 puts $cfg $directive 78 } 79 close $cfg 80 81 # Finally exec it and remember the pid for later cleanup. 82 set pid [exec_instance $type $cfgfile] 83 lappend ::pids $pid 84 85 # Check availability 86 if {[server_is_up 127.0.0.1 $port 100] == 0} { 87 abort_sentinel_test "Problems starting $type #$j: ping timeout" 88 } 89 90 # Push the instance into the right list 91 set link [redis 127.0.0.1 $port] 92 $link reconnect 1 93 lappend ::${type}_instances [list \ 94 pid $pid \ 95 host 127.0.0.1 \ 96 port $port \ 97 link $link \ 98 ] 99 } 100} 101 102proc log_crashes {} { 103 set start_pattern {*REDIS BUG REPORT START*} 104 set logs [glob */log.txt] 105 foreach log $logs { 106 set fd [open $log] 107 set found 0 108 while {[gets $fd line] >= 0} { 109 if {[string match $start_pattern $line]} { 110 puts "\n*** Crash report found in $log ***" 111 set found 1 112 } 113 if {$found} {puts $line} 114 } 115 } 116} 117 118proc cleanup {} { 119 puts "Cleaning up..." 120 log_crashes 121 foreach pid $::pids { 122 catch {exec kill -9 $pid} 123 } 124 foreach dir $::dirs { 125 catch {exec rm -rf $dir} 126 } 127} 128 129proc abort_sentinel_test msg { 130 incr ::failed 131 puts "WARNING: Aborting the test." 132 puts ">>>>>>>> $msg" 133 if {$::pause_on_error} pause_on_error 134 cleanup 135 exit 1 136} 137 138proc parse_options {} { 139 for {set j 0} {$j < [llength $::argv]} {incr j} { 140 set opt [lindex $::argv $j] 141 set val [lindex $::argv [expr $j+1]] 142 if {$opt eq "--single"} { 143 incr j 144 set ::run_matching "*${val}*" 145 } elseif {$opt eq "--pause-on-error"} { 146 set ::pause_on_error 1 147 } elseif {$opt eq "--fail"} { 148 set ::simulate_error 1 149 } elseif {$opt eq {--valgrind}} { 150 set ::valgrind 1 151 } elseif {$opt eq "--help"} { 152 puts "Hello, I'm sentinel.tcl and I run Sentinel unit tests." 153 puts "\nOptions:" 154 puts "--single <pattern> Only runs tests specified by pattern." 155 puts "--pause-on-error Pause for manual inspection on error." 156 puts "--fail Simulate a test failure." 157 puts "--valgrind Run with valgrind." 158 puts "--help Shows this help." 159 exit 0 160 } else { 161 puts "Unknown option $opt" 162 exit 1 163 } 164 } 165} 166 167# If --pause-on-error option was passed at startup this function is called 168# on error in order to give the developer a chance to understand more about 169# the error condition while the instances are still running. 170proc pause_on_error {} { 171 puts "" 172 puts [colorstr yellow "*** Please inspect the error now ***"] 173 puts "\nType \"continue\" to resume the test, \"help\" for help screen.\n" 174 while 1 { 175 puts -nonewline "> " 176 flush stdout 177 set line [gets stdin] 178 set argv [split $line " "] 179 set cmd [lindex $argv 0] 180 if {$cmd eq {continue}} { 181 break 182 } elseif {$cmd eq {show-redis-logs}} { 183 set count 10 184 if {[lindex $argv 1] ne {}} {set count [lindex $argv 1]} 185 foreach_redis_id id { 186 puts "=== REDIS $id ====" 187 puts [exec tail -$count redis_$id/log.txt] 188 puts "---------------------\n" 189 } 190 } elseif {$cmd eq {show-sentinel-logs}} { 191 set count 10 192 if {[lindex $argv 1] ne {}} {set count [lindex $argv 1]} 193 foreach_sentinel_id id { 194 puts "=== SENTINEL $id ====" 195 puts [exec tail -$count sentinel_$id/log.txt] 196 puts "---------------------\n" 197 } 198 } elseif {$cmd eq {ls}} { 199 foreach_redis_id id { 200 puts -nonewline "Redis $id" 201 set errcode [catch { 202 set str {} 203 append str "@[RI $id tcp_port]: " 204 append str "[RI $id role] " 205 if {[RI $id role] eq {slave}} { 206 append str "[RI $id master_host]:[RI $id master_port]" 207 } 208 set str 209 } retval] 210 if {$errcode} { 211 puts " -- $retval" 212 } else { 213 puts $retval 214 } 215 } 216 foreach_sentinel_id id { 217 puts -nonewline "Sentinel $id" 218 set errcode [catch { 219 set str {} 220 append str "@[SI $id tcp_port]: " 221 append str "[join [S $id sentinel get-master-addr-by-name mymaster]]" 222 set str 223 } retval] 224 if {$errcode} { 225 puts " -- $retval" 226 } else { 227 puts $retval 228 } 229 } 230 } elseif {$cmd eq {help}} { 231 puts "ls List Sentinel and Redis instances." 232 puts "show-sentinel-logs \[N\] Show latest N lines of logs." 233 puts "show-redis-logs \[N\] Show latest N lines of logs." 234 puts "S <id> cmd ... arg Call command in Sentinel <id>." 235 puts "R <id> cmd ... arg Call command in Redis <id>." 236 puts "SI <id> <field> Show Sentinel <id> INFO <field>." 237 puts "RI <id> <field> Show Sentinel <id> INFO <field>." 238 puts "continue Resume test." 239 } else { 240 set errcode [catch {eval $line} retval] 241 if {$retval ne {}} {puts "$retval"} 242 } 243 } 244} 245 246# We redefine 'test' as for Sentinel we don't use the server-client 247# architecture for the test, everything is sequential. 248proc test {descr code} { 249 set ts [clock format [clock seconds] -format %H:%M:%S] 250 puts -nonewline "$ts> $descr: " 251 flush stdout 252 253 if {[catch {set retval [uplevel 1 $code]} error]} { 254 incr ::failed 255 if {[string match "assertion:*" $error]} { 256 set msg [string range $error 10 end] 257 puts [colorstr red $msg] 258 if {$::pause_on_error} pause_on_error 259 puts "(Jumping to next unit after error)" 260 return -code continue 261 } else { 262 # Re-raise, let handler up the stack take care of this. 263 error $error $::errorInfo 264 } 265 } else { 266 puts [colorstr green OK] 267 } 268} 269 270# Check memory leaks when running on OSX using the "leaks" utility. 271proc check_leaks instance_types { 272 if {[string match {*Darwin*} [exec uname -a]]} { 273 puts -nonewline "Testing for memory leaks..."; flush stdout 274 foreach type $instance_types { 275 foreach_instance_id [set ::${type}_instances] id { 276 if {[instance_is_killed $type $id]} continue 277 set pid [get_instance_attrib $type $id pid] 278 set output {0 leaks} 279 catch {exec leaks $pid} output 280 if {[string match {*process does not exist*} $output] || 281 [string match {*cannot examine*} $output]} { 282 # In a few tests we kill the server process. 283 set output "0 leaks" 284 } else { 285 puts -nonewline "$type/$pid " 286 flush stdout 287 } 288 if {![string match {*0 leaks*} $output]} { 289 puts [colorstr red "=== MEMORY LEAK DETECTED ==="] 290 puts "Instance type $type, ID $id:" 291 puts $output 292 puts "===" 293 incr ::failed 294 } 295 } 296 } 297 puts "" 298 } 299} 300 301# Execute all the units inside the 'tests' directory. 302proc run_tests {} { 303 set tests [lsort [glob ../tests/*]] 304 foreach test $tests { 305 if {$::run_matching ne {} && [string match $::run_matching $test] == 0} { 306 continue 307 } 308 if {[file isdirectory $test]} continue 309 puts [colorstr yellow "Testing unit: [lindex [file split $test] end]"] 310 source $test 311 check_leaks {redis sentinel} 312 } 313} 314 315# Print a message and exists with 0 / 1 according to zero or more failures. 316proc end_tests {} { 317 if {$::failed == 0} { 318 puts "GOOD! No errors." 319 exit 0 320 } else { 321 puts "WARNING $::failed tests faield." 322 exit 1 323 } 324} 325 326# The "S" command is used to interact with the N-th Sentinel. 327# The general form is: 328# 329# S <sentinel-id> command arg arg arg ... 330# 331# Example to ping the Sentinel 0 (first instance): S 0 PING 332proc S {n args} { 333 set s [lindex $::sentinel_instances $n] 334 [dict get $s link] {*}$args 335} 336 337# Like R but to chat with Redis instances. 338proc R {n args} { 339 set r [lindex $::redis_instances $n] 340 [dict get $r link] {*}$args 341} 342 343proc get_info_field {info field} { 344 set fl [string length $field] 345 append field : 346 foreach line [split $info "\n"] { 347 set line [string trim $line "\r\n "] 348 if {[string range $line 0 $fl] eq $field} { 349 return [string range $line [expr {$fl+1}] end] 350 } 351 } 352 return {} 353} 354 355proc SI {n field} { 356 get_info_field [S $n info] $field 357} 358 359proc RI {n field} { 360 get_info_field [R $n info] $field 361} 362 363# Iterate over IDs of sentinel or redis instances. 364proc foreach_instance_id {instances idvar code} { 365 upvar 1 $idvar id 366 for {set id 0} {$id < [llength $instances]} {incr id} { 367 set errcode [catch {uplevel 1 $code} result] 368 if {$errcode == 1} { 369 error $result $::errorInfo $::errorCode 370 } elseif {$errcode == 4} { 371 continue 372 } elseif {$errcode == 3} { 373 break 374 } elseif {$errcode != 0} { 375 return -code $errcode $result 376 } 377 } 378} 379 380proc foreach_sentinel_id {idvar code} { 381 set errcode [catch {uplevel 1 [list foreach_instance_id $::sentinel_instances $idvar $code]} result] 382 return -code $errcode $result 383} 384 385proc foreach_redis_id {idvar code} { 386 set errcode [catch {uplevel 1 [list foreach_instance_id $::redis_instances $idvar $code]} result] 387 return -code $errcode $result 388} 389 390# Get the specific attribute of the specified instance type, id. 391proc get_instance_attrib {type id attrib} { 392 dict get [lindex [set ::${type}_instances] $id] $attrib 393} 394 395# Set the specific attribute of the specified instance type, id. 396proc set_instance_attrib {type id attrib newval} { 397 set d [lindex [set ::${type}_instances] $id] 398 dict set d $attrib $newval 399 lset ::${type}_instances $id $d 400} 401 402# Create a master-slave cluster of the given number of total instances. 403# The first instance "0" is the master, all others are configured as 404# slaves. 405proc create_redis_master_slave_cluster n { 406 foreach_redis_id id { 407 if {$id == 0} { 408 # Our master. 409 R $id slaveof no one 410 R $id flushall 411 } elseif {$id < $n} { 412 R $id slaveof [get_instance_attrib redis 0 host] \ 413 [get_instance_attrib redis 0 port] 414 } else { 415 # Instances not part of the cluster. 416 R $id slaveof no one 417 } 418 } 419 # Wait for all the slaves to sync. 420 wait_for_condition 1000 50 { 421 [RI 0 connected_slaves] == ($n-1) 422 } else { 423 fail "Unable to create a master-slaves cluster." 424 } 425} 426 427proc get_instance_id_by_port {type port} { 428 foreach_${type}_id id { 429 if {[get_instance_attrib $type $id port] == $port} { 430 return $id 431 } 432 } 433 fail "Instance $type port $port not found." 434} 435 436# Kill an instance of the specified type/id with SIGKILL. 437# This function will mark the instance PID as -1 to remember that this instance 438# is no longer running and will remove its PID from the list of pids that 439# we kill at cleanup. 440# 441# The instance can be restarted with restart-instance. 442proc kill_instance {type id} { 443 set pid [get_instance_attrib $type $id pid] 444 set port [get_instance_attrib $type $id port] 445 446 if {$pid == -1} { 447 error "You tried to kill $type $id twice." 448 } 449 450 exec kill -9 $pid 451 set_instance_attrib $type $id pid -1 452 set_instance_attrib $type $id link you_tried_to_talk_with_killed_instance 453 454 # Remove the PID from the list of pids to kill at exit. 455 set ::pids [lsearch -all -inline -not -exact $::pids $pid] 456 457 # Wait for the port it was using to be available again, so that's not 458 # an issue to start a new server ASAP with the same port. 459 set retry 10 460 while {[incr retry -1]} { 461 set port_is_free [catch {set s [socket 127.0.01 $port]}] 462 if {$port_is_free} break 463 catch {close $s} 464 after 1000 465 } 466 if {$retry == 0} { 467 error "Port $port does not return available after killing instance." 468 } 469} 470 471# Return true of the instance of the specified type/id is killed. 472proc instance_is_killed {type id} { 473 set pid [get_instance_attrib $type $id pid] 474 expr {$pid == -1} 475} 476 477# Restart an instance previously killed by kill_instance 478proc restart_instance {type id} { 479 set dirname "${type}_${id}" 480 set cfgfile [file join $dirname $type.conf] 481 set port [get_instance_attrib $type $id port] 482 483 # Execute the instance with its old setup and append the new pid 484 # file for cleanup. 485 set pid [exec_instance $type $cfgfile] 486 set_instance_attrib $type $id pid $pid 487 lappend ::pids $pid 488 489 # Check that the instance is running 490 if {[server_is_up 127.0.0.1 $port 100] == 0} { 491 abort_sentinel_test "Problems starting $type #$id: ping timeout" 492 } 493 494 # Connect with it with a fresh link 495 set link [redis 127.0.0.1 $port] 496 $link reconnect 1 497 set_instance_attrib $type $id link $link 498 499 # Make sure the instance is not loading the dataset when this 500 # function returns. 501 while 1 { 502 catch {[$link ping]} retval 503 if {[string match {*LOADING*} $retval]} { 504 after 100 505 continue 506 } else { 507 break 508 } 509 } 510} 511 512