start_server {tags {"psync2"}} {
start_server {} {
start_server {} {
start_server {} {
start_server {} {
    set master_id 0                 ; # Current master
    set start_time [clock seconds]  ; # Test start time
    set counter_value 0             ; # Current value of the Redis counter "x"

    # Config
    set debug_msg 0                 ; # Enable additional debug messages

    set no_exit 0                   ; # Do not exit at end of the test

    set duration 20                 ; # Total test seconds

    set genload 1                   ; # Load master with writes at every cycle

    set genload_time 5000           ; # Writes duration time in ms

    set disconnect 1                ; # Break replication link between random
                                      # master and slave instances while the
                                      # master is loaded with writes.

    set disconnect_period 1000      ; # Disconnect repl link every N ms.

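    # Create clients to the five instances started above. Note that srv takes
    # a non-positive index: 0 is the innermost (most recently started) server,
    # and negative offsets address the servers started earlier in the nested
    # start_server stack.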
    for {set j 0} {$j < 5} {incr j} {
        set R($j) [srv [expr 0-$j] client]
        set R_host($j) [srv [expr 0-$j] host]
        set R_port($j) [srv [expr 0-$j] port]
        if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"}
    }

    set cycle 1
    while {([clock seconds]-$start_time) < $duration} {
        test "PSYNC2: --- CYCLE $cycle ---" {}
        incr cycle

        # Create a random replication layout.
        # Start with switching master (this simulates a failover).

        # 1) Select the new master.
        set master_id [randomInt 5]
        set used [list $master_id]
        test "PSYNC2: \[NEW LAYOUT\] Set #$master_id as master" {
            $R($master_id) slaveof no one
            if {$counter_value == 0} {
                $R($master_id) set x $counter_value
            }
        }

        # 2) Attach each of the remaining instances as a slave of a random
        # instance already part of the layout, so that slaves may also chain
        # off other slaves.
        while {[llength $used] != 5} {
            while 1 {
                set slave_id [randomInt 5]
                if {[lsearch -exact $used $slave_id] == -1} break
            }
            set rand [randomInt [llength $used]]
            set mid [lindex $used $rand]
            set master_host $R_host($mid)
            set master_port $R_port($mid)

            test "PSYNC2: Set #$slave_id to replicate from #$mid" {
                $R($slave_id) slaveof $master_host $master_port
            }
            lappend used $slave_id
        }

        # 3) Increment the counter and wait for all the instances
        # to converge.
        test "PSYNC2: cluster is consistent after failover" {
            $R($master_id) incr x; incr counter_value
            for {set j 0} {$j < 5} {incr j} {
                wait_for_condition 50 1000 {
                    [$R($j) get x] == $counter_value
                } else {
                    fail "Instance #$j x variable is inconsistent"
                }
            }
        }

        # 4) Generate load while breaking the connection of random
        # slave-master pairs.
        test "PSYNC2: generate load while killing replication links" {
            set t [clock milliseconds]
            set next_break [expr {$t+$disconnect_period}]
            while {[clock milliseconds]-$t < $genload_time} {
                if {$genload} {
                    $R($master_id) incr x; incr counter_value
                }
                # Use >= rather than == so that a break is not skipped when
                # the loop does not land exactly on the target millisecond.
                if {[clock milliseconds] >= $next_break} {
                    set next_break \
                        [expr {[clock milliseconds]+$disconnect_period}]
                    set slave_id [randomInt 5]
                    if {$disconnect} {
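                        # CLIENT KILL TYPE master closes the replica's link to
                        # its master, forcing it to reconnect and attempt a
                        # partial resynchronization while writes keep flowing.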
                        $R($slave_id) client kill type master
                        if {$debug_msg} {
                            puts "+++ Breaking link for replica #$slave_id"
                        }
                    }
                }
            }
        }

        # 5) Check that all the instances converge to the same value of the
        # counter after the load.
        set x [$R($master_id) get x]
        test "PSYNC2: cluster is consistent after load (x = $x)" {
            for {set j 0} {$j < 5} {incr j} {
                wait_for_condition 50 1000 {
                    [$R($j) get x] == $counter_value
                } else {
                    fail "Instance #$j x variable is inconsistent"
                }
            }
        }

        # Put down the old master so that it cannot generate more replication
        # stream. This way, in the next master switch, the time at which we
        # move the slaves away does not matter: each of them will have the
        # full history (otherwise PINGs would make certain slaves have more
        # history than others, and sometimes a full resync would be needed).
        $R($master_id) slaveof 127.0.0.1 0 ;# We use port zero to make it fail.

        if {$debug_msg} {
            for {set j 0} {$j < 5} {incr j} {
                puts "$j: sync_full: [status $R($j) sync_full]"
                puts "$j: id1      : [status $R($j) master_replid]:[status $R($j) master_repl_offset]"
                puts "$j: id2      : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]"
                puts "$j: backlog  : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]"
                puts "---"
            }
        }

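        # Each of the four replicas performed exactly one full sync when it
        # was attached for the first time; with PSYNC2, every later master
        # switch and link break should be resolved with a partial resync, so
        # the total number of full syncs must stay at 4 across all cycles.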
        test "PSYNC2: total sum of full synchronizations is exactly 4" {
            set sum 0
            for {set j 0} {$j < 5} {incr j} {
                incr sum [status $R($j) sync_full]
            }
            assert {$sum == 4}
        }

        # Limit the maximum number of cycles anyway. This is useful when the
        # test is skipped via the --only option of the test suite: in that
        # case we don't want to spend many seconds just cycling through
        # skipped tests.
        if {$cycle > 50} break
    }

    test "PSYNC2: Bring the master back again for next test" {
        $R($master_id) slaveof no one
        set master_host $R_host($master_id)
        set master_port $R_port($master_id)
        for {set j 0} {$j < 5} {incr j} {
            if {$j == $master_id} continue
            $R($j) slaveof $master_host $master_port
        }

        # Wait for slaves to sync
        wait_for_condition 50 1000 {
            [status $R($master_id) connected_slaves] == 4
        } else {
            fail "Replica not reconnecting"
        }
    }

    test "PSYNC2: Partial resync after restart using RDB aux fields" {
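        # DEBUG RESTART makes the slave save an RDB file and restart itself.
        # Because the RDB stores the replication ID and offset as aux fields,
        # the slave should be able to continue with a partial resync instead
        # of a full one when it reconnects.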
        # Pick one of the slaves (the instance right after the master).
        set slave_id [expr {($master_id+1)%5}]
        set sync_count [status $R($master_id) sync_full]
        set sync_partial [status $R($master_id) sync_partial_ok]
        catch {
            $R($slave_id) config rewrite
            $R($slave_id) debug restart
        }
        # Note: just waiting for connected_slaves==4 would be racy, since we
        # might do the check before the master has realized that the slave
        # disconnected.
        wait_for_condition 50 1000 {
            [status $R($master_id) sync_partial_ok] == $sync_partial + 1
        } else {
            fail "Replica not reconnecting"
        }
        set new_sync_count [status $R($master_id) sync_full]
        assert {$sync_count == $new_sync_count}
    }

    test "PSYNC2: Replica RDB restart with EVALSHA in backlog issue #4483" {
        # Pick one of the slaves (the instance right after the master).
        set slave_id [expr {($master_id+1)%5}]
        set sync_count [status $R($master_id) sync_full]

        # Make sure to replicate the first EVAL while the slave is online,
        # so that the script becomes part of the set the master believes it
        # is safe to propagate as EVALSHA.
        $R($master_id) EVAL {return redis.call("incr","__mycounter")} 0
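        # The SHA1 below is the hash of the script above, so this EVALSHA
        # hits the script cache that the EVAL call just populated.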
        $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0

        # Wait for the two to sync
        wait_for_condition 50 1000 {
            [$R($master_id) debug digest] == [$R($slave_id) debug digest]
        } else {
            fail "Replica not reconnecting"
        }

        # Prevent the slave from receiving master updates, and at the same
        # time send the script several more times to the master, so that we
        # end up with EVALSHA commands inside the replication backlog.
        $R($slave_id) slaveof 127.0.0.1 0 ;# Port zero makes the link fail, as above.

        $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0
        $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0
        $R($master_id) EVALSHA e6e0b547500efcec21eddb619ac3724081afee89 0

        catch {
            $R($slave_id) config rewrite
            $R($slave_id) debug restart
        }

        # Reconfigure the slave correctly again once it's back online.
        set retry 50
        while {$retry} {
            if {[catch {
                $R($slave_id) slaveof $master_host $master_port
            }]} {
                after 1000
            } else {
                break
            }
            incr retry -1
        }

        # The master should be back at 4 slaves eventually
        wait_for_condition 50 1000 {
            [status $R($master_id) connected_slaves] == 4
        } else {
            fail "Replica not reconnecting"
        }
        set new_sync_count [status $R($master_id) sync_full]
        assert {$sync_count == $new_sync_count}

        # However, if the slave started with the full state of the scripting
        # engine, we should now have the same digest.
        wait_for_condition 50 1000 {
            [$R($master_id) debug digest] == [$R($slave_id) debug digest]
        } else {
            fail "Debug digest mismatch between master and replica in post-restart handshake"
        }
    }

    if {$no_exit} {
        while 1 { puts -nonewline .; flush stdout; after 1000}
    }

}}}}}