1proc start_bg_complex_data {host port db ops} {
2    set tclsh [info nameofexecutable]
3    exec $tclsh tests/helpers/bg_complex_data.tcl $host $port $db $ops &
4}
5
6proc stop_bg_complex_data {handle} {
7    catch {exec /bin/kill -9 $handle}
8}
9
10# Creates a master-slave pair and breaks the link continuously to force
11# partial resyncs attempts, all this while flooding the master with
12# write queries.
13#
14# You can specify backlog size, ttl, delay before reconnection, test duration
15# in seconds, and an additional condition to verify at the end.
16#
17# If reconnect is > 0, the test actually try to break the connection and
18# reconnect with the master, otherwise just the initial synchronization is
19# checked for consistency.
20proc test_psync {descr duration backlog_size backlog_ttl delay cond diskless reconnect} {
21    start_server {tags {"repl"}} {
22        start_server {} {
23
24            set master [srv -1 client]
25            set master_host [srv -1 host]
26            set master_port [srv -1 port]
27            set slave [srv 0 client]
28
29            $master config set repl-backlog-size $backlog_size
30            $master config set repl-backlog-ttl $backlog_ttl
31            $master config set repl-diskless-sync $diskless
32            $master config set repl-diskless-sync-delay 1
33
34            set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000]
35            set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000]
36            set load_handle2 [start_bg_complex_data $master_host $master_port 12 100000]
37
38            test {Slave should be able to synchronize with the master} {
39                $slave slaveof $master_host $master_port
40                wait_for_condition 50 100 {
41                    [lindex [r role] 0] eq {slave} &&
42                    [lindex [r role] 3] eq {connected}
43                } else {
44                    fail "Replication not started."
45                }
46            }
47
48            # Check that the background clients are actually writing.
49            test {Detect write load to master} {
50                wait_for_condition 50 1000 {
51                    [$master dbsize] > 100
52                } else {
53                    fail "Can't detect write load from background clients."
54                }
55            }
56
57            test "Test replication partial resync: $descr (diskless: $diskless, reconnect: $reconnect)" {
58                # Now while the clients are writing data, break the maste-slave
59                # link multiple times.
60                if ($reconnect) {
61                    for {set j 0} {$j < $duration*10} {incr j} {
62                        after 100
63                        # catch {puts "MASTER [$master dbsize] keys, REPLICA [$slave dbsize] keys"}
64
65                        if {($j % 20) == 0} {
66                            catch {
67                                if {$delay} {
68                                    $slave multi
69                                    $slave client kill $master_host:$master_port
70                                    $slave debug sleep $delay
71                                    $slave exec
72                                } else {
73                                    $slave client kill $master_host:$master_port
74                                }
75                            }
76                        }
77                    }
78                }
79                stop_bg_complex_data $load_handle0
80                stop_bg_complex_data $load_handle1
81                stop_bg_complex_data $load_handle2
82
83                # Wait for the slave to reach the "online"
84                # state from the POV of the master.
85                set retry 5000
86                while {$retry} {
87                    set info [$master info]
88                    if {[string match {*slave0:*state=online*} $info]} {
89                        break
90                    } else {
91                        incr retry -1
92                        after 100
93                    }
94                }
95                if {$retry == 0} {
96                    error "assertion:Slave not correctly synchronized"
97                }
98
99                # Wait that slave acknowledge it is online so
100                # we are sure that DBSIZE and DEBUG DIGEST will not
101                # fail because of timing issues. (-LOADING error)
102                wait_for_condition 5000 100 {
103                    [lindex [$slave role] 3] eq {connected}
104                } else {
105                    fail "Slave still not connected after some time"
106                }
107
108                set retry 10
109                while {$retry && ([$master debug digest] ne [$slave debug digest])}\
110                {
111                    after 1000
112                    incr retry -1
113                }
114                assert {[$master dbsize] > 0}
115
116                if {[$master debug digest] ne [$slave debug digest]} {
117                    set csv1 [csvdump r]
118                    set csv2 [csvdump {r -1}]
119                    set fd [open /tmp/repldump1.txt w]
120                    puts -nonewline $fd $csv1
121                    close $fd
122                    set fd [open /tmp/repldump2.txt w]
123                    puts -nonewline $fd $csv2
124                    close $fd
125                    puts "Master - Replica inconsistency"
126                    puts "Run diff -u against /tmp/repldump*.txt for more info"
127                }
128                assert_equal [r debug digest] [r -1 debug digest]
129                eval $cond
130            }
131        }
132    }
133}
134
135foreach diskless {no yes} {
136    test_psync {no reconnection, just sync} 6 1000000 3600 0 {
137    } $diskless 0
138
139    test_psync {ok psync} 6 100000000 3600 0 {
140        assert {[s -1 sync_partial_ok] > 0}
141    } $diskless 1
142
143    test_psync {no backlog} 6 100 3600 0.5 {
144        assert {[s -1 sync_partial_err] > 0}
145    } $diskless 1
146
147    test_psync {ok after delay} 3 100000000 3600 3 {
148        assert {[s -1 sync_partial_ok] > 0}
149    } $diskless 1
150
151    test_psync {backlog expired} 3 100000000 1 3 {
152        assert {[s -1 sync_partial_err] > 0}
153    } $diskless 1
154}
155