1start_server {tags {"hll"}} {
2    test {HyperLogLog self test passes} {
3        catch {r pfselftest} e
4        set e
5    } {OK}
6
7    test {PFADD without arguments creates an HLL value} {
8        r pfadd hll
9        r exists hll
10    } {1}
11
12    test {Approximated cardinality after creation is zero} {
13        r pfcount hll
14    } {0}
15
16    test {PFADD returns 1 when at least 1 reg was modified} {
17        r pfadd hll a b c
18    } {1}
19
20    test {PFADD returns 0 when no reg was modified} {
21        r pfadd hll a b c
22    } {0}
23
24    test {PFADD works with empty string (regression)} {
25        r pfadd hll ""
26    }
27
28    # Note that the self test stresses much better the
29    # cardinality estimation error. We are testing just the
30    # command implementation itself here.
31    test {PFCOUNT returns approximated cardinality of set} {
32        r del hll
33        set res {}
34        r pfadd hll 1 2 3 4 5
35        lappend res [r pfcount hll]
36        # Call it again to test cached value invalidation.
37        r pfadd hll 6 7 8 8 9 10
38        lappend res [r pfcount hll]
39        set res
40    } {5 10}
41
42    test {HyperLogLogs are promote from sparse to dense} {
43        r del hll
44        r config set hll-sparse-max-bytes 3000
45        set n 0
46        while {$n < 100000} {
47            set elements {}
48            for {set j 0} {$j < 100} {incr j} {lappend elements [expr rand()]}
49            incr n 100
50            r pfadd hll {*}$elements
51            set card [r pfcount hll]
52            set err [expr {abs($card-$n)}]
53            assert {$err < (double($card)/100)*5}
54            if {$n < 1000} {
55                assert {[r pfdebug encoding hll] eq {sparse}}
56            } elseif {$n > 10000} {
57                assert {[r pfdebug encoding hll] eq {dense}}
58            }
59        }
60    }
61
62    test {HyperLogLog sparse encoding stress test} {
63        for {set x 0} {$x < 1000} {incr x} {
64            r del hll1 hll2
65            set numele [randomInt 100]
66            set elements {}
67            for {set j 0} {$j < $numele} {incr j} {
68                lappend elements [expr rand()]
69            }
70            # Force dense representation of hll2
71            r pfadd hll2
72            r pfdebug todense hll2
73            r pfadd hll1 {*}$elements
74            r pfadd hll2 {*}$elements
75            assert {[r pfdebug encoding hll1] eq {sparse}}
76            assert {[r pfdebug encoding hll2] eq {dense}}
77            # Cardinality estimated should match exactly.
78            assert {[r pfcount hll1] eq [r pfcount hll2]}
79        }
80    }
81
82    test {Corrupted sparse HyperLogLogs are detected: Additionl at tail} {
83        r del hll
84        r pfadd hll a b c
85        r append hll "hello"
86        set e {}
87        catch {r pfcount hll} e
88        set e
89    } {*INVALIDOBJ*}
90
91    test {Corrupted sparse HyperLogLogs are detected: Broken magic} {
92        r del hll
93        r pfadd hll a b c
94        r setrange hll 0 "0123"
95        set e {}
96        catch {r pfcount hll} e
97        set e
98    } {*WRONGTYPE*}
99
100    test {Corrupted sparse HyperLogLogs are detected: Invalid encoding} {
101        r del hll
102        r pfadd hll a b c
103        r setrange hll 4 "x"
104        set e {}
105        catch {r pfcount hll} e
106        set e
107    } {*WRONGTYPE*}
108
109    test {Corrupted dense HyperLogLogs are detected: Wrong length} {
110        r del hll
111        r pfadd hll a b c
112        r setrange hll 4 "\x00"
113        set e {}
114        catch {r pfcount hll} e
115        set e
116    } {*WRONGTYPE*}
117
118    test {Fuzzing dense/sparse encoding: Redis should always detect errors} {
119        for {set j 0} {$j < 1000} {incr j} {
120            r del hll
121            set items {}
122            set numitems [randomInt 3000]
123            for {set i 0} {$i < $numitems} {incr i} {
124                lappend items [expr {rand()}]
125            }
126            r pfadd hll {*}$items
127
128            # Corrupt it in some random way.
129            for {set i 0} {$i < 5} {incr i} {
130                set len [r strlen hll]
131                set pos [randomInt $len]
132                set byte [randstring 1 1 binary]
133                r setrange hll $pos $byte
134                # Don't modify more bytes 50% of times
135                if {rand() < 0.5} break
136            }
137
138            # Use the hyperloglog to check if it crashes
139            # Redis in some way.
140            catch {
141                r pfcount hll
142            }
143        }
144    }
145
146    test {PFADD, PFCOUNT, PFMERGE type checking works} {
147        r set foo bar
148        catch {r pfadd foo 1} e
149        assert_match {*WRONGTYPE*} $e
150        catch {r pfcount foo} e
151        assert_match {*WRONGTYPE*} $e
152        catch {r pfmerge bar foo} e
153        assert_match {*WRONGTYPE*} $e
154        catch {r pfmerge foo bar} e
155        assert_match {*WRONGTYPE*} $e
156    }
157
158    test {PFMERGE results on the cardinality of union of sets} {
159        r del hll hll1 hll2 hll3
160        r pfadd hll1 a b c
161        r pfadd hll2 b c d
162        r pfadd hll3 c d e
163        r pfmerge hll hll1 hll2 hll3
164        r pfcount hll
165    } {5}
166
167    test {PFCOUNT multiple-keys merge returns cardinality of union #1} {
168        r del hll1 hll2 hll3
169        for {set x 1} {$x < 10000} {incr x} {
170            r pfadd hll1 "foo-$x"
171            r pfadd hll2 "bar-$x"
172            r pfadd hll3 "zap-$x"
173
174            set card [r pfcount hll1 hll2 hll3]
175            set realcard [expr {$x*3}]
176            set err [expr {abs($card-$realcard)}]
177            assert {$err < (double($card)/100)*5}
178        }
179    }
180
181    test {PFCOUNT multiple-keys merge returns cardinality of union #2} {
182        r del hll1 hll2 hll3
183        set elements {}
184        for {set x 1} {$x < 10000} {incr x} {
185            for {set j 1} {$j <= 3} {incr j} {
186                set rint [randomInt 20000]
187                r pfadd hll$j $rint
188                lappend elements $rint
189            }
190        }
191        set realcard [llength [lsort -unique $elements]]
192        set card [r pfcount hll1 hll2 hll3]
193        set err [expr {abs($card-$realcard)}]
194        assert {$err < (double($card)/100)*5}
195    }
196
197    test {PFDEBUG GETREG returns the HyperLogLog raw registers} {
198        r del hll
199        r pfadd hll 1 2 3
200        llength [r pfdebug getreg hll]
201    } {16384}
202
203    test {PFADD / PFCOUNT cache invalidation works} {
204        r del hll
205        r pfadd hll a b c
206        r pfcount hll
207        assert {[r getrange hll 15 15] eq "\x00"}
208        r pfadd hll a b c
209        assert {[r getrange hll 15 15] eq "\x00"}
210        r pfadd hll 1 2 3
211        assert {[r getrange hll 15 15] eq "\x80"}
212    }
213}
214