xref: /redis-3.2.3/tests/unit/hyperloglog.tcl (revision 9cd1cd66)
1start_server {tags {"hll"}} {
2    test {HyperLogLog self test passes} {
3        catch {r pfselftest} e
4        set e
5    } {OK}
6
7    test {PFADD without arguments creates an HLL value} {
8        r pfadd hll
9        r exists hll
10    } {1}
11
12    test {Approximated cardinality after creation is zero} {
13        r pfcount hll
14    } {0}
15
16    test {PFADD returns 1 when at least 1 reg was modified} {
17        r pfadd hll a b c
18    } {1}
19
20    test {PFADD returns 0 when no reg was modified} {
21        r pfadd hll a b c
22    } {0}
23
24    test {PFADD works with empty string (regression)} {
25        r pfadd hll ""
26    }
27
28    # Note that the self test stresses much better the
29    # cardinality estimation error. We are testing just the
30    # command implementation itself here.
31    test {PFCOUNT returns approximated cardinality of set} {
32        r del hll
33        set res {}
34        r pfadd hll 1 2 3 4 5
35        lappend res [r pfcount hll]
36        # Call it again to test cached value invalidation.
37        r pfadd hll 6 7 8 8 9 10
38        lappend res [r pfcount hll]
39        set res
40    } {5 10}
41
42    test {HyperLogLogs are promote from sparse to dense} {
43        r del hll
44        r config set hll-sparse-max-bytes 3000
45        set n 0
46        while {$n < 100000} {
47            set elements {}
48            for {set j 0} {$j < 100} {incr j} {lappend elements [expr rand()]}
49            incr n 100
50            r pfadd hll {*}$elements
51            set card [r pfcount hll]
52            set err [expr {abs($card-$n)}]
53            assert {$err < (double($card)/100)*5}
54            if {$n < 1000} {
55                assert {[r pfdebug encoding hll] eq {sparse}}
56            } elseif {$n > 10000} {
57                assert {[r pfdebug encoding hll] eq {dense}}
58            }
59        }
60    }
61
62    test {HyperLogLog sparse encoding stress test} {
63        for {set x 0} {$x < 1000} {incr x} {
64            r del hll1 hll2
65            set numele [randomInt 100]
66            set elements {}
67            for {set j 0} {$j < $numele} {incr j} {
68                lappend elements [expr rand()]
69            }
70            # Force dense representation of hll2
71            r pfadd hll2
72            r pfdebug todense hll2
73            r pfadd hll1 {*}$elements
74            r pfadd hll2 {*}$elements
75            assert {[r pfdebug encoding hll1] eq {sparse}}
76            assert {[r pfdebug encoding hll2] eq {dense}}
77            # Cardinality estimated should match exactly.
78            assert {[r pfcount hll1] eq [r pfcount hll2]}
79        }
80    }
81
82    test {Corrupted sparse HyperLogLogs are detected: Additionl at tail} {
83        r del hll
84        r pfadd hll a b c
85        r append hll "hello"
86        set e {}
87        catch {r pfcount hll} e
88        set e
89    } {*INVALIDOBJ*}
90
91    test {Corrupted sparse HyperLogLogs are detected: Broken magic} {
92        r del hll
93        r pfadd hll a b c
94        r setrange hll 0 "0123"
95        set e {}
96        catch {r pfcount hll} e
97        set e
98    } {*WRONGTYPE*}
99
100    test {Corrupted sparse HyperLogLogs are detected: Invalid encoding} {
101        r del hll
102        r pfadd hll a b c
103        r setrange hll 4 "x"
104        set e {}
105        catch {r pfcount hll} e
106        set e
107    } {*WRONGTYPE*}
108
109    test {Corrupted dense HyperLogLogs are detected: Wrong length} {
110        r del hll
111        r pfadd hll a b c
112        r setrange hll 4 "\x00"
113        set e {}
114        catch {r pfcount hll} e
115        set e
116    } {*WRONGTYPE*}
117
118    test {PFADD, PFCOUNT, PFMERGE type checking works} {
119        r set foo bar
120        catch {r pfadd foo 1} e
121        assert_match {*WRONGTYPE*} $e
122        catch {r pfcount foo} e
123        assert_match {*WRONGTYPE*} $e
124        catch {r pfmerge bar foo} e
125        assert_match {*WRONGTYPE*} $e
126        catch {r pfmerge foo bar} e
127        assert_match {*WRONGTYPE*} $e
128    }
129
130    test {PFMERGE results on the cardinality of union of sets} {
131        r del hll hll1 hll2 hll3
132        r pfadd hll1 a b c
133        r pfadd hll2 b c d
134        r pfadd hll3 c d e
135        r pfmerge hll hll1 hll2 hll3
136        r pfcount hll
137    } {5}
138
139    test {PFCOUNT multiple-keys merge returns cardinality of union #1} {
140        r del hll1 hll2 hll3
141        for {set x 1} {$x < 10000} {incr x} {
142            r pfadd hll1 "foo-$x"
143            r pfadd hll2 "bar-$x"
144            r pfadd hll3 "zap-$x"
145
146            set card [r pfcount hll1 hll2 hll3]
147            set realcard [expr {$x*3}]
148            set err [expr {abs($card-$realcard)}]
149            assert {$err < (double($card)/100)*5}
150        }
151    }
152
153    test {PFCOUNT multiple-keys merge returns cardinality of union #2} {
154        r del hll1 hll2 hll3
155        set elements {}
156        for {set x 1} {$x < 10000} {incr x} {
157            for {set j 1} {$j <= 3} {incr j} {
158                set rint [randomInt 20000]
159                r pfadd hll$j $rint
160                lappend elements $rint
161            }
162        }
163        set realcard [llength [lsort -unique $elements]]
164        set card [r pfcount hll1 hll2 hll3]
165        set err [expr {abs($card-$realcard)}]
166        assert {$err < (double($card)/100)*5}
167    }
168
169    test {PFDEBUG GETREG returns the HyperLogLog raw registers} {
170        r del hll
171        r pfadd hll 1 2 3
172        llength [r pfdebug getreg hll]
173    } {16384}
174
175    test {PFADD / PFCOUNT cache invalidation works} {
176        r del hll
177        r pfadd hll a b c
178        r pfcount hll
179        assert {[r getrange hll 15 15] eq "\x00"}
180        r pfadd hll a b c
181        assert {[r getrange hll 15 15] eq "\x00"}
182        r pfadd hll 1 2 3
183        assert {[r getrange hll 15 15] eq "\x80"}
184    }
185}
186