1start_server {tags {"hll"}} { 2 test {HyperLogLog self test passes} { 3 catch {r pfselftest} e 4 set e 5 } {OK} 6 7 test {PFADD without arguments creates an HLL value} { 8 r pfadd hll 9 r exists hll 10 } {1} 11 12 test {Approximated cardinality after creation is zero} { 13 r pfcount hll 14 } {0} 15 16 test {PFADD returns 1 when at least 1 reg was modified} { 17 r pfadd hll a b c 18 } {1} 19 20 test {PFADD returns 0 when no reg was modified} { 21 r pfadd hll a b c 22 } {0} 23 24 test {PFADD works with empty string (regression)} { 25 r pfadd hll "" 26 } 27 28 # Note that the self test stresses much better the 29 # cardinality estimation error. We are testing just the 30 # command implementation itself here. 31 test {PFCOUNT returns approximated cardinality of set} { 32 r del hll 33 set res {} 34 r pfadd hll 1 2 3 4 5 35 lappend res [r pfcount hll] 36 # Call it again to test cached value invalidation. 37 r pfadd hll 6 7 8 8 9 10 38 lappend res [r pfcount hll] 39 set res 40 } {5 10} 41 42 test {HyperLogLogs are promote from sparse to dense} { 43 r del hll 44 r config set hll-sparse-max-bytes 3000 45 set n 0 46 while {$n < 100000} { 47 set elements {} 48 for {set j 0} {$j < 100} {incr j} {lappend elements [expr rand()]} 49 incr n 100 50 r pfadd hll {*}$elements 51 set card [r pfcount hll] 52 set err [expr {abs($card-$n)}] 53 assert {$err < (double($card)/100)*5} 54 if {$n < 1000} { 55 assert {[r pfdebug encoding hll] eq {sparse}} 56 } elseif {$n > 10000} { 57 assert {[r pfdebug encoding hll] eq {dense}} 58 } 59 } 60 } 61 62 test {HyperLogLog sparse encoding stress test} { 63 for {set x 0} {$x < 1000} {incr x} { 64 r del hll1 hll2 65 set numele [randomInt 100] 66 set elements {} 67 for {set j 0} {$j < $numele} {incr j} { 68 lappend elements [expr rand()] 69 } 70 # Force dense representation of hll2 71 r pfadd hll2 72 r pfdebug todense hll2 73 r pfadd hll1 {*}$elements 74 r pfadd hll2 {*}$elements 75 assert {[r pfdebug encoding hll1] eq {sparse}} 76 assert {[r pfdebug encoding hll2] eq {dense}} 77 # Cardinality estimated should match exactly. 78 assert {[r pfcount hll1] eq [r pfcount hll2]} 79 } 80 } 81 82 test {Corrupted sparse HyperLogLogs are detected: Additionl at tail} { 83 r del hll 84 r pfadd hll a b c 85 r append hll "hello" 86 set e {} 87 catch {r pfcount hll} e 88 set e 89 } {*INVALIDOBJ*} 90 91 test {Corrupted sparse HyperLogLogs are detected: Broken magic} { 92 r del hll 93 r pfadd hll a b c 94 r setrange hll 0 "0123" 95 set e {} 96 catch {r pfcount hll} e 97 set e 98 } {*WRONGTYPE*} 99 100 test {Corrupted sparse HyperLogLogs are detected: Invalid encoding} { 101 r del hll 102 r pfadd hll a b c 103 r setrange hll 4 "x" 104 set e {} 105 catch {r pfcount hll} e 106 set e 107 } {*WRONGTYPE*} 108 109 test {Corrupted dense HyperLogLogs are detected: Wrong length} { 110 r del hll 111 r pfadd hll a b c 112 r setrange hll 4 "\x00" 113 set e {} 114 catch {r pfcount hll} e 115 set e 116 } {*WRONGTYPE*} 117 118 test {Fuzzing dense/sparse encoding: Redis should always detect errors} { 119 for {set j 0} {$j < 1000} {incr j} { 120 r del hll 121 set items {} 122 set numitems [randomInt 3000] 123 for {set i 0} {$i < $numitems} {incr i} { 124 lappend items [expr {rand()}] 125 } 126 r pfadd hll {*}$items 127 128 # Corrupt it in some random way. 129 for {set i 0} {$i < 5} {incr i} { 130 set len [r strlen hll] 131 set pos [randomInt $len] 132 set byte [randstring 1 1 binary] 133 r setrange hll $pos $byte 134 # Don't modify more bytes 50% of times 135 if {rand() < 0.5} break 136 } 137 138 # Use the hyperloglog to check if it crashes 139 # Redis in some way. 140 catch { 141 r pfcount hll 142 } 143 } 144 } 145 146 test {PFADD, PFCOUNT, PFMERGE type checking works} { 147 r set foo bar 148 catch {r pfadd foo 1} e 149 assert_match {*WRONGTYPE*} $e 150 catch {r pfcount foo} e 151 assert_match {*WRONGTYPE*} $e 152 catch {r pfmerge bar foo} e 153 assert_match {*WRONGTYPE*} $e 154 catch {r pfmerge foo bar} e 155 assert_match {*WRONGTYPE*} $e 156 } 157 158 test {PFMERGE results on the cardinality of union of sets} { 159 r del hll hll1 hll2 hll3 160 r pfadd hll1 a b c 161 r pfadd hll2 b c d 162 r pfadd hll3 c d e 163 r pfmerge hll hll1 hll2 hll3 164 r pfcount hll 165 } {5} 166 167 test {PFCOUNT multiple-keys merge returns cardinality of union #1} { 168 r del hll1 hll2 hll3 169 for {set x 1} {$x < 10000} {incr x} { 170 r pfadd hll1 "foo-$x" 171 r pfadd hll2 "bar-$x" 172 r pfadd hll3 "zap-$x" 173 174 set card [r pfcount hll1 hll2 hll3] 175 set realcard [expr {$x*3}] 176 set err [expr {abs($card-$realcard)}] 177 assert {$err < (double($card)/100)*5} 178 } 179 } 180 181 test {PFCOUNT multiple-keys merge returns cardinality of union #2} { 182 r del hll1 hll2 hll3 183 set elements {} 184 for {set x 1} {$x < 10000} {incr x} { 185 for {set j 1} {$j <= 3} {incr j} { 186 set rint [randomInt 20000] 187 r pfadd hll$j $rint 188 lappend elements $rint 189 } 190 } 191 set realcard [llength [lsort -unique $elements]] 192 set card [r pfcount hll1 hll2 hll3] 193 set err [expr {abs($card-$realcard)}] 194 assert {$err < (double($card)/100)*5} 195 } 196 197 test {PFDEBUG GETREG returns the HyperLogLog raw registers} { 198 r del hll 199 r pfadd hll 1 2 3 200 llength [r pfdebug getreg hll] 201 } {16384} 202 203 test {PFADD / PFCOUNT cache invalidation works} { 204 r del hll 205 r pfadd hll a b c 206 r pfcount hll 207 assert {[r getrange hll 15 15] eq "\x00"} 208 r pfadd hll a b c 209 assert {[r getrange hll 15 15] eq "\x00"} 210 r pfadd hll 1 2 3 211 assert {[r getrange hll 15 15] eq "\x80"} 212 } 213} 214