1start_server {tags {"hll"}} { 2 test {HyperLogLog self test passes} { 3 catch {r pfselftest} e 4 set e 5 } {OK} 6 7 test {PFADD without arguments creates an HLL value} { 8 r pfadd hll 9 r exists hll 10 } {1} 11 12 test {Approximated cardinality after creation is zero} { 13 r pfcount hll 14 } {0} 15 16 test {PFADD returns 1 when at least 1 reg was modified} { 17 r pfadd hll a b c 18 } {1} 19 20 test {PFADD returns 0 when no reg was modified} { 21 r pfadd hll a b c 22 } {0} 23 24 test {PFADD works with empty string (regression)} { 25 r pfadd hll "" 26 } 27 28 # Note that the self test stresses much better the 29 # cardinality estimation error. We are testing just the 30 # command implementation itself here. 31 test {PFCOUNT returns approximated cardinality of set} { 32 r del hll 33 set res {} 34 r pfadd hll 1 2 3 4 5 35 lappend res [r pfcount hll] 36 # Call it again to test cached value invalidation. 37 r pfadd hll 6 7 8 8 9 10 38 lappend res [r pfcount hll] 39 set res 40 } {5 10} 41 42 test {HyperLogLogs are promote from sparse to dense} { 43 r del hll 44 r config set hll-sparse-max-bytes 3000 45 set n 0 46 while {$n < 100000} { 47 set elements {} 48 for {set j 0} {$j < 100} {incr j} {lappend elements [expr rand()]} 49 incr n 100 50 r pfadd hll {*}$elements 51 set card [r pfcount hll] 52 set err [expr {abs($card-$n)}] 53 assert {$err < (double($card)/100)*5} 54 if {$n < 1000} { 55 assert {[r pfdebug encoding hll] eq {sparse}} 56 } elseif {$n > 10000} { 57 assert {[r pfdebug encoding hll] eq {dense}} 58 } 59 } 60 } 61 62 test {HyperLogLog sparse encoding stress test} { 63 for {set x 0} {$x < 1000} {incr x} { 64 r del hll1 hll2 65 set numele [randomInt 100] 66 set elements {} 67 for {set j 0} {$j < $numele} {incr j} { 68 lappend elements [expr rand()] 69 } 70 # Force dense representation of hll2 71 r pfadd hll2 72 r pfdebug todense hll2 73 r pfadd hll1 {*}$elements 74 r pfadd hll2 {*}$elements 75 assert {[r pfdebug encoding hll1] eq {sparse}} 76 assert {[r pfdebug encoding hll2] eq {dense}} 77 # Cardinality estimated should match exactly. 78 assert {[r pfcount hll1] eq [r pfcount hll2]} 79 } 80 } 81 82 test {Corrupted sparse HyperLogLogs are detected: Additionl at tail} { 83 r del hll 84 r pfadd hll a b c 85 r append hll "hello" 86 set e {} 87 catch {r pfcount hll} e 88 set e 89 } {*INVALIDOBJ*} 90 91 test {Corrupted sparse HyperLogLogs are detected: Broken magic} { 92 r del hll 93 r pfadd hll a b c 94 r setrange hll 0 "0123" 95 set e {} 96 catch {r pfcount hll} e 97 set e 98 } {*WRONGTYPE*} 99 100 test {Corrupted sparse HyperLogLogs are detected: Invalid encoding} { 101 r del hll 102 r pfadd hll a b c 103 r setrange hll 4 "x" 104 set e {} 105 catch {r pfcount hll} e 106 set e 107 } {*WRONGTYPE*} 108 109 test {Corrupted dense HyperLogLogs are detected: Wrong length} { 110 r del hll 111 r pfadd hll a b c 112 r setrange hll 4 "\x00" 113 set e {} 114 catch {r pfcount hll} e 115 set e 116 } {*WRONGTYPE*} 117 118 test {PFADD, PFCOUNT, PFMERGE type checking works} { 119 r set foo bar 120 catch {r pfadd foo 1} e 121 assert_match {*WRONGTYPE*} $e 122 catch {r pfcount foo} e 123 assert_match {*WRONGTYPE*} $e 124 catch {r pfmerge bar foo} e 125 assert_match {*WRONGTYPE*} $e 126 catch {r pfmerge foo bar} e 127 assert_match {*WRONGTYPE*} $e 128 } 129 130 test {PFMERGE results on the cardinality of union of sets} { 131 r del hll hll1 hll2 hll3 132 r pfadd hll1 a b c 133 r pfadd hll2 b c d 134 r pfadd hll3 c d e 135 r pfmerge hll hll1 hll2 hll3 136 r pfcount hll 137 } {5} 138 139 test {PFCOUNT multiple-keys merge returns cardinality of union #1} { 140 r del hll1 hll2 hll3 141 for {set x 1} {$x < 10000} {incr x} { 142 r pfadd hll1 "foo-$x" 143 r pfadd hll2 "bar-$x" 144 r pfadd hll3 "zap-$x" 145 146 set card [r pfcount hll1 hll2 hll3] 147 set realcard [expr {$x*3}] 148 set err [expr {abs($card-$realcard)}] 149 assert {$err < (double($card)/100)*5} 150 } 151 } 152 153 test {PFCOUNT multiple-keys merge returns cardinality of union #2} { 154 r del hll1 hll2 hll3 155 set elements {} 156 for {set x 1} {$x < 10000} {incr x} { 157 for {set j 1} {$j <= 3} {incr j} { 158 set rint [randomInt 20000] 159 r pfadd hll$j $rint 160 lappend elements $rint 161 } 162 } 163 set realcard [llength [lsort -unique $elements]] 164 set card [r pfcount hll1 hll2 hll3] 165 set err [expr {abs($card-$realcard)}] 166 assert {$err < (double($card)/100)*5} 167 } 168 169 test {PFDEBUG GETREG returns the HyperLogLog raw registers} { 170 r del hll 171 r pfadd hll 1 2 3 172 llength [r pfdebug getreg hll] 173 } {16384} 174 175 test {PFADD / PFCOUNT cache invalidation works} { 176 r del hll 177 r pfadd hll a b c 178 r pfcount hll 179 assert {[r getrange hll 15 15] eq "\x00"} 180 r pfadd hll a b c 181 assert {[r getrange hll 15 15] eq "\x00"} 182 r pfadd hll 1 2 3 183 assert {[r getrange hll 15 15] eq "\x80"} 184 } 185} 186