1# hll-err.rb - Copyright (C) 2014 Salvatore Sanfilippo
2# BSD license, See the COPYING file for more information.
3#
4# This program is suited to output average and maximum errors of
5# the Redis HyperLogLog implementation in a format suitable to print
6# graphs using gnuplot.
7
8require 'rubygems'
9require 'redis'
10require 'digest/sha1'
11
12# Generate an array of [cardinality,relative_error] pairs
13# in the 0 - max range, with the specified step.
14#
15# 'r' is the Redis object used to perform the queries.
16# 'seed' must be different every time you want a test performed
17# with a different set. The function guarantees that if 'seed' is the
18# same, exactly the same dataset is used, and when it is different,
19# a totally unrelated different data set is used (without any common
20# element in practice).
21def run_experiment(r,seed,max,step)
22    r.del('hll')
23    i = 0
24    samples = []
25    step = 1000 if step > 1000
26    while i < max do
27        elements = []
28        step.times {
29            ele = Digest::SHA1.hexdigest(i.to_s+seed.to_s)
30            elements << ele
31            i += 1
32        }
33        r.pfadd('hll',*elements)
34        approx = r.pfcount('hll')
35        err = approx-i
36        rel_err = 100.to_f*err/i
37        samples << [i,rel_err]
38    end
39    samples
40end
41
42def filter_samples(numsets,max,step,filter)
43    r = Redis.new
44    dataset = {}
45    (0...numsets).each{|i|
46        dataset[i] = run_experiment(r,i,max,step)
47        STDERR.puts "Set #{i}"
48    }
49    dataset[0].each_with_index{|ele,index|
50        if filter == :max
51            card=ele[0]
52            err=ele[1].abs
53            (1...numsets).each{|i|
54                err = dataset[i][index][1] if err < dataset[i][index][1]
55            }
56            puts "#{card} #{err}"
57        elsif filter == :avg
58            card=ele[0]
59            err = 0
60            (0...numsets).each{|i|
61                err += dataset[i][index][1]
62            }
63            err /= numsets
64            puts "#{card} #{err}"
65        elsif filter == :absavg
66            card=ele[0]
67            err = 0
68            (0...numsets).each{|i|
69                err += dataset[i][index][1].abs
70            }
71            err /= numsets
72            puts "#{card} #{err}"
73        elsif filter == :all
74            (0...numsets).each{|i|
75                card,err = dataset[i][index]
76                puts "#{card} #{err}"
77            }
78        else
79            raise "Unknown filter #{filter}"
80        end
81    }
82end
83
84if ARGV.length != 4
85    puts "Usage: hll-gnuplot-graph <samples> <max> <step> (max|avg|absavg|all)"
86    exit 1
87end
88filter_samples(ARGV[0].to_i,ARGV[1].to_i,ARGV[2].to_i,ARGV[3].to_sym)
89