1# hll-err.rb - Copyright (C) 2014 Salvatore Sanfilippo 2# BSD license, See the COPYING file for more information. 3# 4# This program is suited to output average and maximum errors of 5# the Redis HyperLogLog implementation in a format suitable to print 6# graphs using gnuplot. 7 8require 'rubygems' 9require 'redis' 10require 'digest/sha1' 11 12# Generate an array of [cardinality,relative_error] pairs 13# in the 0 - max range, with the specified step. 14# 15# 'r' is the Redis object used to perform the queries. 16# 'seed' must be different every time you want a test performed 17# with a different set. The function guarantees that if 'seed' is the 18# same, exactly the same dataset is used, and when it is different, 19# a totally unrelated different data set is used (without any common 20# element in practice). 21def run_experiment(r,seed,max,step) 22 r.del('hll') 23 i = 0 24 samples = [] 25 step = 1000 if step > 1000 26 while i < max do 27 elements = [] 28 step.times { 29 ele = Digest::SHA1.hexdigest(i.to_s+seed.to_s) 30 elements << ele 31 i += 1 32 } 33 r.pfadd('hll',elements) 34 approx = r.pfcount('hll') 35 err = approx-i 36 rel_err = 100.to_f*err/i 37 samples << [i,rel_err] 38 end 39 samples 40end 41 42def filter_samples(numsets,max,step,filter) 43 r = Redis.new 44 dataset = {} 45 (0...numsets).each{|i| 46 dataset[i] = run_experiment(r,i,max,step) 47 STDERR.puts "Set #{i}" 48 } 49 dataset[0].each_with_index{|ele,index| 50 if filter == :max 51 card=ele[0] 52 err=ele[1].abs 53 (1...numsets).each{|i| 54 err = dataset[i][index][1] if err < dataset[i][index][1] 55 } 56 puts "#{card} #{err}" 57 elsif filter == :avg 58 card=ele[0] 59 err = 0 60 (0...numsets).each{|i| 61 err += dataset[i][index][1] 62 } 63 err /= numsets 64 puts "#{card} #{err}" 65 elsif filter == :absavg 66 card=ele[0] 67 err = 0 68 (0...numsets).each{|i| 69 err += dataset[i][index][1].abs 70 } 71 err /= numsets 72 puts "#{card} #{err}" 73 elsif filter == :all 74 (0...numsets).each{|i| 75 card,err = dataset[i][index] 76 puts "#{card} #{err}" 77 } 78 else 79 raise "Unknown filter #{filter}" 80 end 81 } 82end 83 84if ARGV.length != 4 85 puts "Usage: hll-gnuplot-graph <samples> <max> <step> (max|avg|absavg|all)" 86 exit 1 87end 88filter_samples(ARGV[0].to_i,ARGV[1].to_i,ARGV[2].to_i,ARGV[3].to_sym) 89