1*572c4311Sfengbojiang# hll-err.rb - Copyright (C) 2014 Salvatore Sanfilippo 2*572c4311Sfengbojiang# BSD license, See the COPYING file for more information. 3*572c4311Sfengbojiang# 4*572c4311Sfengbojiang# This program is suited to output average and maximum errors of 5*572c4311Sfengbojiang# the Redis HyperLogLog implementation in a format suitable to print 6*572c4311Sfengbojiang# graphs using gnuplot. 7*572c4311Sfengbojiang 8*572c4311Sfengbojiangrequire 'rubygems' 9*572c4311Sfengbojiangrequire 'redis' 10*572c4311Sfengbojiangrequire 'digest/sha1' 11*572c4311Sfengbojiang 12*572c4311Sfengbojiang# Generate an array of [cardinality,relative_error] pairs 13*572c4311Sfengbojiang# in the 0 - max range, with the specified step. 14*572c4311Sfengbojiang# 15*572c4311Sfengbojiang# 'r' is the Redis object used to perform the queries. 16*572c4311Sfengbojiang# 'seed' must be different every time you want a test performed 17*572c4311Sfengbojiang# with a different set. The function guarantees that if 'seed' is the 18*572c4311Sfengbojiang# same, exactly the same dataset is used, and when it is different, 19*572c4311Sfengbojiang# a totally unrelated different data set is used (without any common 20*572c4311Sfengbojiang# element in practice). 21*572c4311Sfengbojiangdef run_experiment(r,seed,max,step) 22*572c4311Sfengbojiang r.del('hll') 23*572c4311Sfengbojiang i = 0 24*572c4311Sfengbojiang samples = [] 25*572c4311Sfengbojiang step = 1000 if step > 1000 26*572c4311Sfengbojiang while i < max do 27*572c4311Sfengbojiang elements = [] 28*572c4311Sfengbojiang step.times { 29*572c4311Sfengbojiang ele = Digest::SHA1.hexdigest(i.to_s+seed.to_s) 30*572c4311Sfengbojiang elements << ele 31*572c4311Sfengbojiang i += 1 32*572c4311Sfengbojiang } 33*572c4311Sfengbojiang r.pfadd('hll',elements) 34*572c4311Sfengbojiang approx = r.pfcount('hll') 35*572c4311Sfengbojiang err = approx-i 36*572c4311Sfengbojiang rel_err = 100.to_f*err/i 37*572c4311Sfengbojiang samples << [i,rel_err] 38*572c4311Sfengbojiang end 39*572c4311Sfengbojiang samples 40*572c4311Sfengbojiangend 41*572c4311Sfengbojiang 42*572c4311Sfengbojiangdef filter_samples(numsets,max,step,filter) 43*572c4311Sfengbojiang r = Redis.new 44*572c4311Sfengbojiang dataset = {} 45*572c4311Sfengbojiang (0...numsets).each{|i| 46*572c4311Sfengbojiang dataset[i] = run_experiment(r,i,max,step) 47*572c4311Sfengbojiang STDERR.puts "Set #{i}" 48*572c4311Sfengbojiang } 49*572c4311Sfengbojiang dataset[0].each_with_index{|ele,index| 50*572c4311Sfengbojiang if filter == :max 51*572c4311Sfengbojiang card=ele[0] 52*572c4311Sfengbojiang err=ele[1].abs 53*572c4311Sfengbojiang (1...numsets).each{|i| 54*572c4311Sfengbojiang err = dataset[i][index][1] if err < dataset[i][index][1] 55*572c4311Sfengbojiang } 56*572c4311Sfengbojiang puts "#{card} #{err}" 57*572c4311Sfengbojiang elsif filter == :avg 58*572c4311Sfengbojiang card=ele[0] 59*572c4311Sfengbojiang err = 0 60*572c4311Sfengbojiang (0...numsets).each{|i| 61*572c4311Sfengbojiang err += dataset[i][index][1] 62*572c4311Sfengbojiang } 63*572c4311Sfengbojiang err /= numsets 64*572c4311Sfengbojiang puts "#{card} #{err}" 65*572c4311Sfengbojiang elsif filter == :absavg 66*572c4311Sfengbojiang card=ele[0] 67*572c4311Sfengbojiang err = 0 68*572c4311Sfengbojiang (0...numsets).each{|i| 69*572c4311Sfengbojiang err += dataset[i][index][1].abs 70*572c4311Sfengbojiang } 71*572c4311Sfengbojiang err /= numsets 72*572c4311Sfengbojiang puts "#{card} #{err}" 73*572c4311Sfengbojiang elsif filter == :all 74*572c4311Sfengbojiang (0...numsets).each{|i| 75*572c4311Sfengbojiang card,err = dataset[i][index] 76*572c4311Sfengbojiang puts "#{card} #{err}" 77*572c4311Sfengbojiang } 78*572c4311Sfengbojiang else 79*572c4311Sfengbojiang raise "Unknown filter #{filter}" 80*572c4311Sfengbojiang end 81*572c4311Sfengbojiang } 82*572c4311Sfengbojiangend 83*572c4311Sfengbojiang 84*572c4311Sfengbojiangif ARGV.length != 4 85*572c4311Sfengbojiang puts "Usage: hll-gnuplot-graph <samples> <max> <step> (max|avg|absavg|all)" 86*572c4311Sfengbojiang exit 1 87*572c4311Sfengbojiangend 88*572c4311Sfengbojiangfilter_samples(ARGV[0].to_i,ARGV[1].to_i,ARGV[2].to_i,ARGV[3].to_sym) 89