1# hll-err.rb - Copyright (C) 2014 Salvatore Sanfilippo 2# BSD license, See the COPYING file for more information. 3# 4# This program is suited to output average and maximum errors of 5# the Redis HyperLogLog implementation in a format suitable to print 6# graphs using gnuplot. 7 8require 'rubygems' 9require 'redis' 10require 'digest/sha1' 11 12# Generate an array of [cardinality,relative_error] pairs 13# in the 0 - max range with step of 1000*step. 14# 15# 'r' is the Redis object used to perform the queries. 16# 'seed' must be different every time you want a test performed 17# with a different set. The function guarantees that if 'seed' is the 18# same, exactly the same dataset is used, and when it is different, 19# a totally unrelated different data set is used (without any common 20# element in practice). 21def run_experiment(r,seed,max,step) 22 r.del('hll') 23 i = 0 24 samples = [] 25 while i < max do 26 step.times { 27 elements = [] 28 1000.times { 29 ele = Digest::SHA1.hexdigest(i.to_s+seed.to_s) 30 elements << ele 31 i += 1 32 } 33 r.hlladd('hll',*elements) 34 } 35 approx = r.hllcount('hll') 36 err = approx-i 37 rel_err = 100.to_f*err/i 38 samples << [i,rel_err] 39 end 40 samples 41end 42 43def filter_samples(numsets,filter) 44 r = Redis.new 45 dataset = {} 46 (0...numsets).each{|i| 47 dataset[i] = run_experiment(r,i,100000,1) 48 } 49 dataset[0].each_with_index{|ele,index| 50 card,err=ele 51 if filter == :max 52 (1...numsets).each{|i| 53 err = dataset[i][index][1] if err < dataset[i][index][1] 54 } 55 elsif filter == :avg 56 (1...numsets).each{|i| 57 err += dataset[i][index][1] 58 } 59 err /= numsets 60 else 61 raise "Unknown filter #{filter}" 62 end 63 puts "#{card} #{err}" 64 } 65end 66 67filter_samples(100,:max) 68#filter_samples(100,:avg) 69