1 // Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the LICENSE file. See the AUTHORS file for names of contributors.
8 //
9 // A database can be configured with a custom FilterPolicy object.
10 // This object is responsible for creating a small filter from a set
11 // of keys.  These filters are stored in rocksdb and are consulted
12 // automatically by rocksdb to decide whether or not to read some
13 // information from disk. In many cases, a filter can cut down the
14 // number of disk seeks form a handful to a single disk seek per
15 // DB::Get() call.
16 //
17 // Most people will want to use the builtin bloom filter support (see
18 // NewBloomFilterPolicy() below).
19 
20 #pragma once
21 
22 #include <stdlib.h>
23 #include <memory>
24 #include <stdexcept>
25 #include <string>
26 #include <vector>
27 
28 #include "rocksdb/advanced_options.h"
29 
30 namespace ROCKSDB_NAMESPACE {
31 
32 class Slice;
33 struct BlockBasedTableOptions;
34 
35 // A class that takes a bunch of keys, then generates filter
36 class FilterBitsBuilder {
37  public:
~FilterBitsBuilder()38   virtual ~FilterBitsBuilder() {}
39 
40   // Add Key to filter, you could use any way to store the key.
41   // Such as: storing hashes or original keys
42   // Keys are in sorted order and duplicated keys are possible.
43   virtual void AddKey(const Slice& key) = 0;
44 
45   // Generate the filter using the keys that are added
46   // The return value of this function would be the filter bits,
47   // The ownership of actual data is set to buf
48   virtual Slice Finish(std::unique_ptr<const char[]>* buf) = 0;
49 
50   // Calculate num of keys that can be added and generate a filter
51   // <= the specified number of bytes.
52 #if defined(_MSC_VER)
53 #pragma warning(push)
54 #pragma warning(disable : 4702)  // unreachable code
55 #endif
CalculateNumEntry(const uint32_t)56   virtual int CalculateNumEntry(const uint32_t /*bytes*/) {
57 #ifndef ROCKSDB_LITE
58     throw std::runtime_error("CalculateNumEntry not Implemented");
59 #else
60     abort();
61 #endif
62     return 0;
63   }
64 #if defined(_MSC_VER)
65 #pragma warning(pop)
66 #endif
67 };
68 
69 // A class that checks if a key can be in filter
70 // It should be initialized by Slice generated by BitsBuilder
71 class FilterBitsReader {
72  public:
~FilterBitsReader()73   virtual ~FilterBitsReader() {}
74 
75   // Check if the entry match the bits in filter
76   virtual bool MayMatch(const Slice& entry) = 0;
77 
78   // Check if an array of entries match the bits in filter
MayMatch(int num_keys,Slice ** keys,bool * may_match)79   virtual void MayMatch(int num_keys, Slice** keys, bool* may_match) {
80     for (int i = 0; i < num_keys; ++i) {
81       may_match[i] = MayMatch(*keys[i]);
82     }
83   }
84 };
85 
86 // Contextual information passed to BloomFilterPolicy at filter building time.
87 // Used in overriding FilterPolicy::GetBuilderWithContext(). References other
88 // structs because this is expected to be a temporary, stack-allocated object.
89 struct FilterBuildingContext {
90   // This constructor is for internal use only and subject to change.
91   FilterBuildingContext(const BlockBasedTableOptions& table_options);
92 
93   // Options for the table being built
94   const BlockBasedTableOptions& table_options;
95 
96   // Name of the column family for the table (or empty string if unknown)
97   std::string column_family_name;
98 
99   // The compactions style in effect for the table
100   CompactionStyle compaction_style = kCompactionStyleLevel;
101 
102   // The table level at time of constructing the SST file, or -1 if unknown.
103   // (The table file could later be used at a different level.)
104   int level_at_creation = -1;
105 
106   // An optional logger for reporting errors, warnings, etc.
107   Logger* info_log = nullptr;
108 };
109 
110 // We add a new format of filter block called full filter block
111 // This new interface gives you more space of customization
112 //
113 // For the full filter block, you can plug in your version by implement
114 // the FilterBitsBuilder and FilterBitsReader
115 //
116 // There are two sets of interface in FilterPolicy
117 // Set 1: CreateFilter, KeyMayMatch: used for blockbased filter
118 // Set 2: GetFilterBitsBuilder, GetFilterBitsReader, they are used for
119 // full filter.
120 // Set 1 MUST be implemented correctly, Set 2 is optional
121 // RocksDB would first try using functions in Set 2. if they return nullptr,
122 // it would use Set 1 instead.
123 // You can choose filter type in NewBloomFilterPolicy
124 class FilterPolicy {
125  public:
126   virtual ~FilterPolicy();
127 
128   // Return the name of this policy.  Note that if the filter encoding
129   // changes in an incompatible way, the name returned by this method
130   // must be changed.  Otherwise, old incompatible filters may be
131   // passed to methods of this type.
132   virtual const char* Name() const = 0;
133 
134   // keys[0,n-1] contains a list of keys (potentially with duplicates)
135   // that are ordered according to the user supplied comparator.
136   // Append a filter that summarizes keys[0,n-1] to *dst.
137   //
138   // Warning: do not change the initial contents of *dst.  Instead,
139   // append the newly constructed filter to *dst.
140   virtual void CreateFilter(const Slice* keys, int n,
141                             std::string* dst) const = 0;
142 
143   // "filter" contains the data appended by a preceding call to
144   // CreateFilter() on this class.  This method must return true if
145   // the key was in the list of keys passed to CreateFilter().
146   // This method may return true or false if the key was not on the
147   // list, but it should aim to return false with a high probability.
148   virtual bool KeyMayMatch(const Slice& key, const Slice& filter) const = 0;
149 
150   // Return a new FilterBitsBuilder for full or partitioned filter blocks, or
151   // nullptr if using block-based filter.
152   // NOTE: This function is only called by GetBuilderWithContext() below for
153   // custom FilterPolicy implementations. Thus, it is not necessary to
154   // override this function if overriding GetBuilderWithContext().
GetFilterBitsBuilder()155   virtual FilterBitsBuilder* GetFilterBitsBuilder() const { return nullptr; }
156 
157   // A newer variant of GetFilterBitsBuilder that allows a FilterPolicy
158   // to customize the builder for contextual constraints and hints.
159   // (Name changed to avoid triggering -Werror=overloaded-virtual.)
160   // If overriding GetFilterBitsBuilder() suffices, it is not necessary to
161   // override this function.
GetBuilderWithContext(const FilterBuildingContext &)162   virtual FilterBitsBuilder* GetBuilderWithContext(
163       const FilterBuildingContext&) const {
164     return GetFilterBitsBuilder();
165   }
166 
167   // Return a new FilterBitsReader for full or partitioned filter blocks, or
168   // nullptr if using block-based filter.
169   // As here, the input slice should NOT be deleted by FilterPolicy.
GetFilterBitsReader(const Slice &)170   virtual FilterBitsReader* GetFilterBitsReader(
171       const Slice& /*contents*/) const {
172     return nullptr;
173   }
174 };
175 
176 // Return a new filter policy that uses a bloom filter with approximately
177 // the specified number of bits per key.
178 //
179 // bits_per_key: average bits allocated per key in bloom filter. A good
180 // choice is 9.9, which yields a filter with ~ 1% false positive rate.
181 // When format_version < 5, the value will be rounded to the nearest
182 // integer. Recommend using no more than three decimal digits after the
183 // decimal point, as in 6.667.
184 //
185 // use_block_based_builder: use deprecated block based filter (true) rather
186 // than full or partitioned filter (false).
187 //
188 // Callers must delete the result after any database that is using the
189 // result has been closed.
190 //
191 // Note: if you are using a custom comparator that ignores some parts
192 // of the keys being compared, you must not use NewBloomFilterPolicy()
193 // and must provide your own FilterPolicy that also ignores the
194 // corresponding parts of the keys.  For example, if the comparator
195 // ignores trailing spaces, it would be incorrect to use a
196 // FilterPolicy (like NewBloomFilterPolicy) that does not ignore
197 // trailing spaces in keys.
198 extern const FilterPolicy* NewBloomFilterPolicy(
199     double bits_per_key, bool use_block_based_builder = false);
200 }  // namespace ROCKSDB_NAMESPACE
201