1 // Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the LICENSE file. See the AUTHORS file for names of contributors.
8 
9 #pragma once
10 
11 #include <atomic>
12 #include <memory>
13 #include <string>
14 #include <vector>
15 
16 #include "rocksdb/filter_policy.h"
17 #include "rocksdb/table.h"
18 
19 namespace ROCKSDB_NAMESPACE {
20 
21 class Slice;
22 
23 // Exposes any extra information needed for testing built-in
24 // FilterBitsBuilders
25 class BuiltinFilterBitsBuilder : public FilterBitsBuilder {
26  public:
27   // Calculate number of bytes needed for a new filter, including
28   // metadata. Passing the result to CalculateNumEntry should
29   // return >= the num_entry passed in.
30   virtual uint32_t CalculateSpace(const int num_entry) = 0;
31 
32   // Returns an estimate of the FP rate of the returned filter if
33   // `keys` keys are added and the filter returned by Finish is `bytes`
34   // bytes.
35   virtual double EstimatedFpRate(size_t keys, size_t bytes) = 0;
36 };
37 
38 // RocksDB built-in filter policy for Bloom or Bloom-like filters.
39 // This class is considered internal API and subject to change.
40 // See NewBloomFilterPolicy.
41 class BloomFilterPolicy : public FilterPolicy {
42  public:
43   // An internal marker for operating modes of BloomFilterPolicy, in terms
44   // of selecting an implementation. This makes it easier for tests to track
45   // or to walk over the built-in set of Bloom filter implementations. The
46   // only variance in BloomFilterPolicy by mode/implementation is in
47   // GetFilterBitsBuilder(), so an enum is practical here vs. subclasses.
48   //
49   // This enum is essentially the union of all the different kinds of return
50   // value from GetFilterBitsBuilder, or "underlying implementation", and
51   // higher-level modes that choose an underlying implementation based on
52   // context information.
53   enum Mode {
54     // Legacy implementation of Bloom filter for full and partitioned filters.
55     // Set to 0 in case of value confusion with bool use_block_based_builder
56     // NOTE: TESTING ONLY as this mode does not use best compatible
57     // implementation
58     kLegacyBloom = 0,
59     // Deprecated block-based Bloom filter implementation.
60     // Set to 1 in case of value confusion with bool use_block_based_builder
61     // NOTE: DEPRECATED but user exposed
62     kDeprecatedBlock = 1,
63     // A fast, cache-local Bloom filter implementation. See description in
64     // FastLocalBloomImpl.
65     // NOTE: TESTING ONLY as this mode does not check format_version
66     kFastLocalBloom = 2,
67     // Automatically choose from the above (except kDeprecatedBlock) based on
68     // context at build time, including compatibility with format_version.
69     // NOTE: This is currently the only recommended mode that is user exposed.
70     kAuto = 100,
71   };
72   // All the different underlying implementations that a BloomFilterPolicy
73   // might use, as a mode that says "always use this implementation."
74   // Only appropriate for unit tests.
75   static const std::vector<Mode> kAllFixedImpls;
76 
77   // All the different modes of BloomFilterPolicy that are exposed from
78   // user APIs. Only appropriate for higher-level unit tests. Integration
79   // tests should prefer using NewBloomFilterPolicy (user-exposed).
80   static const std::vector<Mode> kAllUserModes;
81 
82   explicit BloomFilterPolicy(double bits_per_key, Mode mode);
83 
84   ~BloomFilterPolicy() override;
85 
86   const char* Name() const override;
87 
88   // Deprecated block-based filter only
89   void CreateFilter(const Slice* keys, int n, std::string* dst) const override;
90 
91   // Deprecated block-based filter only
92   bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const override;
93 
94   FilterBitsBuilder* GetFilterBitsBuilder() const override;
95 
96   // To use this function, call GetBuilderFromContext().
97   //
98   // Neither the context nor any objects therein should be saved beyond
99   // the call to this function, unless it's shared_ptr.
100   FilterBitsBuilder* GetBuilderWithContext(
101       const FilterBuildingContext&) const override;
102 
103   // Returns a new FilterBitsBuilder from the filter_policy in
104   // table_options of a context, or nullptr if not applicable.
105   // (An internal convenience function to save boilerplate.)
106   static FilterBitsBuilder* GetBuilderFromContext(const FilterBuildingContext&);
107 
108   // Read metadata to determine what kind of FilterBitsReader is needed
109   // and return a new one. This must successfully process any filter data
110   // generated by a built-in FilterBitsBuilder, regardless of the impl
111   // chosen for this BloomFilterPolicy. Not compatible with CreateFilter.
112   FilterBitsReader* GetFilterBitsReader(const Slice& contents) const override;
113 
114   // Essentially for testing only: configured millibits/key
GetMillibitsPerKey()115   int GetMillibitsPerKey() const { return millibits_per_key_; }
116   // Essentially for testing only: legacy whole bits/key
GetWholeBitsPerKey()117   int GetWholeBitsPerKey() const { return whole_bits_per_key_; }
118 
119  private:
120   // Newer filters support fractional bits per key. For predictable behavior
121   // of 0.001-precision values across floating point implementations, we
122   // round to thousandths of a bit (on average) per key.
123   int millibits_per_key_;
124 
125   // Older filters round to whole number bits per key. (There *should* be no
126   // compatibility issue with fractional bits per key, but preserving old
127   // behavior with format_version < 5 just in case.)
128   int whole_bits_per_key_;
129 
130   // Selected mode (a specific implementation or way of selecting an
131   // implementation) for building new SST filters.
132   Mode mode_;
133 
134   // Whether relevant warnings have been logged already. (Remember so we
135   // only report once per BloomFilterPolicy instance, to keep the noise down.)
136   mutable std::atomic<bool> warned_;
137 
138   // For newer Bloom filter implementation(s)
139   FilterBitsReader* GetBloomBitsReader(const Slice& contents) const;
140 };
141 
142 }  // namespace ROCKSDB_NAMESPACE
143