1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #include "table/block_based/full_filter_block.h"
7 #include <array>
8 
9 #include "monitoring/perf_context_imp.h"
10 #include "port/malloc.h"
11 #include "port/port.h"
12 #include "rocksdb/filter_policy.h"
13 #include "table/block_based/block_based_table_reader.h"
14 #include "util/coding.h"
15 
16 namespace ROCKSDB_NAMESPACE {
17 
FullFilterBlockBuilder(const SliceTransform * _prefix_extractor,bool whole_key_filtering,FilterBitsBuilder * filter_bits_builder)18 FullFilterBlockBuilder::FullFilterBlockBuilder(
19     const SliceTransform* _prefix_extractor, bool whole_key_filtering,
20     FilterBitsBuilder* filter_bits_builder)
21     : prefix_extractor_(_prefix_extractor),
22       whole_key_filtering_(whole_key_filtering),
23       last_whole_key_recorded_(false),
24       last_prefix_recorded_(false),
25       num_added_(0) {
26   assert(filter_bits_builder != nullptr);
27   filter_bits_builder_.reset(filter_bits_builder);
28 }
29 
Add(const Slice & key)30 void FullFilterBlockBuilder::Add(const Slice& key) {
31   const bool add_prefix = prefix_extractor_ && prefix_extractor_->InDomain(key);
32   if (whole_key_filtering_) {
33     if (!add_prefix) {
34       AddKey(key);
35     } else {
36       // if both whole_key and prefix are added to bloom then we will have whole
37       // key and prefix addition being interleaved and thus cannot rely on the
38       // bits builder to properly detect the duplicates by comparing with the
39       // last item.
40       Slice last_whole_key = Slice(last_whole_key_str_);
41       if (!last_whole_key_recorded_ || last_whole_key.compare(key) != 0) {
42         AddKey(key);
43         last_whole_key_recorded_ = true;
44         last_whole_key_str_.assign(key.data(), key.size());
45       }
46     }
47   }
48   if (add_prefix) {
49     AddPrefix(key);
50   }
51 }
52 
53 // Add key to filter if needed
AddKey(const Slice & key)54 inline void FullFilterBlockBuilder::AddKey(const Slice& key) {
55   filter_bits_builder_->AddKey(key);
56   num_added_++;
57 }
58 
59 // Add prefix to filter if needed
AddPrefix(const Slice & key)60 void FullFilterBlockBuilder::AddPrefix(const Slice& key) {
61   Slice prefix = prefix_extractor_->Transform(key);
62   if (whole_key_filtering_) {
63     // if both whole_key and prefix are added to bloom then we will have whole
64     // key and prefix addition being interleaved and thus cannot rely on the
65     // bits builder to properly detect the duplicates by comparing with the last
66     // item.
67     Slice last_prefix = Slice(last_prefix_str_);
68     if (!last_prefix_recorded_ || last_prefix.compare(prefix) != 0) {
69       AddKey(prefix);
70       last_prefix_recorded_ = true;
71       last_prefix_str_.assign(prefix.data(), prefix.size());
72     }
73   } else {
74     AddKey(prefix);
75   }
76 }
77 
Reset()78 void FullFilterBlockBuilder::Reset() {
79   last_whole_key_recorded_ = false;
80   last_prefix_recorded_ = false;
81 }
82 
Finish(const BlockHandle &,Status * status)83 Slice FullFilterBlockBuilder::Finish(const BlockHandle& /*tmp*/,
84                                      Status* status) {
85   Reset();
86   // In this impl we ignore BlockHandle
87   *status = Status::OK();
88   if (num_added_ != 0) {
89     num_added_ = 0;
90     return filter_bits_builder_->Finish(&filter_data_);
91   }
92   return Slice();
93 }
94 
FullFilterBlockReader(const BlockBasedTable * t,CachableEntry<ParsedFullFilterBlock> && filter_block)95 FullFilterBlockReader::FullFilterBlockReader(
96     const BlockBasedTable* t,
97     CachableEntry<ParsedFullFilterBlock>&& filter_block)
98     : FilterBlockReaderCommon(t, std::move(filter_block)) {
99   const SliceTransform* const prefix_extractor = table_prefix_extractor();
100   if (prefix_extractor) {
101     full_length_enabled_ =
102         prefix_extractor->FullLengthEnabled(&prefix_extractor_full_length_);
103   }
104 }
105 
KeyMayMatch(const Slice & key,const SliceTransform *,uint64_t block_offset,const bool no_io,const Slice * const,GetContext * get_context,BlockCacheLookupContext * lookup_context)106 bool FullFilterBlockReader::KeyMayMatch(
107     const Slice& key, const SliceTransform* /*prefix_extractor*/,
108     uint64_t block_offset, const bool no_io,
109     const Slice* const /*const_ikey_ptr*/, GetContext* get_context,
110     BlockCacheLookupContext* lookup_context) {
111 #ifdef NDEBUG
112   (void)block_offset;
113 #endif
114   assert(block_offset == kNotValid);
115   if (!whole_key_filtering()) {
116     return true;
117   }
118   return MayMatch(key, no_io, get_context, lookup_context);
119 }
120 
Create(const BlockBasedTable * table,FilePrefetchBuffer * prefetch_buffer,bool use_cache,bool prefetch,bool pin,BlockCacheLookupContext * lookup_context)121 std::unique_ptr<FilterBlockReader> FullFilterBlockReader::Create(
122     const BlockBasedTable* table, FilePrefetchBuffer* prefetch_buffer,
123     bool use_cache, bool prefetch, bool pin,
124     BlockCacheLookupContext* lookup_context) {
125   assert(table);
126   assert(table->get_rep());
127   assert(!pin || prefetch);
128 
129   CachableEntry<ParsedFullFilterBlock> filter_block;
130   if (prefetch || !use_cache) {
131     const Status s = ReadFilterBlock(table, prefetch_buffer, ReadOptions(),
132                                      use_cache, nullptr /* get_context */,
133                                      lookup_context, &filter_block);
134     if (!s.ok()) {
135       return std::unique_ptr<FilterBlockReader>();
136     }
137 
138     if (use_cache && !pin) {
139       filter_block.Reset();
140     }
141   }
142 
143   return std::unique_ptr<FilterBlockReader>(
144       new FullFilterBlockReader(table, std::move(filter_block)));
145 }
146 
PrefixMayMatch(const Slice & prefix,const SliceTransform *,uint64_t block_offset,const bool no_io,const Slice * const,GetContext * get_context,BlockCacheLookupContext * lookup_context)147 bool FullFilterBlockReader::PrefixMayMatch(
148     const Slice& prefix, const SliceTransform* /* prefix_extractor */,
149     uint64_t block_offset, const bool no_io,
150     const Slice* const /*const_ikey_ptr*/, GetContext* get_context,
151     BlockCacheLookupContext* lookup_context) {
152 #ifdef NDEBUG
153   (void)block_offset;
154 #endif
155   assert(block_offset == kNotValid);
156   return MayMatch(prefix, no_io, get_context, lookup_context);
157 }
158 
MayMatch(const Slice & entry,bool no_io,GetContext * get_context,BlockCacheLookupContext * lookup_context) const159 bool FullFilterBlockReader::MayMatch(
160     const Slice& entry, bool no_io, GetContext* get_context,
161     BlockCacheLookupContext* lookup_context) const {
162   CachableEntry<ParsedFullFilterBlock> filter_block;
163 
164   const Status s =
165       GetOrReadFilterBlock(no_io, get_context, lookup_context, &filter_block);
166   if (!s.ok()) {
167     return true;
168   }
169 
170   assert(filter_block.GetValue());
171 
172   FilterBitsReader* const filter_bits_reader =
173       filter_block.GetValue()->filter_bits_reader();
174 
175   if (filter_bits_reader) {
176     if (filter_bits_reader->MayMatch(entry)) {
177       PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
178       return true;
179     } else {
180       PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
181       return false;
182     }
183   }
184   return true;  // remain the same with block_based filter
185 }
186 
KeysMayMatch(MultiGetRange * range,const SliceTransform *,uint64_t block_offset,const bool no_io,BlockCacheLookupContext * lookup_context)187 void FullFilterBlockReader::KeysMayMatch(
188     MultiGetRange* range, const SliceTransform* /*prefix_extractor*/,
189     uint64_t block_offset, const bool no_io,
190     BlockCacheLookupContext* lookup_context) {
191 #ifdef NDEBUG
192   (void)range;
193   (void)block_offset;
194 #endif
195   assert(block_offset == kNotValid);
196   if (!whole_key_filtering()) {
197     // Simply return. Don't skip any key - consider all keys as likely to be
198     // present
199     return;
200   }
201   MayMatch(range, no_io, nullptr, lookup_context);
202 }
203 
PrefixesMayMatch(MultiGetRange * range,const SliceTransform * prefix_extractor,uint64_t block_offset,const bool no_io,BlockCacheLookupContext * lookup_context)204 void FullFilterBlockReader::PrefixesMayMatch(
205     MultiGetRange* range, const SliceTransform* prefix_extractor,
206     uint64_t block_offset, const bool no_io,
207     BlockCacheLookupContext* lookup_context) {
208 #ifdef NDEBUG
209   (void)range;
210   (void)block_offset;
211 #endif
212   assert(block_offset == kNotValid);
213   MayMatch(range, no_io, prefix_extractor, lookup_context);
214 }
215 
MayMatch(MultiGetRange * range,bool no_io,const SliceTransform * prefix_extractor,BlockCacheLookupContext * lookup_context) const216 void FullFilterBlockReader::MayMatch(
217     MultiGetRange* range, bool no_io, const SliceTransform* prefix_extractor,
218     BlockCacheLookupContext* lookup_context) const {
219   CachableEntry<ParsedFullFilterBlock> filter_block;
220 
221   const Status s = GetOrReadFilterBlock(no_io, range->begin()->get_context,
222                                         lookup_context, &filter_block);
223   if (!s.ok()) {
224     return;
225   }
226 
227   assert(filter_block.GetValue());
228 
229   FilterBitsReader* const filter_bits_reader =
230       filter_block.GetValue()->filter_bits_reader();
231 
232   if (!filter_bits_reader) {
233     return;
234   }
235 
236   // We need to use an array instead of autovector for may_match since
237   // &may_match[0] doesn't work for autovector<bool> (compiler error). So
238   // declare both keys and may_match as arrays, which is also slightly less
239   // expensive compared to autovector
240   std::array<Slice*, MultiGetContext::MAX_BATCH_SIZE> keys;
241   std::array<bool, MultiGetContext::MAX_BATCH_SIZE> may_match = {{true}};
242   autovector<Slice, MultiGetContext::MAX_BATCH_SIZE> prefixes;
243   int num_keys = 0;
244   MultiGetRange filter_range(*range, range->begin(), range->end());
245   for (auto iter = filter_range.begin(); iter != filter_range.end(); ++iter) {
246     if (!prefix_extractor) {
247       keys[num_keys++] = &iter->ukey;
248     } else if (prefix_extractor->InDomain(iter->ukey)) {
249       prefixes.emplace_back(prefix_extractor->Transform(iter->ukey));
250       keys[num_keys++] = &prefixes.back();
251     } else {
252       filter_range.SkipKey(iter);
253     }
254   }
255 
256   filter_bits_reader->MayMatch(num_keys, &keys[0], &may_match[0]);
257 
258   int i = 0;
259   for (auto iter = filter_range.begin(); iter != filter_range.end(); ++iter) {
260     if (!may_match[i]) {
261       // Update original MultiGet range to skip this key. The filter_range
262       // was temporarily used just to skip keys not in prefix_extractor domain
263       range->SkipKey(iter);
264       PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
265     } else {
266       // PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
267       PerfContext* perf_ctx = get_perf_context();
268       perf_ctx->bloom_sst_hit_count++;
269     }
270     ++i;
271   }
272 }
273 
ApproximateMemoryUsage() const274 size_t FullFilterBlockReader::ApproximateMemoryUsage() const {
275   size_t usage = ApproximateFilterBlockMemoryUsage();
276 #ifdef ROCKSDB_MALLOC_USABLE_SIZE
277   usage += malloc_usable_size(const_cast<FullFilterBlockReader*>(this));
278 #else
279   usage += sizeof(*this);
280 #endif  // ROCKSDB_MALLOC_USABLE_SIZE
281   return usage;
282 }
283 
RangeMayExist(const Slice * iterate_upper_bound,const Slice & user_key,const SliceTransform * prefix_extractor,const Comparator * comparator,const Slice * const const_ikey_ptr,bool * filter_checked,bool need_upper_bound_check,bool no_io,BlockCacheLookupContext * lookup_context)284 bool FullFilterBlockReader::RangeMayExist(
285     const Slice* iterate_upper_bound, const Slice& user_key,
286     const SliceTransform* prefix_extractor, const Comparator* comparator,
287     const Slice* const const_ikey_ptr, bool* filter_checked,
288     bool need_upper_bound_check, bool no_io,
289     BlockCacheLookupContext* lookup_context) {
290   if (!prefix_extractor || !prefix_extractor->InDomain(user_key)) {
291     *filter_checked = false;
292     return true;
293   }
294   Slice prefix = prefix_extractor->Transform(user_key);
295   if (need_upper_bound_check &&
296       !IsFilterCompatible(iterate_upper_bound, prefix, comparator)) {
297     *filter_checked = false;
298     return true;
299   } else {
300     *filter_checked = true;
301     return PrefixMayMatch(prefix, prefix_extractor, kNotValid, no_io,
302                           const_ikey_ptr, /* get_context */ nullptr,
303                           lookup_context);
304   }
305 }
306 
IsFilterCompatible(const Slice * iterate_upper_bound,const Slice & prefix,const Comparator * comparator) const307 bool FullFilterBlockReader::IsFilterCompatible(
308     const Slice* iterate_upper_bound, const Slice& prefix,
309     const Comparator* comparator) const {
310   // Try to reuse the bloom filter in the SST table if prefix_extractor in
311   // mutable_cf_options has changed. If range [user_key, upper_bound) all
312   // share the same prefix then we may still be able to use the bloom filter.
313   const SliceTransform* const prefix_extractor = table_prefix_extractor();
314   if (iterate_upper_bound != nullptr && prefix_extractor) {
315     if (!prefix_extractor->InDomain(*iterate_upper_bound)) {
316       return false;
317     }
318     Slice upper_bound_xform = prefix_extractor->Transform(*iterate_upper_bound);
319     // first check if user_key and upper_bound all share the same prefix
320     if (!comparator->Equal(prefix, upper_bound_xform)) {
321       // second check if user_key's prefix is the immediate predecessor of
322       // upper_bound and have the same length. If so, we know for sure all
323       // keys in the range [user_key, upper_bound) share the same prefix.
324       // Also need to make sure upper_bound are full length to ensure
325       // correctness
326       if (!full_length_enabled_ ||
327           iterate_upper_bound->size() != prefix_extractor_full_length_ ||
328           !comparator->IsSameLengthImmediateSuccessor(prefix,
329                                                       *iterate_upper_bound)) {
330         return false;
331       }
332     }
333     return true;
334   } else {
335     return false;
336   }
337 }
338 
339 }  // namespace ROCKSDB_NAMESPACE
340