1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #ifndef GFLAGS
11 #include <cstdio>
main()12 int main() {
13 fprintf(stderr, "Please install gflags to run this test... Skipping...\n");
14 return 0;
15 }
16 #else
17
18 #include <array>
19 #include <cmath>
20 #include <vector>
21
22 #include "logging/logging.h"
23 #include "memory/arena.h"
24 #include "rocksdb/filter_policy.h"
25 #include "table/block_based/filter_policy_internal.h"
26 #include "test_util/testharness.h"
27 #include "test_util/testutil.h"
28 #include "util/gflags_compat.h"
29 #include "util/hash.h"
30
31 using GFLAGS_NAMESPACE::ParseCommandLineFlags;
32
33 DEFINE_int32(bits_per_key, 10, "");
34
35 namespace ROCKSDB_NAMESPACE {
36
37 static const int kVerbose = 1;
38
Key(int i,char * buffer)39 static Slice Key(int i, char* buffer) {
40 std::string s;
41 PutFixed32(&s, static_cast<uint32_t>(i));
42 memcpy(buffer, s.c_str(), sizeof(i));
43 return Slice(buffer, sizeof(i));
44 }
45
NextLength(int length)46 static int NextLength(int length) {
47 if (length < 10) {
48 length += 1;
49 } else if (length < 100) {
50 length += 10;
51 } else if (length < 1000) {
52 length += 100;
53 } else {
54 length += 1000;
55 }
56 return length;
57 }
58
59 class BlockBasedBloomTest : public testing::Test {
60 private:
61 std::unique_ptr<const FilterPolicy> policy_;
62 std::string filter_;
63 std::vector<std::string> keys_;
64
65 public:
BlockBasedBloomTest()66 BlockBasedBloomTest() { ResetPolicy(); }
67
Reset()68 void Reset() {
69 keys_.clear();
70 filter_.clear();
71 }
72
ResetPolicy(double bits_per_key)73 void ResetPolicy(double bits_per_key) {
74 policy_.reset(new BloomFilterPolicy(bits_per_key,
75 BloomFilterPolicy::kDeprecatedBlock));
76 Reset();
77 }
78
ResetPolicy()79 void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); }
80
Add(const Slice & s)81 void Add(const Slice& s) {
82 keys_.push_back(s.ToString());
83 }
84
Build()85 void Build() {
86 std::vector<Slice> key_slices;
87 for (size_t i = 0; i < keys_.size(); i++) {
88 key_slices.push_back(Slice(keys_[i]));
89 }
90 filter_.clear();
91 policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()),
92 &filter_);
93 keys_.clear();
94 if (kVerbose >= 2) DumpFilter();
95 }
96
FilterSize() const97 size_t FilterSize() const {
98 return filter_.size();
99 }
100
FilterData() const101 Slice FilterData() const { return Slice(filter_); }
102
DumpFilter()103 void DumpFilter() {
104 fprintf(stderr, "F(");
105 for (size_t i = 0; i+1 < filter_.size(); i++) {
106 const unsigned int c = static_cast<unsigned int>(filter_[i]);
107 for (int j = 0; j < 8; j++) {
108 fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
109 }
110 }
111 fprintf(stderr, ")\n");
112 }
113
Matches(const Slice & s)114 bool Matches(const Slice& s) {
115 if (!keys_.empty()) {
116 Build();
117 }
118 return policy_->KeyMayMatch(s, filter_);
119 }
120
FalsePositiveRate()121 double FalsePositiveRate() {
122 char buffer[sizeof(int)];
123 int result = 0;
124 for (int i = 0; i < 10000; i++) {
125 if (Matches(Key(i + 1000000000, buffer))) {
126 result++;
127 }
128 }
129 return result / 10000.0;
130 }
131 };
132
TEST_F(BlockBasedBloomTest,EmptyFilter)133 TEST_F(BlockBasedBloomTest, EmptyFilter) {
134 ASSERT_TRUE(! Matches("hello"));
135 ASSERT_TRUE(! Matches("world"));
136 }
137
TEST_F(BlockBasedBloomTest,Small)138 TEST_F(BlockBasedBloomTest, Small) {
139 Add("hello");
140 Add("world");
141 ASSERT_TRUE(Matches("hello"));
142 ASSERT_TRUE(Matches("world"));
143 ASSERT_TRUE(! Matches("x"));
144 ASSERT_TRUE(! Matches("foo"));
145 }
146
TEST_F(BlockBasedBloomTest,VaryingLengths)147 TEST_F(BlockBasedBloomTest, VaryingLengths) {
148 char buffer[sizeof(int)];
149
150 // Count number of filters that significantly exceed the false positive rate
151 int mediocre_filters = 0;
152 int good_filters = 0;
153
154 for (int length = 1; length <= 10000; length = NextLength(length)) {
155 Reset();
156 for (int i = 0; i < length; i++) {
157 Add(Key(i, buffer));
158 }
159 Build();
160
161 ASSERT_LE(FilterSize(), (size_t)((length * 10 / 8) + 40)) << length;
162
163 // All added keys must match
164 for (int i = 0; i < length; i++) {
165 ASSERT_TRUE(Matches(Key(i, buffer)))
166 << "Length " << length << "; key " << i;
167 }
168
169 // Check false positive rate
170 double rate = FalsePositiveRate();
171 if (kVerbose >= 1) {
172 fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
173 rate*100.0, length, static_cast<int>(FilterSize()));
174 }
175 ASSERT_LE(rate, 0.02); // Must not be over 2%
176 if (rate > 0.0125) mediocre_filters++; // Allowed, but not too often
177 else good_filters++;
178 }
179 if (kVerbose >= 1) {
180 fprintf(stderr, "Filters: %d good, %d mediocre\n",
181 good_filters, mediocre_filters);
182 }
183 ASSERT_LE(mediocre_filters, good_filters/5);
184 }
185
186 // Ensure the implementation doesn't accidentally change in an
187 // incompatible way
TEST_F(BlockBasedBloomTest,Schema)188 TEST_F(BlockBasedBloomTest, Schema) {
189 char buffer[sizeof(int)];
190
191 ResetPolicy(8); // num_probes = 5
192 for (int key = 0; key < 87; key++) {
193 Add(Key(key, buffer));
194 }
195 Build();
196 ASSERT_EQ(BloomHash(FilterData()), 3589896109U);
197
198 ResetPolicy(9); // num_probes = 6
199 for (int key = 0; key < 87; key++) {
200 Add(Key(key, buffer));
201 }
202 Build();
203 ASSERT_EQ(BloomHash(FilterData()), 969445585U);
204
205 ResetPolicy(11); // num_probes = 7
206 for (int key = 0; key < 87; key++) {
207 Add(Key(key, buffer));
208 }
209 Build();
210 ASSERT_EQ(BloomHash(FilterData()), 1694458207U);
211
212 ResetPolicy(10); // num_probes = 6
213 for (int key = 0; key < 87; key++) {
214 Add(Key(key, buffer));
215 }
216 Build();
217 ASSERT_EQ(BloomHash(FilterData()), 2373646410U);
218
219 ResetPolicy(10);
220 for (int key = /*CHANGED*/ 1; key < 87; key++) {
221 Add(Key(key, buffer));
222 }
223 Build();
224 ASSERT_EQ(BloomHash(FilterData()), 1908442116U);
225
226 ResetPolicy(10);
227 for (int key = 1; key < /*CHANGED*/ 88; key++) {
228 Add(Key(key, buffer));
229 }
230 Build();
231 ASSERT_EQ(BloomHash(FilterData()), 3057004015U);
232
233 // With new fractional bits_per_key, check that we are rounding to
234 // whole bits per key for old Bloom filters.
235 ResetPolicy(9.5); // Treated as 10
236 for (int key = 1; key < 88; key++) {
237 Add(Key(key, buffer));
238 }
239 Build();
240 ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U);
241
242 ResetPolicy(10.499); // Treated as 10
243 for (int key = 1; key < 88; key++) {
244 Add(Key(key, buffer));
245 }
246 Build();
247 ASSERT_EQ(BloomHash(FilterData()), /*SAME*/ 3057004015U);
248
249 ResetPolicy();
250 }
251
252 // Different bits-per-byte
253
254 class FullBloomTest : public testing::TestWithParam<BloomFilterPolicy::Mode> {
255 private:
256 BlockBasedTableOptions table_options_;
257 std::shared_ptr<const FilterPolicy>& policy_;
258 std::unique_ptr<FilterBitsBuilder> bits_builder_;
259 std::unique_ptr<FilterBitsReader> bits_reader_;
260 std::unique_ptr<const char[]> buf_;
261 size_t filter_size_;
262
263 public:
FullBloomTest()264 FullBloomTest() : policy_(table_options_.filter_policy), filter_size_(0) {
265 ResetPolicy();
266 }
267
GetBuiltinFilterBitsBuilder()268 BuiltinFilterBitsBuilder* GetBuiltinFilterBitsBuilder() {
269 // Throws on bad cast
270 return &dynamic_cast<BuiltinFilterBitsBuilder&>(*bits_builder_);
271 }
272
GetBloomFilterPolicy()273 const BloomFilterPolicy* GetBloomFilterPolicy() {
274 // Throws on bad cast
275 return &dynamic_cast<const BloomFilterPolicy&>(*policy_);
276 }
277
Reset()278 void Reset() {
279 bits_builder_.reset(BloomFilterPolicy::GetBuilderFromContext(
280 FilterBuildingContext(table_options_)));
281 bits_reader_.reset(nullptr);
282 buf_.reset(nullptr);
283 filter_size_ = 0;
284 }
285
ResetPolicy(double bits_per_key)286 void ResetPolicy(double bits_per_key) {
287 policy_.reset(new BloomFilterPolicy(bits_per_key, GetParam()));
288 Reset();
289 }
290
ResetPolicy()291 void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); }
292
Add(const Slice & s)293 void Add(const Slice& s) {
294 bits_builder_->AddKey(s);
295 }
296
OpenRaw(const Slice & s)297 void OpenRaw(const Slice& s) {
298 bits_reader_.reset(policy_->GetFilterBitsReader(s));
299 }
300
Build()301 void Build() {
302 Slice filter = bits_builder_->Finish(&buf_);
303 bits_reader_.reset(policy_->GetFilterBitsReader(filter));
304 filter_size_ = filter.size();
305 }
306
FilterSize() const307 size_t FilterSize() const {
308 return filter_size_;
309 }
310
FilterData()311 Slice FilterData() { return Slice(buf_.get(), filter_size_); }
312
GetNumProbesFromFilterData()313 int GetNumProbesFromFilterData() {
314 assert(filter_size_ >= 5);
315 int8_t raw_num_probes = static_cast<int8_t>(buf_.get()[filter_size_ - 5]);
316 if (raw_num_probes == -1) { // New bloom filter marker
317 return static_cast<uint8_t>(buf_.get()[filter_size_ - 3]);
318 } else {
319 return raw_num_probes;
320 }
321 }
322
Matches(const Slice & s)323 bool Matches(const Slice& s) {
324 if (bits_reader_ == nullptr) {
325 Build();
326 }
327 return bits_reader_->MayMatch(s);
328 }
329
330 // Provides a kind of fingerprint on the Bloom filter's
331 // behavior, for reasonbly high FP rates.
PackedMatches()332 uint64_t PackedMatches() {
333 char buffer[sizeof(int)];
334 uint64_t result = 0;
335 for (int i = 0; i < 64; i++) {
336 if (Matches(Key(i + 12345, buffer))) {
337 result |= uint64_t{1} << i;
338 }
339 }
340 return result;
341 }
342
343 // Provides a kind of fingerprint on the Bloom filter's
344 // behavior, for lower FP rates.
FirstFPs(int count)345 std::string FirstFPs(int count) {
346 char buffer[sizeof(int)];
347 std::string rv;
348 int fp_count = 0;
349 for (int i = 0; i < 1000000; i++) {
350 // Pack four match booleans into each hexadecimal digit
351 if (Matches(Key(i + 1000000, buffer))) {
352 ++fp_count;
353 rv += std::to_string(i);
354 if (fp_count == count) {
355 break;
356 }
357 rv += ',';
358 }
359 }
360 return rv;
361 }
362
FalsePositiveRate()363 double FalsePositiveRate() {
364 char buffer[sizeof(int)];
365 int result = 0;
366 for (int i = 0; i < 10000; i++) {
367 if (Matches(Key(i + 1000000000, buffer))) {
368 result++;
369 }
370 }
371 return result / 10000.0;
372 }
373
SelectByImpl(uint32_t for_legacy_bloom,uint32_t for_fast_local_bloom)374 uint32_t SelectByImpl(uint32_t for_legacy_bloom,
375 uint32_t for_fast_local_bloom) {
376 switch (GetParam()) {
377 case BloomFilterPolicy::kLegacyBloom:
378 return for_legacy_bloom;
379 case BloomFilterPolicy::kFastLocalBloom:
380 return for_fast_local_bloom;
381 case BloomFilterPolicy::kDeprecatedBlock:
382 case BloomFilterPolicy::kAuto:
383 /* N/A */;
384 }
385 // otherwise
386 assert(false);
387 return 0;
388 }
389 };
390
TEST_P(FullBloomTest,FilterSize)391 TEST_P(FullBloomTest, FilterSize) {
392 // In addition to checking the consistency of space computation, we are
393 // checking that denoted and computed doubles are interpreted as expected
394 // as bits_per_key values.
395 bool some_computed_less_than_denoted = false;
396 // Note: enforced minimum is 1 bit per key (1000 millibits), and enforced
397 // maximum is 100 bits per key (100000 millibits).
398 for (auto bpk :
399 std::vector<std::pair<double, int> >{{-HUGE_VAL, 1000},
400 {-INFINITY, 1000},
401 {0.0, 1000},
402 {1.234, 1234},
403 {3.456, 3456},
404 {9.5, 9500},
405 {10.0, 10000},
406 {10.499, 10499},
407 {21.345, 21345},
408 {99.999, 99999},
409 {1234.0, 100000},
410 {HUGE_VAL, 100000},
411 {INFINITY, 100000},
412 {NAN, 100000}}) {
413 ResetPolicy(bpk.first);
414 auto bfp = GetBloomFilterPolicy();
415 EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
416 EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
417
418 double computed = bpk.first;
419 // This transforms e.g. 9.5 -> 9.499999999999998, which we still
420 // round to 10 for whole bits per key.
421 computed += 0.5;
422 computed /= 1234567.0;
423 computed *= 1234567.0;
424 computed -= 0.5;
425 some_computed_less_than_denoted |= (computed < bpk.first);
426 ResetPolicy(computed);
427 bfp = GetBloomFilterPolicy();
428 EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
429 EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
430
431 auto bits_builder = GetBuiltinFilterBitsBuilder();
432 for (int n = 1; n < 100; n++) {
433 auto space = bits_builder->CalculateSpace(n);
434 auto n2 = bits_builder->CalculateNumEntry(space);
435 EXPECT_GE(n2, n);
436 auto space2 = bits_builder->CalculateSpace(n2);
437 EXPECT_EQ(space, space2);
438 }
439 }
440 // Check that the compiler hasn't optimized our computation into nothing
441 EXPECT_TRUE(some_computed_less_than_denoted);
442 ResetPolicy();
443 }
444
TEST_P(FullBloomTest,FullEmptyFilter)445 TEST_P(FullBloomTest, FullEmptyFilter) {
446 // Empty filter is not match, at this level
447 ASSERT_TRUE(!Matches("hello"));
448 ASSERT_TRUE(!Matches("world"));
449 }
450
TEST_P(FullBloomTest,FullSmall)451 TEST_P(FullBloomTest, FullSmall) {
452 Add("hello");
453 Add("world");
454 ASSERT_TRUE(Matches("hello"));
455 ASSERT_TRUE(Matches("world"));
456 ASSERT_TRUE(!Matches("x"));
457 ASSERT_TRUE(!Matches("foo"));
458 }
459
TEST_P(FullBloomTest,FullVaryingLengths)460 TEST_P(FullBloomTest, FullVaryingLengths) {
461 char buffer[sizeof(int)];
462
463 // Count number of filters that significantly exceed the false positive rate
464 int mediocre_filters = 0;
465 int good_filters = 0;
466
467 for (int length = 1; length <= 10000; length = NextLength(length)) {
468 Reset();
469 for (int i = 0; i < length; i++) {
470 Add(Key(i, buffer));
471 }
472 Build();
473
474 ASSERT_LE(FilterSize(),
475 (size_t)((length * 10 / 8) + CACHE_LINE_SIZE * 2 + 5));
476
477 // All added keys must match
478 for (int i = 0; i < length; i++) {
479 ASSERT_TRUE(Matches(Key(i, buffer)))
480 << "Length " << length << "; key " << i;
481 }
482
483 // Check false positive rate
484 double rate = FalsePositiveRate();
485 if (kVerbose >= 1) {
486 fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
487 rate*100.0, length, static_cast<int>(FilterSize()));
488 }
489 ASSERT_LE(rate, 0.02); // Must not be over 2%
490 if (rate > 0.0125)
491 mediocre_filters++; // Allowed, but not too often
492 else
493 good_filters++;
494 }
495 if (kVerbose >= 1) {
496 fprintf(stderr, "Filters: %d good, %d mediocre\n",
497 good_filters, mediocre_filters);
498 }
499 ASSERT_LE(mediocre_filters, good_filters/5);
500 }
501
502 namespace {
SelectByCacheLineSize(uint32_t for64,uint32_t for128,uint32_t for256)503 inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128,
504 uint32_t for256) {
505 (void)for64;
506 (void)for128;
507 (void)for256;
508 #if CACHE_LINE_SIZE == 64
509 return for64;
510 #elif CACHE_LINE_SIZE == 128
511 return for128;
512 #elif CACHE_LINE_SIZE == 256
513 return for256;
514 #else
515 #error "CACHE_LINE_SIZE unknown or unrecognized"
516 #endif
517 }
518 } // namespace
519
520 // Ensure the implementation doesn't accidentally change in an
521 // incompatible way. This test doesn't check the reading side
522 // (FirstFPs/PackedMatches) for LegacyBloom because it requires the
523 // ability to read filters generated using other cache line sizes.
524 // See RawSchema.
TEST_P(FullBloomTest,Schema)525 TEST_P(FullBloomTest, Schema) {
526 char buffer[sizeof(int)];
527
528 // Use enough keys so that changing bits / key by 1 is guaranteed to
529 // change number of allocated cache lines. So keys > max cache line bits.
530
531 ResetPolicy(2); // num_probes = 1
532 for (int key = 0; key < 2087; key++) {
533 Add(Key(key, buffer));
534 }
535 Build();
536 EXPECT_EQ(GetNumProbesFromFilterData(), 1);
537 EXPECT_EQ(
538 BloomHash(FilterData()),
539 SelectByImpl(SelectByCacheLineSize(1567096579, 1964771444, 2659542661U),
540 3817481309U));
541 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
542 EXPECT_EQ("11,13,17,25,29,30,35,37,45,53", FirstFPs(10));
543 }
544
545 ResetPolicy(3); // num_probes = 2
546 for (int key = 0; key < 2087; key++) {
547 Add(Key(key, buffer));
548 }
549 Build();
550 EXPECT_EQ(GetNumProbesFromFilterData(), 2);
551 EXPECT_EQ(
552 BloomHash(FilterData()),
553 SelectByImpl(SelectByCacheLineSize(2707206547U, 2571983456U, 218344685),
554 2807269961U));
555 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
556 EXPECT_EQ("4,15,17,24,27,28,29,53,63,70", FirstFPs(10));
557 }
558
559 ResetPolicy(5); // num_probes = 3
560 for (int key = 0; key < 2087; key++) {
561 Add(Key(key, buffer));
562 }
563 Build();
564 EXPECT_EQ(GetNumProbesFromFilterData(), 3);
565 EXPECT_EQ(
566 BloomHash(FilterData()),
567 SelectByImpl(SelectByCacheLineSize(515748486, 94611728, 2436112214U),
568 204628445));
569 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
570 EXPECT_EQ("15,24,29,39,53,87,89,100,103,104", FirstFPs(10));
571 }
572
573 ResetPolicy(8); // num_probes = 5
574 for (int key = 0; key < 2087; key++) {
575 Add(Key(key, buffer));
576 }
577 Build();
578 EXPECT_EQ(GetNumProbesFromFilterData(), 5);
579 EXPECT_EQ(
580 BloomHash(FilterData()),
581 SelectByImpl(SelectByCacheLineSize(1302145999, 2811644657U, 756553699),
582 355564975));
583 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
584 EXPECT_EQ("16,60,66,126,220,238,244,256,265,287", FirstFPs(10));
585 }
586
587 ResetPolicy(9); // num_probes = 6
588 for (int key = 0; key < 2087; key++) {
589 Add(Key(key, buffer));
590 }
591 Build();
592 EXPECT_EQ(GetNumProbesFromFilterData(), 6);
593 EXPECT_EQ(
594 BloomHash(FilterData()),
595 SelectByImpl(SelectByCacheLineSize(2092755149, 661139132, 1182970461),
596 2137566013U));
597 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
598 EXPECT_EQ("156,367,791,872,945,1015,1139,1159,1265,1435", FirstFPs(10));
599 }
600
601 ResetPolicy(11); // num_probes = 7
602 for (int key = 0; key < 2087; key++) {
603 Add(Key(key, buffer));
604 }
605 Build();
606 EXPECT_EQ(GetNumProbesFromFilterData(), 7);
607 EXPECT_EQ(
608 BloomHash(FilterData()),
609 SelectByImpl(SelectByCacheLineSize(3755609649U, 1812694762, 1449142939),
610 2561502687U));
611 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
612 EXPECT_EQ("34,74,130,236,643,882,962,1015,1035,1110", FirstFPs(10));
613 }
614
615 // This used to be 9 probes, but 8 is a better choice for speed,
616 // especially with SIMD groups of 8 probes, with essentially no
617 // change in FP rate.
618 // FP rate @ 9 probes, old Bloom: 0.4321%
619 // FP rate @ 9 probes, new Bloom: 0.1846%
620 // FP rate @ 8 probes, new Bloom: 0.1843%
621 ResetPolicy(14); // num_probes = 8 (new), 9 (old)
622 for (int key = 0; key < 2087; key++) {
623 Add(Key(key, buffer));
624 }
625 Build();
626 EXPECT_EQ(static_cast<uint32_t>(GetNumProbesFromFilterData()),
627 SelectByImpl(9, 8));
628 EXPECT_EQ(
629 BloomHash(FilterData()),
630 SelectByImpl(SelectByCacheLineSize(178861123, 379087593, 2574136516U),
631 3709876890U));
632 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
633 EXPECT_EQ("130,240,522,565,989,2002,2526,3147,3543", FirstFPs(9));
634 }
635
636 // This used to be 11 probes, but 9 is a better choice for speed
637 // AND accuracy.
638 // FP rate @ 11 probes, old Bloom: 0.3571%
639 // FP rate @ 11 probes, new Bloom: 0.0884%
640 // FP rate @ 9 probes, new Bloom: 0.0843%
641 ResetPolicy(16); // num_probes = 9 (new), 11 (old)
642 for (int key = 0; key < 2087; key++) {
643 Add(Key(key, buffer));
644 }
645 Build();
646 EXPECT_EQ(static_cast<uint32_t>(GetNumProbesFromFilterData()),
647 SelectByImpl(11, 9));
648 EXPECT_EQ(
649 BloomHash(FilterData()),
650 SelectByImpl(SelectByCacheLineSize(1129406313, 3049154394U, 1727750964),
651 1087138490));
652 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
653 EXPECT_EQ("3299,3611,3916,6620,7822,8079,8482,8942,10167", FirstFPs(9));
654 }
655
656 ResetPolicy(10); // num_probes = 6, but different memory ratio vs. 9
657 for (int key = 0; key < 2087; key++) {
658 Add(Key(key, buffer));
659 }
660 Build();
661 EXPECT_EQ(GetNumProbesFromFilterData(), 6);
662 EXPECT_EQ(
663 BloomHash(FilterData()),
664 SelectByImpl(SelectByCacheLineSize(1478976371, 2910591341U, 1182970461),
665 2498541272U));
666 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
667 EXPECT_EQ("16,126,133,422,466,472,813,1002,1035,1159", FirstFPs(10));
668 }
669
670 ResetPolicy(10);
671 for (int key = /*CHANGED*/ 1; key < 2087; key++) {
672 Add(Key(key, buffer));
673 }
674 Build();
675 EXPECT_EQ(GetNumProbesFromFilterData(), 6);
676 EXPECT_EQ(
677 BloomHash(FilterData()),
678 SelectByImpl(SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U),
679 2058382345U));
680 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
681 EXPECT_EQ("16,126,133,422,466,472,813,1002,1035,1159", FirstFPs(10));
682 }
683
684 ResetPolicy(10);
685 for (int key = 1; key < /*CHANGED*/ 2088; key++) {
686 Add(Key(key, buffer));
687 }
688 Build();
689 EXPECT_EQ(GetNumProbesFromFilterData(), 6);
690 EXPECT_EQ(
691 BloomHash(FilterData()),
692 SelectByImpl(SelectByCacheLineSize(2885052954U, 769447944, 4175124908U),
693 23699164));
694 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
695 EXPECT_EQ("16,126,133,422,466,472,813,1002,1035,1159", FirstFPs(10));
696 }
697
698 // With new fractional bits_per_key, check that we are rounding to
699 // whole bits per key for old Bloom filters but fractional for
700 // new Bloom filter.
701 ResetPolicy(9.5);
702 for (int key = 1; key < 2088; key++) {
703 Add(Key(key, buffer));
704 }
705 Build();
706 EXPECT_EQ(GetNumProbesFromFilterData(), 6);
707 EXPECT_EQ(BloomHash(FilterData()),
708 SelectByImpl(/*SAME*/ SelectByCacheLineSize(2885052954U, 769447944,
709 4175124908U),
710 /*CHANGED*/ 3166884174U));
711 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
712 EXPECT_EQ(/*CHANGED*/ "126,156,367,444,458,791,813,976,1015,1035",
713 FirstFPs(10));
714 }
715
716 ResetPolicy(10.499);
717 for (int key = 1; key < 2088; key++) {
718 Add(Key(key, buffer));
719 }
720 Build();
721 EXPECT_EQ(static_cast<uint32_t>(GetNumProbesFromFilterData()),
722 SelectByImpl(6, 7));
723 EXPECT_EQ(BloomHash(FilterData()),
724 SelectByImpl(/*SAME*/ SelectByCacheLineSize(2885052954U, 769447944,
725 4175124908U),
726 /*CHANGED*/ 4098502778U));
727 if (GetParam() == BloomFilterPolicy::kFastLocalBloom) {
728 EXPECT_EQ(/*CHANGED*/ "16,236,240,472,1015,1045,1111,1409,1465,1612",
729 FirstFPs(10));
730 }
731
732 ResetPolicy();
733 }
734
735 // A helper class for testing custom or corrupt filter bits as read by
736 // built-in FilterBitsReaders.
737 struct RawFilterTester {
738 // Buffer, from which we always return a tail Slice, so the
739 // last five bytes are always the metadata bytes.
740 std::array<char, 3000> data_;
741 // Points five bytes from the end
742 char* metadata_ptr_;
743
RawFilterTesterROCKSDB_NAMESPACE::RawFilterTester744 RawFilterTester() : metadata_ptr_(&*(data_.end() - 5)) {}
745
ResetNoFillROCKSDB_NAMESPACE::RawFilterTester746 Slice ResetNoFill(uint32_t len_without_metadata, uint32_t num_lines,
747 uint32_t num_probes) {
748 metadata_ptr_[0] = static_cast<char>(num_probes);
749 EncodeFixed32(metadata_ptr_ + 1, num_lines);
750 uint32_t len = len_without_metadata + /*metadata*/ 5;
751 assert(len <= data_.size());
752 return Slice(metadata_ptr_ - len_without_metadata, len);
753 }
754
ResetROCKSDB_NAMESPACE::RawFilterTester755 Slice Reset(uint32_t len_without_metadata, uint32_t num_lines,
756 uint32_t num_probes, bool fill_ones) {
757 data_.fill(fill_ones ? 0xff : 0);
758 return ResetNoFill(len_without_metadata, num_lines, num_probes);
759 }
760
ResetWeirdFillROCKSDB_NAMESPACE::RawFilterTester761 Slice ResetWeirdFill(uint32_t len_without_metadata, uint32_t num_lines,
762 uint32_t num_probes) {
763 for (uint32_t i = 0; i < data_.size(); ++i) {
764 data_[i] = static_cast<char>(0x7b7b >> (i % 7));
765 }
766 return ResetNoFill(len_without_metadata, num_lines, num_probes);
767 }
768 };
769
TEST_P(FullBloomTest,RawSchema)770 TEST_P(FullBloomTest, RawSchema) {
771 RawFilterTester cft;
772 // Two probes, about 3/4 bits set: ~50% "FP" rate
773 // One 256-byte cache line.
774 OpenRaw(cft.ResetWeirdFill(256, 1, 2));
775 EXPECT_EQ(uint64_t{11384799501900898790U}, PackedMatches());
776
777 // Two 128-byte cache lines.
778 OpenRaw(cft.ResetWeirdFill(256, 2, 2));
779 EXPECT_EQ(uint64_t{10157853359773492589U}, PackedMatches());
780
781 // Four 64-byte cache lines.
782 OpenRaw(cft.ResetWeirdFill(256, 4, 2));
783 EXPECT_EQ(uint64_t{7123594913907464682U}, PackedMatches());
784 }
785
TEST_P(FullBloomTest,CorruptFilters)786 TEST_P(FullBloomTest, CorruptFilters) {
787 RawFilterTester cft;
788
789 for (bool fill : {false, true}) {
790 // Good filter bits - returns same as fill
791 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 6, fill));
792 ASSERT_EQ(fill, Matches("hello"));
793 ASSERT_EQ(fill, Matches("world"));
794
795 // Good filter bits - returns same as fill
796 OpenRaw(cft.Reset(CACHE_LINE_SIZE * 3, 3, 6, fill));
797 ASSERT_EQ(fill, Matches("hello"));
798 ASSERT_EQ(fill, Matches("world"));
799
800 // Good filter bits - returns same as fill
801 // 256 is unusual but legal cache line size
802 OpenRaw(cft.Reset(256 * 3, 3, 6, fill));
803 ASSERT_EQ(fill, Matches("hello"));
804 ASSERT_EQ(fill, Matches("world"));
805
806 // Good filter bits - returns same as fill
807 // 30 should be max num_probes
808 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 30, fill));
809 ASSERT_EQ(fill, Matches("hello"));
810 ASSERT_EQ(fill, Matches("world"));
811
812 // Good filter bits - returns same as fill
813 // 1 should be min num_probes
814 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 1, fill));
815 ASSERT_EQ(fill, Matches("hello"));
816 ASSERT_EQ(fill, Matches("world"));
817
818 // Type 1 trivial filter bits - returns true as if FP by zero probes
819 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 0, fill));
820 ASSERT_TRUE(Matches("hello"));
821 ASSERT_TRUE(Matches("world"));
822
823 // Type 2 trivial filter bits - returns false as if built from zero keys
824 OpenRaw(cft.Reset(0, 0, 6, fill));
825 ASSERT_FALSE(Matches("hello"));
826 ASSERT_FALSE(Matches("world"));
827
828 // Type 2 trivial filter bits - returns false as if built from zero keys
829 OpenRaw(cft.Reset(0, 37, 6, fill));
830 ASSERT_FALSE(Matches("hello"));
831 ASSERT_FALSE(Matches("world"));
832
833 // Type 2 trivial filter bits - returns false as 0 size trumps 0 probes
834 OpenRaw(cft.Reset(0, 0, 0, fill));
835 ASSERT_FALSE(Matches("hello"));
836 ASSERT_FALSE(Matches("world"));
837
838 // Bad filter bits - returns true for safety
839 // No solution to 0 * x == CACHE_LINE_SIZE
840 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 0, 6, fill));
841 ASSERT_TRUE(Matches("hello"));
842 ASSERT_TRUE(Matches("world"));
843
844 // Bad filter bits - returns true for safety
845 // Can't have 3 * x == 4 for integer x
846 OpenRaw(cft.Reset(4, 3, 6, fill));
847 ASSERT_TRUE(Matches("hello"));
848 ASSERT_TRUE(Matches("world"));
849
850 // Bad filter bits - returns true for safety
851 // 97 bytes is not a power of two, so not a legal cache line size
852 OpenRaw(cft.Reset(97 * 3, 3, 6, fill));
853 ASSERT_TRUE(Matches("hello"));
854 ASSERT_TRUE(Matches("world"));
855
856 // Bad filter bits - returns true for safety
857 // 65 bytes is not a power of two, so not a legal cache line size
858 OpenRaw(cft.Reset(65 * 3, 3, 6, fill));
859 ASSERT_TRUE(Matches("hello"));
860 ASSERT_TRUE(Matches("world"));
861
862 // Bad filter bits - returns false as if built from zero keys
863 // < 5 bytes overall means missing even metadata
864 OpenRaw(cft.Reset(static_cast<uint32_t>(-1), 3, 6, fill));
865 ASSERT_FALSE(Matches("hello"));
866 ASSERT_FALSE(Matches("world"));
867
868 OpenRaw(cft.Reset(static_cast<uint32_t>(-5), 3, 6, fill));
869 ASSERT_FALSE(Matches("hello"));
870 ASSERT_FALSE(Matches("world"));
871
872 // Dubious filter bits - returns same as fill (for now)
873 // 31 is not a useful num_probes, nor generated by RocksDB unless directly
874 // using filter bits API without BloomFilterPolicy.
875 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 31, fill));
876 ASSERT_EQ(fill, Matches("hello"));
877 ASSERT_EQ(fill, Matches("world"));
878
879 // Dubious filter bits - returns same as fill (for now)
880 // Similar, with 127, largest positive char
881 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 127, fill));
882 ASSERT_EQ(fill, Matches("hello"));
883 ASSERT_EQ(fill, Matches("world"));
884
885 // Dubious filter bits - returns true (for now)
886 // num_probes set to 128 / -128, lowest negative char
887 // NB: Bug in implementation interprets this as negative and has same
888 // effect as zero probes, but effectively reserves negative char values
889 // for future use.
890 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 128, fill));
891 ASSERT_TRUE(Matches("hello"));
892 ASSERT_TRUE(Matches("world"));
893
894 // Dubious filter bits - returns true (for now)
895 // Similar, with 255 / -1
896 OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 255, fill));
897 ASSERT_TRUE(Matches("hello"));
898 ASSERT_TRUE(Matches("world"));
899 }
900 }
901
902 INSTANTIATE_TEST_CASE_P(Full, FullBloomTest,
903 testing::Values(BloomFilterPolicy::kLegacyBloom,
904 BloomFilterPolicy::kFastLocalBloom));
905
906 } // namespace ROCKSDB_NAMESPACE
907
main(int argc,char ** argv)908 int main(int argc, char** argv) {
909 ::testing::InitGoogleTest(&argc, argv);
910 ParseCommandLineFlags(&argc, &argv, true);
911
912 return RUN_ALL_TESTS();
913 }
914
915 #endif // GFLAGS
916