1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #include <stdio.h>
11 #include <algorithm>
12 #include <iostream>
13 #include <map>
14 #include <memory>
15 #include <string>
16 #include <vector>
17 
18 #include "block_fetcher.h"
19 #include "cache/lru_cache.h"
20 #include "db/dbformat.h"
21 #include "db/memtable.h"
22 #include "db/write_batch_internal.h"
23 #include "memtable/stl_wrappers.h"
24 #include "meta_blocks.h"
25 #include "monitoring/statistics.h"
26 #include "port/port.h"
27 #include "rocksdb/cache.h"
28 #include "rocksdb/db.h"
29 #include "rocksdb/env.h"
30 #include "rocksdb/file_checksum.h"
31 #include "rocksdb/file_system.h"
32 #include "rocksdb/iterator.h"
33 #include "rocksdb/memtablerep.h"
34 #include "rocksdb/perf_context.h"
35 #include "rocksdb/slice_transform.h"
36 #include "rocksdb/statistics.h"
37 #include "rocksdb/write_buffer_manager.h"
38 #include "table/block_based/block.h"
39 #include "table/block_based/block_based_table_builder.h"
40 #include "table/block_based/block_based_table_factory.h"
41 #include "table/block_based/block_based_table_reader.h"
42 #include "table/block_based/block_builder.h"
43 #include "table/block_based/flush_block_policy.h"
44 #include "table/format.h"
45 #include "table/get_context.h"
46 #include "table/internal_iterator.h"
47 #include "table/plain/plain_table_factory.h"
48 #include "table/scoped_arena_iterator.h"
49 #include "table/sst_file_writer_collectors.h"
50 #include "test_util/sync_point.h"
51 #include "test_util/testharness.h"
52 #include "test_util/testutil.h"
53 #include "util/compression.h"
54 #include "util/file_checksum_helper.h"
55 #include "util/random.h"
56 #include "util/string_util.h"
57 #include "utilities/merge_operators.h"
58 
59 namespace ROCKSDB_NAMESPACE {
60 
// Table magic-number constants. These are declarations only (extern, no
// initializer); the definitions are supplied by another translation unit.
extern const uint64_t kLegacyBlockBasedTableMagicNumber;
extern const uint64_t kLegacyPlainTableMagicNumber;
extern const uint64_t kBlockBasedTableMagicNumber;
extern const uint64_t kPlainTableMagicNumber;
65 
66 namespace {
67 
// A 10000-character string consisting solely of 'o'.
const std::string kDummyValue(10000, 'o');
69 
70 // DummyPropertiesCollector used to test BlockBasedTableProperties
71 class DummyPropertiesCollector : public TablePropertiesCollector {
72  public:
Name() const73   const char* Name() const override { return ""; }
74 
Finish(UserCollectedProperties *)75   Status Finish(UserCollectedProperties* /*properties*/) override {
76     return Status::OK();
77   }
78 
Add(const Slice &,const Slice &)79   Status Add(const Slice& /*user_key*/, const Slice& /*value*/) override {
80     return Status::OK();
81   }
82 
GetReadableProperties() const83   UserCollectedProperties GetReadableProperties() const override {
84     return UserCollectedProperties{};
85   }
86 };
87 
88 class DummyPropertiesCollectorFactory1
89     : public TablePropertiesCollectorFactory {
90  public:
CreateTablePropertiesCollector(TablePropertiesCollectorFactory::Context)91   TablePropertiesCollector* CreateTablePropertiesCollector(
92       TablePropertiesCollectorFactory::Context /*context*/) override {
93     return new DummyPropertiesCollector();
94   }
Name() const95   const char* Name() const override { return "DummyPropertiesCollector1"; }
96 };
97 
98 class DummyPropertiesCollectorFactory2
99     : public TablePropertiesCollectorFactory {
100  public:
CreateTablePropertiesCollector(TablePropertiesCollectorFactory::Context)101   TablePropertiesCollector* CreateTablePropertiesCollector(
102       TablePropertiesCollectorFactory::Context /*context*/) override {
103     return new DummyPropertiesCollector();
104   }
Name() const105   const char* Name() const override { return "DummyPropertiesCollector2"; }
106 };
107 
108 // Return reverse of "key".
109 // Used to test non-lexicographic comparators.
Reverse(const Slice & key)110 std::string Reverse(const Slice& key) {
111   auto rev = key.ToString();
112   std::reverse(rev.begin(), rev.end());
113   return rev;
114 }
115 
116 class ReverseKeyComparator : public Comparator {
117  public:
Name() const118   const char* Name() const override {
119     return "rocksdb.ReverseBytewiseComparator";
120   }
121 
Compare(const Slice & a,const Slice & b) const122   int Compare(const Slice& a, const Slice& b) const override {
123     return BytewiseComparator()->Compare(Reverse(a), Reverse(b));
124   }
125 
FindShortestSeparator(std::string * start,const Slice & limit) const126   void FindShortestSeparator(std::string* start,
127                              const Slice& limit) const override {
128     std::string s = Reverse(*start);
129     std::string l = Reverse(limit);
130     BytewiseComparator()->FindShortestSeparator(&s, l);
131     *start = Reverse(s);
132   }
133 
FindShortSuccessor(std::string * key) const134   void FindShortSuccessor(std::string* key) const override {
135     std::string s = Reverse(*key);
136     BytewiseComparator()->FindShortSuccessor(&s);
137     *key = Reverse(s);
138   }
139 };
140 
141 ReverseKeyComparator reverse_key_comparator;
142 
Increment(const Comparator * cmp,std::string * key)143 void Increment(const Comparator* cmp, std::string* key) {
144   if (cmp == BytewiseComparator()) {
145     key->push_back('\0');
146   } else {
147     assert(cmp == &reverse_key_comparator);
148     std::string rev = Reverse(*key);
149     rev.push_back('\0');
150     *key = Reverse(rev);
151   }
152 }
153 
154 }  // namespace
155 
156 // Helper class for tests to unify the interface between
157 // BlockBuilder/TableBuilder and Block/Table.
158 class Constructor {
159  public:
Constructor(const Comparator * cmp)160   explicit Constructor(const Comparator* cmp)
161       : data_(stl_wrappers::LessOfComparator(cmp)) {}
~Constructor()162   virtual ~Constructor() { }
163 
Add(const std::string & key,const Slice & value)164   void Add(const std::string& key, const Slice& value) {
165     data_[key] = value.ToString();
166   }
167 
168   // Finish constructing the data structure with all the keys that have
169   // been added so far.  Returns the keys in sorted order in "*keys"
170   // and stores the key/value pairs in "*kvmap"
Finish(const Options & options,const ImmutableCFOptions & ioptions,const MutableCFOptions & moptions,const BlockBasedTableOptions & table_options,const InternalKeyComparator & internal_comparator,std::vector<std::string> * keys,stl_wrappers::KVMap * kvmap)171   void Finish(const Options& options, const ImmutableCFOptions& ioptions,
172               const MutableCFOptions& moptions,
173               const BlockBasedTableOptions& table_options,
174               const InternalKeyComparator& internal_comparator,
175               std::vector<std::string>* keys, stl_wrappers::KVMap* kvmap) {
176     last_internal_key_ = &internal_comparator;
177     *kvmap = data_;
178     keys->clear();
179     for (const auto& kv : data_) {
180       keys->push_back(kv.first);
181     }
182     data_.clear();
183     Status s = FinishImpl(options, ioptions, moptions, table_options,
184                           internal_comparator, *kvmap);
185     ASSERT_TRUE(s.ok()) << s.ToString();
186   }
187 
188   // Construct the data structure from the data in "data"
189   virtual Status FinishImpl(const Options& options,
190                             const ImmutableCFOptions& ioptions,
191                             const MutableCFOptions& moptions,
192                             const BlockBasedTableOptions& table_options,
193                             const InternalKeyComparator& internal_comparator,
194                             const stl_wrappers::KVMap& data) = 0;
195 
196   virtual InternalIterator* NewIterator(
197       const SliceTransform* prefix_extractor = nullptr) const = 0;
198 
data()199   virtual const stl_wrappers::KVMap& data() { return data_; }
200 
IsArenaMode() const201   virtual bool IsArenaMode() const { return false; }
202 
db() const203   virtual DB* db() const { return nullptr; }  // Overridden in DBConstructor
204 
AnywayDeleteIterator() const205   virtual bool AnywayDeleteIterator() const { return false; }
206 
207  protected:
208   const InternalKeyComparator* last_internal_key_;
209 
210  private:
211   stl_wrappers::KVMap data_;
212 };
213 
214 class BlockConstructor: public Constructor {
215  public:
BlockConstructor(const Comparator * cmp)216   explicit BlockConstructor(const Comparator* cmp)
217       : Constructor(cmp),
218         comparator_(cmp),
219         block_(nullptr) { }
~BlockConstructor()220   ~BlockConstructor() override { delete block_; }
FinishImpl(const Options &,const ImmutableCFOptions &,const MutableCFOptions &,const BlockBasedTableOptions & table_options,const InternalKeyComparator &,const stl_wrappers::KVMap & kv_map)221   Status FinishImpl(const Options& /*options*/,
222                     const ImmutableCFOptions& /*ioptions*/,
223                     const MutableCFOptions& /*moptions*/,
224                     const BlockBasedTableOptions& table_options,
225                     const InternalKeyComparator& /*internal_comparator*/,
226                     const stl_wrappers::KVMap& kv_map) override {
227     delete block_;
228     block_ = nullptr;
229     BlockBuilder builder(table_options.block_restart_interval);
230 
231     for (const auto kv : kv_map) {
232       builder.Add(kv.first, kv.second);
233     }
234     // Open the block
235     data_ = builder.Finish().ToString();
236     BlockContents contents;
237     contents.data = data_;
238     block_ = new Block(std::move(contents));
239     return Status::OK();
240   }
NewIterator(const SliceTransform *) const241   InternalIterator* NewIterator(
242       const SliceTransform* /*prefix_extractor*/) const override {
243     return block_->NewDataIterator(comparator_, comparator_,
244                                    kDisableGlobalSequenceNumber);
245   }
246 
247  private:
248   const Comparator* comparator_;
249   std::string data_;
250   Block* block_;
251 
252   BlockConstructor();
253 };
254 
255 // A helper class that converts internal format keys into user keys
256 class KeyConvertingIterator : public InternalIterator {
257  public:
KeyConvertingIterator(InternalIterator * iter,bool arena_mode=false)258   explicit KeyConvertingIterator(InternalIterator* iter,
259                                  bool arena_mode = false)
260       : iter_(iter), arena_mode_(arena_mode) {}
~KeyConvertingIterator()261   ~KeyConvertingIterator() override {
262     if (arena_mode_) {
263       iter_->~InternalIterator();
264     } else {
265       delete iter_;
266     }
267   }
Valid() const268   bool Valid() const override { return iter_->Valid() && status_.ok(); }
Seek(const Slice & target)269   void Seek(const Slice& target) override {
270     ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue);
271     std::string encoded;
272     AppendInternalKey(&encoded, ikey);
273     iter_->Seek(encoded);
274   }
SeekForPrev(const Slice & target)275   void SeekForPrev(const Slice& target) override {
276     ParsedInternalKey ikey(target, kMaxSequenceNumber, kTypeValue);
277     std::string encoded;
278     AppendInternalKey(&encoded, ikey);
279     iter_->SeekForPrev(encoded);
280   }
SeekToFirst()281   void SeekToFirst() override { iter_->SeekToFirst(); }
SeekToLast()282   void SeekToLast() override { iter_->SeekToLast(); }
Next()283   void Next() override { iter_->Next(); }
Prev()284   void Prev() override { iter_->Prev(); }
IsOutOfBound()285   bool IsOutOfBound() override { return iter_->IsOutOfBound(); }
286 
key() const287   Slice key() const override {
288     assert(Valid());
289     ParsedInternalKey parsed_key;
290     if (!ParseInternalKey(iter_->key(), &parsed_key)) {
291       status_ = Status::Corruption("malformed internal key");
292       return Slice("corrupted key");
293     }
294     return parsed_key.user_key;
295   }
296 
value() const297   Slice value() const override { return iter_->value(); }
status() const298   Status status() const override {
299     return status_.ok() ? iter_->status() : status_;
300   }
301 
302  private:
303   mutable Status status_;
304   InternalIterator* iter_;
305   bool arena_mode_;
306 
307   // No copying allowed
308   KeyConvertingIterator(const KeyConvertingIterator&);
309   void operator=(const KeyConvertingIterator&);
310 };
311 
312 class TableConstructor: public Constructor {
313  public:
TableConstructor(const Comparator * cmp,bool convert_to_internal_key=false,int level=-1,SequenceNumber largest_seqno=0)314   explicit TableConstructor(const Comparator* cmp,
315                             bool convert_to_internal_key = false,
316                             int level = -1, SequenceNumber largest_seqno = 0)
317       : Constructor(cmp),
318         largest_seqno_(largest_seqno),
319         convert_to_internal_key_(convert_to_internal_key),
320         level_(level) {
321     env_ = ROCKSDB_NAMESPACE::Env::Default();
322   }
~TableConstructor()323   ~TableConstructor() override { Reset(); }
324 
FinishImpl(const Options & options,const ImmutableCFOptions & ioptions,const MutableCFOptions & moptions,const BlockBasedTableOptions &,const InternalKeyComparator & internal_comparator,const stl_wrappers::KVMap & kv_map)325   Status FinishImpl(const Options& options, const ImmutableCFOptions& ioptions,
326                     const MutableCFOptions& moptions,
327                     const BlockBasedTableOptions& /*table_options*/,
328                     const InternalKeyComparator& internal_comparator,
329                     const stl_wrappers::KVMap& kv_map) override {
330     Reset();
331     soptions.use_mmap_reads = ioptions.allow_mmap_reads;
332     file_writer_.reset(test::GetWritableFileWriter(new test::StringSink(),
333                                                    "" /* don't care */));
334     std::unique_ptr<TableBuilder> builder;
335     std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
336         int_tbl_prop_collector_factories;
337 
338     if (largest_seqno_ != 0) {
339       // Pretend that it's an external file written by SstFileWriter.
340       int_tbl_prop_collector_factories.emplace_back(
341           new SstFileWriterPropertiesCollectorFactory(2 /* version */,
342                                                       0 /* global_seqno*/));
343     }
344 
345     std::string column_family_name;
346     builder.reset(ioptions.table_factory->NewTableBuilder(
347         TableBuilderOptions(ioptions, moptions, internal_comparator,
348                             &int_tbl_prop_collector_factories,
349                             options.compression, options.sample_for_compression,
350                             options.compression_opts, false /* skip_filters */,
351                             column_family_name, level_),
352         TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
353         file_writer_.get()));
354 
355     for (const auto kv : kv_map) {
356       if (convert_to_internal_key_) {
357         ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue);
358         std::string encoded;
359         AppendInternalKey(&encoded, ikey);
360         builder->Add(encoded, kv.second);
361       } else {
362         builder->Add(kv.first, kv.second);
363       }
364       EXPECT_TRUE(builder->status().ok());
365     }
366     Status s = builder->Finish();
367     file_writer_->Flush();
368     EXPECT_TRUE(s.ok()) << s.ToString();
369 
370     EXPECT_EQ(TEST_GetSink()->contents().size(), builder->FileSize());
371 
372     // Open the table
373     uniq_id_ = cur_uniq_id_++;
374     file_reader_.reset(test::GetRandomAccessFileReader(new test::StringSource(
375         TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads)));
376     const bool kSkipFilters = true;
377     const bool kImmortal = true;
378     return ioptions.table_factory->NewTableReader(
379         TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions,
380                            internal_comparator, !kSkipFilters, !kImmortal,
381                            level_, largest_seqno_, &block_cache_tracer_),
382         std::move(file_reader_), TEST_GetSink()->contents().size(),
383         &table_reader_);
384   }
385 
NewIterator(const SliceTransform * prefix_extractor) const386   InternalIterator* NewIterator(
387       const SliceTransform* prefix_extractor) const override {
388     ReadOptions ro;
389     InternalIterator* iter = table_reader_->NewIterator(
390         ro, prefix_extractor, /*arena=*/nullptr, /*skip_filters=*/false,
391         TableReaderCaller::kUncategorized);
392     if (convert_to_internal_key_) {
393       return new KeyConvertingIterator(iter);
394     } else {
395       return iter;
396     }
397   }
398 
ApproximateOffsetOf(const Slice & key) const399   uint64_t ApproximateOffsetOf(const Slice& key) const {
400     if (convert_to_internal_key_) {
401       InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
402       const Slice skey = ikey.Encode();
403       return table_reader_->ApproximateOffsetOf(
404           skey, TableReaderCaller::kUncategorized);
405     }
406     return table_reader_->ApproximateOffsetOf(
407         key, TableReaderCaller::kUncategorized);
408   }
409 
Reopen(const ImmutableCFOptions & ioptions,const MutableCFOptions & moptions)410   virtual Status Reopen(const ImmutableCFOptions& ioptions,
411                         const MutableCFOptions& moptions) {
412     file_reader_.reset(test::GetRandomAccessFileReader(new test::StringSource(
413         TEST_GetSink()->contents(), uniq_id_, ioptions.allow_mmap_reads)));
414     return ioptions.table_factory->NewTableReader(
415         TableReaderOptions(ioptions, moptions.prefix_extractor.get(), soptions,
416                            *last_internal_key_),
417         std::move(file_reader_), TEST_GetSink()->contents().size(),
418         &table_reader_);
419   }
420 
GetTableReader()421   virtual TableReader* GetTableReader() { return table_reader_.get(); }
422 
AnywayDeleteIterator() const423   bool AnywayDeleteIterator() const override {
424     return convert_to_internal_key_;
425   }
426 
ResetTableReader()427   void ResetTableReader() { table_reader_.reset(); }
428 
ConvertToInternalKey()429   bool ConvertToInternalKey() { return convert_to_internal_key_; }
430 
TEST_GetSink()431   test::StringSink* TEST_GetSink() {
432     return ROCKSDB_NAMESPACE::test::GetStringSinkFromLegacyWriter(
433         file_writer_.get());
434   }
435 
436   BlockCacheTracer block_cache_tracer_;
437 
438  private:
Reset()439   void Reset() {
440     uniq_id_ = 0;
441     table_reader_.reset();
442     file_writer_.reset();
443     file_reader_.reset();
444   }
445 
446   uint64_t uniq_id_;
447   std::unique_ptr<WritableFileWriter> file_writer_;
448   std::unique_ptr<RandomAccessFileReader> file_reader_;
449   std::unique_ptr<TableReader> table_reader_;
450   SequenceNumber largest_seqno_;
451   bool convert_to_internal_key_;
452   int level_;
453 
454   TableConstructor();
455 
456   static uint64_t cur_uniq_id_;
457   EnvOptions soptions;
458   Env* env_;
459 };
460 uint64_t TableConstructor::cur_uniq_id_ = 1;
461 
462 class MemTableConstructor: public Constructor {
463  public:
MemTableConstructor(const Comparator * cmp,WriteBufferManager * wb)464   explicit MemTableConstructor(const Comparator* cmp, WriteBufferManager* wb)
465       : Constructor(cmp),
466         internal_comparator_(cmp),
467         write_buffer_manager_(wb),
468         table_factory_(new SkipListFactory) {
469     options_.memtable_factory = table_factory_;
470     ImmutableCFOptions ioptions(options_);
471     memtable_ =
472         new MemTable(internal_comparator_, ioptions, MutableCFOptions(options_),
473                      wb, kMaxSequenceNumber, 0 /* column_family_id */);
474     memtable_->Ref();
475   }
~MemTableConstructor()476   ~MemTableConstructor() override { delete memtable_->Unref(); }
FinishImpl(const Options &,const ImmutableCFOptions & ioptions,const MutableCFOptions &,const BlockBasedTableOptions &,const InternalKeyComparator &,const stl_wrappers::KVMap & kv_map)477   Status FinishImpl(const Options&, const ImmutableCFOptions& ioptions,
478                     const MutableCFOptions& /*moptions*/,
479                     const BlockBasedTableOptions& /*table_options*/,
480                     const InternalKeyComparator& /*internal_comparator*/,
481                     const stl_wrappers::KVMap& kv_map) override {
482     delete memtable_->Unref();
483     ImmutableCFOptions mem_ioptions(ioptions);
484     memtable_ = new MemTable(internal_comparator_, mem_ioptions,
485                              MutableCFOptions(options_), write_buffer_manager_,
486                              kMaxSequenceNumber, 0 /* column_family_id */);
487     memtable_->Ref();
488     int seq = 1;
489     for (const auto kv : kv_map) {
490       memtable_->Add(seq, kTypeValue, kv.first, kv.second);
491       seq++;
492     }
493     return Status::OK();
494   }
NewIterator(const SliceTransform *) const495   InternalIterator* NewIterator(
496       const SliceTransform* /*prefix_extractor*/) const override {
497     return new KeyConvertingIterator(
498         memtable_->NewIterator(ReadOptions(), &arena_), true);
499   }
500 
AnywayDeleteIterator() const501   bool AnywayDeleteIterator() const override { return true; }
502 
IsArenaMode() const503   bool IsArenaMode() const override { return true; }
504 
505  private:
506   mutable Arena arena_;
507   InternalKeyComparator internal_comparator_;
508   Options options_;
509   WriteBufferManager* write_buffer_manager_;
510   MemTable* memtable_;
511   std::shared_ptr<SkipListFactory> table_factory_;
512 };
513 
514 class InternalIteratorFromIterator : public InternalIterator {
515  public:
InternalIteratorFromIterator(Iterator * it)516   explicit InternalIteratorFromIterator(Iterator* it) : it_(it) {}
Valid() const517   bool Valid() const override { return it_->Valid(); }
Seek(const Slice & target)518   void Seek(const Slice& target) override { it_->Seek(target); }
SeekForPrev(const Slice & target)519   void SeekForPrev(const Slice& target) override { it_->SeekForPrev(target); }
SeekToFirst()520   void SeekToFirst() override { it_->SeekToFirst(); }
SeekToLast()521   void SeekToLast() override { it_->SeekToLast(); }
Next()522   void Next() override { it_->Next(); }
Prev()523   void Prev() override { it_->Prev(); }
key() const524   Slice key() const override { return it_->key(); }
value() const525   Slice value() const override { return it_->value(); }
status() const526   Status status() const override { return it_->status(); }
527 
528  private:
529   std::unique_ptr<Iterator> it_;
530 };
531 
532 class DBConstructor: public Constructor {
533  public:
DBConstructor(const Comparator * cmp)534   explicit DBConstructor(const Comparator* cmp)
535       : Constructor(cmp),
536         comparator_(cmp) {
537     db_ = nullptr;
538     NewDB();
539   }
~DBConstructor()540   ~DBConstructor() override { delete db_; }
FinishImpl(const Options &,const ImmutableCFOptions &,const MutableCFOptions &,const BlockBasedTableOptions &,const InternalKeyComparator &,const stl_wrappers::KVMap & kv_map)541   Status FinishImpl(const Options& /*options*/,
542                     const ImmutableCFOptions& /*ioptions*/,
543                     const MutableCFOptions& /*moptions*/,
544                     const BlockBasedTableOptions& /*table_options*/,
545                     const InternalKeyComparator& /*internal_comparator*/,
546                     const stl_wrappers::KVMap& kv_map) override {
547     delete db_;
548     db_ = nullptr;
549     NewDB();
550     for (const auto kv : kv_map) {
551       WriteBatch batch;
552       batch.Put(kv.first, kv.second);
553       EXPECT_TRUE(db_->Write(WriteOptions(), &batch).ok());
554     }
555     return Status::OK();
556   }
557 
NewIterator(const SliceTransform *) const558   InternalIterator* NewIterator(
559       const SliceTransform* /*prefix_extractor*/) const override {
560     return new InternalIteratorFromIterator(db_->NewIterator(ReadOptions()));
561   }
562 
db() const563   DB* db() const override { return db_; }
564 
565  private:
NewDB()566   void NewDB() {
567     std::string name = test::PerThreadDBPath("table_testdb");
568 
569     Options options;
570     options.comparator = comparator_;
571     Status status = DestroyDB(name, options);
572     ASSERT_TRUE(status.ok()) << status.ToString();
573 
574     options.create_if_missing = true;
575     options.error_if_exists = true;
576     options.write_buffer_size = 10000;  // Something small to force merging
577     status = DB::Open(options, name, &db_);
578     ASSERT_TRUE(status.ok()) << status.ToString();
579   }
580 
581   const Comparator* comparator_;
582   DB* db_;
583 };
584 
// Identifies which kind of storage structure a test configuration exercises.
// HarnessTest::Init() switches on this value to pick the table factory and
// the Constructor subclass.
enum TestType {
  // BlockBasedTableFactory driven through a TableConstructor.
  BLOCK_BASED_TABLE_TEST,
#ifndef ROCKSDB_LITE
  // Plain table with FixedOrLessPrefixTransform(2) as the prefix extractor.
  PLAIN_TABLE_SEMI_FIXED_PREFIX,
  // Plain table with the no-op transform as the prefix extractor.
  PLAIN_TABLE_FULL_STR_PREFIX,
  // Plain table without a prefix extractor.
  PLAIN_TABLE_TOTAL_ORDER,
#endif  // !ROCKSDB_LITE
  // NOTE(review): the three values below are handled outside the part of the
  // file reviewed here; presumably they map to BlockConstructor,
  // MemTableConstructor and DBConstructor respectively - confirm.
  BLOCK_TEST,
  MEMTABLE_TEST,
  DB_TEST
};
596 
// One test configuration. No member has a default initializer, so whoever
// builds a TestArgs must assign every field it wants to be well defined.
struct TestArgs {
  // Which structure to test; selects the case taken in HarnessTest::Init().
  TestType type;
  // When true, Init() installs reverse_key_comparator as the comparator.
  bool reverse_compare;
  // Copied into block_restart_interval and index_block_restart_interval for
  // BLOCK_BASED_TABLE_TEST.
  int restart_interval;
  // Copied into Options::compression.
  CompressionType compression;
  // Copied into BlockBasedTableOptions::format_version for
  // BLOCK_BASED_TABLE_TEST.
  uint32_t format_version;
  // Copied into Options::allow_mmap_reads.
  bool use_mmap;
};
605 
GenerateArgList()606 static std::vector<TestArgs> GenerateArgList() {
607   std::vector<TestArgs> test_args;
608   std::vector<TestType> test_types = {
609       BLOCK_BASED_TABLE_TEST,
610 #ifndef ROCKSDB_LITE
611       PLAIN_TABLE_SEMI_FIXED_PREFIX,
612       PLAIN_TABLE_FULL_STR_PREFIX,
613       PLAIN_TABLE_TOTAL_ORDER,
614 #endif  // !ROCKSDB_LITE
615       BLOCK_TEST,
616       MEMTABLE_TEST, DB_TEST};
617   std::vector<bool> reverse_compare_types = {false, true};
618   std::vector<int> restart_intervals = {16, 1, 1024};
619 
620   // Only add compression if it is supported
621   std::vector<std::pair<CompressionType, bool>> compression_types;
622   compression_types.emplace_back(kNoCompression, false);
623   if (Snappy_Supported()) {
624     compression_types.emplace_back(kSnappyCompression, false);
625   }
626   if (Zlib_Supported()) {
627     compression_types.emplace_back(kZlibCompression, false);
628     compression_types.emplace_back(kZlibCompression, true);
629   }
630   if (BZip2_Supported()) {
631     compression_types.emplace_back(kBZip2Compression, false);
632     compression_types.emplace_back(kBZip2Compression, true);
633   }
634   if (LZ4_Supported()) {
635     compression_types.emplace_back(kLZ4Compression, false);
636     compression_types.emplace_back(kLZ4Compression, true);
637     compression_types.emplace_back(kLZ4HCCompression, false);
638     compression_types.emplace_back(kLZ4HCCompression, true);
639   }
640   if (XPRESS_Supported()) {
641     compression_types.emplace_back(kXpressCompression, false);
642     compression_types.emplace_back(kXpressCompression, true);
643   }
644   if (ZSTD_Supported()) {
645     compression_types.emplace_back(kZSTD, false);
646     compression_types.emplace_back(kZSTD, true);
647   }
648 
649   for (auto test_type : test_types) {
650     for (auto reverse_compare : reverse_compare_types) {
651 #ifndef ROCKSDB_LITE
652       if (test_type == PLAIN_TABLE_SEMI_FIXED_PREFIX ||
653           test_type == PLAIN_TABLE_FULL_STR_PREFIX ||
654           test_type == PLAIN_TABLE_TOTAL_ORDER) {
655         // Plain table doesn't use restart index or compression.
656         TestArgs one_arg;
657         one_arg.type = test_type;
658         one_arg.reverse_compare = reverse_compare;
659         one_arg.restart_interval = restart_intervals[0];
660         one_arg.compression = compression_types[0].first;
661         one_arg.use_mmap = true;
662         test_args.push_back(one_arg);
663         one_arg.use_mmap = false;
664         test_args.push_back(one_arg);
665         continue;
666       }
667 #endif  // !ROCKSDB_LITE
668 
669       for (auto restart_interval : restart_intervals) {
670         for (auto compression_type : compression_types) {
671           TestArgs one_arg;
672           one_arg.type = test_type;
673           one_arg.reverse_compare = reverse_compare;
674           one_arg.restart_interval = restart_interval;
675           one_arg.compression = compression_type.first;
676           one_arg.format_version = compression_type.second ? 2 : 1;
677           one_arg.use_mmap = false;
678           test_args.push_back(one_arg);
679         }
680       }
681     }
682   }
683   return test_args;
684 }
685 
686 // In order to make all tests run for plain table format, including
687 // those operating on empty keys, create a new prefix transformer which
688 // return fixed prefix if the slice is not shorter than the prefix length,
689 // and the full slice if it is shorter.
690 class FixedOrLessPrefixTransform : public SliceTransform {
691  private:
692   const size_t prefix_len_;
693 
694  public:
FixedOrLessPrefixTransform(size_t prefix_len)695   explicit FixedOrLessPrefixTransform(size_t prefix_len) :
696       prefix_len_(prefix_len) {
697   }
698 
Name() const699   const char* Name() const override { return "rocksdb.FixedPrefix"; }
700 
Transform(const Slice & src) const701   Slice Transform(const Slice& src) const override {
702     assert(InDomain(src));
703     if (src.size() < prefix_len_) {
704       return src;
705     }
706     return Slice(src.data(), prefix_len_);
707   }
708 
InDomain(const Slice &) const709   bool InDomain(const Slice& /*src*/) const override { return true; }
710 
InRange(const Slice & dst) const711   bool InRange(const Slice& dst) const override {
712     return (dst.size() <= prefix_len_);
713   }
FullLengthEnabled(size_t *) const714   bool FullLengthEnabled(size_t* /*len*/) const override { return false; }
715 };
716 
717 class HarnessTest : public testing::Test {
718  public:
HarnessTest()719   HarnessTest()
720       : ioptions_(options_),
721         moptions_(options_),
722         constructor_(nullptr),
723         write_buffer_(options_.db_write_buffer_size) {}
724 
Init(const TestArgs & args)725   void Init(const TestArgs& args) {
726     delete constructor_;
727     constructor_ = nullptr;
728     options_ = Options();
729     options_.compression = args.compression;
730     // Use shorter block size for tests to exercise block boundary
731     // conditions more.
732     if (args.reverse_compare) {
733       options_.comparator = &reverse_key_comparator;
734     }
735 
736     internal_comparator_.reset(
737         new test::PlainInternalKeyComparator(options_.comparator));
738 
739     support_prev_ = true;
740     only_support_prefix_seek_ = false;
741     options_.allow_mmap_reads = args.use_mmap;
742     switch (args.type) {
743       case BLOCK_BASED_TABLE_TEST:
744         table_options_.flush_block_policy_factory.reset(
745             new FlushBlockBySizePolicyFactory());
746         table_options_.block_size = 256;
747         table_options_.block_restart_interval = args.restart_interval;
748         table_options_.index_block_restart_interval = args.restart_interval;
749         table_options_.format_version = args.format_version;
750         options_.table_factory.reset(
751             new BlockBasedTableFactory(table_options_));
752         constructor_ = new TableConstructor(
753             options_.comparator, true /* convert_to_internal_key_ */);
754         internal_comparator_.reset(
755             new InternalKeyComparator(options_.comparator));
756         break;
757 // Plain table is not supported in ROCKSDB_LITE
758 #ifndef ROCKSDB_LITE
759       case PLAIN_TABLE_SEMI_FIXED_PREFIX:
760         support_prev_ = false;
761         only_support_prefix_seek_ = true;
762         options_.prefix_extractor.reset(new FixedOrLessPrefixTransform(2));
763         options_.table_factory.reset(NewPlainTableFactory());
764         constructor_ = new TableConstructor(
765             options_.comparator, true /* convert_to_internal_key_ */);
766         internal_comparator_.reset(
767             new InternalKeyComparator(options_.comparator));
768         break;
769       case PLAIN_TABLE_FULL_STR_PREFIX:
770         support_prev_ = false;
771         only_support_prefix_seek_ = true;
772         options_.prefix_extractor.reset(NewNoopTransform());
773         options_.table_factory.reset(NewPlainTableFactory());
774         constructor_ = new TableConstructor(
775             options_.comparator, true /* convert_to_internal_key_ */);
776         internal_comparator_.reset(
777             new InternalKeyComparator(options_.comparator));
778         break;
779       case PLAIN_TABLE_TOTAL_ORDER:
780         support_prev_ = false;
781         only_support_prefix_seek_ = false;
782         options_.prefix_extractor = nullptr;
783 
784         {
785           PlainTableOptions plain_table_options;
786           plain_table_options.user_key_len = kPlainTableVariableLength;
787           plain_table_options.bloom_bits_per_key = 0;
788           plain_table_options.hash_table_ratio = 0;
789 
790           options_.table_factory.reset(
791               NewPlainTableFactory(plain_table_options));
792         }
793         constructor_ = new TableConstructor(
794             options_.comparator, true /* convert_to_internal_key_ */);
795         internal_comparator_.reset(
796             new InternalKeyComparator(options_.comparator));
797         break;
798 #endif  // !ROCKSDB_LITE
799       case BLOCK_TEST:
800         table_options_.block_size = 256;
801         options_.table_factory.reset(
802             new BlockBasedTableFactory(table_options_));
803         constructor_ = new BlockConstructor(options_.comparator);
804         break;
805       case MEMTABLE_TEST:
806         table_options_.block_size = 256;
807         options_.table_factory.reset(
808             new BlockBasedTableFactory(table_options_));
809         constructor_ = new MemTableConstructor(options_.comparator,
810                                                &write_buffer_);
811         break;
812       case DB_TEST:
813         table_options_.block_size = 256;
814         options_.table_factory.reset(
815             new BlockBasedTableFactory(table_options_));
816         constructor_ = new DBConstructor(options_.comparator);
817         break;
818     }
819     ioptions_ = ImmutableCFOptions(options_);
820     moptions_ = MutableCFOptions(options_);
821   }
822 
~HarnessTest()823   ~HarnessTest() override { delete constructor_; }
824 
Add(const std::string & key,const std::string & value)825   void Add(const std::string& key, const std::string& value) {
826     constructor_->Add(key, value);
827   }
828 
Test(Random * rnd)829   void Test(Random* rnd) {
830     std::vector<std::string> keys;
831     stl_wrappers::KVMap data;
832     constructor_->Finish(options_, ioptions_, moptions_, table_options_,
833                          *internal_comparator_, &keys, &data);
834 
835     TestForwardScan(keys, data);
836     if (support_prev_) {
837       TestBackwardScan(keys, data);
838     }
839     TestRandomAccess(rnd, keys, data);
840   }
841 
TestForwardScan(const std::vector<std::string> &,const stl_wrappers::KVMap & data)842   void TestForwardScan(const std::vector<std::string>& /*keys*/,
843                        const stl_wrappers::KVMap& data) {
844     InternalIterator* iter = constructor_->NewIterator();
845     ASSERT_TRUE(!iter->Valid());
846     iter->SeekToFirst();
847     for (stl_wrappers::KVMap::const_iterator model_iter = data.begin();
848          model_iter != data.end(); ++model_iter) {
849       ASSERT_EQ(ToString(data, model_iter), ToString(iter));
850       iter->Next();
851     }
852     ASSERT_TRUE(!iter->Valid());
853     if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
854       iter->~InternalIterator();
855     } else {
856       delete iter;
857     }
858   }
859 
TestBackwardScan(const std::vector<std::string> &,const stl_wrappers::KVMap & data)860   void TestBackwardScan(const std::vector<std::string>& /*keys*/,
861                         const stl_wrappers::KVMap& data) {
862     InternalIterator* iter = constructor_->NewIterator();
863     ASSERT_TRUE(!iter->Valid());
864     iter->SeekToLast();
865     for (stl_wrappers::KVMap::const_reverse_iterator model_iter = data.rbegin();
866          model_iter != data.rend(); ++model_iter) {
867       ASSERT_EQ(ToString(data, model_iter), ToString(iter));
868       iter->Prev();
869     }
870     ASSERT_TRUE(!iter->Valid());
871     if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
872       iter->~InternalIterator();
873     } else {
874       delete iter;
875     }
876   }
877 
TestRandomAccess(Random * rnd,const std::vector<std::string> & keys,const stl_wrappers::KVMap & data)878   void TestRandomAccess(Random* rnd, const std::vector<std::string>& keys,
879                         const stl_wrappers::KVMap& data) {
880     static const bool kVerbose = false;
881     InternalIterator* iter = constructor_->NewIterator();
882     ASSERT_TRUE(!iter->Valid());
883     stl_wrappers::KVMap::const_iterator model_iter = data.begin();
884     if (kVerbose) fprintf(stderr, "---\n");
885     for (int i = 0; i < 200; i++) {
886       const int toss = rnd->Uniform(support_prev_ ? 5 : 3);
887       switch (toss) {
888         case 0: {
889           if (iter->Valid()) {
890             if (kVerbose) fprintf(stderr, "Next\n");
891             iter->Next();
892             ++model_iter;
893             ASSERT_EQ(ToString(data, model_iter), ToString(iter));
894           }
895           break;
896         }
897 
898         case 1: {
899           if (kVerbose) fprintf(stderr, "SeekToFirst\n");
900           iter->SeekToFirst();
901           model_iter = data.begin();
902           ASSERT_EQ(ToString(data, model_iter), ToString(iter));
903           break;
904         }
905 
906         case 2: {
907           std::string key = PickRandomKey(rnd, keys);
908           model_iter = data.lower_bound(key);
909           if (kVerbose) fprintf(stderr, "Seek '%s'\n",
910                                 EscapeString(key).c_str());
911           iter->Seek(Slice(key));
912           ASSERT_EQ(ToString(data, model_iter), ToString(iter));
913           break;
914         }
915 
916         case 3: {
917           if (iter->Valid()) {
918             if (kVerbose) fprintf(stderr, "Prev\n");
919             iter->Prev();
920             if (model_iter == data.begin()) {
921               model_iter = data.end();   // Wrap around to invalid value
922             } else {
923               --model_iter;
924             }
925             ASSERT_EQ(ToString(data, model_iter), ToString(iter));
926           }
927           break;
928         }
929 
930         case 4: {
931           if (kVerbose) fprintf(stderr, "SeekToLast\n");
932           iter->SeekToLast();
933           if (keys.empty()) {
934             model_iter = data.end();
935           } else {
936             std::string last = data.rbegin()->first;
937             model_iter = data.lower_bound(last);
938           }
939           ASSERT_EQ(ToString(data, model_iter), ToString(iter));
940           break;
941         }
942       }
943     }
944     if (constructor_->IsArenaMode() && !constructor_->AnywayDeleteIterator()) {
945       iter->~InternalIterator();
946     } else {
947       delete iter;
948     }
949   }
950 
ToString(const stl_wrappers::KVMap & data,const stl_wrappers::KVMap::const_iterator & it)951   std::string ToString(const stl_wrappers::KVMap& data,
952                        const stl_wrappers::KVMap::const_iterator& it) {
953     if (it == data.end()) {
954       return "END";
955     } else {
956       return "'" + it->first + "->" + it->second + "'";
957     }
958   }
959 
ToString(const stl_wrappers::KVMap & data,const stl_wrappers::KVMap::const_reverse_iterator & it)960   std::string ToString(const stl_wrappers::KVMap& data,
961                        const stl_wrappers::KVMap::const_reverse_iterator& it) {
962     if (it == data.rend()) {
963       return "END";
964     } else {
965       return "'" + it->first + "->" + it->second + "'";
966     }
967   }
968 
ToString(const InternalIterator * it)969   std::string ToString(const InternalIterator* it) {
970     if (!it->Valid()) {
971       return "END";
972     } else {
973       return "'" + it->key().ToString() + "->" + it->value().ToString() + "'";
974     }
975   }
976 
PickRandomKey(Random * rnd,const std::vector<std::string> & keys)977   std::string PickRandomKey(Random* rnd, const std::vector<std::string>& keys) {
978     if (keys.empty()) {
979       return "foo";
980     } else {
981       const int index = rnd->Uniform(static_cast<int>(keys.size()));
982       std::string result = keys[index];
983       switch (rnd->Uniform(support_prev_ ? 3 : 1)) {
984         case 0:
985           // Return an existing key
986           break;
987         case 1: {
988           // Attempt to return something smaller than an existing key
989           if (result.size() > 0 && result[result.size() - 1] > '\0'
990               && (!only_support_prefix_seek_
991                   || options_.prefix_extractor->Transform(result).size()
992                   < result.size())) {
993             result[result.size() - 1]--;
994           }
995           break;
996       }
997         case 2: {
998           // Return something larger than an existing key
999           Increment(options_.comparator, &result);
1000           break;
1001         }
1002       }
1003       return result;
1004     }
1005   }
1006 
1007   // Returns nullptr if not running against a DB
db() const1008   DB* db() const { return constructor_->db(); }
1009 
RandomizedHarnessTest(size_t part,size_t total)1010   void RandomizedHarnessTest(size_t part, size_t total) {
1011     std::vector<TestArgs> args = GenerateArgList();
1012     assert(part);
1013     assert(part <= total);
1014     for (size_t i = 0; i < args.size(); i++) {
1015       if ((i % total) + 1 != part) {
1016         continue;
1017       }
1018       Init(args[i]);
1019       Random rnd(test::RandomSeed() + 5);
1020       for (int num_entries = 0; num_entries < 2000;
1021            num_entries += (num_entries < 50 ? 1 : 200)) {
1022         for (int e = 0; e < num_entries; e++) {
1023           std::string v;
1024           Add(test::RandomKey(&rnd, rnd.Skewed(4)),
1025               test::RandomString(&rnd, rnd.Skewed(5), &v).ToString());
1026         }
1027         Test(&rnd);
1028       }
1029     }
1030   }
1031 
1032  private:
1033   Options options_ = Options();
1034   ImmutableCFOptions ioptions_;
1035   MutableCFOptions moptions_;
1036   BlockBasedTableOptions table_options_ = BlockBasedTableOptions();
1037   Constructor* constructor_;
1038   WriteBufferManager write_buffer_;
1039   bool support_prev_;
1040   bool only_support_prefix_seek_;
1041   std::shared_ptr<InternalKeyComparator> internal_comparator_;
1042 };
1043 
// Returns true when `val` lies in the closed interval [low, high].
// On failure the offending value and the bounds are printed to stderr.
static bool Between(uint64_t val, uint64_t low, uint64_t high) {
  if (low <= val && val <= high) {
    return true;
  }
  fprintf(stderr, "Value %llu is not in range [%llu, %llu]\n",
          static_cast<unsigned long long>(val),
          static_cast<unsigned long long>(low),
          static_cast<unsigned long long>(high));
  return false;
}
1054 
1055 // Tests against all kinds of tables
1056 class TableTest : public testing::Test {
1057  public:
GetPlainInternalComparator(const Comparator * comp)1058   const InternalKeyComparator& GetPlainInternalComparator(
1059       const Comparator* comp) {
1060     if (!plain_internal_comparator) {
1061       plain_internal_comparator.reset(
1062           new test::PlainInternalKeyComparator(comp));
1063     }
1064     return *plain_internal_comparator;
1065   }
1066   void IndexTest(BlockBasedTableOptions table_options);
1067 
1068  private:
1069   std::unique_ptr<InternalKeyComparator> plain_internal_comparator;
1070 };
1071 
// Fixture that adds nothing to TableTest; presumably exists so tests that are
// not specific to one table format get their own test-suite name.
class GeneralTableTest : public TableTest {};
1073 class BlockBasedTableTest
1074     : public TableTest,
1075       virtual public ::testing::WithParamInterface<uint32_t> {
1076  public:
BlockBasedTableTest()1077   BlockBasedTableTest() : format_(GetParam()) {
1078     env_ = ROCKSDB_NAMESPACE::Env::Default();
1079   }
1080 
GetBlockBasedTableOptions()1081   BlockBasedTableOptions GetBlockBasedTableOptions() {
1082     BlockBasedTableOptions options;
1083     options.format_version = format_;
1084     return options;
1085   }
1086 
SetupTracingTest(TableConstructor * c)1087   void SetupTracingTest(TableConstructor* c) {
1088     test_path_ = test::PerThreadDBPath("block_based_table_tracing_test");
1089     EXPECT_OK(env_->CreateDir(test_path_));
1090     trace_file_path_ = test_path_ + "/block_cache_trace_file";
1091     TraceOptions trace_opt;
1092     std::unique_ptr<TraceWriter> trace_writer;
1093     EXPECT_OK(NewFileTraceWriter(env_, EnvOptions(), trace_file_path_,
1094                                  &trace_writer));
1095     c->block_cache_tracer_.StartTrace(env_, trace_opt, std::move(trace_writer));
1096     {
1097       std::string user_key = "k01";
1098       InternalKey internal_key(user_key, 0, kTypeValue);
1099       std::string encoded_key = internal_key.Encode().ToString();
1100       c->Add(encoded_key, kDummyValue);
1101     }
1102     {
1103       std::string user_key = "k02";
1104       InternalKey internal_key(user_key, 0, kTypeValue);
1105       std::string encoded_key = internal_key.Encode().ToString();
1106       c->Add(encoded_key, kDummyValue);
1107     }
1108   }
1109 
VerifyBlockAccessTrace(TableConstructor * c,const std::vector<BlockCacheTraceRecord> & expected_records)1110   void VerifyBlockAccessTrace(
1111       TableConstructor* c,
1112       const std::vector<BlockCacheTraceRecord>& expected_records) {
1113     c->block_cache_tracer_.EndTrace();
1114 
1115     std::unique_ptr<TraceReader> trace_reader;
1116     Status s =
1117         NewFileTraceReader(env_, EnvOptions(), trace_file_path_, &trace_reader);
1118     EXPECT_OK(s);
1119     BlockCacheTraceReader reader(std::move(trace_reader));
1120     BlockCacheTraceHeader header;
1121     EXPECT_OK(reader.ReadHeader(&header));
1122     uint32_t index = 0;
1123     while (s.ok()) {
1124       BlockCacheTraceRecord access;
1125       s = reader.ReadAccess(&access);
1126       if (!s.ok()) {
1127         break;
1128       }
1129       ASSERT_LT(index, expected_records.size());
1130       EXPECT_NE("", access.block_key);
1131       EXPECT_EQ(access.block_type, expected_records[index].block_type);
1132       EXPECT_GT(access.block_size, 0);
1133       EXPECT_EQ(access.caller, expected_records[index].caller);
1134       EXPECT_EQ(access.no_insert, expected_records[index].no_insert);
1135       EXPECT_EQ(access.is_cache_hit, expected_records[index].is_cache_hit);
1136       // Get
1137       if (access.caller == TableReaderCaller::kUserGet) {
1138         EXPECT_EQ(access.referenced_key,
1139                   expected_records[index].referenced_key);
1140         EXPECT_EQ(access.get_id, expected_records[index].get_id);
1141         EXPECT_EQ(access.get_from_user_specified_snapshot,
1142                   expected_records[index].get_from_user_specified_snapshot);
1143         if (access.block_type == TraceType::kBlockTraceDataBlock) {
1144           EXPECT_GT(access.referenced_data_size, 0);
1145           EXPECT_GT(access.num_keys_in_block, 0);
1146           EXPECT_EQ(access.referenced_key_exist_in_block,
1147                     expected_records[index].referenced_key_exist_in_block);
1148         }
1149       } else {
1150         EXPECT_EQ(access.referenced_key, "");
1151         EXPECT_EQ(access.get_id, 0);
1152         EXPECT_TRUE(access.get_from_user_specified_snapshot == Boolean::kFalse);
1153         EXPECT_EQ(access.referenced_data_size, 0);
1154         EXPECT_EQ(access.num_keys_in_block, 0);
1155         EXPECT_TRUE(access.referenced_key_exist_in_block == Boolean::kFalse);
1156       }
1157       index++;
1158     }
1159     EXPECT_EQ(index, expected_records.size());
1160     EXPECT_OK(env_->DeleteFile(trace_file_path_));
1161     EXPECT_OK(env_->DeleteDir(test_path_));
1162   }
1163 
1164  protected:
1165   uint64_t IndexUncompressedHelper(bool indexCompress);
1166 
1167  private:
1168   uint32_t format_;
1169   Env* env_;
1170   std::string trace_file_path_;
1171   std::string test_path_;
1172 };
// Fixture that adds nothing to TableTest; presumably used for the plain-table
// tests elsewhere in this file.
class PlainTableTest : public TableTest {};
// Stateless fixture for the table-property tests (derives directly from
// testing::Test, not from TableTest).
class TablePropertyTest : public testing::Test {};
// Fixture that adds nothing to TableTest; presumably used for the block-based
// table tail-prefetch tests elsewhere in this file.
class BBTTailPrefetchTest : public TableTest {};
1176 
1177 // The helper class to test the file checksum
1178 class FileChecksumTestHelper {
1179  public:
FileChecksumTestHelper(bool convert_to_internal_key=false)1180   FileChecksumTestHelper(bool convert_to_internal_key = false)
1181       : convert_to_internal_key_(convert_to_internal_key) {
1182     sink_ = new test::StringSink();
1183   }
~FileChecksumTestHelper()1184   ~FileChecksumTestHelper() {}
1185 
CreateWriteableFile()1186   void CreateWriteableFile() {
1187     file_writer_.reset(test::GetWritableFileWriter(sink_, "" /* don't care */));
1188   }
1189 
SetFileChecksumGenerator(FileChecksumGenerator * checksum_generator)1190   void SetFileChecksumGenerator(FileChecksumGenerator* checksum_generator) {
1191     if (file_writer_ != nullptr) {
1192       file_writer_->TEST_SetFileChecksumGenerator(checksum_generator);
1193     } else {
1194       delete checksum_generator;
1195     }
1196   }
1197 
GetFileWriter()1198   WritableFileWriter* GetFileWriter() { return file_writer_.get(); }
1199 
ResetTableBuilder(std::unique_ptr<TableBuilder> && builder)1200   Status ResetTableBuilder(std::unique_ptr<TableBuilder>&& builder) {
1201     assert(builder != nullptr);
1202     table_builder_ = std::move(builder);
1203     return Status::OK();
1204   }
1205 
AddKVtoKVMap(int num_entries)1206   void AddKVtoKVMap(int num_entries) {
1207     Random rnd(test::RandomSeed());
1208     for (int i = 0; i < num_entries; i++) {
1209       std::string v;
1210       test::RandomString(&rnd, 100, &v);
1211       kv_map_[test::RandomKey(&rnd, 20)] = v;
1212     }
1213   }
1214 
WriteKVAndFlushTable()1215   Status WriteKVAndFlushTable() {
1216     for (const auto kv : kv_map_) {
1217       if (convert_to_internal_key_) {
1218         ParsedInternalKey ikey(kv.first, kMaxSequenceNumber, kTypeValue);
1219         std::string encoded;
1220         AppendInternalKey(&encoded, ikey);
1221         table_builder_->Add(encoded, kv.second);
1222       } else {
1223         table_builder_->Add(kv.first, kv.second);
1224       }
1225       EXPECT_TRUE(table_builder_->status().ok());
1226     }
1227     Status s = table_builder_->Finish();
1228     file_writer_->Flush();
1229     EXPECT_TRUE(s.ok());
1230 
1231     EXPECT_EQ(sink_->contents().size(), table_builder_->FileSize());
1232     return s;
1233   }
1234 
GetFileChecksum()1235   std::string GetFileChecksum() {
1236     file_writer_->Close();
1237     return table_builder_->GetFileChecksum();
1238   }
1239 
GetFileChecksumFuncName()1240   const char* GetFileChecksumFuncName() {
1241     return table_builder_->GetFileChecksumFuncName();
1242   }
1243 
CalculateFileChecksum(FileChecksumGenerator * file_checksum_generator,std::string * checksum)1244   Status CalculateFileChecksum(FileChecksumGenerator* file_checksum_generator,
1245                                std::string* checksum) {
1246     assert(file_checksum_generator != nullptr);
1247     cur_uniq_id_ = checksum_uniq_id_++;
1248     test::StringSink* ss_rw =
1249         ROCKSDB_NAMESPACE::test::GetStringSinkFromLegacyWriter(
1250             file_writer_.get());
1251     file_reader_.reset(test::GetRandomAccessFileReader(
1252         new test::StringSource(ss_rw->contents())));
1253     std::unique_ptr<char[]> scratch(new char[2048]);
1254     Slice result;
1255     uint64_t offset = 0;
1256     Status s;
1257     s = file_reader_->Read(offset, 2048, &result, scratch.get(), nullptr,
1258                            false);
1259     if (!s.ok()) {
1260       return s;
1261     }
1262     while (result.size() != 0) {
1263       file_checksum_generator->Update(scratch.get(), result.size());
1264       offset += static_cast<uint64_t>(result.size());
1265       s = file_reader_->Read(offset, 2048, &result, scratch.get(), nullptr,
1266                              false);
1267       if (!s.ok()) {
1268         return s;
1269       }
1270     }
1271     EXPECT_EQ(offset, static_cast<uint64_t>(table_builder_->FileSize()));
1272     file_checksum_generator->Finalize();
1273     *checksum = file_checksum_generator->GetChecksum();
1274     return Status::OK();
1275   }
1276 
1277  private:
1278   bool convert_to_internal_key_;
1279   uint64_t cur_uniq_id_;
1280   std::unique_ptr<WritableFileWriter> file_writer_;
1281   std::unique_ptr<RandomAccessFileReader> file_reader_;
1282   std::unique_ptr<TableBuilder> table_builder_;
1283   stl_wrappers::KVMap kv_map_;
1284   test::StringSink* sink_;
1285 
1286   static uint64_t checksum_uniq_id_;
1287 };
1288 
1289 uint64_t FileChecksumTestHelper::checksum_uniq_id_ = 1;
1290 
// Instantiate the BlockBasedTableTest TEST_P cases twice: once with the
// default format version and once with the latest one. The parameter is read
// by the fixture via GetParam() and used as the table format version.
INSTANTIATE_TEST_CASE_P(FormatDef, BlockBasedTableTest,
                        testing::Values(test::kDefaultFormatVersion));
INSTANTIATE_TEST_CASE_P(FormatLatest, BlockBasedTableTest,
                        testing::Values(test::kLatestFormatVersion));
1295 
1296 // This test serves as the living tutorial for the prefix scan of user collected
1297 // properties.
TEST_F(TablePropertyTest,PrefixScanTest)1298 TEST_F(TablePropertyTest, PrefixScanTest) {
1299   UserCollectedProperties props{{"num.111.1", "1"},
1300                                 {"num.111.2", "2"},
1301                                 {"num.111.3", "3"},
1302                                 {"num.333.1", "1"},
1303                                 {"num.333.2", "2"},
1304                                 {"num.333.3", "3"},
1305                                 {"num.555.1", "1"},
1306                                 {"num.555.2", "2"},
1307                                 {"num.555.3", "3"}, };
1308 
1309   // prefixes that exist
1310   for (const std::string& prefix : {"num.111", "num.333", "num.555"}) {
1311     int num = 0;
1312     for (auto pos = props.lower_bound(prefix);
1313          pos != props.end() &&
1314              pos->first.compare(0, prefix.size(), prefix) == 0;
1315          ++pos) {
1316       ++num;
1317       auto key = prefix + "." + ToString(num);
1318       ASSERT_EQ(key, pos->first);
1319       ASSERT_EQ(ToString(num), pos->second);
1320     }
1321     ASSERT_EQ(3, num);
1322   }
1323 
1324   // prefixes that don't exist
1325   for (const std::string& prefix :
1326        {"num.000", "num.222", "num.444", "num.666"}) {
1327     auto pos = props.lower_bound(prefix);
1328     ASSERT_TRUE(pos == props.end() ||
1329                 pos->first.compare(0, prefix.size(), prefix) != 0);
1330   }
1331 }
1332 
1333 // This test include all the basic checks except those for index size and block
1334 // size, which will be conducted in separated unit tests.
TEST_P(BlockBasedTableTest,BasicBlockBasedTableProperties)1335 TEST_P(BlockBasedTableTest, BasicBlockBasedTableProperties) {
1336   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1337 
1338   c.Add("a1", "val1");
1339   c.Add("b2", "val2");
1340   c.Add("c3", "val3");
1341   c.Add("d4", "val4");
1342   c.Add("e5", "val5");
1343   c.Add("f6", "val6");
1344   c.Add("g7", "val7");
1345   c.Add("h8", "val8");
1346   c.Add("j9", "val9");
1347   uint64_t diff_internal_user_bytes = 9 * 8;  // 8 is seq size, 9 k-v totally
1348 
1349   std::vector<std::string> keys;
1350   stl_wrappers::KVMap kvmap;
1351   Options options;
1352   options.compression = kNoCompression;
1353   options.statistics = CreateDBStatistics();
1354   options.statistics->set_stats_level(StatsLevel::kAll);
1355   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1356   table_options.block_restart_interval = 1;
1357   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1358 
1359   ImmutableCFOptions ioptions(options);
1360   MutableCFOptions moptions(options);
1361   ioptions.statistics = options.statistics.get();
1362   c.Finish(options, ioptions, moptions, table_options,
1363            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1364   ASSERT_EQ(options.statistics->getTickerCount(NUMBER_BLOCK_NOT_COMPRESSED), 0);
1365 
1366   auto& props = *c.GetTableReader()->GetTableProperties();
1367   ASSERT_EQ(kvmap.size(), props.num_entries);
1368 
1369   auto raw_key_size = kvmap.size() * 2ul;
1370   auto raw_value_size = kvmap.size() * 4ul;
1371 
1372   ASSERT_EQ(raw_key_size + diff_internal_user_bytes, props.raw_key_size);
1373   ASSERT_EQ(raw_value_size, props.raw_value_size);
1374   ASSERT_EQ(1ul, props.num_data_blocks);
1375   ASSERT_EQ("", props.filter_policy_name);  // no filter policy is used
1376 
1377   // Verify data size.
1378   BlockBuilder block_builder(1);
1379   for (const auto& item : kvmap) {
1380     block_builder.Add(item.first, item.second);
1381   }
1382   Slice content = block_builder.Finish();
1383   ASSERT_EQ(content.size() + kBlockTrailerSize + diff_internal_user_bytes,
1384             props.data_size);
1385   c.ResetTableReader();
1386 }
1387 
1388 #ifdef SNAPPY
IndexUncompressedHelper(bool compressed)1389 uint64_t BlockBasedTableTest::IndexUncompressedHelper(bool compressed) {
1390   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1391   constexpr size_t kNumKeys = 10000;
1392 
1393   for (size_t k = 0; k < kNumKeys; ++k) {
1394     c.Add("key" + ToString(k), "val" + ToString(k));
1395   }
1396 
1397   std::vector<std::string> keys;
1398   stl_wrappers::KVMap kvmap;
1399   Options options;
1400   options.compression = kSnappyCompression;
1401   options.statistics = CreateDBStatistics();
1402   options.statistics->set_stats_level(StatsLevel::kAll);
1403   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1404   table_options.block_restart_interval = 1;
1405   table_options.enable_index_compression = compressed;
1406   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1407 
1408   ImmutableCFOptions ioptions(options);
1409   MutableCFOptions moptions(options);
1410   ioptions.statistics = options.statistics.get();
1411   c.Finish(options, ioptions, moptions, table_options,
1412            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1413   c.ResetTableReader();
1414   return options.statistics->getTickerCount(NUMBER_BLOCK_COMPRESSED);
1415 }
// Turning index compression on must add exactly one compressed block (the
// index block) compared with the same table built with it turned off.
TEST_P(BlockBasedTableTest, IndexUncompressed) {
  const uint64_t with_index_compression = IndexUncompressedHelper(true);
  const uint64_t without_index_compression = IndexUncompressedHelper(false);
  EXPECT_EQ(without_index_compression + 1, with_index_compression);
}
1422 #endif  // SNAPPY
1423 
TEST_P(BlockBasedTableTest,BlockBasedTableProperties2)1424 TEST_P(BlockBasedTableTest, BlockBasedTableProperties2) {
1425   TableConstructor c(&reverse_key_comparator);
1426   std::vector<std::string> keys;
1427   stl_wrappers::KVMap kvmap;
1428 
1429   {
1430     Options options;
1431     options.compression = CompressionType::kNoCompression;
1432     BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1433     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1434 
1435     const ImmutableCFOptions ioptions(options);
1436     const MutableCFOptions moptions(options);
1437     c.Finish(options, ioptions, moptions, table_options,
1438              GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1439 
1440     auto& props = *c.GetTableReader()->GetTableProperties();
1441 
1442     // Default comparator
1443     ASSERT_EQ("leveldb.BytewiseComparator", props.comparator_name);
1444     // No merge operator
1445     ASSERT_EQ("nullptr", props.merge_operator_name);
1446     // No prefix extractor
1447     ASSERT_EQ("nullptr", props.prefix_extractor_name);
1448     // No property collectors
1449     ASSERT_EQ("[]", props.property_collectors_names);
1450     // No filter policy is used
1451     ASSERT_EQ("", props.filter_policy_name);
1452     // Compression type == that set:
1453     ASSERT_EQ("NoCompression", props.compression_name);
1454     c.ResetTableReader();
1455   }
1456 
1457   {
1458     Options options;
1459     BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1460     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1461     options.comparator = &reverse_key_comparator;
1462     options.merge_operator = MergeOperators::CreateUInt64AddOperator();
1463     options.prefix_extractor.reset(NewNoopTransform());
1464     options.table_properties_collector_factories.emplace_back(
1465         new DummyPropertiesCollectorFactory1());
1466     options.table_properties_collector_factories.emplace_back(
1467         new DummyPropertiesCollectorFactory2());
1468 
1469     const ImmutableCFOptions ioptions(options);
1470     const MutableCFOptions moptions(options);
1471     c.Finish(options, ioptions, moptions, table_options,
1472              GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1473 
1474     auto& props = *c.GetTableReader()->GetTableProperties();
1475 
1476     ASSERT_EQ("rocksdb.ReverseBytewiseComparator", props.comparator_name);
1477     ASSERT_EQ("UInt64AddOperator", props.merge_operator_name);
1478     ASSERT_EQ("rocksdb.Noop", props.prefix_extractor_name);
1479     ASSERT_EQ("[DummyPropertiesCollector1,DummyPropertiesCollector2]",
1480               props.property_collectors_names);
1481     ASSERT_EQ("", props.filter_policy_name);  // no filter policy is used
1482     c.ResetTableReader();
1483   }
1484 }
1485 
// Writes two range tombstones into a table and checks the tombstones returned
// by the table's range tombstone iterator, both while the table reader is
// alive and after it has been reset.
TEST_P(BlockBasedTableTest, RangeDelBlock) {
  TableConstructor c(BytewiseComparator());
  std::vector<std::string> keys = {"1pika", "2chu"};
  std::vector<std::string> vals = {"p", "c"};

  // What the iterator is expected to yield for the two tombstones added
  // below, in iteration order.
  std::vector<RangeTombstone> expected_tombstones = {
      {"1pika", "2chu", 0},
      {"2chu", "c", 1},
      {"2chu", "c", 0},
      {"c", "p", 0},
  };

  // Tombstone i covers [keys[i], vals[i]) at sequence number i.
  for (int i = 0; i < 2; i++) {
    RangeTombstone tombstone(keys[i], vals[i], i);
    std::pair<InternalKey, Slice> serialized = tombstone.Serialize();
    c.Add(serialized.first.Encode().ToString(), serialized.second);
  }

  std::vector<std::string> sorted_keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  options.compression = kNoCompression;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.block_restart_interval = 1;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  std::unique_ptr<InternalKeyComparator> internal_cmp(
      new InternalKeyComparator(options.comparator));
  c.Finish(options, ioptions, moptions, table_options, *internal_cmp,
           &sorted_keys, &kvmap);

  for (int round = 0; round < 2; ++round) {
    std::unique_ptr<InternalIterator> iter(
        c.GetTableReader()->NewRangeTombstoneIterator(ReadOptions()));
    if (round > 0) {
      // For second iteration, delete the table reader object and verify the
      // iterator can still access its metablock's range tombstones.
      c.ResetTableReader();
    }
    ASSERT_FALSE(iter->Valid());
    iter->SeekToFirst();
    ASSERT_TRUE(iter->Valid());
    for (const auto& expected_t : expected_tombstones) {
      ASSERT_TRUE(iter->Valid());
      ParsedInternalKey parsed_key;
      ASSERT_TRUE(ParseInternalKey(iter->key(), &parsed_key));
      RangeTombstone actual_t(parsed_key, iter->value());
      ASSERT_EQ(actual_t.start_key_, expected_t.start_key_);
      ASSERT_EQ(actual_t.end_key_, expected_t.end_key_);
      ASSERT_EQ(actual_t.seq_, expected_t.seq_);
      iter->Next();
    }
    ASSERT_TRUE(!iter->Valid());
  }
}
1544 
// A table built with a bloom filter policy must record that policy's name in
// its table properties.
TEST_P(BlockBasedTableTest, FilterPolicyNameProperties) {
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  c.Add("a1", "val1");

  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));
  Options options;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);

  // Finish() reports the sorted keys and the key/value map; neither is
  // inspected by this test.
  stl_wrappers::KVMap kvmap;
  std::vector<std::string> keys;
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);

  const auto& table_props = *c.GetTableReader()->GetTableProperties();
  ASSERT_EQ("rocksdb.BuiltinBloomFilter", table_props.filter_policy_name);
  c.ResetTableReader();
}
1563 
1564 //
1565 // BlockBasedTableTest::PrefetchTest
1566 //
AssertKeysInCache(BlockBasedTable * table_reader,const std::vector<std::string> & keys_in_cache,const std::vector<std::string> & keys_not_in_cache,bool convert=false)1567 void AssertKeysInCache(BlockBasedTable* table_reader,
1568                        const std::vector<std::string>& keys_in_cache,
1569                        const std::vector<std::string>& keys_not_in_cache,
1570                        bool convert = false) {
1571   if (convert) {
1572     for (auto key : keys_in_cache) {
1573       InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
1574       ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
1575     }
1576     for (auto key : keys_not_in_cache) {
1577       InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
1578       ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
1579     }
1580   } else {
1581     for (auto key : keys_in_cache) {
1582       ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), key));
1583     }
1584     for (auto key : keys_not_in_cache) {
1585       ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), key));
1586     }
1587   }
1588 }
1589 
PrefetchRange(TableConstructor * c,Options * opt,BlockBasedTableOptions * table_options,const char * key_begin,const char * key_end,const std::vector<std::string> & keys_in_cache,const std::vector<std::string> & keys_not_in_cache,const Status expected_status=Status::OK ())1590 void PrefetchRange(TableConstructor* c, Options* opt,
1591                    BlockBasedTableOptions* table_options, const char* key_begin,
1592                    const char* key_end,
1593                    const std::vector<std::string>& keys_in_cache,
1594                    const std::vector<std::string>& keys_not_in_cache,
1595                    const Status expected_status = Status::OK()) {
1596   // reset the cache and reopen the table
1597   table_options->block_cache = NewLRUCache(16 * 1024 * 1024, 4);
1598   opt->table_factory.reset(NewBlockBasedTableFactory(*table_options));
1599   const ImmutableCFOptions ioptions2(*opt);
1600   const MutableCFOptions moptions(*opt);
1601   ASSERT_OK(c->Reopen(ioptions2, moptions));
1602 
1603   // prefetch
1604   auto* table_reader = dynamic_cast<BlockBasedTable*>(c->GetTableReader());
1605   Status s;
1606   std::unique_ptr<Slice> begin, end;
1607   std::unique_ptr<InternalKey> i_begin, i_end;
1608   if (key_begin != nullptr) {
1609     if (c->ConvertToInternalKey()) {
1610       i_begin.reset(new InternalKey(key_begin, kMaxSequenceNumber, kTypeValue));
1611       begin.reset(new Slice(i_begin->Encode()));
1612     } else {
1613       begin.reset(new Slice(key_begin));
1614     }
1615   }
1616   if (key_end != nullptr) {
1617     if (c->ConvertToInternalKey()) {
1618       i_end.reset(new InternalKey(key_end, kMaxSequenceNumber, kTypeValue));
1619       end.reset(new Slice(i_end->Encode()));
1620     } else {
1621       end.reset(new Slice(key_end));
1622     }
1623   }
1624   s = table_reader->Prefetch(begin.get(), end.get());
1625 
1626   ASSERT_TRUE(s.code() == expected_status.code());
1627 
1628   // assert our expectation in cache warmup
1629   AssertKeysInCache(table_reader, keys_in_cache, keys_not_in_cache,
1630                     c->ConvertToInternalKey());
1631   c->ResetTableReader();
1632 }
1633 
TEST_P(BlockBasedTableTest,PrefetchTest)1634 TEST_P(BlockBasedTableTest, PrefetchTest) {
1635   // The purpose of this test is to test the prefetching operation built into
1636   // BlockBasedTable.
1637   Options opt;
1638   std::unique_ptr<InternalKeyComparator> ikc;
1639   ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
1640   opt.compression = kNoCompression;
1641   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1642   table_options.block_size = 1024;
1643   // big enough so we don't ever lose cached values.
1644   table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
1645   opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
1646 
1647   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
1648   c.Add("k01", "hello");
1649   c.Add("k02", "hello2");
1650   c.Add("k03", std::string(10000, 'x'));
1651   c.Add("k04", std::string(200000, 'x'));
1652   c.Add("k05", std::string(300000, 'x'));
1653   c.Add("k06", "hello3");
1654   c.Add("k07", std::string(100000, 'x'));
1655   std::vector<std::string> keys;
1656   stl_wrappers::KVMap kvmap;
1657   const ImmutableCFOptions ioptions(opt);
1658   const MutableCFOptions moptions(opt);
1659   c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);
1660   c.ResetTableReader();
1661 
1662   // We get the following data spread :
1663   //
1664   // Data block         Index
1665   // ========================
1666   // [ k01 k02 k03 ]    k03
1667   // [ k04         ]    k04
1668   // [ k05         ]    k05
1669   // [ k06 k07     ]    k07
1670 
1671 
1672   // Simple
1673   PrefetchRange(&c, &opt, &table_options,
1674                 /*key_range=*/"k01", "k05",
1675                 /*keys_in_cache=*/{"k01", "k02", "k03", "k04", "k05"},
1676                 /*keys_not_in_cache=*/{"k06", "k07"});
1677   PrefetchRange(&c, &opt, &table_options, "k01", "k01", {"k01", "k02", "k03"},
1678                 {"k04", "k05", "k06", "k07"});
1679   // odd
1680   PrefetchRange(&c, &opt, &table_options, "a", "z",
1681                 {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1682   PrefetchRange(&c, &opt, &table_options, "k00", "k00", {"k01", "k02", "k03"},
1683                 {"k04", "k05", "k06", "k07"});
1684   // Edge cases
1685   PrefetchRange(&c, &opt, &table_options, "k00", "k06",
1686                 {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1687   PrefetchRange(&c, &opt, &table_options, "k00", "zzz",
1688                 {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1689   // null keys
1690   PrefetchRange(&c, &opt, &table_options, nullptr, nullptr,
1691                 {"k01", "k02", "k03", "k04", "k05", "k06", "k07"}, {});
1692   PrefetchRange(&c, &opt, &table_options, "k04", nullptr,
1693                 {"k04", "k05", "k06", "k07"}, {"k01", "k02", "k03"});
1694   PrefetchRange(&c, &opt, &table_options, nullptr, "k05",
1695                 {"k01", "k02", "k03", "k04", "k05"}, {"k06", "k07"});
1696   // invalid
1697   PrefetchRange(&c, &opt, &table_options, "k06", "k00", {}, {},
1698                 Status::InvalidArgument(Slice("k06 "), Slice("k07")));
1699   c.ResetTableReader();
1700 }
1701 
TEST_P(BlockBasedTableTest,TotalOrderSeekOnHashIndex)1702 TEST_P(BlockBasedTableTest, TotalOrderSeekOnHashIndex) {
1703   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
1704   for (int i = 0; i <= 5; ++i) {
1705     Options options;
1706     // Make each key/value an individual block
1707     table_options.block_size = 64;
1708     switch (i) {
1709     case 0:
1710       // Binary search index
1711       table_options.index_type = BlockBasedTableOptions::kBinarySearch;
1712       options.table_factory.reset(new BlockBasedTableFactory(table_options));
1713       break;
1714     case 1:
1715       // Hash search index
1716       table_options.index_type = BlockBasedTableOptions::kHashSearch;
1717       options.table_factory.reset(new BlockBasedTableFactory(table_options));
1718       options.prefix_extractor.reset(NewFixedPrefixTransform(4));
1719       break;
1720     case 2:
1721       // Hash search index with hash_index_allow_collision
1722       table_options.index_type = BlockBasedTableOptions::kHashSearch;
1723       table_options.hash_index_allow_collision = true;
1724       options.table_factory.reset(new BlockBasedTableFactory(table_options));
1725       options.prefix_extractor.reset(NewFixedPrefixTransform(4));
1726       break;
1727     case 3:
1728       // Hash search index with filter policy
1729       table_options.index_type = BlockBasedTableOptions::kHashSearch;
1730       table_options.filter_policy.reset(NewBloomFilterPolicy(10));
1731       options.table_factory.reset(new BlockBasedTableFactory(table_options));
1732       options.prefix_extractor.reset(NewFixedPrefixTransform(4));
1733       break;
1734     case 4:
1735       // Two-level index
1736       table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
1737       options.table_factory.reset(new BlockBasedTableFactory(table_options));
1738       break;
1739     case 5:
1740       // Binary search with first key
1741       table_options.index_type =
1742           BlockBasedTableOptions::kBinarySearchWithFirstKey;
1743       options.table_factory.reset(new BlockBasedTableFactory(table_options));
1744       break;
1745     }
1746 
1747     TableConstructor c(BytewiseComparator(),
1748                        true /* convert_to_internal_key_ */);
1749     c.Add("aaaa1", std::string('a', 56));
1750     c.Add("bbaa1", std::string('a', 56));
1751     c.Add("cccc1", std::string('a', 56));
1752     c.Add("bbbb1", std::string('a', 56));
1753     c.Add("baaa1", std::string('a', 56));
1754     c.Add("abbb1", std::string('a', 56));
1755     c.Add("cccc2", std::string('a', 56));
1756     std::vector<std::string> keys;
1757     stl_wrappers::KVMap kvmap;
1758     const ImmutableCFOptions ioptions(options);
1759     const MutableCFOptions moptions(options);
1760     c.Finish(options, ioptions, moptions, table_options,
1761              GetPlainInternalComparator(options.comparator), &keys, &kvmap);
1762     auto props = c.GetTableReader()->GetTableProperties();
1763     ASSERT_EQ(7u, props->num_data_blocks);
1764     auto* reader = c.GetTableReader();
1765     ReadOptions ro;
1766     ro.total_order_seek = true;
1767     std::unique_ptr<InternalIterator> iter(reader->NewIterator(
1768         ro, moptions.prefix_extractor.get(), /*arena=*/nullptr,
1769         /*skip_filters=*/false, TableReaderCaller::kUncategorized));
1770 
1771     iter->Seek(InternalKey("b", 0, kTypeValue).Encode());
1772     ASSERT_OK(iter->status());
1773     ASSERT_TRUE(iter->Valid());
1774     ASSERT_EQ("baaa1", ExtractUserKey(iter->key()).ToString());
1775     iter->Next();
1776     ASSERT_OK(iter->status());
1777     ASSERT_TRUE(iter->Valid());
1778     ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString());
1779 
1780     iter->Seek(InternalKey("bb", 0, kTypeValue).Encode());
1781     ASSERT_OK(iter->status());
1782     ASSERT_TRUE(iter->Valid());
1783     ASSERT_EQ("bbaa1", ExtractUserKey(iter->key()).ToString());
1784     iter->Next();
1785     ASSERT_OK(iter->status());
1786     ASSERT_TRUE(iter->Valid());
1787     ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString());
1788 
1789     iter->Seek(InternalKey("bbb", 0, kTypeValue).Encode());
1790     ASSERT_OK(iter->status());
1791     ASSERT_TRUE(iter->Valid());
1792     ASSERT_EQ("bbbb1", ExtractUserKey(iter->key()).ToString());
1793     iter->Next();
1794     ASSERT_OK(iter->status());
1795     ASSERT_TRUE(iter->Valid());
1796     ASSERT_EQ("cccc1", ExtractUserKey(iter->key()).ToString());
1797   }
1798 }
1799 
// Seeking to an existing key must succeed when the prefix extractor is the
// no-op transform and a bloom filter is in use, with and without
// total_order_seek.
TEST_P(BlockBasedTableTest, NoopTransformSeek) {
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));

  Options options;
  options.comparator = BytewiseComparator();
  options.prefix_extractor.reset(NewNoopTransform());
  options.table_factory.reset(new BlockBasedTableFactory(table_options));

  TableConstructor c(options.comparator);
  // The user key is deliberately a single byte: the PrefixMayMatch bug this
  // test guards against needs the index key to match the user key exactly.
  const InternalKey only_key("a", 1, kTypeValue);
  c.Add(only_key.Encode().ToString(), "b");

  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  const InternalKeyComparator internal_comparator(options.comparator);
  stl_wrappers::KVMap kvmap;
  std::vector<std::string> keys;
  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
           &keys, &kvmap);

  auto* reader = c.GetTableReader();
  // First with total-order seek enabled, then disabled.
  for (const bool total_order_seek : {true, false}) {
    ReadOptions read_options;
    read_options.total_order_seek = total_order_seek;
    std::unique_ptr<InternalIterator> iter(reader->NewIterator(
        read_options, moptions.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));

    iter->Seek(only_key.Encode());
    ASSERT_OK(iter->status());
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ("a", ExtractUserKey(iter->key()).ToString());
  }
}
1837 
// If the table is reopened with a prefix extractor that differs from the one
// it was built with, the prefix bloom filter must be skipped when reading, so
// existing keys are still found.
TEST_P(BlockBasedTableTest, SkipPrefixBloomFilter) {
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.filter_policy.reset(NewBloomFilterPolicy(2));
  table_options.whole_key_filtering = false;

  Options options;
  options.comparator = BytewiseComparator();
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  // The table is built with a 1-byte fixed prefix ...
  options.prefix_extractor.reset(NewFixedPrefixTransform(1));

  TableConstructor c(options.comparator);
  InternalKey key("abcdefghijk", 1, kTypeValue);
  c.Add(key.Encode().ToString(), "test");
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  const InternalKeyComparator internal_comparator(options.comparator);
  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
           &keys, &kvmap);
  // TODO(Zhongyi): update test to use MutableCFOptions
  // ... and reopened with a 9-byte fixed prefix.
  options.prefix_extractor.reset(NewFixedPrefixTransform(9));
  const ImmutableCFOptions new_ioptions(options);
  const MutableCFOptions new_moptions(options);
  // A failed reopen must fail the test here rather than surface later as a
  // confusing iterator error.
  ASSERT_OK(c.Reopen(new_ioptions, new_moptions));
  auto reader = c.GetTableReader();
  std::unique_ptr<InternalIterator> db_iter(reader->NewIterator(
      ReadOptions(), new_moptions.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  // Test point lookup
  // only one kv
  for (const auto& kv : kvmap) {
    db_iter->Seek(kv.first);
    ASSERT_TRUE(db_iter->Valid());
    ASSERT_OK(db_iter->status());
    ASSERT_EQ(db_iter->key(), kv.first);
    ASSERT_EQ(db_iter->value(), kv.second);
  }
}
1880 
RandomString(Random * rnd,int len)1881 static std::string RandomString(Random* rnd, int len) {
1882   std::string r;
1883   test::RandomString(rnd, len, &r);
1884   return r;
1885 }
1886 
AddInternalKey(TableConstructor * c,const std::string & prefix,std::string value="v",int=800)1887 void AddInternalKey(TableConstructor* c, const std::string& prefix,
1888                     std::string value = "v", int /*suffix_len*/ = 800) {
1889   static Random rnd(1023);
1890   InternalKey k(prefix + RandomString(&rnd, 800), 0, kTypeValue);
1891   c->Add(k.Encode().ToString(), value);
1892 }
1893 
// Shared body for the index tests: builds a ten-key table with the index
// configuration in `table_options` (block size and block cache are overridden
// below) and checks Seek/SeekForPrev/SeekToLast/Next/Prev through a table
// iterator that is created with a 3-byte fixed prefix extractor.
void TableTest::IndexTest(BlockBasedTableOptions table_options) {
  TableConstructor c(BytewiseComparator());

  // The prefix extractor configured below uses the first 3 bytes of a key.
  // AddInternalKey() appends an 800-character random suffix to each prefix;
  // the assertion after Finish() expects the ten keys to end up in five data
  // blocks.
  AddInternalKey(&c, "0015");
  AddInternalKey(&c, "0035");

  AddInternalKey(&c, "0054");
  AddInternalKey(&c, "0055");

  AddInternalKey(&c, "0056");
  AddInternalKey(&c, "0057");

  AddInternalKey(&c, "0058");
  AddInternalKey(&c, "0075");

  AddInternalKey(&c, "0076");
  AddInternalKey(&c, "0095");

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  options.prefix_extractor.reset(NewFixedPrefixTransform(3));
  table_options.block_size = 1700;
  table_options.block_cache = NewLRUCache(1024, 4);
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
           &kvmap);
  auto reader = c.GetTableReader();

  auto props = reader->GetTableProperties();
  ASSERT_EQ(5u, props->num_data_blocks);

  // TODO(Zhongyi): update test to use MutableCFOptions
  std::unique_ptr<InternalIterator> index_iter(reader->NewIterator(
      ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  // -- Seek targets that do not exist themselves but share a 3-byte prefix
  // with existing keys. lower_bound[i] is the key expected after seeking to
  // prefixes[i].
  std::vector<std::string> prefixes = {"001", "003", "005", "007", "009"};
  std::vector<std::string> lower_bound = {
      keys[0], keys[1], keys[2], keys[7], keys[9],
  };

  // find the lower bound of the prefix
  for (size_t i = 0; i < prefixes.size(); ++i) {
    index_iter->Seek(InternalKey(prefixes[i], 0, kTypeValue).Encode());
    ASSERT_OK(index_iter->status());
    ASSERT_TRUE(index_iter->Valid());

    // the iterator must be on the first key carrying that prefix
    ASSERT_EQ(lower_bound[i], index_iter->key().ToString());
    ASSERT_EQ("v", index_iter->value().ToString());
  }

  // upper_bound[i] is the key expected after seeking past every key with
  // prefixes[i]. There is no entry for the last prefix: the loop further down
  // expects an invalid iterator in that case and never reads upper_bound[4].
  std::vector<std::string> upper_bound = {keys[1], keys[2], keys[7], keys[9], };

  // find existing keys
  for (const auto& item : kvmap) {
    // NOTE(review): the seek target is the key with its last 8 bytes
    // stripped by ExtractUserKey(), not a full internal key, whereas the
    // other seeks in this function pass InternalKey(...).Encode() — confirm
    // this is intended.
    auto ukey = ExtractUserKey(item.first).ToString();
    index_iter->Seek(ukey);

    // ASSERT_OK(regular_iter->status());
    ASSERT_OK(index_iter->status());

    // ASSERT_TRUE(regular_iter->Valid());
    ASSERT_TRUE(index_iter->Valid());

    ASSERT_EQ(item.first, index_iter->key().ToString());
    ASSERT_EQ(item.second, index_iter->value().ToString());
  }

  for (size_t i = 0; i < prefixes.size(); ++i) {
    // prefix + "9" sorts after every existing key that shares this prefix.
    auto key = prefixes[i] + "9";
    index_iter->Seek(InternalKey(key, 0, kTypeValue).Encode());

    // The status may be OK or NotFound, but NotFound is only acceptable
    // together with an invalid iterator.
    ASSERT_TRUE(index_iter->status().ok() || index_iter->status().IsNotFound());
    ASSERT_TRUE(!index_iter->status().IsNotFound() || !index_iter->Valid());
    if (i == prefixes.size() - 1) {
      // last prefix: nothing follows it in the table
      ASSERT_TRUE(!index_iter->Valid());
    } else {
      ASSERT_TRUE(index_iter->Valid());
      // the iterator must be on the first key after this prefix
      ASSERT_EQ(upper_bound[i], index_iter->key().ToString());
      ASSERT_EQ("v", index_iter->value().ToString());
    }
  }

  // find keys with prefix that don't match any of the existing prefixes.
  std::vector<std::string> non_exist_prefixes = {"002", "004", "006", "008"};
  for (const auto& prefix : non_exist_prefixes) {
    index_iter->Seek(InternalKey(prefix, 0, kTypeValue).Encode());
    // regular_iter->Seek(prefix);

    ASSERT_OK(index_iter->status());
    // Seek to non-existing prefixes should yield either invalid, or a
    // key with prefix greater than the target.
    if (index_iter->Valid()) {
      Slice ukey = ExtractUserKey(index_iter->key());
      Slice ukey_prefix = options.prefix_extractor->Transform(ukey);
      ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) < 0);
    }
  }
  for (const auto& prefix : non_exist_prefixes) {
    index_iter->SeekForPrev(InternalKey(prefix, 0, kTypeValue).Encode());
    // regular_iter->Seek(prefix);

    ASSERT_OK(index_iter->status());
    // SeekForPrev to non-existing prefixes should yield either invalid, or a
    // key with prefix smaller than the target.
    if (index_iter->Valid()) {
      Slice ukey = ExtractUserKey(index_iter->key());
      Slice ukey_prefix = options.prefix_extractor->Transform(ukey);
      ASSERT_TRUE(BytewiseComparator()->Compare(prefix, ukey_prefix) > 0);
    }
  }

  {
    // Test reseek case. It should impact partitioned index more.
    ReadOptions ro;
    ro.total_order_seek = true;
    std::unique_ptr<InternalIterator> index_iter2(reader->NewIterator(
        ro, moptions.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));

    // Things to cover in partitioned index:
    // 1. Both Seek() and SeekToLast() have an optimization that avoids
    //    re-seeking the leaf index block if it remains the same one, and
    //    they reuse the same variable.
    // 2. When Next() or Prev() is called, the block moves, so the
    //    optimization should kick in only with the current one.
    //
    // The checks below only compare the first four characters of the key,
    // i.e. the string that was passed to AddInternalKey().
    index_iter2->Seek(InternalKey("0055", 0, kTypeValue).Encode());
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0055", index_iter2->key().ToString().substr(0, 4));

    index_iter2->SeekToLast();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));

    index_iter2->Seek(InternalKey("0055", 0, kTypeValue).Encode());
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0055", index_iter2->key().ToString().substr(0, 4));

    // SeekToLast() followed by two Prev() calls: "0095" -> "0076" -> "0075".
    index_iter2->SeekToLast();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));
    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4));

    // Same walk backwards, but positioned with Seek() instead of
    // SeekToLast().
    index_iter2->Seek(InternalKey("0095", 0, kTypeValue).Encode());
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));
    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4));

    // SeekToLast() immediately followed by a Seek() to the same key.
    index_iter2->SeekToLast();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));

    index_iter2->Seek(InternalKey("0095", 0, kTypeValue).Encode());
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));

    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    index_iter2->Prev();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4));

    // Seek() followed by two Next() calls: "0075" -> "0076" -> "0095".
    index_iter2->Seek(InternalKey("0075", 0, kTypeValue).Encode());
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0075", index_iter2->key().ToString().substr(0, 4));

    index_iter2->Next();
    ASSERT_TRUE(index_iter2->Valid());
    index_iter2->Next();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));

    index_iter2->SeekToLast();
    ASSERT_TRUE(index_iter2->Valid());
    ASSERT_EQ("0095", index_iter2->key().ToString().substr(0, 4));
  }

  c.ResetTableReader();
}
2095 
// Runs the shared IndexTest() checks against the binary search index.
TEST_P(BlockBasedTableTest, BinaryIndexTest) {
  BlockBasedTableOptions binary_search_options = GetBlockBasedTableOptions();
  binary_search_options.index_type = BlockBasedTableOptions::kBinarySearch;
  IndexTest(binary_search_options);
}
2101 
// Runs the shared IndexTest() checks against the hash search index.
TEST_P(BlockBasedTableTest, HashIndexTest) {
  BlockBasedTableOptions hash_search_options = GetBlockBasedTableOptions();
  hash_search_options.index_type = BlockBasedTableOptions::kHashSearch;
  IndexTest(hash_search_options);
}
2107 
// Runs the shared IndexTest() checks against the two-level (partitioned)
// index once for every metadata block size from 1 up to one more than the
// estimated maximum index size.
TEST_P(BlockBasedTableTest, PartitionIndexTest) {
  const int kMaxIndexKeys = 5;
  const int kEstMaxIndexKeyValueSize = 32;
  const int kLargestMetadataBlockSize =
      kMaxIndexKeys * kEstMaxIndexKeyValueSize + 1;

  for (int metadata_block_size = 1;
       metadata_block_size <= kLargestMetadataBlockSize;
       ++metadata_block_size) {
    BlockBasedTableOptions partitioned_options = GetBlockBasedTableOptions();
    partitioned_options.index_type =
        BlockBasedTableOptions::kTwoLevelIndexSearch;
    partitioned_options.metadata_block_size = metadata_block_size;
    IndexTest(partitioned_options);
  }
}
2119 
// With ReadTier::kBlockCacheTier the iterator cannot satisfy the seek and
// reports Incomplete. Repeating the same Seek() on that iterator must report
// the same result (the second seek used to crash at some point).
TEST_P(BlockBasedTableTest, IndexSeekOptimizationIncomplete) {
  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  Options options;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);

  TableConstructor c(BytewiseComparator());
  AddInternalKey(&c, "pika");

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
           &kvmap);
  // Unsigned literal: keys.size() is a size_t, and mixing it with a signed
  // literal triggers a signed/unsigned comparison inside ASSERT_EQ.
  ASSERT_EQ(1u, keys.size());

  auto reader = c.GetTableReader();
  ReadOptions ropt;
  ropt.read_tier = ReadTier::kBlockCacheTier;
  std::unique_ptr<InternalIterator> iter(reader->NewIterator(
      ropt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  // Encodes a user key as an internal key with sequence number 0.
  auto ikey = [](Slice user_key) {
    return InternalKey(user_key, 0, kTypeValue).Encode().ToString();
  };

  // The second attempt is the interesting one: it re-seeks the same target on
  // an iterator whose previous seek ended as Incomplete.
  for (int attempt = 0; attempt < 2; ++attempt) {
    SCOPED_TRACE("attempt = " + std::to_string(attempt));
    iter->Seek(ikey("pika"));
    ASSERT_FALSE(iter->Valid());
    ASSERT_TRUE(iter->status().IsIncomplete());
  }
}
2158 
// Runs the shared IndexTest() checks against the binary search index that
// also stores first keys (kBinarySearchWithFirstKey).
TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKey1) {
  BlockBasedTableOptions first_key_options = GetBlockBasedTableOptions();
  first_key_options.index_type =
      BlockBasedTableOptions::kBinarySearchWithFirstKey;
  IndexTest(first_key_options);
}
2164 
2165 class CustomFlushBlockPolicy : public FlushBlockPolicyFactory,
2166                                public FlushBlockPolicy {
2167  public:
CustomFlushBlockPolicy(std::vector<int> keys_per_block)2168   explicit CustomFlushBlockPolicy(std::vector<int> keys_per_block)
2169       : keys_per_block_(keys_per_block) {}
2170 
Name() const2171   const char* Name() const override { return "table_test"; }
NewFlushBlockPolicy(const BlockBasedTableOptions &,const BlockBuilder &) const2172   FlushBlockPolicy* NewFlushBlockPolicy(const BlockBasedTableOptions&,
2173                                         const BlockBuilder&) const override {
2174     return new CustomFlushBlockPolicy(keys_per_block_);
2175   }
2176 
Update(const Slice &,const Slice &)2177   bool Update(const Slice&, const Slice&) override {
2178     if (keys_in_current_block_ >= keys_per_block_.at(current_block_idx_)) {
2179       ++current_block_idx_;
2180       keys_in_current_block_ = 1;
2181       return true;
2182     }
2183 
2184     ++keys_in_current_block_;
2185     return false;
2186   }
2187 
2188   std::vector<int> keys_per_block_;
2189 
2190   int current_block_idx_ = 0;
2191   int keys_in_current_block_ = 0;
2192 };
2193 
TEST_P(BlockBasedTableTest,BinaryIndexWithFirstKey2)2194 TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKey2) {
2195   for (int use_first_key = 0; use_first_key < 2; ++use_first_key) {
2196     SCOPED_TRACE("use_first_key = " + std::to_string(use_first_key));
2197     BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2198     table_options.index_type =
2199         use_first_key ? BlockBasedTableOptions::kBinarySearchWithFirstKey
2200                       : BlockBasedTableOptions::kBinarySearch;
2201     table_options.block_cache = NewLRUCache(10000);  // fits all blocks
2202     table_options.index_shortening =
2203         BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
2204     table_options.flush_block_policy_factory =
2205         std::make_shared<CustomFlushBlockPolicy>(std::vector<int>{2, 1, 3, 2});
2206     Options options;
2207     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2208     options.statistics = CreateDBStatistics();
2209     Statistics* stats = options.statistics.get();
2210     std::unique_ptr<InternalKeyComparator> comparator(
2211         new InternalKeyComparator(BytewiseComparator()));
2212     const ImmutableCFOptions ioptions(options);
2213     const MutableCFOptions moptions(options);
2214 
2215     TableConstructor c(BytewiseComparator());
2216 
2217     // Block 0.
2218     AddInternalKey(&c, "aaaa", "v0");
2219     AddInternalKey(&c, "aaac", "v1");
2220 
2221     // Block 1.
2222     AddInternalKey(&c, "aaca", "v2");
2223 
2224     // Block 2.
2225     AddInternalKey(&c, "caaa", "v3");
2226     AddInternalKey(&c, "caac", "v4");
2227     AddInternalKey(&c, "caae", "v5");
2228 
2229     // Block 3.
2230     AddInternalKey(&c, "ccaa", "v6");
2231     AddInternalKey(&c, "ccac", "v7");
2232 
2233     // Write the file.
2234     std::vector<std::string> keys;
2235     stl_wrappers::KVMap kvmap;
2236     c.Finish(options, ioptions, moptions, table_options, *comparator, &keys,
2237              &kvmap);
2238     ASSERT_EQ(8, keys.size());
2239 
2240     auto reader = c.GetTableReader();
2241     auto props = reader->GetTableProperties();
2242     ASSERT_EQ(4u, props->num_data_blocks);
2243     std::unique_ptr<InternalIterator> iter(reader->NewIterator(
2244         ReadOptions(), /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
2245         /*skip_filters=*/false, TableReaderCaller::kUncategorized));
2246 
2247     // Shouldn't have read data blocks before iterator is seeked.
2248     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2249     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2250 
2251     auto ikey = [](Slice user_key) {
2252       return InternalKey(user_key, 0, kTypeValue).Encode().ToString();
2253     };
2254 
2255     // Seek to a key between blocks. If index contains first key, we shouldn't
2256     // read any data blocks until value is requested.
2257     iter->Seek(ikey("aaba"));
2258     ASSERT_TRUE(iter->Valid());
2259     EXPECT_EQ(keys[2], iter->key().ToString());
2260     EXPECT_EQ(use_first_key ? 0 : 1,
2261               stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2262     EXPECT_EQ("v2", iter->value().ToString());
2263     EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2264     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2265 
2266     // Seek to the middle of a block. The block should be read right away.
2267     iter->Seek(ikey("caab"));
2268     ASSERT_TRUE(iter->Valid());
2269     EXPECT_EQ(keys[4], iter->key().ToString());
2270     EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2271     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2272     EXPECT_EQ("v4", iter->value().ToString());
2273     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2274 
2275     // Seek to just before the same block and don't access value.
2276     // The iterator should keep pinning the block contents.
2277     iter->Seek(ikey("baaa"));
2278     ASSERT_TRUE(iter->Valid());
2279     EXPECT_EQ(keys[3], iter->key().ToString());
2280     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2281 
2282     // Seek to the same block again to check that the block is still pinned.
2283     iter->Seek(ikey("caae"));
2284     ASSERT_TRUE(iter->Valid());
2285     EXPECT_EQ(keys[5], iter->key().ToString());
2286     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2287     EXPECT_EQ("v5", iter->value().ToString());
2288     EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2289     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2290 
2291     // Step forward and fall through to the next block. Don't access value.
2292     iter->Next();
2293     ASSERT_TRUE(iter->Valid());
2294     EXPECT_EQ(keys[6], iter->key().ToString());
2295     EXPECT_EQ(use_first_key ? 2 : 3,
2296               stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2297     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2298 
2299     // Step forward again. Block should be read.
2300     iter->Next();
2301     ASSERT_TRUE(iter->Valid());
2302     EXPECT_EQ(keys[7], iter->key().ToString());
2303     EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2304     EXPECT_EQ("v7", iter->value().ToString());
2305     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2306 
2307     // Step forward and reach the end.
2308     iter->Next();
2309     EXPECT_FALSE(iter->Valid());
2310     EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2311     EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2312 
2313     // Seek to a single-key block and step forward without accessing value.
2314     iter->Seek(ikey("aaca"));
2315     ASSERT_TRUE(iter->Valid());
2316     EXPECT_EQ(keys[2], iter->key().ToString());
2317     EXPECT_EQ(use_first_key ? 0 : 1,
2318               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2319 
2320     iter->Next();
2321     ASSERT_TRUE(iter->Valid());
2322     EXPECT_EQ(keys[3], iter->key().ToString());
2323     EXPECT_EQ(use_first_key ? 1 : 2,
2324               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2325     EXPECT_EQ("v3", iter->value().ToString());
2326     EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2327     EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2328 
2329     // Seek between blocks and step back without accessing value.
2330     iter->Seek(ikey("aaca"));
2331     ASSERT_TRUE(iter->Valid());
2332     EXPECT_EQ(keys[2], iter->key().ToString());
2333     EXPECT_EQ(use_first_key ? 2 : 3,
2334               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2335     EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2336 
2337     iter->Prev();
2338     ASSERT_TRUE(iter->Valid());
2339     EXPECT_EQ(keys[1], iter->key().ToString());
2340     EXPECT_EQ(use_first_key ? 2 : 3,
2341               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2342     // All blocks are in cache now, there'll be no more misses ever.
2343     EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2344     EXPECT_EQ("v1", iter->value().ToString());
2345 
2346     // Next into the next block again.
2347     iter->Next();
2348     ASSERT_TRUE(iter->Valid());
2349     EXPECT_EQ(keys[2], iter->key().ToString());
2350     EXPECT_EQ(use_first_key ? 2 : 4,
2351               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2352 
2353     // Seek to first and step back without accessing value.
2354     iter->SeekToFirst();
2355     ASSERT_TRUE(iter->Valid());
2356     EXPECT_EQ(keys[0], iter->key().ToString());
2357     EXPECT_EQ(use_first_key ? 2 : 5,
2358               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2359 
2360     iter->Prev();
2361     EXPECT_FALSE(iter->Valid());
2362     EXPECT_EQ(use_first_key ? 2 : 5,
2363               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2364 
2365     // Do some SeekForPrev() and SeekToLast() just to cover all methods.
2366     iter->SeekForPrev(ikey("caad"));
2367     ASSERT_TRUE(iter->Valid());
2368     EXPECT_EQ(keys[4], iter->key().ToString());
2369     EXPECT_EQ(use_first_key ? 3 : 6,
2370               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2371     EXPECT_EQ("v4", iter->value().ToString());
2372     EXPECT_EQ(use_first_key ? 3 : 6,
2373               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2374 
2375     iter->SeekToLast();
2376     ASSERT_TRUE(iter->Valid());
2377     EXPECT_EQ(keys[7], iter->key().ToString());
2378     EXPECT_EQ(use_first_key ? 4 : 7,
2379               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2380     EXPECT_EQ("v7", iter->value().ToString());
2381     EXPECT_EQ(use_first_key ? 4 : 7,
2382               stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
2383 
2384     EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
2385 
2386     c.ResetTableReader();
2387   }
2388 }
2389 
// With kBinarySearchWithFirstKey and a table constructed with
// largest_seqno = 42, the key reported by the iterator must carry sequence
// number 42 both before and after the data block has been read.
TEST_P(BlockBasedTableTest, BinaryIndexWithFirstKeyGlobalSeqno) {
  // Encodes a kTypeValue internal key for `user_key` at sequence `seqno`.
  const auto encode = [](Slice user_key, uint64_t seqno) {
    return InternalKey(user_key, seqno, kTypeValue).Encode().ToString();
  };

  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.index_type = BlockBasedTableOptions::kBinarySearchWithFirstKey;
  table_options.block_cache = NewLRUCache(10000);

  Options options;
  options.statistics = CreateDBStatistics();
  Statistics* stats = options.statistics.get();
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  std::unique_ptr<InternalKeyComparator> icmp(
      new InternalKeyComparator(BytewiseComparator()));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);

  TableConstructor c(BytewiseComparator(), /* convert_to_internal_key */ false,
                     /* level */ -1, /* largest_seqno */ 42);

  // Both keys are written with sequence number 0.
  c.Add(encode("b", 0), "x");
  c.Add(encode("c", 0), "y");

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  c.Finish(options, ioptions, moptions, table_options, *icmp, &keys, &kvmap);
  ASSERT_EQ(2, keys.size());

  auto reader = c.GetTableReader();
  ASSERT_EQ(1u, reader->GetTableProperties()->num_data_blocks);
  std::unique_ptr<InternalIterator> iter(reader->NewIterator(
      ReadOptions(), /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  // What the iterator is expected to report for "b": seqno 42, not 0.
  const std::string expected_key = encode("b", 42);

  iter->Seek(encode("a", 0));
  ASSERT_TRUE(iter->Valid());
  EXPECT_EQ(expected_key, iter->key().ToString());
  EXPECT_NE(keys[0], iter->key().ToString());
  // The key must have come from the index; no data block was read yet.
  EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));

  // Asking for the value forces the data block to be loaded ...
  EXPECT_EQ("x", iter->value().ToString());
  EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
  EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
  // ... and the key seen afterwards is unchanged.
  EXPECT_EQ(expected_key, iter->key().ToString());

  c.ResetTableReader();
}
2438 
// It's very hard to figure out the index block size of a table accurately.
// To make sure we get the index size, we just check that as the number of
// keys grows, the index block size also grows.
TEST_P(BlockBasedTableTest, IndexSizeStat) {
  // Random keys are used because human-readable text may compress well,
  // which would make the change in index block size insignificant.
  Random rnd(test::RandomSeed());
  std::vector<std::string> random_keys;
  for (int n = 0; n < 100; ++n) {
    random_keys.push_back(RandomString(&rnd, 10000));
  }

  // Build a fresh table for every prefix of `random_keys`; each table holds
  // one more key than the previous one and must report a strictly larger
  // index size.
  uint64_t previous_index_size = 0;
  for (size_t num_keys = 1; num_keys < random_keys.size(); ++num_keys) {
    Options options;
    options.compression = kNoCompression;
    BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
    table_options.block_restart_interval = 1;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    const ImmutableCFOptions ioptions(options);
    const MutableCFOptions moptions(options);

    TableConstructor c(BytewiseComparator(),
                       true /* convert_to_internal_key_ */);
    for (size_t k = 0; k < num_keys; ++k) {
      c.Add(random_keys[k], "val");
    }

    std::vector<std::string> sorted_keys;
    stl_wrappers::KVMap kvmap;
    c.Finish(options, ioptions, moptions, table_options,
             GetPlainInternalComparator(options.comparator), &sorted_keys,
             &kvmap);

    const auto current_index_size =
        c.GetTableReader()->GetTableProperties()->index_size;
    ASSERT_GT(current_index_size, previous_index_size);
    previous_index_size = current_index_size;
    c.ResetTableReader();
  }
}
2482 
// Checks the num_data_blocks table property: with entries sized just under
// the block size, the table should report one data block per entry.
TEST_P(BlockBasedTableTest, NumBlockStat) {
  Options options;
  options.compression = kNoCompression;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.block_restart_interval = 1;
  table_options.block_size = 1000;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);

  Random rnd(test::RandomSeed());
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  // Each key/value pair is slightly smaller than the block size, so every
  // block ends up holding roughly one pair.
  for (int n = 0; n < 10; ++n) {
    c.Add(RandomString(&rnd, 900), "val");
  }

  std::vector<std::string> sorted_keys;
  stl_wrappers::KVMap kvmap;
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &sorted_keys,
           &kvmap);

  ASSERT_EQ(kvmap.size(),
            c.GetTableReader()->GetTableProperties()->num_data_blocks);
  c.ResetTableReader();
}
2509 
TEST_P(BlockBasedTableTest,TracingGetTest)2510 TEST_P(BlockBasedTableTest, TracingGetTest) {
2511   TableConstructor c(BytewiseComparator());
2512   Options options;
2513   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2514   options.create_if_missing = true;
2515   table_options.block_cache = NewLRUCache(1024 * 1024, 0);
2516   table_options.cache_index_and_filter_blocks = true;
2517   table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
2518   options.table_factory.reset(new BlockBasedTableFactory(table_options));
2519   SetupTracingTest(&c);
2520   std::vector<std::string> keys;
2521   stl_wrappers::KVMap kvmap;
2522   ImmutableCFOptions ioptions(options);
2523   MutableCFOptions moptions(options);
2524   c.Finish(options, ioptions, moptions, table_options,
2525            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2526   std::string user_key = "k01";
2527   InternalKey internal_key(user_key, 0, kTypeValue);
2528   std::string encoded_key = internal_key.Encode().ToString();
2529   for (uint32_t i = 1; i <= 2; i++) {
2530     PinnableSlice value;
2531     GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
2532                            GetContext::kNotFound, user_key, &value, nullptr,
2533                            nullptr, true, nullptr, nullptr, nullptr, nullptr,
2534                            nullptr, nullptr, /*tracing_get_id=*/i);
2535     get_perf_context()->Reset();
2536     ASSERT_OK(c.GetTableReader()->Get(ReadOptions(), encoded_key, &get_context,
2537                                       moptions.prefix_extractor.get()));
2538     ASSERT_EQ(get_context.State(), GetContext::kFound);
2539     ASSERT_EQ(value.ToString(), kDummyValue);
2540   }
2541 
2542   // Verify traces.
2543   std::vector<BlockCacheTraceRecord> expected_records;
2544   // The first two records should be prefetching index and filter blocks.
2545   BlockCacheTraceRecord record;
2546   record.block_type = TraceType::kBlockTraceIndexBlock;
2547   record.caller = TableReaderCaller::kPrefetch;
2548   record.is_cache_hit = Boolean::kFalse;
2549   record.no_insert = Boolean::kFalse;
2550   expected_records.push_back(record);
2551   record.block_type = TraceType::kBlockTraceFilterBlock;
2552   expected_records.push_back(record);
2553   // Then we should have three records for one index, one filter, and one data
2554   // block access.
2555   record.get_id = 1;
2556   record.block_type = TraceType::kBlockTraceIndexBlock;
2557   record.caller = TableReaderCaller::kUserGet;
2558   record.get_from_user_specified_snapshot = Boolean::kFalse;
2559   record.referenced_key = encoded_key;
2560   record.referenced_key_exist_in_block = Boolean::kTrue;
2561   record.is_cache_hit = Boolean::kTrue;
2562   expected_records.push_back(record);
2563   record.block_type = TraceType::kBlockTraceFilterBlock;
2564   expected_records.push_back(record);
2565   record.is_cache_hit = Boolean::kFalse;
2566   record.block_type = TraceType::kBlockTraceDataBlock;
2567   expected_records.push_back(record);
2568   // The second get should all observe cache hits.
2569   record.is_cache_hit = Boolean::kTrue;
2570   record.get_id = 2;
2571   record.block_type = TraceType::kBlockTraceIndexBlock;
2572   record.caller = TableReaderCaller::kUserGet;
2573   record.get_from_user_specified_snapshot = Boolean::kFalse;
2574   record.referenced_key = encoded_key;
2575   expected_records.push_back(record);
2576   record.block_type = TraceType::kBlockTraceFilterBlock;
2577   expected_records.push_back(record);
2578   record.block_type = TraceType::kBlockTraceDataBlock;
2579   expected_records.push_back(record);
2580   VerifyBlockAccessTrace(&c, expected_records);
2581   c.ResetTableReader();
2582 }
2583 
// Calls ApproximateOffsetOf() twice and checks that, apart from the initial
// prefetch, the block cache trace contains only index block accesses.
TEST_P(BlockBasedTableTest, TracingApproximateOffsetOfTest) {
  TableConstructor c(BytewiseComparator());
  Options options;
  options.create_if_missing = true;
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.block_cache = NewLRUCache(1024 * 1024, 0);
  table_options.cache_index_and_filter_blocks = true;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  SetupTracingTest(&c);

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  ImmutableCFOptions ioptions(options);
  MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);

  // The same key is queried on both rounds.
  const std::string encoded_key =
      InternalKey(std::string("k01"), 0, kTypeValue).Encode().ToString();
  for (uint32_t round = 1; round <= 2; round++) {
    c.GetTableReader()->ApproximateOffsetOf(
        encoded_key, TableReaderCaller::kUserApproximateSize);
  }

  // Verify traces. `record` is a running template whose fields persist
  // between appended copies.
  std::vector<BlockCacheTraceRecord> expected_records;
  BlockCacheTraceRecord record;
  const auto expect_access = [&record, &expected_records](TraceType type) {
    record.block_type = type;
    expected_records.push_back(record);
  };

  // The first two records come from prefetching the index and filter blocks.
  record.caller = TableReaderCaller::kPrefetch;
  record.is_cache_hit = Boolean::kFalse;
  record.no_insert = Boolean::kFalse;
  expect_access(TraceType::kBlockTraceIndexBlock);
  expect_access(TraceType::kBlockTraceFilterBlock);

  // Then one index block hit per ApproximateOffsetOf() call, nothing else.
  record.caller = TableReaderCaller::kUserApproximateSize;
  record.is_cache_hit = Boolean::kTrue;
  expect_access(TraceType::kBlockTraceIndexBlock);
  expect_access(TraceType::kBlockTraceIndexBlock);

  VerifyBlockAccessTrace(&c, expected_records);
  c.ResetTableReader();
}
2627 
TEST_P(BlockBasedTableTest,TracingIterator)2628 TEST_P(BlockBasedTableTest, TracingIterator) {
2629   TableConstructor c(BytewiseComparator());
2630   Options options;
2631   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2632   options.create_if_missing = true;
2633   table_options.block_cache = NewLRUCache(1024 * 1024, 0);
2634   table_options.cache_index_and_filter_blocks = true;
2635   table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
2636   options.table_factory.reset(new BlockBasedTableFactory(table_options));
2637   SetupTracingTest(&c);
2638   std::vector<std::string> keys;
2639   stl_wrappers::KVMap kvmap;
2640   ImmutableCFOptions ioptions(options);
2641   MutableCFOptions moptions(options);
2642   c.Finish(options, ioptions, moptions, table_options,
2643            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2644 
2645   for (uint32_t i = 1; i <= 2; i++) {
2646     std::unique_ptr<InternalIterator> iter(c.GetTableReader()->NewIterator(
2647         ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr,
2648         /*skip_filters=*/false, TableReaderCaller::kUserIterator));
2649     iter->SeekToFirst();
2650     while (iter->Valid()) {
2651       iter->key();
2652       iter->value();
2653       iter->Next();
2654     }
2655     ASSERT_OK(iter->status());
2656     iter.reset();
2657   }
2658 
2659   // Verify traces.
2660   std::vector<BlockCacheTraceRecord> expected_records;
2661   // The first two records should be prefetching index and filter blocks.
2662   BlockCacheTraceRecord record;
2663   record.block_type = TraceType::kBlockTraceIndexBlock;
2664   record.caller = TableReaderCaller::kPrefetch;
2665   record.is_cache_hit = Boolean::kFalse;
2666   record.no_insert = Boolean::kFalse;
2667   expected_records.push_back(record);
2668   record.block_type = TraceType::kBlockTraceFilterBlock;
2669   expected_records.push_back(record);
2670   // Then we should have three records for index and two data block access.
2671   record.block_type = TraceType::kBlockTraceIndexBlock;
2672   record.caller = TableReaderCaller::kUserIterator;
2673   record.is_cache_hit = Boolean::kTrue;
2674   expected_records.push_back(record);
2675   record.block_type = TraceType::kBlockTraceDataBlock;
2676   record.is_cache_hit = Boolean::kFalse;
2677   expected_records.push_back(record);
2678   expected_records.push_back(record);
2679   // When we iterate this file for the second time, we should observe all cache
2680   // hits.
2681   record.block_type = TraceType::kBlockTraceIndexBlock;
2682   record.is_cache_hit = Boolean::kTrue;
2683   expected_records.push_back(record);
2684   record.block_type = TraceType::kBlockTraceDataBlock;
2685   expected_records.push_back(record);
2686   expected_records.push_back(record);
2687   VerifyBlockAccessTrace(&c, expected_records);
2688   c.ResetTableReader();
2689 }
2690 
2691 // A simple tool that takes the snapshot of block cache statistics.
2692 class BlockCachePropertiesSnapshot {
2693  public:
BlockCachePropertiesSnapshot(Statistics * statistics)2694   explicit BlockCachePropertiesSnapshot(Statistics* statistics) {
2695     block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_MISS);
2696     block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_HIT);
2697     index_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS);
2698     index_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT);
2699     data_block_cache_miss = statistics->getTickerCount(BLOCK_CACHE_DATA_MISS);
2700     data_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_DATA_HIT);
2701     filter_block_cache_miss =
2702         statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS);
2703     filter_block_cache_hit = statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT);
2704     block_cache_bytes_read = statistics->getTickerCount(BLOCK_CACHE_BYTES_READ);
2705     block_cache_bytes_write =
2706         statistics->getTickerCount(BLOCK_CACHE_BYTES_WRITE);
2707   }
2708 
AssertIndexBlockStat(int64_t expected_index_block_cache_miss,int64_t expected_index_block_cache_hit)2709   void AssertIndexBlockStat(int64_t expected_index_block_cache_miss,
2710                             int64_t expected_index_block_cache_hit) {
2711     ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss);
2712     ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit);
2713   }
2714 
AssertFilterBlockStat(int64_t expected_filter_block_cache_miss,int64_t expected_filter_block_cache_hit)2715   void AssertFilterBlockStat(int64_t expected_filter_block_cache_miss,
2716                              int64_t expected_filter_block_cache_hit) {
2717     ASSERT_EQ(expected_filter_block_cache_miss, filter_block_cache_miss);
2718     ASSERT_EQ(expected_filter_block_cache_hit, filter_block_cache_hit);
2719   }
2720 
2721   // Check if the fetched props matches the expected ones.
2722   // TODO(kailiu) Use this only when you disabled filter policy!
AssertEqual(int64_t expected_index_block_cache_miss,int64_t expected_index_block_cache_hit,int64_t expected_data_block_cache_miss,int64_t expected_data_block_cache_hit) const2723   void AssertEqual(int64_t expected_index_block_cache_miss,
2724                    int64_t expected_index_block_cache_hit,
2725                    int64_t expected_data_block_cache_miss,
2726                    int64_t expected_data_block_cache_hit) const {
2727     ASSERT_EQ(expected_index_block_cache_miss, index_block_cache_miss);
2728     ASSERT_EQ(expected_index_block_cache_hit, index_block_cache_hit);
2729     ASSERT_EQ(expected_data_block_cache_miss, data_block_cache_miss);
2730     ASSERT_EQ(expected_data_block_cache_hit, data_block_cache_hit);
2731     ASSERT_EQ(expected_index_block_cache_miss + expected_data_block_cache_miss,
2732               block_cache_miss);
2733     ASSERT_EQ(expected_index_block_cache_hit + expected_data_block_cache_hit,
2734               block_cache_hit);
2735   }
2736 
GetCacheBytesRead()2737   int64_t GetCacheBytesRead() { return block_cache_bytes_read; }
2738 
GetCacheBytesWrite()2739   int64_t GetCacheBytesWrite() { return block_cache_bytes_write; }
2740 
2741  private:
2742   int64_t block_cache_miss = 0;
2743   int64_t block_cache_hit = 0;
2744   int64_t index_block_cache_miss = 0;
2745   int64_t index_block_cache_hit = 0;
2746   int64_t data_block_cache_miss = 0;
2747   int64_t data_block_cache_hit = 0;
2748   int64_t filter_block_cache_miss = 0;
2749   int64_t filter_block_cache_hit = 0;
2750   int64_t block_cache_bytes_read = 0;
2751   int64_t block_cache_bytes_write = 0;
2752 };
2753 
2754 // Make sure, by default, index/filter blocks were pre-loaded (meaning we won't
2755 // use block cache to store them).
TEST_P(BlockBasedTableTest, BlockCacheDisabledTest) {
  Options options;
  options.create_if_missing = true;
  options.statistics = CreateDBStatistics();
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  table_options.block_cache = NewLRUCache(1024, 4);
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));
  options.table_factory.reset(new BlockBasedTableFactory(table_options));
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;

  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  c.Add("key", "value");
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options,
           GetPlainInternalComparator(options.comparator), &keys, &kvmap);

  // preloading filter/index blocks is enabled.
  auto reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
  // Fail the test cleanly instead of dereferencing a null pointer if the
  // reader is not a BlockBasedTable.
  ASSERT_TRUE(reader != nullptr);
  ASSERT_FALSE(reader->TEST_FilterBlockInCache());
  ASSERT_FALSE(reader->TEST_IndexBlockInCache());

  {
    // nothing happens in the beginning
    BlockCachePropertiesSnapshot props(options.statistics.get());
    props.AssertIndexBlockStat(0, 0);
    props.AssertFilterBlockStat(0, 0);
  }

  {
    GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                           GetContext::kNotFound, Slice(), nullptr, nullptr,
                           nullptr, true, nullptr, nullptr);
    // a hack that just to trigger BlockBasedTable::GetFilter.
    // The lookup is expected to complete without error; check the returned
    // status rather than silently dropping it.
    ASSERT_OK(reader->Get(ReadOptions(), "non-exist-key", &get_context,
                          moptions.prefix_extractor.get()));
    // Neither the index nor the filter block may have gone through the block
    // cache as a result of the lookup.
    BlockCachePropertiesSnapshot props(options.statistics.get());
    props.AssertIndexBlockStat(0, 0);
    props.AssertFilterBlockStat(0, 0);
  }
}
2798 
// Due to the difficulties of the interaction between statistics, this test
// only tests the case when "index block is put to block cache"
TEST_P(BlockBasedTableTest,FilterBlockInBlockCache)2801 TEST_P(BlockBasedTableTest, FilterBlockInBlockCache) {
2802   // -- Table construction
2803   Options options;
2804   options.create_if_missing = true;
2805   options.statistics = CreateDBStatistics();
2806 
2807   // Enable the cache for index/filter blocks
2808   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
2809   LRUCacheOptions co;
2810   co.capacity = 2048;
2811   co.num_shard_bits = 2;
2812   co.metadata_charge_policy = kDontChargeCacheMetadata;
2813   table_options.block_cache = NewLRUCache(co);
2814   table_options.cache_index_and_filter_blocks = true;
2815   options.table_factory.reset(new BlockBasedTableFactory(table_options));
2816   std::vector<std::string> keys;
2817   stl_wrappers::KVMap kvmap;
2818 
2819   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
2820   c.Add("key", "value");
2821   const ImmutableCFOptions ioptions(options);
2822   const MutableCFOptions moptions(options);
2823   c.Finish(options, ioptions, moptions, table_options,
2824            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2825   // preloading filter/index blocks is prohibited.
2826   auto* reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
2827   ASSERT_FALSE(reader->TEST_FilterBlockInCache());
2828   ASSERT_TRUE(reader->TEST_IndexBlockInCache());
2829 
2830   // -- PART 1: Open with regular block cache.
2831   // Since block_cache is disabled, no cache activities will be involved.
2832   std::unique_ptr<InternalIterator> iter;
2833 
2834   int64_t last_cache_bytes_read = 0;
2835   // At first, no block will be accessed.
2836   {
2837     BlockCachePropertiesSnapshot props(options.statistics.get());
2838     // index will be added to block cache.
2839     props.AssertEqual(1,  // index block miss
2840                       0, 0, 0);
2841     ASSERT_EQ(props.GetCacheBytesRead(), 0);
2842     ASSERT_EQ(props.GetCacheBytesWrite(),
2843               static_cast<int64_t>(table_options.block_cache->GetUsage()));
2844     last_cache_bytes_read = props.GetCacheBytesRead();
2845   }
2846 
2847   // Only index block will be accessed
2848   {
2849     iter.reset(c.NewIterator(moptions.prefix_extractor.get()));
2850     BlockCachePropertiesSnapshot props(options.statistics.get());
2851     // NOTE: to help better highlight the "detla" of each ticker, I use
2852     // <last_value> + <added_value> to indicate the increment of changed
2853     // value; other numbers remain the same.
2854     props.AssertEqual(1, 0 + 1,  // index block hit
2855                       0, 0);
2856     // Cache hit, bytes read from cache should increase
2857     ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read);
2858     ASSERT_EQ(props.GetCacheBytesWrite(),
2859               static_cast<int64_t>(table_options.block_cache->GetUsage()));
2860     last_cache_bytes_read = props.GetCacheBytesRead();
2861   }
2862 
2863   // Only data block will be accessed
2864   {
2865     iter->SeekToFirst();
2866     BlockCachePropertiesSnapshot props(options.statistics.get());
2867     props.AssertEqual(1, 1, 0 + 1,  // data block miss
2868                       0);
2869     // Cache miss, Bytes read from cache should not change
2870     ASSERT_EQ(props.GetCacheBytesRead(), last_cache_bytes_read);
2871     ASSERT_EQ(props.GetCacheBytesWrite(),
2872               static_cast<int64_t>(table_options.block_cache->GetUsage()));
2873     last_cache_bytes_read = props.GetCacheBytesRead();
2874   }
2875 
2876   // Data block will be in cache
2877   {
2878     iter.reset(c.NewIterator(moptions.prefix_extractor.get()));
2879     iter->SeekToFirst();
2880     BlockCachePropertiesSnapshot props(options.statistics.get());
2881     props.AssertEqual(1, 1 + 1, /* index block hit */
2882                       1, 0 + 1 /* data block hit */);
2883     // Cache hit, bytes read from cache should increase
2884     ASSERT_GT(props.GetCacheBytesRead(), last_cache_bytes_read);
2885     ASSERT_EQ(props.GetCacheBytesWrite(),
2886               static_cast<int64_t>(table_options.block_cache->GetUsage()));
2887   }
2888   // release the iterator so that the block cache can reset correctly.
2889   iter.reset();
2890 
2891   c.ResetTableReader();
2892 
2893   // -- PART 2: Open with very small block cache
2894   // In this test, no block will ever get hit since the block cache is
2895   // too small to fit even one entry.
2896   table_options.block_cache = NewLRUCache(1, 4);
2897   options.statistics = CreateDBStatistics();
2898   options.table_factory.reset(new BlockBasedTableFactory(table_options));
2899   const ImmutableCFOptions ioptions2(options);
2900   const MutableCFOptions moptions2(options);
2901   c.Reopen(ioptions2, moptions2);
2902   {
2903     BlockCachePropertiesSnapshot props(options.statistics.get());
2904     props.AssertEqual(1,  // index block miss
2905                       0, 0, 0);
2906     // Cache miss, Bytes read from cache should not change
2907     ASSERT_EQ(props.GetCacheBytesRead(), 0);
2908   }
2909 
2910   {
2911     // Both index and data block get accessed.
2912     // It first cache index block then data block. But since the cache size
2913     // is only 1, index block will be purged after data block is inserted.
2914     iter.reset(c.NewIterator(moptions2.prefix_extractor.get()));
2915     BlockCachePropertiesSnapshot props(options.statistics.get());
2916     props.AssertEqual(1 + 1,  // index block miss
2917                       0, 0,   // data block miss
2918                       0);
2919     // Cache hit, bytes read from cache should increase
2920     ASSERT_EQ(props.GetCacheBytesRead(), 0);
2921   }
2922 
2923   {
2924     // SeekToFirst() accesses data block. With similar reason, we expect data
2925     // block's cache miss.
2926     iter->SeekToFirst();
2927     BlockCachePropertiesSnapshot props(options.statistics.get());
2928     props.AssertEqual(2, 0, 0 + 1,  // data block miss
2929                       0);
2930     // Cache miss, Bytes read from cache should not change
2931     ASSERT_EQ(props.GetCacheBytesRead(), 0);
2932   }
2933   iter.reset();
2934   c.ResetTableReader();
2935 
2936   // -- PART 3: Open table with bloom filter enabled but not in SST file
2937   table_options.block_cache = NewLRUCache(4096, 4);
2938   table_options.cache_index_and_filter_blocks = false;
2939   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2940 
2941   TableConstructor c3(BytewiseComparator());
2942   std::string user_key = "k01";
2943   InternalKey internal_key(user_key, 0, kTypeValue);
2944   c3.Add(internal_key.Encode().ToString(), "hello");
2945   ImmutableCFOptions ioptions3(options);
2946   MutableCFOptions moptions3(options);
2947   // Generate table without filter policy
2948   c3.Finish(options, ioptions3, moptions3, table_options,
2949             GetPlainInternalComparator(options.comparator), &keys, &kvmap);
2950   c3.ResetTableReader();
2951 
2952   // Open table with filter policy
2953   table_options.filter_policy.reset(NewBloomFilterPolicy(1));
2954   options.table_factory.reset(new BlockBasedTableFactory(table_options));
2955   options.statistics = CreateDBStatistics();
2956   ImmutableCFOptions ioptions4(options);
2957   MutableCFOptions moptions4(options);
2958   ASSERT_OK(c3.Reopen(ioptions4, moptions4));
2959   reader = dynamic_cast<BlockBasedTable*>(c3.GetTableReader());
2960   ASSERT_FALSE(reader->TEST_FilterBlockInCache());
2961   PinnableSlice value;
2962   GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
2963                          GetContext::kNotFound, user_key, &value, nullptr,
2964                          nullptr, true, nullptr, nullptr);
2965   ASSERT_OK(reader->Get(ReadOptions(), internal_key.Encode(), &get_context,
2966                         moptions4.prefix_extractor.get()));
2967   ASSERT_STREQ(value.data(), "hello");
2968   BlockCachePropertiesSnapshot props(options.statistics.get());
2969   props.AssertFilterBlockStat(0, 0);
2970   c3.ResetTableReader();
2971 }
2972 
ValidateBlockSizeDeviation(int value,int expected)2973 void ValidateBlockSizeDeviation(int value, int expected) {
2974   BlockBasedTableOptions table_options;
2975   table_options.block_size_deviation = value;
2976   BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options);
2977 
2978   const BlockBasedTableOptions* normalized_table_options =
2979       (const BlockBasedTableOptions*)factory->GetOptions();
2980   ASSERT_EQ(normalized_table_options->block_size_deviation, expected);
2981 
2982   delete factory;
2983 }
2984 
ValidateBlockRestartInterval(int value,int expected)2985 void ValidateBlockRestartInterval(int value, int expected) {
2986   BlockBasedTableOptions table_options;
2987   table_options.block_restart_interval = value;
2988   BlockBasedTableFactory* factory = new BlockBasedTableFactory(table_options);
2989 
2990   const BlockBasedTableOptions* normalized_table_options =
2991       (const BlockBasedTableOptions*)factory->GetOptions();
2992   ASSERT_EQ(normalized_table_options->block_restart_interval, expected);
2993 
2994   delete factory;
2995 }
2996 
TEST_P(BlockBasedTableTest, InvalidOptions) {
  // One row per probe: the value handed to the factory and the value the
  // factory is expected to report back.
  struct Probe {
    int requested;
    int reported;
  };

  // block_size_deviation: values outside [0, 100] are silently replaced by 0.
  const Probe deviation_probes[] = {
      {-10, 0}, {-1, 0}, {0, 0},   {1, 1},
      {99, 99}, {100, 100}, {101, 0}, {1000, 0},
  };
  for (const Probe& probe : deviation_probes) {
    ValidateBlockSizeDeviation(probe.requested, probe.reported);
  }

  // block_restart_interval: values below 1 are silently replaced by 1.
  const Probe restart_probes[] = {
      {-10, 1}, {-1, 1}, {0, 1}, {1, 1}, {2, 2}, {1000, 1000},
  };
  for (const Probe& probe : restart_probes) {
    ValidateBlockRestartInterval(probe.requested, probe.reported);
  }
}
3016 
// Checks the perf-context block read counters reported by Get() for a
// single-key table, once for a key that exists and once for a key that does
// not, across every combination of filter type and
// cache_index_and_filter_blocks.
TEST_P(BlockBasedTableTest, BlockReadCountTest) {
  // bloom_filter_type = 0 -- block-based filter
  // bloom_filter_type = 1 -- full filter
  for (int bloom_filter_type = 0; bloom_filter_type < 2; ++bloom_filter_type) {
    for (int index_and_filter_in_cache = 0; index_and_filter_in_cache < 2;
         ++index_and_filter_in_cache) {
      Options options;
      options.create_if_missing = true;

      BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
      // NOTE(review): a cache of capacity 1 is presumably too small to retain
      // any block, so every block access shows up as a read -- confirm.
      table_options.block_cache = NewLRUCache(1, 0);
      table_options.cache_index_and_filter_blocks = index_and_filter_in_cache;
      // The second argument selects the block-based builder for type 0.
      table_options.filter_policy.reset(
          NewBloomFilterPolicy(10, bloom_filter_type == 0));
      options.table_factory.reset(new BlockBasedTableFactory(table_options));
      std::vector<std::string> keys;
      stl_wrappers::KVMap kvmap;

      TableConstructor c(BytewiseComparator());
      std::string user_key = "k04";
      InternalKey internal_key(user_key, 0, kTypeValue);
      std::string encoded_key = internal_key.Encode().ToString();
      c.Add(encoded_key, "hello");
      ImmutableCFOptions ioptions(options);
      MutableCFOptions moptions(options);
      // Generate table with filter policy
      c.Finish(options, ioptions, moptions, table_options,
               GetPlainInternalComparator(options.comparator), &keys, &kvmap);
      auto reader = c.GetTableReader();
      PinnableSlice value;
      {
        GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                               GetContext::kNotFound, user_key, &value, nullptr,
                               nullptr, true, nullptr, nullptr);
        // Start from zeroed counters so the assertions below see only this
        // Get().
        get_perf_context()->Reset();
        ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
                              moptions.prefix_extractor.get()));
        if (index_and_filter_in_cache) {
          // data, index and filter block
          ASSERT_EQ(get_perf_context()->block_read_count, 3);
          ASSERT_EQ(get_perf_context()->index_block_read_count, 1);
          ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
        } else {
          // just the data block
          ASSERT_EQ(get_perf_context()->block_read_count, 1);
        }
        ASSERT_EQ(get_context.State(), GetContext::kFound);
        ASSERT_STREQ(value.data(), "hello");
      }

      // Get non-existing key
      user_key = "does-not-exist";
      internal_key = InternalKey(user_key, 0, kTypeValue);
      encoded_key = internal_key.Encode().ToString();

      value.Reset();
      {
        GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                               GetContext::kNotFound, user_key, &value, nullptr,
                               nullptr, true, nullptr, nullptr);
        get_perf_context()->Reset();
        ASSERT_OK(reader->Get(ReadOptions(), encoded_key, &get_context,
                              moptions.prefix_extractor.get()));
        ASSERT_EQ(get_context.State(), GetContext::kNotFound);
      }

      // The counters checked below cover only the second Get(): the perf
      // context was Reset() immediately before it.
      if (index_and_filter_in_cache) {
        if (bloom_filter_type == 0) {
          // with block-based, we read index and then the filter
          ASSERT_EQ(get_perf_context()->block_read_count, 2);
          ASSERT_EQ(get_perf_context()->index_block_read_count, 1);
          ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
        } else {
          // with full-filter, we read filter first and then we stop
          ASSERT_EQ(get_perf_context()->block_read_count, 1);
          ASSERT_EQ(get_perf_context()->filter_block_read_count, 1);
        }
      } else {
        // filter is already in memory and it figures out that the key doesn't
        // exist
        ASSERT_EQ(get_perf_context()->block_read_count, 0);
      }
    }
  }
}
3102 
TEST_P(BlockBasedTableTest,BlockCacheLeak)3103 TEST_P(BlockBasedTableTest, BlockCacheLeak) {
3104   // Check that when we reopen a table we don't lose access to blocks already
3105   // in the cache. This test checks whether the Table actually makes use of the
3106   // unique ID from the file.
3107 
3108   Options opt;
3109   std::unique_ptr<InternalKeyComparator> ikc;
3110   ikc.reset(new test::PlainInternalKeyComparator(opt.comparator));
3111   opt.compression = kNoCompression;
3112   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
3113   table_options.block_size = 1024;
3114   // big enough so we don't ever lose cached values.
3115   table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
3116   opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
3117 
3118   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
3119   c.Add("k01", "hello");
3120   c.Add("k02", "hello2");
3121   c.Add("k03", std::string(10000, 'x'));
3122   c.Add("k04", std::string(200000, 'x'));
3123   c.Add("k05", std::string(300000, 'x'));
3124   c.Add("k06", "hello3");
3125   c.Add("k07", std::string(100000, 'x'));
3126   std::vector<std::string> keys;
3127   stl_wrappers::KVMap kvmap;
3128   const ImmutableCFOptions ioptions(opt);
3129   const MutableCFOptions moptions(opt);
3130   c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);
3131 
3132   std::unique_ptr<InternalIterator> iter(
3133       c.NewIterator(moptions.prefix_extractor.get()));
3134   iter->SeekToFirst();
3135   while (iter->Valid()) {
3136     iter->key();
3137     iter->value();
3138     iter->Next();
3139   }
3140   ASSERT_OK(iter->status());
3141   iter.reset();
3142 
3143   const ImmutableCFOptions ioptions1(opt);
3144   const MutableCFOptions moptions1(opt);
3145   ASSERT_OK(c.Reopen(ioptions1, moptions1));
3146   auto table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
3147   for (const std::string& key : keys) {
3148     InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
3149     ASSERT_TRUE(table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
3150   }
3151   c.ResetTableReader();
3152 
3153   // rerun with different block cache
3154   table_options.block_cache = NewLRUCache(16 * 1024 * 1024, 4);
3155   opt.table_factory.reset(NewBlockBasedTableFactory(table_options));
3156   const ImmutableCFOptions ioptions2(opt);
3157   const MutableCFOptions moptions2(opt);
3158   ASSERT_OK(c.Reopen(ioptions2, moptions2));
3159   table_reader = dynamic_cast<BlockBasedTable*>(c.GetTableReader());
3160   for (const std::string& key : keys) {
3161     InternalKey ikey(key, kMaxSequenceNumber, kTypeValue);
3162     ASSERT_TRUE(!table_reader->TEST_KeyInCache(ReadOptions(), ikey.Encode()));
3163   }
3164   c.ResetTableReader();
3165 }
3166 
3167 namespace {
3168 class CustomMemoryAllocator : public MemoryAllocator {
3169  public:
Name() const3170   const char* Name() const override { return "CustomMemoryAllocator"; }
3171 
Allocate(size_t size)3172   void* Allocate(size_t size) override {
3173     ++numAllocations;
3174     auto ptr = new char[size + 16];
3175     memcpy(ptr, "memory_allocator_", 16);  // mangle first 16 bytes
3176     return reinterpret_cast<void*>(ptr + 16);
3177   }
Deallocate(void * p)3178   void Deallocate(void* p) override {
3179     ++numDeallocations;
3180     char* ptr = reinterpret_cast<char*>(p) - 16;
3181     delete[] ptr;
3182   }
3183 
3184   std::atomic<int> numAllocations;
3185   std::atomic<int> numDeallocations;
3186 };
3187 }  // namespace
3188 
TEST_P(BlockBasedTableTest, MemoryAllocator) {
  // The allocator outlives the scope below so its counters can be inspected
  // after the cache and everything that used it are gone.
  auto custom_memory_allocator = std::make_shared<CustomMemoryAllocator>();
  {
    Options opt;
    opt.compression = kNoCompression;
    std::unique_ptr<InternalKeyComparator> ikc(
        new test::PlainInternalKeyComparator(opt.comparator));

    // LRU block cache wired to the counting allocator.
    LRUCacheOptions cache_opts;
    cache_opts.capacity = 16 * 1024 * 1024;
    cache_opts.num_shard_bits = 4;
    cache_opts.memory_allocator = custom_memory_allocator;

    BlockBasedTableOptions table_options;
    table_options.block_size = 1024;
    table_options.block_cache = NewLRUCache(std::move(cache_opts));
    opt.table_factory.reset(NewBlockBasedTableFactory(table_options));

    TableConstructor c(BytewiseComparator(),
                       true /* convert_to_internal_key_ */);
    c.Add("k01", "hello");
    c.Add("k02", "hello2");
    c.Add("k03", std::string(10000, 'x'));
    c.Add("k04", std::string(200000, 'x'));
    c.Add("k05", std::string(300000, 'x'));
    c.Add("k06", "hello3");
    c.Add("k07", std::string(100000, 'x'));

    std::vector<std::string> keys;
    stl_wrappers::KVMap kvmap;
    const ImmutableCFOptions ioptions(opt);
    const MutableCFOptions moptions(opt);
    c.Finish(opt, ioptions, moptions, table_options, *ikc, &keys, &kvmap);

    // Read every entry once so blocks get pulled through the cache.
    std::unique_ptr<InternalIterator> iter(
        c.NewIterator(moptions.prefix_extractor.get()));
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      iter->key();
      iter->value();
    }
    ASSERT_OK(iter->status());
  }

  // Everything from the scope above has been destroyed, block cache included,
  // so each allocation must have been paired with a deallocation.
  EXPECT_EQ(custom_memory_allocator->numAllocations.load(),
            custom_memory_allocator->numDeallocations.load());
  // And the cache must really have gone through our allocator at least once.
  EXPECT_GT(custom_memory_allocator->numAllocations.load(), 0);
}
3238 
3239 // Test the file checksum of block based table
TEST_P(BlockBasedTableTest,NoFileChecksum)3240 TEST_P(BlockBasedTableTest, NoFileChecksum) {
3241   Options options;
3242   ImmutableCFOptions ioptions(options);
3243   MutableCFOptions moptions(options);
3244   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
3245   std::unique_ptr<InternalKeyComparator> comparator(
3246       new InternalKeyComparator(BytewiseComparator()));
3247   SequenceNumber largest_seqno = 0;
3248   int level = 0;
3249   std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
3250       int_tbl_prop_collector_factories;
3251 
3252   if (largest_seqno != 0) {
3253     // Pretend that it's an external file written by SstFileWriter.
3254     int_tbl_prop_collector_factories.emplace_back(
3255         new SstFileWriterPropertiesCollectorFactory(2 /* version */,
3256                                                     0 /* global_seqno*/));
3257   }
3258   std::string column_family_name;
3259 
3260   FileChecksumTestHelper f(true);
3261   f.CreateWriteableFile();
3262   std::unique_ptr<TableBuilder> builder;
3263   builder.reset(ioptions.table_factory->NewTableBuilder(
3264       TableBuilderOptions(ioptions, moptions, *comparator,
3265                           &int_tbl_prop_collector_factories,
3266                           options.compression, options.sample_for_compression,
3267                           options.compression_opts, false /* skip_filters */,
3268                           column_family_name, level),
3269       TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
3270       f.GetFileWriter()));
3271   f.ResetTableBuilder(std::move(builder));
3272   f.AddKVtoKVMap(1000);
3273   f.WriteKVAndFlushTable();
3274   ASSERT_STREQ(f.GetFileChecksumFuncName(),
3275                kUnknownFileChecksumFuncName.c_str());
3276   ASSERT_STREQ(f.GetFileChecksum().c_str(), kUnknownFileChecksum.c_str());
3277 }
3278 
// Writes a block-based table through a file writer that has a CRC32c checksum
// generator attached, then recomputes the checksum over the written file with
// a second generator and expects both results to be identical.
TEST_P(BlockBasedTableTest, Crc32FileChecksum) {
  Options options;
  options.file_checksum_gen_factory.reset(new FileChecksumGenCrc32cFactory());
  ImmutableCFOptions ioptions(options);
  MutableCFOptions moptions(options);
  // NOTE(review): table_options is not consumed below (the builder comes from
  // ioptions.table_factory); kept in case GetBlockBasedTableOptions() matters
  // for the fixture -- confirm and drop.
  BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
  std::unique_ptr<InternalKeyComparator> comparator(
      new InternalKeyComparator(BytewiseComparator()));
  int level = 0;
  // Intentionally empty: this test does not emulate an SstFileWriter file, so
  // no property collector factory is registered.
  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;
  std::string column_family_name;

  FileChecksumGenContext gen_context;
  gen_context.file_name = "db/tmp";
  std::unique_ptr<FileChecksumGenerator> checksum_crc32_gen1 =
      options.file_checksum_gen_factory->CreateFileChecksumGenerator(
          gen_context);
  FileChecksumTestHelper f(true);
  f.CreateWriteableFile();
  // The helper receives the raw pointer released from the unique_ptr.
  f.SetFileChecksumGenerator(checksum_crc32_gen1.release());
  std::unique_ptr<TableBuilder> builder;
  builder.reset(ioptions.table_factory->NewTableBuilder(
      TableBuilderOptions(ioptions, moptions, *comparator,
                          &int_tbl_prop_collector_factories,
                          options.compression, options.sample_for_compression,
                          options.compression_opts, false /* skip_filters */,
                          column_family_name, level),
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
      f.GetFileWriter()));
  f.ResetTableBuilder(std::move(builder));
  f.AddKVtoKVMap(1000);
  f.WriteKVAndFlushTable();
  ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c");

  std::unique_ptr<FileChecksumGenerator> checksum_crc32_gen2 =
      options.file_checksum_gen_factory->CreateFileChecksumGenerator(
          gen_context);
  std::string checksum;
  ASSERT_OK(f.CalculateFileChecksum(checksum_crc32_gen2.get(), &checksum));
  // Compare the full std::string values: a C-string comparison stops at the
  // first NUL byte, so a checksum containing one would only be partially
  // verified.
  ASSERT_EQ(f.GetFileChecksum(), checksum);
}
3331 
3332 // Plain table is not supported in ROCKSDB_LITE
3333 #ifndef ROCKSDB_LITE
// Writes 26 fixed-size entries ('a'..'z') into a plain table and checks the
// size/count fields of the table properties read back from it.
TEST_F(PlainTableTest, BasicPlainTableProperties) {
  PlainTableOptions plain_table_options;
  plain_table_options.user_key_len = 8;
  plain_table_options.bloom_bits_per_key = 8;
  plain_table_options.hash_table_ratio = 0;

  PlainTableFactory factory(plain_table_options);
  test::StringSink sink;
  std::unique_ptr<WritableFileWriter> file_writer(
      test::GetWritableFileWriter(new test::StringSink(), "" /* don't care */));
  Options options;
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  InternalKeyComparator ikc(options.comparator);
  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;
  std::string column_family_name;
  int unknown_level = -1;

  const TableBuilderOptions builder_options(
      ioptions, moptions, ikc, &int_tbl_prop_collector_factories,
      kNoCompression, 0 /* sample_for_compression */, CompressionOptions(),
      false /* skip_filters */, column_family_name, unknown_level);
  std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
      builder_options,
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
      file_writer.get()));

  // One entry per lowercase letter: 8-byte user key + 8-byte trailer, 28-byte
  // value.
  for (char letter = 'a'; letter <= 'z'; ++letter) {
    // PlainTable expects internal key structure
    const std::string key = std::string(8, letter) + "\1       ";
    std::string value(28, letter + 42);
    builder->Add(key, value);
  }
  ASSERT_OK(builder->Finish());
  file_writer->Flush();

  // Re-read what was just written straight from the in-memory sink.
  test::StringSink* written =
      ROCKSDB_NAMESPACE::test::GetStringSinkFromLegacyWriter(file_writer.get());
  std::unique_ptr<RandomAccessFileReader> file_reader(
      test::GetRandomAccessFileReader(
          new test::StringSource(written->contents(), 72242, true)));

  TableProperties* props = nullptr;
  auto s = ReadTableProperties(file_reader.get(), written->contents().size(),
                               kPlainTableMagicNumber, ioptions,
                               &props, true /* compression_type_missing */);
  // Take ownership before asserting so props is freed even on failure.
  std::unique_ptr<TableProperties> props_guard(props);
  ASSERT_OK(s);

  ASSERT_EQ(0ul, props->index_size);
  ASSERT_EQ(0ul, props->filter_size);
  ASSERT_EQ(16ul * 26, props->raw_key_size);
  ASSERT_EQ(28ul * 26, props->raw_value_size);
  ASSERT_EQ(26ul, props->num_entries);
  ASSERT_EQ(1ul, props->num_data_blocks);
}
3389 
// Plain-table variant: with no checksum generator installed, the helper must
// report the "unknown" checksum function name and checksum after the write.
TEST_F(PlainTableTest, NoFileChecksum) {
  PlainTableOptions plain_table_options;
  plain_table_options.user_key_len = 20;
  plain_table_options.bloom_bits_per_key = 8;
  plain_table_options.hash_table_ratio = 0;
  PlainTableFactory factory(plain_table_options);

  Options options;
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  InternalKeyComparator internal_cmp(options.comparator);
  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;
  std::string column_family_name;
  int unknown_level = -1;
  FileChecksumTestHelper helper(true);
  helper.CreateWriteableFile();

  const TableBuilderOptions builder_options(
      ioptions, moptions, internal_cmp, &int_tbl_prop_collector_factories,
      kNoCompression, 0 /* sample_for_compression */, CompressionOptions(),
      false /* skip_filters */, column_family_name, unknown_level);
  std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
      builder_options,
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
      helper.GetFileWriter()));

  // Hand the builder to the helper, then write 1000 entries and flush.
  helper.ResetTableBuilder(std::move(builder));
  helper.AddKVtoKVMap(1000);
  helper.WriteKVAndFlushTable();

  ASSERT_STREQ(helper.GetFileChecksumFuncName(),
               kUnknownFileChecksumFuncName.c_str());
  EXPECT_EQ(helper.GetFileChecksum(), kUnknownFileChecksum.c_str());
}
3422 
// Plain-table variant: writes a table through a file writer that has a CRC32c
// checksum generator attached, recomputes the checksum over the written file
// with a second generator, and expects both results to be identical.
TEST_F(PlainTableTest, Crc32FileChecksum) {
  PlainTableOptions plain_table_options;
  plain_table_options.user_key_len = 20;
  plain_table_options.bloom_bits_per_key = 8;
  plain_table_options.hash_table_ratio = 0;
  PlainTableFactory factory(plain_table_options);

  Options options;
  options.file_checksum_gen_factory.reset(new FileChecksumGenCrc32cFactory());
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  InternalKeyComparator ikc(options.comparator);
  std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
      int_tbl_prop_collector_factories;
  std::string column_family_name;
  int unknown_level = -1;

  FileChecksumGenContext gen_context;
  gen_context.file_name = "db/tmp";
  std::unique_ptr<FileChecksumGenerator> checksum_crc32_gen1 =
      options.file_checksum_gen_factory->CreateFileChecksumGenerator(
          gen_context);
  FileChecksumTestHelper f(true);
  f.CreateWriteableFile();
  // The helper receives the raw pointer released from the unique_ptr.
  f.SetFileChecksumGenerator(checksum_crc32_gen1.release());

  std::unique_ptr<TableBuilder> builder(factory.NewTableBuilder(
      TableBuilderOptions(
          ioptions, moptions, ikc, &int_tbl_prop_collector_factories,
          kNoCompression, 0 /* sample_for_compression */, CompressionOptions(),
          false /* skip_filters */, column_family_name, unknown_level),
      TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
      f.GetFileWriter()));
  f.ResetTableBuilder(std::move(builder));
  f.AddKVtoKVMap(1000);
  f.WriteKVAndFlushTable();
  ASSERT_STREQ(f.GetFileChecksumFuncName(), "FileChecksumCrc32c");

  std::unique_ptr<FileChecksumGenerator> checksum_crc32_gen2 =
      options.file_checksum_gen_factory->CreateFileChecksumGenerator(
          gen_context);
  std::string checksum;
  ASSERT_OK(f.CalculateFileChecksum(checksum_crc32_gen2.get(), &checksum));
  // Compare the full std::string values: a C-string comparison stops at the
  // first NUL byte, so a checksum containing one would only be partially
  // verified.
  EXPECT_EQ(f.GetFileChecksum(), checksum);
}
3470 
3471 #endif  // !ROCKSDB_LITE
3472 
// Builds an uncompressed table with 1 KiB blocks from values of very
// different sizes and checks that ApproximateOffsetOf() lands in the expected
// byte range for keys before, between, at and after the stored entries.
TEST_F(GeneralTableTest, ApproximateOffsetOfPlain) {
  Options options;
  test::PlainInternalKeyComparator internal_comparator(options.comparator);
  options.compression = kNoCompression;
  BlockBasedTableOptions table_options;
  table_options.block_size = 1024;

  // Entries in insertion order; value sizes drive the expected offsets below.
  const std::pair<const char*, std::string> entries[] = {
      {"k01", "hello"},
      {"k02", "hello2"},
      {"k03", std::string(10000, 'x')},
      {"k04", std::string(200000, 'x')},
      {"k05", std::string(300000, 'x')},
      {"k06", "hello3"},
      {"k07", std::string(100000, 'x')},
  };
  TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
  for (const auto& entry : entries) {
    c.Add(entry.first, entry.second);
  }

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options, internal_comparator,
           &keys, &kvmap);

  ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01a"),      0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"),       0,      0));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"),   10000,  11000));
  // k04 and k05 sit in two consecutive blocks and the index separator is an
  // arbitrary slice between them, so k04a may sort on either side of it.
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04a"), 10000, 211000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"),  210000, 211000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"),  510000, 511000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"),  510000, 511000));
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"),  610000, 612000));
  c.ResetTableReader();
}
3509 
DoCompressionTest(CompressionType comp)3510 static void DoCompressionTest(CompressionType comp) {
3511   Random rnd(301);
3512   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
3513   std::string tmp;
3514   c.Add("k01", "hello");
3515   c.Add("k02", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
3516   c.Add("k03", "hello3");
3517   c.Add("k04", test::CompressibleString(&rnd, 0.25, 10000, &tmp));
3518   std::vector<std::string> keys;
3519   stl_wrappers::KVMap kvmap;
3520   Options options;
3521   test::PlainInternalKeyComparator ikc(options.comparator);
3522   options.compression = comp;
3523   BlockBasedTableOptions table_options;
3524   table_options.block_size = 1024;
3525   const ImmutableCFOptions ioptions(options);
3526   const MutableCFOptions moptions(options);
3527   c.Finish(options, ioptions, moptions, table_options, ikc, &keys, &kvmap);
3528 
3529   ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"),       0,      0));
3530   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"),       0,      0));
3531   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"),       0,      0));
3532   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"),    2000,   3500));
3533   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"),    2000,   3500));
3534   ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"),    4000,   6500));
3535   c.ResetTableReader();
3536 }
3537 
// Runs DoCompressionTest() once for every compression type this build
// supports; unsupported types are reported on stderr and skipped.
TEST_F(GeneralTableTest, ApproximateOffsetOfCompressed) {
  std::vector<CompressionType> codecs;

  if (Snappy_Supported()) {
    codecs.push_back(kSnappyCompression);
  } else {
    fprintf(stderr, "skipping snappy compression tests\n");
  }

  if (Zlib_Supported()) {
    codecs.push_back(kZlibCompression);
  } else {
    fprintf(stderr, "skipping zlib compression tests\n");
  }

  // TODO(kailiu) DoCompressionTest() doesn't work with BZip2.
  /*
  if (!BZip2_Supported()) {
    fprintf(stderr, "skipping bzip2 compression tests\n");
  } else {
    compression_state.push_back(kBZip2Compression);
  }
  */

  if (LZ4_Supported()) {
    codecs.push_back(kLZ4Compression);
    codecs.push_back(kLZ4HCCompression);
  } else {
    fprintf(stderr, "skipping lz4 and lz4hc compression tests\n");
  }

  if (XPRESS_Supported()) {
    codecs.push_back(kXpressCompression);
  } else {
    fprintf(stderr, "skipping xpress and xpress compression tests\n");
  }

  for (const CompressionType codec : codecs) {
    DoCompressionTest(codec);
  }
}
3579 
3580 #ifndef ROCKSDB_VALGRIND_RUN
3581 // RandomizedHarnessTest is very slow for certain combination of arguments
3582 // Split into 8 pieces to reduce the time individual tests take.
TEST_F(HarnessTest, Randomized1) {
  // Slice 1 of the 8-way split of the randomized harness run.
  RandomizedHarnessTest(/*part=*/size_t{1}, /*total=*/size_t{8});
}
3589 
TEST_F(HarnessTest, Randomized2) {
  // part 2 out of 8
  RandomizedHarnessTest(/*part=*/size_t{2}, /*total=*/size_t{8});
}
3596 
TEST_F(HarnessTest, Randomized3) {
  // part 3 out of 8
  RandomizedHarnessTest(/*part=*/size_t{3}, /*total=*/size_t{8});
}
3603 
TEST_F(HarnessTest, Randomized4) {
  // part 4 out of 8
  RandomizedHarnessTest(/*part=*/size_t{4}, /*total=*/size_t{8});
}
3610 
TEST_F(HarnessTest, Randomized5) {
  // part 5 out of 8
  RandomizedHarnessTest(/*part=*/size_t{5}, /*total=*/size_t{8});
}
3617 
TEST_F(HarnessTest, Randomized6) {
  // part 6 out of 8
  RandomizedHarnessTest(/*part=*/size_t{6}, /*total=*/size_t{8});
}
3624 
TEST_F(HarnessTest, Randomized7) {
  // part 7 out of 8
  RandomizedHarnessTest(/*part=*/size_t{7}, /*total=*/size_t{8});
}
3631 
TEST_F(HarnessTest, Randomized8) {
  // part 8 out of 8
  RandomizedHarnessTest(/*part=*/size_t{8}, /*total=*/size_t{8});
}
3638 
3639 #ifndef ROCKSDB_LITE
TEST_F(HarnessTest, RandomizedLongDB) {
  Random rnd(test::RandomSeed());
  TestArgs db_args = {DB_TEST, false, 16, kNoCompression, 0, false};
  Init(db_args);

  // Fill the harness with randomly generated keys and values.
  const int kNumEntries = 100000;
  int added = 0;
  while (added < kNumEntries) {
    std::string scratch;
    Add(test::RandomKey(&rnd, rnd.Skewed(4)),
        test::RandomString(&rnd, rnd.Skewed(5), &scratch).ToString());
    ++added;
  }
  Test(&rnd);

  // Sum the per-level file counts reported by the DB; at least one file must
  // exist after loading that much data.
  int total_files = 0;
  for (int lvl = 0; lvl < db()->NumberLevels(); ++lvl) {
    char property_name[100];
    snprintf(property_name, sizeof(property_name),
             "rocksdb.num-files-at-level%d", lvl);
    std::string count_str;
    ASSERT_TRUE(db()->GetProperty(property_name, &count_str));
    total_files += atoi(count_str.c_str());
  }
  ASSERT_GT(total_files, 0);
}
3663 #endif  // ROCKSDB_LITE
3664 #endif  // ROCKSDB_VALGRIND_RUN
3665 
3666 class MemTableTest : public testing::Test {};
3667 
TEST_F(MemTableTest, Simple) {
  // Create a skip-list backed memtable.
  InternalKeyComparator cmp(BytewiseComparator());
  auto skiplist_factory = std::make_shared<SkipListFactory>();
  Options options;
  options.memtable_factory = skiplist_factory;
  ImmutableCFOptions ioptions(options);
  WriteBufferManager write_buffer_manager(options.db_write_buffer_size);
  MemTable* mem = new MemTable(cmp, ioptions, MutableCFOptions(options),
                               &write_buffer_manager, kMaxSequenceNumber,
                               0 /* column_family_id */);
  mem->Ref();

  // Fill a batch (starting at sequence 100) with puts and range deletions and
  // apply it to the memtable.
  WriteBatch batch;
  WriteBatchInternal::SetSequence(&batch, 100);
  const std::pair<std::string, std::string> kPuts[] = {
      {"k1", "v1"}, {"k2", "v2"}, {"k3", "v3"}, {"largekey", "vlarge"}};
  for (const auto& kv : kPuts) {
    batch.Put(kv.first, kv.second);
  }
  const std::pair<std::string, std::string> kRangeDeletions[] = {
      {"chi", "xigua"}, {"begin", "end"}};
  for (const auto& range : kRangeDeletions) {
    batch.DeleteRange(range.first, range.second);
  }
  ColumnFamilyMemTablesDefault cf_mems_default(mem);
  ASSERT_TRUE(
      WriteBatchInternal::InsertInto(&batch, &cf_mems_default, nullptr, nullptr)
          .ok());

  // Prints every entry an iterator yields to stderr; a null iterator is
  // silently ignored.
  const auto dump_entries = [](InternalIterator* it) {
    if (it == nullptr) {
      return;
    }
    for (it->SeekToFirst(); it->Valid(); it->Next()) {
      fprintf(stderr, "key: '%s' -> '%s'\n", it->key().ToString().c_str(),
              it->value().ToString().c_str());
    }
  };

  {
    // Regular entries, via an arena-allocated iterator. The guard is declared
    // after the arena so that it is destroyed first.
    Arena arena;
    ScopedArenaIterator point_iter_guard;
    InternalIterator* point_iter = mem->NewIterator(ReadOptions(), &arena);
    point_iter_guard.set(point_iter);
    dump_entries(point_iter);
  }
  {
    // Range tombstones, via a heap-allocated iterator.
    std::unique_ptr<InternalIterator> tombstone_iter(
        mem->NewRangeTombstoneIterator(ReadOptions(),
                                       kMaxSequenceNumber /* read_seq */));
    dump_entries(tombstone_iter.get());
  }

  delete mem->Unref();
}
3718 
3719 // Test the empty key
TEST_F(HarnessTest, SimpleEmptyKey) {
  // For every harness configuration: store a single entry under the empty
  // key and run the harness checks.
  for (const auto& config : GenerateArgList()) {
    Init(config);
    Random rnd(test::RandomSeed() + 1);
    Add("", "v");
    Test(&rnd);
  }
}
3729 
TEST_F(HarnessTest, SimpleSingle) {
  // For every harness configuration: store one ordinary entry and run the
  // harness checks.
  for (const auto& config : GenerateArgList()) {
    Init(config);
    Random rnd(test::RandomSeed() + 2);
    Add("abc", "v");
    Test(&rnd);
  }
}
3739 
TEST_F(HarnessTest, SimpleMulti) {
  // For every harness configuration: store three entries whose keys share a
  // common prefix and run the harness checks.
  for (const auto& config : GenerateArgList()) {
    Init(config);
    Random rnd(test::RandomSeed() + 3);
    Add("abc", "v");
    Add("abcd", "v");
    Add("ac", "v2");
    Test(&rnd);
  }
}
3751 
TEST_F(HarnessTest, SimpleSpecialKey) {
  // For every harness configuration: store one entry whose key consists of
  // two 0xff bytes and run the harness checks.
  for (const auto& config : GenerateArgList()) {
    Init(config);
    Random rnd(test::RandomSeed() + 4);
    Add("\xff\xff", "v3");
    Test(&rnd);
  }
}
3761 
TEST_F(HarnessTest,FooterTests)3762 TEST_F(HarnessTest, FooterTests) {
3763   {
3764     // upconvert legacy block based
3765     std::string encoded;
3766     Footer footer(kLegacyBlockBasedTableMagicNumber, 0);
3767     BlockHandle meta_index(10, 5), index(20, 15);
3768     footer.set_metaindex_handle(meta_index);
3769     footer.set_index_handle(index);
3770     footer.EncodeTo(&encoded);
3771     Footer decoded_footer;
3772     Slice encoded_slice(encoded);
3773     decoded_footer.DecodeFrom(&encoded_slice);
3774     ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
3775     ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
3776     ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3777     ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3778     ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3779     ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3780     ASSERT_EQ(decoded_footer.version(), 0U);
3781   }
3782   {
3783     // xxhash block based
3784     std::string encoded;
3785     Footer footer(kBlockBasedTableMagicNumber, 1);
3786     BlockHandle meta_index(10, 5), index(20, 15);
3787     footer.set_metaindex_handle(meta_index);
3788     footer.set_index_handle(index);
3789     footer.set_checksum(kxxHash);
3790     footer.EncodeTo(&encoded);
3791     Footer decoded_footer;
3792     Slice encoded_slice(encoded);
3793     decoded_footer.DecodeFrom(&encoded_slice);
3794     ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
3795     ASSERT_EQ(decoded_footer.checksum(), kxxHash);
3796     ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3797     ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3798     ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3799     ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3800     ASSERT_EQ(decoded_footer.version(), 1U);
3801   }
3802   {
3803     // xxhash64 block based
3804     std::string encoded;
3805     Footer footer(kBlockBasedTableMagicNumber, 1);
3806     BlockHandle meta_index(10, 5), index(20, 15);
3807     footer.set_metaindex_handle(meta_index);
3808     footer.set_index_handle(index);
3809     footer.set_checksum(kxxHash64);
3810     footer.EncodeTo(&encoded);
3811     Footer decoded_footer;
3812     Slice encoded_slice(encoded);
3813     decoded_footer.DecodeFrom(&encoded_slice);
3814     ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
3815     ASSERT_EQ(decoded_footer.checksum(), kxxHash64);
3816     ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3817     ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3818     ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3819     ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3820     ASSERT_EQ(decoded_footer.version(), 1U);
3821   }
3822 // Plain table is not supported in ROCKSDB_LITE
3823 #ifndef ROCKSDB_LITE
3824   {
3825     // upconvert legacy plain table
3826     std::string encoded;
3827     Footer footer(kLegacyPlainTableMagicNumber, 0);
3828     BlockHandle meta_index(10, 5), index(20, 15);
3829     footer.set_metaindex_handle(meta_index);
3830     footer.set_index_handle(index);
3831     footer.EncodeTo(&encoded);
3832     Footer decoded_footer;
3833     Slice encoded_slice(encoded);
3834     decoded_footer.DecodeFrom(&encoded_slice);
3835     ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
3836     ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
3837     ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3838     ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3839     ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3840     ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3841     ASSERT_EQ(decoded_footer.version(), 0U);
3842   }
3843   {
3844     // xxhash block based
3845     std::string encoded;
3846     Footer footer(kPlainTableMagicNumber, 1);
3847     BlockHandle meta_index(10, 5), index(20, 15);
3848     footer.set_metaindex_handle(meta_index);
3849     footer.set_index_handle(index);
3850     footer.set_checksum(kxxHash);
3851     footer.EncodeTo(&encoded);
3852     Footer decoded_footer;
3853     Slice encoded_slice(encoded);
3854     decoded_footer.DecodeFrom(&encoded_slice);
3855     ASSERT_EQ(decoded_footer.table_magic_number(), kPlainTableMagicNumber);
3856     ASSERT_EQ(decoded_footer.checksum(), kxxHash);
3857     ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3858     ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3859     ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3860     ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3861     ASSERT_EQ(decoded_footer.version(), 1U);
3862   }
3863 #endif  // !ROCKSDB_LITE
3864   {
3865     // version == 2
3866     std::string encoded;
3867     Footer footer(kBlockBasedTableMagicNumber, 2);
3868     BlockHandle meta_index(10, 5), index(20, 15);
3869     footer.set_metaindex_handle(meta_index);
3870     footer.set_index_handle(index);
3871     footer.EncodeTo(&encoded);
3872     Footer decoded_footer;
3873     Slice encoded_slice(encoded);
3874     decoded_footer.DecodeFrom(&encoded_slice);
3875     ASSERT_EQ(decoded_footer.table_magic_number(), kBlockBasedTableMagicNumber);
3876     ASSERT_EQ(decoded_footer.checksum(), kCRC32c);
3877     ASSERT_EQ(decoded_footer.metaindex_handle().offset(), meta_index.offset());
3878     ASSERT_EQ(decoded_footer.metaindex_handle().size(), meta_index.size());
3879     ASSERT_EQ(decoded_footer.index_handle().offset(), index.offset());
3880     ASSERT_EQ(decoded_footer.index_handle().size(), index.size());
3881     ASSERT_EQ(decoded_footer.version(), 2U);
3882   }
3883 }
3884 
3885 class IndexBlockRestartIntervalTest
3886     : public TableTest,
3887       public ::testing::WithParamInterface<std::pair<int, bool>> {
3888  public:
GetRestartValues()3889   static std::vector<std::pair<int, bool>> GetRestartValues() {
3890     return {{-1, false}, {0, false},  {1, false}, {8, false},
3891             {16, false}, {32, false}, {-1, true}, {0, true},
3892             {1, true},   {8, true},   {16, true}, {32, true}};
3893   }
3894 };
3895 
3896 INSTANTIATE_TEST_CASE_P(
3897     IndexBlockRestartIntervalTest, IndexBlockRestartIntervalTest,
3898     ::testing::ValuesIn(IndexBlockRestartIntervalTest::GetRestartValues()));
3899 
// Builds a table with a very small data block size (so that the index block
// becomes large) using the parameterized index_block_restart_interval, then
// checks that every key is found by Seek() and that a full scan returns
// exactly the inserted entries in order. When the bool parameter is true the
// table is written with format_version 4.
TEST_P(IndexBlockRestartIntervalTest, IndexBlockRestartInterval) {
  const int kKeysInTable = 10000;
  const int kKeySize = 100;
  const int kValSize = 500;

  const int index_block_restart_interval = GetParam().first;
  const bool value_delta_encoding = GetParam().second;

  Options options;
  BlockBasedTableOptions table_options;
  table_options.block_size = 64;  // small block size to get big index block
  table_options.index_block_restart_interval = index_block_restart_interval;
  if (value_delta_encoding) {
    table_options.format_version = 4;
  }
  options.table_factory.reset(new BlockBasedTableFactory(table_options));

  TableConstructor c(BytewiseComparator());
  // Function-local static: the generator state carries over between the
  // parameter instantiations that run in the same process.
  static Random rnd(301);
  for (int i = 0; i < kKeysInTable; i++) {
    InternalKey k(RandomString(&rnd, kKeySize), 0, kTypeValue);
    c.Add(k.Encode().ToString(), RandomString(&rnd, kValSize));
  }

  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  InternalKeyComparator comparator(BytewiseComparator());
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_options, comparator, &keys,
           &kvmap);
  auto reader = c.GetTableReader();

  std::unique_ptr<InternalIterator> db_iter(reader->NewIterator(
      ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));

  // Test point lookup
  for (const auto& kv : kvmap) {
    db_iter->Seek(kv.first);

    ASSERT_TRUE(db_iter->Valid());
    ASSERT_OK(db_iter->status());
    ASSERT_EQ(db_iter->key(), kv.first);
    ASSERT_EQ(db_iter->value(), kv.second);
  }

  // Test iterating
  auto kv_iter = kvmap.begin();
  for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) {
    // Fail cleanly instead of dereferencing end() if the table yields more
    // entries than were added.
    ASSERT_TRUE(kv_iter != kvmap.end());
    ASSERT_EQ(db_iter->key(), kv_iter->first);
    ASSERT_EQ(db_iter->value(), kv_iter->second);
    ++kv_iter;
  }
  // The scan must have stopped because it ran out of entries, not because the
  // iterator hit an error.
  ASSERT_OK(db_iter->status());
  ASSERT_EQ(kv_iter, kvmap.end());
  c.ResetTableReader();
}
3958 
3959 class PrefixTest : public testing::Test {
3960  public:
PrefixTest()3961   PrefixTest() : testing::Test() {}
~PrefixTest()3962   ~PrefixTest() override {}
3963 };
3964 
3965 namespace {
3966 // A simple PrefixExtractor that only works for test PrefixAndWholeKeyTest
3967 class TestPrefixExtractor : public ROCKSDB_NAMESPACE::SliceTransform {
3968  public:
~TestPrefixExtractor()3969   ~TestPrefixExtractor() override{};
Name() const3970   const char* Name() const override { return "TestPrefixExtractor"; }
3971 
Transform(const ROCKSDB_NAMESPACE::Slice & src) const3972   ROCKSDB_NAMESPACE::Slice Transform(
3973       const ROCKSDB_NAMESPACE::Slice& src) const override {
3974     assert(IsValid(src));
3975     return ROCKSDB_NAMESPACE::Slice(src.data(), 3);
3976   }
3977 
InDomain(const ROCKSDB_NAMESPACE::Slice & src) const3978   bool InDomain(const ROCKSDB_NAMESPACE::Slice& src) const override {
3979     assert(IsValid(src));
3980     return true;
3981   }
3982 
InRange(const ROCKSDB_NAMESPACE::Slice &) const3983   bool InRange(const ROCKSDB_NAMESPACE::Slice& /*dst*/) const override {
3984     return true;
3985   }
3986 
IsValid(const ROCKSDB_NAMESPACE::Slice & src) const3987   bool IsValid(const ROCKSDB_NAMESPACE::Slice& src) const {
3988     if (src.size() != 4) {
3989       return false;
3990     }
3991     if (src[0] != '[') {
3992       return false;
3993     }
3994     if (src[1] < '0' || src[1] > '9') {
3995       return false;
3996     }
3997     if (src[2] != ']') {
3998       return false;
3999     }
4000     if (src[3] < '0' || src[3] > '9') {
4001       return false;
4002     }
4003     return true;
4004   }
4005 };
4006 }  // namespace
4007 
// Opens a DB that uses TestPrefixExtractor together with a bloom filter and
// whole_key_filtering, writes 10 x 10 keys of the form "[i]j" and compacts the
// whole key range. Every write and the compaction must succeed.
TEST_F(PrefixTest, PrefixAndWholeKeyTest) {
  ROCKSDB_NAMESPACE::Options options;
  options.compaction_style = ROCKSDB_NAMESPACE::kCompactionStyleUniversal;
  options.num_levels = 20;
  options.create_if_missing = true;
  options.optimize_filters_for_hits = false;
  options.target_file_size_base = 268435456;
  options.prefix_extractor = std::make_shared<TestPrefixExtractor>();
  ROCKSDB_NAMESPACE::BlockBasedTableOptions bbto;
  bbto.filter_policy.reset(ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10));
  bbto.block_size = 262144;
  bbto.whole_key_filtering = true;

  const std::string kDBPath = test::PerThreadDBPath("table_prefix_test");
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  // Best-effort removal of leftovers from an earlier run; the status is
  // deliberately not asserted.
  DestroyDB(kDBPath, options);
  ROCKSDB_NAMESPACE::DB* raw_db = nullptr;
  ASSERT_OK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &raw_db));
  // Own the DB so it is closed even when one of the assertions below returns
  // early from the test.
  std::unique_ptr<ROCKSDB_NAMESPACE::DB> db(raw_db);

  // Create keys under 10 different prefixes, 10 keys per prefix.
  for (int i = 0; i < 10; i++) {
    const std::string prefix = "[" + std::to_string(i) + "]";
    for (int j = 0; j < 10; j++) {
      const std::string key = prefix + std::to_string(j);
      ASSERT_OK(db->Put(ROCKSDB_NAMESPACE::WriteOptions(), key, "1"));
    }
  }

  // Trigger compaction.
  ASSERT_OK(db->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  db.reset();
  // NOTE: the original comment here announced a second round with
  // whole_key_filtering turned off; no such round is implemented in this test.
}
4042 
4043 /*
4044  * Disable TableWithGlobalSeqno since RocksDB does not store global_seqno in
4045  * the SST file any more. Instead, RocksDB deduces global_seqno from the
4046  * MANIFEST while reading from an SST. Therefore, it's not possible to test the
4047  * functionality of global_seqno in a single, isolated unit test without the
4048  * involvement of Version, VersionSet, etc.
4049  */
TEST_P(BlockBasedTableTest,DISABLED_TableWithGlobalSeqno)4050 TEST_P(BlockBasedTableTest, DISABLED_TableWithGlobalSeqno) {
4051   BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
4052   test::StringSink* sink = new test::StringSink();
4053   std::unique_ptr<WritableFileWriter> file_writer(
4054       test::GetWritableFileWriter(sink, "" /* don't care */));
4055   Options options;
4056   options.table_factory.reset(NewBlockBasedTableFactory(bbto));
4057   const ImmutableCFOptions ioptions(options);
4058   const MutableCFOptions moptions(options);
4059   InternalKeyComparator ikc(options.comparator);
4060   std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
4061       int_tbl_prop_collector_factories;
4062   int_tbl_prop_collector_factories.emplace_back(
4063       new SstFileWriterPropertiesCollectorFactory(2 /* version */,
4064                                                   0 /* global_seqno*/));
4065   std::string column_family_name;
4066   std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
4067       TableBuilderOptions(ioptions, moptions, ikc,
4068                           &int_tbl_prop_collector_factories, kNoCompression,
4069                           0 /* sample_for_compression */, CompressionOptions(),
4070                           false /* skip_filters */, column_family_name, -1),
4071       TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
4072       file_writer.get()));
4073 
4074   for (char c = 'a'; c <= 'z'; ++c) {
4075     std::string key(8, c);
4076     std::string value = key;
4077     InternalKey ik(key, 0, kTypeValue);
4078 
4079     builder->Add(ik.Encode(), value);
4080   }
4081   ASSERT_OK(builder->Finish());
4082   file_writer->Flush();
4083 
4084   test::RandomRWStringSink ss_rw(sink);
4085   uint32_t version;
4086   uint64_t global_seqno;
4087   uint64_t global_seqno_offset;
4088 
4089   // Helper function to get version, global_seqno, global_seqno_offset
4090   std::function<void()> GetVersionAndGlobalSeqno = [&]() {
4091     std::unique_ptr<RandomAccessFileReader> file_reader(
4092         test::GetRandomAccessFileReader(
4093             new test::StringSource(ss_rw.contents(), 73342, true)));
4094 
4095     TableProperties* props = nullptr;
4096     ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(),
4097                                   kBlockBasedTableMagicNumber, ioptions,
4098                                   &props, true /* compression_type_missing */));
4099 
4100     UserCollectedProperties user_props = props->user_collected_properties;
4101     version = DecodeFixed32(
4102         user_props[ExternalSstFilePropertyNames::kVersion].c_str());
4103     global_seqno = DecodeFixed64(
4104         user_props[ExternalSstFilePropertyNames::kGlobalSeqno].c_str());
4105     global_seqno_offset =
4106         props->properties_offsets[ExternalSstFilePropertyNames::kGlobalSeqno];
4107 
4108     delete props;
4109   };
4110 
4111   // Helper function to update the value of the global seqno in the file
4112   std::function<void(uint64_t)> SetGlobalSeqno = [&](uint64_t val) {
4113     std::string new_global_seqno;
4114     PutFixed64(&new_global_seqno, val);
4115 
4116     ASSERT_OK(ss_rw.Write(global_seqno_offset, new_global_seqno));
4117   };
4118 
4119   // Helper function to get the contents of the table InternalIterator
4120   std::unique_ptr<TableReader> table_reader;
4121   std::function<InternalIterator*()> GetTableInternalIter = [&]() {
4122     std::unique_ptr<RandomAccessFileReader> file_reader(
4123         test::GetRandomAccessFileReader(
4124             new test::StringSource(ss_rw.contents(), 73342, true)));
4125 
4126     options.table_factory->NewTableReader(
4127         TableReaderOptions(ioptions, moptions.prefix_extractor.get(),
4128                            EnvOptions(), ikc),
4129         std::move(file_reader), ss_rw.contents().size(), &table_reader);
4130 
4131     return table_reader->NewIterator(
4132         ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr,
4133         /*skip_filters=*/false, TableReaderCaller::kUncategorized);
4134   };
4135 
4136   GetVersionAndGlobalSeqno();
4137   ASSERT_EQ(2u, version);
4138   ASSERT_EQ(0u, global_seqno);
4139 
4140   InternalIterator* iter = GetTableInternalIter();
4141   char current_c = 'a';
4142   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
4143     ParsedInternalKey pik;
4144     ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));
4145 
4146     ASSERT_EQ(pik.type, ValueType::kTypeValue);
4147     ASSERT_EQ(pik.sequence, 0);
4148     ASSERT_EQ(pik.user_key, iter->value());
4149     ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
4150     current_c++;
4151   }
4152   ASSERT_EQ(current_c, 'z' + 1);
4153   delete iter;
4154 
4155   // Update global sequence number to 10
4156   SetGlobalSeqno(10);
4157   GetVersionAndGlobalSeqno();
4158   ASSERT_EQ(2u, version);
4159   ASSERT_EQ(10u, global_seqno);
4160 
4161   iter = GetTableInternalIter();
4162   current_c = 'a';
4163   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
4164     ParsedInternalKey pik;
4165     ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));
4166 
4167     ASSERT_EQ(pik.type, ValueType::kTypeValue);
4168     ASSERT_EQ(pik.sequence, 10);
4169     ASSERT_EQ(pik.user_key, iter->value());
4170     ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
4171     current_c++;
4172   }
4173   ASSERT_EQ(current_c, 'z' + 1);
4174 
4175   // Verify Seek
4176   for (char c = 'a'; c <= 'z'; c++) {
4177     std::string k = std::string(8, c);
4178     InternalKey ik(k, 10, kValueTypeForSeek);
4179     iter->Seek(ik.Encode());
4180     ASSERT_TRUE(iter->Valid());
4181 
4182     ParsedInternalKey pik;
4183     ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));
4184 
4185     ASSERT_EQ(pik.type, ValueType::kTypeValue);
4186     ASSERT_EQ(pik.sequence, 10);
4187     ASSERT_EQ(pik.user_key.ToString(), k);
4188     ASSERT_EQ(iter->value().ToString(), k);
4189   }
4190   delete iter;
4191 
4192   // Update global sequence number to 3
4193   SetGlobalSeqno(3);
4194   GetVersionAndGlobalSeqno();
4195   ASSERT_EQ(2u, version);
4196   ASSERT_EQ(3u, global_seqno);
4197 
4198   iter = GetTableInternalIter();
4199   current_c = 'a';
4200   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
4201     ParsedInternalKey pik;
4202     ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));
4203 
4204     ASSERT_EQ(pik.type, ValueType::kTypeValue);
4205     ASSERT_EQ(pik.sequence, 3);
4206     ASSERT_EQ(pik.user_key, iter->value());
4207     ASSERT_EQ(pik.user_key.ToString(), std::string(8, current_c));
4208     current_c++;
4209   }
4210   ASSERT_EQ(current_c, 'z' + 1);
4211 
4212   // Verify Seek
4213   for (char c = 'a'; c <= 'z'; c++) {
4214     std::string k = std::string(8, c);
4215     // seqno=4 is less than 3 so we still should get our key
4216     InternalKey ik(k, 4, kValueTypeForSeek);
4217     iter->Seek(ik.Encode());
4218     ASSERT_TRUE(iter->Valid());
4219 
4220     ParsedInternalKey pik;
4221     ASSERT_TRUE(ParseInternalKey(iter->key(), &pik));
4222 
4223     ASSERT_EQ(pik.type, ValueType::kTypeValue);
4224     ASSERT_EQ(pik.sequence, 3);
4225     ASSERT_EQ(pik.user_key.ToString(), k);
4226     ASSERT_EQ(iter->value().ToString(), k);
4227   }
4228 
4229   delete iter;
4230 }
4231 
TEST_P(BlockBasedTableTest,BlockAlignTest)4232 TEST_P(BlockBasedTableTest, BlockAlignTest) {
4233   BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
4234   bbto.block_align = true;
4235   test::StringSink* sink = new test::StringSink();
4236   std::unique_ptr<WritableFileWriter> file_writer(
4237       test::GetWritableFileWriter(sink, "" /* don't care */));
4238   Options options;
4239   options.compression = kNoCompression;
4240   options.table_factory.reset(NewBlockBasedTableFactory(bbto));
4241   const ImmutableCFOptions ioptions(options);
4242   const MutableCFOptions moptions(options);
4243   InternalKeyComparator ikc(options.comparator);
4244   std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
4245       int_tbl_prop_collector_factories;
4246   std::string column_family_name;
4247   std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
4248       TableBuilderOptions(ioptions, moptions, ikc,
4249                           &int_tbl_prop_collector_factories, kNoCompression,
4250                           0 /* sample_for_compression */, CompressionOptions(),
4251                           false /* skip_filters */, column_family_name, -1),
4252       TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
4253       file_writer.get()));
4254 
4255   for (int i = 1; i <= 10000; ++i) {
4256     std::ostringstream ostr;
4257     ostr << std::setfill('0') << std::setw(5) << i;
4258     std::string key = ostr.str();
4259     std::string value = "val";
4260     InternalKey ik(key, 0, kTypeValue);
4261 
4262     builder->Add(ik.Encode(), value);
4263   }
4264   ASSERT_OK(builder->Finish());
4265   file_writer->Flush();
4266 
4267   test::RandomRWStringSink ss_rw(sink);
4268   std::unique_ptr<RandomAccessFileReader> file_reader(
4269       test::GetRandomAccessFileReader(
4270           new test::StringSource(ss_rw.contents(), 73342, true)));
4271 
4272   // Helper function to get version, global_seqno, global_seqno_offset
4273   std::function<void()> VerifyBlockAlignment = [&]() {
4274     TableProperties* props = nullptr;
4275     ASSERT_OK(ReadTableProperties(file_reader.get(), ss_rw.contents().size(),
4276                                   kBlockBasedTableMagicNumber, ioptions,
4277                                   &props, true /* compression_type_missing */));
4278 
4279     uint64_t data_block_size = props->data_size / props->num_data_blocks;
4280     ASSERT_EQ(data_block_size, 4096);
4281     ASSERT_EQ(props->data_size, data_block_size * props->num_data_blocks);
4282     delete props;
4283   };
4284 
4285   VerifyBlockAlignment();
4286 
4287   // The below block of code verifies that we can read back the keys. Set
4288   // block_align to false when creating the reader to ensure we can flip between
4289   // the two modes without any issues
4290   std::unique_ptr<TableReader> table_reader;
4291   bbto.block_align = false;
4292   Options options2;
4293   options2.table_factory.reset(NewBlockBasedTableFactory(bbto));
4294   ImmutableCFOptions ioptions2(options2);
4295   const MutableCFOptions moptions2(options2);
4296 
4297   ASSERT_OK(ioptions.table_factory->NewTableReader(
4298       TableReaderOptions(ioptions2, moptions2.prefix_extractor.get(),
4299                          EnvOptions(),
4300                          GetPlainInternalComparator(options2.comparator)),
4301       std::move(file_reader), ss_rw.contents().size(), &table_reader));
4302 
4303   std::unique_ptr<InternalIterator> db_iter(table_reader->NewIterator(
4304       ReadOptions(), moptions2.prefix_extractor.get(), /*arena=*/nullptr,
4305       /*skip_filters=*/false, TableReaderCaller::kUncategorized));
4306 
4307   int expected_key = 1;
4308   for (db_iter->SeekToFirst(); db_iter->Valid(); db_iter->Next()) {
4309     std::ostringstream ostr;
4310     ostr << std::setfill('0') << std::setw(5) << expected_key++;
4311     std::string key = ostr.str();
4312     std::string value = "val";
4313 
4314     ASSERT_OK(db_iter->status());
4315     ASSERT_EQ(ExtractUserKey(db_iter->key()).ToString(), key);
4316     ASSERT_EQ(db_iter->value().ToString(), value);
4317   }
4318   expected_key--;
4319   ASSERT_EQ(expected_key, 10000);
4320   table_reader.reset();
4321 }
4322 
TEST_P(BlockBasedTableTest,PropertiesBlockRestartPointTest)4323 TEST_P(BlockBasedTableTest, PropertiesBlockRestartPointTest) {
4324   BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
4325   bbto.block_align = true;
4326   test::StringSink* sink = new test::StringSink();
4327   std::unique_ptr<WritableFileWriter> file_writer(
4328       test::GetWritableFileWriter(sink, "" /* don't care */));
4329 
4330   Options options;
4331   options.compression = kNoCompression;
4332   options.table_factory.reset(NewBlockBasedTableFactory(bbto));
4333 
4334   const ImmutableCFOptions ioptions(options);
4335   const MutableCFOptions moptions(options);
4336   InternalKeyComparator ikc(options.comparator);
4337   std::vector<std::unique_ptr<IntTblPropCollectorFactory>>
4338       int_tbl_prop_collector_factories;
4339   std::string column_family_name;
4340 
4341   std::unique_ptr<TableBuilder> builder(options.table_factory->NewTableBuilder(
4342       TableBuilderOptions(ioptions, moptions, ikc,
4343                           &int_tbl_prop_collector_factories, kNoCompression,
4344                           0 /* sample_for_compression */, CompressionOptions(),
4345                           false /* skip_filters */, column_family_name, -1),
4346       TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
4347       file_writer.get()));
4348 
4349   for (int i = 1; i <= 10000; ++i) {
4350     std::ostringstream ostr;
4351     ostr << std::setfill('0') << std::setw(5) << i;
4352     std::string key = ostr.str();
4353     std::string value = "val";
4354     InternalKey ik(key, 0, kTypeValue);
4355 
4356     builder->Add(ik.Encode(), value);
4357   }
4358   ASSERT_OK(builder->Finish());
4359   file_writer->Flush();
4360 
4361   test::RandomRWStringSink ss_rw(sink);
4362   std::unique_ptr<RandomAccessFileReader> file_reader(
4363       test::GetRandomAccessFileReader(
4364           new test::StringSource(ss_rw.contents(), 73342, true)));
4365 
4366   {
4367     RandomAccessFileReader* file = file_reader.get();
4368     uint64_t file_size = ss_rw.contents().size();
4369 
4370     Footer footer;
4371     ASSERT_OK(ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size,
4372                                  &footer, kBlockBasedTableMagicNumber));
4373 
4374     auto BlockFetchHelper = [&](const BlockHandle& handle, BlockType block_type,
4375                                 BlockContents* contents) {
4376       ReadOptions read_options;
4377       read_options.verify_checksums = false;
4378       PersistentCacheOptions cache_options;
4379 
4380       BlockFetcher block_fetcher(
4381           file, nullptr /* prefetch_buffer */, footer, read_options, handle,
4382           contents, ioptions, false /* decompress */,
4383           false /*maybe_compressed*/, block_type,
4384           UncompressionDict::GetEmptyDict(), cache_options);
4385 
4386       ASSERT_OK(block_fetcher.ReadBlockContents());
4387     };
4388 
4389     // -- Read metaindex block
4390     auto metaindex_handle = footer.metaindex_handle();
4391     BlockContents metaindex_contents;
4392 
4393     BlockFetchHelper(metaindex_handle, BlockType::kMetaIndex,
4394                      &metaindex_contents);
4395     Block metaindex_block(std::move(metaindex_contents));
4396 
4397     std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewDataIterator(
4398         BytewiseComparator(), BytewiseComparator(),
4399         kDisableGlobalSequenceNumber));
4400     bool found_properties_block = true;
4401     ASSERT_OK(SeekToPropertiesBlock(meta_iter.get(), &found_properties_block));
4402     ASSERT_TRUE(found_properties_block);
4403 
4404     // -- Read properties block
4405     Slice v = meta_iter->value();
4406     BlockHandle properties_handle;
4407     ASSERT_OK(properties_handle.DecodeFrom(&v));
4408     BlockContents properties_contents;
4409 
4410     BlockFetchHelper(properties_handle, BlockType::kProperties,
4411                      &properties_contents);
4412     Block properties_block(std::move(properties_contents));
4413 
4414     ASSERT_EQ(properties_block.NumRestarts(), 1u);
4415   }
4416 }
4417 
TEST_P(BlockBasedTableTest,PropertiesMetaBlockLast)4418 TEST_P(BlockBasedTableTest, PropertiesMetaBlockLast) {
4419   // The properties meta-block should come at the end since we always need to
4420   // read it when opening a file, unlike index/filter/other meta-blocks, which
4421   // are sometimes read depending on the user's configuration. This ordering
4422   // allows us to do a small readahead on the end of the file to read properties
4423   // and meta-index blocks with one I/O.
4424   TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
4425   c.Add("a1", "val1");
4426   c.Add("b2", "val2");
4427   c.Add("c3", "val3");
4428   c.Add("d4", "val4");
4429   c.Add("e5", "val5");
4430   c.Add("f6", "val6");
4431   c.Add("g7", "val7");
4432   c.Add("h8", "val8");
4433   c.Add("j9", "val9");
4434 
4435   // write an SST file
4436   Options options;
4437   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
4438   table_options.filter_policy.reset(NewBloomFilterPolicy(
4439       8 /* bits_per_key */, false /* use_block_based_filter */));
4440   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
4441   ImmutableCFOptions ioptions(options);
4442   MutableCFOptions moptions(options);
4443   std::vector<std::string> keys;
4444   stl_wrappers::KVMap kvmap;
4445   c.Finish(options, ioptions, moptions, table_options,
4446            GetPlainInternalComparator(options.comparator), &keys, &kvmap);
4447 
4448   // get file reader
4449   test::StringSink* table_sink = c.TEST_GetSink();
4450   std::unique_ptr<RandomAccessFileReader> table_reader{
4451       test::GetRandomAccessFileReader(
4452           new test::StringSource(table_sink->contents(), 0 /* unique_id */,
4453                                  false /* allow_mmap_reads */))};
4454   size_t table_size = table_sink->contents().size();
4455 
4456   // read footer
4457   Footer footer;
4458   ASSERT_OK(ReadFooterFromFile(table_reader.get(),
4459                                nullptr /* prefetch_buffer */, table_size,
4460                                &footer, kBlockBasedTableMagicNumber));
4461 
4462   // read metaindex
4463   auto metaindex_handle = footer.metaindex_handle();
4464   BlockContents metaindex_contents;
4465   PersistentCacheOptions pcache_opts;
4466   BlockFetcher block_fetcher(
4467       table_reader.get(), nullptr /* prefetch_buffer */, footer, ReadOptions(),
4468       metaindex_handle, &metaindex_contents, ioptions, false /* decompress */,
4469       false /*maybe_compressed*/, BlockType::kMetaIndex,
4470       UncompressionDict::GetEmptyDict(), pcache_opts,
4471       nullptr /*memory_allocator*/);
4472   ASSERT_OK(block_fetcher.ReadBlockContents());
4473   Block metaindex_block(std::move(metaindex_contents));
4474 
4475   // verify properties block comes last
4476   std::unique_ptr<InternalIterator> metaindex_iter{
4477       metaindex_block.NewDataIterator(options.comparator, options.comparator,
4478                                       kDisableGlobalSequenceNumber)};
4479   uint64_t max_offset = 0;
4480   std::string key_at_max_offset;
4481   for (metaindex_iter->SeekToFirst(); metaindex_iter->Valid();
4482        metaindex_iter->Next()) {
4483     BlockHandle handle;
4484     Slice value = metaindex_iter->value();
4485     ASSERT_OK(handle.DecodeFrom(&value));
4486     if (handle.offset() > max_offset) {
4487       max_offset = handle.offset();
4488       key_at_max_offset = metaindex_iter->key().ToString();
4489     }
4490   }
4491   ASSERT_EQ(kPropertiesBlock, key_at_max_offset);
4492   // index handle is stored in footer rather than metaindex block, so need
4493   // separate logic to verify it comes before properties block.
4494   ASSERT_GT(max_offset, footer.index_handle().offset());
4495   c.ResetTableReader();
4496 }
4497 
// Verifies that DB::Open rejects two block_align configurations: one with a
// block size of 4000, and one with a block size of 4096 combined with Snappy
// compression.
TEST_P(BlockBasedTableTest, BadOptions) {
  ROCKSDB_NAMESPACE::Options options;
  options.compression = kNoCompression;
  BlockBasedTableOptions bbto = GetBlockBasedTableOptions();
  bbto.block_size = 4000;
  bbto.block_align = true;

  const std::string kDBPath =
      test::PerThreadDBPath("block_based_table_bad_options_test");
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  // Start from a clean directory; a failed cleanup would make the Open()
  // results below meaningless, so do not ignore it.
  ASSERT_OK(DestroyDB(kDBPath, options));
  // Initialized so the pointer never holds an indeterminate value, whatever
  // Open() does with it on failure.
  ROCKSDB_NAMESPACE::DB* db = nullptr;
  ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db));

  bbto.block_size = 4096;
  options.compression = kSnappyCompression;
  options.table_factory.reset(NewBlockBasedTableFactory(bbto));
  ASSERT_NOK(ROCKSDB_NAMESPACE::DB::Open(options, kDBPath, &db));
}
4517 
// Feeds a sequence of effective tail sizes into TailPrefetchStats and checks
// the suggested prefetch size after each stage.
TEST_F(BBTTailPrefetchTest, TestTailPrefetchStats) {
  TailPrefetchStats stats;

  // Records the same effective size `times` times in a row.
  const auto record_n = [&stats](size_t effective_size, int times) {
    for (int n = 0; n < times; ++n) {
      stats.RecordEffectiveSize(effective_size);
    }
  };

  // Nothing recorded yet.
  ASSERT_EQ(0, stats.GetSuggestedPrefetchSize());

  // Three similar samples: the largest one is suggested.
  record_n(size_t{1000}, 1);
  record_n(size_t{1005}, 1);
  record_n(size_t{1002}, 1);
  ASSERT_EQ(1005, stats.GetSuggestedPrefetchSize());

  // A single huge outlier must not move the suggestion much.
  record_n(size_t{1002000}, 1);
  record_n(size_t{999}, 1);
  ASSERT_LE(1005, stats.GetSuggestedPrefetchSize());
  ASSERT_GT(1200, stats.GetSuggestedPrefetchSize());

  // Only the 32 most recent samples are kept, so 32 identical samples replace
  // everything recorded before.
  record_n(size_t{100}, 32);
  ASSERT_EQ(100, stats.GetSuggestedPrefetchSize());

  // 16 large samples followed by 8 small ones: together with the small samples
  // still left in the history this gives 16 large and 16 small values. The
  // suggestion is expected to end up close to the small values.
  record_n(size_t{1000}, 16);
  record_n(size_t{10}, 1);
  record_n(size_t{20}, 1);
  record_n(size_t{100}, 6);
  ASSERT_LE(80, stats.GetSuggestedPrefetchSize());
  ASSERT_GT(200, stats.GetSuggestedPrefetchSize());
}
4551 
// Issues three cache lookups at offsets 500, 480 and 490 against a
// FilePrefetchBuffer that has no file attached, and checks that the buffer
// reports the smallest of those offsets as its minimum offset read.
TEST_F(BBTTailPrefetchTest, FilePrefetchBufferMinOffset) {
  // NOTE(review): the trailing `true` is presumably the switch that enables
  // min-offset tracking -- confirm against FilePrefetchBuffer's constructor.
  FilePrefetchBuffer buffer(nullptr, 0, 0, false, true);
  // The lookup results are irrelevant here; only the offsets that were asked
  // for matter.
  buffer.TryReadFromCache(500, 10, nullptr);
  buffer.TryReadFromCache(480, 10, nullptr);
  buffer.TryReadFromCache(490, 10, nullptr);
  ASSERT_EQ(480, buffer.min_offset_read());
}
4560 
TEST_P(BlockBasedTableTest,DataBlockHashIndex)4561 TEST_P(BlockBasedTableTest, DataBlockHashIndex) {
4562   const int kNumKeys = 500;
4563   const int kKeySize = 8;
4564   const int kValSize = 40;
4565 
4566   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
4567   table_options.data_block_index_type =
4568       BlockBasedTableOptions::kDataBlockBinaryAndHash;
4569 
4570   Options options;
4571   options.comparator = BytewiseComparator();
4572 
4573   options.table_factory.reset(new BlockBasedTableFactory(table_options));
4574 
4575   TableConstructor c(options.comparator);
4576 
4577   static Random rnd(1048);
4578   for (int i = 0; i < kNumKeys; i++) {
4579     // padding one "0" to mark existent keys.
4580     std::string random_key(RandomString(&rnd, kKeySize - 1) + "1");
4581     InternalKey k(random_key, 0, kTypeValue);
4582     c.Add(k.Encode().ToString(), RandomString(&rnd, kValSize));
4583   }
4584 
4585   std::vector<std::string> keys;
4586   stl_wrappers::KVMap kvmap;
4587   const ImmutableCFOptions ioptions(options);
4588   const MutableCFOptions moptions(options);
4589   const InternalKeyComparator internal_comparator(options.comparator);
4590   c.Finish(options, ioptions, moptions, table_options, internal_comparator,
4591            &keys, &kvmap);
4592 
4593   auto reader = c.GetTableReader();
4594 
4595   std::unique_ptr<InternalIterator> seek_iter;
4596   seek_iter.reset(reader->NewIterator(
4597       ReadOptions(), moptions.prefix_extractor.get(), /*arena=*/nullptr,
4598       /*skip_filters=*/false, TableReaderCaller::kUncategorized));
4599   for (int i = 0; i < 2; ++i) {
4600     ReadOptions ro;
4601     // for every kv, we seek using two method: Get() and Seek()
4602     // Get() will use the SuffixIndexHash in Block. For non-existent key it
4603     //      will invalidate the iterator
4604     // Seek() will use the default BinarySeek() in Block. So for non-existent
4605     //      key it will land at the closest key that is large than target.
4606 
4607     // Search for existent keys
4608     for (auto& kv : kvmap) {
4609       if (i == 0) {
4610         // Search using Seek()
4611         seek_iter->Seek(kv.first);
4612         ASSERT_OK(seek_iter->status());
4613         ASSERT_TRUE(seek_iter->Valid());
4614         ASSERT_EQ(seek_iter->key(), kv.first);
4615         ASSERT_EQ(seek_iter->value(), kv.second);
4616       } else {
4617         // Search using Get()
4618         PinnableSlice value;
4619         std::string user_key = ExtractUserKey(kv.first).ToString();
4620         GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
4621                                GetContext::kNotFound, user_key, &value, nullptr,
4622                                nullptr, true, nullptr, nullptr);
4623         ASSERT_OK(reader->Get(ro, kv.first, &get_context,
4624                               moptions.prefix_extractor.get()));
4625         ASSERT_EQ(get_context.State(), GetContext::kFound);
4626         ASSERT_EQ(value, Slice(kv.second));
4627         value.Reset();
4628       }
4629     }
4630 
4631     // Search for non-existent keys
4632     for (auto& kv : kvmap) {
4633       std::string user_key = ExtractUserKey(kv.first).ToString();
4634       user_key.back() = '0';  // make it non-existent key
4635       InternalKey internal_key(user_key, 0, kTypeValue);
4636       std::string encoded_key = internal_key.Encode().ToString();
4637       if (i == 0) {  // Search using Seek()
4638         seek_iter->Seek(encoded_key);
4639         ASSERT_OK(seek_iter->status());
4640         if (seek_iter->Valid()) {
4641           ASSERT_TRUE(BytewiseComparator()->Compare(
4642                           user_key, ExtractUserKey(seek_iter->key())) < 0);
4643         }
4644       } else {  // Search using Get()
4645         PinnableSlice value;
4646         GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
4647                                GetContext::kNotFound, user_key, &value, nullptr,
4648                                nullptr, true, nullptr, nullptr);
4649         ASSERT_OK(reader->Get(ro, encoded_key, &get_context,
4650                               moptions.prefix_extractor.get()));
4651         ASSERT_EQ(get_context.State(), GetContext::kNotFound);
4652         value.Reset();
4653       }
4654     }
4655   }
4656 }
4657 
// BlockBasedTableIterator should invalidate itself and return
// OutOfBound()=true immediately after Seek(), so that LevelIterator can
// filter out the corresponding level.
TEST_P(BlockBasedTableTest, OutOfBoundOnSeek) {
  // Single-entry table: the only key, "foo", sorts after the upper bound
  // "bar" used below.
  TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/);
  c.Add("foo", "v1");
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  BlockBasedTableOptions table_opt(GetBlockBasedTableOptions());
  options.table_factory.reset(NewBlockBasedTableFactory(table_opt));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_opt,
           GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap);
  auto* reader = c.GetTableReader();

  ReadOptions read_opt;
  std::string upper_bound = "bar";
  Slice upper_bound_slice(upper_bound);
  read_opt.iterate_upper_bound = &upper_bound_slice;

  // Opens a fresh table iterator honouring `read_opt`, wrapped so that it can
  // be driven with user keys.
  const auto open_iter = [&]() {
    return std::unique_ptr<InternalIterator>(
        new KeyConvertingIterator(reader->NewIterator(
            read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
            /*skip_filters=*/false, TableReaderCaller::kUncategorized)));
  };

  // SeekToFirst() lands beyond the upper bound.
  std::unique_ptr<InternalIterator> iter = open_iter();
  iter->SeekToFirst();
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->IsOutOfBound());

  // Seek() to the stored key lands beyond the upper bound as well.
  iter = open_iter();
  iter->Seek("foo");
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->IsOutOfBound());
}
4692 
4693 // BlockBasedTableIterator should invalidate itself and return
4694 // OutOfBound()=true after Next(), if it finds current index key is no smaller
4695 // than upper bound, unless it is pointing to the last data block.
TEST_P(BlockBasedTableTest, OutOfBoundOnNext) {
  // Two keys, built with the flush-every-key policy.
  TableConstructor c(BytewiseComparator(), true /*convert_to_internal_key*/);
  c.Add("bar", "v");
  c.Add("foo", "v");
  std::vector<std::string> keys;
  stl_wrappers::KVMap kvmap;
  Options options;
  BlockBasedTableOptions table_opt(GetBlockBasedTableOptions());
  table_opt.flush_block_policy_factory =
      std::make_shared<FlushBlockEveryKeyPolicyFactory>();
  options.table_factory.reset(NewBlockBasedTableFactory(table_opt));
  const ImmutableCFOptions ioptions(options);
  const MutableCFOptions moptions(options);
  c.Finish(options, ioptions, moptions, table_opt,
           GetPlainInternalComparator(BytewiseComparator()), &keys, &kvmap);
  auto* reader = c.GetTableReader();

  ReadOptions read_opt;

  // Opens a fresh table iterator using whatever upper bound `read_opt` holds
  // at the time of the call, wrapped so that it can be driven with user keys.
  const auto open_iter = [&]() {
    return std::unique_ptr<InternalIterator>(
        new KeyConvertingIterator(reader->NewIterator(
            read_opt, /*prefix_extractor=*/nullptr, /*arena=*/nullptr,
            /*skip_filters=*/false, TableReaderCaller::kUncategorized)));
  };

  // Upper bound between the two keys: stepping past "bar" must be reported as
  // out of bound.
  std::string ub1 = "bar_after";
  Slice ub_slice1(ub1);
  read_opt.iterate_upper_bound = &ub_slice1;
  std::unique_ptr<InternalIterator> iter = open_iter();
  iter->Seek("bar");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("bar", iter->key());
  iter->Next();
  ASSERT_FALSE(iter->Valid());
  ASSERT_TRUE(iter->IsOutOfBound());

  // Upper bound after the last key: stepping past "foo" exhausts the table,
  // which must not be reported as out of bound.
  std::string ub2 = "foo_after";
  Slice ub_slice2(ub2);
  read_opt.iterate_upper_bound = &ub_slice2;
  iter = open_iter();
  iter->Seek("foo");
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ("foo", iter->key());
  iter->Next();
  ASSERT_FALSE(iter->Valid());
  ASSERT_FALSE(iter->IsOutOfBound());
}
4739 
4740 }  // namespace ROCKSDB_NAMESPACE
4741 
main(int argc,char ** argv)4742 int main(int argc, char** argv) {
4743   ::testing::InitGoogleTest(&argc, argv);
4744   return RUN_ALL_TESTS();
4745 }
4746