1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #pragma once
11 #include <stdint.h>
12 
13 #include <memory>
14 #include <string>
15 
16 #include "db/dbformat.h"
17 #include "options/options_helper.h"
18 #include "options/options_parser.h"
19 #include "rocksdb/flush_block_policy.h"
20 #include "rocksdb/table.h"
21 
22 namespace ROCKSDB_NAMESPACE {
23 
24 struct EnvOptions;
25 
26 class BlockBasedTableBuilder;
27 
28 // A class used to track actual bytes written from the tail in the recent SST
29 // file opens, and provide a suggestion for following open.
30 class TailPrefetchStats {
31  public:
32   void RecordEffectiveSize(size_t len);
33   // 0 indicates no information to determine.
34   size_t GetSuggestedPrefetchSize();
35 
36  private:
37   const static size_t kNumTracked = 32;
38   size_t records_[kNumTracked];
39   port::Mutex mutex_;
40   size_t next_ = 0;
41   size_t num_records_ = 0;
42 };
43 
44 class BlockBasedTableFactory : public TableFactory {
45  public:
46   explicit BlockBasedTableFactory(
47       const BlockBasedTableOptions& table_options = BlockBasedTableOptions());
48 
~BlockBasedTableFactory()49   ~BlockBasedTableFactory() {}
50 
Name()51   const char* Name() const override { return kName.c_str(); }
52 
53   Status NewTableReader(
54       const TableReaderOptions& table_reader_options,
55       std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
56       std::unique_ptr<TableReader>* table_reader,
57       bool prefetch_index_and_filter_in_cache = true) const override;
58 
59   TableBuilder* NewTableBuilder(
60       const TableBuilderOptions& table_builder_options,
61       uint32_t column_family_id, WritableFileWriter* file) const override;
62 
63   // Sanitizes the specified DB Options.
64   Status SanitizeOptions(const DBOptions& db_opts,
65                          const ColumnFamilyOptions& cf_opts) const override;
66 
67   std::string GetPrintableTableOptions() const override;
68 
69   Status GetOptionString(std::string* opt_string,
70                          const std::string& delimiter) const override;
71 
72   const BlockBasedTableOptions& table_options() const;
73 
GetOptions()74   void* GetOptions() override { return &table_options_; }
75 
IsDeleteRangeSupported()76   bool IsDeleteRangeSupported() const override { return true; }
77 
78   static const std::string kName;
79 
80  private:
81   BlockBasedTableOptions table_options_;
82   mutable TailPrefetchStats tail_prefetch_stats_;
83 };
84 
// Shared string constants. These are declarations only; the definitions are
// provided in another translation unit.
// NOTE(review): judging by the names, the first two look like meta-block
// names for the hash index prefixes and the last two like the textual values
// for boolean properties -- confirm against the definitions.
extern const std::string kHashIndexPrefixesBlock;
extern const std::string kHashIndexPrefixesMetadataBlock;
extern const std::string kPropTrue;
extern const std::string kPropFalse;
89 
90 #ifndef ROCKSDB_LITE
91 extern Status VerifyBlockBasedTableFactory(
92     const BlockBasedTableFactory* base_tf,
93     const BlockBasedTableFactory* file_tf,
94     OptionsSanityCheckLevel sanity_check_level);
95 
96 static std::unordered_map<std::string, OptionTypeInfo>
97     block_based_table_type_info = {
98         /* currently not supported
99           std::shared_ptr<Cache> block_cache = nullptr;
100           std::shared_ptr<Cache> block_cache_compressed = nullptr;
101          */
102         {"flush_block_policy_factory",
103          {offsetof(struct BlockBasedTableOptions, flush_block_policy_factory),
104           OptionType::kFlushBlockPolicyFactory, OptionVerificationType::kByName,
105           false, 0}},
106         {"cache_index_and_filter_blocks",
107          {offsetof(struct BlockBasedTableOptions,
108                    cache_index_and_filter_blocks),
109           OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
110         {"cache_index_and_filter_blocks_with_high_priority",
111          {offsetof(struct BlockBasedTableOptions,
112                    cache_index_and_filter_blocks_with_high_priority),
113           OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
114         {"pin_l0_filter_and_index_blocks_in_cache",
115          {offsetof(struct BlockBasedTableOptions,
116                    pin_l0_filter_and_index_blocks_in_cache),
117           OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
118         {"index_type",
119          {offsetof(struct BlockBasedTableOptions, index_type),
120           OptionType::kBlockBasedTableIndexType,
121           OptionVerificationType::kNormal, false, 0}},
122         {"hash_index_allow_collision",
123          {offsetof(struct BlockBasedTableOptions, hash_index_allow_collision),
124           OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
125         {"data_block_index_type",
126          {offsetof(struct BlockBasedTableOptions, data_block_index_type),
127           OptionType::kBlockBasedTableDataBlockIndexType,
128           OptionVerificationType::kNormal, false, 0}},
129         {"index_shortening",
130          {offsetof(struct BlockBasedTableOptions, index_shortening),
131           OptionType::kBlockBasedTableIndexShorteningMode,
132           OptionVerificationType::kNormal, false, 0}},
133         {"data_block_hash_table_util_ratio",
134          {offsetof(struct BlockBasedTableOptions,
135                    data_block_hash_table_util_ratio),
136           OptionType::kDouble, OptionVerificationType::kNormal, false, 0}},
137         {"checksum",
138          {offsetof(struct BlockBasedTableOptions, checksum),
139           OptionType::kChecksumType, OptionVerificationType::kNormal, false,
140           0}},
141         {"no_block_cache",
142          {offsetof(struct BlockBasedTableOptions, no_block_cache),
143           OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
144         {"block_size",
145          {offsetof(struct BlockBasedTableOptions, block_size),
146           OptionType::kSizeT, OptionVerificationType::kNormal, false, 0}},
147         {"block_size_deviation",
148          {offsetof(struct BlockBasedTableOptions, block_size_deviation),
149           OptionType::kInt, OptionVerificationType::kNormal, false, 0}},
150         {"block_restart_interval",
151          {offsetof(struct BlockBasedTableOptions, block_restart_interval),
152           OptionType::kInt, OptionVerificationType::kNormal, false, 0}},
153         {"index_block_restart_interval",
154          {offsetof(struct BlockBasedTableOptions, index_block_restart_interval),
155           OptionType::kInt, OptionVerificationType::kNormal, false, 0}},
156         {"index_per_partition",
157          {0, OptionType::kUInt64T, OptionVerificationType::kDeprecated, false,
158           0}},
159         {"metadata_block_size",
160          {offsetof(struct BlockBasedTableOptions, metadata_block_size),
161           OptionType::kUInt64T, OptionVerificationType::kNormal, false, 0}},
162         {"partition_filters",
163          {offsetof(struct BlockBasedTableOptions, partition_filters),
164           OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
165         {"filter_policy",
166          {offsetof(struct BlockBasedTableOptions, filter_policy),
167           OptionType::kFilterPolicy, OptionVerificationType::kByName, false,
168           0}},
169         {"whole_key_filtering",
170          {offsetof(struct BlockBasedTableOptions, whole_key_filtering),
171           OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
172         {"skip_table_builder_flush",
173          {0, OptionType::kBoolean, OptionVerificationType::kDeprecated, false,
174           0}},
175         {"format_version",
176          {offsetof(struct BlockBasedTableOptions, format_version),
177           OptionType::kUInt32T, OptionVerificationType::kNormal, false, 0}},
178         {"verify_compression",
179          {offsetof(struct BlockBasedTableOptions, verify_compression),
180           OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
181         {"read_amp_bytes_per_bit",
182          {offsetof(struct BlockBasedTableOptions, read_amp_bytes_per_bit),
183           OptionType::kSizeT, OptionVerificationType::kNormal, false, 0}},
184         {"enable_index_compression",
185          {offsetof(struct BlockBasedTableOptions, enable_index_compression),
186           OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
187         {"block_align",
188          {offsetof(struct BlockBasedTableOptions, block_align),
189           OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}},
190         {"pin_top_level_index_and_filter",
191          {offsetof(struct BlockBasedTableOptions,
192                    pin_top_level_index_and_filter),
193           OptionType::kBoolean, OptionVerificationType::kNormal, false, 0}}};
194 #endif  // !ROCKSDB_LITE
195 }  // namespace ROCKSDB_NAMESPACE
196