1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #include <stdint.h>
11 #include <cinttypes>
12 
13 #include <memory>
14 #include <string>
15 
16 #include "options/options_helper.h"
17 #include "port/port.h"
18 #include "rocksdb/cache.h"
19 #include "rocksdb/convenience.h"
20 #include "rocksdb/flush_block_policy.h"
21 #include "table/block_based/block_based_table_builder.h"
22 #include "table/block_based/block_based_table_factory.h"
23 #include "table/block_based/block_based_table_reader.h"
24 #include "table/format.h"
25 #include "util/mutexlock.h"
26 #include "util/string_util.h"
27 
28 namespace ROCKSDB_NAMESPACE {
29 
RecordEffectiveSize(size_t len)30 void TailPrefetchStats::RecordEffectiveSize(size_t len) {
31   MutexLock l(&mutex_);
32   if (num_records_ < kNumTracked) {
33     num_records_++;
34   }
35   records_[next_++] = len;
36   if (next_ == kNumTracked) {
37     next_ = 0;
38   }
39 }
40 
GetSuggestedPrefetchSize()41 size_t TailPrefetchStats::GetSuggestedPrefetchSize() {
42   std::vector<size_t> sorted;
43   {
44     MutexLock l(&mutex_);
45 
46     if (num_records_ == 0) {
47       return 0;
48     }
49     sorted.assign(records_, records_ + num_records_);
50   }
51 
52   // Of the historic size, we find the maximum one that satisifis the condtiion
53   // that if prefetching all, less than 1/8 will be wasted.
54   std::sort(sorted.begin(), sorted.end());
55 
56   // Assuming we have 5 data points, and after sorting it looks like this:
57   //
58   //                                     +---+
59   //                             +---+   |   |
60   //                             |   |   |   |
61   //                             |   |   |   |
62   //                             |   |   |   |
63   //                             |   |   |   |
64   //                    +---+    |   |   |   |
65   //                    |   |    |   |   |   |
66   //           +---+    |   |    |   |   |   |
67   //           |   |    |   |    |   |   |   |
68   //  +---+    |   |    |   |    |   |   |   |
69   //  |   |    |   |    |   |    |   |   |   |
70   //  |   |    |   |    |   |    |   |   |   |
71   //  |   |    |   |    |   |    |   |   |   |
72   //  |   |    |   |    |   |    |   |   |   |
73   //  |   |    |   |    |   |    |   |   |   |
74   //  +---+    +---+    +---+    +---+   +---+
75   //
76   // and we use every of the value as a candidate, and estimate how much we
77   // wasted, compared to read. For example, when we use the 3rd record
78   // as candiate. This area is what we read:
79   //                                     +---+
80   //                             +---+   |   |
81   //                             |   |   |   |
82   //                             |   |   |   |
83   //                             |   |   |   |
84   //                             |   |   |   |
85   //  ***  ***  ***  ***+ ***  ***  *** *** **
86   //  *                 |   |    |   |   |   |
87   //           +---+    |   |    |   |   |   *
88   //  *        |   |    |   |    |   |   |   |
89   //  +---+    |   |    |   |    |   |   |   *
90   //  *   |    |   |    | X |    |   |   |   |
91   //  |   |    |   |    |   |    |   |   |   *
92   //  *   |    |   |    |   |    |   |   |   |
93   //  |   |    |   |    |   |    |   |   |   *
94   //  *   |    |   |    |   |    |   |   |   |
95   //  *** *** ***-***  ***--*** ***--*** +****
96   // which is (size of the record) X (number of records).
97   //
98   // While wasted is this area:
99   //                                     +---+
100   //                             +---+   |   |
101   //                             |   |   |   |
102   //                             |   |   |   |
103   //                             |   |   |   |
104   //                             |   |   |   |
105   //  ***  ***  ***  ****---+    |   |   |   |
106   //  *                 *   |    |   |   |   |
107   //  *        *-***  ***   |    |   |   |   |
108   //  *        *   |    |   |    |   |   |   |
109   //  *--**  ***   |    |   |    |   |   |   |
110   //  |   |    |   |    | X |    |   |   |   |
111   //  |   |    |   |    |   |    |   |   |   |
112   //  |   |    |   |    |   |    |   |   |   |
113   //  |   |    |   |    |   |    |   |   |   |
114   //  |   |    |   |    |   |    |   |   |   |
115   //  +---+    +---+    +---+    +---+   +---+
116   //
117   // Which can be calculated iteratively.
118   // The difference between wasted using 4st and 3rd record, will
119   // be following area:
120   //                                     +---+
121   //  +--+  +-+   ++  +-+  +-+   +---+   |   |
122   //  + xxxxxxxxxxxxxxxxxxxxxxxx |   |   |   |
123   //    xxxxxxxxxxxxxxxxxxxxxxxx |   |   |   |
124   //  + xxxxxxxxxxxxxxxxxxxxxxxx |   |   |   |
125   //  | xxxxxxxxxxxxxxxxxxxxxxxx |   |   |   |
126   //  +-+ +-+  +-+  ++  +---+ +--+   |   |   |
127   //  |                 |   |    |   |   |   |
128   //           +---+ ++ |   |    |   |   |   |
129   //  |        |   |    |   |    | X |   |   |
130   //  +---+ ++ |   |    |   |    |   |   |   |
131   //  |   |    |   |    |   |    |   |   |   |
132   //  |   |    |   |    |   |    |   |   |   |
133   //  |   |    |   |    |   |    |   |   |   |
134   //  |   |    |   |    |   |    |   |   |   |
135   //  |   |    |   |    |   |    |   |   |   |
136   //  +---+    +---+    +---+    +---+   +---+
137   //
138   // which will be the size difference between 4st and 3rd record,
139   // times 3, which is number of records before the 4st.
140   // Here we assume that all data within the prefetch range will be useful. In
141   // reality, it may not be the case when a partial block is inside the range,
142   // or there are data in the middle that is not read. We ignore those cases
143   // for simplicity.
144   assert(!sorted.empty());
145   size_t prev_size = sorted[0];
146   size_t max_qualified_size = sorted[0];
147   size_t wasted = 0;
148   for (size_t i = 1; i < sorted.size(); i++) {
149     size_t read = sorted[i] * sorted.size();
150     wasted += (sorted[i] - prev_size) * i;
151     if (wasted <= read / 8) {
152       max_qualified_size = sorted[i];
153     }
154     prev_size = sorted[i];
155   }
156   const size_t kMaxPrefetchSize = 512 * 1024;  // Never exceed 512KB
157   return std::min(kMaxPrefetchSize, max_qualified_size);
158 }
159 
160 // TODO(myabandeh): We should return an error instead of silently changing the
161 // options
BlockBasedTableFactory(const BlockBasedTableOptions & _table_options)162 BlockBasedTableFactory::BlockBasedTableFactory(
163     const BlockBasedTableOptions& _table_options)
164     : table_options_(_table_options) {
165   if (table_options_.flush_block_policy_factory == nullptr) {
166     table_options_.flush_block_policy_factory.reset(
167         new FlushBlockBySizePolicyFactory());
168   }
169   if (table_options_.no_block_cache) {
170     table_options_.block_cache.reset();
171   } else if (table_options_.block_cache == nullptr) {
172     LRUCacheOptions co;
173     co.capacity = 8 << 20;
174     // It makes little sense to pay overhead for mid-point insertion while the
175     // block size is only 8MB.
176     co.high_pri_pool_ratio = 0.0;
177     table_options_.block_cache = NewLRUCache(co);
178   }
179   if (table_options_.block_size_deviation < 0 ||
180       table_options_.block_size_deviation > 100) {
181     table_options_.block_size_deviation = 0;
182   }
183   if (table_options_.block_restart_interval < 1) {
184     table_options_.block_restart_interval = 1;
185   }
186   if (table_options_.index_block_restart_interval < 1) {
187     table_options_.index_block_restart_interval = 1;
188   }
189   if (table_options_.index_type == BlockBasedTableOptions::kHashSearch &&
190       table_options_.index_block_restart_interval != 1) {
191     // Currently kHashSearch is incompatible with index_block_restart_interval > 1
192     table_options_.index_block_restart_interval = 1;
193   }
194   if (table_options_.partition_filters &&
195       table_options_.index_type !=
196           BlockBasedTableOptions::kTwoLevelIndexSearch) {
197     // We do not support partitioned filters without partitioning indexes
198     table_options_.partition_filters = false;
199   }
200 }
201 
NewTableReader(const TableReaderOptions & table_reader_options,std::unique_ptr<RandomAccessFileReader> && file,uint64_t file_size,std::unique_ptr<TableReader> * table_reader,bool prefetch_index_and_filter_in_cache) const202 Status BlockBasedTableFactory::NewTableReader(
203     const TableReaderOptions& table_reader_options,
204     std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
205     std::unique_ptr<TableReader>* table_reader,
206     bool prefetch_index_and_filter_in_cache) const {
207   return BlockBasedTable::Open(
208       table_reader_options.ioptions, table_reader_options.env_options,
209       table_options_, table_reader_options.internal_comparator, std::move(file),
210       file_size, table_reader, table_reader_options.prefix_extractor,
211       prefetch_index_and_filter_in_cache, table_reader_options.skip_filters,
212       table_reader_options.level, table_reader_options.immortal,
213       table_reader_options.largest_seqno, &tail_prefetch_stats_,
214       table_reader_options.block_cache_tracer);
215 }
216 
NewTableBuilder(const TableBuilderOptions & table_builder_options,uint32_t column_family_id,WritableFileWriter * file) const217 TableBuilder* BlockBasedTableFactory::NewTableBuilder(
218     const TableBuilderOptions& table_builder_options, uint32_t column_family_id,
219     WritableFileWriter* file) const {
220   auto table_builder = new BlockBasedTableBuilder(
221       table_builder_options.ioptions, table_builder_options.moptions,
222       table_options_, table_builder_options.internal_comparator,
223       table_builder_options.int_tbl_prop_collector_factories, column_family_id,
224       file, table_builder_options.compression_type,
225       table_builder_options.sample_for_compression,
226       table_builder_options.compression_opts,
227       table_builder_options.skip_filters,
228       table_builder_options.column_family_name, table_builder_options.level,
229       table_builder_options.creation_time,
230       table_builder_options.oldest_key_time,
231       table_builder_options.target_file_size,
232       table_builder_options.file_creation_time);
233 
234   return table_builder;
235 }
236 
SanitizeOptions(const DBOptions & db_opts,const ColumnFamilyOptions & cf_opts) const237 Status BlockBasedTableFactory::SanitizeOptions(
238     const DBOptions& db_opts, const ColumnFamilyOptions& cf_opts) const {
239   if (table_options_.index_type == BlockBasedTableOptions::kHashSearch &&
240       cf_opts.prefix_extractor == nullptr) {
241     return Status::InvalidArgument(
242         "Hash index is specified for block-based "
243         "table, but prefix_extractor is not given");
244   }
245   if (table_options_.cache_index_and_filter_blocks &&
246       table_options_.no_block_cache) {
247     return Status::InvalidArgument(
248         "Enable cache_index_and_filter_blocks, "
249         ", but block cache is disabled");
250   }
251   if (table_options_.pin_l0_filter_and_index_blocks_in_cache &&
252       table_options_.no_block_cache) {
253     return Status::InvalidArgument(
254         "Enable pin_l0_filter_and_index_blocks_in_cache, "
255         ", but block cache is disabled");
256   }
257   if (!BlockBasedTableSupportedVersion(table_options_.format_version)) {
258     return Status::InvalidArgument(
259         "Unsupported BlockBasedTable format_version. Please check "
260         "include/rocksdb/table.h for more info");
261   }
262   if (table_options_.block_align && (cf_opts.compression != kNoCompression)) {
263     return Status::InvalidArgument(
264         "Enable block_align, but compression "
265         "enabled");
266   }
267   if (table_options_.block_align &&
268       (table_options_.block_size & (table_options_.block_size - 1))) {
269     return Status::InvalidArgument(
270         "Block alignment requested but block size is not a power of 2");
271   }
272   if (table_options_.block_size > port::kMaxUint32) {
273     return Status::InvalidArgument(
274         "block size exceeds maximum number (4GiB) allowed");
275   }
276   if (table_options_.data_block_index_type ==
277           BlockBasedTableOptions::kDataBlockBinaryAndHash &&
278       table_options_.data_block_hash_table_util_ratio <= 0) {
279     return Status::InvalidArgument(
280         "data_block_hash_table_util_ratio should be greater than 0 when "
281         "data_block_index_type is set to kDataBlockBinaryAndHash");
282   }
283   if (db_opts.unordered_write && cf_opts.max_successive_merges > 0) {
284     // TODO(myabandeh): support it
285     return Status::InvalidArgument(
286         "max_successive_merges larger than 0 is currently inconsistent with "
287         "unordered_write");
288   }
289   return Status::OK();
290 }
291 
GetPrintableTableOptions() const292 std::string BlockBasedTableFactory::GetPrintableTableOptions() const {
293   std::string ret;
294   ret.reserve(20000);
295   const int kBufferSize = 200;
296   char buffer[kBufferSize];
297 
298   snprintf(buffer, kBufferSize, "  flush_block_policy_factory: %s (%p)\n",
299            table_options_.flush_block_policy_factory->Name(),
300            static_cast<void*>(table_options_.flush_block_policy_factory.get()));
301   ret.append(buffer);
302   snprintf(buffer, kBufferSize, "  cache_index_and_filter_blocks: %d\n",
303            table_options_.cache_index_and_filter_blocks);
304   ret.append(buffer);
305   snprintf(buffer, kBufferSize,
306            "  cache_index_and_filter_blocks_with_high_priority: %d\n",
307            table_options_.cache_index_and_filter_blocks_with_high_priority);
308   ret.append(buffer);
309   snprintf(buffer, kBufferSize,
310            "  pin_l0_filter_and_index_blocks_in_cache: %d\n",
311            table_options_.pin_l0_filter_and_index_blocks_in_cache);
312   ret.append(buffer);
313   snprintf(buffer, kBufferSize, "  pin_top_level_index_and_filter: %d\n",
314            table_options_.pin_top_level_index_and_filter);
315   ret.append(buffer);
316   snprintf(buffer, kBufferSize, "  index_type: %d\n",
317            table_options_.index_type);
318   ret.append(buffer);
319   snprintf(buffer, kBufferSize, "  data_block_index_type: %d\n",
320            table_options_.data_block_index_type);
321   ret.append(buffer);
322   snprintf(buffer, kBufferSize, "  index_shortening: %d\n",
323            static_cast<int>(table_options_.index_shortening));
324   ret.append(buffer);
325   snprintf(buffer, kBufferSize, "  data_block_hash_table_util_ratio: %lf\n",
326            table_options_.data_block_hash_table_util_ratio);
327   ret.append(buffer);
328   snprintf(buffer, kBufferSize, "  hash_index_allow_collision: %d\n",
329            table_options_.hash_index_allow_collision);
330   ret.append(buffer);
331   snprintf(buffer, kBufferSize, "  checksum: %d\n", table_options_.checksum);
332   ret.append(buffer);
333   snprintf(buffer, kBufferSize, "  no_block_cache: %d\n",
334            table_options_.no_block_cache);
335   ret.append(buffer);
336   snprintf(buffer, kBufferSize, "  block_cache: %p\n",
337            static_cast<void*>(table_options_.block_cache.get()));
338   ret.append(buffer);
339   if (table_options_.block_cache) {
340     const char* block_cache_name = table_options_.block_cache->Name();
341     if (block_cache_name != nullptr) {
342       snprintf(buffer, kBufferSize, "  block_cache_name: %s\n",
343                block_cache_name);
344       ret.append(buffer);
345     }
346     ret.append("  block_cache_options:\n");
347     ret.append(table_options_.block_cache->GetPrintableOptions());
348   }
349   snprintf(buffer, kBufferSize, "  block_cache_compressed: %p\n",
350            static_cast<void*>(table_options_.block_cache_compressed.get()));
351   ret.append(buffer);
352   if (table_options_.block_cache_compressed) {
353     const char* block_cache_compressed_name =
354         table_options_.block_cache_compressed->Name();
355     if (block_cache_compressed_name != nullptr) {
356       snprintf(buffer, kBufferSize, "  block_cache_name: %s\n",
357                block_cache_compressed_name);
358       ret.append(buffer);
359     }
360     ret.append("  block_cache_compressed_options:\n");
361     ret.append(table_options_.block_cache_compressed->GetPrintableOptions());
362   }
363   snprintf(buffer, kBufferSize, "  persistent_cache: %p\n",
364            static_cast<void*>(table_options_.persistent_cache.get()));
365   ret.append(buffer);
366   if (table_options_.persistent_cache) {
367     snprintf(buffer, kBufferSize, "  persistent_cache_options:\n");
368     ret.append(buffer);
369     ret.append(table_options_.persistent_cache->GetPrintableOptions());
370   }
371   snprintf(buffer, kBufferSize, "  block_size: %" ROCKSDB_PRIszt "\n",
372            table_options_.block_size);
373   ret.append(buffer);
374   snprintf(buffer, kBufferSize, "  block_size_deviation: %d\n",
375            table_options_.block_size_deviation);
376   ret.append(buffer);
377   snprintf(buffer, kBufferSize, "  block_restart_interval: %d\n",
378            table_options_.block_restart_interval);
379   ret.append(buffer);
380   snprintf(buffer, kBufferSize, "  index_block_restart_interval: %d\n",
381            table_options_.index_block_restart_interval);
382   ret.append(buffer);
383   snprintf(buffer, kBufferSize, "  metadata_block_size: %" PRIu64 "\n",
384            table_options_.metadata_block_size);
385   ret.append(buffer);
386   snprintf(buffer, kBufferSize, "  partition_filters: %d\n",
387            table_options_.partition_filters);
388   ret.append(buffer);
389   snprintf(buffer, kBufferSize, "  use_delta_encoding: %d\n",
390            table_options_.use_delta_encoding);
391   ret.append(buffer);
392   snprintf(buffer, kBufferSize, "  filter_policy: %s\n",
393            table_options_.filter_policy == nullptr
394                ? "nullptr"
395                : table_options_.filter_policy->Name());
396   ret.append(buffer);
397   snprintf(buffer, kBufferSize, "  whole_key_filtering: %d\n",
398            table_options_.whole_key_filtering);
399   ret.append(buffer);
400   snprintf(buffer, kBufferSize, "  verify_compression: %d\n",
401            table_options_.verify_compression);
402   ret.append(buffer);
403   snprintf(buffer, kBufferSize, "  read_amp_bytes_per_bit: %d\n",
404            table_options_.read_amp_bytes_per_bit);
405   ret.append(buffer);
406   snprintf(buffer, kBufferSize, "  format_version: %d\n",
407            table_options_.format_version);
408   ret.append(buffer);
409   snprintf(buffer, kBufferSize, "  enable_index_compression: %d\n",
410            table_options_.enable_index_compression);
411   ret.append(buffer);
412   snprintf(buffer, kBufferSize, "  block_align: %d\n",
413            table_options_.block_align);
414   ret.append(buffer);
415   return ret;
416 }
417 
418 #ifndef ROCKSDB_LITE
419 namespace {
SerializeSingleBlockBasedTableOption(std::string * opt_string,const BlockBasedTableOptions & bbt_options,const std::string & name,const std::string & delimiter)420 bool SerializeSingleBlockBasedTableOption(
421     std::string* opt_string, const BlockBasedTableOptions& bbt_options,
422     const std::string& name, const std::string& delimiter) {
423   auto iter = block_based_table_type_info.find(name);
424   if (iter == block_based_table_type_info.end()) {
425     return false;
426   }
427   auto& opt_info = iter->second;
428   const char* opt_address =
429       reinterpret_cast<const char*>(&bbt_options) + opt_info.offset;
430   std::string value;
431   bool result = SerializeSingleOptionHelper(opt_address, opt_info.type, &value);
432   if (result) {
433     *opt_string = name + "=" + value + delimiter;
434   }
435   return result;
436 }
437 }  // namespace
438 
GetOptionString(std::string * opt_string,const std::string & delimiter) const439 Status BlockBasedTableFactory::GetOptionString(
440     std::string* opt_string, const std::string& delimiter) const {
441   assert(opt_string);
442   opt_string->clear();
443   for (auto iter = block_based_table_type_info.begin();
444        iter != block_based_table_type_info.end(); ++iter) {
445     if (iter->second.verification == OptionVerificationType::kDeprecated) {
446       // If the option is no longer used in rocksdb and marked as deprecated,
447       // we skip it in the serialization.
448       continue;
449     }
450     std::string single_output;
451     bool result = SerializeSingleBlockBasedTableOption(
452         &single_output, table_options_, iter->first, delimiter);
453     assert(result);
454     if (result) {
455       opt_string->append(single_output);
456     }
457   }
458   return Status::OK();
459 }
460 #else
GetOptionString(std::string *,const std::string &) const461 Status BlockBasedTableFactory::GetOptionString(
462     std::string* /*opt_string*/, const std::string& /*delimiter*/) const {
463   return Status::OK();
464 }
465 #endif  // !ROCKSDB_LITE
466 
table_options() const467 const BlockBasedTableOptions& BlockBasedTableFactory::table_options() const {
468   return table_options_;
469 }
470 
471 #ifndef ROCKSDB_LITE
472 namespace {
ParseBlockBasedTableOption(const std::string & name,const std::string & org_value,BlockBasedTableOptions * new_options,bool input_strings_escaped=false,bool ignore_unknown_options=false)473 std::string ParseBlockBasedTableOption(const std::string& name,
474                                        const std::string& org_value,
475                                        BlockBasedTableOptions* new_options,
476                                        bool input_strings_escaped = false,
477                                        bool ignore_unknown_options = false) {
478   const std::string& value =
479       input_strings_escaped ? UnescapeOptionString(org_value) : org_value;
480   if (!input_strings_escaped) {
481     // if the input string is not escaped, it means this function is
482     // invoked from SetOptions, which takes the old format.
483     if (name == "block_cache" || name == "block_cache_compressed") {
484       // cache options can be specified in the following format
485       //   "block_cache={capacity=1M;num_shard_bits=4;
486       //    strict_capacity_limit=true;high_pri_pool_ratio=0.5;}"
487       // To support backward compatibility, the following format
488       // is also supported.
489       //   "block_cache=1M"
490       std::shared_ptr<Cache> cache;
491       // block_cache is specified in format block_cache=<cache_size>.
492       if (value.find('=') == std::string::npos) {
493         cache = NewLRUCache(ParseSizeT(value));
494       } else {
495         LRUCacheOptions cache_opts;
496         if (!ParseOptionHelper(reinterpret_cast<char*>(&cache_opts),
497                                OptionType::kLRUCacheOptions, value)) {
498           return "Invalid cache options";
499         }
500         cache = NewLRUCache(cache_opts);
501       }
502 
503       if (name == "block_cache") {
504         new_options->block_cache = cache;
505       } else {
506         new_options->block_cache_compressed = cache;
507       }
508       return "";
509     } else if (name == "filter_policy") {
510       // Expect the following format
511       // bloomfilter:int:bool
512       const std::string kName = "bloomfilter:";
513       if (value.compare(0, kName.size(), kName) != 0) {
514         return "Invalid filter policy name";
515       }
516       size_t pos = value.find(':', kName.size());
517       if (pos == std::string::npos) {
518         return "Invalid filter policy config, missing bits_per_key";
519       }
520       double bits_per_key =
521           ParseDouble(trim(value.substr(kName.size(), pos - kName.size())));
522       bool use_block_based_builder =
523           ParseBoolean("use_block_based_builder", trim(value.substr(pos + 1)));
524       new_options->filter_policy.reset(
525           NewBloomFilterPolicy(bits_per_key, use_block_based_builder));
526       return "";
527     }
528   }
529   const auto iter = block_based_table_type_info.find(name);
530   if (iter == block_based_table_type_info.end()) {
531     if (ignore_unknown_options) {
532       return "";
533     } else {
534       return "Unrecognized option";
535     }
536   }
537   const auto& opt_info = iter->second;
538   if (opt_info.verification != OptionVerificationType::kDeprecated &&
539       !ParseOptionHelper(reinterpret_cast<char*>(new_options) + opt_info.offset,
540                          opt_info.type, value)) {
541     return "Invalid value";
542   }
543   return "";
544 }
545 }  // namespace
546 
GetBlockBasedTableOptionsFromString(const BlockBasedTableOptions & table_options,const std::string & opts_str,BlockBasedTableOptions * new_table_options)547 Status GetBlockBasedTableOptionsFromString(
548     const BlockBasedTableOptions& table_options, const std::string& opts_str,
549     BlockBasedTableOptions* new_table_options) {
550   std::unordered_map<std::string, std::string> opts_map;
551   Status s = StringToMap(opts_str, &opts_map);
552   if (!s.ok()) {
553     return s;
554   }
555 
556   return GetBlockBasedTableOptionsFromMap(table_options, opts_map,
557                                           new_table_options);
558 }
559 
GetBlockBasedTableOptionsFromMap(const BlockBasedTableOptions & table_options,const std::unordered_map<std::string,std::string> & opts_map,BlockBasedTableOptions * new_table_options,bool input_strings_escaped,bool ignore_unknown_options)560 Status GetBlockBasedTableOptionsFromMap(
561     const BlockBasedTableOptions& table_options,
562     const std::unordered_map<std::string, std::string>& opts_map,
563     BlockBasedTableOptions* new_table_options, bool input_strings_escaped,
564     bool ignore_unknown_options) {
565   assert(new_table_options);
566   *new_table_options = table_options;
567   for (const auto& o : opts_map) {
568     auto error_message = ParseBlockBasedTableOption(
569         o.first, o.second, new_table_options, input_strings_escaped,
570         ignore_unknown_options);
571     if (error_message != "") {
572       const auto iter = block_based_table_type_info.find(o.first);
573       if (iter == block_based_table_type_info.end() ||
574           !input_strings_escaped ||  // !input_strings_escaped indicates
575                                      // the old API, where everything is
576                                      // parsable.
577           (iter->second.verification != OptionVerificationType::kByName &&
578            iter->second.verification !=
579                OptionVerificationType::kByNameAllowNull &&
580            iter->second.verification !=
581                OptionVerificationType::kByNameAllowFromNull &&
582            iter->second.verification != OptionVerificationType::kDeprecated)) {
583         // Restore "new_options" to the default "base_options".
584         *new_table_options = table_options;
585         return Status::InvalidArgument("Can't parse BlockBasedTableOptions:",
586                                        o.first + " " + error_message);
587       }
588     }
589   }
590   return Status::OK();
591 }
592 
VerifyBlockBasedTableFactory(const BlockBasedTableFactory * base_tf,const BlockBasedTableFactory * file_tf,OptionsSanityCheckLevel sanity_check_level)593 Status VerifyBlockBasedTableFactory(
594     const BlockBasedTableFactory* base_tf,
595     const BlockBasedTableFactory* file_tf,
596     OptionsSanityCheckLevel sanity_check_level) {
597   if ((base_tf != nullptr) != (file_tf != nullptr) &&
598       sanity_check_level > kSanityLevelNone) {
599     return Status::Corruption(
600         "[RocksDBOptionsParser]: Inconsistent TableFactory class type");
601   }
602   if (base_tf == nullptr) {
603     return Status::OK();
604   }
605   assert(file_tf != nullptr);
606 
607   const auto& base_opt = base_tf->table_options();
608   const auto& file_opt = file_tf->table_options();
609 
610   for (auto& pair : block_based_table_type_info) {
611     if (pair.second.verification == OptionVerificationType::kDeprecated) {
612       // We skip checking deprecated variables as they might
613       // contain random values since they might not be initialized
614       continue;
615     }
616     if (BBTOptionSanityCheckLevel(pair.first) <= sanity_check_level) {
617       if (!AreEqualOptions(reinterpret_cast<const char*>(&base_opt),
618                            reinterpret_cast<const char*>(&file_opt),
619                            pair.second, pair.first, nullptr)) {
620         return Status::Corruption(
621             "[RocksDBOptionsParser]: "
622             "failed the verification on BlockBasedTableOptions::",
623             pair.first);
624       }
625     }
626   }
627   return Status::OK();
628 }
629 #endif  // !ROCKSDB_LITE
630 
NewBlockBasedTableFactory(const BlockBasedTableOptions & _table_options)631 TableFactory* NewBlockBasedTableFactory(
632     const BlockBasedTableOptions& _table_options) {
633   return new BlockBasedTableFactory(_table_options);
634 }
635 
636 const std::string BlockBasedTableFactory::kName = "BlockBasedTable";
637 const std::string BlockBasedTablePropertyNames::kIndexType =
638     "rocksdb.block.based.table.index.type";
639 const std::string BlockBasedTablePropertyNames::kWholeKeyFiltering =
640     "rocksdb.block.based.table.whole.key.filtering";
641 const std::string BlockBasedTablePropertyNames::kPrefixFiltering =
642     "rocksdb.block.based.table.prefix.filtering";
643 const std::string kHashIndexPrefixesBlock = "rocksdb.hashindex.prefixes";
644 const std::string kHashIndexPrefixesMetadataBlock =
645     "rocksdb.hashindex.metadata";
646 const std::string kPropTrue = "1";
647 const std::string kPropFalse = "0";
648 
649 }  // namespace ROCKSDB_NAMESPACE
650