1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 #include "table/meta_blocks.h"
6 
7 #include <map>
8 #include <string>
9 
10 #include "block_fetcher.h"
11 #include "db/table_properties_collector.h"
12 #include "file/random_access_file_reader.h"
13 #include "rocksdb/table.h"
14 #include "rocksdb/table_properties.h"
15 #include "table/block_based/block.h"
16 #include "table/format.h"
17 #include "table/internal_iterator.h"
18 #include "table/persistent_cache_helper.h"
19 #include "table/table_properties_internal.h"
20 #include "test_util/sync_point.h"
21 #include "util/coding.h"
22 
23 namespace ROCKSDB_NAMESPACE {
24 
MetaIndexBuilder()25 MetaIndexBuilder::MetaIndexBuilder()
26     : meta_index_block_(new BlockBuilder(1 /* restart interval */)) {}
27 
Add(const std::string & key,const BlockHandle & handle)28 void MetaIndexBuilder::Add(const std::string& key,
29                            const BlockHandle& handle) {
30   std::string handle_encoding;
31   handle.EncodeTo(&handle_encoding);
32   meta_block_handles_.insert({key, handle_encoding});
33 }
34 
Finish()35 Slice MetaIndexBuilder::Finish() {
36   for (const auto& metablock : meta_block_handles_) {
37     meta_index_block_->Add(metablock.first, metablock.second);
38   }
39   return meta_index_block_->Finish();
40 }
41 
42 // Property block will be read sequentially and cached in a heap located
43 // object, so there's no need for restart points. Thus we set the restart
44 // interval to infinity to save space.
PropertyBlockBuilder()45 PropertyBlockBuilder::PropertyBlockBuilder()
46     : properties_block_(
47           new BlockBuilder(port::kMaxInt32 /* restart interval */)) {}
48 
Add(const std::string & name,const std::string & val)49 void PropertyBlockBuilder::Add(const std::string& name,
50                                const std::string& val) {
51   props_.insert({name, val});
52 }
53 
Add(const std::string & name,uint64_t val)54 void PropertyBlockBuilder::Add(const std::string& name, uint64_t val) {
55   assert(props_.find(name) == props_.end());
56 
57   std::string dst;
58   PutVarint64(&dst, val);
59 
60   Add(name, dst);
61 }
62 
Add(const UserCollectedProperties & user_collected_properties)63 void PropertyBlockBuilder::Add(
64     const UserCollectedProperties& user_collected_properties) {
65   for (const auto& prop : user_collected_properties) {
66     Add(prop.first, prop.second);
67   }
68 }
69 
AddTableProperty(const TableProperties & props)70 void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) {
71   TEST_SYNC_POINT_CALLBACK("PropertyBlockBuilder::AddTableProperty:Start",
72                            const_cast<TableProperties*>(&props));
73 
74   Add(TablePropertiesNames::kRawKeySize, props.raw_key_size);
75   Add(TablePropertiesNames::kRawValueSize, props.raw_value_size);
76   Add(TablePropertiesNames::kDataSize, props.data_size);
77   Add(TablePropertiesNames::kIndexSize, props.index_size);
78   if (props.index_partitions != 0) {
79     Add(TablePropertiesNames::kIndexPartitions, props.index_partitions);
80     Add(TablePropertiesNames::kTopLevelIndexSize, props.top_level_index_size);
81   }
82   Add(TablePropertiesNames::kIndexKeyIsUserKey, props.index_key_is_user_key);
83   Add(TablePropertiesNames::kIndexValueIsDeltaEncoded,
84       props.index_value_is_delta_encoded);
85   Add(TablePropertiesNames::kNumEntries, props.num_entries);
86   Add(TablePropertiesNames::kDeletedKeys, props.num_deletions);
87   Add(TablePropertiesNames::kMergeOperands, props.num_merge_operands);
88   Add(TablePropertiesNames::kNumRangeDeletions, props.num_range_deletions);
89   Add(TablePropertiesNames::kNumDataBlocks, props.num_data_blocks);
90   Add(TablePropertiesNames::kFilterSize, props.filter_size);
91   Add(TablePropertiesNames::kFormatVersion, props.format_version);
92   Add(TablePropertiesNames::kFixedKeyLen, props.fixed_key_len);
93   Add(TablePropertiesNames::kColumnFamilyId, props.column_family_id);
94   Add(TablePropertiesNames::kCreationTime, props.creation_time);
95   Add(TablePropertiesNames::kOldestKeyTime, props.oldest_key_time);
96   if (props.file_creation_time > 0) {
97     Add(TablePropertiesNames::kFileCreationTime, props.file_creation_time);
98   }
99 
100   if (!props.filter_policy_name.empty()) {
101     Add(TablePropertiesNames::kFilterPolicy, props.filter_policy_name);
102   }
103   if (!props.comparator_name.empty()) {
104     Add(TablePropertiesNames::kComparator, props.comparator_name);
105   }
106 
107   if (!props.merge_operator_name.empty()) {
108     Add(TablePropertiesNames::kMergeOperator, props.merge_operator_name);
109   }
110   if (!props.prefix_extractor_name.empty()) {
111     Add(TablePropertiesNames::kPrefixExtractorName,
112         props.prefix_extractor_name);
113   }
114   if (!props.property_collectors_names.empty()) {
115     Add(TablePropertiesNames::kPropertyCollectors,
116         props.property_collectors_names);
117   }
118   if (!props.column_family_name.empty()) {
119     Add(TablePropertiesNames::kColumnFamilyName, props.column_family_name);
120   }
121 
122   if (!props.compression_name.empty()) {
123     Add(TablePropertiesNames::kCompression, props.compression_name);
124   }
125   if (!props.compression_options.empty()) {
126     Add(TablePropertiesNames::kCompressionOptions, props.compression_options);
127   }
128 }
129 
Finish()130 Slice PropertyBlockBuilder::Finish() {
131   for (const auto& prop : props_) {
132     properties_block_->Add(prop.first, prop.second);
133   }
134 
135   return properties_block_->Finish();
136 }
137 
LogPropertiesCollectionError(Logger * info_log,const std::string & method,const std::string & name)138 void LogPropertiesCollectionError(
139     Logger* info_log, const std::string& method, const std::string& name) {
140   assert(method == "Add" || method == "Finish");
141 
142   std::string msg =
143     "Encountered error when calling TablePropertiesCollector::" +
144     method + "() with collector name: " + name;
145   ROCKS_LOG_ERROR(info_log, "%s", msg.c_str());
146 }
147 
NotifyCollectTableCollectorsOnAdd(const Slice & key,const Slice & value,uint64_t file_size,const std::vector<std::unique_ptr<IntTblPropCollector>> & collectors,Logger * info_log)148 bool NotifyCollectTableCollectorsOnAdd(
149     const Slice& key, const Slice& value, uint64_t file_size,
150     const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
151     Logger* info_log) {
152   bool all_succeeded = true;
153   for (auto& collector : collectors) {
154     Status s = collector->InternalAdd(key, value, file_size);
155     all_succeeded = all_succeeded && s.ok();
156     if (!s.ok()) {
157       LogPropertiesCollectionError(info_log, "Add" /* method */,
158                                    collector->Name());
159     }
160   }
161   return all_succeeded;
162 }
163 
NotifyCollectTableCollectorsOnBlockAdd(const std::vector<std::unique_ptr<IntTblPropCollector>> & collectors,const uint64_t blockRawBytes,const uint64_t blockCompressedBytesFast,const uint64_t blockCompressedBytesSlow)164 void NotifyCollectTableCollectorsOnBlockAdd(
165     const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
166     const uint64_t blockRawBytes, const uint64_t blockCompressedBytesFast,
167     const uint64_t blockCompressedBytesSlow) {
168   for (auto& collector : collectors) {
169     collector->BlockAdd(blockRawBytes, blockCompressedBytesFast,
170                         blockCompressedBytesSlow);
171   }
172 }
173 
NotifyCollectTableCollectorsOnFinish(const std::vector<std::unique_ptr<IntTblPropCollector>> & collectors,Logger * info_log,PropertyBlockBuilder * builder)174 bool NotifyCollectTableCollectorsOnFinish(
175     const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
176     Logger* info_log, PropertyBlockBuilder* builder) {
177   bool all_succeeded = true;
178   for (auto& collector : collectors) {
179     UserCollectedProperties user_collected_properties;
180     Status s = collector->Finish(&user_collected_properties);
181 
182     all_succeeded = all_succeeded && s.ok();
183     if (!s.ok()) {
184       LogPropertiesCollectionError(info_log, "Finish" /* method */,
185                                    collector->Name());
186     } else {
187       builder->Add(user_collected_properties);
188     }
189   }
190 
191   return all_succeeded;
192 }
193 
ReadProperties(const Slice & handle_value,RandomAccessFileReader * file,FilePrefetchBuffer * prefetch_buffer,const Footer & footer,const ImmutableCFOptions & ioptions,TableProperties ** table_properties,bool verify_checksum,BlockHandle * ret_block_handle,CacheAllocationPtr * verification_buf,bool,MemoryAllocator * memory_allocator)194 Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file,
195                       FilePrefetchBuffer* prefetch_buffer, const Footer& footer,
196                       const ImmutableCFOptions& ioptions,
197                       TableProperties** table_properties, bool verify_checksum,
198                       BlockHandle* ret_block_handle,
199                       CacheAllocationPtr* verification_buf,
200                       bool /*compression_type_missing*/,
201                       MemoryAllocator* memory_allocator) {
202   assert(table_properties);
203 
204   Slice v = handle_value;
205   BlockHandle handle;
206   if (!handle.DecodeFrom(&v).ok()) {
207     return Status::InvalidArgument("Failed to decode properties block handle");
208   }
209 
210   BlockContents block_contents;
211   ReadOptions read_options;
212   read_options.verify_checksums = verify_checksum;
213   Status s;
214   PersistentCacheOptions cache_options;
215 
216   BlockFetcher block_fetcher(
217       file, prefetch_buffer, footer, read_options, handle, &block_contents,
218       ioptions, false /* decompress */, false /*maybe_compressed*/,
219       BlockType::kProperties, UncompressionDict::GetEmptyDict(), cache_options,
220       memory_allocator);
221   s = block_fetcher.ReadBlockContents();
222   // property block is never compressed. Need to add uncompress logic if we are
223   // to compress it..
224 
225   if (!s.ok()) {
226     return s;
227   }
228 
229   Block properties_block(std::move(block_contents));
230   DataBlockIter iter;
231   properties_block.NewDataIterator(BytewiseComparator(), BytewiseComparator(),
232                                    kDisableGlobalSequenceNumber, &iter);
233 
234   auto new_table_properties = new TableProperties();
235   // All pre-defined properties of type uint64_t
236   std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = {
237       {TablePropertiesNames::kDataSize, &new_table_properties->data_size},
238       {TablePropertiesNames::kIndexSize, &new_table_properties->index_size},
239       {TablePropertiesNames::kIndexPartitions,
240        &new_table_properties->index_partitions},
241       {TablePropertiesNames::kTopLevelIndexSize,
242        &new_table_properties->top_level_index_size},
243       {TablePropertiesNames::kIndexKeyIsUserKey,
244        &new_table_properties->index_key_is_user_key},
245       {TablePropertiesNames::kIndexValueIsDeltaEncoded,
246        &new_table_properties->index_value_is_delta_encoded},
247       {TablePropertiesNames::kFilterSize, &new_table_properties->filter_size},
248       {TablePropertiesNames::kRawKeySize, &new_table_properties->raw_key_size},
249       {TablePropertiesNames::kRawValueSize,
250        &new_table_properties->raw_value_size},
251       {TablePropertiesNames::kNumDataBlocks,
252        &new_table_properties->num_data_blocks},
253       {TablePropertiesNames::kNumEntries, &new_table_properties->num_entries},
254       {TablePropertiesNames::kDeletedKeys,
255        &new_table_properties->num_deletions},
256       {TablePropertiesNames::kMergeOperands,
257        &new_table_properties->num_merge_operands},
258       {TablePropertiesNames::kNumRangeDeletions,
259        &new_table_properties->num_range_deletions},
260       {TablePropertiesNames::kFormatVersion,
261        &new_table_properties->format_version},
262       {TablePropertiesNames::kFixedKeyLen,
263        &new_table_properties->fixed_key_len},
264       {TablePropertiesNames::kColumnFamilyId,
265        &new_table_properties->column_family_id},
266       {TablePropertiesNames::kCreationTime,
267        &new_table_properties->creation_time},
268       {TablePropertiesNames::kOldestKeyTime,
269        &new_table_properties->oldest_key_time},
270       {TablePropertiesNames::kFileCreationTime,
271        &new_table_properties->file_creation_time},
272   };
273 
274   std::string last_key;
275   for (iter.SeekToFirstOrReport(); iter.Valid(); iter.NextOrReport()) {
276     s = iter.status();
277     if (!s.ok()) {
278       break;
279     }
280 
281     auto key = iter.key().ToString();
282     // properties block should be strictly sorted with no duplicate key.
283     if (!last_key.empty() &&
284         BytewiseComparator()->Compare(key, last_key) <= 0) {
285       s = Status::Corruption("properties unsorted");
286       break;
287     }
288     last_key = key;
289 
290     auto raw_val = iter.value();
291     auto pos = predefined_uint64_properties.find(key);
292 
293     new_table_properties->properties_offsets.insert(
294         {key, handle.offset() + iter.ValueOffset()});
295 
296     if (pos != predefined_uint64_properties.end()) {
297       if (key == TablePropertiesNames::kDeletedKeys ||
298           key == TablePropertiesNames::kMergeOperands) {
299         // Insert in user-collected properties for API backwards compatibility
300         new_table_properties->user_collected_properties.insert(
301             {key, raw_val.ToString()});
302       }
303       // handle predefined rocksdb properties
304       uint64_t val;
305       if (!GetVarint64(&raw_val, &val)) {
306         // skip malformed value
307         auto error_msg =
308           "Detect malformed value in properties meta-block:"
309           "\tkey: " + key + "\tval: " + raw_val.ToString();
310         ROCKS_LOG_ERROR(ioptions.info_log, "%s", error_msg.c_str());
311         continue;
312       }
313       *(pos->second) = val;
314     } else if (key == TablePropertiesNames::kFilterPolicy) {
315       new_table_properties->filter_policy_name = raw_val.ToString();
316     } else if (key == TablePropertiesNames::kColumnFamilyName) {
317       new_table_properties->column_family_name = raw_val.ToString();
318     } else if (key == TablePropertiesNames::kComparator) {
319       new_table_properties->comparator_name = raw_val.ToString();
320     } else if (key == TablePropertiesNames::kMergeOperator) {
321       new_table_properties->merge_operator_name = raw_val.ToString();
322     } else if (key == TablePropertiesNames::kPrefixExtractorName) {
323       new_table_properties->prefix_extractor_name = raw_val.ToString();
324     } else if (key == TablePropertiesNames::kPropertyCollectors) {
325       new_table_properties->property_collectors_names = raw_val.ToString();
326     } else if (key == TablePropertiesNames::kCompression) {
327       new_table_properties->compression_name = raw_val.ToString();
328     } else if (key == TablePropertiesNames::kCompressionOptions) {
329       new_table_properties->compression_options = raw_val.ToString();
330     } else {
331       // handle user-collected properties
332       new_table_properties->user_collected_properties.insert(
333           {key, raw_val.ToString()});
334     }
335   }
336   if (s.ok()) {
337     *table_properties = new_table_properties;
338     if (ret_block_handle != nullptr) {
339       *ret_block_handle = handle;
340     }
341     if (verification_buf != nullptr) {
342       size_t len = static_cast<size_t>(handle.size() + kBlockTrailerSize);
343       *verification_buf =
344           ROCKSDB_NAMESPACE::AllocateBlock(len, memory_allocator);
345       if (verification_buf->get() != nullptr) {
346         memcpy(verification_buf->get(), block_contents.data.data(), len);
347       }
348     }
349   } else {
350     delete new_table_properties;
351   }
352 
353   return s;
354 }
355 
ReadTableProperties(RandomAccessFileReader * file,uint64_t file_size,uint64_t table_magic_number,const ImmutableCFOptions & ioptions,TableProperties ** properties,bool compression_type_missing,MemoryAllocator * memory_allocator)356 Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
357                            uint64_t table_magic_number,
358                            const ImmutableCFOptions& ioptions,
359                            TableProperties** properties,
360                            bool compression_type_missing,
361                            MemoryAllocator* memory_allocator) {
362   // -- Read metaindex block
363   Footer footer;
364   auto s = ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size,
365                               &footer, table_magic_number);
366   if (!s.ok()) {
367     return s;
368   }
369 
370   auto metaindex_handle = footer.metaindex_handle();
371   BlockContents metaindex_contents;
372   ReadOptions read_options;
373   read_options.verify_checksums = false;
374   PersistentCacheOptions cache_options;
375 
376   BlockFetcher block_fetcher(
377       file, nullptr /* prefetch_buffer */, footer, read_options,
378       metaindex_handle, &metaindex_contents, ioptions, false /* decompress */,
379       false /*maybe_compressed*/, BlockType::kMetaIndex,
380       UncompressionDict::GetEmptyDict(), cache_options, memory_allocator);
381   s = block_fetcher.ReadBlockContents();
382   if (!s.ok()) {
383     return s;
384   }
385   // property blocks are never compressed. Need to add uncompress logic if we
386   // are to compress it.
387   Block metaindex_block(std::move(metaindex_contents));
388   std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewDataIterator(
389       BytewiseComparator(), BytewiseComparator(),
390       kDisableGlobalSequenceNumber));
391 
392   // -- Read property block
393   bool found_properties_block = true;
394   s = SeekToPropertiesBlock(meta_iter.get(), &found_properties_block);
395   if (!s.ok()) {
396     return s;
397   }
398 
399   TableProperties table_properties;
400   if (found_properties_block == true) {
401     s = ReadProperties(
402         meta_iter->value(), file, nullptr /* prefetch_buffer */, footer,
403         ioptions, properties, false /* verify_checksum */,
404         nullptr /* ret_block_hanel */, nullptr /* ret_block_contents */,
405         compression_type_missing, memory_allocator);
406   } else {
407     s = Status::NotFound();
408   }
409 
410   return s;
411 }
412 
FindMetaBlock(InternalIterator * meta_index_iter,const std::string & meta_block_name,BlockHandle * block_handle)413 Status FindMetaBlock(InternalIterator* meta_index_iter,
414                      const std::string& meta_block_name,
415                      BlockHandle* block_handle) {
416   meta_index_iter->Seek(meta_block_name);
417   if (meta_index_iter->status().ok() && meta_index_iter->Valid() &&
418       meta_index_iter->key() == meta_block_name) {
419     Slice v = meta_index_iter->value();
420     return block_handle->DecodeFrom(&v);
421   } else {
422     return Status::Corruption("Cannot find the meta block", meta_block_name);
423   }
424 }
425 
FindMetaBlock(RandomAccessFileReader * file,uint64_t file_size,uint64_t table_magic_number,const ImmutableCFOptions & ioptions,const std::string & meta_block_name,BlockHandle * block_handle,bool,MemoryAllocator * memory_allocator)426 Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size,
427                      uint64_t table_magic_number,
428                      const ImmutableCFOptions& ioptions,
429                      const std::string& meta_block_name,
430                      BlockHandle* block_handle,
431                      bool /*compression_type_missing*/,
432                      MemoryAllocator* memory_allocator) {
433   Footer footer;
434   auto s = ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size,
435                               &footer, table_magic_number);
436   if (!s.ok()) {
437     return s;
438   }
439 
440   auto metaindex_handle = footer.metaindex_handle();
441   BlockContents metaindex_contents;
442   ReadOptions read_options;
443   read_options.verify_checksums = false;
444   PersistentCacheOptions cache_options;
445   BlockFetcher block_fetcher(
446       file, nullptr /* prefetch_buffer */, footer, read_options,
447       metaindex_handle, &metaindex_contents, ioptions,
448       false /* do decompression */, false /*maybe_compressed*/,
449       BlockType::kMetaIndex, UncompressionDict::GetEmptyDict(), cache_options,
450       memory_allocator);
451   s = block_fetcher.ReadBlockContents();
452   if (!s.ok()) {
453     return s;
454   }
455   // meta blocks are never compressed. Need to add uncompress logic if we are to
456   // compress it.
457   Block metaindex_block(std::move(metaindex_contents));
458 
459   std::unique_ptr<InternalIterator> meta_iter;
460   meta_iter.reset(metaindex_block.NewDataIterator(
461       BytewiseComparator(), BytewiseComparator(),
462       kDisableGlobalSequenceNumber));
463 
464   return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle);
465 }
466 
ReadMetaBlock(RandomAccessFileReader * file,FilePrefetchBuffer * prefetch_buffer,uint64_t file_size,uint64_t table_magic_number,const ImmutableCFOptions & ioptions,const std::string & meta_block_name,BlockType block_type,BlockContents * contents,bool,MemoryAllocator * memory_allocator)467 Status ReadMetaBlock(RandomAccessFileReader* file,
468                      FilePrefetchBuffer* prefetch_buffer, uint64_t file_size,
469                      uint64_t table_magic_number,
470                      const ImmutableCFOptions& ioptions,
471                      const std::string& meta_block_name, BlockType block_type,
472                      BlockContents* contents, bool /*compression_type_missing*/,
473                      MemoryAllocator* memory_allocator) {
474   Status status;
475   Footer footer;
476   status = ReadFooterFromFile(file, prefetch_buffer, file_size, &footer,
477                               table_magic_number);
478   if (!status.ok()) {
479     return status;
480   }
481 
482   // Reading metaindex block
483   auto metaindex_handle = footer.metaindex_handle();
484   BlockContents metaindex_contents;
485   ReadOptions read_options;
486   read_options.verify_checksums = false;
487   PersistentCacheOptions cache_options;
488 
489   BlockFetcher block_fetcher(
490       file, prefetch_buffer, footer, read_options, metaindex_handle,
491       &metaindex_contents, ioptions, false /* decompress */,
492       false /*maybe_compressed*/, BlockType::kMetaIndex,
493       UncompressionDict::GetEmptyDict(), cache_options, memory_allocator);
494   status = block_fetcher.ReadBlockContents();
495   if (!status.ok()) {
496     return status;
497   }
498   // meta block is never compressed. Need to add uncompress logic if we are to
499   // compress it.
500 
501   // Finding metablock
502   Block metaindex_block(std::move(metaindex_contents));
503 
504   std::unique_ptr<InternalIterator> meta_iter;
505   meta_iter.reset(metaindex_block.NewDataIterator(
506       BytewiseComparator(), BytewiseComparator(),
507       kDisableGlobalSequenceNumber));
508 
509   BlockHandle block_handle;
510   status = FindMetaBlock(meta_iter.get(), meta_block_name, &block_handle);
511 
512   if (!status.ok()) {
513     return status;
514   }
515 
516   // Reading metablock
517   BlockFetcher block_fetcher2(
518       file, prefetch_buffer, footer, read_options, block_handle, contents,
519       ioptions, false /* decompress */, false /*maybe_compressed*/, block_type,
520       UncompressionDict::GetEmptyDict(), cache_options, memory_allocator);
521   return block_fetcher2.ReadBlockContents();
522 }
523 
524 }  // namespace ROCKSDB_NAMESPACE
525