1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 #include "table/meta_blocks.h"
6
7 #include <map>
8 #include <string>
9
10 #include "block_fetcher.h"
11 #include "db/table_properties_collector.h"
12 #include "file/random_access_file_reader.h"
13 #include "rocksdb/table.h"
14 #include "rocksdb/table_properties.h"
15 #include "table/block_based/block.h"
16 #include "table/format.h"
17 #include "table/internal_iterator.h"
18 #include "table/persistent_cache_helper.h"
19 #include "table/table_properties_internal.h"
20 #include "test_util/sync_point.h"
21 #include "util/coding.h"
22
23 namespace ROCKSDB_NAMESPACE {
24
MetaIndexBuilder()25 MetaIndexBuilder::MetaIndexBuilder()
26 : meta_index_block_(new BlockBuilder(1 /* restart interval */)) {}
27
Add(const std::string & key,const BlockHandle & handle)28 void MetaIndexBuilder::Add(const std::string& key,
29 const BlockHandle& handle) {
30 std::string handle_encoding;
31 handle.EncodeTo(&handle_encoding);
32 meta_block_handles_.insert({key, handle_encoding});
33 }
34
Finish()35 Slice MetaIndexBuilder::Finish() {
36 for (const auto& metablock : meta_block_handles_) {
37 meta_index_block_->Add(metablock.first, metablock.second);
38 }
39 return meta_index_block_->Finish();
40 }
41
42 // Property block will be read sequentially and cached in a heap located
43 // object, so there's no need for restart points. Thus we set the restart
44 // interval to infinity to save space.
PropertyBlockBuilder()45 PropertyBlockBuilder::PropertyBlockBuilder()
46 : properties_block_(
47 new BlockBuilder(port::kMaxInt32 /* restart interval */)) {}
48
Add(const std::string & name,const std::string & val)49 void PropertyBlockBuilder::Add(const std::string& name,
50 const std::string& val) {
51 props_.insert({name, val});
52 }
53
Add(const std::string & name,uint64_t val)54 void PropertyBlockBuilder::Add(const std::string& name, uint64_t val) {
55 assert(props_.find(name) == props_.end());
56
57 std::string dst;
58 PutVarint64(&dst, val);
59
60 Add(name, dst);
61 }
62
Add(const UserCollectedProperties & user_collected_properties)63 void PropertyBlockBuilder::Add(
64 const UserCollectedProperties& user_collected_properties) {
65 for (const auto& prop : user_collected_properties) {
66 Add(prop.first, prop.second);
67 }
68 }
69
AddTableProperty(const TableProperties & props)70 void PropertyBlockBuilder::AddTableProperty(const TableProperties& props) {
71 TEST_SYNC_POINT_CALLBACK("PropertyBlockBuilder::AddTableProperty:Start",
72 const_cast<TableProperties*>(&props));
73
74 Add(TablePropertiesNames::kRawKeySize, props.raw_key_size);
75 Add(TablePropertiesNames::kRawValueSize, props.raw_value_size);
76 Add(TablePropertiesNames::kDataSize, props.data_size);
77 Add(TablePropertiesNames::kIndexSize, props.index_size);
78 if (props.index_partitions != 0) {
79 Add(TablePropertiesNames::kIndexPartitions, props.index_partitions);
80 Add(TablePropertiesNames::kTopLevelIndexSize, props.top_level_index_size);
81 }
82 Add(TablePropertiesNames::kIndexKeyIsUserKey, props.index_key_is_user_key);
83 Add(TablePropertiesNames::kIndexValueIsDeltaEncoded,
84 props.index_value_is_delta_encoded);
85 Add(TablePropertiesNames::kNumEntries, props.num_entries);
86 Add(TablePropertiesNames::kDeletedKeys, props.num_deletions);
87 Add(TablePropertiesNames::kMergeOperands, props.num_merge_operands);
88 Add(TablePropertiesNames::kNumRangeDeletions, props.num_range_deletions);
89 Add(TablePropertiesNames::kNumDataBlocks, props.num_data_blocks);
90 Add(TablePropertiesNames::kFilterSize, props.filter_size);
91 Add(TablePropertiesNames::kFormatVersion, props.format_version);
92 Add(TablePropertiesNames::kFixedKeyLen, props.fixed_key_len);
93 Add(TablePropertiesNames::kColumnFamilyId, props.column_family_id);
94 Add(TablePropertiesNames::kCreationTime, props.creation_time);
95 Add(TablePropertiesNames::kOldestKeyTime, props.oldest_key_time);
96 if (props.file_creation_time > 0) {
97 Add(TablePropertiesNames::kFileCreationTime, props.file_creation_time);
98 }
99
100 if (!props.filter_policy_name.empty()) {
101 Add(TablePropertiesNames::kFilterPolicy, props.filter_policy_name);
102 }
103 if (!props.comparator_name.empty()) {
104 Add(TablePropertiesNames::kComparator, props.comparator_name);
105 }
106
107 if (!props.merge_operator_name.empty()) {
108 Add(TablePropertiesNames::kMergeOperator, props.merge_operator_name);
109 }
110 if (!props.prefix_extractor_name.empty()) {
111 Add(TablePropertiesNames::kPrefixExtractorName,
112 props.prefix_extractor_name);
113 }
114 if (!props.property_collectors_names.empty()) {
115 Add(TablePropertiesNames::kPropertyCollectors,
116 props.property_collectors_names);
117 }
118 if (!props.column_family_name.empty()) {
119 Add(TablePropertiesNames::kColumnFamilyName, props.column_family_name);
120 }
121
122 if (!props.compression_name.empty()) {
123 Add(TablePropertiesNames::kCompression, props.compression_name);
124 }
125 if (!props.compression_options.empty()) {
126 Add(TablePropertiesNames::kCompressionOptions, props.compression_options);
127 }
128 }
129
Finish()130 Slice PropertyBlockBuilder::Finish() {
131 for (const auto& prop : props_) {
132 properties_block_->Add(prop.first, prop.second);
133 }
134
135 return properties_block_->Finish();
136 }
137
LogPropertiesCollectionError(Logger * info_log,const std::string & method,const std::string & name)138 void LogPropertiesCollectionError(
139 Logger* info_log, const std::string& method, const std::string& name) {
140 assert(method == "Add" || method == "Finish");
141
142 std::string msg =
143 "Encountered error when calling TablePropertiesCollector::" +
144 method + "() with collector name: " + name;
145 ROCKS_LOG_ERROR(info_log, "%s", msg.c_str());
146 }
147
NotifyCollectTableCollectorsOnAdd(const Slice & key,const Slice & value,uint64_t file_size,const std::vector<std::unique_ptr<IntTblPropCollector>> & collectors,Logger * info_log)148 bool NotifyCollectTableCollectorsOnAdd(
149 const Slice& key, const Slice& value, uint64_t file_size,
150 const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
151 Logger* info_log) {
152 bool all_succeeded = true;
153 for (auto& collector : collectors) {
154 Status s = collector->InternalAdd(key, value, file_size);
155 all_succeeded = all_succeeded && s.ok();
156 if (!s.ok()) {
157 LogPropertiesCollectionError(info_log, "Add" /* method */,
158 collector->Name());
159 }
160 }
161 return all_succeeded;
162 }
163
NotifyCollectTableCollectorsOnBlockAdd(const std::vector<std::unique_ptr<IntTblPropCollector>> & collectors,const uint64_t blockRawBytes,const uint64_t blockCompressedBytesFast,const uint64_t blockCompressedBytesSlow)164 void NotifyCollectTableCollectorsOnBlockAdd(
165 const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
166 const uint64_t blockRawBytes, const uint64_t blockCompressedBytesFast,
167 const uint64_t blockCompressedBytesSlow) {
168 for (auto& collector : collectors) {
169 collector->BlockAdd(blockRawBytes, blockCompressedBytesFast,
170 blockCompressedBytesSlow);
171 }
172 }
173
NotifyCollectTableCollectorsOnFinish(const std::vector<std::unique_ptr<IntTblPropCollector>> & collectors,Logger * info_log,PropertyBlockBuilder * builder)174 bool NotifyCollectTableCollectorsOnFinish(
175 const std::vector<std::unique_ptr<IntTblPropCollector>>& collectors,
176 Logger* info_log, PropertyBlockBuilder* builder) {
177 bool all_succeeded = true;
178 for (auto& collector : collectors) {
179 UserCollectedProperties user_collected_properties;
180 Status s = collector->Finish(&user_collected_properties);
181
182 all_succeeded = all_succeeded && s.ok();
183 if (!s.ok()) {
184 LogPropertiesCollectionError(info_log, "Finish" /* method */,
185 collector->Name());
186 } else {
187 builder->Add(user_collected_properties);
188 }
189 }
190
191 return all_succeeded;
192 }
193
ReadProperties(const Slice & handle_value,RandomAccessFileReader * file,FilePrefetchBuffer * prefetch_buffer,const Footer & footer,const ImmutableCFOptions & ioptions,TableProperties ** table_properties,bool verify_checksum,BlockHandle * ret_block_handle,CacheAllocationPtr * verification_buf,bool,MemoryAllocator * memory_allocator)194 Status ReadProperties(const Slice& handle_value, RandomAccessFileReader* file,
195 FilePrefetchBuffer* prefetch_buffer, const Footer& footer,
196 const ImmutableCFOptions& ioptions,
197 TableProperties** table_properties, bool verify_checksum,
198 BlockHandle* ret_block_handle,
199 CacheAllocationPtr* verification_buf,
200 bool /*compression_type_missing*/,
201 MemoryAllocator* memory_allocator) {
202 assert(table_properties);
203
204 Slice v = handle_value;
205 BlockHandle handle;
206 if (!handle.DecodeFrom(&v).ok()) {
207 return Status::InvalidArgument("Failed to decode properties block handle");
208 }
209
210 BlockContents block_contents;
211 ReadOptions read_options;
212 read_options.verify_checksums = verify_checksum;
213 Status s;
214 PersistentCacheOptions cache_options;
215
216 BlockFetcher block_fetcher(
217 file, prefetch_buffer, footer, read_options, handle, &block_contents,
218 ioptions, false /* decompress */, false /*maybe_compressed*/,
219 BlockType::kProperties, UncompressionDict::GetEmptyDict(), cache_options,
220 memory_allocator);
221 s = block_fetcher.ReadBlockContents();
222 // property block is never compressed. Need to add uncompress logic if we are
223 // to compress it..
224
225 if (!s.ok()) {
226 return s;
227 }
228
229 Block properties_block(std::move(block_contents));
230 DataBlockIter iter;
231 properties_block.NewDataIterator(BytewiseComparator(), BytewiseComparator(),
232 kDisableGlobalSequenceNumber, &iter);
233
234 auto new_table_properties = new TableProperties();
235 // All pre-defined properties of type uint64_t
236 std::unordered_map<std::string, uint64_t*> predefined_uint64_properties = {
237 {TablePropertiesNames::kDataSize, &new_table_properties->data_size},
238 {TablePropertiesNames::kIndexSize, &new_table_properties->index_size},
239 {TablePropertiesNames::kIndexPartitions,
240 &new_table_properties->index_partitions},
241 {TablePropertiesNames::kTopLevelIndexSize,
242 &new_table_properties->top_level_index_size},
243 {TablePropertiesNames::kIndexKeyIsUserKey,
244 &new_table_properties->index_key_is_user_key},
245 {TablePropertiesNames::kIndexValueIsDeltaEncoded,
246 &new_table_properties->index_value_is_delta_encoded},
247 {TablePropertiesNames::kFilterSize, &new_table_properties->filter_size},
248 {TablePropertiesNames::kRawKeySize, &new_table_properties->raw_key_size},
249 {TablePropertiesNames::kRawValueSize,
250 &new_table_properties->raw_value_size},
251 {TablePropertiesNames::kNumDataBlocks,
252 &new_table_properties->num_data_blocks},
253 {TablePropertiesNames::kNumEntries, &new_table_properties->num_entries},
254 {TablePropertiesNames::kDeletedKeys,
255 &new_table_properties->num_deletions},
256 {TablePropertiesNames::kMergeOperands,
257 &new_table_properties->num_merge_operands},
258 {TablePropertiesNames::kNumRangeDeletions,
259 &new_table_properties->num_range_deletions},
260 {TablePropertiesNames::kFormatVersion,
261 &new_table_properties->format_version},
262 {TablePropertiesNames::kFixedKeyLen,
263 &new_table_properties->fixed_key_len},
264 {TablePropertiesNames::kColumnFamilyId,
265 &new_table_properties->column_family_id},
266 {TablePropertiesNames::kCreationTime,
267 &new_table_properties->creation_time},
268 {TablePropertiesNames::kOldestKeyTime,
269 &new_table_properties->oldest_key_time},
270 {TablePropertiesNames::kFileCreationTime,
271 &new_table_properties->file_creation_time},
272 };
273
274 std::string last_key;
275 for (iter.SeekToFirstOrReport(); iter.Valid(); iter.NextOrReport()) {
276 s = iter.status();
277 if (!s.ok()) {
278 break;
279 }
280
281 auto key = iter.key().ToString();
282 // properties block should be strictly sorted with no duplicate key.
283 if (!last_key.empty() &&
284 BytewiseComparator()->Compare(key, last_key) <= 0) {
285 s = Status::Corruption("properties unsorted");
286 break;
287 }
288 last_key = key;
289
290 auto raw_val = iter.value();
291 auto pos = predefined_uint64_properties.find(key);
292
293 new_table_properties->properties_offsets.insert(
294 {key, handle.offset() + iter.ValueOffset()});
295
296 if (pos != predefined_uint64_properties.end()) {
297 if (key == TablePropertiesNames::kDeletedKeys ||
298 key == TablePropertiesNames::kMergeOperands) {
299 // Insert in user-collected properties for API backwards compatibility
300 new_table_properties->user_collected_properties.insert(
301 {key, raw_val.ToString()});
302 }
303 // handle predefined rocksdb properties
304 uint64_t val;
305 if (!GetVarint64(&raw_val, &val)) {
306 // skip malformed value
307 auto error_msg =
308 "Detect malformed value in properties meta-block:"
309 "\tkey: " + key + "\tval: " + raw_val.ToString();
310 ROCKS_LOG_ERROR(ioptions.info_log, "%s", error_msg.c_str());
311 continue;
312 }
313 *(pos->second) = val;
314 } else if (key == TablePropertiesNames::kFilterPolicy) {
315 new_table_properties->filter_policy_name = raw_val.ToString();
316 } else if (key == TablePropertiesNames::kColumnFamilyName) {
317 new_table_properties->column_family_name = raw_val.ToString();
318 } else if (key == TablePropertiesNames::kComparator) {
319 new_table_properties->comparator_name = raw_val.ToString();
320 } else if (key == TablePropertiesNames::kMergeOperator) {
321 new_table_properties->merge_operator_name = raw_val.ToString();
322 } else if (key == TablePropertiesNames::kPrefixExtractorName) {
323 new_table_properties->prefix_extractor_name = raw_val.ToString();
324 } else if (key == TablePropertiesNames::kPropertyCollectors) {
325 new_table_properties->property_collectors_names = raw_val.ToString();
326 } else if (key == TablePropertiesNames::kCompression) {
327 new_table_properties->compression_name = raw_val.ToString();
328 } else if (key == TablePropertiesNames::kCompressionOptions) {
329 new_table_properties->compression_options = raw_val.ToString();
330 } else {
331 // handle user-collected properties
332 new_table_properties->user_collected_properties.insert(
333 {key, raw_val.ToString()});
334 }
335 }
336 if (s.ok()) {
337 *table_properties = new_table_properties;
338 if (ret_block_handle != nullptr) {
339 *ret_block_handle = handle;
340 }
341 if (verification_buf != nullptr) {
342 size_t len = static_cast<size_t>(handle.size() + kBlockTrailerSize);
343 *verification_buf =
344 ROCKSDB_NAMESPACE::AllocateBlock(len, memory_allocator);
345 if (verification_buf->get() != nullptr) {
346 memcpy(verification_buf->get(), block_contents.data.data(), len);
347 }
348 }
349 } else {
350 delete new_table_properties;
351 }
352
353 return s;
354 }
355
ReadTableProperties(RandomAccessFileReader * file,uint64_t file_size,uint64_t table_magic_number,const ImmutableCFOptions & ioptions,TableProperties ** properties,bool compression_type_missing,MemoryAllocator * memory_allocator)356 Status ReadTableProperties(RandomAccessFileReader* file, uint64_t file_size,
357 uint64_t table_magic_number,
358 const ImmutableCFOptions& ioptions,
359 TableProperties** properties,
360 bool compression_type_missing,
361 MemoryAllocator* memory_allocator) {
362 // -- Read metaindex block
363 Footer footer;
364 auto s = ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size,
365 &footer, table_magic_number);
366 if (!s.ok()) {
367 return s;
368 }
369
370 auto metaindex_handle = footer.metaindex_handle();
371 BlockContents metaindex_contents;
372 ReadOptions read_options;
373 read_options.verify_checksums = false;
374 PersistentCacheOptions cache_options;
375
376 BlockFetcher block_fetcher(
377 file, nullptr /* prefetch_buffer */, footer, read_options,
378 metaindex_handle, &metaindex_contents, ioptions, false /* decompress */,
379 false /*maybe_compressed*/, BlockType::kMetaIndex,
380 UncompressionDict::GetEmptyDict(), cache_options, memory_allocator);
381 s = block_fetcher.ReadBlockContents();
382 if (!s.ok()) {
383 return s;
384 }
385 // property blocks are never compressed. Need to add uncompress logic if we
386 // are to compress it.
387 Block metaindex_block(std::move(metaindex_contents));
388 std::unique_ptr<InternalIterator> meta_iter(metaindex_block.NewDataIterator(
389 BytewiseComparator(), BytewiseComparator(),
390 kDisableGlobalSequenceNumber));
391
392 // -- Read property block
393 bool found_properties_block = true;
394 s = SeekToPropertiesBlock(meta_iter.get(), &found_properties_block);
395 if (!s.ok()) {
396 return s;
397 }
398
399 TableProperties table_properties;
400 if (found_properties_block == true) {
401 s = ReadProperties(
402 meta_iter->value(), file, nullptr /* prefetch_buffer */, footer,
403 ioptions, properties, false /* verify_checksum */,
404 nullptr /* ret_block_hanel */, nullptr /* ret_block_contents */,
405 compression_type_missing, memory_allocator);
406 } else {
407 s = Status::NotFound();
408 }
409
410 return s;
411 }
412
FindMetaBlock(InternalIterator * meta_index_iter,const std::string & meta_block_name,BlockHandle * block_handle)413 Status FindMetaBlock(InternalIterator* meta_index_iter,
414 const std::string& meta_block_name,
415 BlockHandle* block_handle) {
416 meta_index_iter->Seek(meta_block_name);
417 if (meta_index_iter->status().ok() && meta_index_iter->Valid() &&
418 meta_index_iter->key() == meta_block_name) {
419 Slice v = meta_index_iter->value();
420 return block_handle->DecodeFrom(&v);
421 } else {
422 return Status::Corruption("Cannot find the meta block", meta_block_name);
423 }
424 }
425
FindMetaBlock(RandomAccessFileReader * file,uint64_t file_size,uint64_t table_magic_number,const ImmutableCFOptions & ioptions,const std::string & meta_block_name,BlockHandle * block_handle,bool,MemoryAllocator * memory_allocator)426 Status FindMetaBlock(RandomAccessFileReader* file, uint64_t file_size,
427 uint64_t table_magic_number,
428 const ImmutableCFOptions& ioptions,
429 const std::string& meta_block_name,
430 BlockHandle* block_handle,
431 bool /*compression_type_missing*/,
432 MemoryAllocator* memory_allocator) {
433 Footer footer;
434 auto s = ReadFooterFromFile(file, nullptr /* prefetch_buffer */, file_size,
435 &footer, table_magic_number);
436 if (!s.ok()) {
437 return s;
438 }
439
440 auto metaindex_handle = footer.metaindex_handle();
441 BlockContents metaindex_contents;
442 ReadOptions read_options;
443 read_options.verify_checksums = false;
444 PersistentCacheOptions cache_options;
445 BlockFetcher block_fetcher(
446 file, nullptr /* prefetch_buffer */, footer, read_options,
447 metaindex_handle, &metaindex_contents, ioptions,
448 false /* do decompression */, false /*maybe_compressed*/,
449 BlockType::kMetaIndex, UncompressionDict::GetEmptyDict(), cache_options,
450 memory_allocator);
451 s = block_fetcher.ReadBlockContents();
452 if (!s.ok()) {
453 return s;
454 }
455 // meta blocks are never compressed. Need to add uncompress logic if we are to
456 // compress it.
457 Block metaindex_block(std::move(metaindex_contents));
458
459 std::unique_ptr<InternalIterator> meta_iter;
460 meta_iter.reset(metaindex_block.NewDataIterator(
461 BytewiseComparator(), BytewiseComparator(),
462 kDisableGlobalSequenceNumber));
463
464 return FindMetaBlock(meta_iter.get(), meta_block_name, block_handle);
465 }
466
ReadMetaBlock(RandomAccessFileReader * file,FilePrefetchBuffer * prefetch_buffer,uint64_t file_size,uint64_t table_magic_number,const ImmutableCFOptions & ioptions,const std::string & meta_block_name,BlockType block_type,BlockContents * contents,bool,MemoryAllocator * memory_allocator)467 Status ReadMetaBlock(RandomAccessFileReader* file,
468 FilePrefetchBuffer* prefetch_buffer, uint64_t file_size,
469 uint64_t table_magic_number,
470 const ImmutableCFOptions& ioptions,
471 const std::string& meta_block_name, BlockType block_type,
472 BlockContents* contents, bool /*compression_type_missing*/,
473 MemoryAllocator* memory_allocator) {
474 Status status;
475 Footer footer;
476 status = ReadFooterFromFile(file, prefetch_buffer, file_size, &footer,
477 table_magic_number);
478 if (!status.ok()) {
479 return status;
480 }
481
482 // Reading metaindex block
483 auto metaindex_handle = footer.metaindex_handle();
484 BlockContents metaindex_contents;
485 ReadOptions read_options;
486 read_options.verify_checksums = false;
487 PersistentCacheOptions cache_options;
488
489 BlockFetcher block_fetcher(
490 file, prefetch_buffer, footer, read_options, metaindex_handle,
491 &metaindex_contents, ioptions, false /* decompress */,
492 false /*maybe_compressed*/, BlockType::kMetaIndex,
493 UncompressionDict::GetEmptyDict(), cache_options, memory_allocator);
494 status = block_fetcher.ReadBlockContents();
495 if (!status.ok()) {
496 return status;
497 }
498 // meta block is never compressed. Need to add uncompress logic if we are to
499 // compress it.
500
501 // Finding metablock
502 Block metaindex_block(std::move(metaindex_contents));
503
504 std::unique_ptr<InternalIterator> meta_iter;
505 meta_iter.reset(metaindex_block.NewDataIterator(
506 BytewiseComparator(), BytewiseComparator(),
507 kDisableGlobalSequenceNumber));
508
509 BlockHandle block_handle;
510 status = FindMetaBlock(meta_iter.get(), meta_block_name, &block_handle);
511
512 if (!status.ok()) {
513 return status;
514 }
515
516 // Reading metablock
517 BlockFetcher block_fetcher2(
518 file, prefetch_buffer, footer, read_options, block_handle, contents,
519 ioptions, false /* decompress */, false /*maybe_compressed*/, block_type,
520 UncompressionDict::GetEmptyDict(), cache_options, memory_allocator);
521 return block_fetcher2.ReadBlockContents();
522 }
523
524 } // namespace ROCKSDB_NAMESPACE
525