1 // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file. See the AUTHORS file for names of contributors.
5
6 #ifndef ROCKSDB_LITE
7
8 #include "table/plain/plain_table_reader.h"
9
10 #include <string>
11 #include <vector>
12
13 #include "db/dbformat.h"
14
15 #include "rocksdb/cache.h"
16 #include "rocksdb/comparator.h"
17 #include "rocksdb/env.h"
18 #include "rocksdb/filter_policy.h"
19 #include "rocksdb/options.h"
20 #include "rocksdb/statistics.h"
21
22 #include "table/block_based/block.h"
23 #include "table/block_based/filter_block.h"
24 #include "table/format.h"
25 #include "table/get_context.h"
26 #include "table/internal_iterator.h"
27 #include "table/meta_blocks.h"
28 #include "table/plain/plain_table_bloom.h"
29 #include "table/plain/plain_table_factory.h"
30 #include "table/plain/plain_table_key_coding.h"
31 #include "table/two_level_iterator.h"
32
33 #include "memory/arena.h"
34 #include "monitoring/histogram.h"
35 #include "monitoring/perf_context_imp.h"
36 #include "util/coding.h"
37 #include "util/dynamic_bloom.h"
38 #include "util/hash.h"
39 #include "util/stop_watch.h"
40 #include "util/string_util.h"
41
42 namespace ROCKSDB_NAMESPACE {
43
44 namespace {
45
// Safely gets a uint32_t element from a char array where, starting from
// `base`, every 4 bytes are treated as a fixed 32-bit integer.
GetFixed32Element(const char * base,size_t offset)48 inline uint32_t GetFixed32Element(const char* base, size_t offset) {
49 return DecodeFixed32(base + offset * sizeof(uint32_t));
50 }
51 } // namespace
52
53 // Iterator to iterate IndexedTable
54 class PlainTableIterator : public InternalIterator {
55 public:
56 explicit PlainTableIterator(PlainTableReader* table, bool use_prefix_seek);
57 // No copying allowed
58 PlainTableIterator(const PlainTableIterator&) = delete;
59 void operator=(const Iterator&) = delete;
60
61 ~PlainTableIterator() override;
62
63 bool Valid() const override;
64
65 void SeekToFirst() override;
66
67 void SeekToLast() override;
68
69 void Seek(const Slice& target) override;
70
71 void SeekForPrev(const Slice& target) override;
72
73 void Next() override;
74
75 void Prev() override;
76
77 Slice key() const override;
78
79 Slice value() const override;
80
81 Status status() const override;
82
83 private:
84 PlainTableReader* table_;
85 PlainTableKeyDecoder decoder_;
86 bool use_prefix_seek_;
87 uint32_t offset_;
88 uint32_t next_offset_;
89 Slice key_;
90 Slice value_;
91 Status status_;
92 };
93
94 extern const uint64_t kPlainTableMagicNumber;
PlainTableReader(const ImmutableCFOptions & ioptions,std::unique_ptr<RandomAccessFileReader> && file,const EnvOptions & storage_options,const InternalKeyComparator & icomparator,EncodingType encoding_type,uint64_t file_size,const TableProperties * table_properties,const SliceTransform * prefix_extractor)95 PlainTableReader::PlainTableReader(
96 const ImmutableCFOptions& ioptions,
97 std::unique_ptr<RandomAccessFileReader>&& file,
98 const EnvOptions& storage_options, const InternalKeyComparator& icomparator,
99 EncodingType encoding_type, uint64_t file_size,
100 const TableProperties* table_properties,
101 const SliceTransform* prefix_extractor)
102 : internal_comparator_(icomparator),
103 encoding_type_(encoding_type),
104 full_scan_mode_(false),
105 user_key_len_(static_cast<uint32_t>(table_properties->fixed_key_len)),
106 prefix_extractor_(prefix_extractor),
107 enable_bloom_(false),
108 bloom_(6),
109 file_info_(std::move(file), storage_options,
110 static_cast<uint32_t>(table_properties->data_size)),
111 ioptions_(ioptions),
112 file_size_(file_size),
113 table_properties_(nullptr) {}
114
~PlainTableReader()115 PlainTableReader::~PlainTableReader() {
116 }
117
Open(const ImmutableCFOptions & ioptions,const EnvOptions & env_options,const InternalKeyComparator & internal_comparator,std::unique_ptr<RandomAccessFileReader> && file,uint64_t file_size,std::unique_ptr<TableReader> * table_reader,const int bloom_bits_per_key,double hash_table_ratio,size_t index_sparseness,size_t huge_page_tlb_size,bool full_scan_mode,const bool immortal_table,const SliceTransform * prefix_extractor)118 Status PlainTableReader::Open(
119 const ImmutableCFOptions& ioptions, const EnvOptions& env_options,
120 const InternalKeyComparator& internal_comparator,
121 std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
122 std::unique_ptr<TableReader>* table_reader, const int bloom_bits_per_key,
123 double hash_table_ratio, size_t index_sparseness, size_t huge_page_tlb_size,
124 bool full_scan_mode, const bool immortal_table,
125 const SliceTransform* prefix_extractor) {
126 if (file_size > PlainTableIndex::kMaxFileSize) {
127 return Status::NotSupported("File is too large for PlainTableReader!");
128 }
129
130 TableProperties* props_ptr = nullptr;
131 auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber,
132 ioptions, &props_ptr,
133 true /* compression_type_missing */);
134 std::shared_ptr<TableProperties> props(props_ptr);
135 if (!s.ok()) {
136 return s;
137 }
138
139 assert(hash_table_ratio >= 0.0);
140 auto& user_props = props->user_collected_properties;
141 auto prefix_extractor_in_file = props->prefix_extractor_name;
142
143 if (!full_scan_mode &&
144 !prefix_extractor_in_file.empty() /* old version sst file*/
145 && prefix_extractor_in_file != "nullptr") {
146 if (!prefix_extractor) {
147 return Status::InvalidArgument(
148 "Prefix extractor is missing when opening a PlainTable built "
149 "using a prefix extractor");
150 } else if (prefix_extractor_in_file.compare(prefix_extractor->Name()) !=
151 0) {
152 return Status::InvalidArgument(
153 "Prefix extractor given doesn't match the one used to build "
154 "PlainTable");
155 }
156 }
157
158 EncodingType encoding_type = kPlain;
159 auto encoding_type_prop =
160 user_props.find(PlainTablePropertyNames::kEncodingType);
161 if (encoding_type_prop != user_props.end()) {
162 encoding_type = static_cast<EncodingType>(
163 DecodeFixed32(encoding_type_prop->second.c_str()));
164 }
165
166 std::unique_ptr<PlainTableReader> new_reader(new PlainTableReader(
167 ioptions, std::move(file), env_options, internal_comparator,
168 encoding_type, file_size, props.get(), prefix_extractor));
169
170 s = new_reader->MmapDataIfNeeded();
171 if (!s.ok()) {
172 return s;
173 }
174
175 if (!full_scan_mode) {
176 s = new_reader->PopulateIndex(props.get(), bloom_bits_per_key,
177 hash_table_ratio, index_sparseness,
178 huge_page_tlb_size);
179 if (!s.ok()) {
180 return s;
181 }
182 } else {
183 // Flag to indicate it is a full scan mode so that none of the indexes
184 // can be used.
185 new_reader->full_scan_mode_ = true;
186 }
187 // PopulateIndex can add to the props, so don't store them until now
188 new_reader->table_properties_ = props;
189
190 if (immortal_table && new_reader->file_info_.is_mmap_mode) {
191 new_reader->dummy_cleanable_.reset(new Cleanable());
192 }
193
194 *table_reader = std::move(new_reader);
195 return s;
196 }
197
SetupForCompaction()198 void PlainTableReader::SetupForCompaction() {
199 }
200
NewIterator(const ReadOptions & options,const SliceTransform *,Arena * arena,bool,TableReaderCaller,size_t)201 InternalIterator* PlainTableReader::NewIterator(
202 const ReadOptions& options, const SliceTransform* /* prefix_extractor */,
203 Arena* arena, bool /*skip_filters*/, TableReaderCaller /*caller*/,
204 size_t /*compaction_readahead_size*/) {
205 // Not necessarily used here, but make sure this has been initialized
206 assert(table_properties_);
207
208 // Auto prefix mode is not implemented in PlainTable.
209 bool use_prefix_seek = !IsTotalOrderMode() && !options.total_order_seek &&
210 !options.auto_prefix_mode;
211 if (arena == nullptr) {
212 return new PlainTableIterator(this, use_prefix_seek);
213 } else {
214 auto mem = arena->AllocateAligned(sizeof(PlainTableIterator));
215 return new (mem) PlainTableIterator(this, use_prefix_seek);
216 }
217 }
218
PopulateIndexRecordList(PlainTableIndexBuilder * index_builder,std::vector<uint32_t> * prefix_hashes)219 Status PlainTableReader::PopulateIndexRecordList(
220 PlainTableIndexBuilder* index_builder,
221 std::vector<uint32_t>* prefix_hashes) {
222 Slice prev_key_prefix_slice;
223 std::string prev_key_prefix_buf;
224 uint32_t pos = data_start_offset_;
225
226 bool is_first_record = true;
227 Slice key_prefix_slice;
228 PlainTableKeyDecoder decoder(&file_info_, encoding_type_, user_key_len_,
229 prefix_extractor_);
230 while (pos < file_info_.data_end_offset) {
231 uint32_t key_offset = pos;
232 ParsedInternalKey key;
233 Slice value_slice;
234 bool seekable = false;
235 Status s = Next(&decoder, &pos, &key, nullptr, &value_slice, &seekable);
236 if (!s.ok()) {
237 return s;
238 }
239
240 key_prefix_slice = GetPrefix(key);
241 if (enable_bloom_) {
242 bloom_.AddHash(GetSliceHash(key.user_key));
243 } else {
244 if (is_first_record || prev_key_prefix_slice != key_prefix_slice) {
245 if (!is_first_record) {
246 prefix_hashes->push_back(GetSliceHash(prev_key_prefix_slice));
247 }
248 if (file_info_.is_mmap_mode) {
249 prev_key_prefix_slice = key_prefix_slice;
250 } else {
251 prev_key_prefix_buf = key_prefix_slice.ToString();
252 prev_key_prefix_slice = prev_key_prefix_buf;
253 }
254 }
255 }
256
257 index_builder->AddKeyPrefix(GetPrefix(key), key_offset);
258
259 if (!seekable && is_first_record) {
260 return Status::Corruption("Key for a prefix is not seekable");
261 }
262
263 is_first_record = false;
264 }
265
266 prefix_hashes->push_back(GetSliceHash(key_prefix_slice));
267 auto s = index_.InitFromRawData(index_builder->Finish());
268 return s;
269 }
270
AllocateBloom(int bloom_bits_per_key,int num_keys,size_t huge_page_tlb_size)271 void PlainTableReader::AllocateBloom(int bloom_bits_per_key, int num_keys,
272 size_t huge_page_tlb_size) {
273 uint32_t bloom_total_bits = num_keys * bloom_bits_per_key;
274 if (bloom_total_bits > 0) {
275 enable_bloom_ = true;
276 bloom_.SetTotalBits(&arena_, bloom_total_bits, ioptions_.bloom_locality,
277 huge_page_tlb_size, ioptions_.info_log);
278 }
279 }
280
FillBloom(const std::vector<uint32_t> & prefix_hashes)281 void PlainTableReader::FillBloom(const std::vector<uint32_t>& prefix_hashes) {
282 assert(bloom_.IsInitialized());
283 for (const auto prefix_hash : prefix_hashes) {
284 bloom_.AddHash(prefix_hash);
285 }
286 }
287
MmapDataIfNeeded()288 Status PlainTableReader::MmapDataIfNeeded() {
289 if (file_info_.is_mmap_mode) {
290 // Get mmapped memory.
291 return file_info_.file->Read(0, static_cast<size_t>(file_size_),
292 &file_info_.file_data, nullptr, nullptr);
293 }
294 return Status::OK();
295 }
296
PopulateIndex(TableProperties * props,int bloom_bits_per_key,double hash_table_ratio,size_t index_sparseness,size_t huge_page_tlb_size)297 Status PlainTableReader::PopulateIndex(TableProperties* props,
298 int bloom_bits_per_key,
299 double hash_table_ratio,
300 size_t index_sparseness,
301 size_t huge_page_tlb_size) {
302 assert(props != nullptr);
303
304 BlockContents index_block_contents;
305 Status s = ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */,
306 file_size_, kPlainTableMagicNumber, ioptions_,
307 PlainTableIndexBuilder::kPlainTableIndexBlock,
308 BlockType::kIndex, &index_block_contents,
309 true /* compression_type_missing */);
310
311 bool index_in_file = s.ok();
312
313 BlockContents bloom_block_contents;
314 bool bloom_in_file = false;
315 // We only need to read the bloom block if index block is in file.
316 if (index_in_file) {
317 s = ReadMetaBlock(file_info_.file.get(), nullptr /* prefetch_buffer */,
318 file_size_, kPlainTableMagicNumber, ioptions_,
319 BloomBlockBuilder::kBloomBlock, BlockType::kFilter,
320 &bloom_block_contents,
321 true /* compression_type_missing */);
322 bloom_in_file = s.ok() && bloom_block_contents.data.size() > 0;
323 }
324
325 Slice* bloom_block;
326 if (bloom_in_file) {
327 // If bloom_block_contents.allocation is not empty (which will be the case
328 // for non-mmap mode), it holds the alloated memory for the bloom block.
329 // It needs to be kept alive to keep `bloom_block` valid.
330 bloom_block_alloc_ = std::move(bloom_block_contents.allocation);
331 bloom_block = &bloom_block_contents.data;
332 } else {
333 bloom_block = nullptr;
334 }
335
336 Slice* index_block;
337 if (index_in_file) {
338 // If index_block_contents.allocation is not empty (which will be the case
339 // for non-mmap mode), it holds the alloated memory for the index block.
340 // It needs to be kept alive to keep `index_block` valid.
341 index_block_alloc_ = std::move(index_block_contents.allocation);
342 index_block = &index_block_contents.data;
343 } else {
344 index_block = nullptr;
345 }
346
347 if ((prefix_extractor_ == nullptr) && (hash_table_ratio != 0)) {
348 // moptions.prefix_extractor is requried for a hash-based look-up.
349 return Status::NotSupported(
350 "PlainTable requires a prefix extractor enable prefix hash mode.");
351 }
352
353 // First, read the whole file, for every kIndexIntervalForSamePrefixKeys rows
354 // for a prefix (starting from the first one), generate a record of (hash,
355 // offset) and append it to IndexRecordList, which is a data structure created
356 // to store them.
357
358 if (!index_in_file) {
359 // Allocate bloom filter here for total order mode.
360 if (IsTotalOrderMode()) {
361 AllocateBloom(bloom_bits_per_key,
362 static_cast<uint32_t>(props->num_entries),
363 huge_page_tlb_size);
364 }
365 } else if (bloom_in_file) {
366 enable_bloom_ = true;
367 auto num_blocks_property = props->user_collected_properties.find(
368 PlainTablePropertyNames::kNumBloomBlocks);
369
370 uint32_t num_blocks = 0;
371 if (num_blocks_property != props->user_collected_properties.end()) {
372 Slice temp_slice(num_blocks_property->second);
373 if (!GetVarint32(&temp_slice, &num_blocks)) {
374 num_blocks = 0;
375 }
376 }
377 // cast away const qualifier, because bloom_ won't be changed
378 bloom_.SetRawData(const_cast<char*>(bloom_block->data()),
379 static_cast<uint32_t>(bloom_block->size()) * 8,
380 num_blocks);
381 } else {
382 // Index in file but no bloom in file. Disable bloom filter in this case.
383 enable_bloom_ = false;
384 bloom_bits_per_key = 0;
385 }
386
387 PlainTableIndexBuilder index_builder(&arena_, ioptions_, prefix_extractor_,
388 index_sparseness, hash_table_ratio,
389 huge_page_tlb_size);
390
391 std::vector<uint32_t> prefix_hashes;
392 if (!index_in_file) {
393 // Populates _bloom if enabled (total order mode)
394 s = PopulateIndexRecordList(&index_builder, &prefix_hashes);
395 if (!s.ok()) {
396 return s;
397 }
398 } else {
399 s = index_.InitFromRawData(*index_block);
400 if (!s.ok()) {
401 return s;
402 }
403 }
404
405 if (!index_in_file) {
406 if (!IsTotalOrderMode()) {
407 // Calculated bloom filter size and allocate memory for
408 // bloom filter based on the number of prefixes, then fill it.
409 AllocateBloom(bloom_bits_per_key, index_.GetNumPrefixes(),
410 huge_page_tlb_size);
411 if (enable_bloom_) {
412 FillBloom(prefix_hashes);
413 }
414 }
415 }
416
417 // Fill two table properties.
418 if (!index_in_file) {
419 props->user_collected_properties["plain_table_hash_table_size"] =
420 ToString(index_.GetIndexSize() * PlainTableIndex::kOffsetLen);
421 props->user_collected_properties["plain_table_sub_index_size"] =
422 ToString(index_.GetSubIndexSize());
423 } else {
424 props->user_collected_properties["plain_table_hash_table_size"] =
425 ToString(0);
426 props->user_collected_properties["plain_table_sub_index_size"] =
427 ToString(0);
428 }
429
430 return Status::OK();
431 }
432
GetOffset(PlainTableKeyDecoder * decoder,const Slice & target,const Slice & prefix,uint32_t prefix_hash,bool & prefix_matched,uint32_t * offset) const433 Status PlainTableReader::GetOffset(PlainTableKeyDecoder* decoder,
434 const Slice& target, const Slice& prefix,
435 uint32_t prefix_hash, bool& prefix_matched,
436 uint32_t* offset) const {
437 prefix_matched = false;
438 uint32_t prefix_index_offset;
439 auto res = index_.GetOffset(prefix_hash, &prefix_index_offset);
440 if (res == PlainTableIndex::kNoPrefixForBucket) {
441 *offset = file_info_.data_end_offset;
442 return Status::OK();
443 } else if (res == PlainTableIndex::kDirectToFile) {
444 *offset = prefix_index_offset;
445 return Status::OK();
446 }
447
448 // point to sub-index, need to do a binary search
449 uint32_t upper_bound;
450 const char* base_ptr =
451 index_.GetSubIndexBasePtrAndUpperBound(prefix_index_offset, &upper_bound);
452 uint32_t low = 0;
453 uint32_t high = upper_bound;
454 ParsedInternalKey mid_key;
455 ParsedInternalKey parsed_target;
456 if (!ParseInternalKey(target, &parsed_target)) {
457 return Status::Corruption(Slice());
458 }
459
460 // The key is between [low, high). Do a binary search between it.
461 while (high - low > 1) {
462 uint32_t mid = (high + low) / 2;
463 uint32_t file_offset = GetFixed32Element(base_ptr, mid);
464 uint32_t tmp;
465 Status s = decoder->NextKeyNoValue(file_offset, &mid_key, nullptr, &tmp);
466 if (!s.ok()) {
467 return s;
468 }
469 int cmp_result = internal_comparator_.Compare(mid_key, parsed_target);
470 if (cmp_result < 0) {
471 low = mid;
472 } else {
473 if (cmp_result == 0) {
474 // Happen to have found the exact key or target is smaller than the
475 // first key after base_offset.
476 prefix_matched = true;
477 *offset = file_offset;
478 return Status::OK();
479 } else {
480 high = mid;
481 }
482 }
483 }
484 // Both of the key at the position low or low+1 could share the same
485 // prefix as target. We need to rule out one of them to avoid to go
486 // to the wrong prefix.
487 ParsedInternalKey low_key;
488 uint32_t tmp;
489 uint32_t low_key_offset = GetFixed32Element(base_ptr, low);
490 Status s = decoder->NextKeyNoValue(low_key_offset, &low_key, nullptr, &tmp);
491 if (!s.ok()) {
492 return s;
493 }
494
495 if (GetPrefix(low_key) == prefix) {
496 prefix_matched = true;
497 *offset = low_key_offset;
498 } else if (low + 1 < upper_bound) {
499 // There is possible a next prefix, return it
500 prefix_matched = false;
501 *offset = GetFixed32Element(base_ptr, low + 1);
502 } else {
503 // target is larger than a key of the last prefix in this bucket
504 // but with a different prefix. Key does not exist.
505 *offset = file_info_.data_end_offset;
506 }
507 return Status::OK();
508 }
509
MatchBloom(uint32_t hash) const510 bool PlainTableReader::MatchBloom(uint32_t hash) const {
511 if (!enable_bloom_) {
512 return true;
513 }
514
515 if (bloom_.MayContainHash(hash)) {
516 PERF_COUNTER_ADD(bloom_sst_hit_count, 1);
517 return true;
518 } else {
519 PERF_COUNTER_ADD(bloom_sst_miss_count, 1);
520 return false;
521 }
522 }
523
Next(PlainTableKeyDecoder * decoder,uint32_t * offset,ParsedInternalKey * parsed_key,Slice * internal_key,Slice * value,bool * seekable) const524 Status PlainTableReader::Next(PlainTableKeyDecoder* decoder, uint32_t* offset,
525 ParsedInternalKey* parsed_key,
526 Slice* internal_key, Slice* value,
527 bool* seekable) const {
528 if (*offset == file_info_.data_end_offset) {
529 *offset = file_info_.data_end_offset;
530 return Status::OK();
531 }
532
533 if (*offset > file_info_.data_end_offset) {
534 return Status::Corruption("Offset is out of file size");
535 }
536
537 uint32_t bytes_read;
538 Status s = decoder->NextKey(*offset, parsed_key, internal_key, value,
539 &bytes_read, seekable);
540 if (!s.ok()) {
541 return s;
542 }
543 *offset = *offset + bytes_read;
544 return Status::OK();
545 }
546
Prepare(const Slice & target)547 void PlainTableReader::Prepare(const Slice& target) {
548 if (enable_bloom_) {
549 uint32_t prefix_hash = GetSliceHash(GetPrefix(target));
550 bloom_.Prefetch(prefix_hash);
551 }
552 }
553
Get(const ReadOptions &,const Slice & target,GetContext * get_context,const SliceTransform *,bool)554 Status PlainTableReader::Get(const ReadOptions& /*ro*/, const Slice& target,
555 GetContext* get_context,
556 const SliceTransform* /* prefix_extractor */,
557 bool /*skip_filters*/) {
558 // Check bloom filter first.
559 Slice prefix_slice;
560 uint32_t prefix_hash;
561 if (IsTotalOrderMode()) {
562 if (full_scan_mode_) {
563 status_ =
564 Status::InvalidArgument("Get() is not allowed in full scan mode.");
565 }
566 // Match whole user key for bloom filter check.
567 if (!MatchBloom(GetSliceHash(GetUserKey(target)))) {
568 return Status::OK();
569 }
570 // in total order mode, there is only one bucket 0, and we always use empty
571 // prefix.
572 prefix_slice = Slice();
573 prefix_hash = 0;
574 } else {
575 prefix_slice = GetPrefix(target);
576 prefix_hash = GetSliceHash(prefix_slice);
577 if (!MatchBloom(prefix_hash)) {
578 return Status::OK();
579 }
580 }
581 uint32_t offset;
582 bool prefix_match;
583 PlainTableKeyDecoder decoder(&file_info_, encoding_type_, user_key_len_,
584 prefix_extractor_);
585 Status s = GetOffset(&decoder, target, prefix_slice, prefix_hash,
586 prefix_match, &offset);
587
588 if (!s.ok()) {
589 return s;
590 }
591 ParsedInternalKey found_key;
592 ParsedInternalKey parsed_target;
593 if (!ParseInternalKey(target, &parsed_target)) {
594 return Status::Corruption(Slice());
595 }
596 Slice found_value;
597 while (offset < file_info_.data_end_offset) {
598 s = Next(&decoder, &offset, &found_key, nullptr, &found_value);
599 if (!s.ok()) {
600 return s;
601 }
602 if (!prefix_match) {
603 // Need to verify prefix for the first key found if it is not yet
604 // checked.
605 if (GetPrefix(found_key) != prefix_slice) {
606 return Status::OK();
607 }
608 prefix_match = true;
609 }
610 // TODO(ljin): since we know the key comparison result here,
611 // can we enable the fast path?
612 if (internal_comparator_.Compare(found_key, parsed_target) >= 0) {
613 bool dont_care __attribute__((__unused__));
614 if (!get_context->SaveValue(found_key, found_value, &dont_care,
615 dummy_cleanable_.get())) {
616 break;
617 }
618 }
619 }
620 return Status::OK();
621 }
622
ApproximateOffsetOf(const Slice &,TableReaderCaller)623 uint64_t PlainTableReader::ApproximateOffsetOf(const Slice& /*key*/,
624 TableReaderCaller /*caller*/) {
625 return 0;
626 }
627
ApproximateSize(const Slice &,const Slice &,TableReaderCaller)628 uint64_t PlainTableReader::ApproximateSize(const Slice& /*start*/,
629 const Slice& /*end*/,
630 TableReaderCaller /*caller*/) {
631 return 0;
632 }
633
PlainTableIterator(PlainTableReader * table,bool use_prefix_seek)634 PlainTableIterator::PlainTableIterator(PlainTableReader* table,
635 bool use_prefix_seek)
636 : table_(table),
637 decoder_(&table_->file_info_, table_->encoding_type_,
638 table_->user_key_len_, table_->prefix_extractor_),
639 use_prefix_seek_(use_prefix_seek) {
640 next_offset_ = offset_ = table_->file_info_.data_end_offset;
641 }
642
~PlainTableIterator()643 PlainTableIterator::~PlainTableIterator() {
644 }
645
Valid() const646 bool PlainTableIterator::Valid() const {
647 return offset_ < table_->file_info_.data_end_offset &&
648 offset_ >= table_->data_start_offset_;
649 }
650
SeekToFirst()651 void PlainTableIterator::SeekToFirst() {
652 status_ = Status::OK();
653 next_offset_ = table_->data_start_offset_;
654 if (next_offset_ >= table_->file_info_.data_end_offset) {
655 next_offset_ = offset_ = table_->file_info_.data_end_offset;
656 } else {
657 Next();
658 }
659 }
660
SeekToLast()661 void PlainTableIterator::SeekToLast() {
662 assert(false);
663 status_ = Status::NotSupported("SeekToLast() is not supported in PlainTable");
664 next_offset_ = offset_ = table_->file_info_.data_end_offset;
665 }
666
Seek(const Slice & target)667 void PlainTableIterator::Seek(const Slice& target) {
668 if (use_prefix_seek_ != !table_->IsTotalOrderMode()) {
669 // This check is done here instead of NewIterator() to permit creating an
670 // iterator with total_order_seek = true even if we won't be able to Seek()
671 // it. This is needed for compaction: it creates iterator with
672 // total_order_seek = true but usually never does Seek() on it,
673 // only SeekToFirst().
674 status_ =
675 Status::InvalidArgument(
676 "total_order_seek not implemented for PlainTable.");
677 offset_ = next_offset_ = table_->file_info_.data_end_offset;
678 return;
679 }
680
681 // If the user doesn't set prefix seek option and we are not able to do a
682 // total Seek(). assert failure.
683 if (table_->IsTotalOrderMode()) {
684 if (table_->full_scan_mode_) {
685 status_ =
686 Status::InvalidArgument("Seek() is not allowed in full scan mode.");
687 offset_ = next_offset_ = table_->file_info_.data_end_offset;
688 return;
689 } else if (table_->GetIndexSize() > 1) {
690 assert(false);
691 status_ = Status::NotSupported(
692 "PlainTable cannot issue non-prefix seek unless in total order "
693 "mode.");
694 offset_ = next_offset_ = table_->file_info_.data_end_offset;
695 return;
696 }
697 }
698
699 Slice prefix_slice = table_->GetPrefix(target);
700 uint32_t prefix_hash = 0;
701 // Bloom filter is ignored in total-order mode.
702 if (!table_->IsTotalOrderMode()) {
703 prefix_hash = GetSliceHash(prefix_slice);
704 if (!table_->MatchBloom(prefix_hash)) {
705 status_ = Status::OK();
706 offset_ = next_offset_ = table_->file_info_.data_end_offset;
707 return;
708 }
709 }
710 bool prefix_match;
711 status_ = table_->GetOffset(&decoder_, target, prefix_slice, prefix_hash,
712 prefix_match, &next_offset_);
713 if (!status_.ok()) {
714 offset_ = next_offset_ = table_->file_info_.data_end_offset;
715 return;
716 }
717
718 if (next_offset_ < table_->file_info_.data_end_offset) {
719 for (Next(); status_.ok() && Valid(); Next()) {
720 if (!prefix_match) {
721 // Need to verify the first key's prefix
722 if (table_->GetPrefix(key()) != prefix_slice) {
723 offset_ = next_offset_ = table_->file_info_.data_end_offset;
724 break;
725 }
726 prefix_match = true;
727 }
728 if (table_->internal_comparator_.Compare(key(), target) >= 0) {
729 break;
730 }
731 }
732 } else {
733 offset_ = table_->file_info_.data_end_offset;
734 }
735 }
736
SeekForPrev(const Slice &)737 void PlainTableIterator::SeekForPrev(const Slice& /*target*/) {
738 assert(false);
739 status_ =
740 Status::NotSupported("SeekForPrev() is not supported in PlainTable");
741 offset_ = next_offset_ = table_->file_info_.data_end_offset;
742 }
743
Next()744 void PlainTableIterator::Next() {
745 offset_ = next_offset_;
746 if (offset_ < table_->file_info_.data_end_offset) {
747 Slice tmp_slice;
748 ParsedInternalKey parsed_key;
749 status_ =
750 table_->Next(&decoder_, &next_offset_, &parsed_key, &key_, &value_);
751 if (!status_.ok()) {
752 offset_ = next_offset_ = table_->file_info_.data_end_offset;
753 }
754 }
755 }
756
Prev()757 void PlainTableIterator::Prev() {
758 assert(false);
759 }
760
key() const761 Slice PlainTableIterator::key() const {
762 assert(Valid());
763 return key_;
764 }
765
value() const766 Slice PlainTableIterator::value() const {
767 assert(Valid());
768 return value_;
769 }
770
status() const771 Status PlainTableIterator::status() const {
772 return status_;
773 }
774
775 } // namespace ROCKSDB_NAMESPACE
776 #endif // ROCKSDB_LITE
777