// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#pragma once
#include "table/block_based/block_based_table_reader.h"

#include "table/block_based/block_based_table_reader_impl.h"
#include "table/block_based/block_prefetcher.h"
#include "table/block_based/reader_common.h"

namespace ROCKSDB_NAMESPACE {
// Iterates over the contents of BlockBasedTable.
//
// To avoid reading a data block until it is actually needed, the iterator can
// stand "at the first key of a block" using only the index entry's
// first_internal_key (see is_at_first_key_from_index_ below); the block itself
// is loaded lazily by MaterializeCurrentBlock() when value() is called.
class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
  // compaction_readahead_size: its value will only be used if for_compaction =
  // true
 public:
  // Does not take ownership of `table` or `prefix_extractor` (raw, non-owning
  // pointers); takes ownership of `index_iter`. `read_options` is copied.
  BlockBasedTableIterator(
      const BlockBasedTable* table, const ReadOptions& read_options,
      const InternalKeyComparator& icomp,
      std::unique_ptr<InternalIteratorBase<IndexValue>>&& index_iter,
      bool check_filter, bool need_upper_bound_check,
      const SliceTransform* prefix_extractor, TableReaderCaller caller,
      size_t compaction_readahead_size = 0)
      : table_(table),
        read_options_(read_options),
        icomp_(icomp),
        user_comparator_(icomp.user_comparator()),
        index_iter_(std::move(index_iter)),
        pinned_iters_mgr_(nullptr),
        block_iter_points_to_real_block_(false),
        check_filter_(check_filter),
        need_upper_bound_check_(need_upper_bound_check),
        prefix_extractor_(prefix_extractor),
        lookup_context_(caller),
        block_prefetcher_(compaction_readahead_size) {}

  ~BlockBasedTableIterator() {}

  void Seek(const Slice& target) override;
  void SeekForPrev(const Slice& target) override;
  void SeekToFirst() override;
  void SeekToLast() override;
  void Next() final override;
  bool NextAndGetResult(IterateResult* result) override;
  void Prev() override;
  // Valid either when positioned at the (not yet loaded) first key of a block
  // per the index, or when the data block iterator is loaded and valid.
  bool Valid() const override {
    return !is_out_of_bound_ &&
           (is_at_first_key_from_index_ ||
            (block_iter_points_to_real_block_ && block_iter_.Valid()));
  }
  // Returns the current internal key. If the data block hasn't been loaded
  // yet, the key comes from the index entry's first_internal_key; no block
  // read is triggered.
  Slice key() const override {
    assert(Valid());
    if (is_at_first_key_from_index_) {
      return index_iter_->value().first_internal_key;
    } else {
      return block_iter_.key();
    }
  }
  Slice user_key() const override {
    assert(Valid());
    if (is_at_first_key_from_index_) {
      return ExtractUserKey(index_iter_->value().first_internal_key);
    } else {
      return block_iter_.user_key();
    }
  }
  // Returns the current value. May lazily load the data block; const_cast is
  // needed because value() is const but materialization mutates iterator
  // state.
  Slice value() const override {
    assert(Valid());

    // Load current block if not loaded.
    if (is_at_first_key_from_index_ &&
        !const_cast<BlockBasedTableIterator*>(this)
             ->MaterializeCurrentBlock()) {
      // Oops, index is not consistent with block contents, but we have
      // no good way to report error at this point. Let's return empty value.
      return Slice();
    }

    return block_iter_.value();
  }
  Status status() const override {
    // Prefix index set status to NotFound when the prefix does not exist
    if (!index_iter_->status().ok() && !index_iter_->status().IsNotFound()) {
      return index_iter_->status();
    } else if (block_iter_points_to_real_block_) {
      return block_iter_.status();
    } else {
      return Status::OK();
    }
  }

  // Whether iterator invalidated for being out of bound.
  bool IsOutOfBound() override { return is_out_of_bound_; }

  inline bool MayBeOutOfUpperBound() override {
    assert(Valid());
    return !data_block_within_upper_bound_;
  }

  void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
    pinned_iters_mgr_ = pinned_iters_mgr;
  }
  bool IsKeyPinned() const override {
    // Our key comes either from block_iter_'s current key
    // or index_iter_'s current *value*.
    return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
           ((is_at_first_key_from_index_ && index_iter_->IsValuePinned()) ||
            (block_iter_points_to_real_block_ && block_iter_.IsKeyPinned()));
  }
  bool IsValuePinned() const override {
    // Load current block if not loaded.
    if (is_at_first_key_from_index_) {
      const_cast<BlockBasedTableIterator*>(this)->MaterializeCurrentBlock();
    }
    // BlockIter::IsValuePinned() is always true. No need to check
    return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
           block_iter_points_to_real_block_;
  }

  // Invalidates the data block iterator. If pinning is enabled, its cleanups
  // are handed off to the pinned-iterators manager so pinned data stays alive.
  void ResetDataIter() {
    if (block_iter_points_to_real_block_) {
      if (pinned_iters_mgr_ != nullptr && pinned_iters_mgr_->PinningEnabled()) {
        block_iter_.DelegateCleanupsTo(pinned_iters_mgr_);
      }
      block_iter_.Invalidate(Status::OK());
      block_iter_points_to_real_block_ = false;
    }
  }

  void SavePrevIndexValue() {
    if (block_iter_points_to_real_block_) {
      // Reseek. If they end up with the same data block, we shouldn't re-fetch
      // the same data block.
      prev_block_offset_ = index_iter_->value().handle.offset();
    }
  }

 private:
  enum class IterDirection {
    kForward,
    kBackward,
  };

  const BlockBasedTable* table_;
  const ReadOptions read_options_;
  const InternalKeyComparator& icomp_;
  UserComparatorWrapper user_comparator_;
  std::unique_ptr<InternalIteratorBase<IndexValue>> index_iter_;
  PinnedIteratorsManager* pinned_iters_mgr_;
  DataBlockIter block_iter_;

  // True if block_iter_ is initialized and points to the same block
  // as index iterator.
  bool block_iter_points_to_real_block_;
  // See InternalIteratorBase::IsOutOfBound().
  bool is_out_of_bound_ = false;
  // Whether current data block being fully within iterate upper bound.
  bool data_block_within_upper_bound_ = false;
  // True if we're standing at the first key of a block, and we haven't loaded
  // that block yet. A call to value() will trigger loading the block.
  bool is_at_first_key_from_index_ = false;
  bool check_filter_;
  // TODO(Zhongyi): pick a better name
  bool need_upper_bound_check_;
  const SliceTransform* prefix_extractor_;
  // Offset of the data block saved by SavePrevIndexValue(); max() is the
  // "no saved block" sentinel.
  uint64_t prev_block_offset_ = std::numeric_limits<uint64_t>::max();
  BlockCacheLookupContext lookup_context_;

  BlockPrefetcher block_prefetcher_;

  // If `target` is null, seek to first.
  void SeekImpl(const Slice* target);

  void InitDataBlock();
  bool MaterializeCurrentBlock();
  void FindKeyForward();
  void FindBlockForward();
  void FindKeyBackward();
  void CheckOutOfBound();

  // Check if data block is fully within iterate_upper_bound.
  //
  // Note MyRocks may update iterate bounds between seek. To workaround it,
  // we need to check and update data_block_within_upper_bound_ accordingly.
  void CheckDataBlockWithinUpperBound();

  // Returns false (and resets the data iterator) when the prefix bloom filter
  // says `ikey`'s prefix cannot be in this table; true otherwise.
  bool CheckPrefixMayMatch(const Slice& ikey, IterDirection direction) {
    if (need_upper_bound_check_ && direction == IterDirection::kBackward) {
      // Upper bound check isn't sufficient for backward direction to
      // guarantee the same result as total order, so disable prefix
      // check.
      return true;
    }
    if (check_filter_ &&
        !table_->PrefixMayMatch(ikey, read_options_, prefix_extractor_,
                                need_upper_bound_check_, &lookup_context_)) {
      // TODO remember the iterator is invalidated because of prefix
      // match. This can avoid the upper level file iterator to falsely
      // believe the position is the end of the SST file and move to
      // the first key of the next file.
      ResetDataIter();
      return false;
    }
    return true;
  }
};
}  // namespace ROCKSDB_NAMESPACE