1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 #pragma once
10 #include "table/block_based/block_based_table_reader.h"
11 
12 #include "table/block_based/block_based_table_reader_impl.h"
13 #include "table/block_based/block_prefetcher.h"
14 #include "table/block_based/reader_common.h"
15 
16 namespace ROCKSDB_NAMESPACE {
17 // Iterates over the contents of BlockBasedTable.
18 class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
19   // compaction_readahead_size: its value will only be used if for_compaction =
20   // true
21  public:
22   BlockBasedTableIterator(
23       const BlockBasedTable* table, const ReadOptions& read_options,
24       const InternalKeyComparator& icomp,
25       std::unique_ptr<InternalIteratorBase<IndexValue>>&& index_iter,
26       bool check_filter, bool need_upper_bound_check,
27       const SliceTransform* prefix_extractor, TableReaderCaller caller,
28       size_t compaction_readahead_size = 0)
table_(table)29       : table_(table),
30         read_options_(read_options),
31         icomp_(icomp),
32         user_comparator_(icomp.user_comparator()),
33         index_iter_(std::move(index_iter)),
34         pinned_iters_mgr_(nullptr),
35         block_iter_points_to_real_block_(false),
36         check_filter_(check_filter),
37         need_upper_bound_check_(need_upper_bound_check),
38         prefix_extractor_(prefix_extractor),
39         lookup_context_(caller),
40         block_prefetcher_(compaction_readahead_size) {}
41 
~BlockBasedTableIterator()42   ~BlockBasedTableIterator() {}
43 
44   void Seek(const Slice& target) override;
45   void SeekForPrev(const Slice& target) override;
46   void SeekToFirst() override;
47   void SeekToLast() override;
48   void Next() final override;
49   bool NextAndGetResult(IterateResult* result) override;
50   void Prev() override;
Valid()51   bool Valid() const override {
52     return !is_out_of_bound_ &&
53            (is_at_first_key_from_index_ ||
54             (block_iter_points_to_real_block_ && block_iter_.Valid()));
55   }
key()56   Slice key() const override {
57     assert(Valid());
58     if (is_at_first_key_from_index_) {
59       return index_iter_->value().first_internal_key;
60     } else {
61       return block_iter_.key();
62     }
63   }
user_key()64   Slice user_key() const override {
65     assert(Valid());
66     if (is_at_first_key_from_index_) {
67       return ExtractUserKey(index_iter_->value().first_internal_key);
68     } else {
69       return block_iter_.user_key();
70     }
71   }
value()72   Slice value() const override {
73     assert(Valid());
74 
75     // Load current block if not loaded.
76     if (is_at_first_key_from_index_ &&
77         !const_cast<BlockBasedTableIterator*>(this)
78              ->MaterializeCurrentBlock()) {
79       // Oops, index is not consistent with block contents, but we have
80       // no good way to report error at this point. Let's return empty value.
81       return Slice();
82     }
83 
84     return block_iter_.value();
85   }
status()86   Status status() const override {
87     // Prefix index set status to NotFound when the prefix does not exist
88     if (!index_iter_->status().ok() && !index_iter_->status().IsNotFound()) {
89       return index_iter_->status();
90     } else if (block_iter_points_to_real_block_) {
91       return block_iter_.status();
92     } else {
93       return Status::OK();
94     }
95   }
96 
97   // Whether iterator invalidated for being out of bound.
IsOutOfBound()98   bool IsOutOfBound() override { return is_out_of_bound_; }
99 
MayBeOutOfUpperBound()100   inline bool MayBeOutOfUpperBound() override {
101     assert(Valid());
102     return !data_block_within_upper_bound_;
103   }
104 
SetPinnedItersMgr(PinnedIteratorsManager * pinned_iters_mgr)105   void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
106     pinned_iters_mgr_ = pinned_iters_mgr;
107   }
IsKeyPinned()108   bool IsKeyPinned() const override {
109     // Our key comes either from block_iter_'s current key
110     // or index_iter_'s current *value*.
111     return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
112            ((is_at_first_key_from_index_ && index_iter_->IsValuePinned()) ||
113             (block_iter_points_to_real_block_ && block_iter_.IsKeyPinned()));
114   }
IsValuePinned()115   bool IsValuePinned() const override {
116     // Load current block if not loaded.
117     if (is_at_first_key_from_index_) {
118       const_cast<BlockBasedTableIterator*>(this)->MaterializeCurrentBlock();
119     }
120     // BlockIter::IsValuePinned() is always true. No need to check
121     return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
122            block_iter_points_to_real_block_;
123   }
124 
ResetDataIter()125   void ResetDataIter() {
126     if (block_iter_points_to_real_block_) {
127       if (pinned_iters_mgr_ != nullptr && pinned_iters_mgr_->PinningEnabled()) {
128         block_iter_.DelegateCleanupsTo(pinned_iters_mgr_);
129       }
130       block_iter_.Invalidate(Status::OK());
131       block_iter_points_to_real_block_ = false;
132     }
133   }
134 
SavePrevIndexValue()135   void SavePrevIndexValue() {
136     if (block_iter_points_to_real_block_) {
137       // Reseek. If they end up with the same data block, we shouldn't re-fetch
138       // the same data block.
139       prev_block_offset_ = index_iter_->value().handle.offset();
140     }
141   }
142 
143  private:
144   enum class IterDirection {
145     kForward,
146     kBackward,
147   };
148 
149   const BlockBasedTable* table_;
150   const ReadOptions read_options_;
151   const InternalKeyComparator& icomp_;
152   UserComparatorWrapper user_comparator_;
153   std::unique_ptr<InternalIteratorBase<IndexValue>> index_iter_;
154   PinnedIteratorsManager* pinned_iters_mgr_;
155   DataBlockIter block_iter_;
156 
157   // True if block_iter_ is initialized and points to the same block
158   // as index iterator.
159   bool block_iter_points_to_real_block_;
160   // See InternalIteratorBase::IsOutOfBound().
161   bool is_out_of_bound_ = false;
162   // Whether current data block being fully within iterate upper bound.
163   bool data_block_within_upper_bound_ = false;
164   // True if we're standing at the first key of a block, and we haven't loaded
165   // that block yet. A call to value() will trigger loading the block.
166   bool is_at_first_key_from_index_ = false;
167   bool check_filter_;
168   // TODO(Zhongyi): pick a better name
169   bool need_upper_bound_check_;
170   const SliceTransform* prefix_extractor_;
171   uint64_t prev_block_offset_ = std::numeric_limits<uint64_t>::max();
172   BlockCacheLookupContext lookup_context_;
173 
174   BlockPrefetcher block_prefetcher_;
175 
176   // If `target` is null, seek to first.
177   void SeekImpl(const Slice* target);
178 
179   void InitDataBlock();
180   bool MaterializeCurrentBlock();
181   void FindKeyForward();
182   void FindBlockForward();
183   void FindKeyBackward();
184   void CheckOutOfBound();
185 
186   // Check if data block is fully within iterate_upper_bound.
187   //
188   // Note MyRocks may update iterate bounds between seek. To workaround it,
189   // we need to check and update data_block_within_upper_bound_ accordingly.
190   void CheckDataBlockWithinUpperBound();
191 
CheckPrefixMayMatch(const Slice & ikey,IterDirection direction)192   bool CheckPrefixMayMatch(const Slice& ikey, IterDirection direction) {
193     if (need_upper_bound_check_ && direction == IterDirection::kBackward) {
194       // Upper bound check isn't sufficnet for backward direction to
195       // guarantee the same result as total order, so disable prefix
196       // check.
197       return true;
198     }
199     if (check_filter_ &&
200         !table_->PrefixMayMatch(ikey, read_options_, prefix_extractor_,
201                                 need_upper_bound_check_, &lookup_context_)) {
202       // TODO remember the iterator is invalidated because of prefix
203       // match. This can avoid the upper level file iterator to falsely
204       // believe the position is the end of the SST file and move to
205       // the first key of the next file.
206       ResetDataIter();
207       return false;
208     }
209     return true;
210   }
211 };
212 }  // namespace ROCKSDB_NAMESPACE
213