1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #pragma once
7 #include <algorithm>
8 #include <array>
9 #include <string>
10 #include "db/lookup_key.h"
11 #include "db/merge_context.h"
12 #include "rocksdb/env.h"
13 #include "rocksdb/statistics.h"
14 #include "rocksdb/types.h"
15 #include "util/autovector.h"
16 
17 namespace ROCKSDB_NAMESPACE {
18 class GetContext;
19 
20 struct KeyContext {
21   const Slice* key;
22   LookupKey* lkey;
23   Slice ukey;
24   Slice ikey;
25   ColumnFamilyHandle* column_family;
26   Status* s;
27   MergeContext merge_context;
28   SequenceNumber max_covering_tombstone_seq;
29   bool key_exists;
30   void* cb_arg;
31   PinnableSlice* value;
32   std::string* timestamp;
33   GetContext* get_context;
34 
KeyContextKeyContext35   KeyContext(ColumnFamilyHandle* col_family, const Slice& user_key,
36              PinnableSlice* val, std::string* ts, Status* stat)
37       : key(&user_key),
38         lkey(nullptr),
39         column_family(col_family),
40         s(stat),
41         max_covering_tombstone_seq(0),
42         key_exists(false),
43         cb_arg(nullptr),
44         value(val),
45         timestamp(ts),
46         get_context(nullptr) {}
47 
48   KeyContext() = default;
49 };
50 
51 // The MultiGetContext class is a container for the sorted list of keys that
52 // we need to lookup in a batch. Its main purpose is to make batch execution
53 // easier by allowing various stages of the MultiGet lookups to operate on
54 // subsets of keys, potentially non-contiguous. In order to accomplish this,
55 // it defines the following classes -
56 //
57 // MultiGetContext::Range
58 // MultiGetContext::Range::Iterator
59 // MultiGetContext::Range::IteratorWrapper
60 //
61 // Here is an example of how this can be used -
62 //
63 // {
64 //    MultiGetContext ctx(...);
65 //    MultiGetContext::Range range = ctx.GetMultiGetRange();
66 //
67 //    // Iterate to determine some subset of the keys
68 //    MultiGetContext::Range::Iterator start = range.begin();
69 //    MultiGetContext::Range::Iterator end = ...;
70 //
71 //    // Make a new range with a subset of keys
72 //    MultiGetContext::Range subrange(range, start, end);
73 //
74 //    // Define an auxillary vector, if needed, to hold additional data for
75 //    // each key
76 //    std::array<Foo, MultiGetContext::MAX_BATCH_SIZE> aux;
77 //
78 //    // Iterate over the subrange and the auxillary vector simultaneously
79 //    MultiGetContext::Range::Iterator iter = subrange.begin();
80 //    for (; iter != subrange.end(); ++iter) {
81 //      KeyContext& key = *iter;
82 //      Foo& aux_key = aux_iter[iter.index()];
83 //      ...
84 //    }
85 //  }
86 class MultiGetContext {
87  public:
88   // Limit the number of keys in a batch to this number. Benchmarks show that
89   // there is negligible benefit for batches exceeding this. Keeping this < 64
90   // simplifies iteration, as well as reduces the amount of stack allocations
91   // htat need to be performed
92   static const int MAX_BATCH_SIZE = 32;
93 
MultiGetContext(autovector<KeyContext *,MAX_BATCH_SIZE> * sorted_keys,size_t begin,size_t num_keys,SequenceNumber snapshot)94   MultiGetContext(autovector<KeyContext*, MAX_BATCH_SIZE>* sorted_keys,
95                   size_t begin, size_t num_keys, SequenceNumber snapshot)
96       : num_keys_(num_keys),
97         value_mask_(0),
98         lookup_key_ptr_(reinterpret_cast<LookupKey*>(lookup_key_stack_buf)) {
99     if (num_keys > MAX_LOOKUP_KEYS_ON_STACK) {
100       lookup_key_heap_buf.reset(new char[sizeof(LookupKey) * num_keys]);
101       lookup_key_ptr_ = reinterpret_cast<LookupKey*>(
102           lookup_key_heap_buf.get());
103     }
104 
105     for (size_t iter = 0; iter != num_keys_; ++iter) {
106       // autovector may not be contiguous storage, so make a copy
107       sorted_keys_[iter] = (*sorted_keys)[begin + iter];
108       sorted_keys_[iter]->lkey = new (&lookup_key_ptr_[iter])
109           LookupKey(*sorted_keys_[iter]->key, snapshot);
110       sorted_keys_[iter]->ukey = sorted_keys_[iter]->lkey->user_key();
111       sorted_keys_[iter]->ikey = sorted_keys_[iter]->lkey->internal_key();
112     }
113   }
114 
~MultiGetContext()115   ~MultiGetContext() {
116     for (size_t i = 0; i < num_keys_; ++i) {
117       lookup_key_ptr_[i].~LookupKey();
118     }
119   }
120 
121  private:
122   static const int MAX_LOOKUP_KEYS_ON_STACK = 16;
123   alignas(alignof(LookupKey))
124     char lookup_key_stack_buf[sizeof(LookupKey) * MAX_LOOKUP_KEYS_ON_STACK];
125   std::array<KeyContext*, MAX_BATCH_SIZE> sorted_keys_;
126   size_t num_keys_;
127   uint64_t value_mask_;
128   std::unique_ptr<char[]> lookup_key_heap_buf;
129   LookupKey* lookup_key_ptr_;
130 
131  public:
132   // MultiGetContext::Range - Specifies a range of keys, by start and end index,
133   // from the parent MultiGetContext. Each range contains a bit vector that
134   // indicates whether the corresponding keys need to be processed or skipped.
135   // A Range object can be copy constructed, and the new object inherits the
136   // original Range's bit vector. This is useful for progressively skipping
137   // keys as the lookup goes through various stages. For example, when looking
138   // up keys in the same SST file, a Range is created excluding keys not
139   // belonging to that file. A new Range is then copy constructed and individual
140   // keys are skipped based on bloom filter lookup.
141   class Range {
142    public:
143     // MultiGetContext::Range::Iterator - A forward iterator that iterates over
144     // non-skippable keys in a Range, as well as keys whose final value has been
145     // found. The latter is tracked by MultiGetContext::value_mask_
146     class Iterator {
147      public:
148       // -- iterator traits
149       typedef Iterator self_type;
150       typedef KeyContext value_type;
151       typedef KeyContext& reference;
152       typedef KeyContext* pointer;
153       typedef int difference_type;
154       typedef std::forward_iterator_tag iterator_category;
155 
Iterator(const Range * range,size_t idx)156       Iterator(const Range* range, size_t idx)
157           : range_(range), ctx_(range->ctx_), index_(idx) {
158         while (index_ < range_->end_ &&
159                (1ull << index_) &
160                    (range_->ctx_->value_mask_ | range_->skip_mask_))
161           index_++;
162       }
163 
164       Iterator(const Iterator&) = default;
165       Iterator& operator=(const Iterator&) = default;
166 
167       Iterator& operator++() {
168         while (++index_ < range_->end_ &&
169                (1ull << index_) &
170                    (range_->ctx_->value_mask_ | range_->skip_mask_))
171           ;
172         return *this;
173       }
174 
175       bool operator==(Iterator other) const {
176         assert(range_->ctx_ == other.range_->ctx_);
177         return index_ == other.index_;
178       }
179 
180       bool operator!=(Iterator other) const {
181         assert(range_->ctx_ == other.range_->ctx_);
182         return index_ != other.index_;
183       }
184 
185       KeyContext& operator*() {
186         assert(index_ < range_->end_ && index_ >= range_->start_);
187         return *(ctx_->sorted_keys_[index_]);
188       }
189 
190       KeyContext* operator->() {
191         assert(index_ < range_->end_ && index_ >= range_->start_);
192         return ctx_->sorted_keys_[index_];
193       }
194 
index()195       size_t index() { return index_; }
196 
197      private:
198       friend Range;
199       const Range* range_;
200       const MultiGetContext* ctx_;
201       size_t index_;
202     };
203 
Range(const Range & mget_range,const Iterator & first,const Iterator & last)204     Range(const Range& mget_range,
205           const Iterator& first,
206           const Iterator& last) {
207       ctx_ = mget_range.ctx_;
208       start_ = first.index_;
209       end_ = last.index_;
210       skip_mask_ = mget_range.skip_mask_;
211     }
212 
213     Range() = default;
214 
begin()215     Iterator begin() const { return Iterator(this, start_); }
216 
end()217     Iterator end() const { return Iterator(this, end_); }
218 
empty()219     bool empty() {
220       return (((1ull << end_) - 1) & ~((1ull << start_) - 1) &
221               ~(ctx_->value_mask_ | skip_mask_)) == 0;
222     }
223 
SkipKey(const Iterator & iter)224     void SkipKey(const Iterator& iter) { skip_mask_ |= 1ull << iter.index_; }
225 
226     // Update the value_mask_ in MultiGetContext so its
227     // immediately reflected in all the Range Iterators
MarkKeyDone(Iterator & iter)228     void MarkKeyDone(Iterator& iter) {
229       ctx_->value_mask_ |= (1ull << iter.index_);
230     }
231 
CheckKeyDone(Iterator & iter)232     bool CheckKeyDone(Iterator& iter) {
233       return ctx_->value_mask_ & (1ull << iter.index_);
234     }
235 
KeysLeft()236     uint64_t KeysLeft() {
237       uint64_t new_val = skip_mask_ | ctx_->value_mask_;
238       uint64_t count = 0;
239       while (new_val) {
240         new_val = new_val & (new_val - 1);
241         count++;
242       }
243       return end_ - count;
244     }
245 
246    private:
247     friend MultiGetContext;
248     MultiGetContext* ctx_;
249     size_t start_;
250     size_t end_;
251     uint64_t skip_mask_;
252 
Range(MultiGetContext * ctx,size_t num_keys)253     Range(MultiGetContext* ctx, size_t num_keys)
254         : ctx_(ctx), start_(0), end_(num_keys), skip_mask_(0) {}
255   };
256 
257   // Return the initial range that encompasses all the keys in the batch
GetMultiGetRange()258   Range GetMultiGetRange() { return Range(this, num_keys_); }
259 };
260 
261 }  // namespace ROCKSDB_NAMESPACE
262