1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 2 // This source code is licensed under both the GPLv2 (found in the 3 // COPYING file in the root directory) and Apache 2.0 License 4 // (found in the LICENSE.Apache file in the root directory). 5 6 #pragma once 7 #include <algorithm> 8 #include <array> 9 #include <string> 10 #include "db/lookup_key.h" 11 #include "db/merge_context.h" 12 #include "rocksdb/env.h" 13 #include "rocksdb/statistics.h" 14 #include "rocksdb/types.h" 15 #include "util/autovector.h" 16 17 namespace ROCKSDB_NAMESPACE { 18 class GetContext; 19 20 struct KeyContext { 21 const Slice* key; 22 LookupKey* lkey; 23 Slice ukey; 24 Slice ikey; 25 ColumnFamilyHandle* column_family; 26 Status* s; 27 MergeContext merge_context; 28 SequenceNumber max_covering_tombstone_seq; 29 bool key_exists; 30 void* cb_arg; 31 PinnableSlice* value; 32 std::string* timestamp; 33 GetContext* get_context; 34 KeyContextKeyContext35 KeyContext(ColumnFamilyHandle* col_family, const Slice& user_key, 36 PinnableSlice* val, std::string* ts, Status* stat) 37 : key(&user_key), 38 lkey(nullptr), 39 column_family(col_family), 40 s(stat), 41 max_covering_tombstone_seq(0), 42 key_exists(false), 43 cb_arg(nullptr), 44 value(val), 45 timestamp(ts), 46 get_context(nullptr) {} 47 48 KeyContext() = default; 49 }; 50 51 // The MultiGetContext class is a container for the sorted list of keys that 52 // we need to lookup in a batch. Its main purpose is to make batch execution 53 // easier by allowing various stages of the MultiGet lookups to operate on 54 // subsets of keys, potentially non-contiguous. In order to accomplish this, 55 // it defines the following classes - 56 // 57 // MultiGetContext::Range 58 // MultiGetContext::Range::Iterator 59 // MultiGetContext::Range::IteratorWrapper 60 // 61 // Here is an example of how this can be used - 62 // 63 // { 64 // MultiGetContext ctx(...); 65 // MultiGetContext::Range range = ctx.GetMultiGetRange(); 66 // 67 // // Iterate to determine some subset of the keys 68 // MultiGetContext::Range::Iterator start = range.begin(); 69 // MultiGetContext::Range::Iterator end = ...; 70 // 71 // // Make a new range with a subset of keys 72 // MultiGetContext::Range subrange(range, start, end); 73 // 74 // // Define an auxillary vector, if needed, to hold additional data for 75 // // each key 76 // std::array<Foo, MultiGetContext::MAX_BATCH_SIZE> aux; 77 // 78 // // Iterate over the subrange and the auxillary vector simultaneously 79 // MultiGetContext::Range::Iterator iter = subrange.begin(); 80 // for (; iter != subrange.end(); ++iter) { 81 // KeyContext& key = *iter; 82 // Foo& aux_key = aux_iter[iter.index()]; 83 // ... 84 // } 85 // } 86 class MultiGetContext { 87 public: 88 // Limit the number of keys in a batch to this number. Benchmarks show that 89 // there is negligible benefit for batches exceeding this. Keeping this < 64 90 // simplifies iteration, as well as reduces the amount of stack allocations 91 // htat need to be performed 92 static const int MAX_BATCH_SIZE = 32; 93 MultiGetContext(autovector<KeyContext *,MAX_BATCH_SIZE> * sorted_keys,size_t begin,size_t num_keys,SequenceNumber snapshot)94 MultiGetContext(autovector<KeyContext*, MAX_BATCH_SIZE>* sorted_keys, 95 size_t begin, size_t num_keys, SequenceNumber snapshot) 96 : num_keys_(num_keys), 97 value_mask_(0), 98 lookup_key_ptr_(reinterpret_cast<LookupKey*>(lookup_key_stack_buf)) { 99 if (num_keys > MAX_LOOKUP_KEYS_ON_STACK) { 100 lookup_key_heap_buf.reset(new char[sizeof(LookupKey) * num_keys]); 101 lookup_key_ptr_ = reinterpret_cast<LookupKey*>( 102 lookup_key_heap_buf.get()); 103 } 104 105 for (size_t iter = 0; iter != num_keys_; ++iter) { 106 // autovector may not be contiguous storage, so make a copy 107 sorted_keys_[iter] = (*sorted_keys)[begin + iter]; 108 sorted_keys_[iter]->lkey = new (&lookup_key_ptr_[iter]) 109 LookupKey(*sorted_keys_[iter]->key, snapshot); 110 sorted_keys_[iter]->ukey = sorted_keys_[iter]->lkey->user_key(); 111 sorted_keys_[iter]->ikey = sorted_keys_[iter]->lkey->internal_key(); 112 } 113 } 114 ~MultiGetContext()115 ~MultiGetContext() { 116 for (size_t i = 0; i < num_keys_; ++i) { 117 lookup_key_ptr_[i].~LookupKey(); 118 } 119 } 120 121 private: 122 static const int MAX_LOOKUP_KEYS_ON_STACK = 16; 123 alignas(alignof(LookupKey)) 124 char lookup_key_stack_buf[sizeof(LookupKey) * MAX_LOOKUP_KEYS_ON_STACK]; 125 std::array<KeyContext*, MAX_BATCH_SIZE> sorted_keys_; 126 size_t num_keys_; 127 uint64_t value_mask_; 128 std::unique_ptr<char[]> lookup_key_heap_buf; 129 LookupKey* lookup_key_ptr_; 130 131 public: 132 // MultiGetContext::Range - Specifies a range of keys, by start and end index, 133 // from the parent MultiGetContext. Each range contains a bit vector that 134 // indicates whether the corresponding keys need to be processed or skipped. 135 // A Range object can be copy constructed, and the new object inherits the 136 // original Range's bit vector. This is useful for progressively skipping 137 // keys as the lookup goes through various stages. For example, when looking 138 // up keys in the same SST file, a Range is created excluding keys not 139 // belonging to that file. A new Range is then copy constructed and individual 140 // keys are skipped based on bloom filter lookup. 141 class Range { 142 public: 143 // MultiGetContext::Range::Iterator - A forward iterator that iterates over 144 // non-skippable keys in a Range, as well as keys whose final value has been 145 // found. The latter is tracked by MultiGetContext::value_mask_ 146 class Iterator { 147 public: 148 // -- iterator traits 149 typedef Iterator self_type; 150 typedef KeyContext value_type; 151 typedef KeyContext& reference; 152 typedef KeyContext* pointer; 153 typedef int difference_type; 154 typedef std::forward_iterator_tag iterator_category; 155 Iterator(const Range * range,size_t idx)156 Iterator(const Range* range, size_t idx) 157 : range_(range), ctx_(range->ctx_), index_(idx) { 158 while (index_ < range_->end_ && 159 (1ull << index_) & 160 (range_->ctx_->value_mask_ | range_->skip_mask_)) 161 index_++; 162 } 163 164 Iterator(const Iterator&) = default; 165 Iterator& operator=(const Iterator&) = default; 166 167 Iterator& operator++() { 168 while (++index_ < range_->end_ && 169 (1ull << index_) & 170 (range_->ctx_->value_mask_ | range_->skip_mask_)) 171 ; 172 return *this; 173 } 174 175 bool operator==(Iterator other) const { 176 assert(range_->ctx_ == other.range_->ctx_); 177 return index_ == other.index_; 178 } 179 180 bool operator!=(Iterator other) const { 181 assert(range_->ctx_ == other.range_->ctx_); 182 return index_ != other.index_; 183 } 184 185 KeyContext& operator*() { 186 assert(index_ < range_->end_ && index_ >= range_->start_); 187 return *(ctx_->sorted_keys_[index_]); 188 } 189 190 KeyContext* operator->() { 191 assert(index_ < range_->end_ && index_ >= range_->start_); 192 return ctx_->sorted_keys_[index_]; 193 } 194 index()195 size_t index() { return index_; } 196 197 private: 198 friend Range; 199 const Range* range_; 200 const MultiGetContext* ctx_; 201 size_t index_; 202 }; 203 Range(const Range & mget_range,const Iterator & first,const Iterator & last)204 Range(const Range& mget_range, 205 const Iterator& first, 206 const Iterator& last) { 207 ctx_ = mget_range.ctx_; 208 start_ = first.index_; 209 end_ = last.index_; 210 skip_mask_ = mget_range.skip_mask_; 211 } 212 213 Range() = default; 214 begin()215 Iterator begin() const { return Iterator(this, start_); } 216 end()217 Iterator end() const { return Iterator(this, end_); } 218 empty()219 bool empty() { 220 return (((1ull << end_) - 1) & ~((1ull << start_) - 1) & 221 ~(ctx_->value_mask_ | skip_mask_)) == 0; 222 } 223 SkipKey(const Iterator & iter)224 void SkipKey(const Iterator& iter) { skip_mask_ |= 1ull << iter.index_; } 225 226 // Update the value_mask_ in MultiGetContext so its 227 // immediately reflected in all the Range Iterators MarkKeyDone(Iterator & iter)228 void MarkKeyDone(Iterator& iter) { 229 ctx_->value_mask_ |= (1ull << iter.index_); 230 } 231 CheckKeyDone(Iterator & iter)232 bool CheckKeyDone(Iterator& iter) { 233 return ctx_->value_mask_ & (1ull << iter.index_); 234 } 235 KeysLeft()236 uint64_t KeysLeft() { 237 uint64_t new_val = skip_mask_ | ctx_->value_mask_; 238 uint64_t count = 0; 239 while (new_val) { 240 new_val = new_val & (new_val - 1); 241 count++; 242 } 243 return end_ - count; 244 } 245 246 private: 247 friend MultiGetContext; 248 MultiGetContext* ctx_; 249 size_t start_; 250 size_t end_; 251 uint64_t skip_mask_; 252 Range(MultiGetContext * ctx,size_t num_keys)253 Range(MultiGetContext* ctx, size_t num_keys) 254 : ctx_(ctx), start_(0), end_(num_keys), skip_mask_(0) {} 255 }; 256 257 // Return the initial range that encompasses all the keys in the batch GetMultiGetRange()258 Range GetMultiGetRange() { return Range(this, num_keys_); } 259 }; 260 261 } // namespace ROCKSDB_NAMESPACE 262