1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #pragma once
11 #include <stdio.h>
12 #include <memory>
13 #include <string>
14 #include <utility>
15 #include "db/lookup_key.h"
16 #include "db/merge_context.h"
17 #include "logging/logging.h"
18 #include "monitoring/perf_context_imp.h"
19 #include "rocksdb/comparator.h"
20 #include "rocksdb/db.h"
21 #include "rocksdb/filter_policy.h"
22 #include "rocksdb/slice.h"
23 #include "rocksdb/slice_transform.h"
24 #include "rocksdb/table.h"
25 #include "rocksdb/types.h"
26 #include "util/coding.h"
27 #include "util/user_comparator_wrapper.h"
28
29 namespace ROCKSDB_NAMESPACE {
30
31 // The file declares data structures and functions that deal with internal
32 // keys.
33 // Each internal key contains a user key, a sequence number (SequenceNumber)
34 // and a type (ValueType), and they are usually encoded together.
35 // There are some related helper classes here.
36
37 class InternalKey;
38
39 // Value types encoded as the last component of internal keys.
40 // DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
41 // data structures.
42 // The highest bit of the value type needs to be reserved to SST tables
43 // for them to do more flexible encoding.
44 enum ValueType : unsigned char {
45 kTypeDeletion = 0x0,
46 kTypeValue = 0x1,
47 kTypeMerge = 0x2,
48 kTypeLogData = 0x3, // WAL only.
49 kTypeColumnFamilyDeletion = 0x4, // WAL only.
50 kTypeColumnFamilyValue = 0x5, // WAL only.
51 kTypeColumnFamilyMerge = 0x6, // WAL only.
52 kTypeSingleDeletion = 0x7,
53 kTypeColumnFamilySingleDeletion = 0x8, // WAL only.
54 kTypeBeginPrepareXID = 0x9, // WAL only.
55 kTypeEndPrepareXID = 0xA, // WAL only.
56 kTypeCommitXID = 0xB, // WAL only.
57 kTypeRollbackXID = 0xC, // WAL only.
58 kTypeNoop = 0xD, // WAL only.
59 kTypeColumnFamilyRangeDeletion = 0xE, // WAL only.
60 kTypeRangeDeletion = 0xF, // meta block
61 kTypeColumnFamilyBlobIndex = 0x10, // Blob DB only
62 kTypeBlobIndex = 0x11, // Blob DB only
63 // When the prepared record is also persisted in db, we use a different
64 // record. This is to ensure that the WAL that is generated by a WritePolicy
65 // is not mistakenly read by another, which would result into data
66 // inconsistency.
67 kTypeBeginPersistedPrepareXID = 0x12, // WAL only.
68 // Similar to kTypeBeginPersistedPrepareXID, this is to ensure that WAL
69 // generated by WriteUnprepared write policy is not mistakenly read by
70 // another.
71 kTypeBeginUnprepareXID = 0x13, // WAL only.
72 kMaxValue = 0x7F // Not used for storing records.
73 };
74
75 // Defined in dbformat.cc
76 extern const ValueType kValueTypeForSeek;
77 extern const ValueType kValueTypeForSeekForPrev;
78
79 // Checks whether a type is an inline value type
80 // (i.e. a type used in memtable skiplist and sst file datablock).
IsValueType(ValueType t)81 inline bool IsValueType(ValueType t) {
82 return t <= kTypeMerge || t == kTypeSingleDeletion || t == kTypeBlobIndex;
83 }
84
85 // Checks whether a type is from user operation
86 // kTypeRangeDeletion is in meta block so this API is separated from above
IsExtendedValueType(ValueType t)87 inline bool IsExtendedValueType(ValueType t) {
88 return IsValueType(t) || t == kTypeRangeDeletion;
89 }
90
91 // We leave eight bits empty at the bottom so a type and sequence#
92 // can be packed together into 64-bits.
93 static const SequenceNumber kMaxSequenceNumber = ((0x1ull << 56) - 1);
94
95 static const SequenceNumber kDisableGlobalSequenceNumber = port::kMaxUint64;
96
97 // The data structure that represents an internal key in the way that user_key,
98 // sequence number and type are stored in separated forms.
99 struct ParsedInternalKey {
100 Slice user_key;
101 SequenceNumber sequence;
102 ValueType type;
103
ParsedInternalKeyParsedInternalKey104 ParsedInternalKey()
105 : sequence(kMaxSequenceNumber) // Make code analyzer happy
106 {} // Intentionally left uninitialized (for speed)
107 // u contains timestamp if user timestamp feature is enabled.
ParsedInternalKeyParsedInternalKey108 ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
109 : user_key(u), sequence(seq), type(t) {}
110 std::string DebugString(bool hex = false) const;
111
clearParsedInternalKey112 void clear() {
113 user_key.clear();
114 sequence = 0;
115 type = kTypeDeletion;
116 }
117 };
118
119 // Return the length of the encoding of "key".
InternalKeyEncodingLength(const ParsedInternalKey & key)120 inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
121 return key.user_key.size() + 8;
122 }
123
124 // Pack a sequence number and a ValueType into a uint64_t
125 extern uint64_t PackSequenceAndType(uint64_t seq, ValueType t);
126
127 // Given the result of PackSequenceAndType, store the sequence number in *seq
128 // and the ValueType in *t.
129 extern void UnPackSequenceAndType(uint64_t packed, uint64_t* seq, ValueType* t);
130
131 EntryType GetEntryType(ValueType value_type);
132
133 // Append the serialization of "key" to *result.
134 extern void AppendInternalKey(std::string* result,
135 const ParsedInternalKey& key);
136
137 // Append the serialization of "key" to *result, replacing the original
138 // timestamp with argument ts.
139 extern void AppendInternalKeyWithDifferentTimestamp(
140 std::string* result, const ParsedInternalKey& key, const Slice& ts);
141
142 // Serialized internal key consists of user key followed by footer.
143 // This function appends the footer to *result, assuming that *result already
144 // contains the user key at the end.
145 extern void AppendInternalKeyFooter(std::string* result, SequenceNumber s,
146 ValueType t);
147
148 // Attempt to parse an internal key from "internal_key". On success,
149 // stores the parsed data in "*result", and returns true.
150 //
151 // On error, returns false, leaves "*result" in an undefined state.
152 extern bool ParseInternalKey(const Slice& internal_key,
153 ParsedInternalKey* result);
154
155 // Returns the user key portion of an internal key.
ExtractUserKey(const Slice & internal_key)156 inline Slice ExtractUserKey(const Slice& internal_key) {
157 assert(internal_key.size() >= 8);
158 return Slice(internal_key.data(), internal_key.size() - 8);
159 }
160
ExtractUserKeyAndStripTimestamp(const Slice & internal_key,size_t ts_sz)161 inline Slice ExtractUserKeyAndStripTimestamp(const Slice& internal_key,
162 size_t ts_sz) {
163 assert(internal_key.size() >= 8 + ts_sz);
164 return Slice(internal_key.data(), internal_key.size() - 8 - ts_sz);
165 }
166
StripTimestampFromUserKey(const Slice & user_key,size_t ts_sz)167 inline Slice StripTimestampFromUserKey(const Slice& user_key, size_t ts_sz) {
168 assert(user_key.size() >= ts_sz);
169 return Slice(user_key.data(), user_key.size() - ts_sz);
170 }
171
ExtractTimestampFromUserKey(const Slice & user_key,size_t ts_sz)172 inline Slice ExtractTimestampFromUserKey(const Slice& user_key, size_t ts_sz) {
173 assert(user_key.size() >= ts_sz);
174 return Slice(user_key.data() + user_key.size() - ts_sz, ts_sz);
175 }
176
ExtractInternalKeyFooter(const Slice & internal_key)177 inline uint64_t ExtractInternalKeyFooter(const Slice& internal_key) {
178 assert(internal_key.size() >= 8);
179 const size_t n = internal_key.size();
180 return DecodeFixed64(internal_key.data() + n - 8);
181 }
182
ExtractValueType(const Slice & internal_key)183 inline ValueType ExtractValueType(const Slice& internal_key) {
184 uint64_t num = ExtractInternalKeyFooter(internal_key);
185 unsigned char c = num & 0xff;
186 return static_cast<ValueType>(c);
187 }
188
189 // A comparator for internal keys that uses a specified comparator for
190 // the user key portion and breaks ties by decreasing sequence number.
191 class InternalKeyComparator
192 #ifdef NDEBUG
193 final
194 #endif
195 : public Comparator {
196 private:
197 UserComparatorWrapper user_comparator_;
198 std::string name_;
199
200 public:
InternalKeyComparator(const Comparator * c)201 explicit InternalKeyComparator(const Comparator* c)
202 : Comparator(c->timestamp_size()),
203 user_comparator_(c),
204 name_("rocksdb.InternalKeyComparator:" +
205 std::string(user_comparator_.Name())) {}
~InternalKeyComparator()206 virtual ~InternalKeyComparator() {}
207
208 virtual const char* Name() const override;
209 virtual int Compare(const Slice& a, const Slice& b) const override;
210 // Same as Compare except that it excludes the value type from comparison
211 virtual int CompareKeySeq(const Slice& a, const Slice& b) const;
212 virtual void FindShortestSeparator(std::string* start,
213 const Slice& limit) const override;
214 virtual void FindShortSuccessor(std::string* key) const override;
215
user_comparator()216 const Comparator* user_comparator() const {
217 return user_comparator_.user_comparator();
218 }
219
220 int Compare(const InternalKey& a, const InternalKey& b) const;
221 int Compare(const ParsedInternalKey& a, const ParsedInternalKey& b) const;
GetRootComparator()222 virtual const Comparator* GetRootComparator() const override {
223 return user_comparator_.GetRootComparator();
224 }
225 };
226
227 // The class represent the internal key in encoded form.
228 class InternalKey {
229 private:
230 std::string rep_;
231
232 public:
InternalKey()233 InternalKey() {} // Leave rep_ as empty to indicate it is invalid
InternalKey(const Slice & _user_key,SequenceNumber s,ValueType t)234 InternalKey(const Slice& _user_key, SequenceNumber s, ValueType t) {
235 AppendInternalKey(&rep_, ParsedInternalKey(_user_key, s, t));
236 }
237
238 // sets the internal key to be bigger or equal to all internal keys with this
239 // user key
SetMaxPossibleForUserKey(const Slice & _user_key)240 void SetMaxPossibleForUserKey(const Slice& _user_key) {
241 AppendInternalKey(
242 &rep_, ParsedInternalKey(_user_key, 0, static_cast<ValueType>(0)));
243 }
244
245 // sets the internal key to be smaller or equal to all internal keys with this
246 // user key
SetMinPossibleForUserKey(const Slice & _user_key)247 void SetMinPossibleForUserKey(const Slice& _user_key) {
248 AppendInternalKey(&rep_, ParsedInternalKey(_user_key, kMaxSequenceNumber,
249 kValueTypeForSeek));
250 }
251
Valid()252 bool Valid() const {
253 ParsedInternalKey parsed;
254 return ParseInternalKey(Slice(rep_), &parsed);
255 }
256
DecodeFrom(const Slice & s)257 void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); }
Encode()258 Slice Encode() const {
259 assert(!rep_.empty());
260 return rep_;
261 }
262
user_key()263 Slice user_key() const { return ExtractUserKey(rep_); }
size()264 size_t size() { return rep_.size(); }
265
Set(const Slice & _user_key,SequenceNumber s,ValueType t)266 void Set(const Slice& _user_key, SequenceNumber s, ValueType t) {
267 SetFrom(ParsedInternalKey(_user_key, s, t));
268 }
269
SetFrom(const ParsedInternalKey & p)270 void SetFrom(const ParsedInternalKey& p) {
271 rep_.clear();
272 AppendInternalKey(&rep_, p);
273 }
274
Clear()275 void Clear() { rep_.clear(); }
276
277 // The underlying representation.
278 // Intended only to be used together with ConvertFromUserKey().
rep()279 std::string* rep() { return &rep_; }
280
281 // Assuming that *rep() contains a user key, this method makes internal key
282 // out of it in-place. This saves a memcpy compared to Set()/SetFrom().
ConvertFromUserKey(SequenceNumber s,ValueType t)283 void ConvertFromUserKey(SequenceNumber s, ValueType t) {
284 AppendInternalKeyFooter(&rep_, s, t);
285 }
286
287 std::string DebugString(bool hex = false) const;
288 };
289
Compare(const InternalKey & a,const InternalKey & b)290 inline int InternalKeyComparator::Compare(const InternalKey& a,
291 const InternalKey& b) const {
292 return Compare(a.Encode(), b.Encode());
293 }
294
ParseInternalKey(const Slice & internal_key,ParsedInternalKey * result)295 inline bool ParseInternalKey(const Slice& internal_key,
296 ParsedInternalKey* result) {
297 const size_t n = internal_key.size();
298 if (n < 8) return false;
299 uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
300 unsigned char c = num & 0xff;
301 result->sequence = num >> 8;
302 result->type = static_cast<ValueType>(c);
303 assert(result->type <= ValueType::kMaxValue);
304 result->user_key = Slice(internal_key.data(), n - 8);
305 return IsExtendedValueType(result->type);
306 }
307
308 // Update the sequence number in the internal key.
309 // Guarantees not to invalidate ikey.data().
UpdateInternalKey(std::string * ikey,uint64_t seq,ValueType t)310 inline void UpdateInternalKey(std::string* ikey, uint64_t seq, ValueType t) {
311 size_t ikey_sz = ikey->size();
312 assert(ikey_sz >= 8);
313 uint64_t newval = (seq << 8) | t;
314
315 // Note: Since C++11, strings are guaranteed to be stored contiguously and
316 // string::operator[]() is guaranteed not to change ikey.data().
317 EncodeFixed64(&(*ikey)[ikey_sz - 8], newval);
318 }
319
320 // Get the sequence number from the internal key
GetInternalKeySeqno(const Slice & internal_key)321 inline uint64_t GetInternalKeySeqno(const Slice& internal_key) {
322 const size_t n = internal_key.size();
323 assert(n >= 8);
324 uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
325 return num >> 8;
326 }
327
328 // The class to store keys in an efficient way. It allows:
329 // 1. Users can either copy the key into it, or have it point to an unowned
330 // address.
331 // 2. For copied key, a short inline buffer is kept to reduce memory
332 // allocation for smaller keys.
333 // 3. It tracks user key or internal key, and allow conversion between them.
334 class IterKey {
335 public:
IterKey()336 IterKey()
337 : buf_(space_),
338 key_(buf_),
339 key_size_(0),
340 buf_size_(sizeof(space_)),
341 is_user_key_(true) {}
342 // No copying allowed
343 IterKey(const IterKey&) = delete;
344 void operator=(const IterKey&) = delete;
345
~IterKey()346 ~IterKey() { ResetBuffer(); }
347
348 // The bool will be picked up by the next calls to SetKey
SetIsUserKey(bool is_user_key)349 void SetIsUserKey(bool is_user_key) { is_user_key_ = is_user_key; }
350
351 // Returns the key in whichever format that was provided to KeyIter
GetKey()352 Slice GetKey() const { return Slice(key_, key_size_); }
353
GetInternalKey()354 Slice GetInternalKey() const {
355 assert(!IsUserKey());
356 return Slice(key_, key_size_);
357 }
358
GetUserKey()359 Slice GetUserKey() const {
360 if (IsUserKey()) {
361 return Slice(key_, key_size_);
362 } else {
363 assert(key_size_ >= 8);
364 return Slice(key_, key_size_ - 8);
365 }
366 }
367
Size()368 size_t Size() const { return key_size_; }
369
Clear()370 void Clear() { key_size_ = 0; }
371
372 // Append "non_shared_data" to its back, from "shared_len"
373 // This function is used in Block::Iter::ParseNextKey
374 // shared_len: bytes in [0, shard_len-1] would be remained
375 // non_shared_data: data to be append, its length must be >= non_shared_len
TrimAppend(const size_t shared_len,const char * non_shared_data,const size_t non_shared_len)376 void TrimAppend(const size_t shared_len, const char* non_shared_data,
377 const size_t non_shared_len) {
378 assert(shared_len <= key_size_);
379 size_t total_size = shared_len + non_shared_len;
380
381 if (IsKeyPinned() /* key is not in buf_ */) {
382 // Copy the key from external memory to buf_ (copy shared_len bytes)
383 EnlargeBufferIfNeeded(total_size);
384 memcpy(buf_, key_, shared_len);
385 } else if (total_size > buf_size_) {
386 // Need to allocate space, delete previous space
387 char* p = new char[total_size];
388 memcpy(p, key_, shared_len);
389
390 if (buf_ != space_) {
391 delete[] buf_;
392 }
393
394 buf_ = p;
395 buf_size_ = total_size;
396 }
397
398 memcpy(buf_ + shared_len, non_shared_data, non_shared_len);
399 key_ = buf_;
400 key_size_ = total_size;
401 }
402
403 Slice SetKey(const Slice& key, bool copy = true) {
404 // is_user_key_ expected to be set already via SetIsUserKey
405 return SetKeyImpl(key, copy);
406 }
407
408 Slice SetUserKey(const Slice& key, bool copy = true) {
409 is_user_key_ = true;
410 return SetKeyImpl(key, copy);
411 }
412
413 Slice SetInternalKey(const Slice& key, bool copy = true) {
414 is_user_key_ = false;
415 return SetKeyImpl(key, copy);
416 }
417
418 // Copies the content of key, updates the reference to the user key in ikey
419 // and returns a Slice referencing the new copy.
SetInternalKey(const Slice & key,ParsedInternalKey * ikey)420 Slice SetInternalKey(const Slice& key, ParsedInternalKey* ikey) {
421 size_t key_n = key.size();
422 assert(key_n >= 8);
423 SetInternalKey(key);
424 ikey->user_key = Slice(key_, key_n - 8);
425 return Slice(key_, key_n);
426 }
427
428 // Copy the key into IterKey own buf_
OwnKey()429 void OwnKey() {
430 assert(IsKeyPinned() == true);
431
432 Reserve(key_size_);
433 memcpy(buf_, key_, key_size_);
434 key_ = buf_;
435 }
436
437 // Update the sequence number in the internal key. Guarantees not to
438 // invalidate slices to the key (and the user key).
UpdateInternalKey(uint64_t seq,ValueType t)439 void UpdateInternalKey(uint64_t seq, ValueType t) {
440 assert(!IsKeyPinned());
441 assert(key_size_ >= 8);
442 uint64_t newval = (seq << 8) | t;
443 EncodeFixed64(&buf_[key_size_ - 8], newval);
444 }
445
IsKeyPinned()446 bool IsKeyPinned() const { return (key_ != buf_); }
447
448 void SetInternalKey(const Slice& key_prefix, const Slice& user_key,
449 SequenceNumber s,
450 ValueType value_type = kValueTypeForSeek,
451 const Slice* ts = nullptr) {
452 size_t psize = key_prefix.size();
453 size_t usize = user_key.size();
454 size_t ts_sz = (ts != nullptr ? ts->size() : 0);
455 EnlargeBufferIfNeeded(psize + usize + sizeof(uint64_t) + ts_sz);
456 if (psize > 0) {
457 memcpy(buf_, key_prefix.data(), psize);
458 }
459 memcpy(buf_ + psize, user_key.data(), usize);
460 if (ts) {
461 memcpy(buf_ + psize + usize, ts->data(), ts_sz);
462 }
463 EncodeFixed64(buf_ + usize + psize + ts_sz,
464 PackSequenceAndType(s, value_type));
465
466 key_ = buf_;
467 key_size_ = psize + usize + sizeof(uint64_t) + ts_sz;
468 is_user_key_ = false;
469 }
470
471 void SetInternalKey(const Slice& user_key, SequenceNumber s,
472 ValueType value_type = kValueTypeForSeek,
473 const Slice* ts = nullptr) {
474 SetInternalKey(Slice(), user_key, s, value_type, ts);
475 }
476
Reserve(size_t size)477 void Reserve(size_t size) {
478 EnlargeBufferIfNeeded(size);
479 key_size_ = size;
480 }
481
SetInternalKey(const ParsedInternalKey & parsed_key)482 void SetInternalKey(const ParsedInternalKey& parsed_key) {
483 SetInternalKey(Slice(), parsed_key);
484 }
485
SetInternalKey(const Slice & key_prefix,const ParsedInternalKey & parsed_key_suffix)486 void SetInternalKey(const Slice& key_prefix,
487 const ParsedInternalKey& parsed_key_suffix) {
488 SetInternalKey(key_prefix, parsed_key_suffix.user_key,
489 parsed_key_suffix.sequence, parsed_key_suffix.type);
490 }
491
EncodeLengthPrefixedKey(const Slice & key)492 void EncodeLengthPrefixedKey(const Slice& key) {
493 auto size = key.size();
494 EnlargeBufferIfNeeded(size + static_cast<size_t>(VarintLength(size)));
495 char* ptr = EncodeVarint32(buf_, static_cast<uint32_t>(size));
496 memcpy(ptr, key.data(), size);
497 key_ = buf_;
498 is_user_key_ = true;
499 }
500
IsUserKey()501 bool IsUserKey() const { return is_user_key_; }
502
503 private:
504 char* buf_;
505 const char* key_;
506 size_t key_size_;
507 size_t buf_size_;
508 char space_[32]; // Avoid allocation for short keys
509 bool is_user_key_;
510
SetKeyImpl(const Slice & key,bool copy)511 Slice SetKeyImpl(const Slice& key, bool copy) {
512 size_t size = key.size();
513 if (copy) {
514 // Copy key to buf_
515 EnlargeBufferIfNeeded(size);
516 memcpy(buf_, key.data(), size);
517 key_ = buf_;
518 } else {
519 // Update key_ to point to external memory
520 key_ = key.data();
521 }
522 key_size_ = size;
523 return Slice(key_, key_size_);
524 }
525
ResetBuffer()526 void ResetBuffer() {
527 if (buf_ != space_) {
528 delete[] buf_;
529 buf_ = space_;
530 }
531 buf_size_ = sizeof(space_);
532 key_size_ = 0;
533 }
534
535 // Enlarge the buffer size if needed based on key_size.
536 // By default, static allocated buffer is used. Once there is a key
537 // larger than the static allocated buffer, another buffer is dynamically
538 // allocated, until a larger key buffer is requested. In that case, we
539 // reallocate buffer and delete the old one.
EnlargeBufferIfNeeded(size_t key_size)540 void EnlargeBufferIfNeeded(size_t key_size) {
541 // If size is smaller than buffer size, continue using current buffer,
542 // or the static allocated one, as default
543 if (key_size > buf_size_) {
544 EnlargeBuffer(key_size);
545 }
546 }
547
548 void EnlargeBuffer(size_t key_size);
549 };
550
551 // Convert from a SliceTranform of user keys, to a SliceTransform of
552 // user keys.
553 class InternalKeySliceTransform : public SliceTransform {
554 public:
InternalKeySliceTransform(const SliceTransform * transform)555 explicit InternalKeySliceTransform(const SliceTransform* transform)
556 : transform_(transform) {}
557
Name()558 virtual const char* Name() const override { return transform_->Name(); }
559
Transform(const Slice & src)560 virtual Slice Transform(const Slice& src) const override {
561 auto user_key = ExtractUserKey(src);
562 return transform_->Transform(user_key);
563 }
564
InDomain(const Slice & src)565 virtual bool InDomain(const Slice& src) const override {
566 auto user_key = ExtractUserKey(src);
567 return transform_->InDomain(user_key);
568 }
569
InRange(const Slice & dst)570 virtual bool InRange(const Slice& dst) const override {
571 auto user_key = ExtractUserKey(dst);
572 return transform_->InRange(user_key);
573 }
574
user_prefix_extractor()575 const SliceTransform* user_prefix_extractor() const { return transform_; }
576
577 private:
578 // Like comparator, InternalKeySliceTransform will not take care of the
579 // deletion of transform_
580 const SliceTransform* const transform_;
581 };
582
583 // Read the key of a record from a write batch.
584 // if this record represent the default column family then cf_record
585 // must be passed as false, otherwise it must be passed as true.
586 extern bool ReadKeyFromWriteBatchEntry(Slice* input, Slice* key,
587 bool cf_record);
588
589 // Read record from a write batch piece from input.
590 // tag, column_family, key, value and blob are return values. Callers own the
591 // Slice they point to.
592 // Tag is defined as ValueType.
593 // input will be advanced to after the record.
594 extern Status ReadRecordFromWriteBatch(Slice* input, char* tag,
595 uint32_t* column_family, Slice* key,
596 Slice* value, Slice* blob, Slice* xid);
597
598 // When user call DeleteRange() to delete a range of keys,
599 // we will store a serialized RangeTombstone in MemTable and SST.
600 // the struct here is a easy-understood form
601 // start/end_key_ is the start/end user key of the range to be deleted
602 struct RangeTombstone {
603 Slice start_key_;
604 Slice end_key_;
605 SequenceNumber seq_;
606 RangeTombstone() = default;
RangeTombstoneRangeTombstone607 RangeTombstone(Slice sk, Slice ek, SequenceNumber sn)
608 : start_key_(sk), end_key_(ek), seq_(sn) {}
609
RangeTombstoneRangeTombstone610 RangeTombstone(ParsedInternalKey parsed_key, Slice value) {
611 start_key_ = parsed_key.user_key;
612 seq_ = parsed_key.sequence;
613 end_key_ = value;
614 }
615
616 // be careful to use Serialize(), allocates new memory
SerializeRangeTombstone617 std::pair<InternalKey, Slice> Serialize() const {
618 auto key = InternalKey(start_key_, seq_, kTypeRangeDeletion);
619 Slice value = end_key_;
620 return std::make_pair(std::move(key), std::move(value));
621 }
622
623 // be careful to use SerializeKey(), allocates new memory
SerializeKeyRangeTombstone624 InternalKey SerializeKey() const {
625 return InternalKey(start_key_, seq_, kTypeRangeDeletion);
626 }
627
628 // The tombstone end-key is exclusive, so we generate an internal-key here
629 // which has a similar property. Using kMaxSequenceNumber guarantees that
630 // the returned internal-key will compare less than any other internal-key
631 // with the same user-key. This in turn guarantees that the serialized
632 // end-key for a tombstone such as [a-b] will compare less than the key "b".
633 //
634 // be careful to use SerializeEndKey(), allocates new memory
SerializeEndKeyRangeTombstone635 InternalKey SerializeEndKey() const {
636 return InternalKey(end_key_, kMaxSequenceNumber, kTypeRangeDeletion);
637 }
638 };
639
Compare(const Slice & akey,const Slice & bkey)640 inline int InternalKeyComparator::Compare(const Slice& akey,
641 const Slice& bkey) const {
642 // Order by:
643 // increasing user key (according to user-supplied comparator)
644 // decreasing sequence number
645 // decreasing type (though sequence# should be enough to disambiguate)
646 int r = user_comparator_.Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
647 if (r == 0) {
648 const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8);
649 const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8);
650 if (anum > bnum) {
651 r = -1;
652 } else if (anum < bnum) {
653 r = +1;
654 }
655 }
656 return r;
657 }
658
CompareKeySeq(const Slice & akey,const Slice & bkey)659 inline int InternalKeyComparator::CompareKeySeq(const Slice& akey,
660 const Slice& bkey) const {
661 // Order by:
662 // increasing user key (according to user-supplied comparator)
663 // decreasing sequence number
664 int r = user_comparator_.Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
665 if (r == 0) {
666 // Shift the number to exclude the last byte which contains the value type
667 const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8) >> 8;
668 const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8) >> 8;
669 if (anum > bnum) {
670 r = -1;
671 } else if (anum < bnum) {
672 r = +1;
673 }
674 }
675 return r;
676 }
677
678 // Wrap InternalKeyComparator as a comparator class for ParsedInternalKey.
679 struct ParsedInternalKeyComparator {
ParsedInternalKeyComparatorParsedInternalKeyComparator680 explicit ParsedInternalKeyComparator(const InternalKeyComparator* c)
681 : cmp(c) {}
682
operatorParsedInternalKeyComparator683 bool operator()(const ParsedInternalKey& a,
684 const ParsedInternalKey& b) const {
685 return cmp->Compare(a, b) < 0;
686 }
687
688 const InternalKeyComparator* cmp;
689 };
690
691 } // namespace ROCKSDB_NAMESPACE
692