1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #pragma once
11 #include <stdio.h>
12 #include <memory>
13 #include <string>
14 #include <utility>
15 #include "db/lookup_key.h"
16 #include "db/merge_context.h"
17 #include "logging/logging.h"
18 #include "monitoring/perf_context_imp.h"
19 #include "rocksdb/comparator.h"
20 #include "rocksdb/db.h"
21 #include "rocksdb/filter_policy.h"
22 #include "rocksdb/slice.h"
23 #include "rocksdb/slice_transform.h"
24 #include "rocksdb/table.h"
25 #include "rocksdb/types.h"
26 #include "util/coding.h"
27 #include "util/user_comparator_wrapper.h"
28 
29 namespace ROCKSDB_NAMESPACE {
30 
31 // The file declares data structures and functions that deal with internal
32 // keys.
33 // Each internal key contains a user key, a sequence number (SequenceNumber)
34 // and a type (ValueType), and they are usually encoded together.
35 // There are some related helper classes here.
36 
37 class InternalKey;
38 
39 // Value types encoded as the last component of internal keys.
40 // DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
41 // data structures.
42 // The highest bit of the value type needs to be reserved to SST tables
43 // for them to do more flexible encoding.
44 enum ValueType : unsigned char {
45   kTypeDeletion = 0x0,
46   kTypeValue = 0x1,
47   kTypeMerge = 0x2,
48   kTypeLogData = 0x3,               // WAL only.
49   kTypeColumnFamilyDeletion = 0x4,  // WAL only.
50   kTypeColumnFamilyValue = 0x5,     // WAL only.
51   kTypeColumnFamilyMerge = 0x6,     // WAL only.
52   kTypeSingleDeletion = 0x7,
53   kTypeColumnFamilySingleDeletion = 0x8,  // WAL only.
54   kTypeBeginPrepareXID = 0x9,             // WAL only.
55   kTypeEndPrepareXID = 0xA,               // WAL only.
56   kTypeCommitXID = 0xB,                   // WAL only.
57   kTypeRollbackXID = 0xC,                 // WAL only.
58   kTypeNoop = 0xD,                        // WAL only.
59   kTypeColumnFamilyRangeDeletion = 0xE,   // WAL only.
60   kTypeRangeDeletion = 0xF,               // meta block
61   kTypeColumnFamilyBlobIndex = 0x10,      // Blob DB only
62   kTypeBlobIndex = 0x11,                  // Blob DB only
63   // When the prepared record is also persisted in db, we use a different
64   // record. This is to ensure that the WAL that is generated by a WritePolicy
65   // is not mistakenly read by another, which would result into data
66   // inconsistency.
67   kTypeBeginPersistedPrepareXID = 0x12,  // WAL only.
68   // Similar to kTypeBeginPersistedPrepareXID, this is to ensure that WAL
69   // generated by WriteUnprepared write policy is not mistakenly read by
70   // another.
71   kTypeBeginUnprepareXID = 0x13,  // WAL only.
72   kMaxValue = 0x7F                // Not used for storing records.
73 };
74 
75 // Defined in dbformat.cc
76 extern const ValueType kValueTypeForSeek;
77 extern const ValueType kValueTypeForSeekForPrev;
78 
79 // Checks whether a type is an inline value type
80 // (i.e. a type used in memtable skiplist and sst file datablock).
IsValueType(ValueType t)81 inline bool IsValueType(ValueType t) {
82   return t <= kTypeMerge || t == kTypeSingleDeletion || t == kTypeBlobIndex;
83 }
84 
85 // Checks whether a type is from user operation
86 // kTypeRangeDeletion is in meta block so this API is separated from above
IsExtendedValueType(ValueType t)87 inline bool IsExtendedValueType(ValueType t) {
88   return IsValueType(t) || t == kTypeRangeDeletion;
89 }
90 
91 // We leave eight bits empty at the bottom so a type and sequence#
92 // can be packed together into 64-bits.
93 static const SequenceNumber kMaxSequenceNumber = ((0x1ull << 56) - 1);
94 
95 static const SequenceNumber kDisableGlobalSequenceNumber = port::kMaxUint64;
96 
97 // The data structure that represents an internal key in the way that user_key,
98 // sequence number and type are stored in separated forms.
99 struct ParsedInternalKey {
100   Slice user_key;
101   SequenceNumber sequence;
102   ValueType type;
103 
ParsedInternalKeyParsedInternalKey104   ParsedInternalKey()
105       : sequence(kMaxSequenceNumber)  // Make code analyzer happy
106   {}  // Intentionally left uninitialized (for speed)
107   // u contains timestamp if user timestamp feature is enabled.
ParsedInternalKeyParsedInternalKey108   ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
109       : user_key(u), sequence(seq), type(t) {}
110   std::string DebugString(bool hex = false) const;
111 
clearParsedInternalKey112   void clear() {
113     user_key.clear();
114     sequence = 0;
115     type = kTypeDeletion;
116   }
117 };
118 
119 // Return the length of the encoding of "key".
InternalKeyEncodingLength(const ParsedInternalKey & key)120 inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
121   return key.user_key.size() + 8;
122 }
123 
124 // Pack a sequence number and a ValueType into a uint64_t
125 extern uint64_t PackSequenceAndType(uint64_t seq, ValueType t);
126 
127 // Given the result of PackSequenceAndType, store the sequence number in *seq
128 // and the ValueType in *t.
129 extern void UnPackSequenceAndType(uint64_t packed, uint64_t* seq, ValueType* t);
130 
131 EntryType GetEntryType(ValueType value_type);
132 
133 // Append the serialization of "key" to *result.
134 extern void AppendInternalKey(std::string* result,
135                               const ParsedInternalKey& key);
136 
137 // Append the serialization of "key" to *result, replacing the original
138 // timestamp with argument ts.
139 extern void AppendInternalKeyWithDifferentTimestamp(
140     std::string* result, const ParsedInternalKey& key, const Slice& ts);
141 
142 // Serialized internal key consists of user key followed by footer.
143 // This function appends the footer to *result, assuming that *result already
144 // contains the user key at the end.
145 extern void AppendInternalKeyFooter(std::string* result, SequenceNumber s,
146                                     ValueType t);
147 
148 // Attempt to parse an internal key from "internal_key".  On success,
149 // stores the parsed data in "*result", and returns true.
150 //
151 // On error, returns false, leaves "*result" in an undefined state.
152 extern bool ParseInternalKey(const Slice& internal_key,
153                              ParsedInternalKey* result);
154 
155 // Returns the user key portion of an internal key.
ExtractUserKey(const Slice & internal_key)156 inline Slice ExtractUserKey(const Slice& internal_key) {
157   assert(internal_key.size() >= 8);
158   return Slice(internal_key.data(), internal_key.size() - 8);
159 }
160 
ExtractUserKeyAndStripTimestamp(const Slice & internal_key,size_t ts_sz)161 inline Slice ExtractUserKeyAndStripTimestamp(const Slice& internal_key,
162                                              size_t ts_sz) {
163   assert(internal_key.size() >= 8 + ts_sz);
164   return Slice(internal_key.data(), internal_key.size() - 8 - ts_sz);
165 }
166 
StripTimestampFromUserKey(const Slice & user_key,size_t ts_sz)167 inline Slice StripTimestampFromUserKey(const Slice& user_key, size_t ts_sz) {
168   assert(user_key.size() >= ts_sz);
169   return Slice(user_key.data(), user_key.size() - ts_sz);
170 }
171 
ExtractTimestampFromUserKey(const Slice & user_key,size_t ts_sz)172 inline Slice ExtractTimestampFromUserKey(const Slice& user_key, size_t ts_sz) {
173   assert(user_key.size() >= ts_sz);
174   return Slice(user_key.data() + user_key.size() - ts_sz, ts_sz);
175 }
176 
ExtractInternalKeyFooter(const Slice & internal_key)177 inline uint64_t ExtractInternalKeyFooter(const Slice& internal_key) {
178   assert(internal_key.size() >= 8);
179   const size_t n = internal_key.size();
180   return DecodeFixed64(internal_key.data() + n - 8);
181 }
182 
ExtractValueType(const Slice & internal_key)183 inline ValueType ExtractValueType(const Slice& internal_key) {
184   uint64_t num = ExtractInternalKeyFooter(internal_key);
185   unsigned char c = num & 0xff;
186   return static_cast<ValueType>(c);
187 }
188 
189 // A comparator for internal keys that uses a specified comparator for
190 // the user key portion and breaks ties by decreasing sequence number.
191 class InternalKeyComparator
192 #ifdef NDEBUG
193     final
194 #endif
195     : public Comparator {
196  private:
197   UserComparatorWrapper user_comparator_;
198   std::string name_;
199 
200  public:
InternalKeyComparator(const Comparator * c)201   explicit InternalKeyComparator(const Comparator* c)
202       : Comparator(c->timestamp_size()),
203         user_comparator_(c),
204         name_("rocksdb.InternalKeyComparator:" +
205               std::string(user_comparator_.Name())) {}
~InternalKeyComparator()206   virtual ~InternalKeyComparator() {}
207 
208   virtual const char* Name() const override;
209   virtual int Compare(const Slice& a, const Slice& b) const override;
210   // Same as Compare except that it excludes the value type from comparison
211   virtual int CompareKeySeq(const Slice& a, const Slice& b) const;
212   virtual void FindShortestSeparator(std::string* start,
213                                      const Slice& limit) const override;
214   virtual void FindShortSuccessor(std::string* key) const override;
215 
user_comparator()216   const Comparator* user_comparator() const {
217     return user_comparator_.user_comparator();
218   }
219 
220   int Compare(const InternalKey& a, const InternalKey& b) const;
221   int Compare(const ParsedInternalKey& a, const ParsedInternalKey& b) const;
GetRootComparator()222   virtual const Comparator* GetRootComparator() const override {
223     return user_comparator_.GetRootComparator();
224   }
225 };
226 
227 // The class represent the internal key in encoded form.
228 class InternalKey {
229  private:
230   std::string rep_;
231 
232  public:
InternalKey()233   InternalKey() {}  // Leave rep_ as empty to indicate it is invalid
InternalKey(const Slice & _user_key,SequenceNumber s,ValueType t)234   InternalKey(const Slice& _user_key, SequenceNumber s, ValueType t) {
235     AppendInternalKey(&rep_, ParsedInternalKey(_user_key, s, t));
236   }
237 
238   // sets the internal key to be bigger or equal to all internal keys with this
239   // user key
SetMaxPossibleForUserKey(const Slice & _user_key)240   void SetMaxPossibleForUserKey(const Slice& _user_key) {
241     AppendInternalKey(
242         &rep_, ParsedInternalKey(_user_key, 0, static_cast<ValueType>(0)));
243   }
244 
245   // sets the internal key to be smaller or equal to all internal keys with this
246   // user key
SetMinPossibleForUserKey(const Slice & _user_key)247   void SetMinPossibleForUserKey(const Slice& _user_key) {
248     AppendInternalKey(&rep_, ParsedInternalKey(_user_key, kMaxSequenceNumber,
249                                                kValueTypeForSeek));
250   }
251 
Valid()252   bool Valid() const {
253     ParsedInternalKey parsed;
254     return ParseInternalKey(Slice(rep_), &parsed);
255   }
256 
DecodeFrom(const Slice & s)257   void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); }
Encode()258   Slice Encode() const {
259     assert(!rep_.empty());
260     return rep_;
261   }
262 
user_key()263   Slice user_key() const { return ExtractUserKey(rep_); }
size()264   size_t size() { return rep_.size(); }
265 
Set(const Slice & _user_key,SequenceNumber s,ValueType t)266   void Set(const Slice& _user_key, SequenceNumber s, ValueType t) {
267     SetFrom(ParsedInternalKey(_user_key, s, t));
268   }
269 
SetFrom(const ParsedInternalKey & p)270   void SetFrom(const ParsedInternalKey& p) {
271     rep_.clear();
272     AppendInternalKey(&rep_, p);
273   }
274 
Clear()275   void Clear() { rep_.clear(); }
276 
277   // The underlying representation.
278   // Intended only to be used together with ConvertFromUserKey().
rep()279   std::string* rep() { return &rep_; }
280 
281   // Assuming that *rep() contains a user key, this method makes internal key
282   // out of it in-place. This saves a memcpy compared to Set()/SetFrom().
ConvertFromUserKey(SequenceNumber s,ValueType t)283   void ConvertFromUserKey(SequenceNumber s, ValueType t) {
284     AppendInternalKeyFooter(&rep_, s, t);
285   }
286 
287   std::string DebugString(bool hex = false) const;
288 };
289 
Compare(const InternalKey & a,const InternalKey & b)290 inline int InternalKeyComparator::Compare(const InternalKey& a,
291                                           const InternalKey& b) const {
292   return Compare(a.Encode(), b.Encode());
293 }
294 
ParseInternalKey(const Slice & internal_key,ParsedInternalKey * result)295 inline bool ParseInternalKey(const Slice& internal_key,
296                              ParsedInternalKey* result) {
297   const size_t n = internal_key.size();
298   if (n < 8) return false;
299   uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
300   unsigned char c = num & 0xff;
301   result->sequence = num >> 8;
302   result->type = static_cast<ValueType>(c);
303   assert(result->type <= ValueType::kMaxValue);
304   result->user_key = Slice(internal_key.data(), n - 8);
305   return IsExtendedValueType(result->type);
306 }
307 
308 // Update the sequence number in the internal key.
309 // Guarantees not to invalidate ikey.data().
UpdateInternalKey(std::string * ikey,uint64_t seq,ValueType t)310 inline void UpdateInternalKey(std::string* ikey, uint64_t seq, ValueType t) {
311   size_t ikey_sz = ikey->size();
312   assert(ikey_sz >= 8);
313   uint64_t newval = (seq << 8) | t;
314 
315   // Note: Since C++11, strings are guaranteed to be stored contiguously and
316   // string::operator[]() is guaranteed not to change ikey.data().
317   EncodeFixed64(&(*ikey)[ikey_sz - 8], newval);
318 }
319 
320 // Get the sequence number from the internal key
GetInternalKeySeqno(const Slice & internal_key)321 inline uint64_t GetInternalKeySeqno(const Slice& internal_key) {
322   const size_t n = internal_key.size();
323   assert(n >= 8);
324   uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
325   return num >> 8;
326 }
327 
328 // The class to store keys in an efficient way. It allows:
329 // 1. Users can either copy the key into it, or have it point to an unowned
330 //    address.
331 // 2. For copied key, a short inline buffer is kept to reduce memory
332 //    allocation for smaller keys.
333 // 3. It tracks user key or internal key, and allow conversion between them.
334 class IterKey {
335  public:
IterKey()336   IterKey()
337       : buf_(space_),
338         key_(buf_),
339         key_size_(0),
340         buf_size_(sizeof(space_)),
341         is_user_key_(true) {}
342   // No copying allowed
343   IterKey(const IterKey&) = delete;
344   void operator=(const IterKey&) = delete;
345 
~IterKey()346   ~IterKey() { ResetBuffer(); }
347 
348   // The bool will be picked up by the next calls to SetKey
SetIsUserKey(bool is_user_key)349   void SetIsUserKey(bool is_user_key) { is_user_key_ = is_user_key; }
350 
351   // Returns the key in whichever format that was provided to KeyIter
GetKey()352   Slice GetKey() const { return Slice(key_, key_size_); }
353 
GetInternalKey()354   Slice GetInternalKey() const {
355     assert(!IsUserKey());
356     return Slice(key_, key_size_);
357   }
358 
GetUserKey()359   Slice GetUserKey() const {
360     if (IsUserKey()) {
361       return Slice(key_, key_size_);
362     } else {
363       assert(key_size_ >= 8);
364       return Slice(key_, key_size_ - 8);
365     }
366   }
367 
Size()368   size_t Size() const { return key_size_; }
369 
Clear()370   void Clear() { key_size_ = 0; }
371 
372   // Append "non_shared_data" to its back, from "shared_len"
373   // This function is used in Block::Iter::ParseNextKey
374   // shared_len: bytes in [0, shard_len-1] would be remained
375   // non_shared_data: data to be append, its length must be >= non_shared_len
TrimAppend(const size_t shared_len,const char * non_shared_data,const size_t non_shared_len)376   void TrimAppend(const size_t shared_len, const char* non_shared_data,
377                   const size_t non_shared_len) {
378     assert(shared_len <= key_size_);
379     size_t total_size = shared_len + non_shared_len;
380 
381     if (IsKeyPinned() /* key is not in buf_ */) {
382       // Copy the key from external memory to buf_ (copy shared_len bytes)
383       EnlargeBufferIfNeeded(total_size);
384       memcpy(buf_, key_, shared_len);
385     } else if (total_size > buf_size_) {
386       // Need to allocate space, delete previous space
387       char* p = new char[total_size];
388       memcpy(p, key_, shared_len);
389 
390       if (buf_ != space_) {
391         delete[] buf_;
392       }
393 
394       buf_ = p;
395       buf_size_ = total_size;
396     }
397 
398     memcpy(buf_ + shared_len, non_shared_data, non_shared_len);
399     key_ = buf_;
400     key_size_ = total_size;
401   }
402 
403   Slice SetKey(const Slice& key, bool copy = true) {
404     // is_user_key_ expected to be set already via SetIsUserKey
405     return SetKeyImpl(key, copy);
406   }
407 
408   Slice SetUserKey(const Slice& key, bool copy = true) {
409     is_user_key_ = true;
410     return SetKeyImpl(key, copy);
411   }
412 
413   Slice SetInternalKey(const Slice& key, bool copy = true) {
414     is_user_key_ = false;
415     return SetKeyImpl(key, copy);
416   }
417 
418   // Copies the content of key, updates the reference to the user key in ikey
419   // and returns a Slice referencing the new copy.
SetInternalKey(const Slice & key,ParsedInternalKey * ikey)420   Slice SetInternalKey(const Slice& key, ParsedInternalKey* ikey) {
421     size_t key_n = key.size();
422     assert(key_n >= 8);
423     SetInternalKey(key);
424     ikey->user_key = Slice(key_, key_n - 8);
425     return Slice(key_, key_n);
426   }
427 
428   // Copy the key into IterKey own buf_
OwnKey()429   void OwnKey() {
430     assert(IsKeyPinned() == true);
431 
432     Reserve(key_size_);
433     memcpy(buf_, key_, key_size_);
434     key_ = buf_;
435   }
436 
437   // Update the sequence number in the internal key.  Guarantees not to
438   // invalidate slices to the key (and the user key).
UpdateInternalKey(uint64_t seq,ValueType t)439   void UpdateInternalKey(uint64_t seq, ValueType t) {
440     assert(!IsKeyPinned());
441     assert(key_size_ >= 8);
442     uint64_t newval = (seq << 8) | t;
443     EncodeFixed64(&buf_[key_size_ - 8], newval);
444   }
445 
IsKeyPinned()446   bool IsKeyPinned() const { return (key_ != buf_); }
447 
448   void SetInternalKey(const Slice& key_prefix, const Slice& user_key,
449                       SequenceNumber s,
450                       ValueType value_type = kValueTypeForSeek,
451                       const Slice* ts = nullptr) {
452     size_t psize = key_prefix.size();
453     size_t usize = user_key.size();
454     size_t ts_sz = (ts != nullptr ? ts->size() : 0);
455     EnlargeBufferIfNeeded(psize + usize + sizeof(uint64_t) + ts_sz);
456     if (psize > 0) {
457       memcpy(buf_, key_prefix.data(), psize);
458     }
459     memcpy(buf_ + psize, user_key.data(), usize);
460     if (ts) {
461       memcpy(buf_ + psize + usize, ts->data(), ts_sz);
462     }
463     EncodeFixed64(buf_ + usize + psize + ts_sz,
464                   PackSequenceAndType(s, value_type));
465 
466     key_ = buf_;
467     key_size_ = psize + usize + sizeof(uint64_t) + ts_sz;
468     is_user_key_ = false;
469   }
470 
471   void SetInternalKey(const Slice& user_key, SequenceNumber s,
472                       ValueType value_type = kValueTypeForSeek,
473                       const Slice* ts = nullptr) {
474     SetInternalKey(Slice(), user_key, s, value_type, ts);
475   }
476 
Reserve(size_t size)477   void Reserve(size_t size) {
478     EnlargeBufferIfNeeded(size);
479     key_size_ = size;
480   }
481 
SetInternalKey(const ParsedInternalKey & parsed_key)482   void SetInternalKey(const ParsedInternalKey& parsed_key) {
483     SetInternalKey(Slice(), parsed_key);
484   }
485 
SetInternalKey(const Slice & key_prefix,const ParsedInternalKey & parsed_key_suffix)486   void SetInternalKey(const Slice& key_prefix,
487                       const ParsedInternalKey& parsed_key_suffix) {
488     SetInternalKey(key_prefix, parsed_key_suffix.user_key,
489                    parsed_key_suffix.sequence, parsed_key_suffix.type);
490   }
491 
EncodeLengthPrefixedKey(const Slice & key)492   void EncodeLengthPrefixedKey(const Slice& key) {
493     auto size = key.size();
494     EnlargeBufferIfNeeded(size + static_cast<size_t>(VarintLength(size)));
495     char* ptr = EncodeVarint32(buf_, static_cast<uint32_t>(size));
496     memcpy(ptr, key.data(), size);
497     key_ = buf_;
498     is_user_key_ = true;
499   }
500 
IsUserKey()501   bool IsUserKey() const { return is_user_key_; }
502 
503  private:
504   char* buf_;
505   const char* key_;
506   size_t key_size_;
507   size_t buf_size_;
508   char space_[32];  // Avoid allocation for short keys
509   bool is_user_key_;
510 
SetKeyImpl(const Slice & key,bool copy)511   Slice SetKeyImpl(const Slice& key, bool copy) {
512     size_t size = key.size();
513     if (copy) {
514       // Copy key to buf_
515       EnlargeBufferIfNeeded(size);
516       memcpy(buf_, key.data(), size);
517       key_ = buf_;
518     } else {
519       // Update key_ to point to external memory
520       key_ = key.data();
521     }
522     key_size_ = size;
523     return Slice(key_, key_size_);
524   }
525 
ResetBuffer()526   void ResetBuffer() {
527     if (buf_ != space_) {
528       delete[] buf_;
529       buf_ = space_;
530     }
531     buf_size_ = sizeof(space_);
532     key_size_ = 0;
533   }
534 
535   // Enlarge the buffer size if needed based on key_size.
536   // By default, static allocated buffer is used. Once there is a key
537   // larger than the static allocated buffer, another buffer is dynamically
538   // allocated, until a larger key buffer is requested. In that case, we
539   // reallocate buffer and delete the old one.
EnlargeBufferIfNeeded(size_t key_size)540   void EnlargeBufferIfNeeded(size_t key_size) {
541     // If size is smaller than buffer size, continue using current buffer,
542     // or the static allocated one, as default
543     if (key_size > buf_size_) {
544       EnlargeBuffer(key_size);
545     }
546   }
547 
548   void EnlargeBuffer(size_t key_size);
549 };
550 
551 // Convert from a SliceTranform of user keys, to a SliceTransform of
552 // user keys.
553 class InternalKeySliceTransform : public SliceTransform {
554  public:
InternalKeySliceTransform(const SliceTransform * transform)555   explicit InternalKeySliceTransform(const SliceTransform* transform)
556       : transform_(transform) {}
557 
Name()558   virtual const char* Name() const override { return transform_->Name(); }
559 
Transform(const Slice & src)560   virtual Slice Transform(const Slice& src) const override {
561     auto user_key = ExtractUserKey(src);
562     return transform_->Transform(user_key);
563   }
564 
InDomain(const Slice & src)565   virtual bool InDomain(const Slice& src) const override {
566     auto user_key = ExtractUserKey(src);
567     return transform_->InDomain(user_key);
568   }
569 
InRange(const Slice & dst)570   virtual bool InRange(const Slice& dst) const override {
571     auto user_key = ExtractUserKey(dst);
572     return transform_->InRange(user_key);
573   }
574 
user_prefix_extractor()575   const SliceTransform* user_prefix_extractor() const { return transform_; }
576 
577  private:
578   // Like comparator, InternalKeySliceTransform will not take care of the
579   // deletion of transform_
580   const SliceTransform* const transform_;
581 };
582 
583 // Read the key of a record from a write batch.
584 // if this record represent the default column family then cf_record
585 // must be passed as false, otherwise it must be passed as true.
586 extern bool ReadKeyFromWriteBatchEntry(Slice* input, Slice* key,
587                                        bool cf_record);
588 
589 // Read record from a write batch piece from input.
590 // tag, column_family, key, value and blob are return values. Callers own the
591 // Slice they point to.
592 // Tag is defined as ValueType.
593 // input will be advanced to after the record.
594 extern Status ReadRecordFromWriteBatch(Slice* input, char* tag,
595                                        uint32_t* column_family, Slice* key,
596                                        Slice* value, Slice* blob, Slice* xid);
597 
598 // When user call DeleteRange() to delete a range of keys,
599 // we will store a serialized RangeTombstone in MemTable and SST.
600 // the struct here is a easy-understood form
601 // start/end_key_ is the start/end user key of the range to be deleted
602 struct RangeTombstone {
603   Slice start_key_;
604   Slice end_key_;
605   SequenceNumber seq_;
606   RangeTombstone() = default;
RangeTombstoneRangeTombstone607   RangeTombstone(Slice sk, Slice ek, SequenceNumber sn)
608       : start_key_(sk), end_key_(ek), seq_(sn) {}
609 
RangeTombstoneRangeTombstone610   RangeTombstone(ParsedInternalKey parsed_key, Slice value) {
611     start_key_ = parsed_key.user_key;
612     seq_ = parsed_key.sequence;
613     end_key_ = value;
614   }
615 
616   // be careful to use Serialize(), allocates new memory
SerializeRangeTombstone617   std::pair<InternalKey, Slice> Serialize() const {
618     auto key = InternalKey(start_key_, seq_, kTypeRangeDeletion);
619     Slice value = end_key_;
620     return std::make_pair(std::move(key), std::move(value));
621   }
622 
623   // be careful to use SerializeKey(), allocates new memory
SerializeKeyRangeTombstone624   InternalKey SerializeKey() const {
625     return InternalKey(start_key_, seq_, kTypeRangeDeletion);
626   }
627 
628   // The tombstone end-key is exclusive, so we generate an internal-key here
629   // which has a similar property. Using kMaxSequenceNumber guarantees that
630   // the returned internal-key will compare less than any other internal-key
631   // with the same user-key. This in turn guarantees that the serialized
632   // end-key for a tombstone such as [a-b] will compare less than the key "b".
633   //
634   // be careful to use SerializeEndKey(), allocates new memory
SerializeEndKeyRangeTombstone635   InternalKey SerializeEndKey() const {
636     return InternalKey(end_key_, kMaxSequenceNumber, kTypeRangeDeletion);
637   }
638 };
639 
Compare(const Slice & akey,const Slice & bkey)640 inline int InternalKeyComparator::Compare(const Slice& akey,
641                                           const Slice& bkey) const {
642   // Order by:
643   //    increasing user key (according to user-supplied comparator)
644   //    decreasing sequence number
645   //    decreasing type (though sequence# should be enough to disambiguate)
646   int r = user_comparator_.Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
647   if (r == 0) {
648     const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8);
649     const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8);
650     if (anum > bnum) {
651       r = -1;
652     } else if (anum < bnum) {
653       r = +1;
654     }
655   }
656   return r;
657 }
658 
CompareKeySeq(const Slice & akey,const Slice & bkey)659 inline int InternalKeyComparator::CompareKeySeq(const Slice& akey,
660                                                 const Slice& bkey) const {
661   // Order by:
662   //    increasing user key (according to user-supplied comparator)
663   //    decreasing sequence number
664   int r = user_comparator_.Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
665   if (r == 0) {
666     // Shift the number to exclude the last byte which contains the value type
667     const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8) >> 8;
668     const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8) >> 8;
669     if (anum > bnum) {
670       r = -1;
671     } else if (anum < bnum) {
672       r = +1;
673     }
674   }
675   return r;
676 }
677 
678 // Wrap InternalKeyComparator as a comparator class for ParsedInternalKey.
679 struct ParsedInternalKeyComparator {
ParsedInternalKeyComparatorParsedInternalKeyComparator680   explicit ParsedInternalKeyComparator(const InternalKeyComparator* c)
681       : cmp(c) {}
682 
operatorParsedInternalKeyComparator683   bool operator()(const ParsedInternalKey& a,
684                   const ParsedInternalKey& b) const {
685     return cmp->Compare(a, b) < 0;
686   }
687 
688   const InternalKeyComparator* cmp;
689 };
690 
691 }  // namespace ROCKSDB_NAMESPACE
692