1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 2 // This source code is licensed under both the GPLv2 (found in the 3 // COPYING file in the root directory) and Apache 2.0 License 4 // (found in the LICENSE.Apache file in the root directory). 5 // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 6 // Use of this source code is governed by a BSD-style license that can be 7 // found in the LICENSE file. See the AUTHORS file for names of contributors. 8 // 9 // A WriteBatchWithIndex with a binary searchable index built for all the keys 10 // inserted. 11 #pragma once 12 13 #ifndef ROCKSDB_LITE 14 15 #include <memory> 16 #include <string> 17 #include <vector> 18 19 #include "rocksdb/comparator.h" 20 #include "rocksdb/iterator.h" 21 #include "rocksdb/slice.h" 22 #include "rocksdb/status.h" 23 #include "rocksdb/write_batch.h" 24 #include "rocksdb/write_batch_base.h" 25 26 namespace ROCKSDB_NAMESPACE { 27 28 class ColumnFamilyHandle; 29 class Comparator; 30 class DB; 31 class ReadCallback; 32 struct ReadOptions; 33 struct DBOptions; 34 35 enum WriteType { 36 kPutRecord, 37 kMergeRecord, 38 kDeleteRecord, 39 kSingleDeleteRecord, 40 kDeleteRangeRecord, 41 kLogDataRecord, 42 kXIDRecord, 43 }; 44 45 // an entry for Put, Merge, Delete, or SingleDelete entry for write batches. 46 // Used in WBWIIterator. 47 struct WriteEntry { 48 WriteType type; 49 Slice key; 50 Slice value; 51 }; 52 53 // Iterator of one column family out of a WriteBatchWithIndex. 54 class WBWIIterator { 55 public: ~WBWIIterator()56 virtual ~WBWIIterator() {} 57 58 virtual bool Valid() const = 0; 59 60 virtual void SeekToFirst() = 0; 61 62 virtual void SeekToLast() = 0; 63 64 virtual void Seek(const Slice& key) = 0; 65 66 virtual void SeekForPrev(const Slice& key) = 0; 67 68 virtual void Next() = 0; 69 70 virtual void Prev() = 0; 71 72 // the return WriteEntry is only valid until the next mutation of 73 // WriteBatchWithIndex 74 virtual WriteEntry Entry() const = 0; 75 76 virtual Status status() const = 0; 77 }; 78 79 // A WriteBatchWithIndex with a binary searchable index built for all the keys 80 // inserted. 81 // In Put(), Merge() Delete(), or SingleDelete(), the same function of the 82 // wrapped will be called. At the same time, indexes will be built. 83 // By calling GetWriteBatch(), a user will get the WriteBatch for the data 84 // they inserted, which can be used for DB::Write(). 85 // A user can call NewIterator() to create an iterator. 86 class WriteBatchWithIndex : public WriteBatchBase { 87 public: 88 // backup_index_comparator: the backup comparator used to compare keys 89 // within the same column family, if column family is not given in the 90 // interface, or we can't find a column family from the column family handle 91 // passed in, backup_index_comparator will be used for the column family. 92 // reserved_bytes: reserved bytes in underlying WriteBatch 93 // max_bytes: maximum size of underlying WriteBatch in bytes 94 // overwrite_key: if true, overwrite the key in the index when inserting 95 // the same key as previously, so iterator will never 96 // show two entries with the same key. 97 explicit WriteBatchWithIndex( 98 const Comparator* backup_index_comparator = BytewiseComparator(), 99 size_t reserved_bytes = 0, bool overwrite_key = false, 100 size_t max_bytes = 0); 101 102 ~WriteBatchWithIndex() override; 103 WriteBatchWithIndex(WriteBatchWithIndex&&); 104 WriteBatchWithIndex& operator=(WriteBatchWithIndex&&); 105 106 using WriteBatchBase::Put; 107 Status Put(ColumnFamilyHandle* column_family, const Slice& key, 108 const Slice& value) override; 109 110 Status Put(const Slice& key, const Slice& value) override; 111 112 using WriteBatchBase::Merge; 113 Status Merge(ColumnFamilyHandle* column_family, const Slice& key, 114 const Slice& value) override; 115 116 Status Merge(const Slice& key, const Slice& value) override; 117 118 using WriteBatchBase::Delete; 119 Status Delete(ColumnFamilyHandle* column_family, const Slice& key) override; 120 Status Delete(const Slice& key) override; 121 122 using WriteBatchBase::SingleDelete; 123 Status SingleDelete(ColumnFamilyHandle* column_family, 124 const Slice& key) override; 125 Status SingleDelete(const Slice& key) override; 126 127 using WriteBatchBase::DeleteRange; DeleteRange(ColumnFamilyHandle *,const Slice &,const Slice &)128 Status DeleteRange(ColumnFamilyHandle* /* column_family */, 129 const Slice& /* begin_key */, 130 const Slice& /* end_key */) override { 131 return Status::NotSupported( 132 "DeleteRange unsupported in WriteBatchWithIndex"); 133 } DeleteRange(const Slice &,const Slice &)134 Status DeleteRange(const Slice& /* begin_key */, 135 const Slice& /* end_key */) override { 136 return Status::NotSupported( 137 "DeleteRange unsupported in WriteBatchWithIndex"); 138 } 139 140 using WriteBatchBase::PutLogData; 141 Status PutLogData(const Slice& blob) override; 142 143 using WriteBatchBase::Clear; 144 void Clear() override; 145 146 using WriteBatchBase::GetWriteBatch; 147 WriteBatch* GetWriteBatch() override; 148 149 // Create an iterator of a column family. User can call iterator.Seek() to 150 // search to the next entry of or after a key. Keys will be iterated in the 151 // order given by index_comparator. For multiple updates on the same key, 152 // each update will be returned as a separate entry, in the order of update 153 // time. 154 // 155 // The returned iterator should be deleted by the caller. 156 WBWIIterator* NewIterator(ColumnFamilyHandle* column_family); 157 // Create an iterator of the default column family. 158 WBWIIterator* NewIterator(); 159 160 // Will create a new Iterator that will use WBWIIterator as a delta and 161 // base_iterator as base. 162 // 163 // This function is only supported if the WriteBatchWithIndex was 164 // constructed with overwrite_key=true. 165 // 166 // The returned iterator should be deleted by the caller. 167 // The base_iterator is now 'owned' by the returned iterator. Deleting the 168 // returned iterator will also delete the base_iterator. 169 // 170 // Updating write batch with the current key of the iterator is not safe. 171 // We strongly recommand users not to do it. It will invalidate the current 172 // key() and value() of the iterator. This invalidation happens even before 173 // the write batch update finishes. The state may recover after Next() is 174 // called. 175 Iterator* NewIteratorWithBase(ColumnFamilyHandle* column_family, 176 Iterator* base_iterator, 177 const ReadOptions* opts = nullptr); 178 // default column family 179 Iterator* NewIteratorWithBase(Iterator* base_iterator); 180 181 // Similar to DB::Get() but will only read the key from this batch. 182 // If the batch does not have enough data to resolve Merge operations, 183 // MergeInProgress status may be returned. 184 Status GetFromBatch(ColumnFamilyHandle* column_family, 185 const DBOptions& options, const Slice& key, 186 std::string* value); 187 188 // Similar to previous function but does not require a column_family. 189 // Note: An InvalidArgument status will be returned if there are any Merge 190 // operators for this key. Use previous method instead. GetFromBatch(const DBOptions & options,const Slice & key,std::string * value)191 Status GetFromBatch(const DBOptions& options, const Slice& key, 192 std::string* value) { 193 return GetFromBatch(nullptr, options, key, value); 194 } 195 196 // Similar to DB::Get() but will also read writes from this batch. 197 // 198 // This function will query both this batch and the DB and then merge 199 // the results using the DB's merge operator (if the batch contains any 200 // merge requests). 201 // 202 // Setting read_options.snapshot will affect what is read from the DB 203 // but will NOT change which keys are read from the batch (the keys in 204 // this batch do not yet belong to any snapshot and will be fetched 205 // regardless). 206 Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options, 207 const Slice& key, std::string* value); 208 209 // An overload of the above method that receives a PinnableSlice 210 Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options, 211 const Slice& key, PinnableSlice* value); 212 213 Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options, 214 ColumnFamilyHandle* column_family, const Slice& key, 215 std::string* value); 216 217 // An overload of the above method that receives a PinnableSlice 218 Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options, 219 ColumnFamilyHandle* column_family, const Slice& key, 220 PinnableSlice* value); 221 222 void MultiGetFromBatchAndDB(DB* db, const ReadOptions& read_options, 223 ColumnFamilyHandle* column_family, 224 const size_t num_keys, const Slice* keys, 225 PinnableSlice* values, Status* statuses, 226 bool sorted_input); 227 228 // Records the state of the batch for future calls to RollbackToSavePoint(). 229 // May be called multiple times to set multiple save points. 230 void SetSavePoint() override; 231 232 // Remove all entries in this batch (Put, Merge, Delete, SingleDelete, 233 // PutLogData) since the most recent call to SetSavePoint() and removes the 234 // most recent save point. 235 // If there is no previous call to SetSavePoint(), behaves the same as 236 // Clear(). 237 // 238 // Calling RollbackToSavePoint invalidates any open iterators on this batch. 239 // 240 // Returns Status::OK() on success, 241 // Status::NotFound() if no previous call to SetSavePoint(), 242 // or other Status on corruption. 243 Status RollbackToSavePoint() override; 244 245 // Pop the most recent save point. 246 // If there is no previous call to SetSavePoint(), Status::NotFound() 247 // will be returned. 248 // Otherwise returns Status::OK(). 249 Status PopSavePoint() override; 250 251 void SetMaxBytes(size_t max_bytes) override; 252 size_t GetDataSize() const; 253 254 private: 255 friend class PessimisticTransactionDB; 256 friend class WritePreparedTxn; 257 friend class WriteUnpreparedTxn; 258 friend class WriteBatchWithIndex_SubBatchCnt_Test; 259 // Returns the number of sub-batches inside the write batch. A sub-batch 260 // starts right before inserting a key that is a duplicate of a key in the 261 // last sub-batch. 262 size_t SubBatchCnt(); 263 264 Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options, 265 ColumnFamilyHandle* column_family, const Slice& key, 266 PinnableSlice* value, ReadCallback* callback); 267 void MultiGetFromBatchAndDB(DB* db, const ReadOptions& read_options, 268 ColumnFamilyHandle* column_family, 269 const size_t num_keys, const Slice* keys, 270 PinnableSlice* values, Status* statuses, 271 bool sorted_input, ReadCallback* callback); 272 struct Rep; 273 std::unique_ptr<Rep> rep; 274 }; 275 276 } // namespace ROCKSDB_NAMESPACE 277 278 #endif // !ROCKSDB_LITE 279