1 // Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the LICENSE file. See the AUTHORS file for names of contributors.
8 //
9 // A WriteBatchWithIndex with a binary searchable index built for all the keys
10 // inserted.
11 #pragma once
12 
13 #ifndef ROCKSDB_LITE
14 
15 #include <memory>
16 #include <string>
17 #include <vector>
18 
19 #include "rocksdb/comparator.h"
20 #include "rocksdb/iterator.h"
21 #include "rocksdb/slice.h"
22 #include "rocksdb/status.h"
23 #include "rocksdb/write_batch.h"
24 #include "rocksdb/write_batch_base.h"
25 
26 namespace ROCKSDB_NAMESPACE {
27 
28 class ColumnFamilyHandle;
29 class Comparator;
30 class DB;
31 class ReadCallback;
32 struct ReadOptions;
33 struct DBOptions;
34 
35 enum WriteType {
36   kPutRecord,
37   kMergeRecord,
38   kDeleteRecord,
39   kSingleDeleteRecord,
40   kDeleteRangeRecord,
41   kLogDataRecord,
42   kXIDRecord,
43 };
44 
45 // an entry for Put, Merge, Delete, or SingleDelete entry for write batches.
46 // Used in WBWIIterator.
47 struct WriteEntry {
48   WriteType type;
49   Slice key;
50   Slice value;
51 };
52 
53 // Iterator of one column family out of a WriteBatchWithIndex.
54 class WBWIIterator {
55  public:
~WBWIIterator()56   virtual ~WBWIIterator() {}
57 
58   virtual bool Valid() const = 0;
59 
60   virtual void SeekToFirst() = 0;
61 
62   virtual void SeekToLast() = 0;
63 
64   virtual void Seek(const Slice& key) = 0;
65 
66   virtual void SeekForPrev(const Slice& key) = 0;
67 
68   virtual void Next() = 0;
69 
70   virtual void Prev() = 0;
71 
72   // the return WriteEntry is only valid until the next mutation of
73   // WriteBatchWithIndex
74   virtual WriteEntry Entry() const = 0;
75 
76   virtual Status status() const = 0;
77 };
78 
79 // A WriteBatchWithIndex with a binary searchable index built for all the keys
80 // inserted.
81 // In Put(), Merge() Delete(), or SingleDelete(), the same function of the
82 // wrapped will be called. At the same time, indexes will be built.
83 // By calling GetWriteBatch(), a user will get the WriteBatch for the data
84 // they inserted, which can be used for DB::Write().
85 // A user can call NewIterator() to create an iterator.
86 class WriteBatchWithIndex : public WriteBatchBase {
87  public:
88   // backup_index_comparator: the backup comparator used to compare keys
89   // within the same column family, if column family is not given in the
90   // interface, or we can't find a column family from the column family handle
91   // passed in, backup_index_comparator will be used for the column family.
92   // reserved_bytes: reserved bytes in underlying WriteBatch
93   // max_bytes: maximum size of underlying WriteBatch in bytes
94   // overwrite_key: if true, overwrite the key in the index when inserting
95   //                the same key as previously, so iterator will never
96   //                show two entries with the same key.
97   explicit WriteBatchWithIndex(
98       const Comparator* backup_index_comparator = BytewiseComparator(),
99       size_t reserved_bytes = 0, bool overwrite_key = false,
100       size_t max_bytes = 0);
101 
102   ~WriteBatchWithIndex() override;
103   WriteBatchWithIndex(WriteBatchWithIndex&&);
104   WriteBatchWithIndex& operator=(WriteBatchWithIndex&&);
105 
106   using WriteBatchBase::Put;
107   Status Put(ColumnFamilyHandle* column_family, const Slice& key,
108              const Slice& value) override;
109 
110   Status Put(const Slice& key, const Slice& value) override;
111 
112   using WriteBatchBase::Merge;
113   Status Merge(ColumnFamilyHandle* column_family, const Slice& key,
114                const Slice& value) override;
115 
116   Status Merge(const Slice& key, const Slice& value) override;
117 
118   using WriteBatchBase::Delete;
119   Status Delete(ColumnFamilyHandle* column_family, const Slice& key) override;
120   Status Delete(const Slice& key) override;
121 
122   using WriteBatchBase::SingleDelete;
123   Status SingleDelete(ColumnFamilyHandle* column_family,
124                       const Slice& key) override;
125   Status SingleDelete(const Slice& key) override;
126 
127   using WriteBatchBase::DeleteRange;
DeleteRange(ColumnFamilyHandle *,const Slice &,const Slice &)128   Status DeleteRange(ColumnFamilyHandle* /* column_family */,
129                      const Slice& /* begin_key */,
130                      const Slice& /* end_key */) override {
131     return Status::NotSupported(
132         "DeleteRange unsupported in WriteBatchWithIndex");
133   }
DeleteRange(const Slice &,const Slice &)134   Status DeleteRange(const Slice& /* begin_key */,
135                      const Slice& /* end_key */) override {
136     return Status::NotSupported(
137         "DeleteRange unsupported in WriteBatchWithIndex");
138   }
139 
140   using WriteBatchBase::PutLogData;
141   Status PutLogData(const Slice& blob) override;
142 
143   using WriteBatchBase::Clear;
144   void Clear() override;
145 
146   using WriteBatchBase::GetWriteBatch;
147   WriteBatch* GetWriteBatch() override;
148 
149   // Create an iterator of a column family. User can call iterator.Seek() to
150   // search to the next entry of or after a key. Keys will be iterated in the
151   // order given by index_comparator. For multiple updates on the same key,
152   // each update will be returned as a separate entry, in the order of update
153   // time.
154   //
155   // The returned iterator should be deleted by the caller.
156   WBWIIterator* NewIterator(ColumnFamilyHandle* column_family);
157   // Create an iterator of the default column family.
158   WBWIIterator* NewIterator();
159 
160   // Will create a new Iterator that will use WBWIIterator as a delta and
161   // base_iterator as base.
162   //
163   // This function is only supported if the WriteBatchWithIndex was
164   // constructed with overwrite_key=true.
165   //
166   // The returned iterator should be deleted by the caller.
167   // The base_iterator is now 'owned' by the returned iterator. Deleting the
168   // returned iterator will also delete the base_iterator.
169   //
170   // Updating write batch with the current key of the iterator is not safe.
171   // We strongly recommand users not to do it. It will invalidate the current
172   // key() and value() of the iterator. This invalidation happens even before
173   // the write batch update finishes. The state may recover after Next() is
174   // called.
175   Iterator* NewIteratorWithBase(ColumnFamilyHandle* column_family,
176                                 Iterator* base_iterator,
177                                 const ReadOptions* opts = nullptr);
178   // default column family
179   Iterator* NewIteratorWithBase(Iterator* base_iterator);
180 
181   // Similar to DB::Get() but will only read the key from this batch.
182   // If the batch does not have enough data to resolve Merge operations,
183   // MergeInProgress status may be returned.
184   Status GetFromBatch(ColumnFamilyHandle* column_family,
185                       const DBOptions& options, const Slice& key,
186                       std::string* value);
187 
188   // Similar to previous function but does not require a column_family.
189   // Note:  An InvalidArgument status will be returned if there are any Merge
190   // operators for this key.  Use previous method instead.
GetFromBatch(const DBOptions & options,const Slice & key,std::string * value)191   Status GetFromBatch(const DBOptions& options, const Slice& key,
192                       std::string* value) {
193     return GetFromBatch(nullptr, options, key, value);
194   }
195 
196   // Similar to DB::Get() but will also read writes from this batch.
197   //
198   // This function will query both this batch and the DB and then merge
199   // the results using the DB's merge operator (if the batch contains any
200   // merge requests).
201   //
202   // Setting read_options.snapshot will affect what is read from the DB
203   // but will NOT change which keys are read from the batch (the keys in
204   // this batch do not yet belong to any snapshot and will be fetched
205   // regardless).
206   Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
207                            const Slice& key, std::string* value);
208 
209   // An overload of the above method that receives a PinnableSlice
210   Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
211                            const Slice& key, PinnableSlice* value);
212 
213   Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
214                            ColumnFamilyHandle* column_family, const Slice& key,
215                            std::string* value);
216 
217   // An overload of the above method that receives a PinnableSlice
218   Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
219                            ColumnFamilyHandle* column_family, const Slice& key,
220                            PinnableSlice* value);
221 
222   void MultiGetFromBatchAndDB(DB* db, const ReadOptions& read_options,
223                               ColumnFamilyHandle* column_family,
224                               const size_t num_keys, const Slice* keys,
225                               PinnableSlice* values, Status* statuses,
226                               bool sorted_input);
227 
228   // Records the state of the batch for future calls to RollbackToSavePoint().
229   // May be called multiple times to set multiple save points.
230   void SetSavePoint() override;
231 
232   // Remove all entries in this batch (Put, Merge, Delete, SingleDelete,
233   // PutLogData) since the most recent call to SetSavePoint() and removes the
234   // most recent save point.
235   // If there is no previous call to SetSavePoint(), behaves the same as
236   // Clear().
237   //
238   // Calling RollbackToSavePoint invalidates any open iterators on this batch.
239   //
240   // Returns Status::OK() on success,
241   //         Status::NotFound() if no previous call to SetSavePoint(),
242   //         or other Status on corruption.
243   Status RollbackToSavePoint() override;
244 
245   // Pop the most recent save point.
246   // If there is no previous call to SetSavePoint(), Status::NotFound()
247   // will be returned.
248   // Otherwise returns Status::OK().
249   Status PopSavePoint() override;
250 
251   void SetMaxBytes(size_t max_bytes) override;
252   size_t GetDataSize() const;
253 
254  private:
255   friend class PessimisticTransactionDB;
256   friend class WritePreparedTxn;
257   friend class WriteUnpreparedTxn;
258   friend class WriteBatchWithIndex_SubBatchCnt_Test;
259   // Returns the number of sub-batches inside the write batch. A sub-batch
260   // starts right before inserting a key that is a duplicate of a key in the
261   // last sub-batch.
262   size_t SubBatchCnt();
263 
264   Status GetFromBatchAndDB(DB* db, const ReadOptions& read_options,
265                            ColumnFamilyHandle* column_family, const Slice& key,
266                            PinnableSlice* value, ReadCallback* callback);
267   void MultiGetFromBatchAndDB(DB* db, const ReadOptions& read_options,
268                               ColumnFamilyHandle* column_family,
269                               const size_t num_keys, const Slice* keys,
270                               PinnableSlice* values, Status* statuses,
271                               bool sorted_input, ReadCallback* callback);
272   struct Rep;
273   std::unique_ptr<Rep> rep;
274 };
275 
276 }  // namespace ROCKSDB_NAMESPACE
277 
278 #endif  // !ROCKSDB_LITE
279