1 // Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 // Copyright (c) 2013 The LevelDB Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the LICENSE file. See the AUTHORS file for names of contributors.
8 
9 #pragma once
10 
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

#include "rocksdb/rocksdb_namespace.h"
17 
18 namespace ROCKSDB_NAMESPACE {
19 
// Forward declarations: this header only refers to these types by reference
// or pointer, so their full definitions are not needed here.
class Slice;
class SliceTransform;
22 
// Context information of a compaction run.
//
// Note: the members have no default initializers, so a default-constructed
// object holds indeterminate values until both fields are assigned.
struct CompactionFilterContext {
  // True if this compaction run includes all data files.
  bool is_full_compaction;
  // True if this compaction was requested by the client; false if it is
  // occurring as part of the automatic compaction process.
  bool is_manual_compaction;
};
31 
32 // CompactionFilter allows an application to modify/delete a key-value at
33 // the time of compaction.
34 
35 class CompactionFilter {
36  public:
37   enum ValueType {
38     kValue,
39     kMergeOperand,
40     kBlobIndex,  // used internally by BlobDB.
41   };
42 
43   enum class Decision {
44     kKeep,
45     kRemove,
46     kChangeValue,
47     kRemoveAndSkipUntil,
48   };
49 
50   enum class BlobDecision { kKeep, kChangeValue, kCorruption, kIOError };
51 
52   // Context information of a compaction run
53   struct Context {
54     // Does this compaction run include all data files
55     bool is_full_compaction;
56     // Is this compaction requested by the client (true),
57     // or is it occurring as an automatic compaction process
58     bool is_manual_compaction;
59     // Which column family this compaction is for.
60     uint32_t column_family_id;
61   };
62 
~CompactionFilter()63   virtual ~CompactionFilter() {}
64 
65   // The compaction process invokes this
66   // method for kv that is being compacted. A return value
67   // of false indicates that the kv should be preserved in the
68   // output of this compaction run and a return value of true
69   // indicates that this key-value should be removed from the
70   // output of the compaction.  The application can inspect
71   // the existing value of the key and make decision based on it.
72   //
73   // Key-Values that are results of merge operation during compaction are not
74   // passed into this function. Currently, when you have a mix of Put()s and
75   // Merge()s on a same key, we only guarantee to process the merge operands
76   // through the compaction filters. Put()s might be processed, or might not.
77   //
78   // When the value is to be preserved, the application has the option
79   // to modify the existing_value and pass it back through new_value.
80   // value_changed needs to be set to true in this case.
81   //
82   // Note that RocksDB snapshots (i.e. call GetSnapshot() API on a
83   // DB* object) will not guarantee to preserve the state of the DB with
84   // CompactionFilter. Data seen from a snapshot might disppear after a
85   // compaction finishes. If you use snapshots, think twice about whether you
86   // want to use compaction filter and whether you are using it in a safe way.
87   //
88   // If multithreaded compaction is being used *and* a single CompactionFilter
89   // instance was supplied via Options::compaction_filter, this method may be
90   // called from different threads concurrently.  The application must ensure
91   // that the call is thread-safe.
92   //
93   // If the CompactionFilter was created by a factory, then it will only ever
94   // be used by a single thread that is doing the compaction run, and this
95   // call does not need to be thread-safe.  However, multiple filters may be
96   // in existence and operating concurrently.
Filter(int,const Slice &,const Slice &,std::string *,bool *)97   virtual bool Filter(int /*level*/, const Slice& /*key*/,
98                       const Slice& /*existing_value*/,
99                       std::string* /*new_value*/,
100                       bool* /*value_changed*/) const {
101     return false;
102   }
103 
104   // The compaction process invokes this method on every merge operand. If this
105   // method returns true, the merge operand will be ignored and not written out
106   // in the compaction output
107   //
108   // Note: If you are using a TransactionDB, it is not recommended to implement
109   // FilterMergeOperand().  If a Merge operation is filtered out, TransactionDB
110   // may not realize there is a write conflict and may allow a Transaction to
111   // Commit that should have failed.  Instead, it is better to implement any
112   // Merge filtering inside the MergeOperator.
FilterMergeOperand(int,const Slice &,const Slice &)113   virtual bool FilterMergeOperand(int /*level*/, const Slice& /*key*/,
114                                   const Slice& /*operand*/) const {
115     return false;
116   }
117 
118   // An extended API. Called for both values and merge operands.
119   // Allows changing value and skipping ranges of keys.
120   // The default implementation uses Filter() and FilterMergeOperand().
121   // If you're overriding this method, no need to override the other two.
122   // `value_type` indicates whether this key-value corresponds to a normal
123   // value (e.g. written with Put())  or a merge operand (written with Merge()).
124   //
125   // Possible return values:
126   //  * kKeep - keep the key-value pair.
127   //  * kRemove - remove the key-value pair or merge operand.
128   //  * kChangeValue - keep the key and change the value/operand to *new_value.
129   //  * kRemoveAndSkipUntil - remove this key-value pair, and also remove
130   //      all key-value pairs with key in [key, *skip_until). This range
131   //      of keys will be skipped without reading, potentially saving some
132   //      IO operations compared to removing the keys one by one.
133   //
134   //      *skip_until <= key is treated the same as Decision::kKeep
135   //      (since the range [key, *skip_until) is empty).
136   //
137   //      Caveats:
138   //       - The keys are skipped even if there are snapshots containing them,
139   //         i.e. values removed by kRemoveAndSkipUntil can disappear from a
140   //         snapshot - beware if you're using TransactionDB or
141   //         DB::GetSnapshot().
142   //       - If value for a key was overwritten or merged into (multiple Put()s
143   //         or Merge()s), and compaction filter skips this key with
144   //         kRemoveAndSkipUntil, it's possible that it will remove only
145   //         the new value, exposing the old value that was supposed to be
146   //         overwritten.
147   //       - Doesn't work with PlainTableFactory in prefix mode.
148   //       - If you use kRemoveAndSkipUntil, consider also reducing
149   //         compaction_readahead_size option.
150   //
151   // Note: If you are using a TransactionDB, it is not recommended to filter
152   // out or modify merge operands (ValueType::kMergeOperand).
153   // If a merge operation is filtered out, TransactionDB may not realize there
154   // is a write conflict and may allow a Transaction to Commit that should have
155   // failed. Instead, it is better to implement any Merge filtering inside the
156   // MergeOperator.
FilterV2(int level,const Slice & key,ValueType value_type,const Slice & existing_value,std::string * new_value,std::string *)157   virtual Decision FilterV2(int level, const Slice& key, ValueType value_type,
158                             const Slice& existing_value, std::string* new_value,
159                             std::string* /*skip_until*/) const {
160     switch (value_type) {
161       case ValueType::kValue: {
162         bool value_changed = false;
163         bool rv = Filter(level, key, existing_value, new_value, &value_changed);
164         if (rv) {
165           return Decision::kRemove;
166         }
167         return value_changed ? Decision::kChangeValue : Decision::kKeep;
168       }
169       case ValueType::kMergeOperand: {
170         bool rv = FilterMergeOperand(level, key, existing_value);
171         return rv ? Decision::kRemove : Decision::kKeep;
172       }
173       case ValueType::kBlobIndex:
174         return Decision::kKeep;
175     }
176     assert(false);
177     return Decision::kKeep;
178   }
179 
180   // Internal (BlobDB) use only. Do not override in application code.
PrepareBlobOutput(const Slice &,const Slice &,std::string *)181   virtual BlobDecision PrepareBlobOutput(const Slice& /* key */,
182                                          const Slice& /* existing_value */,
183                                          std::string* /* new_value */) const {
184     return BlobDecision::kKeep;
185   }
186 
187   // This function is deprecated. Snapshots will always be ignored for
188   // compaction filters, because we realized that not ignoring snapshots doesn't
189   // provide the gurantee we initially thought it would provide. Repeatable
190   // reads will not be guaranteed anyway. If you override the function and
191   // returns false, we will fail the compaction.
IgnoreSnapshots()192   virtual bool IgnoreSnapshots() const { return true; }
193 
194   // Returns a name that identifies this compaction filter.
195   // The name will be printed to LOG file on start up for diagnosis.
196   virtual const char* Name() const = 0;
197 };
198 
199 // Each compaction will create a new CompactionFilter allowing the
200 // application to know about different compactions
201 class CompactionFilterFactory {
202  public:
~CompactionFilterFactory()203   virtual ~CompactionFilterFactory() {}
204 
205   virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter(
206       const CompactionFilter::Context& context) = 0;
207 
208   // Returns a name that identifies this compaction filter factory.
209   virtual const char* Name() const = 0;
210 };
211 
212 }  // namespace ROCKSDB_NAMESPACE
213