1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 2 // This source code is licensed under both the GPLv2 (found in the 3 // COPYING file in the root directory) and Apache 2.0 License 4 // (found in the LICENSE.Apache file in the root directory). 5 // Copyright (c) 2013 The LevelDB Authors. All rights reserved. 6 // Use of this source code is governed by a BSD-style license that can be 7 // found in the LICENSE file. See the AUTHORS file for names of contributors. 8 9 #pragma once 10 11 #include <cassert> 12 #include <memory> 13 #include <string> 14 #include <vector> 15 16 #include "rocksdb/rocksdb_namespace.h" 17 18 namespace ROCKSDB_NAMESPACE { 19 20 class Slice; 21 class SliceTransform; 22 23 // Context information of a compaction run 24 struct CompactionFilterContext { 25 // Does this compaction run include all data files 26 bool is_full_compaction; 27 // Is this compaction requested by the client (true), 28 // or is it occurring as an automatic compaction process 29 bool is_manual_compaction; 30 }; 31 32 // CompactionFilter allows an application to modify/delete a key-value at 33 // the time of compaction. 34 35 class CompactionFilter { 36 public: 37 enum ValueType { 38 kValue, 39 kMergeOperand, 40 kBlobIndex, // used internally by BlobDB. 41 }; 42 43 enum class Decision { 44 kKeep, 45 kRemove, 46 kChangeValue, 47 kRemoveAndSkipUntil, 48 }; 49 50 enum class BlobDecision { kKeep, kChangeValue, kCorruption, kIOError }; 51 52 // Context information of a compaction run 53 struct Context { 54 // Does this compaction run include all data files 55 bool is_full_compaction; 56 // Is this compaction requested by the client (true), 57 // or is it occurring as an automatic compaction process 58 bool is_manual_compaction; 59 // Which column family this compaction is for. 60 uint32_t column_family_id; 61 }; 62 ~CompactionFilter()63 virtual ~CompactionFilter() {} 64 65 // The compaction process invokes this 66 // method for kv that is being compacted. A return value 67 // of false indicates that the kv should be preserved in the 68 // output of this compaction run and a return value of true 69 // indicates that this key-value should be removed from the 70 // output of the compaction. The application can inspect 71 // the existing value of the key and make decision based on it. 72 // 73 // Key-Values that are results of merge operation during compaction are not 74 // passed into this function. Currently, when you have a mix of Put()s and 75 // Merge()s on a same key, we only guarantee to process the merge operands 76 // through the compaction filters. Put()s might be processed, or might not. 77 // 78 // When the value is to be preserved, the application has the option 79 // to modify the existing_value and pass it back through new_value. 80 // value_changed needs to be set to true in this case. 81 // 82 // Note that RocksDB snapshots (i.e. call GetSnapshot() API on a 83 // DB* object) will not guarantee to preserve the state of the DB with 84 // CompactionFilter. Data seen from a snapshot might disppear after a 85 // compaction finishes. If you use snapshots, think twice about whether you 86 // want to use compaction filter and whether you are using it in a safe way. 87 // 88 // If multithreaded compaction is being used *and* a single CompactionFilter 89 // instance was supplied via Options::compaction_filter, this method may be 90 // called from different threads concurrently. The application must ensure 91 // that the call is thread-safe. 92 // 93 // If the CompactionFilter was created by a factory, then it will only ever 94 // be used by a single thread that is doing the compaction run, and this 95 // call does not need to be thread-safe. However, multiple filters may be 96 // in existence and operating concurrently. Filter(int,const Slice &,const Slice &,std::string *,bool *)97 virtual bool Filter(int /*level*/, const Slice& /*key*/, 98 const Slice& /*existing_value*/, 99 std::string* /*new_value*/, 100 bool* /*value_changed*/) const { 101 return false; 102 } 103 104 // The compaction process invokes this method on every merge operand. If this 105 // method returns true, the merge operand will be ignored and not written out 106 // in the compaction output 107 // 108 // Note: If you are using a TransactionDB, it is not recommended to implement 109 // FilterMergeOperand(). If a Merge operation is filtered out, TransactionDB 110 // may not realize there is a write conflict and may allow a Transaction to 111 // Commit that should have failed. Instead, it is better to implement any 112 // Merge filtering inside the MergeOperator. FilterMergeOperand(int,const Slice &,const Slice &)113 virtual bool FilterMergeOperand(int /*level*/, const Slice& /*key*/, 114 const Slice& /*operand*/) const { 115 return false; 116 } 117 118 // An extended API. Called for both values and merge operands. 119 // Allows changing value and skipping ranges of keys. 120 // The default implementation uses Filter() and FilterMergeOperand(). 121 // If you're overriding this method, no need to override the other two. 122 // `value_type` indicates whether this key-value corresponds to a normal 123 // value (e.g. written with Put()) or a merge operand (written with Merge()). 124 // 125 // Possible return values: 126 // * kKeep - keep the key-value pair. 127 // * kRemove - remove the key-value pair or merge operand. 128 // * kChangeValue - keep the key and change the value/operand to *new_value. 129 // * kRemoveAndSkipUntil - remove this key-value pair, and also remove 130 // all key-value pairs with key in [key, *skip_until). This range 131 // of keys will be skipped without reading, potentially saving some 132 // IO operations compared to removing the keys one by one. 133 // 134 // *skip_until <= key is treated the same as Decision::kKeep 135 // (since the range [key, *skip_until) is empty). 136 // 137 // Caveats: 138 // - The keys are skipped even if there are snapshots containing them, 139 // i.e. values removed by kRemoveAndSkipUntil can disappear from a 140 // snapshot - beware if you're using TransactionDB or 141 // DB::GetSnapshot(). 142 // - If value for a key was overwritten or merged into (multiple Put()s 143 // or Merge()s), and compaction filter skips this key with 144 // kRemoveAndSkipUntil, it's possible that it will remove only 145 // the new value, exposing the old value that was supposed to be 146 // overwritten. 147 // - Doesn't work with PlainTableFactory in prefix mode. 148 // - If you use kRemoveAndSkipUntil, consider also reducing 149 // compaction_readahead_size option. 150 // 151 // Note: If you are using a TransactionDB, it is not recommended to filter 152 // out or modify merge operands (ValueType::kMergeOperand). 153 // If a merge operation is filtered out, TransactionDB may not realize there 154 // is a write conflict and may allow a Transaction to Commit that should have 155 // failed. Instead, it is better to implement any Merge filtering inside the 156 // MergeOperator. FilterV2(int level,const Slice & key,ValueType value_type,const Slice & existing_value,std::string * new_value,std::string *)157 virtual Decision FilterV2(int level, const Slice& key, ValueType value_type, 158 const Slice& existing_value, std::string* new_value, 159 std::string* /*skip_until*/) const { 160 switch (value_type) { 161 case ValueType::kValue: { 162 bool value_changed = false; 163 bool rv = Filter(level, key, existing_value, new_value, &value_changed); 164 if (rv) { 165 return Decision::kRemove; 166 } 167 return value_changed ? Decision::kChangeValue : Decision::kKeep; 168 } 169 case ValueType::kMergeOperand: { 170 bool rv = FilterMergeOperand(level, key, existing_value); 171 return rv ? Decision::kRemove : Decision::kKeep; 172 } 173 case ValueType::kBlobIndex: 174 return Decision::kKeep; 175 } 176 assert(false); 177 return Decision::kKeep; 178 } 179 180 // Internal (BlobDB) use only. Do not override in application code. PrepareBlobOutput(const Slice &,const Slice &,std::string *)181 virtual BlobDecision PrepareBlobOutput(const Slice& /* key */, 182 const Slice& /* existing_value */, 183 std::string* /* new_value */) const { 184 return BlobDecision::kKeep; 185 } 186 187 // This function is deprecated. Snapshots will always be ignored for 188 // compaction filters, because we realized that not ignoring snapshots doesn't 189 // provide the gurantee we initially thought it would provide. Repeatable 190 // reads will not be guaranteed anyway. If you override the function and 191 // returns false, we will fail the compaction. IgnoreSnapshots()192 virtual bool IgnoreSnapshots() const { return true; } 193 194 // Returns a name that identifies this compaction filter. 195 // The name will be printed to LOG file on start up for diagnosis. 196 virtual const char* Name() const = 0; 197 }; 198 199 // Each compaction will create a new CompactionFilter allowing the 200 // application to know about different compactions 201 class CompactionFilterFactory { 202 public: ~CompactionFilterFactory()203 virtual ~CompactionFilterFactory() {} 204 205 virtual std::unique_ptr<CompactionFilter> CreateCompactionFilter( 206 const CompactionFilter::Context& context) = 0; 207 208 // Returns a name that identifies this compaction filter factory. 209 virtual const char* Name() const = 0; 210 }; 211 212 } // namespace ROCKSDB_NAMESPACE 213