1 //  Copyright (c) 2016-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #include "db/db_test_util.h"
7 #include "port/stack_trace.h"
8 #include "rocksdb/utilities/write_batch_with_index.h"
9 #include "test_util/testutil.h"
10 #include "utilities/merge_operators.h"
11 
12 namespace ROCKSDB_NAMESPACE {
13 
14 class DBRangeDelTest : public DBTestBase {
15  public:
16   DBRangeDelTest() : DBTestBase("/db_range_del_test") {}
17 
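  // Returns the fixed-width 8-byte (native-endian) encoding of `key`; tests
  // that use this helper pair it with test::Uint64Comparator() so the keys
  // sort numerically.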
18   std::string GetNumericStr(int key) {
19     uint64_t uint64_key = static_cast<uint64_t>(key);
20     std::string str;
21     str.resize(8);
22     memcpy(&str[0], static_cast<void*>(&uint64_key), 8);
23     return str;
24   }
25 };
26 
27 // PlainTableFactory, WriteBatchWithIndex, and NumTableFilesAtLevel() are not
28 // supported in ROCKSDB_LITE
29 #ifndef ROCKSDB_LITE
30 TEST_F(DBRangeDelTest, NonBlockBasedTableNotSupported) {
31   // TODO: figure out why MmapReads trips the iterator pinning assertion in
32   // RangeDelAggregator. Ideally it would be supported; otherwise it should at
33   // least be explicitly unsupported.
34   for (auto config : {kPlainTableAllBytesPrefix, /* kWalDirAndMmapReads */}) {
35     option_config_ = config;
36     DestroyAndReopen(CurrentOptions());
37     ASSERT_TRUE(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
38                                  "dr1", "dr1")
39                     .IsNotSupported());
40   }
41 }
42 
43 TEST_F(DBRangeDelTest, WriteBatchWithIndexNotSupported) {
44   WriteBatchWithIndex indexedBatch{};
45   ASSERT_TRUE(indexedBatch.DeleteRange(db_->DefaultColumnFamily(), "dr1", "dr1")
46                   .IsNotSupported());
47   ASSERT_TRUE(indexedBatch.DeleteRange("dr1", "dr1").IsNotSupported());
48 }
49 
50 TEST_F(DBRangeDelTest, FlushOutputHasOnlyRangeTombstones) {
51   do {
52     DestroyAndReopen(CurrentOptions());
53     ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
54                                "dr1", "dr2"));
55     ASSERT_OK(db_->Flush(FlushOptions()));
56     ASSERT_EQ(1, NumTableFilesAtLevel(0));
57   } while (ChangeOptions(kRangeDelSkipConfigs));
58 }
59 
60 TEST_F(DBRangeDelTest, CompactionOutputHasOnlyRangeTombstone) {
61   do {
62     Options opts = CurrentOptions();
63     opts.disable_auto_compactions = true;
64     opts.statistics = CreateDBStatistics();
65     DestroyAndReopen(opts);
66 
67     // snapshot protects range tombstone from dropping due to becoming obsolete.
68     const Snapshot* snapshot = db_->GetSnapshot();
69     db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z");
70     db_->Flush(FlushOptions());
71 
72     ASSERT_EQ(1, NumTableFilesAtLevel(0));
73     ASSERT_EQ(0, NumTableFilesAtLevel(1));
74     dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
75                                 true /* disallow_trivial_move */);
76     ASSERT_EQ(0, NumTableFilesAtLevel(0));
77     ASSERT_EQ(1, NumTableFilesAtLevel(1));
78     ASSERT_EQ(0, TestGetTickerCount(opts, COMPACTION_RANGE_DEL_DROP_OBSOLETE));
79     db_->ReleaseSnapshot(snapshot);
80     // Skip cuckoo memtables, which do not support snapshots. Skip non-leveled
81     // compactions as the above assertions about the number of files in a level
82     // do not hold true.
83   } while (ChangeOptions(kRangeDelSkipConfigs | kSkipUniversalCompaction |
84                          kSkipFIFOCompaction));
85 }
86 
87 TEST_F(DBRangeDelTest, CompactionOutputFilesExactlyFilled) {
88   // Regression test for exactly filled compaction output files. Previously
89   // another file would be generated containing all range deletions, which
90   // could invalidate the non-overlapping file boundary invariant.
91   const int kNumPerFile = 4, kNumFiles = 2, kFileBytes = 9 << 10;
92   Options options = CurrentOptions();
93   options.disable_auto_compactions = true;
94   options.level0_file_num_compaction_trigger = kNumFiles;
95   options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
96   options.num_levels = 2;
97   options.target_file_size_base = kFileBytes;
98   BlockBasedTableOptions table_options;
99   table_options.block_size_deviation = 50;  // each block holds two keys
100   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
101   Reopen(options);
102 
103   // snapshot protects range tombstone from dropping due to becoming obsolete.
104   const Snapshot* snapshot = db_->GetSnapshot();
105   db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), Key(1));
106 
107   Random rnd(301);
108   for (int i = 0; i < kNumFiles; ++i) {
109     std::vector<std::string> values;
110     // Write 12K (4 values, each 3K)
111     for (int j = 0; j < kNumPerFile; j++) {
112       values.push_back(RandomString(&rnd, 3 << 10));
113       ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
114       if (j == 0 && i > 0) {
115         dbfull()->TEST_WaitForFlushMemTable();
116       }
117     }
118   }
119   // put extra key to trigger final flush
120   ASSERT_OK(Put("", ""));
121   dbfull()->TEST_WaitForFlushMemTable();
122   ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
123   ASSERT_EQ(0, NumTableFilesAtLevel(1));
124 
125   dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
126                               true /* disallow_trivial_move */);
127   ASSERT_EQ(0, NumTableFilesAtLevel(0));
128   ASSERT_EQ(2, NumTableFilesAtLevel(1));
129   db_->ReleaseSnapshot(snapshot);
130 }
131 
132 TEST_F(DBRangeDelTest, MaxCompactionBytesCutsOutputFiles) {
133   // Ensures that a range deletion spanning multiple compaction output files cut
134   // by max_compaction_bytes still yields output files with non-overlapping
135   // key-ranges. https://github.com/facebook/rocksdb/issues/1778
136   const int kNumFiles = 2, kNumPerFile = 1 << 8, kBytesPerVal = 1 << 12;
137   Options opts = CurrentOptions();
138   opts.comparator = test::Uint64Comparator();
139   opts.disable_auto_compactions = true;
140   opts.level0_file_num_compaction_trigger = kNumFiles;
141   opts.max_compaction_bytes = kNumPerFile * kBytesPerVal;
142   opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
143   // Want max_compaction_bytes to trigger the end of compaction output file, not
144   // target_file_size_base, so make the latter much bigger
145   opts.target_file_size_base = 100 * opts.max_compaction_bytes;
146   Reopen(opts);
147 
148   // snapshot protects range tombstone from dropping due to becoming obsolete.
149   const Snapshot* snapshot = db_->GetSnapshot();
150 
151   // It spans the whole key-range, thus will be included in all output files
152   ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
153                              GetNumericStr(0),
154                              GetNumericStr(kNumFiles * kNumPerFile - 1)));
155   Random rnd(301);
156   for (int i = 0; i < kNumFiles; ++i) {
157     std::vector<std::string> values;
158     // Write 1MB (256 values, each 4K)
159     for (int j = 0; j < kNumPerFile; j++) {
160       values.push_back(RandomString(&rnd, kBytesPerVal));
161       ASSERT_OK(Put(GetNumericStr(kNumPerFile * i + j), values[j]));
162     }
163     // extra entry to trigger SpecialSkipListFactory's flush
164     ASSERT_OK(Put(GetNumericStr(kNumPerFile), ""));
165     dbfull()->TEST_WaitForFlushMemTable();
166     ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
167   }
168 
169   dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
170                               true /* disallow_trivial_move */);
171   ASSERT_EQ(0, NumTableFilesAtLevel(0));
172   ASSERT_GE(NumTableFilesAtLevel(1), 2);
173 
174   std::vector<std::vector<FileMetaData>> files;
175   dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);
176 
177   for (size_t i = 0; i < files[1].size() - 1; ++i) {
178     ASSERT_TRUE(InternalKeyComparator(opts.comparator)
179                     .Compare(files[1][i].largest, files[1][i + 1].smallest) <
180                 0);
181   }
182   db_->ReleaseSnapshot(snapshot);
183 }
184 
185 TEST_F(DBRangeDelTest, SentinelsOmittedFromOutputFile) {
186   // Regression test for bug where sentinel range deletions (i.e., ones with
187   // sequence number of zero) were included in output files.
188   // snapshot protects range tombstone from dropping due to becoming obsolete.
189   const Snapshot* snapshot = db_->GetSnapshot();
190 
191   // gaps between ranges creates sentinels in our internal representation
192   std::vector<std::pair<std::string, std::string>> range_dels = {{"a", "b"}, {"c", "d"}, {"e", "f"}};
193   for (const auto& range_del : range_dels) {
194     ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
195                                range_del.first, range_del.second));
196   }
197   ASSERT_OK(db_->Flush(FlushOptions()));
198   ASSERT_EQ(1, NumTableFilesAtLevel(0));
199 
200   std::vector<std::vector<FileMetaData>> files;
201   dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);
202   ASSERT_GT(files[0][0].fd.smallest_seqno, 0);
203 
204   db_->ReleaseSnapshot(snapshot);
205 }
206 
207 TEST_F(DBRangeDelTest, FlushRangeDelsSameStartKey) {
208   db_->Put(WriteOptions(), "b1", "val");
209   ASSERT_OK(
210       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c"));
211   db_->Put(WriteOptions(), "b2", "val");
212   ASSERT_OK(
213       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b"));
214   // first iteration verifies query correctness in memtable, second verifies
215   // query correctness for a single SST file
216   for (int i = 0; i < 2; ++i) {
217     if (i > 0) {
218       ASSERT_OK(db_->Flush(FlushOptions()));
219       ASSERT_EQ(1, NumTableFilesAtLevel(0));
220     }
221     std::string value;
222     ASSERT_TRUE(db_->Get(ReadOptions(), "b1", &value).IsNotFound());
223     ASSERT_OK(db_->Get(ReadOptions(), "b2", &value));
224   }
225 }
226 
227 TEST_F(DBRangeDelTest, CompactRangeDelsSameStartKey) {
228   db_->Put(WriteOptions(), "unused", "val");  // prevents empty after compaction
229   db_->Put(WriteOptions(), "b1", "val");
230   ASSERT_OK(db_->Flush(FlushOptions()));
231   ASSERT_OK(
232       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c"));
233   ASSERT_OK(db_->Flush(FlushOptions()));
234   ASSERT_OK(
235       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b"));
236   ASSERT_OK(db_->Flush(FlushOptions()));
237   ASSERT_EQ(3, NumTableFilesAtLevel(0));
238 
239   for (int i = 0; i < 2; ++i) {
240     if (i > 0) {
241       dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
242                                   true /* disallow_trivial_move */);
243       ASSERT_EQ(0, NumTableFilesAtLevel(0));
244       ASSERT_EQ(1, NumTableFilesAtLevel(1));
245     }
246     std::string value;
247     ASSERT_TRUE(db_->Get(ReadOptions(), "b1", &value).IsNotFound());
248   }
249 }
250 #endif  // ROCKSDB_LITE
251 
252 TEST_F(DBRangeDelTest, FlushRemovesCoveredKeys) {
253   const int kNum = 300, kRangeBegin = 50, kRangeEnd = 250;
254   Options opts = CurrentOptions();
255   opts.comparator = test::Uint64Comparator();
256   Reopen(opts);
257 
258   // Write a third before snapshot, a third between snapshot and tombstone, and
259   // a third after the tombstone. Keys older than snapshot or newer than the
260   // tombstone should be preserved.
261   const Snapshot* snapshot = nullptr;
262   for (int i = 0; i < kNum; ++i) {
263     if (i == kNum / 3) {
264       snapshot = db_->GetSnapshot();
265     } else if (i == 2 * kNum / 3) {
266       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
267                        GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd));
268     }
269     db_->Put(WriteOptions(), GetNumericStr(i), "val");
270   }
271   db_->Flush(FlushOptions());
272 
273   for (int i = 0; i < kNum; ++i) {
274     ReadOptions read_opts;
275     read_opts.ignore_range_deletions = true;
276     std::string value;
277     if (i < kRangeBegin || i > kRangeEnd || i < kNum / 3 || i >= 2 * kNum / 3) {
278       ASSERT_OK(db_->Get(read_opts, GetNumericStr(i), &value));
279     } else {
280       ASSERT_TRUE(db_->Get(read_opts, GetNumericStr(i), &value).IsNotFound());
281     }
282   }
283   db_->ReleaseSnapshot(snapshot);
284 }
285 
286 // NumTableFilesAtLevel() is not supported in ROCKSDB_LITE
287 #ifndef ROCKSDB_LITE
288 TEST_F(DBRangeDelTest, CompactionRemovesCoveredKeys) {
289   const int kNumPerFile = 100, kNumFiles = 4;
290   Options opts = CurrentOptions();
291   opts.comparator = test::Uint64Comparator();
292   opts.disable_auto_compactions = true;
293   opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
294   opts.num_levels = 2;
295   opts.statistics = CreateDBStatistics();
296   Reopen(opts);
297 
298   for (int i = 0; i < kNumFiles; ++i) {
299     if (i > 0) {
300       // range tombstone covers first half of the previous file
301       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
302                        GetNumericStr((i - 1) * kNumPerFile),
303                        GetNumericStr((i - 1) * kNumPerFile + kNumPerFile / 2));
304     }
305     // Make sure a given key appears in each file so compaction won't be able to
306     // use trivial move, which would happen if the ranges were non-overlapping.
307     // Also, we need an extra element since flush is only triggered when the
308     // number of keys is one greater than SpecialSkipListFactory's limit.
309     // We choose a key outside the key-range used by the test to avoid conflict.
310     db_->Put(WriteOptions(), GetNumericStr(kNumPerFile * kNumFiles), "val");
311 
312     for (int j = 0; j < kNumPerFile; ++j) {
313       db_->Put(WriteOptions(), GetNumericStr(i * kNumPerFile + j), "val");
314     }
315     dbfull()->TEST_WaitForFlushMemTable();
316     ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
317   }
318   db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
319   ASSERT_EQ(0, NumTableFilesAtLevel(0));
320   ASSERT_GT(NumTableFilesAtLevel(1), 0);
321   ASSERT_EQ((kNumFiles - 1) * kNumPerFile / 2,
322             TestGetTickerCount(opts, COMPACTION_KEY_DROP_RANGE_DEL));
323 
324   for (int i = 0; i < kNumFiles; ++i) {
325     for (int j = 0; j < kNumPerFile; ++j) {
326       ReadOptions read_opts;
327       read_opts.ignore_range_deletions = true;
328       std::string value;
329       if (i == kNumFiles - 1 || j >= kNumPerFile / 2) {
330         ASSERT_OK(
331             db_->Get(read_opts, GetNumericStr(i * kNumPerFile + j), &value));
332       } else {
333         ASSERT_TRUE(
334             db_->Get(read_opts, GetNumericStr(i * kNumPerFile + j), &value)
335                 .IsNotFound());
336       }
337     }
338   }
339 }
340 
341 TEST_F(DBRangeDelTest, ValidLevelSubcompactionBoundaries) {
342   const int kNumPerFile = 100, kNumFiles = 4, kFileBytes = 100 << 10;
343   Options options = CurrentOptions();
344   options.disable_auto_compactions = true;
345   options.level0_file_num_compaction_trigger = kNumFiles;
346   options.max_bytes_for_level_base = 2 * kFileBytes;
347   options.max_subcompactions = 4;
348   options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
349   options.num_levels = 3;
350   options.target_file_size_base = kFileBytes;
351   options.target_file_size_multiplier = 1;
352   Reopen(options);
353 
354   Random rnd(301);
355   for (int i = 0; i < 2; ++i) {
356     for (int j = 0; j < kNumFiles; ++j) {
357       if (i > 0) {
358         // delete [95,105) in two files, [295,305) in next two
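        // (j + (1 - j % 2)) rounds j up to the nearest odd number, so mid is
        // kNumPerFile for j in {0, 1} and 3 * kNumPerFile for j in {2, 3}.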
359         int mid = (j + (1 - j % 2)) * kNumPerFile;
360         db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
361                          Key(mid - 5), Key(mid + 5));
362       }
363       std::vector<std::string> values;
364       // Write 100KB (100 values, each 1K)
365       for (int k = 0; k < kNumPerFile; k++) {
366         values.push_back(RandomString(&rnd, 990));
367         ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k]));
368       }
369       // put extra key to trigger flush
370       ASSERT_OK(Put("", ""));
371       dbfull()->TEST_WaitForFlushMemTable();
372       if (j < kNumFiles - 1) {
373         // background compaction may happen early for kNumFiles'th file
374         ASSERT_EQ(NumTableFilesAtLevel(0), j + 1);
375       }
376       if (j == options.level0_file_num_compaction_trigger - 1) {
377         // When i == 1, compaction will output some files to L1, at which point
378         // L1 is not bottommost so range deletions cannot be compacted away. The
379         // new L1 files must be generated with non-overlapping key ranges even
380         // though multiple subcompactions see the same ranges deleted, else an
381         // assertion will fail.
382         //
383         // Only enable auto-compactions when we're ready; otherwise, the
384         // oversized L0 (relative to base_level) causes the compaction to run
385         // earlier.
386         ASSERT_OK(db_->EnableAutoCompaction({db_->DefaultColumnFamily()}));
387         dbfull()->TEST_WaitForCompact();
388         ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(),
389                                   {{"disable_auto_compactions", "true"}}));
390         ASSERT_EQ(NumTableFilesAtLevel(0), 0);
391         ASSERT_GT(NumTableFilesAtLevel(1), 0);
392         ASSERT_GT(NumTableFilesAtLevel(2), 0);
393       }
394     }
395   }
396 }
397 
398 TEST_F(DBRangeDelTest, ValidUniversalSubcompactionBoundaries) {
399   const int kNumPerFile = 100, kFilesPerLevel = 4, kNumLevels = 4;
400   Options options = CurrentOptions();
401   options.compaction_options_universal.min_merge_width = kFilesPerLevel;
402   options.compaction_options_universal.max_merge_width = kFilesPerLevel;
403   options.compaction_options_universal.size_ratio = 10;
404   options.compaction_style = kCompactionStyleUniversal;
405   options.level0_file_num_compaction_trigger = kFilesPerLevel;
406   options.max_subcompactions = 4;
407   options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
408   options.num_levels = kNumLevels;
409   options.target_file_size_base = kNumPerFile << 10;
410   options.target_file_size_multiplier = 1;
411   Reopen(options);
412 
413   Random rnd(301);
414   for (int i = 0; i < kNumLevels - 1; ++i) {
415     for (int j = 0; j < kFilesPerLevel; ++j) {
416       if (i == kNumLevels - 2) {
417         // insert range deletions [95,105) in two files, [295,305) in next two
418         // to prepare L1 for later manual compaction.
419         int mid = (j + (1 - j % 2)) * kNumPerFile;
420         db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
421                          Key(mid - 5), Key(mid + 5));
422       }
423       std::vector<std::string> values;
424       // Write 100KB (100 values, each 1K)
425       for (int k = 0; k < kNumPerFile; k++) {
426         values.push_back(RandomString(&rnd, 990));
427         ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k]));
428       }
429       // put extra key to trigger flush
430       ASSERT_OK(Put("", ""));
431       dbfull()->TEST_WaitForFlushMemTable();
432       if (j < kFilesPerLevel - 1) {
433         // background compaction may happen early for kFilesPerLevel'th file
434         ASSERT_EQ(NumTableFilesAtLevel(0), j + 1);
435       }
436     }
437     dbfull()->TEST_WaitForCompact();
438     ASSERT_EQ(NumTableFilesAtLevel(0), 0);
439     ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 1);
440   }
441   // Now L1-L3 are full. When we compact L1->L2 we should see (1) subcompactions
442   // happen since the input level > 0; (2) range deletions are not dropped since
443   // the output level is not bottommost. If no file boundary assertion fails,
444   // that probably means universal compaction + subcompaction + range deletion
445   // are compatible.
446   ASSERT_OK(dbfull()->RunManualCompaction(
447       reinterpret_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())
448           ->cfd(),
449       1 /* input_level */, 2 /* output_level */, CompactRangeOptions(),
450       nullptr /* begin */, nullptr /* end */, true /* exclusive */,
451       true /* disallow_trivial_move */,
452       port::kMaxUint64 /* max_file_num_to_ignore */));
453 }
454 #endif  // ROCKSDB_LITE
455 
456 TEST_F(DBRangeDelTest, CompactionRemovesCoveredMergeOperands) {
457   const int kNumPerFile = 3, kNumFiles = 3;
458   Options opts = CurrentOptions();
459   opts.disable_auto_compactions = true;
460   opts.memtable_factory.reset(new SpecialSkipListFactory(2 * kNumPerFile));
461   opts.merge_operator = MergeOperators::CreateUInt64AddOperator();
462   opts.num_levels = 2;
463   Reopen(opts);
464 
465   // Iterates kNumFiles * kNumPerFile + 1 times since flushing the last file
466   // requires an extra entry.
467   for (int i = 0; i <= kNumFiles * kNumPerFile; ++i) {
468     if (i % kNumPerFile == 0 && i / kNumPerFile == kNumFiles - 1) {
469       // Delete merge operands from all but the last file
470       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key",
471                        "key_");
472     }
473     std::string val;
474     PutFixed64(&val, i);
475     db_->Merge(WriteOptions(), "key", val);
476     // we need to prevent trivial move using Puts so compaction will actually
477     // process the merge operands.
478     db_->Put(WriteOptions(), "prevent_trivial_move", "");
479     if (i > 0 && i % kNumPerFile == 0) {
480       dbfull()->TEST_WaitForFlushMemTable();
481     }
482   }
483 
484   ReadOptions read_opts;
485   read_opts.ignore_range_deletions = true;
486   std::string expected, actual;
487   ASSERT_OK(db_->Get(read_opts, "key", &actual));
488   PutFixed64(&expected, 45);  // 1+2+...+9
489   ASSERT_EQ(expected, actual);
490 
491   db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
492 
493   expected.clear();
494   ASSERT_OK(db_->Get(read_opts, "key", &actual));
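  // Decode the merged result; the decoded value itself is unused since the
  // assertion below compares the raw fixed64 encoding.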
495   uint64_t tmp;
496   Slice tmp2(actual);
497   GetFixed64(&tmp2, &tmp);
498   PutFixed64(&expected, 30);  // 6+7+8+9 (earlier operands covered by tombstone)
499   ASSERT_EQ(expected, actual);
500 }
501 
502 TEST_F(DBRangeDelTest, PutDeleteRangeMergeFlush) {
503   // Test the sequence of operations: (1) Put, (2) DeleteRange, (3) Merge, (4)
504   // Flush. The `CompactionIterator` previously had a bug where we forgot to
505   // check for covering range tombstones when processing the (1) Put, causing
506   // it to reappear after the flush.
507   Options opts = CurrentOptions();
508   opts.merge_operator = MergeOperators::CreateUInt64AddOperator();
509   Reopen(opts);
510 
511   std::string val;
512   PutFixed64(&val, 1);
513   ASSERT_OK(db_->Put(WriteOptions(), "key", val));
514   ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
515                              "key", "key_"));
516   ASSERT_OK(db_->Merge(WriteOptions(), "key", val));
517   ASSERT_OK(db_->Flush(FlushOptions()));
518 
519   ReadOptions read_opts;
520   std::string expected, actual;
521   ASSERT_OK(db_->Get(read_opts, "key", &actual));
522   PutFixed64(&expected, 1);
523   ASSERT_EQ(expected, actual);
524 }
525 
526 // NumTableFilesAtLevel() is not supported in ROCKSDB_LITE
527 #ifndef ROCKSDB_LITE
528 TEST_F(DBRangeDelTest, ObsoleteTombstoneCleanup) {
529   // During compaction to bottommost level, verify range tombstones older than
530   // the oldest snapshot are removed, while others are preserved.
531   Options opts = CurrentOptions();
532   opts.disable_auto_compactions = true;
533   opts.num_levels = 2;
534   opts.statistics = CreateDBStatistics();
535   Reopen(opts);
536 
537   db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr1",
538                    "dr10");  // obsolete after compaction
539   db_->Put(WriteOptions(), "key", "val");
540   db_->Flush(FlushOptions());
541   const Snapshot* snapshot = db_->GetSnapshot();
542   db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr2",
543                    "dr20");  // protected by snapshot
544   db_->Put(WriteOptions(), "key", "val");
545   db_->Flush(FlushOptions());
546 
547   ASSERT_EQ(2, NumTableFilesAtLevel(0));
548   ASSERT_EQ(0, NumTableFilesAtLevel(1));
549   db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
550   ASSERT_EQ(0, NumTableFilesAtLevel(0));
551   ASSERT_EQ(1, NumTableFilesAtLevel(1));
552   ASSERT_EQ(1, TestGetTickerCount(opts, COMPACTION_RANGE_DEL_DROP_OBSOLETE));
553 
554   db_->ReleaseSnapshot(snapshot);
555 }
556 
557 TEST_F(DBRangeDelTest, TableEvictedDuringScan) {
558   // The RangeDelAggregator holds pointers into range deletion blocks created by
559   // table readers. This test ensures the aggregator can still access those
560   // blocks even if it outlives the table readers that created them.
561   //
562   // DBIter always keeps readers open for L0 files. So, in order to test
563   // aggregator outliving reader, we need to have deletions in L1 files, which
564   // are opened/closed on-demand during the scan. This is accomplished by
565   // setting kNumRanges > level0_stop_writes_trigger, which prevents deletions
566   // from all lingering in L0 (there is at most one range deletion per L0 file).
567   //
568   // The first L1 file will contain a range deletion since its begin key is 0.
569   // SeekToFirst() references that table's reader and adds its range tombstone
570   // to the aggregator. Upon advancing beyond that table's key-range via Next(),
571   // the table reader will be unreferenced by the iterator. Since we manually
572   // call Evict() on all readers before the full scan, this unreference causes
573   // the reader's refcount to drop to zero and thus be destroyed.
574   //
575   // When it is destroyed, we do not remove its range deletions from the
576   // aggregator. So, subsequent calls to Next() must be able to use these
577   // deletions to decide whether a key is covered. This will work as long as
578   // the aggregator properly references the range deletion block.
579   const int kNum = 25, kRangeBegin = 0, kRangeEnd = 7, kNumRanges = 5;
580   Options opts = CurrentOptions();
581   opts.comparator = test::Uint64Comparator();
582   opts.level0_file_num_compaction_trigger = 4;
583   opts.level0_stop_writes_trigger = 4;
584   opts.memtable_factory.reset(new SpecialSkipListFactory(1));
585   opts.num_levels = 2;
586   BlockBasedTableOptions bbto;
587   bbto.cache_index_and_filter_blocks = true;
588   bbto.block_cache = NewLRUCache(8 << 20);
589   opts.table_factory.reset(NewBlockBasedTableFactory(bbto));
590   Reopen(opts);
591 
592   // Hold a snapshot so range deletions can't become obsolete during compaction
593   // to bottommost level (i.e., L1).
594   const Snapshot* snapshot = db_->GetSnapshot();
595   for (int i = 0; i < kNum; ++i) {
596     db_->Put(WriteOptions(), GetNumericStr(i), "val");
597     if (i > 0) {
598       dbfull()->TEST_WaitForFlushMemTable();
599     }
600     if (i >= kNum / 2 && i < kNum / 2 + kNumRanges) {
601       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
602                        GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd));
603     }
604   }
605   // Must be > 1 so the first L1 file can be closed before scan finishes
606   dbfull()->TEST_WaitForCompact();
607   ASSERT_GT(NumTableFilesAtLevel(1), 1);
608   std::vector<uint64_t> file_numbers = ListTableFiles(env_, dbname_);
609 
610   ReadOptions read_opts;
611   auto* iter = db_->NewIterator(read_opts);
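  // Keys [0, kRangeEnd) are covered by the range tombstones, so the first key
  // the scan should return is kRangeEnd.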
612   int expected = kRangeEnd;
613   iter->SeekToFirst();
614   for (auto file_number : file_numbers) {
615     // This puts table caches in the state of being externally referenced only
616     // so they are destroyed immediately upon iterator unreferencing.
617     TableCache::Evict(dbfull()->TEST_table_cache(), file_number);
618   }
619   for (; iter->Valid(); iter->Next()) {
620     ASSERT_EQ(GetNumericStr(expected), iter->key());
621     ++expected;
622     // Keep clearing block cache's LRU so range deletion block can be freed as
623     // soon as its refcount drops to zero.
624     bbto.block_cache->EraseUnRefEntries();
625   }
626   ASSERT_EQ(kNum, expected);
627   delete iter;
628   db_->ReleaseSnapshot(snapshot);
629 }
630 
631 TEST_F(DBRangeDelTest, GetCoveredKeyFromMutableMemtable) {
632   do {
633     DestroyAndReopen(CurrentOptions());
634     db_->Put(WriteOptions(), "key", "val");
635     ASSERT_OK(
636         db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
637 
638     ReadOptions read_opts;
639     std::string value;
640     ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound());
641   } while (ChangeOptions(kRangeDelSkipConfigs));
642 }
643 
644 TEST_F(DBRangeDelTest, GetCoveredKeyFromImmutableMemtable) {
645   do {
646     Options opts = CurrentOptions();
647     opts.max_write_buffer_number = 3;
648     opts.min_write_buffer_number_to_merge = 2;
649     // SpecialSkipListFactory lets us specify maximum number of elements the
650     // memtable can hold. It switches the active memtable to immutable (flush is
651     // prevented by the above options) upon inserting an element that would
652     // overflow the memtable.
653     opts.memtable_factory.reset(new SpecialSkipListFactory(1));
654     DestroyAndReopen(opts);
655 
656     db_->Put(WriteOptions(), "key", "val");
657     ASSERT_OK(
658         db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
659     db_->Put(WriteOptions(), "blah", "val");
660 
661     ReadOptions read_opts;
662     std::string value;
663     ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound());
664   } while (ChangeOptions(kRangeDelSkipConfigs));
665 }
666 
667 TEST_F(DBRangeDelTest, GetCoveredKeyFromSst) {
668   do {
669     DestroyAndReopen(CurrentOptions());
670     db_->Put(WriteOptions(), "key", "val");
671     // snapshot prevents key from being deleted during flush
672     const Snapshot* snapshot = db_->GetSnapshot();
673     ASSERT_OK(
674         db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
675     ASSERT_OK(db_->Flush(FlushOptions()));
676 
677     ReadOptions read_opts;
678     std::string value;
679     ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound());
680     db_->ReleaseSnapshot(snapshot);
681   } while (ChangeOptions(kRangeDelSkipConfigs));
682 }
683 
684 TEST_F(DBRangeDelTest, GetCoveredMergeOperandFromMemtable) {
685   const int kNumMergeOps = 10;
686   Options opts = CurrentOptions();
687   opts.merge_operator = MergeOperators::CreateUInt64AddOperator();
688   Reopen(opts);
689 
690   for (int i = 0; i < kNumMergeOps; ++i) {
691     std::string val;
692     PutFixed64(&val, i);
693     db_->Merge(WriteOptions(), "key", val);
694     if (i == kNumMergeOps / 2) {
695       // deletes [0, 5]
696       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key",
697                        "key_");
698     }
699   }
700 
701   ReadOptions read_opts;
702   std::string expected, actual;
703   ASSERT_OK(db_->Get(read_opts, "key", &actual));
704   PutFixed64(&expected, 30);  // 6+7+8+9
705   ASSERT_EQ(expected, actual);
706 
707   expected.clear();
708   read_opts.ignore_range_deletions = true;
709   ASSERT_OK(db_->Get(read_opts, "key", &actual));
710   PutFixed64(&expected, 45);  // 0+1+2+...+9
711   ASSERT_EQ(expected, actual);
712 }
713 
714 TEST_F(DBRangeDelTest, GetIgnoresRangeDeletions) {
715   Options opts = CurrentOptions();
716   opts.max_write_buffer_number = 4;
717   opts.min_write_buffer_number_to_merge = 3;
718   opts.memtable_factory.reset(new SpecialSkipListFactory(1));
719   Reopen(opts);
720 
721   db_->Put(WriteOptions(), "sst_key", "val");
722   // snapshot prevents key from being deleted during flush
723   const Snapshot* snapshot = db_->GetSnapshot();
724   ASSERT_OK(
725       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
726   ASSERT_OK(db_->Flush(FlushOptions()));
727   db_->Put(WriteOptions(), "imm_key", "val");
728   ASSERT_OK(
729       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
730   db_->Put(WriteOptions(), "mem_key", "val");
731   ASSERT_OK(
732       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
733 
734   ReadOptions read_opts;
735   read_opts.ignore_range_deletions = true;
736   for (std::string key : {"sst_key", "imm_key", "mem_key"}) {
737     std::string value;
738     ASSERT_OK(db_->Get(read_opts, key, &value));
739   }
740   db_->ReleaseSnapshot(snapshot);
741 }
742 
743 TEST_F(DBRangeDelTest, IteratorRemovesCoveredKeys) {
744   const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25;
745   Options opts = CurrentOptions();
746   opts.comparator = test::Uint64Comparator();
747   opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
748   Reopen(opts);
749 
750   // Write half of the keys before the tombstone and half after the tombstone.
751   // Only covered keys (i.e., within the range and older than the tombstone)
752   // should be deleted.
753   for (int i = 0; i < kNum; ++i) {
754     if (i == kNum / 2) {
755       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
756                        GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd));
757     }
758     db_->Put(WriteOptions(), GetNumericStr(i), "val");
759   }
760   ReadOptions read_opts;
761   auto* iter = db_->NewIterator(read_opts);
762 
763   int expected = 0;
764   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
765     ASSERT_EQ(GetNumericStr(expected), iter->key());
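    // Keys [kRangeBegin, kNum / 2) were written before the tombstone and are
    // covered by it, so the scan skips from kRangeBegin - 1 to kNum / 2.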
766     if (expected == kRangeBegin - 1) {
767       expected = kNum / 2;
768     } else {
769       ++expected;
770     }
771   }
772   ASSERT_EQ(kNum, expected);
773   delete iter;
774 }
775 
776 TEST_F(DBRangeDelTest, IteratorOverUserSnapshot) {
777   const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25;
778   Options opts = CurrentOptions();
779   opts.comparator = test::Uint64Comparator();
780   opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
781   Reopen(opts);
782 
783   const Snapshot* snapshot = nullptr;
784   // Put a snapshot before the range tombstone, verify an iterator using that
785   // snapshot sees all inserted keys.
786   for (int i = 0; i < kNum; ++i) {
787     if (i == kNum / 2) {
788       snapshot = db_->GetSnapshot();
789       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
790                        GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd));
791     }
792     db_->Put(WriteOptions(), GetNumericStr(i), "val");
793   }
794   ReadOptions read_opts;
795   read_opts.snapshot = snapshot;
796   auto* iter = db_->NewIterator(read_opts);
797 
798   int expected = 0;
799   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
800     ASSERT_EQ(GetNumericStr(expected), iter->key());
801     ++expected;
802   }
803   ASSERT_EQ(kNum / 2, expected);
804   delete iter;
805   db_->ReleaseSnapshot(snapshot);
806 }
807 
808 TEST_F(DBRangeDelTest, IteratorIgnoresRangeDeletions) {
809   Options opts = CurrentOptions();
810   opts.max_write_buffer_number = 4;
811   opts.min_write_buffer_number_to_merge = 3;
812   opts.memtable_factory.reset(new SpecialSkipListFactory(1));
813   Reopen(opts);
814 
815   db_->Put(WriteOptions(), "sst_key", "val");
816   // snapshot prevents key from being deleted during flush
817   const Snapshot* snapshot = db_->GetSnapshot();
818   ASSERT_OK(
819       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
820   ASSERT_OK(db_->Flush(FlushOptions()));
821   db_->Put(WriteOptions(), "imm_key", "val");
822   ASSERT_OK(
823       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
824   db_->Put(WriteOptions(), "mem_key", "val");
825   ASSERT_OK(
826       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
827 
828   ReadOptions read_opts;
829   read_opts.ignore_range_deletions = true;
830   auto* iter = db_->NewIterator(read_opts);
831   int i = 0;
832   std::string expected[] = {"imm_key", "mem_key", "sst_key"};
833   for (iter->SeekToFirst(); iter->Valid(); iter->Next(), ++i) {
834     std::string key;
835     ASSERT_EQ(expected[i], iter->key());
836   }
837   ASSERT_EQ(3, i);
838   delete iter;
839   db_->ReleaseSnapshot(snapshot);
840 }
841 
842 #ifndef ROCKSDB_UBSAN_RUN
843 TEST_F(DBRangeDelTest, TailingIteratorRangeTombstoneUnsupported) {
844   db_->Put(WriteOptions(), "key", "val");
845   // snapshot prevents key from being deleted during flush
846   const Snapshot* snapshot = db_->GetSnapshot();
847   ASSERT_OK(
848       db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
849 
850   // iterations check unsupported in memtable, l0, and then l1
851   for (int i = 0; i < 3; ++i) {
852     ReadOptions read_opts;
853     read_opts.tailing = true;
854     auto* iter = db_->NewIterator(read_opts);
855     if (i == 2) {
856       // For L1+, iterators over files are created on-demand, so need seek
857       iter->SeekToFirst();
858     }
859     ASSERT_TRUE(iter->status().IsNotSupported());
860     delete iter;
861     if (i == 0) {
862       ASSERT_OK(db_->Flush(FlushOptions()));
863     } else if (i == 1) {
864       MoveFilesToLevel(1);
865     }
866   }
867   db_->ReleaseSnapshot(snapshot);
868 }
869 
870 #endif  // !ROCKSDB_UBSAN_RUN
871 
872 TEST_F(DBRangeDelTest, SubcompactionHasEmptyDedicatedRangeDelFile) {
873   const int kNumFiles = 2, kNumKeysPerFile = 4;
874   Options options = CurrentOptions();
875   options.compression = kNoCompression;
876   options.disable_auto_compactions = true;
877   options.level0_file_num_compaction_trigger = kNumFiles;
878   options.max_subcompactions = 2;
879   options.num_levels = 2;
880   options.target_file_size_base = 4096;
881   Reopen(options);
882 
883   // need an L1 file for subcompaction to be triggered
884   ASSERT_OK(
885       db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(0), "val"));
886   ASSERT_OK(db_->Flush(FlushOptions()));
887   MoveFilesToLevel(1);
888 
889   // put enough keys to fill up the first subcompaction, and later range-delete
890   // them so that the first subcompaction outputs no key-values. In that case
891   // it'll consider making an SST file dedicated to range deletions.
892   for (int i = 0; i < kNumKeysPerFile; ++i) {
893     ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(i),
894                        std::string(1024, 'a')));
895   }
896   ASSERT_OK(db_->Flush(FlushOptions()));
897   ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
898                              Key(kNumKeysPerFile)));
899 
900   // the above range tombstone can be dropped, so that one alone won't cause a
901   // dedicated file to be opened. We can make one protected by snapshot that
902   // must be considered. Make its range outside the first subcompaction's range
903   // to exercise the tricky part of the code.
904   const Snapshot* snapshot = db_->GetSnapshot();
905   ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
906                              Key(kNumKeysPerFile + 1),
907                              Key(kNumKeysPerFile + 2)));
908   ASSERT_OK(db_->Flush(FlushOptions()));
909 
910   ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
911   ASSERT_EQ(1, NumTableFilesAtLevel(1));
912 
913   db_->EnableAutoCompaction({db_->DefaultColumnFamily()});
914   dbfull()->TEST_WaitForCompact();
915   db_->ReleaseSnapshot(snapshot);
916 }
917 
918 TEST_F(DBRangeDelTest, MemtableBloomFilter) {
919   // Regression test for #2743. The range delete tombstones in memtable should
920   // be added even when Get() skips searching due to its prefix bloom filter.
921   const int kMemtableSize = 1 << 20;              // 1MB
922   const int kMemtablePrefixFilterSize = 1 << 13;  // 8KB
923   const int kNumKeys = 1000;
924   const int kPrefixLen = 8;
925   Options options = CurrentOptions();
926   options.memtable_prefix_bloom_size_ratio =
927       static_cast<double>(kMemtablePrefixFilterSize) / kMemtableSize;
928   options.prefix_extractor.reset(
929       ROCKSDB_NAMESPACE::NewFixedPrefixTransform(kPrefixLen));
930   options.write_buffer_size = kMemtableSize;
931   Reopen(options);
932 
933   for (int i = 0; i < kNumKeys; ++i) {
934     ASSERT_OK(Put(Key(i), "val"));
935   }
936   Flush();
937   ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
938                              Key(kNumKeys)));
939   for (int i = 0; i < kNumKeys; ++i) {
940     std::string value;
941     ASSERT_TRUE(db_->Get(ReadOptions(), Key(i), &value).IsNotFound());
942   }
943 }
944 
945 TEST_F(DBRangeDelTest, CompactionTreatsSplitInputLevelDeletionAtomically) {
946   // This test originally verified that compaction treated files containing a
947   // split range deletion in the input level as an atomic unit. I.e.,
948   // compacting any input-level file(s) containing a portion of the range
949   // deletion causes all other input-level files containing portions of that
950   // same range deletion to be included in the compaction. Range deletion
951   // tombstones are now truncated to sstable boundaries which removed the need
952   // for that behavior (which could lead to excessively large
953   // compactions).
954   const int kNumFilesPerLevel = 4, kValueBytes = 4 << 10;
955   Options options = CurrentOptions();
956   options.compression = kNoCompression;
957   options.level0_file_num_compaction_trigger = kNumFilesPerLevel;
958   options.memtable_factory.reset(
959       new SpecialSkipListFactory(2 /* num_entries_flush */));
960   options.target_file_size_base = kValueBytes;
961   // i == 0: CompactFiles
962   // i == 1: CompactRange
963   // i == 2: automatic compaction
964   for (int i = 0; i < 3; ++i) {
965     DestroyAndReopen(options);
966 
967     ASSERT_OK(Put(Key(0), ""));
968     ASSERT_OK(db_->Flush(FlushOptions()));
969     MoveFilesToLevel(2);
970     ASSERT_EQ(1, NumTableFilesAtLevel(2));
971 
972     // snapshot protects range tombstone from dropping due to becoming obsolete.
973     const Snapshot* snapshot = db_->GetSnapshot();
974     db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
975                      Key(2 * kNumFilesPerLevel));
976 
977     Random rnd(301);
978     std::string value = RandomString(&rnd, kValueBytes);
979     for (int j = 0; j < kNumFilesPerLevel; ++j) {
980       // give files overlapping key-ranges to prevent trivial move
981       ASSERT_OK(Put(Key(j), value));
982       ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value));
983       if (j > 0) {
984         dbfull()->TEST_WaitForFlushMemTable();
985         ASSERT_EQ(j, NumTableFilesAtLevel(0));
986       }
987     }
988     // put extra key to trigger final flush
989     ASSERT_OK(Put("", ""));
990     dbfull()->TEST_WaitForFlushMemTable();
991     dbfull()->TEST_WaitForCompact();
992     ASSERT_EQ(0, NumTableFilesAtLevel(0));
993     ASSERT_EQ(kNumFilesPerLevel, NumTableFilesAtLevel(1));
994 
995     ColumnFamilyMetaData meta;
996     db_->GetColumnFamilyMetaData(&meta);
997     if (i == 0) {
998       ASSERT_OK(db_->CompactFiles(
999           CompactionOptions(), {meta.levels[1].files[0].name}, 2 /* level */));
1000       ASSERT_EQ(0, NumTableFilesAtLevel(1));
1001     } else if (i == 1) {
1002       auto begin_str = Key(0), end_str = Key(1);
1003       Slice begin = begin_str, end = end_str;
1004       ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin, &end));
1005       ASSERT_EQ(3, NumTableFilesAtLevel(1));
1006     } else if (i == 2) {
1007       ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(),
1008                                 {{"max_bytes_for_level_base", "10000"}}));
1009       dbfull()->TEST_WaitForCompact();
1010       ASSERT_EQ(1, NumTableFilesAtLevel(1));
1011     }
1012     ASSERT_GT(NumTableFilesAtLevel(2), 0);
1013 
1014     db_->ReleaseSnapshot(snapshot);
1015   }
1016 }
1017 
1018 TEST_F(DBRangeDelTest, RangeTombstoneEndKeyAsSstableUpperBound) {
1019   // Test the handling of the range-tombstone end-key as the
1020   // upper-bound for an sstable.
1021 
1022   const int kNumFilesPerLevel = 2, kValueBytes = 4 << 10;
1023   Options options = CurrentOptions();
1024   options.compression = kNoCompression;
1025   options.level0_file_num_compaction_trigger = kNumFilesPerLevel;
1026   options.memtable_factory.reset(
1027       new SpecialSkipListFactory(2 /* num_entries_flush */));
1028   options.target_file_size_base = kValueBytes;
1029   options.disable_auto_compactions = true;
1030 
1031   DestroyAndReopen(options);
1032 
1033   // Create an initial sstable at L2:
1034   //   [key000000#1,1, key000000#1,1]
1035   ASSERT_OK(Put(Key(0), ""));
1036   ASSERT_OK(db_->Flush(FlushOptions()));
1037   MoveFilesToLevel(2);
1038   ASSERT_EQ(1, NumTableFilesAtLevel(2));
1039 
1040   // A snapshot protects the range tombstone from dropping due to
1041   // becoming obsolete.
1042   const Snapshot* snapshot = db_->GetSnapshot();
1043   db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
1044                    Key(0), Key(2 * kNumFilesPerLevel));
1045 
1046   // Create 2 additional sstables in L0. Note that the first sstable
1047   // contains the range tombstone.
1048   //   [key000000#3,1, key000004#72057594037927935,15]
1049   //   [key000001#5,1, key000002#6,1]
1050   Random rnd(301);
1051   std::string value = RandomString(&rnd, kValueBytes);
1052   for (int j = 0; j < kNumFilesPerLevel; ++j) {
1053     // Give files overlapping key-ranges to prevent a trivial move when we
1054     // compact from L0 to L1.
1055     ASSERT_OK(Put(Key(j), value));
1056     ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value));
1057     ASSERT_OK(db_->Flush(FlushOptions()));
1058     ASSERT_EQ(j + 1, NumTableFilesAtLevel(0));
1059   }
1060   // Compact the 2 L0 sstables to L1, resulting in the following LSM. There
1061   // are 2 sstables generated in L1 due to the target_file_size_base setting.
1062   //   L1:
1063   //     [key000000#3,1, key000002#72057594037927935,15]
1064   //     [key000002#6,1, key000004#72057594037927935,15]
1065   //   L2:
1066   //     [key000000#1,1, key000000#1,1]
1067   MoveFilesToLevel(1);
1068   ASSERT_EQ(2, NumTableFilesAtLevel(1));
1069 
1070   {
1071     // Compact the second sstable in L1:
1072     //   L1:
1073     //     [key000000#3,1, key000002#72057594037927935,15]
1074     //   L2:
1075     //     [key000000#1,1, key000000#1,1]
1076     //     [key000002#6,1, key000004#72057594037927935,15]
1077     //
1078     // At the same time, verify the compaction does not cause the key at the
1079     // endpoint (key000002#6,1) to disappear.
1080     ASSERT_EQ(value, Get(Key(2)));
1081     auto begin_str = Key(3);
1082     const ROCKSDB_NAMESPACE::Slice begin = begin_str;
1083     dbfull()->TEST_CompactRange(1, &begin, nullptr);
1084     ASSERT_EQ(1, NumTableFilesAtLevel(1));
1085     ASSERT_EQ(2, NumTableFilesAtLevel(2));
1086     ASSERT_EQ(value, Get(Key(2)));
1087   }
1088 
1089   {
1090     // Compact the first sstable in L1. This should be copacetic, but
1091     // was previously resulting in overlapping sstables in L2 due to
1092     // mishandling of the range tombstone end-key when used as the
1093     // largest key for an sstable. The resulting LSM structure should
1094     // be:
1095     //
1096     //   L2:
1097     //     [key000000#1,1, key000001#72057594037927935,15]
1098     //     [key000001#5,1, key000002#72057594037927935,15]
1099     //     [key000002#6,1, key000004#72057594037927935,15]
1100     auto begin_str = Key(0);
1101     const ROCKSDB_NAMESPACE::Slice begin = begin_str;
1102     dbfull()->TEST_CompactRange(1, &begin, &begin);
1103     ASSERT_EQ(0, NumTableFilesAtLevel(1));
1104     ASSERT_EQ(3, NumTableFilesAtLevel(2));
1105   }
1106 
1107   db_->ReleaseSnapshot(snapshot);
1108 }
1109 
1110 TEST_F(DBRangeDelTest, UnorderedTombstones) {
1111   // Regression test for #2752. Range delete tombstones between
1112   // different snapshot stripes are not stored in order, so the first
1113   // tombstone of each snapshot stripe should be checked as a smallest
1114   // candidate.
1115   Options options = CurrentOptions();
1116   DestroyAndReopen(options);
1117 
1118   auto cf = db_->DefaultColumnFamily();
1119 
1120   ASSERT_OK(db_->Put(WriteOptions(), cf, "a", "a"));
1121   ASSERT_OK(db_->Flush(FlushOptions(), cf));
1122   ASSERT_EQ(1, NumTableFilesAtLevel(0));
1123   ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr));
1124   ASSERT_EQ(1, NumTableFilesAtLevel(1));
1125 
1126   ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "b", "c"));
1127   // Hold a snapshot to separate these two delete ranges.
1128   auto snapshot = db_->GetSnapshot();
1129   ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "a", "b"));
1130   ASSERT_OK(db_->Flush(FlushOptions(), cf));
1131   db_->ReleaseSnapshot(snapshot);
1132 
1133   std::vector<std::vector<FileMetaData>> files;
1134   dbfull()->TEST_GetFilesMetaData(cf, &files);
1135   ASSERT_EQ(1, files[0].size());
1136   ASSERT_EQ("a", files[0][0].smallest.user_key());
1137   ASSERT_EQ("c", files[0][0].largest.user_key());
1138 
1139   std::string v;
1140   auto s = db_->Get(ReadOptions(), "a", &v);
1141   ASSERT_TRUE(s.IsNotFound());
1142 }
1143 
1144 class MockMergeOperator : public MergeOperator {
1145   // Mock non-associative operator. Non-associativity is expressed by lack of
1146   // implementation for any `PartialMerge*` functions.
1147  public:
1148   bool FullMergeV2(const MergeOperationInput& merge_in,
1149                    MergeOperationOutput* merge_out) const override {
1150     assert(merge_out != nullptr);
1151     merge_out->new_value = merge_in.operand_list.back().ToString();
1152     return true;
1153   }
1154 
1155   const char* Name() const override { return "MockMergeOperator"; }
1156 };
1157 
1158 TEST_F(DBRangeDelTest, KeyAtOverlappingEndpointReappears) {
1159   // This test uses a non-associative merge operator since that is a convenient
1160   // way to get compaction to write out files with overlapping user-keys at the
1161   // endpoints. Note, however, overlapping endpoints can also occur with other
1162   // value types (Put, etc.), assuming the right snapshots are present.
1163   const int kFileBytes = 1 << 20;
1164   const int kValueBytes = 1 << 10;
1165   const int kNumFiles = 4;
1166 
1167   Options options = CurrentOptions();
1168   options.compression = kNoCompression;
1169   options.disable_auto_compactions = true;
1170   options.merge_operator.reset(new MockMergeOperator());
1171   options.target_file_size_base = kFileBytes;
1172   Reopen(options);
1173 
1174   // Push dummy data to L3 so that our actual test files on L0-L2 will not be
1175   // considered the "bottommost" level; otherwise compaction may prevent us from
1176   // creating overlapping user keys, since MergeHelper may combine the merge
1177   // operands when compacting to the bottommost level.
1178   ASSERT_OK(db_->Merge(WriteOptions(), "key", "dummy"));
1179   ASSERT_OK(db_->Flush(FlushOptions()));
1180   MoveFilesToLevel(3);
1181 
1182   Random rnd(301);
1183   const Snapshot* snapshot = nullptr;
1184   for (int i = 0; i < kNumFiles; ++i) {
1185     for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
1186       auto value = RandomString(&rnd, kValueBytes);
1187       ASSERT_OK(db_->Merge(WriteOptions(), "key", value));
1188     }
1189     if (i == kNumFiles - 1) {
1190       // Take snapshot to prevent covered merge operands from being dropped by
1191       // compaction.
1192       snapshot = db_->GetSnapshot();
1193       // The DeleteRange is the last write so all merge operands are covered.
1194       ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
1195                                  "key", "key_"));
1196     }
1197     ASSERT_OK(db_->Flush(FlushOptions()));
1198   }
1199   ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
1200   std::string value;
1201   ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());
1202 
1203   dbfull()->TEST_CompactRange(0 /* level */, nullptr /* begin */,
1204                               nullptr /* end */, nullptr /* column_family */,
1205                               true /* disallow_trivial_move */);
1206   ASSERT_EQ(0, NumTableFilesAtLevel(0));
1207   // Now we have multiple files at L1 all containing a single user key, thus
1208   // guaranteeing overlap in the file endpoints.
1209   ASSERT_GT(NumTableFilesAtLevel(1), 1);
1210 
1211   // Verify no merge operands reappeared after the compaction.
1212   ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());
1213 
1214   // Compact and verify again. It's worthwhile because now the files have
1215   // tighter endpoints, so we can verify that doesn't mess anything up.
1216   dbfull()->TEST_CompactRange(1 /* level */, nullptr /* begin */,
1217                               nullptr /* end */, nullptr /* column_family */,
1218                               true /* disallow_trivial_move */);
1219   ASSERT_GT(NumTableFilesAtLevel(2), 1);
1220   ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());
1221 
1222   db_->ReleaseSnapshot(snapshot);
1223 }
1224 
1225 TEST_F(DBRangeDelTest, UntruncatedTombstoneDoesNotDeleteNewerKey) {
1226   // Verify a key newer than a range tombstone cannot be deleted by being
1227   // compacted to the bottom level (and thus having its seqnum zeroed) before
1228   // the range tombstone. This used to happen when range tombstones were
1229   // untruncated on reads such that they extended past their file boundaries.
1230   //
1231   // Test summary:
1232   //
1233   // - L1 is bottommost.
1234   // - A couple snapshots are strategically taken to prevent seqnums from being
1235   //   zeroed, range tombstone from being dropped, merge operands from being
1236   //   dropped, and merge operands from being combined.
1237   // - Left half of files in L1 all have same user key, ensuring their file
1238   //   boundaries overlap. In the past this would cause range tombstones to be
1239   //   untruncated.
1240   // - Right half of L1 files all have different keys, ensuring no overlap.
1241   // - A range tombstone spans all L1 keys, so it is stored in every L1 file.
1242   // - Keys in the right side of the key-range are overwritten. These are
1243   //   compacted down to L1 after releasing snapshots such that their seqnums
1244   //   will be zeroed.
1245   // - A full range scan is performed. If the tombstone in the left L1 files
1246   //   were untruncated, it would now cover keys newer than it (but with zeroed
1247   //   seqnums) in the right L1 files.
1248   const int kFileBytes = 1 << 20;
1249   const int kValueBytes = 1 << 10;
1250   const int kNumFiles = 4;
1251   const int kMaxKey = kNumFiles * kFileBytes / kValueBytes;
1252   const int kKeysOverwritten = 10;
1253 
1254   Options options = CurrentOptions();
1255   options.compression = kNoCompression;
1256   options.disable_auto_compactions = true;
1257   options.merge_operator.reset(new MockMergeOperator());
1258   options.num_levels = 2;
1259   options.target_file_size_base = kFileBytes;
1260   Reopen(options);
1261 
1262   Random rnd(301);
1263   // - snapshots[0] prevents merge operands from being combined during
1264   //   compaction.
1265   // - snapshots[1] prevents merge operands from being dropped due to the
1266   //   covering range tombstone.
1267   const Snapshot* snapshots[] = {nullptr, nullptr};
1268   for (int i = 0; i < kNumFiles; ++i) {
1269     for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
1270       auto value = RandomString(&rnd, kValueBytes);
1271       std::string key;
1272       if (i < kNumFiles / 2) {
1273         key = Key(0);
1274       } else {
1275         key = Key(1 + i * kFileBytes / kValueBytes + j);
1276       }
1277       ASSERT_OK(db_->Merge(WriteOptions(), key, value));
1278     }
1279     if (i == 0) {
1280       snapshots[0] = db_->GetSnapshot();
1281     }
1282     if (i == kNumFiles - 1) {
1283       snapshots[1] = db_->GetSnapshot();
1284       // The DeleteRange is the last write so all merge operands are covered.
1285       ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
1286                                  Key(0), Key(kMaxKey + 1)));
1287     }
1288     ASSERT_OK(db_->Flush(FlushOptions()));
1289   }
1290   ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
1291 
1292   auto get_key_count = [this]() -> int {
1293     auto* iter = db_->NewIterator(ReadOptions());
1294     iter->SeekToFirst();
1295     int keys_found = 0;
1296     for (; iter->Valid(); iter->Next()) {
1297       ++keys_found;
1298     }
1299     delete iter;
1300     return keys_found;
1301   };
1302 
1303   // All keys should be covered
1304   ASSERT_EQ(0, get_key_count());
1305 
1306   ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */,
1307                               nullptr /* end_key */));
1308   ASSERT_EQ(0, NumTableFilesAtLevel(0));
1309   // Roughly the left half of L1 files should have overlapping boundary keys,
1310   // while the right half should not.
1311   ASSERT_GE(NumTableFilesAtLevel(1), kNumFiles);
1312 
1313   // Now overwrite a few keys that are in L1 files that definitely don't have
1314   // overlapping boundary keys.
1315   for (int i = kMaxKey; i > kMaxKey - kKeysOverwritten; --i) {
1316     auto value = RandomString(&rnd, kValueBytes);
1317     ASSERT_OK(db_->Merge(WriteOptions(), Key(i), value));
1318   }
1319   ASSERT_OK(db_->Flush(FlushOptions()));
1320 
1321   // The overwritten keys are in L0 now, so they clearly aren't covered by the
1322   // range tombstone in L1.
1323   ASSERT_EQ(kKeysOverwritten, get_key_count());
1324 
1325   // Release snapshots so seqnums can be zeroed when L0->L1 happens.
1326   db_->ReleaseSnapshot(snapshots[0]);
1327   db_->ReleaseSnapshot(snapshots[1]);
1328 
1329   auto begin_key_storage = Key(kMaxKey - kKeysOverwritten + 1);
1330   auto end_key_storage = Key(kMaxKey);
1331   Slice begin_key(begin_key_storage);
1332   Slice end_key(end_key_storage);
1333   ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin_key, &end_key));
1334   ASSERT_EQ(0, NumTableFilesAtLevel(0));
1335   ASSERT_GE(NumTableFilesAtLevel(1), kNumFiles);
1336 
1337   ASSERT_EQ(kKeysOverwritten, get_key_count());
1338 }
1339 
1340 TEST_F(DBRangeDelTest, DeletedMergeOperandReappearsIterPrev) {
1341   // Exposes a bug where we were using
1342   // `RangeDelPositioningMode::kBackwardTraversal` while scanning merge operands
1343   // in the forward direction. Confusingly, this case happened during
1344   // `DBIter::Prev`. It could cause an assertion failure or reappearing keys.
1345   const int kFileBytes = 1 << 20;
1346   const int kValueBytes = 1 << 10;
1347   // Need multiple keys so we can get results when calling `Prev()` after
1348   // `SeekToLast()`.
1349   const int kNumKeys = 3;
1350   const int kNumFiles = 4;
1351 
1352   Options options = CurrentOptions();
1353   options.compression = kNoCompression;
1354   options.disable_auto_compactions = true;
1355   options.merge_operator.reset(new MockMergeOperator());
1356   options.target_file_size_base = kFileBytes;
1357   Reopen(options);
1358 
1359   Random rnd(301);
1360   const Snapshot* snapshot = nullptr;
1361   for (int i = 0; i < kNumFiles; ++i) {
1362     for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
1363       auto value = RandomString(&rnd, kValueBytes);
1364       ASSERT_OK(db_->Merge(WriteOptions(), Key(j % kNumKeys), value));
1365       if (i == 0 && j == kNumKeys) {
1366         // Take snapshot to prevent covered merge operands from being dropped or
1367         // merged by compaction.
1368         snapshot = db_->GetSnapshot();
1369         // Do a DeleteRange near the beginning so only the oldest merge operand
1370         // for each key is covered. This ensures the sequence of events:
1371         //
1372         // - `DBIter::Prev()` is called
1373         // - After several versions of the same user key are encountered,
1374         //   it decides to seek using `DBIter::FindValueForCurrentKeyUsingSeek`.
1375         // - Binary searches to the newest version of the key, which is in the
1376         //   leftmost file containing the user key.
1377         // - Scans forwards to collect all merge operands. Eventually reaches
1378         //   the rightmost file containing the oldest merge operand, which
1379         //   should be covered by the `DeleteRange`. If `RangeDelAggregator`
1380         //   were not properly using `kForwardTraversal` here, that operand
1381         //   would reappear.
1382         ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
1383                                    Key(0), Key(kNumKeys + 1)));
1384       }
1385     }
1386     ASSERT_OK(db_->Flush(FlushOptions()));
1387   }
1388   ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
1389 
1390   ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */,
1391                               nullptr /* end_key */));
1392   ASSERT_EQ(0, NumTableFilesAtLevel(0));
1393   ASSERT_GT(NumTableFilesAtLevel(1), 1);
1394 
1395   auto* iter = db_->NewIterator(ReadOptions());
1396   iter->SeekToLast();
1397   int keys_found = 0;
1398   for (; iter->Valid(); iter->Prev()) {
1399     ++keys_found;
1400   }
1401   delete iter;
1402   ASSERT_EQ(kNumKeys, keys_found);
1403 
1404   db_->ReleaseSnapshot(snapshot);
1405 }
1406 
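// A minimal illustrative sketch (not from the upstream test suite): a backward
// scan via SeekToLast()/Prev() honors range tombstones just like a forward
// scan, which is the property the test above stresses for merge operands. Only
// point keys are used here to keep the sketch small.
TEST_F(DBRangeDelTest, IllustrativeSketchReverseScanRespectsRangeTombstone) {
  DestroyAndReopen(CurrentOptions());

  const int kNumKeys = 10;
  for (int i = 0; i < kNumKeys; ++i) {
    ASSERT_OK(Put(Key(i), "val"));
  }
  // Delete the middle of the key range; only Key(0) and Key(kNumKeys - 1)
  // survive.
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(1),
                             Key(kNumKeys - 1)));

  auto* iter = db_->NewIterator(ReadOptions());
  int keys_found = 0;
  for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
    ++keys_found;
  }
  ASSERT_OK(iter->status());
  delete iter;
  ASSERT_EQ(2, keys_found);
}
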
1407 TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeys) {
1408   const int kFileBytes = 1 << 20;
1409 
1410   Options options = CurrentOptions();
1411   options.compression = kNoCompression;
1412   options.disable_auto_compactions = true;
1413   options.target_file_size_base = kFileBytes;
1414   Reopen(options);
1415 
1416   ASSERT_OK(Put(Key(0), "a"));
1417   const Snapshot* snapshot = db_->GetSnapshot();
1418 
1419   ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
1420                              Key(10)));
1421 
1422   db_->Flush(FlushOptions());
1423 
1424   ReadOptions read_opts;
1425   read_opts.snapshot = snapshot;
1426   auto* iter = db_->NewIterator(read_opts);
1427 
1428   iter->SeekToFirst();
1429   ASSERT_TRUE(iter->Valid());
1430   ASSERT_EQ(Key(0), iter->key());
1431 
1432   iter->Next();
1433   ASSERT_FALSE(iter->Valid());
1434 
1435   delete iter;
1436   db_->ReleaseSnapshot(snapshot);
1437 }
1438 
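// A minimal illustrative sketch (not from the upstream test suite): the same
// snapshot-visibility guarantee the iterator-based test above checks also
// holds for point lookups. A Get() at a snapshot taken before the DeleteRange
// still sees the old value, while a Get() at the latest sequence number does
// not.
TEST_F(DBRangeDelTest, IllustrativeSketchSnapshotGetSeesCoveredValue) {
  DestroyAndReopen(CurrentOptions());

  ASSERT_OK(Put(Key(0), "a"));
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(10)));

  std::string value;
  ReadOptions snapshot_read_opts;
  snapshot_read_opts.snapshot = snapshot;
  // Reads at the snapshot ignore the newer range tombstone.
  ASSERT_OK(db_->Get(snapshot_read_opts, Key(0), &value));
  ASSERT_EQ("a", value);
  // Reads at the latest sequence number see the deletion.
  ASSERT_TRUE(db_->Get(ReadOptions(), Key(0), &value).IsNotFound());

  db_->ReleaseSnapshot(snapshot);
}
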
1439 TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeysInImmMemTables) {
1440   const int kFileBytes = 1 << 20;
1441 
1442   Options options = CurrentOptions();
1443   options.compression = kNoCompression;
1444   options.disable_auto_compactions = true;
1445   options.target_file_size_base = kFileBytes;
1446   Reopen(options);
1447 
1448   // Block the flush thread so immutable memtables stay pinned in memory.
1449   SyncPoint::GetInstance()->DisableProcessing();
1450   SyncPoint::GetInstance()->LoadDependency({
1451       {"SnapshotPreventsDroppedKeysInImmMemTables:AfterNewIterator",
1452        "DBImpl::BGWorkFlush"},
1453   });
1454   SyncPoint::GetInstance()->EnableProcessing();
1455 
1456   ASSERT_OK(Put(Key(0), "a"));
1457   std::unique_ptr<const Snapshot, std::function<void(const Snapshot*)>>
1458       snapshot(db_->GetSnapshot(),
1459                [this](const Snapshot* s) { db_->ReleaseSnapshot(s); });
1460 
1461   ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
1462                              Key(10)));
1463 
1464   ASSERT_OK(dbfull()->TEST_SwitchMemtable());
1465 
1466   ReadOptions read_opts;
1467   read_opts.snapshot = snapshot.get();
1468   std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));
1469 
1470   TEST_SYNC_POINT("SnapshotPreventsDroppedKeysInImmMemTables:AfterNewIterator");
1471 
1472   iter->SeekToFirst();
1473   ASSERT_TRUE(iter->Valid());
1474   ASSERT_EQ(Key(0), iter->key());
1475 
1476   iter->Next();
1477   ASSERT_FALSE(iter->Valid());
1478 }
1479 
1480 TEST_F(DBRangeDelTest, RangeTombstoneWrittenToMinimalSsts) {
1481   // Adapted from
1482   // https://github.com/cockroachdb/cockroach/blob/de8b3ea603dd1592d9dc26443c2cc92c356fbc2f/pkg/storage/engine/rocksdb_test.go#L1267-L1398.
1483   // Regression test for issue where range tombstone was written to more files
1484   // than necessary when it began exactly at the begin key in the next
1485   // compaction output file.
1486   const int kFileBytes = 1 << 20;
1487   const int kValueBytes = 4 << 10;
1488   Options options = CurrentOptions();
1489   options.compression = kNoCompression;
1490   options.disable_auto_compactions = true;
1491   // Leave a bit of slack in the size limits; we enforce sizes more strictly
1492   // by flushing/compacting manually.
1493   options.max_compaction_bytes = 2 * kFileBytes;
1494   options.target_file_size_base = 2 * kFileBytes;
1495   options.write_buffer_size = 2 * kFileBytes;
1496   Reopen(options);
1497 
1498   Random rnd(301);
1499   for (char first_char : {'a', 'b', 'c'}) {
1500     for (int i = 0; i < kFileBytes / kValueBytes; ++i) {
1501       std::string key(1, first_char);
1502       key.append(Key(i));
1503       std::string value = RandomString(&rnd, kValueBytes);
1504       ASSERT_OK(Put(key, value));
1505     }
1506     db_->Flush(FlushOptions());
1507     MoveFilesToLevel(2);
1508   }
1509   ASSERT_EQ(0, NumTableFilesAtLevel(0));
1510   ASSERT_EQ(3, NumTableFilesAtLevel(2));
1511 
1512   // Populate the memtable lightly while spanning the whole key-space. The
1513   // setting of `max_compaction_bytes` will cause the L0->L1 compaction to
1514   // output multiple files, preventing a large L1->L2 compaction later.
1515   ASSERT_OK(Put("a", "val"));
1516   ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
1517                              "c" + Key(1), "d"));
1518   // Our compaction output file cutting logic currently only considers point
1519   // keys. So, in order for the range tombstone to have a chance at landing at
1520   // the start of a new file, we need a point key at the range tombstone's
1521   // start.
1522   // TODO(ajkr): remove this `Put` after file cutting accounts for range
1523   // tombstones (#3977).
1524   ASSERT_OK(Put("c" + Key(1), "value"));
1525   db_->Flush(FlushOptions());
1526 
1527   // Ensure the manual L0->L1 compaction cuts its outputs before the range
1528   // tombstone, so the range tombstone is placed only in the second SST.
1529   std::string begin_key_storage("c" + Key(1));
1530   Slice begin_key(begin_key_storage);
1531   std::string end_key_storage("d");
1532   Slice end_key(end_key_storage);
1533   dbfull()->TEST_CompactRange(0 /* level */, &begin_key /* begin */,
1534                               &end_key /* end */, nullptr /* column_family */,
1535                               true /* disallow_trivial_move */);
1536   ASSERT_EQ(2, NumTableFilesAtLevel(1));
1537 
1538   std::vector<LiveFileMetaData> all_metadata;
1539   std::vector<LiveFileMetaData> l1_metadata;
1540   db_->GetLiveFilesMetaData(&all_metadata);
1541   for (const auto& metadata : all_metadata) {
1542     if (metadata.level == 1) {
1543       l1_metadata.push_back(metadata);
1544     }
1545   }
1546   std::sort(l1_metadata.begin(), l1_metadata.end(),
1547             [&](const LiveFileMetaData& a, const LiveFileMetaData& b) {
1548               return options.comparator->Compare(a.smallestkey, b.smallestkey) <
1549                      0;
1550             });
1551   ASSERT_EQ("a", l1_metadata[0].smallestkey);
1552   ASSERT_EQ("a", l1_metadata[0].largestkey);
1553   ASSERT_EQ("c" + Key(1), l1_metadata[1].smallestkey);
1554   ASSERT_EQ("d", l1_metadata[1].largestkey);
1555 
1556   TablePropertiesCollection all_table_props;
1557   ASSERT_OK(db_->GetPropertiesOfAllTables(&all_table_props));
1558   int64_t num_range_deletions = 0;
1559   for (const auto& name_and_table_props : all_table_props) {
1560     const auto& name = name_and_table_props.first;
1561     const auto& table_props = name_and_table_props.second;
1562     // The range tombstone should only be output to the second L1 SST.
1563     if (name.size() >= l1_metadata[1].name.size() &&
1564         name.substr(name.size() - l1_metadata[1].name.size()).compare(l1_metadata[1].name) == 0) {
1565       ASSERT_EQ(1, table_props->num_range_deletions);
1566       ++num_range_deletions;
1567     } else {
1568       ASSERT_EQ(0, table_props->num_range_deletions);
1569     }
1570   }
1571   ASSERT_EQ(1, num_range_deletions);
1572 }
1573 
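// A minimal illustrative sketch (not from the upstream test suite): the
// `num_range_deletions` table property checked above can be summed over all
// live SSTs to count how many files carry range tombstones. A single flushed
// DeleteRange should show up in exactly one SST.
TEST_F(DBRangeDelTest, IllustrativeSketchCountRangeDeletionsInTableProps) {
  Options opts = CurrentOptions();
  opts.disable_auto_compactions = true;
  DestroyAndReopen(opts);

  ASSERT_OK(Put(Key(0), "val"));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(1),
                             Key(2)));
  ASSERT_OK(db_->Flush(FlushOptions()));

  TablePropertiesCollection all_table_props;
  ASSERT_OK(db_->GetPropertiesOfAllTables(&all_table_props));
  uint64_t total_range_deletions = 0;
  for (const auto& name_and_props : all_table_props) {
    total_range_deletions += name_and_props.second->num_range_deletions;
  }
  ASSERT_EQ(1, total_range_deletions);
}
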
1574 TEST_F(DBRangeDelTest, OverlappedTombstones) {
1575   const int kNumPerFile = 4, kNumFiles = 2;
1576   Options options = CurrentOptions();
1577   options.disable_auto_compactions = true;
1578   options.max_compaction_bytes = 9 * 1024;
1579   DestroyAndReopen(options);
1580   Random rnd(301);
1581   for (int i = 0; i < kNumFiles; ++i) {
1582     std::vector<std::string> values;
1583     // Write 12K (4 values, each 3K)
1584     for (int j = 0; j < kNumPerFile; j++) {
1585       values.push_back(RandomString(&rnd, 3 << 10));
1586       ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
1587     }
1588   }
1589   ASSERT_OK(db_->Flush(FlushOptions()));
1590   ASSERT_EQ(1, NumTableFilesAtLevel(0));
1591   MoveFilesToLevel(2);
1592   ASSERT_EQ(2, NumTableFilesAtLevel(2));
1593 
1594   ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(1),
1595                              Key(kNumFiles * kNumPerFile + 1)));
1596   ASSERT_OK(db_->Flush(FlushOptions()));
1597 
1598   ASSERT_EQ(1, NumTableFilesAtLevel(0));
1599 
1600   dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
1601                               true /* disallow_trivial_move */);
1602 
1603   // The range tombstone is not broken up into multiple SSTs, which could
1604   // otherwise incur a large compaction with L2.
1605   ASSERT_EQ(1, NumTableFilesAtLevel(1));
1606   std::vector<std::vector<FileMetaData>> files;
1607   dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr,
1608                               true /* disallow_trivial_move */);
1609   ASSERT_EQ(1, NumTableFilesAtLevel(2));
1610   ASSERT_EQ(0, NumTableFilesAtLevel(1));
1611 }
1612 
1613 TEST_F(DBRangeDelTest, OverlappedKeys) {
1614   const int kNumPerFile = 4, kNumFiles = 2;
1615   Options options = CurrentOptions();
1616   options.disable_auto_compactions = true;
1617   options.max_compaction_bytes = 9 * 1024;
1618   DestroyAndReopen(options);
1619   Random rnd(301);
1620   for (int i = 0; i < kNumFiles; ++i) {
1621     std::vector<std::string> values;
1622     // Write 12K (4 values, each 3K)
1623     for (int j = 0; j < kNumPerFile; j++) {
1624       values.push_back(RandomString(&rnd, 3 << 10));
1625       ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
1626     }
1627   }
1628   ASSERT_OK(db_->Flush(FlushOptions()));
1629   ASSERT_EQ(1, NumTableFilesAtLevel(0));
1630   MoveFilesToLevel(2);
1631   ASSERT_EQ(2, NumTableFilesAtLevel(2));
1632 
1633   for (int i = 1; i < kNumFiles * kNumPerFile + 1; i++) {
1634     ASSERT_OK(Put(Key(i), "0x123"));
1635   }
1636   ASSERT_OK(db_->Flush(FlushOptions()));
1637   ASSERT_EQ(1, NumTableFilesAtLevel(0));
1638 
1639   // The key range is broken up into three SSTs to avoid a future big
1640   // compaction with the grandparent level.
1641   dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
1642                               true /* disallow_trivial_move */);
1643   ASSERT_EQ(3, NumTableFilesAtLevel(1));
1644 
1645   std::vector<std::vector<FileMetaData>> files;
1646   dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr,
1647                               true /* disallow_trivial_move */);
1648   ASSERT_EQ(1, NumTableFilesAtLevel(2));
1649   ASSERT_EQ(0, NumTableFilesAtLevel(1));
1650 }
1651 
1652 #endif  // ROCKSDB_LITE
1653 
1654 }  // namespace ROCKSDB_NAMESPACE
1655 
1656 int main(int argc, char** argv) {
1657   ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
1658   ::testing::InitGoogleTest(&argc, argv);
1659   return RUN_ALL_TESTS();
1660 }
1661