// Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "rocksdb/utilities/write_batch_with_index.h"
#include "test_util/testutil.h"
#include "utilities/merge_operators.h"

namespace ROCKSDB_NAMESPACE {

class DBRangeDelTest : public DBTestBase {
 public:
  DBRangeDelTest() : DBTestBase("/db_range_del_test") {}

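  // Encodes `key` as a fixed-width 8-byte binary string. Tests that use this
  // helper set the comparator to test::Uint64Comparator(), so the encoded
  // keys compare numerically rather than lexicographically.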
  std::string GetNumericStr(int key) {
    uint64_t uint64_key = static_cast<uint64_t>(key);
    std::string str;
    str.resize(8);
    memcpy(&str[0], static_cast<void*>(&uint64_key), 8);
    return str;
  }
};

// PlainTableFactory, WriteBatchWithIndex, and NumTableFilesAtLevel() are not
// supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE
TEST_F(DBRangeDelTest, NonBlockBasedTableNotSupported) {
  // TODO: figure out why MmapReads trips the iterator pinning assertion in
  // RangeDelAggregator. Ideally it would be supported; otherwise it should at
  // least be explicitly unsupported.
  for (auto config : {kPlainTableAllBytesPrefix, /* kWalDirAndMmapReads */}) {
    option_config_ = config;
    DestroyAndReopen(CurrentOptions());
    ASSERT_TRUE(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 "dr1", "dr1")
                    .IsNotSupported());
  }
}

TEST_F(DBRangeDelTest, WriteBatchWithIndexNotSupported) {
  WriteBatchWithIndex indexedBatch{};
  ASSERT_TRUE(indexedBatch.DeleteRange(db_->DefaultColumnFamily(), "dr1", "dr1")
                  .IsNotSupported());
  ASSERT_TRUE(indexedBatch.DeleteRange("dr1", "dr1").IsNotSupported());
}

TEST_F(DBRangeDelTest, FlushOutputHasOnlyRangeTombstones) {
  do {
    DestroyAndReopen(CurrentOptions());
    ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                               "dr1", "dr2"));
    ASSERT_OK(db_->Flush(FlushOptions()));
    ASSERT_EQ(1, NumTableFilesAtLevel(0));
  } while (ChangeOptions(kRangeDelSkipConfigs));
}

TEST_F(DBRangeDelTest, CompactionOutputHasOnlyRangeTombstone) {
  do {
    Options opts = CurrentOptions();
    opts.disable_auto_compactions = true;
    opts.statistics = CreateDBStatistics();
    DestroyAndReopen(opts);

    // snapshot protects range tombstone from dropping due to becoming obsolete.
    const Snapshot* snapshot = db_->GetSnapshot();
    db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z");
    db_->Flush(FlushOptions());

    ASSERT_EQ(1, NumTableFilesAtLevel(0));
    ASSERT_EQ(0, NumTableFilesAtLevel(1));
    dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                                true /* disallow_trivial_move */);
    ASSERT_EQ(0, NumTableFilesAtLevel(0));
    ASSERT_EQ(1, NumTableFilesAtLevel(1));
    ASSERT_EQ(0, TestGetTickerCount(opts, COMPACTION_RANGE_DEL_DROP_OBSOLETE));
    db_->ReleaseSnapshot(snapshot);
    // Skip cuckoo memtables, which do not support snapshots. Skip non-leveled
    // compactions as the above assertions about the number of files in a level
    // do not hold true.
  } while (ChangeOptions(kRangeDelSkipConfigs | kSkipUniversalCompaction |
                         kSkipFIFOCompaction));
}
TEST_F(DBRangeDelTest, CompactionOutputFilesExactlyFilled) {
  // regression test for exactly filled compaction output files. Previously
  // another file would be generated containing all range deletions, which
  // could invalidate the non-overlapping file boundary invariant.
  const int kNumPerFile = 4, kNumFiles = 2, kFileBytes = 9 << 10;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.level0_file_num_compaction_trigger = kNumFiles;
  options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  options.num_levels = 2;
  options.target_file_size_base = kFileBytes;
  BlockBasedTableOptions table_options;
  table_options.block_size_deviation = 50;  // each block holds two keys
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);

  // snapshot protects range tombstone from dropping due to becoming obsolete.
  const Snapshot* snapshot = db_->GetSnapshot();
  db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), Key(1));

  Random rnd(301);
  for (int i = 0; i < kNumFiles; ++i) {
    std::vector<std::string> values;
    // Write 12K (4 values, each 3K)
    for (int j = 0; j < kNumPerFile; j++) {
      values.push_back(RandomString(&rnd, 3 << 10));
      ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
      if (j == 0 && i > 0) {
        dbfull()->TEST_WaitForFlushMemTable();
      }
    }
  }
  // put extra key to trigger final flush
  ASSERT_OK(Put("", ""));
  dbfull()->TEST_WaitForFlushMemTable();
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
  ASSERT_EQ(0, NumTableFilesAtLevel(1));

  dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_EQ(2, NumTableFilesAtLevel(1));
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, MaxCompactionBytesCutsOutputFiles) {
  // Ensures range deletion spanning multiple compaction output files that are
  // cut by max_compaction_bytes will have non-overlapping key-ranges.
  // https://github.com/facebook/rocksdb/issues/1778
  const int kNumFiles = 2, kNumPerFile = 1 << 8, kBytesPerVal = 1 << 12;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  opts.disable_auto_compactions = true;
  opts.level0_file_num_compaction_trigger = kNumFiles;
  opts.max_compaction_bytes = kNumPerFile * kBytesPerVal;
  opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  // Want max_compaction_bytes to trigger the end of compaction output file, not
  // target_file_size_base, so make the latter much bigger
  opts.target_file_size_base = 100 * opts.max_compaction_bytes;
  Reopen(opts);

  // snapshot protects range tombstone from dropping due to becoming obsolete.
  const Snapshot* snapshot = db_->GetSnapshot();

  // It spans the whole key-range, thus will be included in all output files
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             GetNumericStr(0),
                             GetNumericStr(kNumFiles * kNumPerFile - 1)));
  Random rnd(301);
  for (int i = 0; i < kNumFiles; ++i) {
    std::vector<std::string> values;
    // Write 1MB (256 values, each 4K)
    for (int j = 0; j < kNumPerFile; j++) {
      values.push_back(RandomString(&rnd, kBytesPerVal));
      ASSERT_OK(Put(GetNumericStr(kNumPerFile * i + j), values[j]));
    }
    // extra entry to trigger SpecialSkipListFactory's flush
    ASSERT_OK(Put(GetNumericStr(kNumPerFile), ""));
    dbfull()->TEST_WaitForFlushMemTable();
    ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
  }

  dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_GE(NumTableFilesAtLevel(1), 2);

  std::vector<std::vector<FileMetaData>> files;
  dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);

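  // Verify the L1 output files that were cut by max_compaction_bytes have
  // non-overlapping internal key ranges.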
  for (size_t i = 0; i < files[1].size() - 1; ++i) {
    ASSERT_TRUE(InternalKeyComparator(opts.comparator)
                    .Compare(files[1][i].largest, files[1][i + 1].smallest) <
                0);
  }
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, SentinelsOmittedFromOutputFile) {
  // Regression test for bug where sentinel range deletions (i.e., ones with
  // sequence number of zero) were included in output files.
  // snapshot protects range tombstone from dropping due to becoming obsolete.
  const Snapshot* snapshot = db_->GetSnapshot();

  // gaps between ranges create sentinels in our internal representation
  std::vector<std::pair<std::string, std::string>> range_dels = {
      {"a", "b"}, {"c", "d"}, {"e", "f"}};
  for (const auto& range_del : range_dels) {
    ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                               range_del.first, range_del.second));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

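  // If a sentinel tombstone had leaked into the output file, the file's
  // smallest sequence number would be zero.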
  std::vector<std::vector<FileMetaData>> files;
  dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);
  ASSERT_GT(files[0][0].fd.smallest_seqno, 0);

  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, FlushRangeDelsSameStartKey) {
  db_->Put(WriteOptions(), "b1", "val");
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c"));
  db_->Put(WriteOptions(), "b2", "val");
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b"));
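  // "b1" is covered by the first tombstone [a, c). "b2" was written after that
  // tombstone, and the second tombstone [a, b) does not reach it, so only "b1"
  // should read as deleted.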
  // first iteration verifies query correctness in memtable, second verifies
  // query correctness for a single SST file
  for (int i = 0; i < 2; ++i) {
    if (i > 0) {
      ASSERT_OK(db_->Flush(FlushOptions()));
      ASSERT_EQ(1, NumTableFilesAtLevel(0));
    }
    std::string value;
    ASSERT_TRUE(db_->Get(ReadOptions(), "b1", &value).IsNotFound());
    ASSERT_OK(db_->Get(ReadOptions(), "b2", &value));
  }
}

TEST_F(DBRangeDelTest, CompactRangeDelsSameStartKey) {
  db_->Put(WriteOptions(), "unused", "val");  // prevents empty after compaction
  db_->Put(WriteOptions(), "b1", "val");
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(3, NumTableFilesAtLevel(0));

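  // first iteration verifies query correctness across the three L0 files,
  // second verifies it after they are compacted into a single L1 file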
  for (int i = 0; i < 2; ++i) {
    if (i > 0) {
      dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                                  true /* disallow_trivial_move */);
      ASSERT_EQ(0, NumTableFilesAtLevel(0));
      ASSERT_EQ(1, NumTableFilesAtLevel(1));
    }
    std::string value;
    ASSERT_TRUE(db_->Get(ReadOptions(), "b1", &value).IsNotFound());
  }
}
#endif  // ROCKSDB_LITE

TEST_F(DBRangeDelTest, FlushRemovesCoveredKeys) {
  const int kNum = 300, kRangeBegin = 50, kRangeEnd = 250;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  Reopen(opts);

  // Write a third before snapshot, a third between snapshot and tombstone, and
  // a third after the tombstone. Keys older than snapshot or newer than the
  // tombstone should be preserved.
  const Snapshot* snapshot = nullptr;
  for (int i = 0; i < kNum; ++i) {
    if (i == kNum / 3) {
      snapshot = db_->GetSnapshot();
    } else if (i == 2 * kNum / 3) {
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                       GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd));
    }
    db_->Put(WriteOptions(), GetNumericStr(i), "val");
  }
  db_->Flush(FlushOptions());

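  // Read with ignore_range_deletions so the checks observe which keys the
  // flush actually wrote out, rather than relying on tombstone filtering at
  // read time. Only keys covered by the tombstone and newer than the snapshot
  // should have been dropped.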
  for (int i = 0; i < kNum; ++i) {
    ReadOptions read_opts;
    read_opts.ignore_range_deletions = true;
    std::string value;
    if (i < kRangeBegin || i > kRangeEnd || i < kNum / 3 || i >= 2 * kNum / 3) {
      ASSERT_OK(db_->Get(read_opts, GetNumericStr(i), &value));
    } else {
      ASSERT_TRUE(db_->Get(read_opts, GetNumericStr(i), &value).IsNotFound());
    }
  }
  db_->ReleaseSnapshot(snapshot);
}

// NumTableFilesAtLevel() is not supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE
TEST_F(DBRangeDelTest, CompactionRemovesCoveredKeys) {
  const int kNumPerFile = 100, kNumFiles = 4;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  opts.disable_auto_compactions = true;
  opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  opts.num_levels = 2;
  opts.statistics = CreateDBStatistics();
  Reopen(opts);

  for (int i = 0; i < kNumFiles; ++i) {
    if (i > 0) {
      // range tombstone covers first half of the previous file
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                       GetNumericStr((i - 1) * kNumPerFile),
                       GetNumericStr((i - 1) * kNumPerFile + kNumPerFile / 2));
    }
    // Make sure a given key appears in each file so compaction won't be able to
    // use trivial move, which would happen if the ranges were non-overlapping.
    // Also, we need an extra element since flush is only triggered when the
    // number of keys is one greater than SpecialSkipListFactory's limit.
    // We choose a key outside the key-range used by the test to avoid conflict.
    db_->Put(WriteOptions(), GetNumericStr(kNumPerFile * kNumFiles), "val");

    for (int j = 0; j < kNumPerFile; ++j) {
      db_->Put(WriteOptions(), GetNumericStr(i * kNumPerFile + j), "val");
    }
    dbfull()->TEST_WaitForFlushMemTable();
    ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
  }
  db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_GT(NumTableFilesAtLevel(1), 0);
  ASSERT_EQ((kNumFiles - 1) * kNumPerFile / 2,
            TestGetTickerCount(opts, COMPACTION_KEY_DROP_RANGE_DEL));

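  // With range deletions ignored, Get() sees exactly the keys that survived
  // compaction: everything in the last file, plus the second half of each
  // earlier file (the first halves were covered by the tombstones above).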
  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kNumPerFile; ++j) {
      ReadOptions read_opts;
      read_opts.ignore_range_deletions = true;
      std::string value;
      if (i == kNumFiles - 1 || j >= kNumPerFile / 2) {
        ASSERT_OK(
            db_->Get(read_opts, GetNumericStr(i * kNumPerFile + j), &value));
      } else {
        ASSERT_TRUE(
            db_->Get(read_opts, GetNumericStr(i * kNumPerFile + j), &value)
                .IsNotFound());
      }
    }
  }
}

TEST_F(DBRangeDelTest, ValidLevelSubcompactionBoundaries) {
  const int kNumPerFile = 100, kNumFiles = 4, kFileBytes = 100 << 10;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.level0_file_num_compaction_trigger = kNumFiles;
  options.max_bytes_for_level_base = 2 * kFileBytes;
  options.max_subcompactions = 4;
  options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  options.num_levels = 3;
  options.target_file_size_base = kFileBytes;
  options.target_file_size_multiplier = 1;
  Reopen(options);

  Random rnd(301);
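  // Each outer iteration fills L0 with kNumFiles files of roughly 100KB. Once
  // auto-compaction is enabled below, that data exceeds max_bytes_for_level_base
  // for L1, so compaction pushes files down to L2, leaving both L1 and L2
  // non-empty (verified at the end of the inner loop).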
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < kNumFiles; ++j) {
      if (i > 0) {
        // delete [95,105) in two files, [295,305) in next two
        int mid = (j + (1 - j % 2)) * kNumPerFile;
        db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                         Key(mid - 5), Key(mid + 5));
      }
      std::vector<std::string> values;
      // Write 100KB (100 values, each 1K)
      for (int k = 0; k < kNumPerFile; k++) {
        values.push_back(RandomString(&rnd, 990));
        ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k]));
      }
      // put extra key to trigger flush
      ASSERT_OK(Put("", ""));
      dbfull()->TEST_WaitForFlushMemTable();
      if (j < kNumFiles - 1) {
        // background compaction may happen early for kNumFiles'th file
        ASSERT_EQ(NumTableFilesAtLevel(0), j + 1);
      }
      if (j == options.level0_file_num_compaction_trigger - 1) {
        // When i == 1, compaction will output some files to L1, at which point
        // L1 is not bottommost so range deletions cannot be compacted away. The
        // new L1 files must be generated with non-overlapping key ranges even
        // though multiple subcompactions see the same ranges deleted, else an
        // assertion will fail.
        //
        // Only enable auto-compactions when we're ready; otherwise, the
        // oversized L0 (relative to base_level) causes the compaction to run
        // earlier.
        ASSERT_OK(db_->EnableAutoCompaction({db_->DefaultColumnFamily()}));
        dbfull()->TEST_WaitForCompact();
        ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(),
                                  {{"disable_auto_compactions", "true"}}));
        ASSERT_EQ(NumTableFilesAtLevel(0), 0);
        ASSERT_GT(NumTableFilesAtLevel(1), 0);
        ASSERT_GT(NumTableFilesAtLevel(2), 0);
      }
    }
  }
}

TEST_F(DBRangeDelTest, ValidUniversalSubcompactionBoundaries) {
  const int kNumPerFile = 100, kFilesPerLevel = 4, kNumLevels = 4;
  Options options = CurrentOptions();
  options.compaction_options_universal.min_merge_width = kFilesPerLevel;
  options.compaction_options_universal.max_merge_width = kFilesPerLevel;
  options.compaction_options_universal.size_ratio = 10;
  options.compaction_style = kCompactionStyleUniversal;
  options.level0_file_num_compaction_trigger = kFilesPerLevel;
  options.max_subcompactions = 4;
  options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  options.num_levels = kNumLevels;
  options.target_file_size_base = kNumPerFile << 10;
  options.target_file_size_multiplier = 1;
  Reopen(options);

  Random rnd(301);
  for (int i = 0; i < kNumLevels - 1; ++i) {
    for (int j = 0; j < kFilesPerLevel; ++j) {
      if (i == kNumLevels - 2) {
        // insert range deletions [95,105) in two files, [295,305) in next two
        // to prepare L1 for later manual compaction.
        int mid = (j + (1 - j % 2)) * kNumPerFile;
        db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                         Key(mid - 5), Key(mid + 5));
      }
      std::vector<std::string> values;
      // Write 100KB (100 values, each 1K)
      for (int k = 0; k < kNumPerFile; k++) {
        values.push_back(RandomString(&rnd, 990));
        ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k]));
      }
      // put extra key to trigger flush
      ASSERT_OK(Put("", ""));
      dbfull()->TEST_WaitForFlushMemTable();
      if (j < kFilesPerLevel - 1) {
        // background compaction may happen early for kFilesPerLevel'th file
        ASSERT_EQ(NumTableFilesAtLevel(0), j + 1);
      }
    }
    dbfull()->TEST_WaitForCompact();
    ASSERT_EQ(NumTableFilesAtLevel(0), 0);
    ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 1);
  }
  // Now L1-L3 are full, when we compact L1->L2 we should see (1) subcompactions
  // happen since input level > 0; (2) range deletions are not dropped since
  // output level is not bottommost. If no file boundary assertion fails, that
  // probably means universal compaction + subcompaction + range deletion are
  // compatible.
  ASSERT_OK(dbfull()->RunManualCompaction(
      reinterpret_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())
          ->cfd(),
      1 /* input_level */, 2 /* output_level */, CompactRangeOptions(),
      nullptr /* begin */, nullptr /* end */, true /* exclusive */,
      true /* disallow_trivial_move */,
      port::kMaxUint64 /* max_file_num_to_ignore */));
}
#endif  // ROCKSDB_LITE

TEST_F(DBRangeDelTest, CompactionRemovesCoveredMergeOperands) {
  const int kNumPerFile = 3, kNumFiles = 3;
  Options opts = CurrentOptions();
  opts.disable_auto_compactions = true;
  opts.memtable_factory.reset(new SpecialSkipListFactory(2 * kNumPerFile));
  opts.merge_operator = MergeOperators::CreateUInt64AddOperator();
  opts.num_levels = 2;
  Reopen(opts);

  // Iterates kNumFiles * kNumPerFile + 1 times since flushing the last file
  // requires an extra entry.
  for (int i = 0; i <= kNumFiles * kNumPerFile; ++i) {
    if (i % kNumPerFile == 0 && i / kNumPerFile == kNumFiles - 1) {
      // Delete merge operands from all but the last file
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key",
                       "key_");
    }
    std::string val;
    PutFixed64(&val, i);
    db_->Merge(WriteOptions(), "key", val);
    // we need to prevent trivial move using Puts so compaction will actually
    // process the merge operands.
    db_->Put(WriteOptions(), "prevent_trivial_move", "");
    if (i > 0 && i % kNumPerFile == 0) {
      dbfull()->TEST_WaitForFlushMemTable();
    }
  }

  ReadOptions read_opts;
  read_opts.ignore_range_deletions = true;
  std::string expected, actual;
  ASSERT_OK(db_->Get(read_opts, "key", &actual));
  PutFixed64(&expected, 45);  // 0+1+2+...+9
  ASSERT_EQ(expected, actual);

  db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);

  expected.clear();
  ASSERT_OK(db_->Get(read_opts, "key", &actual));
  uint64_t tmp;
  Slice tmp2(actual);
  GetFixed64(&tmp2, &tmp);
  PutFixed64(&expected, 30);  // 6+7+8+9 (earlier operands covered by tombstone)
  ASSERT_EQ(expected, actual);
}

TEST_F(DBRangeDelTest, PutDeleteRangeMergeFlush) {
  // Test the sequence of operations: (1) Put, (2) DeleteRange, (3) Merge, (4)
  // Flush. The `CompactionIterator` previously had a bug where we forgot to
  // check for covering range tombstones when processing the (1) Put, causing
  // it to reappear after the flush.
  Options opts = CurrentOptions();
  opts.merge_operator = MergeOperators::CreateUInt64AddOperator();
  Reopen(opts);

  std::string val;
  PutFixed64(&val, 1);
  ASSERT_OK(db_->Put(WriteOptions(), "key", val));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             "key", "key_"));
  ASSERT_OK(db_->Merge(WriteOptions(), "key", val));
  ASSERT_OK(db_->Flush(FlushOptions()));

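  // The Put is covered by the tombstone, so only the newer Merge operand
  // should survive the flush and Get should return exactly 1.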
  ReadOptions read_opts;
  std::string expected, actual;
  ASSERT_OK(db_->Get(read_opts, "key", &actual));
  PutFixed64(&expected, 1);
  ASSERT_EQ(expected, actual);
}

// NumTableFilesAtLevel() is not supported in ROCKSDB_LITE
#ifndef ROCKSDB_LITE
TEST_F(DBRangeDelTest, ObsoleteTombstoneCleanup) {
  // During compaction to bottommost level, verify range tombstones older than
  // the oldest snapshot are removed, while others are preserved.
  Options opts = CurrentOptions();
  opts.disable_auto_compactions = true;
  opts.num_levels = 2;
  opts.statistics = CreateDBStatistics();
  Reopen(opts);

  db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr1",
                   "dr10");  // obsolete after compaction
  db_->Put(WriteOptions(), "key", "val");
  db_->Flush(FlushOptions());
  const Snapshot* snapshot = db_->GetSnapshot();
  db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr2",
                   "dr20");  // protected by snapshot
  db_->Put(WriteOptions(), "key", "val");
  db_->Flush(FlushOptions());

  ASSERT_EQ(2, NumTableFilesAtLevel(0));
  ASSERT_EQ(0, NumTableFilesAtLevel(1));
  db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_EQ(1, NumTableFilesAtLevel(1));
  ASSERT_EQ(1, TestGetTickerCount(opts, COMPACTION_RANGE_DEL_DROP_OBSOLETE));

  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, TableEvictedDuringScan) {
  // The RangeDelAggregator holds pointers into range deletion blocks created by
  // table readers. This test ensures the aggregator can still access those
  // blocks even if it outlives the table readers that created them.
  //
  // DBIter always keeps readers open for L0 files. So, in order to test
  // aggregator outliving reader, we need to have deletions in L1 files, which
  // are opened/closed on-demand during the scan. This is accomplished by
  // setting kNumRanges > level0_stop_writes_trigger, which prevents deletions
  // from all lingering in L0 (there is at most one range deletion per L0 file).
  //
  // The first L1 file will contain a range deletion since its begin key is 0.
  // SeekToFirst() references that table's reader and adds its range tombstone
  // to the aggregator. Upon advancing beyond that table's key-range via Next(),
  // the table reader will be unreferenced by the iterator. Since we manually
  // call Evict() on all readers before the full scan, this unreference causes
  // the reader's refcount to drop to zero and thus be destroyed.
  //
  // When it is destroyed, we do not remove its range deletions from the
  // aggregator. So, subsequent calls to Next() must be able to use these
  // deletions to decide whether a key is covered. This will work as long as
  // the aggregator properly references the range deletion block.
  const int kNum = 25, kRangeBegin = 0, kRangeEnd = 7, kNumRanges = 5;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  opts.level0_file_num_compaction_trigger = 4;
  opts.level0_stop_writes_trigger = 4;
  opts.memtable_factory.reset(new SpecialSkipListFactory(1));
  opts.num_levels = 2;
  BlockBasedTableOptions bbto;
  bbto.cache_index_and_filter_blocks = true;
  bbto.block_cache = NewLRUCache(8 << 20);
  opts.table_factory.reset(NewBlockBasedTableFactory(bbto));
  Reopen(opts);

  // Hold a snapshot so range deletions can't become obsolete during compaction
  // to bottommost level (i.e., L1).
  const Snapshot* snapshot = db_->GetSnapshot();
  for (int i = 0; i < kNum; ++i) {
    db_->Put(WriteOptions(), GetNumericStr(i), "val");
    if (i > 0) {
      dbfull()->TEST_WaitForFlushMemTable();
    }
    if (i >= kNum / 2 && i < kNum / 2 + kNumRanges) {
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                       GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd));
    }
  }
  // Must be > 1 so the first L1 file can be closed before scan finishes
  dbfull()->TEST_WaitForCompact();
  ASSERT_GT(NumTableFilesAtLevel(1), 1);
  std::vector<uint64_t> file_numbers = ListTableFiles(env_, dbname_);

  ReadOptions read_opts;
  auto* iter = db_->NewIterator(read_opts);
  int expected = kRangeEnd;
  iter->SeekToFirst();
  for (auto file_number : file_numbers) {
    // This puts table caches in the state of being externally referenced only
    // so they are destroyed immediately upon iterator unreferencing.
    TableCache::Evict(dbfull()->TEST_table_cache(), file_number);
  }
  for (; iter->Valid(); iter->Next()) {
    ASSERT_EQ(GetNumericStr(expected), iter->key());
    ++expected;
    // Keep clearing block cache's LRU so range deletion block can be freed as
    // soon as its refcount drops to zero.
    bbto.block_cache->EraseUnRefEntries();
  }
  ASSERT_EQ(kNum, expected);
  delete iter;
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, GetCoveredKeyFromMutableMemtable) {
  do {
    DestroyAndReopen(CurrentOptions());
    db_->Put(WriteOptions(), "key", "val");
    ASSERT_OK(
        db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));

    ReadOptions read_opts;
    std::string value;
    ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound());
  } while (ChangeOptions(kRangeDelSkipConfigs));
}

TEST_F(DBRangeDelTest, GetCoveredKeyFromImmutableMemtable) {
  do {
    Options opts = CurrentOptions();
    opts.max_write_buffer_number = 3;
    opts.min_write_buffer_number_to_merge = 2;
    // SpecialSkipListFactory lets us specify maximum number of elements the
    // memtable can hold. It switches the active memtable to immutable (flush is
    // prevented by the above options) upon inserting an element that would
    // overflow the memtable.
    opts.memtable_factory.reset(new SpecialSkipListFactory(1));
    DestroyAndReopen(opts);

    db_->Put(WriteOptions(), "key", "val");
    ASSERT_OK(
        db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
    db_->Put(WriteOptions(), "blah", "val");

    ReadOptions read_opts;
    std::string value;
    ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound());
  } while (ChangeOptions(kRangeDelSkipConfigs));
}

TEST_F(DBRangeDelTest, GetCoveredKeyFromSst) {
  do {
    DestroyAndReopen(CurrentOptions());
    db_->Put(WriteOptions(), "key", "val");
    // snapshot prevents key from being deleted during flush
    const Snapshot* snapshot = db_->GetSnapshot();
    ASSERT_OK(
        db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
    ASSERT_OK(db_->Flush(FlushOptions()));

    ReadOptions read_opts;
    std::string value;
    ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound());
    db_->ReleaseSnapshot(snapshot);
  } while (ChangeOptions(kRangeDelSkipConfigs));
}

TEST_F(DBRangeDelTest, GetCoveredMergeOperandFromMemtable) {
  const int kNumMergeOps = 10;
  Options opts = CurrentOptions();
  opts.merge_operator = MergeOperators::CreateUInt64AddOperator();
  Reopen(opts);

  for (int i = 0; i < kNumMergeOps; ++i) {
    std::string val;
    PutFixed64(&val, i);
    db_->Merge(WriteOptions(), "key", val);
    if (i == kNumMergeOps / 2) {
      // deletes [0, 5]
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key",
                       "key_");
    }
  }

  ReadOptions read_opts;
  std::string expected, actual;
  ASSERT_OK(db_->Get(read_opts, "key", &actual));
  PutFixed64(&expected, 30);  // 6+7+8+9
  ASSERT_EQ(expected, actual);

  expected.clear();
  read_opts.ignore_range_deletions = true;
  ASSERT_OK(db_->Get(read_opts, "key", &actual));
  PutFixed64(&expected, 45);  // 0+1+2+...+9
  ASSERT_EQ(expected, actual);
}

TEST_F(DBRangeDelTest, GetIgnoresRangeDeletions) {
  Options opts = CurrentOptions();
  opts.max_write_buffer_number = 4;
  opts.min_write_buffer_number_to_merge = 3;
  opts.memtable_factory.reset(new SpecialSkipListFactory(1));
  Reopen(opts);

  db_->Put(WriteOptions(), "sst_key", "val");
  // snapshot prevents key from being deleted during flush
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  db_->Put(WriteOptions(), "imm_key", "val");
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  db_->Put(WriteOptions(), "mem_key", "val");
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));

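  // "sst_key", "imm_key", and "mem_key" now live in an SST file, an immutable
  // memtable, and the active memtable respectively, and each is covered by a
  // ["a", "z") tombstone; with ignore_range_deletions they should all be
  // readable.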
  ReadOptions read_opts;
  read_opts.ignore_range_deletions = true;
  for (std::string key : {"sst_key", "imm_key", "mem_key"}) {
    std::string value;
    ASSERT_OK(db_->Get(read_opts, key, &value));
  }
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, IteratorRemovesCoveredKeys) {
  const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  Reopen(opts);

  // Write half of the keys before the tombstone and half after the tombstone.
  // Only covered keys (i.e., within the range and older than the tombstone)
  // should be deleted.
  for (int i = 0; i < kNum; ++i) {
    if (i == kNum / 2) {
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                       GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd));
    }
    db_->Put(WriteOptions(), GetNumericStr(i), "val");
  }
  ReadOptions read_opts;
  auto* iter = db_->NewIterator(read_opts);

  int expected = 0;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ASSERT_EQ(GetNumericStr(expected), iter->key());
    if (expected == kRangeBegin - 1) {
      expected = kNum / 2;
    } else {
      ++expected;
    }
  }
  ASSERT_EQ(kNum, expected);
  delete iter;
}

TEST_F(DBRangeDelTest, IteratorOverUserSnapshot) {
  const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25;
  Options opts = CurrentOptions();
  opts.comparator = test::Uint64Comparator();
  opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  Reopen(opts);

  const Snapshot* snapshot = nullptr;
  // Take a snapshot just before the range tombstone, then verify an iterator
  // using that snapshot sees every key inserted before the snapshot; the
  // tombstone, being newer, must not hide any of them.
  for (int i = 0; i < kNum; ++i) {
    if (i == kNum / 2) {
      snapshot = db_->GetSnapshot();
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                       GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd));
    }
    db_->Put(WriteOptions(), GetNumericStr(i), "val");
  }
  ReadOptions read_opts;
  read_opts.snapshot = snapshot;
  auto* iter = db_->NewIterator(read_opts);

  int expected = 0;
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
    ASSERT_EQ(GetNumericStr(expected), iter->key());
    ++expected;
  }
  ASSERT_EQ(kNum / 2, expected);
  delete iter;
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, IteratorIgnoresRangeDeletions) {
  Options opts = CurrentOptions();
  opts.max_write_buffer_number = 4;
  opts.min_write_buffer_number_to_merge = 3;
  opts.memtable_factory.reset(new SpecialSkipListFactory(1));
  Reopen(opts);

  db_->Put(WriteOptions(), "sst_key", "val");
  // snapshot prevents key from being deleted during flush
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  db_->Put(WriteOptions(), "imm_key", "val");
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  db_->Put(WriteOptions(), "mem_key", "val");
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));

  ReadOptions read_opts;
  read_opts.ignore_range_deletions = true;
  auto* iter = db_->NewIterator(read_opts);
  int i = 0;
  std::string expected[] = {"imm_key", "mem_key", "sst_key"};
  for (iter->SeekToFirst(); iter->Valid(); iter->Next(), ++i) {
    std::string key;
    ASSERT_EQ(expected[i], iter->key());
  }
  ASSERT_EQ(3, i);
  delete iter;
  db_->ReleaseSnapshot(snapshot);
}

#ifndef ROCKSDB_UBSAN_RUN
TEST_F(DBRangeDelTest, TailingIteratorRangeTombstoneUnsupported) {
  db_->Put(WriteOptions(), "key", "val");
  // snapshot prevents key from being deleted during flush
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(
      db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));

  // iterations check unsupported in memtable, l0, and then l1
  for (int i = 0; i < 3; ++i) {
    ReadOptions read_opts;
    read_opts.tailing = true;
    auto* iter = db_->NewIterator(read_opts);
    if (i == 2) {
      // For L1+, iterators over files are created on-demand, so need seek
      iter->SeekToFirst();
    }
    ASSERT_TRUE(iter->status().IsNotSupported());
    delete iter;
    if (i == 0) {
      ASSERT_OK(db_->Flush(FlushOptions()));
    } else if (i == 1) {
      MoveFilesToLevel(1);
    }
  }
  db_->ReleaseSnapshot(snapshot);
}

#endif  // !ROCKSDB_UBSAN_RUN

TEST_F(DBRangeDelTest, SubcompactionHasEmptyDedicatedRangeDelFile) {
  const int kNumFiles = 2, kNumKeysPerFile = 4;
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.level0_file_num_compaction_trigger = kNumFiles;
  options.max_subcompactions = 2;
  options.num_levels = 2;
  options.target_file_size_base = 4096;
  Reopen(options);

  // need a L1 file for subcompaction to be triggered
  ASSERT_OK(
      db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(0), "val"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(1);

  // put enough keys to fill up the first subcompaction, and later range-delete
  // them so that the first subcompaction outputs no key-values. In that case
  // it'll consider making an SST file dedicated to range deletions.
  for (int i = 0; i < kNumKeysPerFile; ++i) {
    ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(i),
                       std::string(1024, 'a')));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(kNumKeysPerFile)));

  // the above range tombstone can be dropped, so that one alone won't cause a
  // dedicated file to be opened. We can make one protected by snapshot that
  // must be considered. Make its range outside the first subcompaction's range
  // to exercise the tricky part of the code.
  const Snapshot* snapshot = db_->GetSnapshot();
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             Key(kNumKeysPerFile + 1),
                             Key(kNumKeysPerFile + 2)));
  ASSERT_OK(db_->Flush(FlushOptions()));

  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  db_->EnableAutoCompaction({db_->DefaultColumnFamily()});
  dbfull()->TEST_WaitForCompact();
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, MemtableBloomFilter) {
  // Regression test for #2743. The range delete tombstones in the memtable
  // should still be applied even when Get() skips searching the memtable due
  // to its prefix bloom filter.
  const int kMemtableSize = 1 << 20;              // 1MB
  const int kMemtablePrefixFilterSize = 1 << 13;  // 8KB
  const int kNumKeys = 1000;
  const int kPrefixLen = 8;
  Options options = CurrentOptions();
  options.memtable_prefix_bloom_size_ratio =
      static_cast<double>(kMemtablePrefixFilterSize) / kMemtableSize;
  options.prefix_extractor.reset(
      ROCKSDB_NAMESPACE::NewFixedPrefixTransform(kPrefixLen));
  options.write_buffer_size = kMemtableSize;
  Reopen(options);

  for (int i = 0; i < kNumKeys; ++i) {
    ASSERT_OK(Put(Key(i), "val"));
  }
  Flush();
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(kNumKeys)));
  for (int i = 0; i < kNumKeys; ++i) {
    std::string value;
    ASSERT_TRUE(db_->Get(ReadOptions(), Key(i), &value).IsNotFound());
  }
}

TEST_F(DBRangeDelTest, CompactionTreatsSplitInputLevelDeletionAtomically) {
  // This test originally verified that compaction treated files containing a
  // split range deletion in the input level as an atomic unit. I.e.,
  // compacting any input-level file(s) containing a portion of the range
  // deletion causes all other input-level files containing portions of that
  // same range deletion to be included in the compaction. Range deletion
  // tombstones are now truncated to sstable boundaries which removed the need
  // for that behavior (which could lead to excessively large compactions).
  const int kNumFilesPerLevel = 4, kValueBytes = 4 << 10;
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.level0_file_num_compaction_trigger = kNumFilesPerLevel;
  options.memtable_factory.reset(
      new SpecialSkipListFactory(2 /* num_entries_flush */));
  options.target_file_size_base = kValueBytes;
  // i == 0: CompactFiles
  // i == 1: CompactRange
  // i == 2: automatic compaction
  for (int i = 0; i < 3; ++i) {
    DestroyAndReopen(options);

    ASSERT_OK(Put(Key(0), ""));
    ASSERT_OK(db_->Flush(FlushOptions()));
    MoveFilesToLevel(2);
    ASSERT_EQ(1, NumTableFilesAtLevel(2));

    // snapshot protects range tombstone from dropping due to becoming obsolete.
    const Snapshot* snapshot = db_->GetSnapshot();
    db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                     Key(2 * kNumFilesPerLevel));

    Random rnd(301);
    std::string value = RandomString(&rnd, kValueBytes);
    for (int j = 0; j < kNumFilesPerLevel; ++j) {
      // give files overlapping key-ranges to prevent trivial move
      ASSERT_OK(Put(Key(j), value));
      ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value));
      if (j > 0) {
        dbfull()->TEST_WaitForFlushMemTable();
        ASSERT_EQ(j, NumTableFilesAtLevel(0));
      }
    }
    // put extra key to trigger final flush
    ASSERT_OK(Put("", ""));
    dbfull()->TEST_WaitForFlushMemTable();
    dbfull()->TEST_WaitForCompact();
    ASSERT_EQ(0, NumTableFilesAtLevel(0));
    ASSERT_EQ(kNumFilesPerLevel, NumTableFilesAtLevel(1));

    ColumnFamilyMetaData meta;
    db_->GetColumnFamilyMetaData(&meta);
    if (i == 0) {
      ASSERT_OK(db_->CompactFiles(
          CompactionOptions(), {meta.levels[1].files[0].name}, 2 /* level */));
      ASSERT_EQ(0, NumTableFilesAtLevel(1));
    } else if (i == 1) {
      auto begin_str = Key(0), end_str = Key(1);
      Slice begin = begin_str, end = end_str;
      ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin, &end));
      ASSERT_EQ(3, NumTableFilesAtLevel(1));
    } else if (i == 2) {
      ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(),
                                {{"max_bytes_for_level_base", "10000"}}));
      dbfull()->TEST_WaitForCompact();
      ASSERT_EQ(1, NumTableFilesAtLevel(1));
    }
    ASSERT_GT(NumTableFilesAtLevel(2), 0);

    db_->ReleaseSnapshot(snapshot);
  }
}

TEST_F(DBRangeDelTest, RangeTombstoneEndKeyAsSstableUpperBound) {
  // Test the handling of the range-tombstone end-key as the
  // upper-bound for an sstable.

  const int kNumFilesPerLevel = 2, kValueBytes = 4 << 10;
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.level0_file_num_compaction_trigger = kNumFilesPerLevel;
  options.memtable_factory.reset(
      new SpecialSkipListFactory(2 /* num_entries_flush */));
  options.target_file_size_base = kValueBytes;
  options.disable_auto_compactions = true;

  DestroyAndReopen(options);

  // Create an initial sstable at L2:
  //   [key000000#1,1, key000000#1,1]
  ASSERT_OK(Put(Key(0), ""));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(2);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));

  // A snapshot protects the range tombstone from dropping due to
  // becoming obsolete.
  const Snapshot* snapshot = db_->GetSnapshot();
  db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                   Key(0), Key(2 * kNumFilesPerLevel));

  // Create 2 additional sstables in L0. Note that the first sstable
  // contains the range tombstone.
  //   [key000000#3,1, key000004#72057594037927935,15]
  //   [key000001#5,1, key000002#6,1]
  Random rnd(301);
  std::string value = RandomString(&rnd, kValueBytes);
  for (int j = 0; j < kNumFilesPerLevel; ++j) {
    // Give files overlapping key-ranges to prevent a trivial move when we
    // compact from L0 to L1.
    ASSERT_OK(Put(Key(j), value));
    ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value));
    ASSERT_OK(db_->Flush(FlushOptions()));
    ASSERT_EQ(j + 1, NumTableFilesAtLevel(0));
  }
  // Compact the 2 L0 sstables to L1, resulting in the following LSM. There
  // are 2 sstables generated in L1 due to the target_file_size_base setting.
  //   L1:
  //     [key000000#3,1, key000002#72057594037927935,15]
  //     [key000002#6,1, key000004#72057594037927935,15]
  //   L2:
  //     [key000000#1,1, key000000#1,1]
  MoveFilesToLevel(1);
  ASSERT_EQ(2, NumTableFilesAtLevel(1));

  {
    // Compact the second sstable in L1:
    //   L1:
    //     [key000000#3,1, key000002#72057594037927935,15]
    //   L2:
    //     [key000000#1,1, key000000#1,1]
    //     [key000002#6,1, key000004#72057594037927935,15]
    //
    // At the same time, verify the compaction does not cause the key at the
    // endpoint (key000002#6,1) to disappear.
    ASSERT_EQ(value, Get(Key(2)));
    auto begin_str = Key(3);
    const ROCKSDB_NAMESPACE::Slice begin = begin_str;
    dbfull()->TEST_CompactRange(1, &begin, nullptr);
    ASSERT_EQ(1, NumTableFilesAtLevel(1));
    ASSERT_EQ(2, NumTableFilesAtLevel(2));
    ASSERT_EQ(value, Get(Key(2)));
  }

  {
    // Compact the first sstable in L1. This should be copacetic, but
    // was previously resulting in overlapping sstables in L2 due to
    // mishandling of the range tombstone end-key when used as the
    // largest key for an sstable. The resulting LSM structure should
    // be:
    //
    //   L2:
    //     [key000000#1,1, key000001#72057594037927935,15]
    //     [key000001#5,1, key000002#72057594037927935,15]
    //     [key000002#6,1, key000004#72057594037927935,15]
    auto begin_str = Key(0);
    const ROCKSDB_NAMESPACE::Slice begin = begin_str;
    dbfull()->TEST_CompactRange(1, &begin, &begin);
    ASSERT_EQ(0, NumTableFilesAtLevel(1));
    ASSERT_EQ(3, NumTableFilesAtLevel(2));
  }

  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, UnorderedTombstones) {
  // Regression test for #2752. Range delete tombstones between
  // different snapshot stripes are not stored in order, so the first
  // tombstone of each snapshot stripe should be checked as a smallest
  // candidate.
  Options options = CurrentOptions();
  DestroyAndReopen(options);

  auto cf = db_->DefaultColumnFamily();

  ASSERT_OK(db_->Put(WriteOptions(), cf, "a", "a"));
  ASSERT_OK(db_->Flush(FlushOptions(), cf));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr));
  ASSERT_EQ(1, NumTableFilesAtLevel(1));

  ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "b", "c"));
  // Hold a snapshot to separate these two delete ranges.
  auto snapshot = db_->GetSnapshot();
  ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "a", "b"));
  ASSERT_OK(db_->Flush(FlushOptions(), cf));
  db_->ReleaseSnapshot(snapshot);

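  // The flushed file should span both tombstones even though the one starting
  // at "a" was written after (and sorts before) the one starting at "b".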
  std::vector<std::vector<FileMetaData>> files;
  dbfull()->TEST_GetFilesMetaData(cf, &files);
  ASSERT_EQ(1, files[0].size());
  ASSERT_EQ("a", files[0][0].smallest.user_key());
  ASSERT_EQ("c", files[0][0].largest.user_key());

  std::string v;
  auto s = db_->Get(ReadOptions(), "a", &v);
  ASSERT_TRUE(s.IsNotFound());
}

class MockMergeOperator : public MergeOperator {
  // Mock non-associative operator. Non-associativity is expressed by lack of
  // implementation for any `PartialMerge*` functions.
 public:
  bool FullMergeV2(const MergeOperationInput& merge_in,
                   MergeOperationOutput* merge_out) const override {
    assert(merge_out != nullptr);
    merge_out->new_value = merge_in.operand_list.back().ToString();
    return true;
  }

  const char* Name() const override { return "MockMergeOperator"; }
};

TEST_F(DBRangeDelTest, KeyAtOverlappingEndpointReappears) {
  // This test uses a non-associative merge operator since that is a convenient
  // way to get compaction to write out files with overlapping user-keys at the
  // endpoints. Note, however, overlapping endpoints can also occur with other
  // value types (Put, etc.), assuming the right snapshots are present.
  const int kFileBytes = 1 << 20;
  const int kValueBytes = 1 << 10;
  const int kNumFiles = 4;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.merge_operator.reset(new MockMergeOperator());
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  // Push dummy data to L3 so that our actual test files on L0-L2 will not be
  // considered "bottommost" level; otherwise compaction may prevent us from
  // creating overlapping user keys, since on the bottommost level MergeHelper
  // combines all of a key's operands into a single entry.
  ASSERT_OK(db_->Merge(WriteOptions(), "key", "dummy"));
  ASSERT_OK(db_->Flush(FlushOptions()));
  MoveFilesToLevel(3);

  Random rnd(301);
  const Snapshot* snapshot = nullptr;
  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
      auto value = RandomString(&rnd, kValueBytes);
      ASSERT_OK(db_->Merge(WriteOptions(), "key", value));
    }
    if (i == kNumFiles - 1) {
      // Take snapshot to prevent covered merge operands from being dropped by
      // compaction.
      snapshot = db_->GetSnapshot();
      // The DeleteRange is the last write so all merge operands are covered.
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 "key", "key_"));
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
  std::string value;
  ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());

  dbfull()->TEST_CompactRange(0 /* level */, nullptr /* begin */,
                              nullptr /* end */, nullptr /* column_family */,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  // Now we have multiple files at L1 all containing a single user key, thus
  // guaranteeing overlap in the file endpoints.
  ASSERT_GT(NumTableFilesAtLevel(1), 1);

  // Verify no merge operands reappeared after the compaction.
  ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());

  // Compact and verify again. It's worthwhile because now the files have
  // tighter endpoints, so we can verify that doesn't mess anything up.
  dbfull()->TEST_CompactRange(1 /* level */, nullptr /* begin */,
                              nullptr /* end */, nullptr /* column_family */,
                              true /* disallow_trivial_move */);
  ASSERT_GT(NumTableFilesAtLevel(2), 1);
  ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());

  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, UntruncatedTombstoneDoesNotDeleteNewerKey) {
  // Verify a key newer than a range tombstone cannot be deleted by being
  // compacted to the bottom level (and thus having its seqnum zeroed) before
  // the range tombstone. This used to happen when range tombstones were
  // untruncated on reads such that they extended past their file boundaries.
  //
  // Test summary:
  //
  // - L1 is bottommost.
  // - A couple snapshots are strategically taken to prevent seqnums from being
  //   zeroed, range tombstone from being dropped, merge operands from being
  //   dropped, and merge operands from being combined.
  // - Left half of files in L1 all have same user key, ensuring their file
  //   boundaries overlap. In the past this would cause range tombstones to be
  //   untruncated.
  // - Right half of L1 files all have different keys, ensuring no overlap.
  // - A range tombstone spans all L1 keys, so it is stored in every L1 file.
  // - Keys in the right side of the key-range are overwritten. These are
  //   compacted down to L1 after releasing snapshots such that their seqnums
  //   will be zeroed.
  // - A full range scan is performed. If the tombstone in the left L1 files
  //   were untruncated, it would now cover keys newer than it (but with zeroed
  //   seqnums) in the right L1 files.
  const int kFileBytes = 1 << 20;
  const int kValueBytes = 1 << 10;
  const int kNumFiles = 4;
  const int kMaxKey = kNumFiles * kFileBytes / kValueBytes;
  const int kKeysOverwritten = 10;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.merge_operator.reset(new MockMergeOperator());
  options.num_levels = 2;
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  Random rnd(301);
  // - snapshots[0] prevents merge operands from being combined during
  //   compaction.
  // - snapshots[1] prevents merge operands from being dropped due to the
  //   covering range tombstone.
  const Snapshot* snapshots[] = {nullptr, nullptr};
  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
      auto value = RandomString(&rnd, kValueBytes);
      std::string key;
      if (i < kNumFiles / 2) {
        key = Key(0);
      } else {
        key = Key(1 + i * kFileBytes / kValueBytes + j);
      }
      ASSERT_OK(db_->Merge(WriteOptions(), key, value));
    }
    if (i == 0) {
      snapshots[0] = db_->GetSnapshot();
    }
    if (i == kNumFiles - 1) {
      snapshots[1] = db_->GetSnapshot();
      // The DeleteRange is the last write so all merge operands are covered.
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 Key(0), Key(kMaxKey + 1)));
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));

  auto get_key_count = [this]() -> int {
    auto* iter = db_->NewIterator(ReadOptions());
    iter->SeekToFirst();
    int keys_found = 0;
    for (; iter->Valid(); iter->Next()) {
      ++keys_found;
    }
    delete iter;
    return keys_found;
  };

  // All keys should be covered
  ASSERT_EQ(0, get_key_count());

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */,
                              nullptr /* end_key */));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  // Roughly the left half of L1 files should have overlapping boundary keys,
  // while the right half should not.
  ASSERT_GE(NumTableFilesAtLevel(1), kNumFiles);

  // Now overwrite a few keys that are in L1 files that definitely don't have
  // overlapping boundary keys.
  for (int i = kMaxKey; i > kMaxKey - kKeysOverwritten; --i) {
    auto value = RandomString(&rnd, kValueBytes);
    ASSERT_OK(db_->Merge(WriteOptions(), Key(i), value));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));

  // The overwritten keys are in L0 now, so clearly aren't covered by the range
  // tombstone in L1.
  ASSERT_EQ(kKeysOverwritten, get_key_count());

  // Release snapshots so seqnums can be zeroed when L0->L1 happens.
  db_->ReleaseSnapshot(snapshots[0]);
  db_->ReleaseSnapshot(snapshots[1]);

  auto begin_key_storage = Key(kMaxKey - kKeysOverwritten + 1);
  auto end_key_storage = Key(kMaxKey);
  Slice begin_key(begin_key_storage);
  Slice end_key(end_key_storage);
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin_key, &end_key));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_GE(NumTableFilesAtLevel(1), kNumFiles);

  ASSERT_EQ(kKeysOverwritten, get_key_count());
}
1339
TEST_F(DBRangeDelTest,DeletedMergeOperandReappearsIterPrev)1340 TEST_F(DBRangeDelTest, DeletedMergeOperandReappearsIterPrev) {
  // Exposes a bug where we were using
  // `RangeDelPositioningMode::kBackwardTraversal` while scanning merge operands
  // in the forward direction. Confusingly, this case happened during
  // `DBIter::Prev`. It could cause assertion failure, or reappearing keys.
  const int kFileBytes = 1 << 20;
  const int kValueBytes = 1 << 10;
  // Need multiple keys so we can get results when calling `Prev()` after
  // `SeekToLast()`.
  const int kNumKeys = 3;
  const int kNumFiles = 4;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.merge_operator.reset(new MockMergeOperator());
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  Random rnd(301);
  const Snapshot* snapshot = nullptr;
  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
      auto value = RandomString(&rnd, kValueBytes);
      ASSERT_OK(db_->Merge(WriteOptions(), Key(j % kNumKeys), value));
      if (i == 0 && j == kNumKeys) {
        // Take snapshot to prevent covered merge operands from being dropped or
        // merged by compaction.
        snapshot = db_->GetSnapshot();
        // Do a DeleteRange near the beginning so only the oldest merge operand
        // for each key is covered. This ensures the sequence of events:
        //
        // - `DBIter::Prev()` is called
        // - After several same versions of the same user key are encountered,
        //   it decides to seek using `DBIter::FindValueForCurrentKeyUsingSeek`.
        // - Binary searches to the newest version of the key, which is in the
        //   leftmost file containing the user key.
        // - Scans forwards to collect all merge operands. Eventually reaches
        //   the rightmost file containing the oldest merge operand, which
        //   should be covered by the `DeleteRange`. If `RangeDelAggregator`
        //   were not properly using `kForwardTraversal` here, that operand
        //   would reappear.
        ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                   Key(0), Key(kNumKeys + 1)));
      }
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */,
                              nullptr /* end_key */));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_GT(NumTableFilesAtLevel(1), 1);

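  // Reverse scan: every key should be returned once, and the oldest merge
  // operand covered by the DeleteRange must not reappear.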
  auto* iter = db_->NewIterator(ReadOptions());
  iter->SeekToLast();
  int keys_found = 0;
  for (; iter->Valid(); iter->Prev()) {
    ++keys_found;
  }
  delete iter;
  ASSERT_EQ(kNumKeys, keys_found);

  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeys) {
  const int kFileBytes = 1 << 20;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  ASSERT_OK(Put(Key(0), "a"));
  const Snapshot* snapshot = db_->GetSnapshot();

  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(10)));

  db_->Flush(FlushOptions());

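  // Read through the snapshot taken before the DeleteRange: Key(0) must still
  // be visible, and nothing else should be.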
  ReadOptions read_opts;
  read_opts.snapshot = snapshot;
  auto* iter = db_->NewIterator(read_opts);

  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(Key(0), iter->key());

  iter->Next();
  ASSERT_FALSE(iter->Valid());

  delete iter;
  db_->ReleaseSnapshot(snapshot);
}

TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeysInImmMemTables) {
  const int kFileBytes = 1 << 20;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  // Block the flush thread so the switched-out memtable stays pinned in the
  // immutable memtable list.
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->LoadDependency({
      {"SnapshotPreventsDroppedKeysInImmMemTables:AfterNewIterator",
       "DBImpl::BGWorkFlush"},
  });
  SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_OK(Put(Key(0), "a"));
  std::unique_ptr<const Snapshot, std::function<void(const Snapshot*)>>
      snapshot(db_->GetSnapshot(),
               [this](const Snapshot* s) { db_->ReleaseSnapshot(s); });

  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(10)));

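  // Move the memtable holding the point key and the range tombstone to the
  // immutable list; the blocked flush thread keeps it there while we read.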
  ASSERT_OK(dbfull()->TEST_SwitchMemtable());

  ReadOptions read_opts;
  read_opts.snapshot = snapshot.get();
  std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));

  TEST_SYNC_POINT("SnapshotPreventsDroppedKeysInImmMemTables:AfterNewIterator");

  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(Key(0), iter->key());

  iter->Next();
  ASSERT_FALSE(iter->Valid());
}

TEST_F(DBRangeDelTest, RangeTombstoneWrittenToMinimalSsts) {
  // Adapted from
  // https://github.com/cockroachdb/cockroach/blob/de8b3ea603dd1592d9dc26443c2cc92c356fbc2f/pkg/storage/engine/rocksdb_test.go#L1267-L1398.
  // Regression test for issue where range tombstone was written to more files
  // than necessary when it began exactly at the begin key in the next
  // compaction output file.
  const int kFileBytes = 1 << 20;
  const int kValueBytes = 4 << 10;
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  // Have a bit of slack in the size limits but we enforce them more strictly
  // when manually flushing/compacting.
  options.max_compaction_bytes = 2 * kFileBytes;
  options.target_file_size_base = 2 * kFileBytes;
  options.write_buffer_size = 2 * kFileBytes;
  Reopen(options);

  Random rnd(301);
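  // Lay down three non-overlapping L2 files, one per key prefix ('a', 'b',
  // 'c'), so the later L0->L1 compaction has grandparent boundaries to cut at.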
  for (char first_char : {'a', 'b', 'c'}) {
    for (int i = 0; i < kFileBytes / kValueBytes; ++i) {
      std::string key(1, first_char);
      key.append(Key(i));
      std::string value = RandomString(&rnd, kValueBytes);
      ASSERT_OK(Put(key, value));
    }
    db_->Flush(FlushOptions());
    MoveFilesToLevel(2);
  }
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_EQ(3, NumTableFilesAtLevel(2));

  // Populate the memtable lightly while spanning the whole key-space. The
  // setting of `max_compaction_bytes` will cause the L0->L1 to output multiple
  // files to prevent a large L1->L2 compaction later.
  ASSERT_OK(Put("a", "val"));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             "c" + Key(1), "d"));
  // Our compaction output file cutting logic currently only considers point
  // keys. So, in order for the range tombstone to have a chance at landing at
  // the start of a new file, we need a point key at the range tombstone's
  // start.
  // TODO(ajkr): remove this `Put` after file cutting accounts for range
  // tombstones (#3977).
  ASSERT_OK(Put("c" + Key(1), "value"));
  db_->Flush(FlushOptions());

  // Ensure manual L0->L1 compaction cuts the outputs before the range
  // tombstone and the range tombstone is only placed in the second SST.
  std::string begin_key_storage("c" + Key(1));
  Slice begin_key(begin_key_storage);
  std::string end_key_storage("d");
  Slice end_key(end_key_storage);
  dbfull()->TEST_CompactRange(0 /* level */, &begin_key /* begin */,
                              &end_key /* end */, nullptr /* column_family */,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(2, NumTableFilesAtLevel(1));

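  // Collect the two L1 files, ordered by smallest key, so we can verify their
  // boundaries and then count range deletions per table.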
  std::vector<LiveFileMetaData> all_metadata;
  std::vector<LiveFileMetaData> l1_metadata;
  db_->GetLiveFilesMetaData(&all_metadata);
  for (const auto& metadata : all_metadata) {
    if (metadata.level == 1) {
      l1_metadata.push_back(metadata);
    }
  }
  std::sort(l1_metadata.begin(), l1_metadata.end(),
            [&](const LiveFileMetaData& a, const LiveFileMetaData& b) {
              return options.comparator->Compare(a.smallestkey, b.smallestkey) <
                     0;
            });
  ASSERT_EQ("a", l1_metadata[0].smallestkey);
  ASSERT_EQ("a", l1_metadata[0].largestkey);
  ASSERT_EQ("c" + Key(1), l1_metadata[1].smallestkey);
  ASSERT_EQ("d", l1_metadata[1].largestkey);

  TablePropertiesCollection all_table_props;
  ASSERT_OK(db_->GetPropertiesOfAllTables(&all_table_props));
  int64_t num_range_deletions = 0;
  for (const auto& name_and_table_props : all_table_props) {
    const auto& name = name_and_table_props.first;
    const auto& table_props = name_and_table_props.second;
    // The range tombstone should only be output to the second L1 SST.
    if (name.size() >= l1_metadata[1].name.size() &&
        name.substr(name.size() - l1_metadata[1].name.size())
                .compare(l1_metadata[1].name) == 0) {
      ASSERT_EQ(1, table_props->num_range_deletions);
      ++num_range_deletions;
    } else {
      ASSERT_EQ(0, table_props->num_range_deletions);
    }
  }
  ASSERT_EQ(1, num_range_deletions);
}

TEST_F(DBRangeDelTest, OverlappedTombstones) {
  const int kNumPerFile = 4, kNumFiles = 2;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.max_compaction_bytes = 9 * 1024;
  DestroyAndReopen(options);
  Random rnd(301);
  for (int i = 0; i < kNumFiles; ++i) {
    std::vector<std::string> values;
    // Write 12K (4 values, each 3K)
    for (int j = 0; j < kNumPerFile; j++) {
      values.push_back(RandomString(&rnd, 3 << 10));
      ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
    }
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  MoveFilesToLevel(2);
  ASSERT_EQ(2, NumTableFilesAtLevel(2));

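  // Write a range tombstone covering every key in both L2 files and flush it
  // into a single L0 file.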
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(1),
                             Key(kNumFiles * kNumPerFile + 1)));
  ASSERT_OK(db_->Flush(FlushOptions()));

  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);

  // The tombstone range is not broken up into multiple SSTs, which could incur
  // a large compaction with L2.
  ASSERT_EQ(1, NumTableFilesAtLevel(1));
  dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));
  ASSERT_EQ(0, NumTableFilesAtLevel(1));
}

TEST_F(DBRangeDelTest, OverlappedKeys) {
  const int kNumPerFile = 4, kNumFiles = 2;
  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.max_compaction_bytes = 9 * 1024;
  DestroyAndReopen(options);
  Random rnd(301);
  for (int i = 0; i < kNumFiles; ++i) {
    std::vector<std::string> values;
    // Write 12K (4 values, each 3K)
    for (int j = 0; j < kNumPerFile; j++) {
      values.push_back(RandomString(&rnd, 3 << 10));
      ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
    }
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  MoveFilesToLevel(2);
  ASSERT_EQ(2, NumTableFilesAtLevel(2));

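  // Overwrite every key with a small value so the flushed L0 file's key range
  // overlaps both L2 files.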
  for (int i = 1; i < kNumFiles * kNumPerFile + 1; i++) {
    ASSERT_OK(Put(Key(i), "0x123"));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  // The key range is broken up into three SSTs to avoid a future big
  // compaction with the grandparent.
  dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(3, NumTableFilesAtLevel(1));

  dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));
  ASSERT_EQ(0, NumTableFilesAtLevel(1));
}

#endif  // ROCKSDB_LITE

}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}