// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "cache/clock_cache.h"

#ifndef SUPPORT_CLOCK_CACHE

namespace ROCKSDB_NAMESPACE {

std::shared_ptr<Cache> NewClockCache(
    size_t /*capacity*/, int /*num_shard_bits*/, bool /*strict_capacity_limit*/,
    CacheMetadataChargePolicy /*metadata_charge_policy*/) {
  // Clock cache not supported.
  return nullptr;
}

}  // namespace ROCKSDB_NAMESPACE

#else

#include <assert.h>
#include <atomic>
#include <deque>

// "tbb/concurrent_hash_map.h" requires RTTI if exceptions are enabled.
// Disable exceptions so that users can choose to disable RTTI.
#ifndef ROCKSDB_USE_RTTI
#define TBB_USE_EXCEPTIONS 0
#endif
#include "tbb/concurrent_hash_map.h"

#include "cache/sharded_cache.h"
#include "port/malloc.h"
#include "port/port.h"
#include "util/autovector.h"
#include "util/mutexlock.h"

namespace ROCKSDB_NAMESPACE {

namespace {

// An implementation of the Cache interface based on the CLOCK algorithm, with
// better concurrent performance than LRUCache. The idea of the CLOCK algorithm
// is to maintain all cache entries in a circular list, with an iterator
// (the "head") pointing to the last examined entry. Eviction starts from the
// current head. Each entry is given a second chance before eviction if it has
// been accessed since it was last examined. In contrast to LRU, no
// modification of the internal data structure (other than flipping the usage
// bit) needs to be done upon lookup. This gives us the opportunity to
// implement a cache with better concurrency.
//
// Each cache entry is represented by a cache handle, and all the handles are
// arranged in a circular list, as described above. Upon erase of an entry, we
// never remove the handle. Instead, the handle is put into a recycle bin to
// be reused. This avoids memory deallocation, which is hard to deal with in a
// concurrent environment.
//
// The cache also maintains a concurrent hash map for lookup. Any concurrent
// hash map implementation should do the job. We currently use
// tbb::concurrent_hash_map because it supports concurrent erase.
//
// Each cache handle has the following flags and counters, which are squeezed
// into an atomic integer to make sure the handle is always in a consistent
// state:
//
// * In-cache bit: whether the entry is referenced by the cache itself. If an
//   entry is in cache, its key is also present in the hash map.
// * Usage bit: whether the entry has been accessed by a user since it was
//   last examined for eviction. Can be reset by eviction.
// * Reference count: reference count held by users.
//
// An entry can be referenced only when it is in cache. An entry can be
// evicted only when it is in cache, has not been used since it was last
// examined, and its reference count is zero.
//
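// As an illustrative sketch (not part of the implementation), the three
// fields are packed into a single 32-bit word exactly as the constants
// kInCacheBit, kUsageBit, and kRefsOffset below describe, and can be decoded
// like this:
//
//   bool in_cache = flags & 1;   // kInCacheBit
//   bool usage    = flags & 2;   // kUsageBit
//   uint32_t refs = flags >> 2;  // kRefsOffset
//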
// The following figure shows a possible layout of the cache. Boxes represent
// cache handles, and the numbers in each box are the in-cache bit, the usage
// bit, and the reference count, respectively.
//
// hash map:
//   +-------+--------+
//   |  key  | handle |
//   +-------+--------+
//   | "foo" |    5   |----------------------------------+
//   +-------+--------+                                  |
//   | "bar" |    2   |--+                               |
//   +-------+--------+  |                               |
//                       |                               |
//                  head |                               |
//                     | |                               |
// circular list:      | |                               |
//   +-------+   +-------+   +-------+   +-------+   +-------+   +-------
//   |(0,0,0)|---|(1,1,0)|---|(0,0,0)|---|(0,1,3)|---|(1,0,0)|---|  ...
//   +-------+   +-------+   +-------+   +-------+   +-------+   +-------
//       |                       |
//       +---------+   +---------+
//                 |   |
//               +---+---+
// recycle bin:  | 1 | 3 |
//               +---+---+
//
// Suppose we try to insert "baz" into the cache at this point and the cache
// is full. The cache first looks for entries to evict, starting from where
// the head points (the second entry). It resets the usage bit of the second
// entry, skips the third and fourth entries since they are not in cache, and
// finally evicts the fifth entry ("foo"). It then looks in the recycle bin
// for an available handle, grabs handle 3, and inserts the key into that
// handle. The following figure shows the resulting layout.
//
// hash map:
//   +-------+--------+
//   |  key  | handle |
//   +-------+--------+
//   | "baz" |    3   |----------+
//   +-------+--------+          |
//   | "bar" |    2   |--+       |
//   +-------+--------+  |       |
//                       |       |
//                       |       |                     head
//                       |       |                       |
// circular list:        |       |                       |
//   +-------+   +-------+   +-------+   +-------+   +-------+   +-------
//   |(0,0,0)|---|(1,0,0)|---|(1,0,0)|---|(0,1,3)|---|(0,0,0)|---|  ...
//   +-------+   +-------+   +-------+   +-------+   +-------+   +-------
//       |                                               |
//       +---------+   +---------------------------------+
//                 |   |
//               +---+---+
// recycle bin:  | 1 | 5 |
//               +---+---+
//
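// A condensed sketch of the eviction scan described above (the actual logic
// lives in ClockCacheShard::EvictFromCache() and ClockCacheShard::TryEvict()
// below; names here are purely illustrative):
//
//   while (usage + charge > capacity) {
//     CacheHandle* h = &list_[new_head];
//     if (h->flags == kInCacheBit) {  // in cache, usage bit clear, no refs
//       // evict h: erase it from the hash map and recycle the handle
//     } else {
//       // clear the usage bit of h (the "second chance")
//     }
//     // advance new_head, wrapping around; give up after two full passes
//   }
//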
// A global mutex guards the circular list, the head, and the recycle bin.
// We additionally require that modifying the hash map is done while holding
// the mutex. As such, operations that modify the cache (such as Insert() and
// Erase()) must hold the mutex. Lookup() only accesses the hash map and the
// flags associated with each handle, and doesn't require explicit locking.
// Release() has to acquire the mutex only when it releases the last reference
// to the entry and the entry has been erased from the cache explicitly. A
// future improvement could be to remove the mutex completely.
//
// Benchmark:
// We ran readrandom db_bench on a test DB of size 13GB, with the size of each
// level as follows:
//
//    Level    Files   Size(MB)
//    -------------------------
//      L0         1       0.01
//      L1        18      17.32
//      L2       230     182.94
//      L3      1186    1833.63
//      L4      4602    8140.30
//
// We tested with both 32 and 16 read threads, with a 2GB cache size (the
// whole DB doesn't fit in cache) and a 64GB cache size (the whole DB fits in
// cache), and with and without putting index and filter blocks in the block
// cache. The benchmark ran with RocksDB 4.10. We got the following results:
//
// Threads Cache     Cache               ClockCache               LRUCache
//         Size  Index/Filter Throughput(MB/s)   Hit Throughput(MB/s)   Hit
//      32   2GB          yes               466.7 85.9%           433.7 86.5%
//      32   2GB           no               529.9 72.7%           532.7 73.9%
//      32  64GB          yes               649.9 99.9%           507.9 99.9%
//      32  64GB           no               740.4 99.9%           662.8 99.9%
//      16   2GB          yes               278.4 85.9%           283.4 86.5%
//      16   2GB           no               318.6 72.7%           335.8 73.9%
//      16  64GB          yes               391.9 99.9%           353.3 99.9%
//      16  64GB           no               433.8 99.8%           419.4 99.8%
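//
// Usage sketch (illustrative; assumes SUPPORT_CLOCK_CACHE is defined and uses
// the public RocksDB block-based table options to attach the cache):
//
//   std::shared_ptr<Cache> cache = NewClockCache(
//       static_cast<size_t>(2) << 30 /* 2GB */, 6 /* num_shard_bits */,
//       false /* strict_capacity_limit */, kDontChargeCacheMetadata);
//   BlockBasedTableOptions table_options;
//   table_options.block_cache = cache;
//   Options options;
//   options.table_factory.reset(NewBlockBasedTableFactory(table_options));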

// Cache entry metadata.
struct CacheHandle {
  using Deleter = Cache::Deleter;

  Slice key;
  uint32_t hash;
  void* value;
  size_t charge;
  Deleter* deleter;

  // Flags and counters associated with the cache handle:
  //   lowest bit: in-cache bit
  //   second lowest bit: usage bit
  //   the remaining bits: reference count
  // The handle is unused when flags equals 0. The thread that decreases the
  // count to 0 is responsible for putting the handle back into recycle_ and
  // cleaning up memory.
  std::atomic<uint32_t> flags;

  CacheHandle() = default;

  CacheHandle(const CacheHandle& a) { *this = a; }

  CacheHandle(const Slice& k, void* v, Deleter* del)
      : key(k), value(v), deleter(del) {}

  CacheHandle& operator=(const CacheHandle& a) {
    // Only copy members needed for deletion.
    key = a.key;
    value = a.value;
    deleter = a.deleter;
    return *this;
  }

  inline static size_t CalcTotalCharge(
      Slice key, size_t charge,
      CacheMetadataChargePolicy metadata_charge_policy) {
    size_t meta_charge = 0;
    if (metadata_charge_policy == kFullChargeCacheMetadata) {
      meta_charge += sizeof(CacheHandle);
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
      meta_charge += malloc_usable_size(
          static_cast<void*>(const_cast<char*>(key.data())));
#else
      meta_charge += key.size();
#endif
    }
    return charge + meta_charge;
  }

  inline size_t CalcTotalCharge(
      CacheMetadataChargePolicy metadata_charge_policy) {
    return CalcTotalCharge(key, charge, metadata_charge_policy);
  }
};
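
// Accounting example (illustrative, not used by the code): under
// kFullChargeCacheMetadata an entry inserted with charge `c` is accounted as
//
//   c + sizeof(CacheHandle) + <allocated size of the copied key>
//
// (falling back to key.size() when malloc_usable_size() is unavailable),
// while under other policies only `c` is charged.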

// Key of the hash map. We store the hash value with the key for convenience.
struct CacheKey {
  Slice key;
  uint32_t hash_value;

  CacheKey() = default;

  CacheKey(const Slice& k, uint32_t h) {
    key = k;
    hash_value = h;
  }

  static bool equal(const CacheKey& a, const CacheKey& b) {
    return a.hash_value == b.hash_value && a.key == b.key;
  }

  static size_t hash(const CacheKey& a) {
    return static_cast<size_t>(a.hash_value);
  }
};
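
// Note (illustrative): CacheKey also doubles as the HashCompare argument of
// tbb::concurrent_hash_map, which is why it provides the static hash() and
// equal() functions. The shard's hash table below is declared as
//
//   tbb::concurrent_hash_map<CacheKey, CacheHandle*, CacheKey>
//
// so no separate comparator type is needed.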

struct CleanupContext {
  // List of values to be deleted, along with the key and deleter.
  autovector<CacheHandle> to_delete_value;

  // List of keys to be deleted.
  autovector<const char*> to_delete_key;
};

// A cache shard which maintains its own CLOCK cache.
class ClockCacheShard final : public CacheShard {
 public:
  // Hash map type.
  typedef tbb::concurrent_hash_map<CacheKey, CacheHandle*, CacheKey> HashTable;

  ClockCacheShard();
  ~ClockCacheShard() override;

  // Interfaces
  void SetCapacity(size_t capacity) override;
  void SetStrictCapacityLimit(bool strict_capacity_limit) override;
  Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
                Deleter* deleter, Cache::Handle** handle,
                Cache::Priority priority) override;
  Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
  // If the entry is in cache, increase its reference count and return true.
  // Return false otherwise.
  //
  // Not necessary to hold mutex_ before being called.
  bool Ref(Cache::Handle* handle) override;
  bool Release(Cache::Handle* handle, bool force_erase = false) override;
  void Erase(const Slice& key, uint32_t hash) override;
  bool EraseAndConfirm(const Slice& key, uint32_t hash,
                       CleanupContext* context);
  size_t GetUsage() const override;
  size_t GetPinnedUsage() const override;
  void EraseUnRefEntries() override;
  void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                              bool thread_safe) override;

 private:
  static const uint32_t kInCacheBit = 1;
  static const uint32_t kUsageBit = 2;
  static const uint32_t kRefsOffset = 2;
  static const uint32_t kOneRef = 1 << kRefsOffset;

  // Helper functions to extract cache handle flags and counters.
  static bool InCache(uint32_t flags) { return flags & kInCacheBit; }
  static bool HasUsage(uint32_t flags) { return flags & kUsageBit; }
  static uint32_t CountRefs(uint32_t flags) { return flags >> kRefsOffset; }

  // Decrease the reference count of the entry. If this decreases the count to
  // 0, recycle the entry. If set_usage is true, also set the usage bit.
  //
  // Returns true if a value is erased.
  //
  // Not necessary to hold mutex_ before being called.
  bool Unref(CacheHandle* handle, bool set_usage, CleanupContext* context);

  // Unset the in-cache bit of the entry. Recycle the handle if necessary.
  //
  // Returns true if a value is erased.
  //
  // Has to hold mutex_ before being called.
  bool UnsetInCache(CacheHandle* handle, CleanupContext* context);

  // Put the handle back into the recycle_ list, and put the value associated
  // with it into the to-be-deleted list. It doesn't clean up the key, as the
  // key might be reused by another handle.
  //
  // Has to hold mutex_ before being called.
  void RecycleHandle(CacheHandle* handle, CleanupContext* context);

  // Delete keys and values in the to-be-deleted list. Call the method without
  // holding the mutex, as destructors can be expensive.
  void Cleanup(const CleanupContext& context);

  // Examine the handle for eviction. If the handle is in cache, its usage bit
  // is not set, and its reference count is 0, evict it from the cache.
  // Otherwise unset the usage bit.
  //
  // Has to hold mutex_ before being called.
  bool TryEvict(CacheHandle* value, CleanupContext* context);

  // Scan through the circular list, evicting entries until we get enough
  // capacity for a new cache entry of the given size. Return true on success,
  // false otherwise.
  //
  // Has to hold mutex_ before being called.
  bool EvictFromCache(size_t charge, CleanupContext* context);

  CacheHandle* Insert(const Slice& key, uint32_t hash, void* value,
                      size_t charge, Deleter* deleter, bool hold_reference,
                      CleanupContext* context);

  // Guards list_, head_, and recycle_. In addition, updating table_ also has
  // to hold the mutex, to avoid the cache being in an inconsistent state.
  mutable port::Mutex mutex_;

  // The circular list of cache handles. Initially the list is empty. Once a
  // handle is needed by an insertion, and no more handles are available in
  // the recycle bin, one more handle is appended to the end.
  //
  // We use std::deque for the circular list because we want to make sure
  // pointers to handles are valid throughout the lifetime of the cache
  // (in contrast to std::vector), and to be able to grow the list (in
  // contrast to statically allocated arrays).
  std::deque<CacheHandle> list_;

  // Index of the next handle in the circular list to be examined for
  // eviction.
  size_t head_;

  // Recycle bin of cache handles.
  autovector<CacheHandle*> recycle_;

  // Maximum cache size.
  std::atomic<size_t> capacity_;

  // Current total size of the cache.
  std::atomic<size_t> usage_;

  // Total un-released cache size.
  std::atomic<size_t> pinned_usage_;

  // Whether to allow inserting into the cache when the cache is full.
  std::atomic<bool> strict_capacity_limit_;

  // Hash table (tbb::concurrent_hash_map) for lookup.
  HashTable table_;
};

ClockCacheShard::ClockCacheShard()
    : head_(0), usage_(0), pinned_usage_(0), strict_capacity_limit_(false) {}

ClockCacheShard::~ClockCacheShard() {
  for (auto& handle : list_) {
    uint32_t flags = handle.flags.load(std::memory_order_relaxed);
    if (InCache(flags) || CountRefs(flags) > 0) {
      if (handle.deleter != nullptr) {
        (*handle.deleter)(handle.key, handle.value);
      }
      delete[] handle.key.data();
    }
  }
}

size_t ClockCacheShard::GetUsage() const {
  return usage_.load(std::memory_order_relaxed);
}

size_t ClockCacheShard::GetPinnedUsage() const {
  return pinned_usage_.load(std::memory_order_relaxed);
}

void ClockCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                                             bool thread_safe) {
  if (thread_safe) {
    mutex_.Lock();
  }
  for (auto& handle : list_) {
    // Use relaxed semantics instead of acquire semantics since we are either
    // holding the mutex, or there is no thread-safety requirement.
    uint32_t flags = handle.flags.load(std::memory_order_relaxed);
    if (InCache(flags)) {
      callback(handle.value, handle.charge);
    }
  }
  if (thread_safe) {
    mutex_.Unlock();
  }
}

void ClockCacheShard::RecycleHandle(CacheHandle* handle,
                                    CleanupContext* context) {
  mutex_.AssertHeld();
  assert(!InCache(handle->flags) && CountRefs(handle->flags) == 0);
  context->to_delete_key.push_back(handle->key.data());
  context->to_delete_value.emplace_back(*handle);
  size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_);
  handle->key.clear();
  handle->value = nullptr;
  handle->deleter = nullptr;
  recycle_.push_back(handle);
  usage_.fetch_sub(total_charge, std::memory_order_relaxed);
}

void ClockCacheShard::Cleanup(const CleanupContext& context) {
  for (const CacheHandle& handle : context.to_delete_value) {
    if (handle.deleter) {
      (*handle.deleter)(handle.key, handle.value);
    }
  }
  for (const char* key : context.to_delete_key) {
    delete[] key;
  }
}

bool ClockCacheShard::Ref(Cache::Handle* h) {
  auto handle = reinterpret_cast<CacheHandle*>(h);
  // CAS loop to increase the reference count.
  uint32_t flags = handle->flags.load(std::memory_order_relaxed);
  while (InCache(flags)) {
    // Use acquire semantics on success, as further operations on the cache
    // entry have to be ordered after the reference count is increased.
    if (handle->flags.compare_exchange_weak(flags, flags + kOneRef,
                                            std::memory_order_acquire,
                                            std::memory_order_relaxed)) {
      if (CountRefs(flags) == 0) {
        // No reference count before the operation.
        size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_);
        pinned_usage_.fetch_add(total_charge, std::memory_order_relaxed);
      }
      return true;
    }
  }
  return false;
}

bool ClockCacheShard::Unref(CacheHandle* handle, bool set_usage,
                            CleanupContext* context) {
  if (set_usage) {
    handle->flags.fetch_or(kUsageBit, std::memory_order_relaxed);
  }
  // Use acquire-release semantics as previous operations on the cache entry
  // have to be ordered before the reference count is decreased, and potential
  // cleanup of the entry has to be ordered after.
  uint32_t flags = handle->flags.fetch_sub(kOneRef, std::memory_order_acq_rel);
  assert(CountRefs(flags) > 0);
  if (CountRefs(flags) == 1) {
    // This is the last reference.
    size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_);
    pinned_usage_.fetch_sub(total_charge, std::memory_order_relaxed);
    // Cleanup if it is the last reference.
    if (!InCache(flags)) {
      MutexLock l(&mutex_);
      RecycleHandle(handle, context);
    }
  }
  return context->to_delete_value.size();
}

bool ClockCacheShard::UnsetInCache(CacheHandle* handle,
                                   CleanupContext* context) {
  mutex_.AssertHeld();
  // Use acquire-release semantics as previous operations on the cache entry
  // have to be ordered before the in-cache bit is cleared, and potential
  // cleanup of the entry has to be ordered after.
  uint32_t flags =
      handle->flags.fetch_and(~kInCacheBit, std::memory_order_acq_rel);
  // Cleanup if it is the last reference.
  if (InCache(flags) && CountRefs(flags) == 0) {
    RecycleHandle(handle, context);
  }
  return context->to_delete_value.size();
}

bool ClockCacheShard::TryEvict(CacheHandle* handle, CleanupContext* context) {
  mutex_.AssertHeld();
  uint32_t flags = kInCacheBit;
  if (handle->flags.compare_exchange_strong(flags, 0,
                                            std::memory_order_acquire,
                                            std::memory_order_relaxed)) {
    bool erased __attribute__((__unused__)) =
        table_.erase(CacheKey(handle->key, handle->hash));
    assert(erased);
    RecycleHandle(handle, context);
    return true;
  }
  handle->flags.fetch_and(~kUsageBit, std::memory_order_relaxed);
  return false;
}

bool ClockCacheShard::EvictFromCache(size_t charge, CleanupContext* context) {
  size_t usage = usage_.load(std::memory_order_relaxed);
  size_t capacity = capacity_.load(std::memory_order_relaxed);
  if (usage == 0) {
    return charge <= capacity;
  }
  size_t new_head = head_;
  bool second_iteration = false;
  while (usage + charge > capacity) {
    assert(new_head < list_.size());
    if (TryEvict(&list_[new_head], context)) {
      usage = usage_.load(std::memory_order_relaxed);
    }
    new_head = (new_head + 1 >= list_.size()) ? 0 : new_head + 1;
    if (new_head == head_) {
      // The first pass may only clear usage bits (the "second chance"), so
      // allow one more full pass before giving up.
      if (second_iteration) {
        return false;
      } else {
        second_iteration = true;
      }
    }
  }
  head_ = new_head;
  return true;
}

void ClockCacheShard::SetCapacity(size_t capacity) {
  CleanupContext context;
  {
    MutexLock l(&mutex_);
    capacity_.store(capacity, std::memory_order_relaxed);
    EvictFromCache(0, &context);
  }
  Cleanup(context);
}

void ClockCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
  strict_capacity_limit_.store(strict_capacity_limit,
                               std::memory_order_relaxed);
}

CacheHandle* ClockCacheShard::Insert(const Slice& key, uint32_t hash,
                                     void* value, size_t charge,
                                     Deleter* deleter, bool hold_reference,
                                     CleanupContext* context) {
  size_t total_charge =
      CacheHandle::CalcTotalCharge(key, charge, metadata_charge_policy_);
  MutexLock l(&mutex_);
  bool success = EvictFromCache(total_charge, context);
  bool strict = strict_capacity_limit_.load(std::memory_order_relaxed);
  if (!success && (strict || !hold_reference)) {
    context->to_delete_key.push_back(key.data());
    if (!hold_reference) {
      context->to_delete_value.emplace_back(key, value, deleter);
    }
    return nullptr;
  }
  // Grab an available handle from the recycle bin. If the recycle bin is
  // empty, create and append a new handle to the end of the circular list.
  CacheHandle* handle = nullptr;
  if (!recycle_.empty()) {
    handle = recycle_.back();
    recycle_.pop_back();
  } else {
    list_.emplace_back();
    handle = &list_.back();
  }
  // Fill the handle.
  handle->key = key;
  handle->hash = hash;
  handle->value = value;
  handle->charge = charge;
  handle->deleter = deleter;
  uint32_t flags = hold_reference ? kInCacheBit + kOneRef : kInCacheBit;
  handle->flags.store(flags, std::memory_order_relaxed);
  HashTable::accessor accessor;
  if (table_.find(accessor, CacheKey(key, hash))) {
    CacheHandle* existing_handle = accessor->second;
    table_.erase(accessor);
    UnsetInCache(existing_handle, context);
  }
  table_.insert(HashTable::value_type(CacheKey(key, hash), handle));
  if (hold_reference) {
    pinned_usage_.fetch_add(total_charge, std::memory_order_relaxed);
  }
  usage_.fetch_add(total_charge, std::memory_order_relaxed);
  return handle;
}

Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
                               size_t charge, Deleter* deleter,
                               Cache::Handle** out_handle,
                               Cache::Priority /*priority*/) {
  CleanupContext context;
  HashTable::accessor accessor;
  char* key_data = new char[key.size()];
  memcpy(key_data, key.data(), key.size());
  Slice key_copy(key_data, key.size());
  CacheHandle* handle = Insert(key_copy, hash, value, charge, deleter,
                               out_handle != nullptr, &context);
  Status s;
  if (out_handle != nullptr) {
    if (handle == nullptr) {
      s = Status::Incomplete("Insert failed due to LRU cache being full.");
    } else {
      *out_handle = reinterpret_cast<Cache::Handle*>(handle);
    }
  }
  Cleanup(context);
  return s;
}

Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) {
  HashTable::const_accessor accessor;
  if (!table_.find(accessor, CacheKey(key, hash))) {
    return nullptr;
  }
  CacheHandle* handle = accessor->second;
  accessor.release();
  // Ref() could fail if another thread sneaks in and evicts/erases the cache
  // entry before we are able to hold a reference.
  if (!Ref(reinterpret_cast<Cache::Handle*>(handle))) {
    return nullptr;
  }
  // Double check the key, since the handle may now represent another key if
  // other threads sneak in, evict/erase the entry, and reuse the handle for
  // another cache entry.
  if (hash != handle->hash || key != handle->key) {
    CleanupContext context;
    Unref(handle, false, &context);
    // It is possible that Unref() deletes the entry, so we need to clean up.
    Cleanup(context);
    return nullptr;
  }
  return reinterpret_cast<Cache::Handle*>(handle);
}

bool ClockCacheShard::Release(Cache::Handle* h, bool force_erase) {
  CleanupContext context;
  CacheHandle* handle = reinterpret_cast<CacheHandle*>(h);
  bool erased = Unref(handle, true, &context);
  if (force_erase && !erased) {
    erased = EraseAndConfirm(handle->key, handle->hash, &context);
  }
  Cleanup(context);
  return erased;
}

void ClockCacheShard::Erase(const Slice& key, uint32_t hash) {
  CleanupContext context;
  EraseAndConfirm(key, hash, &context);
  Cleanup(context);
}

bool ClockCacheShard::EraseAndConfirm(const Slice& key, uint32_t hash,
                                      CleanupContext* context) {
  MutexLock l(&mutex_);
  HashTable::accessor accessor;
  bool erased = false;
  if (table_.find(accessor, CacheKey(key, hash))) {
    CacheHandle* handle = accessor->second;
    table_.erase(accessor);
    erased = UnsetInCache(handle, context);
  }
  return erased;
}

void ClockCacheShard::EraseUnRefEntries() {
  CleanupContext context;
  {
    MutexLock l(&mutex_);
    table_.clear();
    for (auto& handle : list_) {
      UnsetInCache(&handle, &context);
    }
  }
  Cleanup(context);
}

class ClockCache final : public ShardedCache {
 public:
  ClockCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
             CacheMetadataChargePolicy metadata_charge_policy)
      : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
    int num_shards = 1 << num_shard_bits;
    shards_ = new ClockCacheShard[num_shards];
    for (int i = 0; i < num_shards; i++) {
      shards_[i].set_metadata_charge_policy(metadata_charge_policy);
    }
    SetCapacity(capacity);
    SetStrictCapacityLimit(strict_capacity_limit);
  }

  ~ClockCache() override { delete[] shards_; }

  const char* Name() const override { return "ClockCache"; }

  CacheShard* GetShard(int shard) override {
    return reinterpret_cast<CacheShard*>(&shards_[shard]);
  }

  const CacheShard* GetShard(int shard) const override {
    return reinterpret_cast<CacheShard*>(&shards_[shard]);
  }

  void* Value(Handle* handle) override {
    return reinterpret_cast<const CacheHandle*>(handle)->value;
  }

  size_t GetCharge(Handle* handle) const override {
    return reinterpret_cast<const CacheHandle*>(handle)->charge;
  }

  uint32_t GetHash(Handle* handle) const override {
    return reinterpret_cast<const CacheHandle*>(handle)->hash;
  }

  void DisownData() override { shards_ = nullptr; }

 private:
  ClockCacheShard* shards_;
};

}  // end anonymous namespace

std::shared_ptr<Cache> NewClockCache(
    size_t capacity, int num_shard_bits, bool strict_capacity_limit,
    CacheMetadataChargePolicy metadata_charge_policy) {
  if (num_shard_bits < 0) {
    num_shard_bits = GetDefaultCacheShardBits(capacity);
  }
  return std::make_shared<ClockCache>(
      capacity, num_shard_bits, strict_capacity_limit, metadata_charge_policy);
}

}  // namespace ROCKSDB_NAMESPACE

#endif  // SUPPORT_CLOCK_CACHE