1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
5 #include <assert.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8
9 #include "leveldb/cache.h"
10 #include "port/port.h"
11 #include "util/hash.h"
12 #include "util/mutexlock.h"
13
14 namespace leveldb {
15
~Cache()16 Cache::~Cache() {
17 }
18
19 namespace {
20
21 // LRU cache implementation
22 //
23 // Cache entries have an "in_cache" boolean indicating whether the cache has a
24 // reference on the entry. The only ways that this can become false without the
25 // entry being passed to its "deleter" are via Erase(), via Insert() when
26 // an element with a duplicate key is inserted, or on destruction of the cache.
27 //
28 // The cache keeps two linked lists of items in the cache. All items in the
29 // cache are in one list or the other, and never both. Items still referenced
30 // by clients but erased from the cache are in neither list. The lists are:
31 // - in-use: contains the items currently referenced by clients, in no
32 // particular order. (This list is used for invariant checking. If we
33 // removed the check, elements that would otherwise be on this list could be
34 // left as disconnected singleton lists.)
35 // - LRU: contains the items not currently referenced by clients, in LRU order
36 // Elements are moved between these lists by the Ref() and Unref() methods,
37 // when they detect an element in the cache acquiring or losing its only
38 // external reference.
39
40 // An entry is a variable length heap-allocated structure. Entries
41 // are kept in a circular doubly linked list ordered by access time.
42 struct LRUHandle {
43 void* value;
44 void (*deleter)(const Slice&, void* value);
45 LRUHandle* next_hash;
46 LRUHandle* next;
47 LRUHandle* prev;
48 size_t charge; // TODO(opt): Only allow uint32_t?
49 size_t key_length;
50 bool in_cache; // Whether entry is in the cache.
51 uint32_t refs; // References, including cache reference, if present.
52 uint32_t hash; // Hash of key(); used for fast sharding and comparisons
53 char key_data[1]; // Beginning of key
54
keyleveldb::__anon90b7ddab0111::LRUHandle55 Slice key() const {
56 // For cheaper lookups, we allow a temporary Handle object
57 // to store a pointer to a key in "value".
58 if (next == this) {
59 return *(reinterpret_cast<Slice*>(value));
60 } else {
61 return Slice(key_data, key_length);
62 }
63 }
64 };
65
66 // We provide our own simple hash table since it removes a whole bunch
67 // of porting hacks and is also faster than some of the built-in hash
68 // table implementations in some of the compiler/runtime combinations
69 // we have tested. E.g., readrandom speeds up by ~5% over the g++
70 // 4.4.3's builtin hashtable.
71 class HandleTable {
72 public:
HandleTable()73 HandleTable() : length_(0), elems_(0), list_(NULL) { Resize(); }
~HandleTable()74 ~HandleTable() { delete[] list_; }
75
Lookup(const Slice & key,uint32_t hash)76 LRUHandle* Lookup(const Slice& key, uint32_t hash) {
77 return *FindPointer(key, hash);
78 }
79
Insert(LRUHandle * h)80 LRUHandle* Insert(LRUHandle* h) {
81 LRUHandle** ptr = FindPointer(h->key(), h->hash);
82 LRUHandle* old = *ptr;
83 h->next_hash = (old == NULL ? NULL : old->next_hash);
84 *ptr = h;
85 if (old == NULL) {
86 ++elems_;
87 if (elems_ > length_) {
88 // Since each cache entry is fairly large, we aim for a small
89 // average linked list length (<= 1).
90 Resize();
91 }
92 }
93 return old;
94 }
95
Remove(const Slice & key,uint32_t hash)96 LRUHandle* Remove(const Slice& key, uint32_t hash) {
97 LRUHandle** ptr = FindPointer(key, hash);
98 LRUHandle* result = *ptr;
99 if (result != NULL) {
100 *ptr = result->next_hash;
101 --elems_;
102 }
103 return result;
104 }
105
106 private:
107 // The table consists of an array of buckets where each bucket is
108 // a linked list of cache entries that hash into the bucket.
109 uint32_t length_;
110 uint32_t elems_;
111 LRUHandle** list_;
112
113 // Return a pointer to slot that points to a cache entry that
114 // matches key/hash. If there is no such cache entry, return a
115 // pointer to the trailing slot in the corresponding linked list.
FindPointer(const Slice & key,uint32_t hash)116 LRUHandle** FindPointer(const Slice& key, uint32_t hash) {
117 LRUHandle** ptr = &list_[hash & (length_ - 1)];
118 while (*ptr != NULL &&
119 ((*ptr)->hash != hash || key != (*ptr)->key())) {
120 ptr = &(*ptr)->next_hash;
121 }
122 return ptr;
123 }
124
Resize()125 void Resize() {
126 uint32_t new_length = 4;
127 while (new_length < elems_) {
128 new_length *= 2;
129 }
130 LRUHandle** new_list = new LRUHandle*[new_length];
131 memset(new_list, 0, sizeof(new_list[0]) * new_length);
132 uint32_t count = 0;
133 for (uint32_t i = 0; i < length_; i++) {
134 LRUHandle* h = list_[i];
135 while (h != NULL) {
136 LRUHandle* next = h->next_hash;
137 uint32_t hash = h->hash;
138 LRUHandle** ptr = &new_list[hash & (new_length - 1)];
139 h->next_hash = *ptr;
140 *ptr = h;
141 h = next;
142 count++;
143 }
144 }
145 assert(elems_ == count);
146 delete[] list_;
147 list_ = new_list;
148 length_ = new_length;
149 }
150 };
151
152 // A single shard of sharded cache.
153 class LRUCache {
154 public:
155 LRUCache();
156 ~LRUCache();
157
158 // Separate from constructor so caller can easily make an array of LRUCache
SetCapacity(size_t capacity)159 void SetCapacity(size_t capacity) { capacity_ = capacity; }
160
161 // Like Cache methods, but with an extra "hash" parameter.
162 Cache::Handle* Insert(const Slice& key, uint32_t hash,
163 void* value, size_t charge,
164 void (*deleter)(const Slice& key, void* value));
165 Cache::Handle* Lookup(const Slice& key, uint32_t hash);
166 void Release(Cache::Handle* handle);
167 void Erase(const Slice& key, uint32_t hash);
168 void Prune();
TotalCharge() const169 size_t TotalCharge() const {
170 MutexLock l(&mutex_);
171 return usage_;
172 }
173
174 private:
175 void LRU_Remove(LRUHandle* e);
176 void LRU_Append(LRUHandle*list, LRUHandle* e);
177 void Ref(LRUHandle* e);
178 void Unref(LRUHandle* e);
179 bool FinishErase(LRUHandle* e);
180
181 // Initialized before use.
182 size_t capacity_;
183
184 // mutex_ protects the following state.
185 mutable port::Mutex mutex_;
186 size_t usage_;
187
188 // Dummy head of LRU list.
189 // lru.prev is newest entry, lru.next is oldest entry.
190 // Entries have refs==1 and in_cache==true.
191 LRUHandle lru_;
192
193 // Dummy head of in-use list.
194 // Entries are in use by clients, and have refs >= 2 and in_cache==true.
195 LRUHandle in_use_;
196
197 HandleTable table_;
198 };
199
LRUCache()200 LRUCache::LRUCache()
201 : usage_(0) {
202 // Make empty circular linked lists.
203 lru_.next = &lru_;
204 lru_.prev = &lru_;
205 in_use_.next = &in_use_;
206 in_use_.prev = &in_use_;
207 }
208
~LRUCache()209 LRUCache::~LRUCache() {
210 assert(in_use_.next == &in_use_); // Error if caller has an unreleased handle
211 for (LRUHandle* e = lru_.next; e != &lru_; ) {
212 LRUHandle* next = e->next;
213 assert(e->in_cache);
214 e->in_cache = false;
215 assert(e->refs == 1); // Invariant of lru_ list.
216 Unref(e);
217 e = next;
218 }
219 }
220
Ref(LRUHandle * e)221 void LRUCache::Ref(LRUHandle* e) {
222 if (e->refs == 1 && e->in_cache) { // If on lru_ list, move to in_use_ list.
223 LRU_Remove(e);
224 LRU_Append(&in_use_, e);
225 }
226 e->refs++;
227 }
228
Unref(LRUHandle * e)229 void LRUCache::Unref(LRUHandle* e) {
230 assert(e->refs > 0);
231 e->refs--;
232 if (e->refs == 0) { // Deallocate.
233 assert(!e->in_cache);
234 (*e->deleter)(e->key(), e->value);
235 free(e);
236 } else if (e->in_cache && e->refs == 1) { // No longer in use; move to lru_ list.
237 LRU_Remove(e);
238 LRU_Append(&lru_, e);
239 }
240 }
241
LRU_Remove(LRUHandle * e)242 void LRUCache::LRU_Remove(LRUHandle* e) {
243 e->next->prev = e->prev;
244 e->prev->next = e->next;
245 }
246
LRU_Append(LRUHandle * list,LRUHandle * e)247 void LRUCache::LRU_Append(LRUHandle* list, LRUHandle* e) {
248 // Make "e" newest entry by inserting just before *list
249 e->next = list;
250 e->prev = list->prev;
251 e->prev->next = e;
252 e->next->prev = e;
253 }
254
Lookup(const Slice & key,uint32_t hash)255 Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) {
256 MutexLock l(&mutex_);
257 LRUHandle* e = table_.Lookup(key, hash);
258 if (e != NULL) {
259 Ref(e);
260 }
261 return reinterpret_cast<Cache::Handle*>(e);
262 }
263
Release(Cache::Handle * handle)264 void LRUCache::Release(Cache::Handle* handle) {
265 MutexLock l(&mutex_);
266 Unref(reinterpret_cast<LRUHandle*>(handle));
267 }
268
Insert(const Slice & key,uint32_t hash,void * value,size_t charge,void (* deleter)(const Slice & key,void * value))269 Cache::Handle* LRUCache::Insert(
270 const Slice& key, uint32_t hash, void* value, size_t charge,
271 void (*deleter)(const Slice& key, void* value)) {
272 MutexLock l(&mutex_);
273
274 LRUHandle* e = reinterpret_cast<LRUHandle*>(
275 malloc(sizeof(LRUHandle)-1 + key.size()));
276 e->value = value;
277 e->deleter = deleter;
278 e->charge = charge;
279 e->key_length = key.size();
280 e->hash = hash;
281 e->in_cache = false;
282 e->refs = 1; // for the returned handle.
283 memcpy(e->key_data, key.data(), key.size());
284
285 if (capacity_ > 0) {
286 e->refs++; // for the cache's reference.
287 e->in_cache = true;
288 LRU_Append(&in_use_, e);
289 usage_ += charge;
290 FinishErase(table_.Insert(e));
291 } // else don't cache. (Tests use capacity_==0 to turn off caching.)
292
293 while (usage_ > capacity_ && lru_.next != &lru_) {
294 LRUHandle* old = lru_.next;
295 assert(old->refs == 1);
296 bool erased = FinishErase(table_.Remove(old->key(), old->hash));
297 if (!erased) { // to avoid unused variable when compiled NDEBUG
298 assert(erased);
299 }
300 }
301
302 return reinterpret_cast<Cache::Handle*>(e);
303 }
304
305 // If e != NULL, finish removing *e from the cache; it has already been removed
306 // from the hash table. Return whether e != NULL. Requires mutex_ held.
FinishErase(LRUHandle * e)307 bool LRUCache::FinishErase(LRUHandle* e) {
308 if (e != NULL) {
309 assert(e->in_cache);
310 LRU_Remove(e);
311 e->in_cache = false;
312 usage_ -= e->charge;
313 Unref(e);
314 }
315 return e != NULL;
316 }
317
Erase(const Slice & key,uint32_t hash)318 void LRUCache::Erase(const Slice& key, uint32_t hash) {
319 MutexLock l(&mutex_);
320 FinishErase(table_.Remove(key, hash));
321 }
322
Prune()323 void LRUCache::Prune() {
324 MutexLock l(&mutex_);
325 while (lru_.next != &lru_) {
326 LRUHandle* e = lru_.next;
327 assert(e->refs == 1);
328 bool erased = FinishErase(table_.Remove(e->key(), e->hash));
329 if (!erased) { // to avoid unused variable when compiled NDEBUG
330 assert(erased);
331 }
332 }
333 }
334
335 static const int kNumShardBits = 4;
336 static const int kNumShards = 1 << kNumShardBits;
337
338 class ShardedLRUCache : public Cache {
339 private:
340 LRUCache shard_[kNumShards];
341 port::Mutex id_mutex_;
342 uint64_t last_id_;
343
HashSlice(const Slice & s)344 static inline uint32_t HashSlice(const Slice& s) {
345 return Hash(s.data(), s.size(), 0);
346 }
347
Shard(uint32_t hash)348 static uint32_t Shard(uint32_t hash) {
349 return hash >> (32 - kNumShardBits);
350 }
351
352 public:
ShardedLRUCache(size_t capacity)353 explicit ShardedLRUCache(size_t capacity)
354 : last_id_(0) {
355 const size_t per_shard = (capacity + (kNumShards - 1)) / kNumShards;
356 for (int s = 0; s < kNumShards; s++) {
357 shard_[s].SetCapacity(per_shard);
358 }
359 }
~ShardedLRUCache()360 virtual ~ShardedLRUCache() { }
Insert(const Slice & key,void * value,size_t charge,void (* deleter)(const Slice & key,void * value))361 virtual Handle* Insert(const Slice& key, void* value, size_t charge,
362 void (*deleter)(const Slice& key, void* value)) {
363 const uint32_t hash = HashSlice(key);
364 return shard_[Shard(hash)].Insert(key, hash, value, charge, deleter);
365 }
Lookup(const Slice & key)366 virtual Handle* Lookup(const Slice& key) {
367 const uint32_t hash = HashSlice(key);
368 return shard_[Shard(hash)].Lookup(key, hash);
369 }
Release(Handle * handle)370 virtual void Release(Handle* handle) {
371 LRUHandle* h = reinterpret_cast<LRUHandle*>(handle);
372 shard_[Shard(h->hash)].Release(handle);
373 }
Erase(const Slice & key)374 virtual void Erase(const Slice& key) {
375 const uint32_t hash = HashSlice(key);
376 shard_[Shard(hash)].Erase(key, hash);
377 }
Value(Handle * handle)378 virtual void* Value(Handle* handle) {
379 return reinterpret_cast<LRUHandle*>(handle)->value;
380 }
NewId()381 virtual uint64_t NewId() {
382 MutexLock l(&id_mutex_);
383 return ++(last_id_);
384 }
Prune()385 virtual void Prune() {
386 for (int s = 0; s < kNumShards; s++) {
387 shard_[s].Prune();
388 }
389 }
TotalCharge() const390 virtual size_t TotalCharge() const {
391 size_t total = 0;
392 for (int s = 0; s < kNumShards; s++) {
393 total += shard_[s].TotalCharge();
394 }
395 return total;
396 }
397 };
398
399 } // end anonymous namespace
400
NewLRUCache(size_t capacity)401 Cache* NewLRUCache(size_t capacity) {
402 return new ShardedLRUCache(capacity);
403 }
404
405 } // namespace leveldb
406