1 // Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #pragma once
7 
8 #include "rocksdb/status.h"
9 
10 #include <memory>
11 
12 namespace ROCKSDB_NAMESPACE {
13 
// MemoryAllocator is an interface that a client can implement to supply custom
// memory allocation and deallocation methods. See rocksdb/cache.h for more
// information.
// All methods should be thread-safe.
class MemoryAllocator {
 public:
  virtual ~MemoryAllocator() = default;

  // Name of the cache allocator, printed in the log.
  virtual const char* Name() const = 0;

  // Allocate a block of at least size. Has to be thread-safe.
  virtual void* Allocate(size_t size) = 0;

  // Deallocate previously allocated block. Has to be thread-safe.
  virtual void Deallocate(void* p) = 0;

  // Returns the memory size of the block allocated at p.
  //
  // p:               the block being queried; ignored by the default
  //                  implementation.
  // allocation_size: the original allocation size of the block.
  //
  // The default implementation simply returns allocation_size, which is fine
  // for implementations that have nothing more precise to report.
  virtual size_t UsableSize(void* /*p*/, size_t allocation_size) const {
    return allocation_size;
  }
};
38 
// Options accepted by NewJemallocNodumpAllocator(); they control how the
// allocator uses jemalloc's tcache.
struct JemallocAllocatorOptions {
  // Jemalloc tcache caches allocations by size class. For each size class,
  // it caches between 20 (for large size classes) and 200 (for small size
  // classes) allocations. To reduce tcache memory usage in case the allocator
  // is accessed by a large number of threads, we can control whether to cache
  // an allocation by its size.
  bool limit_tcache_size = false;

  // Lower bound of allocation size to use tcache, if limit_tcache_size=true.
  // When used with block cache, it is recommended to set it to block_size/4.
  size_t tcache_size_lower_bound = 1024;

  // Upper bound of allocation size to use tcache, if limit_tcache_size=true.
  // When used with block cache, it is recommended to set it to block_size.
  size_t tcache_size_upper_bound = 16 * 1024;
};
55 
56 // Generate memory allocators which allocates through Jemalloc and utilize
57 // MADV_DONTDUMP through madvice to exclude cache items from core dump.
58 // Applications can use the allocator with block cache to exclude block cache
59 // usage from core dump.
60 //
61 // Implementation details:
62 // The JemallocNodumpAllocator creates a delicated jemalloc arena, and all
63 // allocations of the JemallocNodumpAllocator is through the same arena.
64 // The memory allocator hooks memory allocation of the arena, and call
65 // madvice() with MADV_DONTDUMP flag to exclude the piece of memory from
66 // core dump. Side benefit of using single arena would be reduce of jemalloc
67 // metadata for some workload.
68 //
69 // To mitigate mutex contention for using one single arena, jemalloc tcache
70 // (thread-local cache) is enabled to cache unused allocations for future use.
71 // The tcache normally incur 0.5M extra memory usage per-thread. The usage
72 // can be reduce by limitting allocation sizes to cache.
73 extern Status NewJemallocNodumpAllocator(
74     JemallocAllocatorOptions& options,
75     std::shared_ptr<MemoryAllocator>* memory_allocator);
76 
77 }  // namespace ROCKSDB_NAMESPACE
78