1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #pragma once
7 
8 #include <unordered_map>
9 
10 #include "cache/lru_cache.h"
11 #include "trace_replay/block_cache_tracer.h"
12 
13 namespace ROCKSDB_NAMESPACE {
14 
15 // A cache configuration provided by user.
16 struct CacheConfiguration {
17   std::string cache_name;  // LRU.
18   uint32_t num_shard_bits;
19   uint64_t ghost_cache_capacity;  // ghost cache capacity in bytes.
20   std::vector<uint64_t>
21       cache_capacities;  // simulate cache capacities in bytes.
22 
23   bool operator==(const CacheConfiguration& o) const {
24     return cache_name == o.cache_name && num_shard_bits == o.num_shard_bits &&
25            ghost_cache_capacity == o.ghost_cache_capacity;
26   }
27   bool operator<(const CacheConfiguration& o) const {
28     return cache_name < o.cache_name ||
29            (cache_name == o.cache_name && num_shard_bits < o.num_shard_bits) ||
30            (cache_name == o.cache_name && num_shard_bits == o.num_shard_bits &&
31             ghost_cache_capacity < o.ghost_cache_capacity);
32   }
33 };
34 
35 class MissRatioStats {
36  public:
reset_counter()37   void reset_counter() {
38     num_misses_ = 0;
39     num_accesses_ = 0;
40     user_accesses_ = 0;
41     user_misses_ = 0;
42   }
miss_ratio()43   double miss_ratio() const {
44     if (num_accesses_ == 0) {
45       return -1;
46     }
47     return static_cast<double>(num_misses_ * 100.0 / num_accesses_);
48   }
total_accesses()49   uint64_t total_accesses() const { return num_accesses_; }
total_misses()50   uint64_t total_misses() const { return num_misses_; }
51 
num_accesses_timeline()52   const std::map<uint64_t, uint64_t>& num_accesses_timeline() const {
53     return num_accesses_timeline_;
54   }
55 
num_misses_timeline()56   const std::map<uint64_t, uint64_t>& num_misses_timeline() const {
57     return num_misses_timeline_;
58   }
59 
user_miss_ratio()60   double user_miss_ratio() const {
61     if (user_accesses_ == 0) {
62       return -1;
63     }
64     return static_cast<double>(user_misses_ * 100.0 / user_accesses_);
65   }
user_accesses()66   uint64_t user_accesses() const { return user_accesses_; }
user_misses()67   uint64_t user_misses() const { return user_misses_; }
68 
69   void UpdateMetrics(uint64_t timestamp_in_ms, bool is_user_access,
70                      bool is_cache_miss);
71 
72  private:
73   uint64_t num_accesses_ = 0;
74   uint64_t num_misses_ = 0;
75   uint64_t user_accesses_ = 0;
76   uint64_t user_misses_ = 0;
77 
78   std::map<uint64_t, uint64_t> num_accesses_timeline_;
79   std::map<uint64_t, uint64_t> num_misses_timeline_;
80 };
81 
82 // A ghost cache admits an entry on its second access.
83 class GhostCache {
84  public:
85   explicit GhostCache(std::shared_ptr<Cache> sim_cache);
86   ~GhostCache() = default;
87   // No copy and move.
88   GhostCache(const GhostCache&) = delete;
89   GhostCache& operator=(const GhostCache&) = delete;
90   GhostCache(GhostCache&&) = delete;
91   GhostCache& operator=(GhostCache&&) = delete;
92 
93   // Returns true if the lookup_key is in the ghost cache.
94   // Returns false otherwise.
95   bool Admit(const Slice& lookup_key);
96 
97  private:
98   std::shared_ptr<Cache> sim_cache_;
99 };
100 
101 // A cache simulator that runs against a block cache trace.
102 class CacheSimulator {
103  public:
104   CacheSimulator(std::unique_ptr<GhostCache>&& ghost_cache,
105                  std::shared_ptr<Cache> sim_cache);
106   virtual ~CacheSimulator() = default;
107   // No copy and move.
108   CacheSimulator(const CacheSimulator&) = delete;
109   CacheSimulator& operator=(const CacheSimulator&) = delete;
110   CacheSimulator(CacheSimulator&&) = delete;
111   CacheSimulator& operator=(CacheSimulator&&) = delete;
112 
113   virtual void Access(const BlockCacheTraceRecord& access);
114 
reset_counter()115   void reset_counter() { miss_ratio_stats_.reset_counter(); }
116 
miss_ratio_stats()117   const MissRatioStats& miss_ratio_stats() const { return miss_ratio_stats_; }
118 
119  protected:
120   MissRatioStats miss_ratio_stats_;
121   std::unique_ptr<GhostCache> ghost_cache_;
122   std::shared_ptr<Cache> sim_cache_;
123 };
124 
125 // A prioritized cache simulator that runs against a block cache trace.
126 // It inserts missing index/filter/uncompression-dictionary blocks with high
127 // priority in the cache.
128 class PrioritizedCacheSimulator : public CacheSimulator {
129  public:
PrioritizedCacheSimulator(std::unique_ptr<GhostCache> && ghost_cache,std::shared_ptr<Cache> sim_cache)130   PrioritizedCacheSimulator(std::unique_ptr<GhostCache>&& ghost_cache,
131                             std::shared_ptr<Cache> sim_cache)
132       : CacheSimulator(std::move(ghost_cache), sim_cache) {}
133   void Access(const BlockCacheTraceRecord& access) override;
134 
135  protected:
136   // Access the key-value pair and returns true upon a cache miss.
137   void AccessKVPair(const Slice& key, uint64_t value_size,
138                     Cache::Priority priority,
139                     const BlockCacheTraceRecord& access, bool no_insert,
140                     bool is_user_access, bool* is_cache_miss, bool* admitted,
141                     bool update_metrics);
142 
143   Cache::Priority ComputeBlockPriority(
144       const BlockCacheTraceRecord& access) const;
145 };
146 
147 // A hybrid row and block cache simulator. It looks up/inserts key-value pairs
148 // referenced by Get/MultiGet requests, and not their accessed index/filter/data
149 // blocks.
150 //
151 // Upon a Get/MultiGet request, it looks up the referenced key first.
152 // If it observes a cache hit, future block accesses on this key-value pair is
153 // skipped since the request is served already. Otherwise, it continues to look
154 // up/insert its index/filter/data blocks. It also inserts the referenced
155 // key-value pair in the cache for future lookups.
156 class HybridRowBlockCacheSimulator : public PrioritizedCacheSimulator {
157  public:
HybridRowBlockCacheSimulator(std::unique_ptr<GhostCache> && ghost_cache,std::shared_ptr<Cache> sim_cache,bool insert_blocks_upon_row_kvpair_miss)158   HybridRowBlockCacheSimulator(std::unique_ptr<GhostCache>&& ghost_cache,
159                                std::shared_ptr<Cache> sim_cache,
160                                bool insert_blocks_upon_row_kvpair_miss)
161       : PrioritizedCacheSimulator(std::move(ghost_cache), sim_cache),
162         insert_blocks_upon_row_kvpair_miss_(
163             insert_blocks_upon_row_kvpair_miss) {}
164   void Access(const BlockCacheTraceRecord& access) override;
165 
166  private:
167   enum InsertResult : char {
168     INSERTED,
169     ADMITTED,
170     NO_INSERT,
171   };
172 
173   // We set is_complete to true when the referenced row-key of a get request
174   // hits the cache. If is_complete is true, we treat future accesses of this
175   // get request as hits.
176   //
177   // For each row key, it stores an enum. It is INSERTED when the
178   // kv-pair has been inserted into the cache, ADMITTED if it should be inserted
179   // but haven't been, NO_INSERT if it should not be inserted.
180   //
181   // A kv-pair is in ADMITTED state when we encounter this kv-pair but do not
182   // know its size. This may happen if the first access on the referenced key is
183   // an index/filter block.
184   struct GetRequestStatus {
185     bool is_complete = false;
186     std::map<std::string, InsertResult> row_key_status;
187   };
188 
189   // A map stores get_id to a map of row keys.
190   std::map<uint64_t, GetRequestStatus> getid_status_map_;
191   bool insert_blocks_upon_row_kvpair_miss_;
192 };
193 
194 // A block cache simulator that reports miss ratio curves given a set of cache
195 // configurations.
196 class BlockCacheTraceSimulator {
197  public:
198   // warmup_seconds: The number of seconds to warmup simulated caches. The
199   // hit/miss counters are reset after the warmup completes.
200   BlockCacheTraceSimulator(
201       uint64_t warmup_seconds, uint32_t downsample_ratio,
202       const std::vector<CacheConfiguration>& cache_configurations);
203   ~BlockCacheTraceSimulator() = default;
204   // No copy and move.
205   BlockCacheTraceSimulator(const BlockCacheTraceSimulator&) = delete;
206   BlockCacheTraceSimulator& operator=(const BlockCacheTraceSimulator&) = delete;
207   BlockCacheTraceSimulator(BlockCacheTraceSimulator&&) = delete;
208   BlockCacheTraceSimulator& operator=(BlockCacheTraceSimulator&&) = delete;
209 
210   Status InitializeCaches();
211 
212   void Access(const BlockCacheTraceRecord& access);
213 
214   const std::map<CacheConfiguration,
215                  std::vector<std::shared_ptr<CacheSimulator>>>&
sim_caches()216   sim_caches() const {
217     return sim_caches_;
218   }
219 
220  private:
221   const uint64_t warmup_seconds_;
222   const uint32_t downsample_ratio_;
223   const std::vector<CacheConfiguration> cache_configurations_;
224 
225   bool warmup_complete_ = false;
226   std::map<CacheConfiguration, std::vector<std::shared_ptr<CacheSimulator>>>
227       sim_caches_;
228   uint64_t trace_start_time_ = 0;
229 };
230 
231 }  // namespace ROCKSDB_NAMESPACE
232