// Copyright (c) 2013, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#pragma once

#ifndef ROCKSDB_LITE

#include <limits>
#include <list>
#include <map>
#include <string>
#include <vector>

#include "monitoring/histogram.h"
#include "rocksdb/env.h"
#include "rocksdb/persistent_cache.h"
#include "rocksdb/status.h"

// Persistent Cache
//
// Persistent cache is a tiered key-value cache that can use a persistent
// medium. It is a generic design and can leverage any storage medium --
// disk/SSD/NVM/RAM. The code has been kept generic but significant
// benchmark/design/development time has been spent to make sure the cache
// performs appropriately for the respective storage medium.
// The file defines
// PersistentCacheTier   : Implementation that handles individual cache tier
// PersistentTieredCache : Implementation that handles all tiers as a logical
//                         unit
//
// PersistentTieredCache architecture:
// +--------------------------+ PersistentCacheTier that handles multiple tiers
// | +----------------+       |
// | | RAM            | PersistentCacheTier that handles RAM (VolatileCacheImpl)
// | +----------------+       |
// |          | next          |
// |          v               |
// | +----------------+       |
// | | NVM            | PersistentCacheTier implementation that handles NVM
// | +----------------+ (BlockCacheImpl)
// |          | next          |
// |          V               |
// | +----------------+       |
// | | LE-SSD         | PersistentCacheTier implementation that handles LE-SSD
// | +----------------+ (BlockCacheImpl)
// |          |               |
// |          V               |
// |         null             |
// +--------------------------+
//               |
//               V
//              null
namespace ROCKSDB_NAMESPACE {

// Persistent Cache Config
//
// This struct captures all the options that are used to configure persistent
cache. Some of the terminologies used in naming the options are 61 // 62 // dispatch size : 63 // This is the size in which IO is dispatched to the device 64 // 65 // write buffer size : 66 // This is the size of an individual write buffer size. Write buffers are 67 // grouped to form buffered file. 68 // 69 // cache size : 70 // This is the logical maximum for the cache size 71 // 72 // qdepth : 73 // This is the max number of IOs that can issues to the device in parallel 74 // 75 // pepeling : 76 // The writer code path follows pipelined architecture, which means the 77 // operations are handed off from one stage to another 78 // 79 // pipelining backlog size : 80 // With the pipelined architecture, there can always be backlogging of ops in 81 // pipeline queues. This is the maximum backlog size after which ops are dropped 82 // from queue 83 struct PersistentCacheConfig { 84 explicit PersistentCacheConfig( 85 Env* const _env, const std::string& _path, const uint64_t _cache_size, 86 const std::shared_ptr<Logger>& _log, 87 const uint32_t _write_buffer_size = 1 * 1024 * 1024 /*1MB*/) { 88 env = _env; 89 path = _path; 90 log = _log; 91 cache_size = _cache_size; 92 writer_dispatch_size = write_buffer_size = _write_buffer_size; 93 } 94 95 // 96 // Validate the settings. Our intentions are to catch erroneous settings ahead 97 // of time instead going violating invariants or causing dead locks. 
98 // ValidateSettingsPersistentCacheConfig99 Status ValidateSettings() const { 100 // (1) check pre-conditions for variables 101 if (!env || path.empty()) { 102 return Status::InvalidArgument("empty or null args"); 103 } 104 105 // (2) assert size related invariants 106 // - cache size cannot be less than cache file size 107 // - individual write buffer size cannot be greater than cache file size 108 // - total write buffer size cannot be less than 2X cache file size 109 if (cache_size < cache_file_size || write_buffer_size >= cache_file_size || 110 write_buffer_size * write_buffer_count() < 2 * cache_file_size) { 111 return Status::InvalidArgument("invalid cache size"); 112 } 113 114 // (2) check writer settings 115 // - Queue depth cannot be 0 116 // - writer_dispatch_size cannot be greater than writer_buffer_size 117 // - dispatch size and buffer size need to be aligned 118 if (!writer_qdepth || writer_dispatch_size > write_buffer_size || 119 write_buffer_size % writer_dispatch_size) { 120 return Status::InvalidArgument("invalid writer settings"); 121 } 122 123 return Status::OK(); 124 } 125 126 // 127 // Env abstraction to use for systmer level operations 128 // 129 Env* env; 130 131 // 132 // Path for the block cache where blocks are persisted 133 // 134 std::string path; 135 136 // 137 // Log handle for logging messages 138 // 139 std::shared_ptr<Logger> log; 140 141 // 142 // Enable direct IO for reading 143 // 144 bool enable_direct_reads = true; 145 146 // 147 // Enable direct IO for writing 148 // 149 bool enable_direct_writes = false; 150 151 // 152 // Logical cache size 153 // 154 uint64_t cache_size = std::numeric_limits<uint64_t>::max(); 155 156 // cache-file-size 157 // 158 // Cache consists of multiples of small files. 
This parameter defines the 159 // size of an individual cache file 160 // 161 // default: 1M 162 uint32_t cache_file_size = 100ULL * 1024 * 1024; 163 164 // writer-qdepth 165 // 166 // The writers can issues IO to the devices in parallel. This parameter 167 // controls the max number if IOs that can issues in parallel to the block 168 // device 169 // 170 // default :1 171 uint32_t writer_qdepth = 1; 172 173 // pipeline-writes 174 // 175 // The write optionally follow pipelined architecture. This helps 176 // avoid regression in the eviction code path of the primary tier. This 177 // parameter defines if pipelining is enabled or disabled 178 // 179 // default: true 180 bool pipeline_writes = true; 181 182 // max-write-pipeline-backlog-size 183 // 184 // Max pipeline buffer size. This is the maximum backlog we can accumulate 185 // while waiting for writes. After the limit, new ops will be dropped. 186 // 187 // Default: 1GiB 188 uint64_t max_write_pipeline_backlog_size = 1ULL * 1024 * 1024 * 1024; 189 190 // write-buffer-size 191 // 192 // This is the size in which buffer slabs are allocated. 193 // 194 // Default: 1M 195 uint32_t write_buffer_size = 1ULL * 1024 * 1024; 196 197 // write-buffer-count 198 // 199 // This is the total number of buffer slabs. This is calculated as a factor of 200 // file size in order to avoid dead lock. 
write_buffer_countPersistentCacheConfig201 size_t write_buffer_count() const { 202 assert(write_buffer_size); 203 return static_cast<size_t>((writer_qdepth + 1.2) * cache_file_size / 204 write_buffer_size); 205 } 206 207 // writer-dispatch-size 208 // 209 // The writer thread will dispatch the IO at the specified IO size 210 // 211 // default: 1M 212 uint64_t writer_dispatch_size = 1ULL * 1024 * 1024; 213 214 // is_compressed 215 // 216 // This option determines if the cache will run in compressed mode or 217 // uncompressed mode 218 bool is_compressed = true; 219 220 PersistentCacheConfig MakePersistentCacheConfig( 221 const std::string& path, const uint64_t size, 222 const std::shared_ptr<Logger>& log); 223 224 std::string ToString() const; 225 }; 226 227 // Persistent Cache Tier 228 // 229 // This a logical abstraction that defines a tier of the persistent cache. Tiers 230 // can be stacked over one another. PersistentCahe provides the basic definition 231 // for accessing/storing in the cache. PersistentCacheTier extends the interface 232 // to enable management and stacking of tiers. 
233 class PersistentCacheTier : public PersistentCache { 234 public: 235 typedef std::shared_ptr<PersistentCacheTier> Tier; 236 ~PersistentCacheTier()237 virtual ~PersistentCacheTier() {} 238 239 // Open the persistent cache tier 240 virtual Status Open(); 241 242 // Close the persistent cache tier 243 virtual Status Close(); 244 245 // Reserve space up to 'size' bytes 246 virtual bool Reserve(const size_t size); 247 248 // Erase a key from the cache 249 virtual bool Erase(const Slice& key); 250 251 // Print stats to string recursively 252 virtual std::string PrintStats(); 253 254 virtual PersistentCache::StatsType Stats() override; 255 256 // Insert to page cache 257 virtual Status Insert(const Slice& page_key, const char* data, 258 const size_t size) override = 0; 259 260 // Lookup page cache by page identifier 261 virtual Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data, 262 size_t* size) override = 0; 263 264 // Does it store compressed data ? 265 virtual bool IsCompressed() override = 0; 266 267 virtual std::string GetPrintableOptions() const override = 0; 268 269 // Return a reference to next tier next_tier()270 virtual Tier& next_tier() { return next_tier_; } 271 272 // Set the value for next tier set_next_tier(const Tier & tier)273 virtual void set_next_tier(const Tier& tier) { 274 assert(!next_tier_); 275 next_tier_ = tier; 276 } 277 TEST_Flush()278 virtual void TEST_Flush() { 279 if (next_tier_) { 280 next_tier_->TEST_Flush(); 281 } 282 } 283 284 private: 285 Tier next_tier_; // next tier 286 }; 287 288 // PersistentTieredCache 289 // 290 // Abstraction that helps you construct a tiers of persistent caches as a 291 // unified cache. The tier(s) of cache will act a single tier for management 292 // ease and support PersistentCache methods for accessing data. 
293 class PersistentTieredCache : public PersistentCacheTier { 294 public: 295 virtual ~PersistentTieredCache(); 296 297 Status Open() override; 298 Status Close() override; 299 bool Erase(const Slice& key) override; 300 std::string PrintStats() override; 301 PersistentCache::StatsType Stats() override; 302 Status Insert(const Slice& page_key, const char* data, 303 const size_t size) override; 304 Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data, 305 size_t* size) override; 306 bool IsCompressed() override; 307 GetPrintableOptions()308 std::string GetPrintableOptions() const override { 309 return "PersistentTieredCache"; 310 } 311 312 void AddTier(const Tier& tier); 313 next_tier()314 Tier& next_tier() override { 315 auto it = tiers_.end(); 316 return (*it)->next_tier(); 317 } 318 set_next_tier(const Tier & tier)319 void set_next_tier(const Tier& tier) override { 320 auto it = tiers_.end(); 321 (*it)->set_next_tier(tier); 322 } 323 TEST_Flush()324 void TEST_Flush() override { 325 assert(!tiers_.empty()); 326 tiers_.front()->TEST_Flush(); 327 PersistentCacheTier::TEST_Flush(); 328 } 329 330 protected: 331 std::list<Tier> tiers_; // list of tiers top-down 332 }; 333 334 } // namespace ROCKSDB_NAMESPACE 335 336 #endif 337