1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #pragma once
11 #ifndef ROCKSDB_LITE
12 
#include <cassert>
#include <cinttypes>
#include <climits>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
18 
19 #include "rocksdb/utilities/stackable_db.h"
20 
21 #include "rocksdb/env.h"
22 #include "rocksdb/options.h"
23 #include "rocksdb/status.h"
24 
25 namespace ROCKSDB_NAMESPACE {
26 
27 struct BackupableDBOptions {
28   // Where to keep the backup files. Has to be different than dbname_
29   // Best to set this to dbname_ + "/backups"
30   // Required
31   std::string backup_dir;
32 
33   // Backup Env object. It will be used for backup file I/O. If it's
34   // nullptr, backups will be written out using DBs Env. If it's
35   // non-nullptr, backup's I/O will be performed using this object.
36   // If you want to have backups on HDFS, use HDFS Env here!
37   // Default: nullptr
38   Env* backup_env;
39 
40   // If share_table_files == true, backup will assume that table files with
41   // same name have the same contents. This enables incremental backups and
42   // avoids unnecessary data copies.
43   // If share_table_files == false, each backup will be on its own and will
44   // not share any data with other backups.
45   // default: true
46   bool share_table_files;
47 
48   // Backup info and error messages will be written to info_log
49   // if non-nullptr.
50   // Default: nullptr
51   Logger* info_log;
52 
53   // If sync == true, we can guarantee you'll get consistent backup even
54   // on a machine crash/reboot. Backup process is slower with sync enabled.
55   // If sync == false, we don't guarantee anything on machine reboot. However,
56   // chances are some of the backups are consistent.
57   // Default: true
58   bool sync;
59 
60   // If true, it will delete whatever backups there are already
61   // Default: false
62   bool destroy_old_data;
63 
64   // If false, we won't backup log files. This option can be useful for backing
65   // up in-memory databases where log file are persisted, but table files are in
66   // memory.
67   // Default: true
68   bool backup_log_files;
69 
70   // Max bytes that can be transferred in a second during backup.
71   // If 0, go as fast as you can
72   // Default: 0
73   uint64_t backup_rate_limit;
74 
75   // Backup rate limiter. Used to control transfer speed for backup. If this is
76   // not null, backup_rate_limit is ignored.
77   // Default: nullptr
78   std::shared_ptr<RateLimiter> backup_rate_limiter{nullptr};
79 
80   // Max bytes that can be transferred in a second during restore.
81   // If 0, go as fast as you can
82   // Default: 0
83   uint64_t restore_rate_limit;
84 
85   // Restore rate limiter. Used to control transfer speed during restore. If
86   // this is not null, restore_rate_limit is ignored.
87   // Default: nullptr
88   std::shared_ptr<RateLimiter> restore_rate_limiter{nullptr};
89 
90   // Only used if share_table_files is set to true. If true, will consider that
91   // backups can come from different databases, hence a sst is not uniquely
92   // identifed by its name, but by the triple (file name, crc32, file length)
93   // Default: false
94   // Note: this is an experimental option, and you'll need to set it manually
95   // *turn it on only if you know what you're doing*
96   bool share_files_with_checksum;
97 
98   // Up to this many background threads will copy files for CreateNewBackup()
99   // and RestoreDBFromBackup()
100   // Default: 1
101   int max_background_operations;
102 
103   // During backup user can get callback every time next
104   // callback_trigger_interval_size bytes being copied.
105   // Default: 4194304
106   uint64_t callback_trigger_interval_size;
107 
108   // For BackupEngineReadOnly, Open() will open at most this many of the
109   // latest non-corrupted backups.
110   //
111   // Note: this setting is ignored (behaves like INT_MAX) for any kind of
112   // writable BackupEngine because it would inhibit accounting for shared
113   // files for proper backup deletion, including purging any incompletely
114   // created backups on creation of a new backup.
115   //
116   // Default: INT_MAX
117   int max_valid_backups_to_open;
118 
119   void Dump(Logger* logger) const;
120 
121   explicit BackupableDBOptions(
122       const std::string& _backup_dir, Env* _backup_env = nullptr,
123       bool _share_table_files = true, Logger* _info_log = nullptr,
124       bool _sync = true, bool _destroy_old_data = false,
125       bool _backup_log_files = true, uint64_t _backup_rate_limit = 0,
126       uint64_t _restore_rate_limit = 0, int _max_background_operations = 1,
127       uint64_t _callback_trigger_interval_size = 4 * 1024 * 1024,
128       int _max_valid_backups_to_open = INT_MAX)
backup_dirBackupableDBOptions129       : backup_dir(_backup_dir),
130         backup_env(_backup_env),
131         share_table_files(_share_table_files),
132         info_log(_info_log),
133         sync(_sync),
134         destroy_old_data(_destroy_old_data),
135         backup_log_files(_backup_log_files),
136         backup_rate_limit(_backup_rate_limit),
137         restore_rate_limit(_restore_rate_limit),
138         share_files_with_checksum(false),
139         max_background_operations(_max_background_operations),
140         callback_trigger_interval_size(_callback_trigger_interval_size),
141         max_valid_backups_to_open(_max_valid_backups_to_open) {
142     assert(share_table_files || !share_files_with_checksum);
143   }
144 };
145 
146 struct CreateBackupOptions {
147   // Flush will always trigger if 2PC is enabled.
148   // If write-ahead logs are disabled, set flush_before_backup=true to
149   // avoid losing unflushed key/value pairs from the memtable.
150   bool flush_before_backup = false;
151 
152   // Callback for reporting progress.
153   std::function<void()> progress_callback = []() {};
154 
155   // If false, background_thread_cpu_priority is ignored.
156   // Otherwise, the cpu priority can be decreased,
157   // if you try to increase the priority, the priority will not change.
158   // The initial priority of the threads is CpuPriority::kNormal,
159   // so you can decrease to priorities lower than kNormal.
160   bool decrease_background_thread_cpu_priority = false;
161   CpuPriority background_thread_cpu_priority = CpuPriority::kNormal;
162 };
163 
// Options controlling how a backup is restored.
struct RestoreOptions {
  // When true, restore leaves the log files already present in wal_dir
  // untouched and also moves all log files from the archive directory into
  // wal_dir. Meant to be combined with
  // BackupableDBOptions::backup_log_files = false for persisting in-memory
  // databases.
  // Default: false
  bool keep_log_files;

  // Explicit so that a bare bool does not silently convert to RestoreOptions.
  explicit RestoreOptions(bool _keep_log_files = false)
      : keep_log_files{_keep_log_files} {}
};
175 
// Numeric identifier of a backup.
typedef uint32_t BackupID;

// Description of a single backup, as handed back through GetBackupInfo().
//
// Every scalar field has a default member initializer, so a
// default-constructed BackupInfo holds zeros and an empty string instead of
// indeterminate values.
struct BackupInfo {
  // Identifier of the backup this record describes.
  BackupID backup_id = 0;
  // Time associated with the backup.
  // NOTE(review): unit/epoch is not visible in this header -- confirm.
  int64_t timestamp = 0;
  // Size of the backup.
  // NOTE(review): presumably bytes -- confirm against the implementation.
  uint64_t size = 0;

  // Number of files in the backup.
  uint32_t number_files = 0;
  // Application metadata stored with the backup (see
  // CreateNewBackupWithMetadata); empty when none was supplied.
  std::string app_metadata;

  // Creates a zeroed record with empty metadata.
  BackupInfo() = default;

  // Creates a fully populated record; each argument is stored in the field of
  // the same name (without the leading underscore).
  BackupInfo(BackupID _backup_id, int64_t _timestamp, uint64_t _size,
             uint32_t _number_files, const std::string& _app_metadata)
      : backup_id(_backup_id),
        timestamp(_timestamp),
        size(_size),
        number_files(_number_files),
        app_metadata(_app_metadata) {}
};
196 
// Holds two counters: the number of successful and of failed backups.
class BackupStatistics {
 public:
  // Starts with both counters at zero.
  BackupStatistics() : number_success_backup(0), number_fail_backup(0) {}

  // Starts with the given counter values.
  BackupStatistics(uint32_t _number_success_backup,
                   uint32_t _number_fail_backup)
      : number_success_backup(_number_success_backup),
        number_fail_backup(_number_fail_backup) {}

  ~BackupStatistics() {}

  // The members below are declared here and defined out of line.
  // NOTE(review): presumably each Increment* call bumps the matching counter
  // and each Get* call returns it -- confirm against the implementation.
  void IncrementNumberSuccessBackup();
  void IncrementNumberFailBackup();

  uint32_t GetNumberSuccessBackup() const;
  uint32_t GetNumberFailBackup() const;

  std::string ToString() const;

 private:
  uint32_t number_success_backup;
  uint32_t number_fail_backup;
};
223 
224 // A backup engine for accessing information about backups and restoring from
225 // them.
226 class BackupEngineReadOnly {
227  public:
~BackupEngineReadOnly()228   virtual ~BackupEngineReadOnly() {}
229 
230   static Status Open(const BackupableDBOptions& options, Env* db_env,
231                      BackupEngineReadOnly** backup_engine_ptr);
232   // keep for backward compatibility.
Open(Env * db_env,const BackupableDBOptions & options,BackupEngineReadOnly ** backup_engine_ptr)233   static Status Open(Env* db_env, const BackupableDBOptions& options,
234                      BackupEngineReadOnly** backup_engine_ptr) {
235     return BackupEngineReadOnly::Open(options, db_env, backup_engine_ptr);
236   }
237 
238   // Returns info about backups in backup_info
239   // You can GetBackupInfo safely, even with other BackupEngine performing
240   // backups on the same directory
241   virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info) = 0;
242 
243   // Returns info about corrupt backups in corrupt_backups
244   virtual void GetCorruptedBackups(
245       std::vector<BackupID>* corrupt_backup_ids) = 0;
246 
247   // Restoring DB from backup is NOT safe when there is another BackupEngine
248   // running that might call DeleteBackup() or PurgeOldBackups(). It is caller's
249   // responsibility to synchronize the operation, i.e. don't delete the backup
250   // when you're restoring from it
251   // See also the corresponding doc in BackupEngine
252   virtual Status RestoreDBFromBackup(const RestoreOptions& options,
253                                      BackupID backup_id,
254                                      const std::string& db_dir,
255                                      const std::string& wal_dir) = 0;
256 
257   // keep for backward compatibility.
258   virtual Status RestoreDBFromBackup(
259       BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
260       const RestoreOptions& options = RestoreOptions()) {
261     return RestoreDBFromBackup(options, backup_id, db_dir, wal_dir);
262   }
263 
264   // See the corresponding doc in BackupEngine
265   virtual Status RestoreDBFromLatestBackup(const RestoreOptions& options,
266                                            const std::string& db_dir,
267                                            const std::string& wal_dir) = 0;
268 
269   // keep for backward compatibility.
270   virtual Status RestoreDBFromLatestBackup(
271       const std::string& db_dir, const std::string& wal_dir,
272       const RestoreOptions& options = RestoreOptions()) {
273     return RestoreDBFromLatestBackup(options, db_dir, wal_dir);
274   }
275 
276   // checks that each file exists and that the size of the file matches our
277   // expectations. it does not check file checksum.
278   //
279   // If this BackupEngine created the backup, it compares the files' current
280   // sizes against the number of bytes written to them during creation.
281   // Otherwise, it compares the files' current sizes against their sizes when
282   // the BackupEngine was opened.
283   //
284   // Returns Status::OK() if all checks are good
285   virtual Status VerifyBackup(BackupID backup_id) = 0;
286 };
287 
288 // A backup engine for creating new backups.
289 class BackupEngine {
290  public:
~BackupEngine()291   virtual ~BackupEngine() {}
292 
293   // BackupableDBOptions have to be the same as the ones used in previous
294   // BackupEngines for the same backup directory.
295   static Status Open(const BackupableDBOptions& options, Env* db_env,
296                      BackupEngine** backup_engine_ptr);
297 
298   // keep for backward compatibility.
Open(Env * db_env,const BackupableDBOptions & options,BackupEngine ** backup_engine_ptr)299   static Status Open(Env* db_env, const BackupableDBOptions& options,
300                      BackupEngine** backup_engine_ptr) {
301     return BackupEngine::Open(options, db_env, backup_engine_ptr);
302   }
303 
304   // same as CreateNewBackup, but stores extra application metadata.
305   virtual Status CreateNewBackupWithMetadata(
306       const CreateBackupOptions& options, DB* db,
307       const std::string& app_metadata) = 0;
308 
309   // keep here for backward compatibility.
310   virtual Status CreateNewBackupWithMetadata(
311       DB* db, const std::string& app_metadata, bool flush_before_backup = false,
312       std::function<void()> progress_callback = []() {}) {
313     CreateBackupOptions options;
314     options.flush_before_backup = flush_before_backup;
315     options.progress_callback = progress_callback;
316     return CreateNewBackupWithMetadata(options, db, app_metadata);
317   }
318 
319   // Captures the state of the database in the latest backup
320   // NOT a thread safe call
CreateNewBackup(const CreateBackupOptions & options,DB * db)321   virtual Status CreateNewBackup(const CreateBackupOptions& options, DB* db) {
322     return CreateNewBackupWithMetadata(options, db, "");
323   }
324 
325   // keep here for backward compatibility.
326   virtual Status CreateNewBackup(DB* db, bool flush_before_backup = false,
327                                  std::function<void()> progress_callback =
328                                      []() {}) {
329     CreateBackupOptions options;
330     options.flush_before_backup = flush_before_backup;
331     options.progress_callback = progress_callback;
332     return CreateNewBackup(options, db);
333   }
334 
335   // Deletes old backups, keeping latest num_backups_to_keep alive.
336   // See also DeleteBackup.
337   virtual Status PurgeOldBackups(uint32_t num_backups_to_keep) = 0;
338 
339   // Deletes a specific backup. If this operation (or PurgeOldBackups)
340   // is not completed due to crash, power failure, etc. the state
341   // will be cleaned up the next time you call DeleteBackup,
342   // PurgeOldBackups, or GarbageCollect.
343   virtual Status DeleteBackup(BackupID backup_id) = 0;
344 
345   // Call this from another thread if you want to stop the backup
346   // that is currently happening. It will return immediatelly, will
347   // not wait for the backup to stop.
348   // The backup will stop ASAP and the call to CreateNewBackup will
349   // return Status::Incomplete(). It will not clean up after itself, but
350   // the state will remain consistent. The state will be cleaned up the
351   // next time you call CreateNewBackup or GarbageCollect.
352   virtual void StopBackup() = 0;
353 
354   // Returns info about backups in backup_info
355   virtual void GetBackupInfo(std::vector<BackupInfo>* backup_info) = 0;
356 
357   // Returns info about corrupt backups in corrupt_backups
358   virtual void GetCorruptedBackups(
359       std::vector<BackupID>* corrupt_backup_ids) = 0;
360 
361   // restore from backup with backup_id
362   // IMPORTANT -- if options_.share_table_files == true,
363   // options_.share_files_with_checksum == false, you restore DB from some
364   // backup that is not the latest, and you start creating new backups from the
365   // new DB, they will probably fail.
366   //
367   // Example: Let's say you have backups 1, 2, 3, 4, 5 and you restore 3.
368   // If you add new data to the DB and try creating a new backup now, the
369   // database will diverge from backups 4 and 5 and the new backup will fail.
370   // If you want to create new backup, you will first have to delete backups 4
371   // and 5.
372   virtual Status RestoreDBFromBackup(const RestoreOptions& options,
373                                      BackupID backup_id,
374                                      const std::string& db_dir,
375                                      const std::string& wal_dir) = 0;
376 
377   // keep for backward compatibility.
378   virtual Status RestoreDBFromBackup(
379       BackupID backup_id, const std::string& db_dir, const std::string& wal_dir,
380       const RestoreOptions& options = RestoreOptions()) {
381     return RestoreDBFromBackup(options, backup_id, db_dir, wal_dir);
382   }
383 
384   // restore from the latest backup
385   virtual Status RestoreDBFromLatestBackup(const RestoreOptions& options,
386                                            const std::string& db_dir,
387                                            const std::string& wal_dir) = 0;
388 
389   // keep for backward compatibility.
390   virtual Status RestoreDBFromLatestBackup(
391       const std::string& db_dir, const std::string& wal_dir,
392       const RestoreOptions& options = RestoreOptions()) {
393     return RestoreDBFromLatestBackup(options, db_dir, wal_dir);
394   }
395 
396   // checks that each file exists and that the size of the file matches our
397   // expectations. it does not check file checksum.
398   // Returns Status::OK() if all checks are good
399   virtual Status VerifyBackup(BackupID backup_id) = 0;
400 
401   // Will delete any files left over from incomplete creation or deletion of
402   // a backup. This is not normally needed as those operations also clean up
403   // after prior incomplete calls to the same kind of operation (create or
404   // delete).
405   // NOTE: This is not designed to delete arbitrary files added to the backup
406   // directory outside of BackupEngine, and clean-up is always subject to
407   // permissions on and availability of the underlying filesystem.
408   virtual Status GarbageCollect() = 0;
409 };
410 
411 }  // namespace ROCKSDB_NAMESPACE
412 #endif  // ROCKSDB_LITE
413