1 //  Copyright (c) 2018-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 #include "db/error_handler.h"
7 #include "db/db_impl/db_impl.h"
8 #include "db/event_helpers.h"
9 #include "file/sst_file_manager_impl.h"
10 
11 namespace ROCKSDB_NAMESPACE {
12 
// Maps to help decide the severity of an error based on the
// BackgroundErrorReason, Code, SubCode and whether db_options.paranoid_checks
// is set or not. There are 3 maps, going from most specific to least specific
// (i.e. from all 4 fields in a tuple to only the BackgroundErrorReason and
// paranoid_checks). The less specific maps serve as a catch-all in case we
// miss a specific error code or subcode.
//
// ErrorSeverityMap is the most specific of the three and is the first one
// consulted by ErrorHandler::SetBGError(). Key layout:
//   (reason, status code, status subcode, paranoid_checks) -> severity
// A combination that has no entry here is resolved by the less specific maps.
std::map<std::tuple<BackgroundErrorReason, Status::Code, Status::SubCode, bool>,
         Status::Severity>
    ErrorSeverityMap = {
        // Errors during BG compaction
        // Out of space, paranoid_checks on
        {std::make_tuple(BackgroundErrorReason::kCompaction,
                         Status::Code::kIOError, Status::SubCode::kNoSpace,
                         true),
         Status::Severity::kSoftError},
        // Out of space, paranoid_checks off
        {std::make_tuple(BackgroundErrorReason::kCompaction,
                         Status::Code::kIOError, Status::SubCode::kNoSpace,
                         false),
         Status::Severity::kNoError},
        // Space limit reached, paranoid_checks on (no entry for "off")
        {std::make_tuple(BackgroundErrorReason::kCompaction,
                         Status::Code::kIOError, Status::SubCode::kSpaceLimit,
                         true),
         Status::Severity::kHardError},
        // Errors during BG flush
        // Out of space, paranoid_checks on
        {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
                         Status::SubCode::kNoSpace, true),
         Status::Severity::kHardError},
        // Out of space, paranoid_checks off
        {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
                         Status::SubCode::kNoSpace, false),
         Status::Severity::kNoError},
        // Space limit reached, paranoid_checks on (no entry for "off")
        {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
                         Status::SubCode::kSpaceLimit, true),
         Status::Severity::kHardError},
        // Errors during Write
        // Out of space is a hard error regardless of paranoid_checks
        {std::make_tuple(BackgroundErrorReason::kWriteCallback,
                         Status::Code::kIOError, Status::SubCode::kNoSpace,
                         true),
         Status::Severity::kHardError},
        {std::make_tuple(BackgroundErrorReason::kWriteCallback,
                         Status::Code::kIOError, Status::SubCode::kNoSpace,
                         false),
         Status::Severity::kHardError},
};
55 
// Severity lookup keyed on everything except the status subcode. Key layout:
//   (reason, status code, paranoid_checks) -> severity
// ErrorHandler::SetBGError() consults this map only when ErrorSeverityMap has
// no entry for the exact (reason, code, subcode, paranoid_checks) combination.
// For every reason listed, corruption maps to kUnrecoverableError and a
// generic IO error to kFatalError when paranoid_checks is on; both map to
// kNoError when it is off.
std::map<std::tuple<BackgroundErrorReason, Status::Code, bool>, Status::Severity>
    DefaultErrorSeverityMap = {
        // Errors during BG compaction
        {std::make_tuple(BackgroundErrorReason::kCompaction,
                         Status::Code::kCorruption, true),
         Status::Severity::kUnrecoverableError},
        {std::make_tuple(BackgroundErrorReason::kCompaction,
                         Status::Code::kCorruption, false),
         Status::Severity::kNoError},
        {std::make_tuple(BackgroundErrorReason::kCompaction,
                         Status::Code::kIOError, true),
         Status::Severity::kFatalError},
        {std::make_tuple(BackgroundErrorReason::kCompaction,
                         Status::Code::kIOError, false),
         Status::Severity::kNoError},
        // Errors during BG flush
        {std::make_tuple(BackgroundErrorReason::kFlush,
                         Status::Code::kCorruption, true),
         Status::Severity::kUnrecoverableError},
        {std::make_tuple(BackgroundErrorReason::kFlush,
                         Status::Code::kCorruption, false),
         Status::Severity::kNoError},
        {std::make_tuple(BackgroundErrorReason::kFlush,
                         Status::Code::kIOError, true),
         Status::Severity::kFatalError},
        {std::make_tuple(BackgroundErrorReason::kFlush,
                         Status::Code::kIOError, false),
         Status::Severity::kNoError},
        // Errors during Write
        {std::make_tuple(BackgroundErrorReason::kWriteCallback,
                         Status::Code::kCorruption, true),
         Status::Severity::kUnrecoverableError},
        {std::make_tuple(BackgroundErrorReason::kWriteCallback,
                         Status::Code::kCorruption, false),
         Status::Severity::kNoError},
        {std::make_tuple(BackgroundErrorReason::kWriteCallback,
                         Status::Code::kIOError, true),
         Status::Severity::kFatalError},
        {std::make_tuple(BackgroundErrorReason::kWriteCallback,
                         Status::Code::kIOError, false),
         Status::Severity::kNoError},
};
98 
// Least specific severity lookup. Key layout:
//   (reason, paranoid_checks) -> severity
// ErrorHandler::SetBGError() consults this map last, when neither of the more
// specific severity maps has a matching entry. If this map has no entry
// either, SetBGError() keeps its initial severity of kFatalError.
std::map<std::tuple<BackgroundErrorReason, bool>, Status::Severity>
    DefaultReasonMap = {
        // Errors during BG compaction
        {std::make_tuple(BackgroundErrorReason::kCompaction, true),
          Status::Severity::kFatalError},
        {std::make_tuple(BackgroundErrorReason::kCompaction, false),
          Status::Severity::kNoError},
        // Errors during BG flush
        {std::make_tuple(BackgroundErrorReason::kFlush, true),
          Status::Severity::kFatalError},
        {std::make_tuple(BackgroundErrorReason::kFlush, false),
          Status::Severity::kNoError},
        // Errors during Write (fatal regardless of paranoid_checks)
        {std::make_tuple(BackgroundErrorReason::kWriteCallback, true),
          Status::Severity::kFatalError},
        {std::make_tuple(BackgroundErrorReason::kWriteCallback, false),
          Status::Severity::kFatalError},
        // Errors during Memtable update (fatal regardless of paranoid_checks)
        {std::make_tuple(BackgroundErrorReason::kMemTable, true),
          Status::Severity::kFatalError},
        {std::make_tuple(BackgroundErrorReason::kMemTable, false),
          Status::Severity::kFatalError},
};
122 
CancelErrorRecovery()123 void ErrorHandler::CancelErrorRecovery() {
124 #ifndef ROCKSDB_LITE
125   db_mutex_->AssertHeld();
126 
127   // We'll release the lock before calling sfm, so make sure no new
128   // recovery gets scheduled at that point
129   auto_recovery_ = false;
130   SstFileManagerImpl* sfm = reinterpret_cast<SstFileManagerImpl*>(
131       db_options_.sst_file_manager.get());
132   if (sfm) {
133     // This may or may not cancel a pending recovery
134     db_mutex_->Unlock();
135     bool cancelled = sfm->CancelErrorRecovery(this);
136     db_mutex_->Lock();
137     if (cancelled) {
138       recovery_in_prog_ = false;
139     }
140   }
141 #endif
142 }
143 
144 // This is the main function for looking at an error during a background
145 // operation and deciding the severity, and error recovery strategy. The high
146 // level algorithm is as follows -
147 // 1. Classify the severity of the error based on the ErrorSeverityMap,
148 //    DefaultErrorSeverityMap and DefaultReasonMap defined earlier
149 // 2. Call a Status code specific override function to adjust the severity
150 //    if needed. The reason for this is our ability to recover may depend on
151 //    the exact options enabled in DBOptions
152 // 3. Determine if auto recovery is possible. A listener notification callback
153 //    is called, which can disable the auto recovery even if we decide its
154 //    feasible
155 // 4. For Status::NoSpace() errors, rely on SstFileManagerImpl to control
156 //    the actual recovery. If no sst file manager is specified in DBOptions,
157 //    a default one is allocated during DB::Open(), so there will always be
158 //    one.
159 // This can also get called as part of a recovery operation. In that case, we
160 // also track the error separately in recovery_error_ so we can tell in the
161 // end whether recovery succeeded or not
SetBGError(const Status & bg_err,BackgroundErrorReason reason)162 Status ErrorHandler::SetBGError(const Status& bg_err, BackgroundErrorReason reason) {
163   db_mutex_->AssertHeld();
164 
165   if (bg_err.ok()) {
166     return Status::OK();
167   }
168 
169   bool paranoid = db_options_.paranoid_checks;
170   Status::Severity sev = Status::Severity::kFatalError;
171   Status new_bg_err;
172   bool found = false;
173 
174   {
175     auto entry = ErrorSeverityMap.find(std::make_tuple(reason, bg_err.code(),
176           bg_err.subcode(), paranoid));
177     if (entry != ErrorSeverityMap.end()) {
178       sev = entry->second;
179       found = true;
180     }
181   }
182 
183   if (!found) {
184     auto entry = DefaultErrorSeverityMap.find(std::make_tuple(reason,
185           bg_err.code(), paranoid));
186     if (entry != DefaultErrorSeverityMap.end()) {
187       sev = entry->second;
188       found = true;
189     }
190   }
191 
192   if (!found) {
193     auto entry = DefaultReasonMap.find(std::make_tuple(reason, paranoid));
194     if (entry != DefaultReasonMap.end()) {
195       sev = entry->second;
196     }
197   }
198 
199   new_bg_err = Status(bg_err, sev);
200 
201   // Check if recovery is currently in progress. If it is, we will save this
202   // error so we can check it at the end to see if recovery succeeded or not
203   if (recovery_in_prog_ && recovery_error_.ok()) {
204     recovery_error_ = new_bg_err;
205   }
206 
207   bool auto_recovery = auto_recovery_;
208   if (new_bg_err.severity() >= Status::Severity::kFatalError && auto_recovery) {
209     auto_recovery = false;
210   }
211 
212   // Allow some error specific overrides
213   if (new_bg_err == Status::NoSpace()) {
214     new_bg_err = OverrideNoSpaceError(new_bg_err, &auto_recovery);
215   }
216 
217   if (!new_bg_err.ok()) {
218     Status s = new_bg_err;
219     EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s,
220                                           db_mutex_, &auto_recovery);
221     if (!s.ok() && (s.severity() > bg_error_.severity())) {
222       bg_error_ = s;
223     } else {
224       // This error is less severe than previously encountered error. Don't
225       // take any further action
226       return bg_error_;
227     }
228   }
229 
230   if (auto_recovery) {
231     recovery_in_prog_ = true;
232 
233     // Kick-off error specific recovery
234     if (bg_error_ == Status::NoSpace()) {
235       RecoverFromNoSpace();
236     }
237   }
238   return bg_error_;
239 }
240 
SetBGError(const IOStatus & bg_io_err,BackgroundErrorReason reason)241 Status ErrorHandler::SetBGError(const IOStatus& bg_io_err,
242                                 BackgroundErrorReason reason) {
243   db_mutex_->AssertHeld();
244   if (bg_io_err.ok()) {
245     return Status::OK();
246   }
247   if (recovery_in_prog_ && recovery_error_.ok()) {
248     recovery_error_ = bg_io_err;
249   }
250   Status new_bg_io_err = bg_io_err;
251   Status s;
252   if (bg_io_err.GetDataLoss()) {
253     // FIrst, data loss is treated as unrecoverable error. So it can directly
254     // overwrite any existing bg_error_.
255     bool auto_recovery = false;
256     Status bg_err(new_bg_io_err, Status::Severity::kUnrecoverableError);
257     bg_error_ = bg_err;
258     EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s,
259                                           db_mutex_, &auto_recovery);
260     return bg_error_;
261   } else if (bg_io_err.GetRetryable()) {
262     // Second, check if the error is a retryable IO error or not. if it is
263     // retryable error and its severity is higher than bg_error_, overwrite
264     // the bg_error_ with new error.
265     // In current stage, treat retryable error as HardError. No automatic
266     // recovery.
267     bool auto_recovery = false;
268     Status bg_err(new_bg_io_err, Status::Severity::kHardError);
269     EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s,
270                                           db_mutex_, &auto_recovery);
271     if (bg_err.severity() > bg_error_.severity()) {
272       bg_error_ = bg_err;
273     }
274     return bg_error_;
275   } else {
276     s = SetBGError(new_bg_io_err, reason);
277   }
278   return s;
279 }
280 
OverrideNoSpaceError(Status bg_error,bool * auto_recovery)281 Status ErrorHandler::OverrideNoSpaceError(Status bg_error,
282                                           bool* auto_recovery) {
283 #ifndef ROCKSDB_LITE
284   if (bg_error.severity() >= Status::Severity::kFatalError) {
285     return bg_error;
286   }
287 
288   if (db_options_.sst_file_manager.get() == nullptr) {
289     // We rely on SFM to poll for enough disk space and recover
290     *auto_recovery = false;
291     return bg_error;
292   }
293 
294   if (db_options_.allow_2pc &&
295       (bg_error.severity() <= Status::Severity::kSoftError)) {
296     // Don't know how to recover, as the contents of the current WAL file may
297     // be inconsistent, and it may be needed for 2PC. If 2PC is not enabled,
298     // we can just flush the memtable and discard the log
299     *auto_recovery = false;
300     return Status(bg_error, Status::Severity::kFatalError);
301   }
302 
303   {
304     uint64_t free_space;
305     if (db_options_.env->GetFreeSpace(db_options_.db_paths[0].path,
306                                       &free_space) == Status::NotSupported()) {
307       *auto_recovery = false;
308     }
309   }
310 
311   return bg_error;
312 #else
313   (void)auto_recovery;
314   return Status(bg_error, Status::Severity::kFatalError);
315 #endif
316 }
317 
RecoverFromNoSpace()318 void ErrorHandler::RecoverFromNoSpace() {
319 #ifndef ROCKSDB_LITE
320   SstFileManagerImpl* sfm =
321       reinterpret_cast<SstFileManagerImpl*>(db_options_.sst_file_manager.get());
322 
323   // Inform SFM of the error, so it can kick-off the recovery
324   if (sfm) {
325     sfm->StartErrorRecovery(this, bg_error_);
326   }
327 #endif
328 }
329 
ClearBGError()330 Status ErrorHandler::ClearBGError() {
331 #ifndef ROCKSDB_LITE
332   db_mutex_->AssertHeld();
333 
334   // Signal that recovery succeeded
335   if (recovery_error_.ok()) {
336     Status old_bg_error = bg_error_;
337     bg_error_ = Status::OK();
338     recovery_in_prog_ = false;
339     EventHelpers::NotifyOnErrorRecoveryCompleted(db_options_.listeners,
340                                                  old_bg_error, db_mutex_);
341   }
342   return recovery_error_;
343 #else
344   return bg_error_;
345 #endif
346 }
347 
RecoverFromBGError(bool is_manual)348 Status ErrorHandler::RecoverFromBGError(bool is_manual) {
349 #ifndef ROCKSDB_LITE
350   InstrumentedMutexLock l(db_mutex_);
351   if (is_manual) {
352     // If its a manual recovery and there's a background recovery in progress
353     // return busy status
354     if (recovery_in_prog_) {
355       return Status::Busy();
356     }
357     recovery_in_prog_ = true;
358   }
359 
360   if (bg_error_.severity() == Status::Severity::kSoftError) {
361     // Simply clear the background error and return
362     recovery_error_ = Status::OK();
363     return ClearBGError();
364   }
365 
366   // Reset recovery_error_. We will use this to record any errors that happen
367   // during the recovery process. While recovering, the only operations that
368   // can generate background errors should be the flush operations
369   recovery_error_ = Status::OK();
370   Status s = db_->ResumeImpl();
371   // For manual recover, shutdown, and fatal error  cases, set
372   // recovery_in_prog_ to false. For automatic background recovery, leave it
373   // as is regardless of success or failure as it will be retried
374   if (is_manual || s.IsShutdownInProgress() ||
375       bg_error_.severity() >= Status::Severity::kFatalError) {
376     recovery_in_prog_ = false;
377   }
378   return s;
379 #else
380   (void)is_manual;
381   return bg_error_;
382 #endif
383 }
384 }  // namespace ROCKSDB_NAMESPACE
385