1 // Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 #include "db/error_handler.h"
7 #include "db/db_impl/db_impl.h"
8 #include "db/event_helpers.h"
9 #include "file/sst_file_manager_impl.h"
10
11 namespace ROCKSDB_NAMESPACE {
12
13 // Maps to help decide the severity of an error based on the
14 // BackgroundErrorReason, Code, SubCode and whether db_options.paranoid_checks
15 // is set or not. There are 3 maps, going from most specific to least specific
16 // (i.e from all 4 fields in a tuple to only the BackgroundErrorReason and
17 // paranoid_checks). The less specific map serves as a catch all in case we miss
18 // a specific error code or subcode.
19 std::map<std::tuple<BackgroundErrorReason, Status::Code, Status::SubCode, bool>,
20 Status::Severity>
21 ErrorSeverityMap = {
22 // Errors during BG compaction
23 {std::make_tuple(BackgroundErrorReason::kCompaction,
24 Status::Code::kIOError, Status::SubCode::kNoSpace,
25 true),
26 Status::Severity::kSoftError},
27 {std::make_tuple(BackgroundErrorReason::kCompaction,
28 Status::Code::kIOError, Status::SubCode::kNoSpace,
29 false),
30 Status::Severity::kNoError},
31 {std::make_tuple(BackgroundErrorReason::kCompaction,
32 Status::Code::kIOError, Status::SubCode::kSpaceLimit,
33 true),
34 Status::Severity::kHardError},
35 // Errors during BG flush
36 {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
37 Status::SubCode::kNoSpace, true),
38 Status::Severity::kHardError},
39 {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
40 Status::SubCode::kNoSpace, false),
41 Status::Severity::kNoError},
42 {std::make_tuple(BackgroundErrorReason::kFlush, Status::Code::kIOError,
43 Status::SubCode::kSpaceLimit, true),
44 Status::Severity::kHardError},
45 // Errors during Write
46 {std::make_tuple(BackgroundErrorReason::kWriteCallback,
47 Status::Code::kIOError, Status::SubCode::kNoSpace,
48 true),
49 Status::Severity::kHardError},
50 {std::make_tuple(BackgroundErrorReason::kWriteCallback,
51 Status::Code::kIOError, Status::SubCode::kNoSpace,
52 false),
53 Status::Severity::kHardError},
54 };
55
56 std::map<std::tuple<BackgroundErrorReason, Status::Code, bool>, Status::Severity>
57 DefaultErrorSeverityMap = {
58 // Errors during BG compaction
59 {std::make_tuple(BackgroundErrorReason::kCompaction,
60 Status::Code::kCorruption, true),
61 Status::Severity::kUnrecoverableError},
62 {std::make_tuple(BackgroundErrorReason::kCompaction,
63 Status::Code::kCorruption, false),
64 Status::Severity::kNoError},
65 {std::make_tuple(BackgroundErrorReason::kCompaction,
66 Status::Code::kIOError, true),
67 Status::Severity::kFatalError},
68 {std::make_tuple(BackgroundErrorReason::kCompaction,
69 Status::Code::kIOError, false),
70 Status::Severity::kNoError},
71 // Errors during BG flush
72 {std::make_tuple(BackgroundErrorReason::kFlush,
73 Status::Code::kCorruption, true),
74 Status::Severity::kUnrecoverableError},
75 {std::make_tuple(BackgroundErrorReason::kFlush,
76 Status::Code::kCorruption, false),
77 Status::Severity::kNoError},
78 {std::make_tuple(BackgroundErrorReason::kFlush,
79 Status::Code::kIOError, true),
80 Status::Severity::kFatalError},
81 {std::make_tuple(BackgroundErrorReason::kFlush,
82 Status::Code::kIOError, false),
83 Status::Severity::kNoError},
84 // Errors during Write
85 {std::make_tuple(BackgroundErrorReason::kWriteCallback,
86 Status::Code::kCorruption, true),
87 Status::Severity::kUnrecoverableError},
88 {std::make_tuple(BackgroundErrorReason::kWriteCallback,
89 Status::Code::kCorruption, false),
90 Status::Severity::kNoError},
91 {std::make_tuple(BackgroundErrorReason::kWriteCallback,
92 Status::Code::kIOError, true),
93 Status::Severity::kFatalError},
94 {std::make_tuple(BackgroundErrorReason::kWriteCallback,
95 Status::Code::kIOError, false),
96 Status::Severity::kNoError},
97 };
98
99 std::map<std::tuple<BackgroundErrorReason, bool>, Status::Severity>
100 DefaultReasonMap = {
101 // Errors during BG compaction
102 {std::make_tuple(BackgroundErrorReason::kCompaction, true),
103 Status::Severity::kFatalError},
104 {std::make_tuple(BackgroundErrorReason::kCompaction, false),
105 Status::Severity::kNoError},
106 // Errors during BG flush
107 {std::make_tuple(BackgroundErrorReason::kFlush, true),
108 Status::Severity::kFatalError},
109 {std::make_tuple(BackgroundErrorReason::kFlush, false),
110 Status::Severity::kNoError},
111 // Errors during Write
112 {std::make_tuple(BackgroundErrorReason::kWriteCallback, true),
113 Status::Severity::kFatalError},
114 {std::make_tuple(BackgroundErrorReason::kWriteCallback, false),
115 Status::Severity::kFatalError},
116 // Errors during Memtable update
117 {std::make_tuple(BackgroundErrorReason::kMemTable, true),
118 Status::Severity::kFatalError},
119 {std::make_tuple(BackgroundErrorReason::kMemTable, false),
120 Status::Severity::kFatalError},
121 };
122
CancelErrorRecovery()123 void ErrorHandler::CancelErrorRecovery() {
124 #ifndef ROCKSDB_LITE
125 db_mutex_->AssertHeld();
126
127 // We'll release the lock before calling sfm, so make sure no new
128 // recovery gets scheduled at that point
129 auto_recovery_ = false;
130 SstFileManagerImpl* sfm = reinterpret_cast<SstFileManagerImpl*>(
131 db_options_.sst_file_manager.get());
132 if (sfm) {
133 // This may or may not cancel a pending recovery
134 db_mutex_->Unlock();
135 bool cancelled = sfm->CancelErrorRecovery(this);
136 db_mutex_->Lock();
137 if (cancelled) {
138 recovery_in_prog_ = false;
139 }
140 }
141 #endif
142 }
143
144 // This is the main function for looking at an error during a background
145 // operation and deciding the severity, and error recovery strategy. The high
146 // level algorithm is as follows -
147 // 1. Classify the severity of the error based on the ErrorSeverityMap,
148 // DefaultErrorSeverityMap and DefaultReasonMap defined earlier
149 // 2. Call a Status code specific override function to adjust the severity
150 // if needed. The reason for this is our ability to recover may depend on
151 // the exact options enabled in DBOptions
152 // 3. Determine if auto recovery is possible. A listener notification callback
153 // is called, which can disable the auto recovery even if we decide its
154 // feasible
155 // 4. For Status::NoSpace() errors, rely on SstFileManagerImpl to control
156 // the actual recovery. If no sst file manager is specified in DBOptions,
157 // a default one is allocated during DB::Open(), so there will always be
158 // one.
159 // This can also get called as part of a recovery operation. In that case, we
160 // also track the error separately in recovery_error_ so we can tell in the
161 // end whether recovery succeeded or not
SetBGError(const Status & bg_err,BackgroundErrorReason reason)162 Status ErrorHandler::SetBGError(const Status& bg_err, BackgroundErrorReason reason) {
163 db_mutex_->AssertHeld();
164
165 if (bg_err.ok()) {
166 return Status::OK();
167 }
168
169 bool paranoid = db_options_.paranoid_checks;
170 Status::Severity sev = Status::Severity::kFatalError;
171 Status new_bg_err;
172 bool found = false;
173
174 {
175 auto entry = ErrorSeverityMap.find(std::make_tuple(reason, bg_err.code(),
176 bg_err.subcode(), paranoid));
177 if (entry != ErrorSeverityMap.end()) {
178 sev = entry->second;
179 found = true;
180 }
181 }
182
183 if (!found) {
184 auto entry = DefaultErrorSeverityMap.find(std::make_tuple(reason,
185 bg_err.code(), paranoid));
186 if (entry != DefaultErrorSeverityMap.end()) {
187 sev = entry->second;
188 found = true;
189 }
190 }
191
192 if (!found) {
193 auto entry = DefaultReasonMap.find(std::make_tuple(reason, paranoid));
194 if (entry != DefaultReasonMap.end()) {
195 sev = entry->second;
196 }
197 }
198
199 new_bg_err = Status(bg_err, sev);
200
201 // Check if recovery is currently in progress. If it is, we will save this
202 // error so we can check it at the end to see if recovery succeeded or not
203 if (recovery_in_prog_ && recovery_error_.ok()) {
204 recovery_error_ = new_bg_err;
205 }
206
207 bool auto_recovery = auto_recovery_;
208 if (new_bg_err.severity() >= Status::Severity::kFatalError && auto_recovery) {
209 auto_recovery = false;
210 }
211
212 // Allow some error specific overrides
213 if (new_bg_err == Status::NoSpace()) {
214 new_bg_err = OverrideNoSpaceError(new_bg_err, &auto_recovery);
215 }
216
217 if (!new_bg_err.ok()) {
218 Status s = new_bg_err;
219 EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s,
220 db_mutex_, &auto_recovery);
221 if (!s.ok() && (s.severity() > bg_error_.severity())) {
222 bg_error_ = s;
223 } else {
224 // This error is less severe than previously encountered error. Don't
225 // take any further action
226 return bg_error_;
227 }
228 }
229
230 if (auto_recovery) {
231 recovery_in_prog_ = true;
232
233 // Kick-off error specific recovery
234 if (bg_error_ == Status::NoSpace()) {
235 RecoverFromNoSpace();
236 }
237 }
238 return bg_error_;
239 }
240
SetBGError(const IOStatus & bg_io_err,BackgroundErrorReason reason)241 Status ErrorHandler::SetBGError(const IOStatus& bg_io_err,
242 BackgroundErrorReason reason) {
243 db_mutex_->AssertHeld();
244 if (bg_io_err.ok()) {
245 return Status::OK();
246 }
247 if (recovery_in_prog_ && recovery_error_.ok()) {
248 recovery_error_ = bg_io_err;
249 }
250 Status new_bg_io_err = bg_io_err;
251 Status s;
252 if (bg_io_err.GetDataLoss()) {
253 // FIrst, data loss is treated as unrecoverable error. So it can directly
254 // overwrite any existing bg_error_.
255 bool auto_recovery = false;
256 Status bg_err(new_bg_io_err, Status::Severity::kUnrecoverableError);
257 bg_error_ = bg_err;
258 EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s,
259 db_mutex_, &auto_recovery);
260 return bg_error_;
261 } else if (bg_io_err.GetRetryable()) {
262 // Second, check if the error is a retryable IO error or not. if it is
263 // retryable error and its severity is higher than bg_error_, overwrite
264 // the bg_error_ with new error.
265 // In current stage, treat retryable error as HardError. No automatic
266 // recovery.
267 bool auto_recovery = false;
268 Status bg_err(new_bg_io_err, Status::Severity::kHardError);
269 EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason, &s,
270 db_mutex_, &auto_recovery);
271 if (bg_err.severity() > bg_error_.severity()) {
272 bg_error_ = bg_err;
273 }
274 return bg_error_;
275 } else {
276 s = SetBGError(new_bg_io_err, reason);
277 }
278 return s;
279 }
280
OverrideNoSpaceError(Status bg_error,bool * auto_recovery)281 Status ErrorHandler::OverrideNoSpaceError(Status bg_error,
282 bool* auto_recovery) {
283 #ifndef ROCKSDB_LITE
284 if (bg_error.severity() >= Status::Severity::kFatalError) {
285 return bg_error;
286 }
287
288 if (db_options_.sst_file_manager.get() == nullptr) {
289 // We rely on SFM to poll for enough disk space and recover
290 *auto_recovery = false;
291 return bg_error;
292 }
293
294 if (db_options_.allow_2pc &&
295 (bg_error.severity() <= Status::Severity::kSoftError)) {
296 // Don't know how to recover, as the contents of the current WAL file may
297 // be inconsistent, and it may be needed for 2PC. If 2PC is not enabled,
298 // we can just flush the memtable and discard the log
299 *auto_recovery = false;
300 return Status(bg_error, Status::Severity::kFatalError);
301 }
302
303 {
304 uint64_t free_space;
305 if (db_options_.env->GetFreeSpace(db_options_.db_paths[0].path,
306 &free_space) == Status::NotSupported()) {
307 *auto_recovery = false;
308 }
309 }
310
311 return bg_error;
312 #else
313 (void)auto_recovery;
314 return Status(bg_error, Status::Severity::kFatalError);
315 #endif
316 }
317
RecoverFromNoSpace()318 void ErrorHandler::RecoverFromNoSpace() {
319 #ifndef ROCKSDB_LITE
320 SstFileManagerImpl* sfm =
321 reinterpret_cast<SstFileManagerImpl*>(db_options_.sst_file_manager.get());
322
323 // Inform SFM of the error, so it can kick-off the recovery
324 if (sfm) {
325 sfm->StartErrorRecovery(this, bg_error_);
326 }
327 #endif
328 }
329
ClearBGError()330 Status ErrorHandler::ClearBGError() {
331 #ifndef ROCKSDB_LITE
332 db_mutex_->AssertHeld();
333
334 // Signal that recovery succeeded
335 if (recovery_error_.ok()) {
336 Status old_bg_error = bg_error_;
337 bg_error_ = Status::OK();
338 recovery_in_prog_ = false;
339 EventHelpers::NotifyOnErrorRecoveryCompleted(db_options_.listeners,
340 old_bg_error, db_mutex_);
341 }
342 return recovery_error_;
343 #else
344 return bg_error_;
345 #endif
346 }
347
RecoverFromBGError(bool is_manual)348 Status ErrorHandler::RecoverFromBGError(bool is_manual) {
349 #ifndef ROCKSDB_LITE
350 InstrumentedMutexLock l(db_mutex_);
351 if (is_manual) {
352 // If its a manual recovery and there's a background recovery in progress
353 // return busy status
354 if (recovery_in_prog_) {
355 return Status::Busy();
356 }
357 recovery_in_prog_ = true;
358 }
359
360 if (bg_error_.severity() == Status::Severity::kSoftError) {
361 // Simply clear the background error and return
362 recovery_error_ = Status::OK();
363 return ClearBGError();
364 }
365
366 // Reset recovery_error_. We will use this to record any errors that happen
367 // during the recovery process. While recovering, the only operations that
368 // can generate background errors should be the flush operations
369 recovery_error_ = Status::OK();
370 Status s = db_->ResumeImpl();
371 // For manual recover, shutdown, and fatal error cases, set
372 // recovery_in_prog_ to false. For automatic background recovery, leave it
373 // as is regardless of success or failure as it will be retried
374 if (is_manual || s.IsShutdownInProgress() ||
375 bg_error_.severity() >= Status::Severity::kFatalError) {
376 recovery_in_prog_ = false;
377 }
378 return s;
379 #else
380 (void)is_manual;
381 return bg_error_;
382 #endif
383 }
384 } // namespace ROCKSDB_NAMESPACE
385