1 // SPDX-License-Identifier: GPL-2.0 2 #include "bcachefs.h" 3 #include "btree_cache.h" 4 #include "btree_iter.h" 5 #include "error.h" 6 #include "journal.h" 7 #include "namei.h" 8 #include "recovery_passes.h" 9 #include "super.h" 10 #include "thread_with_file.h" 11 12 #define FSCK_ERR_RATELIMIT_NR 10 13 14 void bch2_log_msg_start(struct bch_fs *c, struct printbuf *out) 15 { 16 printbuf_indent_add_nextline(out, 2); 17 18 #ifdef BCACHEFS_LOG_PREFIX 19 prt_printf(out, bch2_log_msg(c, "")); 20 #endif 21 } 22 23 bool __bch2_inconsistent_error(struct bch_fs *c, struct printbuf *out) 24 { 25 set_bit(BCH_FS_error, &c->flags); 26 27 switch (c->opts.errors) { 28 case BCH_ON_ERROR_continue: 29 return false; 30 case BCH_ON_ERROR_fix_safe: 31 case BCH_ON_ERROR_ro: 32 if (bch2_fs_emergency_read_only(c)) 33 prt_printf(out, "inconsistency detected - emergency read only at journal seq %llu\n", 34 journal_cur_seq(&c->journal)); 35 return true; 36 case BCH_ON_ERROR_panic: 37 bch2_print_string_as_lines(KERN_ERR, out->buf); 38 panic(bch2_fmt(c, "panic after error")); 39 return true; 40 default: 41 BUG(); 42 } 43 } 44 45 bool bch2_inconsistent_error(struct bch_fs *c) 46 { 47 struct printbuf buf = PRINTBUF; 48 printbuf_indent_add_nextline(&buf, 2); 49 50 bool ret = __bch2_inconsistent_error(c, &buf); 51 if (ret) 52 bch_err(c, "%s", buf.buf); 53 printbuf_exit(&buf); 54 return ret; 55 } 56 57 __printf(3, 0) 58 static bool bch2_fs_trans_inconsistent(struct bch_fs *c, struct btree_trans *trans, 59 const char *fmt, va_list args) 60 { 61 struct printbuf buf = PRINTBUF; 62 63 bch2_log_msg_start(c, &buf); 64 65 prt_vprintf(&buf, fmt, args); 66 prt_newline(&buf); 67 68 if (trans) 69 bch2_trans_updates_to_text(&buf, trans); 70 bool ret = __bch2_inconsistent_error(c, &buf); 71 bch2_print_string_as_lines(KERN_ERR, buf.buf); 72 73 printbuf_exit(&buf); 74 return ret; 75 } 76 77 bool bch2_fs_inconsistent(struct bch_fs *c, const char *fmt, ...) 78 { 79 va_list args; 80 va_start(args, fmt); 81 bool ret = bch2_fs_trans_inconsistent(c, NULL, fmt, args); 82 va_end(args); 83 return ret; 84 } 85 86 bool bch2_trans_inconsistent(struct btree_trans *trans, const char *fmt, ...) 87 { 88 va_list args; 89 va_start(args, fmt); 90 bool ret = bch2_fs_trans_inconsistent(trans->c, trans, fmt, args); 91 va_end(args); 92 return ret; 93 } 94 95 int __bch2_topology_error(struct bch_fs *c, struct printbuf *out) 96 { 97 prt_printf(out, "btree topology error: "); 98 99 set_bit(BCH_FS_topology_error, &c->flags); 100 if (!test_bit(BCH_FS_recovery_running, &c->flags)) { 101 __bch2_inconsistent_error(c, out); 102 return -BCH_ERR_btree_need_topology_repair; 103 } else { 104 return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology) ?: 105 -BCH_ERR_btree_node_read_validate_error; 106 } 107 } 108 109 int bch2_fs_topology_error(struct bch_fs *c, const char *fmt, ...) 110 { 111 struct printbuf buf = PRINTBUF; 112 113 bch2_log_msg_start(c, &buf); 114 115 va_list args; 116 va_start(args, fmt); 117 prt_vprintf(&buf, fmt, args); 118 va_end(args); 119 120 int ret = __bch2_topology_error(c, &buf); 121 bch2_print_string_as_lines(KERN_ERR, buf.buf); 122 123 printbuf_exit(&buf); 124 return ret; 125 } 126 127 void bch2_fatal_error(struct bch_fs *c) 128 { 129 if (bch2_fs_emergency_read_only(c)) 130 bch_err(c, "fatal error - emergency read only"); 131 } 132 133 void bch2_io_error_work(struct work_struct *work) 134 { 135 struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work); 136 struct bch_fs *c = ca->fs; 137 138 /* XXX: if it's reads or checksums that are failing, set it to failed */ 139 140 down_write(&c->state_lock); 141 unsigned long write_errors_start = READ_ONCE(ca->write_errors_start); 142 143 if (write_errors_start && 144 time_after(jiffies, 145 write_errors_start + c->opts.write_error_timeout * HZ)) { 146 if (ca->mi.state >= BCH_MEMBER_STATE_ro) 147 goto out; 148 149 bool dev = !__bch2_dev_set_state(c, ca, BCH_MEMBER_STATE_ro, 150 BCH_FORCE_IF_DEGRADED); 151 152 bch_err(ca, 153 "writes erroring for %u seconds, setting %s ro", 154 c->opts.write_error_timeout, 155 dev ? "device" : "filesystem"); 156 if (!dev) 157 bch2_fs_emergency_read_only(c); 158 159 } 160 out: 161 up_write(&c->state_lock); 162 } 163 164 void bch2_io_error(struct bch_dev *ca, enum bch_member_error_type type) 165 { 166 atomic64_inc(&ca->errors[type]); 167 168 if (type == BCH_MEMBER_ERROR_write && !ca->write_errors_start) 169 ca->write_errors_start = jiffies; 170 171 queue_work(system_long_wq, &ca->io_error_work); 172 } 173 174 enum ask_yn { 175 YN_NO, 176 YN_YES, 177 YN_ALLNO, 178 YN_ALLYES, 179 }; 180 181 static enum ask_yn parse_yn_response(char *buf) 182 { 183 buf = strim(buf); 184 185 if (strlen(buf) == 1) 186 switch (buf[0]) { 187 case 'n': 188 return YN_NO; 189 case 'y': 190 return YN_YES; 191 case 'N': 192 return YN_ALLNO; 193 case 'Y': 194 return YN_ALLYES; 195 } 196 return -1; 197 } 198 199 #ifdef __KERNEL__ 200 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c, struct btree_trans *trans) 201 { 202 struct stdio_redirect *stdio = c->stdio; 203 204 if (c->stdio_filter && c->stdio_filter != current) 205 stdio = NULL; 206 207 if (!stdio) 208 return YN_NO; 209 210 if (trans) 211 bch2_trans_unlock(trans); 212 213 unsigned long unlock_long_at = trans ? jiffies + HZ * 2 : 0; 214 darray_char line = {}; 215 int ret; 216 217 do { 218 unsigned long t; 219 bch2_print(c, " (y,n, or Y,N for all errors of this type) "); 220 rewait: 221 t = unlock_long_at 222 ? max_t(long, unlock_long_at - jiffies, 0) 223 : MAX_SCHEDULE_TIMEOUT; 224 225 int r = bch2_stdio_redirect_readline_timeout(stdio, &line, t); 226 if (r == -ETIME) { 227 bch2_trans_unlock_long(trans); 228 unlock_long_at = 0; 229 goto rewait; 230 } 231 232 if (r < 0) { 233 ret = YN_NO; 234 break; 235 } 236 237 darray_last(line) = '\0'; 238 } while ((ret = parse_yn_response(line.data)) < 0); 239 240 darray_exit(&line); 241 return ret; 242 } 243 #else 244 245 #include "tools-util.h" 246 247 static enum ask_yn bch2_fsck_ask_yn(struct bch_fs *c, struct btree_trans *trans) 248 { 249 char *buf = NULL; 250 size_t buflen = 0; 251 int ret; 252 253 do { 254 fputs(" (y,n, or Y,N for all errors of this type) ", stdout); 255 fflush(stdout); 256 257 if (getline(&buf, &buflen, stdin) < 0) 258 die("error reading from standard input"); 259 } while ((ret = parse_yn_response(buf)) < 0); 260 261 free(buf); 262 return ret; 263 } 264 265 #endif 266 267 static struct fsck_err_state *fsck_err_get(struct bch_fs *c, 268 enum bch_sb_error_id id) 269 { 270 struct fsck_err_state *s; 271 272 if (!test_bit(BCH_FS_fsck_running, &c->flags)) 273 return NULL; 274 275 list_for_each_entry(s, &c->fsck_error_msgs, list) 276 if (s->id == id) { 277 /* 278 * move it to the head of the list: repeated fsck errors 279 * are common 280 */ 281 list_move(&s->list, &c->fsck_error_msgs); 282 return s; 283 } 284 285 s = kzalloc(sizeof(*s), GFP_NOFS); 286 if (!s) { 287 if (!c->fsck_alloc_msgs_err) 288 bch_err(c, "kmalloc err, cannot ratelimit fsck errs"); 289 c->fsck_alloc_msgs_err = true; 290 return NULL; 291 } 292 293 INIT_LIST_HEAD(&s->list); 294 s->id = id; 295 list_add(&s->list, &c->fsck_error_msgs); 296 return s; 297 } 298 299 /* s/fix?/fixing/ s/recreate?/recreating/ */ 300 static void prt_actioning(struct printbuf *out, const char *action) 301 { 302 unsigned len = strlen(action); 303 304 BUG_ON(action[len - 1] != '?'); 305 --len; 306 307 if (action[len - 1] == 'e') 308 --len; 309 310 prt_bytes(out, action, len); 311 prt_str(out, "ing"); 312 } 313 314 static const u8 fsck_flags_extra[] = { 315 #define x(t, n, flags) [BCH_FSCK_ERR_##t] = flags, 316 BCH_SB_ERRS() 317 #undef x 318 }; 319 320 static int do_fsck_ask_yn(struct bch_fs *c, 321 struct btree_trans *trans, 322 struct printbuf *question, 323 const char *action) 324 { 325 prt_str(question, ", "); 326 prt_str(question, action); 327 328 if (bch2_fs_stdio_redirect(c)) 329 bch2_print(c, "%s", question->buf); 330 else 331 bch2_print_string_as_lines(KERN_ERR, question->buf); 332 333 int ask = bch2_fsck_ask_yn(c, trans); 334 335 if (trans) { 336 int ret = bch2_trans_relock(trans); 337 if (ret) 338 return ret; 339 } 340 341 return ask; 342 } 343 344 static struct fsck_err_state *count_fsck_err_locked(struct bch_fs *c, 345 enum bch_sb_error_id id, const char *msg, 346 bool *repeat, bool *print, bool *suppress) 347 { 348 bch2_sb_error_count(c, id); 349 350 struct fsck_err_state *s = fsck_err_get(c, id); 351 if (s) { 352 /* 353 * We may be called multiple times for the same error on 354 * transaction restart - this memoizes instead of asking the user 355 * multiple times for the same error: 356 */ 357 if (s->last_msg && !strcmp(msg, s->last_msg)) { 358 *repeat = true; 359 *print = false; 360 return s; 361 } 362 363 kfree(s->last_msg); 364 s->last_msg = kstrdup(msg, GFP_KERNEL); 365 366 if (c->opts.ratelimit_errors && 367 s->nr >= FSCK_ERR_RATELIMIT_NR) { 368 if (s->nr == FSCK_ERR_RATELIMIT_NR) 369 *suppress = true; 370 else 371 *print = false; 372 } 373 374 s->nr++; 375 } 376 return s; 377 } 378 379 void __bch2_count_fsck_err(struct bch_fs *c, 380 enum bch_sb_error_id id, const char *msg, 381 bool *repeat, bool *print, bool *suppress) 382 { 383 bch2_sb_error_count(c, id); 384 385 mutex_lock(&c->fsck_error_msgs_lock); 386 count_fsck_err_locked(c, id, msg, repeat, print, suppress); 387 mutex_unlock(&c->fsck_error_msgs_lock); 388 } 389 390 int __bch2_fsck_err(struct bch_fs *c, 391 struct btree_trans *trans, 392 enum bch_fsck_flags flags, 393 enum bch_sb_error_id err, 394 const char *fmt, ...) 395 { 396 va_list args; 397 struct printbuf buf = PRINTBUF, *out = &buf; 398 int ret = -BCH_ERR_fsck_ignore; 399 const char *action_orig = "fix?", *action = action_orig; 400 401 might_sleep(); 402 403 if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) 404 flags |= fsck_flags_extra[err]; 405 406 if (!c) 407 c = trans->c; 408 409 /* 410 * Ugly: if there's a transaction in the current task it has to be 411 * passed in to unlock if we prompt for user input. 412 * 413 * But, plumbing a transaction and transaction restarts into 414 * bkey_validate() is problematic. 415 * 416 * So: 417 * - make all bkey errors AUTOFIX, they're simple anyways (we just 418 * delete the key) 419 * - and we don't need to warn if we're not prompting 420 */ 421 WARN_ON((flags & FSCK_CAN_FIX) && 422 !(flags & FSCK_AUTOFIX) && 423 !trans && 424 bch2_current_has_btree_trans(c)); 425 426 if (test_bit(err, c->sb.errors_silent)) 427 return flags & FSCK_CAN_FIX 428 ? -BCH_ERR_fsck_fix 429 : -BCH_ERR_fsck_ignore; 430 431 printbuf_indent_add_nextline(out, 2); 432 433 #ifdef BCACHEFS_LOG_PREFIX 434 if (strncmp(fmt, "bcachefs", 8)) 435 prt_printf(out, bch2_log_msg(c, "")); 436 #endif 437 438 va_start(args, fmt); 439 prt_vprintf(out, fmt, args); 440 va_end(args); 441 442 /* Custom fix/continue/recreate/etc.? */ 443 if (out->buf[out->pos - 1] == '?') { 444 const char *p = strrchr(out->buf, ','); 445 if (p) { 446 out->pos = p - out->buf; 447 action = kstrdup(p + 2, GFP_KERNEL); 448 if (!action) { 449 ret = -ENOMEM; 450 goto err; 451 } 452 } 453 } 454 455 mutex_lock(&c->fsck_error_msgs_lock); 456 bool repeat = false, print = true, suppress = false; 457 bool inconsistent = false, exiting = false; 458 struct fsck_err_state *s = 459 count_fsck_err_locked(c, err, buf.buf, &repeat, &print, &suppress); 460 if (repeat) { 461 ret = s->ret; 462 goto err_unlock; 463 } 464 465 if ((flags & FSCK_AUTOFIX) && 466 (c->opts.errors == BCH_ON_ERROR_continue || 467 c->opts.errors == BCH_ON_ERROR_fix_safe)) { 468 prt_str(out, ", "); 469 if (flags & FSCK_CAN_FIX) { 470 prt_actioning(out, action); 471 ret = -BCH_ERR_fsck_fix; 472 } else { 473 prt_str(out, ", continuing"); 474 ret = -BCH_ERR_fsck_ignore; 475 } 476 477 goto print; 478 } else if (!test_bit(BCH_FS_fsck_running, &c->flags)) { 479 if (c->opts.errors != BCH_ON_ERROR_continue || 480 !(flags & (FSCK_CAN_FIX|FSCK_CAN_IGNORE))) { 481 prt_str(out, ", shutting down"); 482 inconsistent = true; 483 print = true; 484 ret = -BCH_ERR_fsck_errors_not_fixed; 485 } else if (flags & FSCK_CAN_FIX) { 486 prt_str(out, ", "); 487 prt_actioning(out, action); 488 ret = -BCH_ERR_fsck_fix; 489 } else { 490 prt_str(out, ", continuing"); 491 ret = -BCH_ERR_fsck_ignore; 492 } 493 } else if (c->opts.fix_errors == FSCK_FIX_exit) { 494 prt_str(out, ", exiting"); 495 ret = -BCH_ERR_fsck_errors_not_fixed; 496 } else if (flags & FSCK_CAN_FIX) { 497 int fix = s && s->fix 498 ? s->fix 499 : c->opts.fix_errors; 500 501 if (fix == FSCK_FIX_ask) { 502 print = false; 503 504 ret = do_fsck_ask_yn(c, trans, out, action); 505 if (ret < 0) 506 goto err_unlock; 507 508 if (ret >= YN_ALLNO && s) 509 s->fix = ret == YN_ALLNO 510 ? FSCK_FIX_no 511 : FSCK_FIX_yes; 512 513 ret = ret & 1 514 ? -BCH_ERR_fsck_fix 515 : -BCH_ERR_fsck_ignore; 516 } else if (fix == FSCK_FIX_yes || 517 (c->opts.nochanges && 518 !(flags & FSCK_CAN_IGNORE))) { 519 prt_str(out, ", "); 520 prt_actioning(out, action); 521 ret = -BCH_ERR_fsck_fix; 522 } else { 523 prt_str(out, ", not "); 524 prt_actioning(out, action); 525 } 526 } else if (!(flags & FSCK_CAN_IGNORE)) { 527 prt_str(out, " (repair unimplemented)"); 528 } 529 530 if (ret == -BCH_ERR_fsck_ignore && 531 (c->opts.fix_errors == FSCK_FIX_exit || 532 !(flags & FSCK_CAN_IGNORE))) 533 ret = -BCH_ERR_fsck_errors_not_fixed; 534 535 if (test_bit(BCH_FS_fsck_running, &c->flags) && 536 (ret != -BCH_ERR_fsck_fix && 537 ret != -BCH_ERR_fsck_ignore)) { 538 exiting = true; 539 print = true; 540 } 541 print: 542 prt_newline(out); 543 544 if (inconsistent) 545 __bch2_inconsistent_error(c, out); 546 else if (exiting) 547 prt_printf(out, "Unable to continue, halting\n"); 548 else if (suppress) 549 prt_printf(out, "Ratelimiting new instances of previous error\n"); 550 551 if (print) { 552 /* possibly strip an empty line, from printbuf_indent_add */ 553 while (out->pos && out->buf[out->pos - 1] == ' ') 554 --out->pos; 555 printbuf_nul_terminate(out); 556 557 if (bch2_fs_stdio_redirect(c)) 558 bch2_print(c, "%s", out->buf); 559 else 560 bch2_print_string_as_lines(KERN_ERR, out->buf); 561 } 562 563 if (s) 564 s->ret = ret; 565 566 /* 567 * We don't yet track whether the filesystem currently has errors, for 568 * log_fsck_err()s: that would require us to track for every error type 569 * which recovery pass corrects it, to get the fsck exit status correct: 570 */ 571 if (flags & FSCK_CAN_FIX) { 572 if (ret == -BCH_ERR_fsck_fix) { 573 set_bit(BCH_FS_errors_fixed, &c->flags); 574 } else { 575 set_bit(BCH_FS_errors_not_fixed, &c->flags); 576 set_bit(BCH_FS_error, &c->flags); 577 } 578 } 579 err_unlock: 580 mutex_unlock(&c->fsck_error_msgs_lock); 581 err: 582 if (action != action_orig) 583 kfree(action); 584 printbuf_exit(&buf); 585 return ret; 586 } 587 588 static const char * const bch2_bkey_validate_contexts[] = { 589 #define x(n) #n, 590 BKEY_VALIDATE_CONTEXTS() 591 #undef x 592 NULL 593 }; 594 595 int __bch2_bkey_fsck_err(struct bch_fs *c, 596 struct bkey_s_c k, 597 struct bkey_validate_context from, 598 enum bch_sb_error_id err, 599 const char *fmt, ...) 600 { 601 if (from.flags & BCH_VALIDATE_silent) 602 return -BCH_ERR_fsck_delete_bkey; 603 604 unsigned fsck_flags = 0; 605 if (!(from.flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) { 606 if (test_bit(err, c->sb.errors_silent)) 607 return -BCH_ERR_fsck_delete_bkey; 608 609 fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX; 610 } 611 if (!WARN_ON(err >= ARRAY_SIZE(fsck_flags_extra))) 612 fsck_flags |= fsck_flags_extra[err]; 613 614 struct printbuf buf = PRINTBUF; 615 prt_printf(&buf, "invalid bkey in %s", 616 bch2_bkey_validate_contexts[from.from]); 617 618 if (from.from == BKEY_VALIDATE_journal) 619 prt_printf(&buf, " journal seq=%llu offset=%u", 620 from.journal_seq, from.journal_offset); 621 622 prt_str(&buf, " btree="); 623 bch2_btree_id_to_text(&buf, from.btree); 624 prt_printf(&buf, " level=%u: ", from.level); 625 626 bch2_bkey_val_to_text(&buf, c, k); 627 prt_newline(&buf); 628 629 va_list args; 630 va_start(args, fmt); 631 prt_vprintf(&buf, fmt, args); 632 va_end(args); 633 634 int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s, delete?", buf.buf); 635 printbuf_exit(&buf); 636 return ret; 637 } 638 639 void bch2_flush_fsck_errs(struct bch_fs *c) 640 { 641 struct fsck_err_state *s, *n; 642 643 mutex_lock(&c->fsck_error_msgs_lock); 644 645 list_for_each_entry_safe(s, n, &c->fsck_error_msgs, list) { 646 if (s->ratelimited && s->last_msg) 647 bch_err(c, "Saw %llu errors like:\n %s", s->nr, s->last_msg); 648 649 list_del(&s->list); 650 kfree(s->last_msg); 651 kfree(s); 652 } 653 654 mutex_unlock(&c->fsck_error_msgs_lock); 655 } 656 657 int bch2_inum_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, 658 subvol_inum inum, u64 offset) 659 { 660 u32 restart_count = trans->restart_count; 661 int ret = 0; 662 663 if (inum.subvol) { 664 ret = bch2_inum_to_path(trans, inum, out); 665 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 666 return ret; 667 } 668 if (!inum.subvol || ret) 669 prt_printf(out, "inum %llu:%llu", inum.subvol, inum.inum); 670 prt_printf(out, " offset %llu: ", offset); 671 672 return trans_was_restarted(trans, restart_count); 673 } 674 675 void bch2_inum_offset_err_msg(struct bch_fs *c, struct printbuf *out, 676 subvol_inum inum, u64 offset) 677 { 678 bch2_trans_do(c, bch2_inum_offset_err_msg_trans(trans, out, inum, offset)); 679 } 680 681 int bch2_inum_snap_offset_err_msg_trans(struct btree_trans *trans, struct printbuf *out, 682 struct bpos pos) 683 { 684 struct bch_fs *c = trans->c; 685 int ret = 0; 686 687 if (!bch2_snapshot_is_leaf(c, pos.snapshot)) 688 prt_str(out, "(multiple snapshots) "); 689 690 subvol_inum inum = { 691 .subvol = bch2_snapshot_tree_oldest_subvol(c, pos.snapshot), 692 .inum = pos.inode, 693 }; 694 695 if (inum.subvol) { 696 ret = bch2_inum_to_path(trans, inum, out); 697 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 698 return ret; 699 } 700 701 if (!inum.subvol || ret) 702 prt_printf(out, "inum %llu:%u", pos.inode, pos.snapshot); 703 704 prt_printf(out, " offset %llu: ", pos.offset << 8); 705 return 0; 706 } 707 708 void bch2_inum_snap_offset_err_msg(struct bch_fs *c, struct printbuf *out, 709 struct bpos pos) 710 { 711 bch2_trans_do(c, bch2_inum_snap_offset_err_msg_trans(trans, out, pos)); 712 } 713