1 // SPDX-License-Identifier: GPL-2.0 2 #include "bcachefs.h" 3 #include "bbpos.h" 4 #include "alloc_background.h" 5 #include "backpointers.h" 6 #include "bkey_buf.h" 7 #include "btree_cache.h" 8 #include "btree_update.h" 9 #include "btree_update_interior.h" 10 #include "btree_write_buffer.h" 11 #include "checksum.h" 12 #include "disk_accounting.h" 13 #include "error.h" 14 15 #include <linux/mm.h> 16 17 int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, 18 struct bkey_validate_context from) 19 { 20 struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); 21 int ret = 0; 22 23 bkey_fsck_err_on(bp.v->level > BTREE_MAX_DEPTH, 24 c, backpointer_level_bad, 25 "backpointer level bad: %u >= %u", 26 bp.v->level, BTREE_MAX_DEPTH); 27 28 bkey_fsck_err_on(bp.k->p.inode == BCH_SB_MEMBER_INVALID, 29 c, backpointer_dev_bad, 30 "backpointer for BCH_SB_MEMBER_INVALID"); 31 fsck_err: 32 return ret; 33 } 34 35 void bch2_backpointer_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) 36 { 37 struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); 38 39 rcu_read_lock(); 40 struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode); 41 if (ca) { 42 u32 bucket_offset; 43 struct bpos bucket = bp_pos_to_bucket_and_offset(ca, bp.k->p, &bucket_offset); 44 rcu_read_unlock(); 45 prt_printf(out, "bucket=%llu:%llu:%u ", bucket.inode, bucket.offset, bucket_offset); 46 } else { 47 rcu_read_unlock(); 48 prt_printf(out, "sector=%llu:%llu ", bp.k->p.inode, bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT); 49 } 50 51 bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level); 52 prt_printf(out, " suboffset=%u len=%u gen=%u pos=", 53 (u32) bp.k->p.offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), 54 bp.v->bucket_len, 55 bp.v->bucket_gen); 56 bch2_bpos_to_text(out, bp.v->pos); 57 } 58 59 void bch2_backpointer_swab(struct bkey_s k) 60 { 61 struct bkey_s_backpointer bp = bkey_s_to_backpointer(k); 62 63 bp.v->bucket_len = swab32(bp.v->bucket_len); 64 bch2_bpos_swab(&bp.v->pos); 65 } 66 67 static bool extent_matches_bp(struct bch_fs *c, 68 enum btree_id btree_id, unsigned level, 69 struct bkey_s_c k, 70 struct bkey_s_c_backpointer bp) 71 { 72 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 73 const union bch_extent_entry *entry; 74 struct extent_ptr_decoded p; 75 76 rcu_read_lock(); 77 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 78 struct bpos bucket2; 79 struct bkey_i_backpointer bp2; 80 81 if (p.ptr.cached) 82 continue; 83 84 struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); 85 if (!ca) 86 continue; 87 88 bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, &bucket2, &bp2); 89 if (bpos_eq(bp.k->p, bp2.k.p) && 90 !memcmp(bp.v, &bp2.v, sizeof(bp2.v))) { 91 rcu_read_unlock(); 92 return true; 93 } 94 } 95 rcu_read_unlock(); 96 97 return false; 98 } 99 100 static noinline int backpointer_mod_err(struct btree_trans *trans, 101 struct bkey_s_c orig_k, 102 struct bkey_i_backpointer *new_bp, 103 struct bkey_s_c found_bp, 104 bool insert) 105 { 106 struct bch_fs *c = trans->c; 107 struct printbuf buf = PRINTBUF; 108 109 if (insert) { 110 prt_printf(&buf, "existing backpointer found when inserting "); 111 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i)); 112 prt_newline(&buf); 113 printbuf_indent_add(&buf, 2); 114 115 prt_printf(&buf, "found "); 116 bch2_bkey_val_to_text(&buf, c, found_bp); 117 prt_newline(&buf); 118 119 prt_printf(&buf, "for "); 120 bch2_bkey_val_to_text(&buf, c, orig_k); 121 122 bch_err(c, "%s", buf.buf); 123 } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { 124 prt_printf(&buf, "backpointer not found when deleting\n"); 125 printbuf_indent_add(&buf, 2); 126 127 prt_printf(&buf, "searching for "); 128 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i)); 129 prt_newline(&buf); 130 131 prt_printf(&buf, "got "); 132 bch2_bkey_val_to_text(&buf, c, found_bp); 133 prt_newline(&buf); 134 135 prt_printf(&buf, "for "); 136 bch2_bkey_val_to_text(&buf, c, orig_k); 137 138 bch_err(c, "%s", buf.buf); 139 } 140 141 printbuf_exit(&buf); 142 143 if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { 144 return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0; 145 } else { 146 return 0; 147 } 148 } 149 150 int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, 151 struct bkey_s_c orig_k, 152 struct bkey_i_backpointer *bp, 153 bool insert) 154 { 155 struct btree_iter bp_iter; 156 struct bkey_s_c k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, 157 bp->k.p, 158 BTREE_ITER_intent| 159 BTREE_ITER_slots| 160 BTREE_ITER_with_updates); 161 int ret = bkey_err(k); 162 if (ret) 163 return ret; 164 165 if (insert 166 ? k.k->type 167 : (k.k->type != KEY_TYPE_backpointer || 168 memcmp(bkey_s_c_to_backpointer(k).v, &bp->v, sizeof(bp->v)))) { 169 ret = backpointer_mod_err(trans, orig_k, bp, k, insert); 170 if (ret) 171 goto err; 172 } 173 174 if (!insert) { 175 bp->k.type = KEY_TYPE_deleted; 176 set_bkey_val_u64s(&bp->k, 0); 177 } 178 179 ret = bch2_trans_update(trans, &bp_iter, &bp->k_i, 0); 180 err: 181 bch2_trans_iter_exit(trans, &bp_iter); 182 return ret; 183 } 184 185 static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos) 186 { 187 return likely(!bch2_backpointers_no_use_write_buffer) 188 ? bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos) 189 : bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0); 190 } 191 192 static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans, 193 struct bkey_s_c visiting_k, 194 struct bkey_buf *last_flushed) 195 { 196 return likely(!bch2_backpointers_no_use_write_buffer) 197 ? bch2_btree_write_buffer_maybe_flush(trans, visiting_k, last_flushed) 198 : 0; 199 } 200 201 static void backpointer_target_not_found(struct btree_trans *trans, 202 struct bkey_s_c_backpointer bp, 203 struct bkey_s_c target_k) 204 { 205 struct bch_fs *c = trans->c; 206 struct printbuf buf = PRINTBUF; 207 208 /* 209 * If we're using the btree write buffer, the backpointer we were 210 * looking at may have already been deleted - failure to find what it 211 * pointed to is not an error: 212 */ 213 if (likely(!bch2_backpointers_no_use_write_buffer)) 214 return; 215 216 prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", 217 bp.v->level ? "btree node" : "extent"); 218 bch2_bkey_val_to_text(&buf, c, bp.s_c); 219 prt_printf(&buf, "\n "); 220 221 bch2_bkey_val_to_text(&buf, c, target_k); 222 if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers) 223 bch_err_ratelimited(c, "%s", buf.buf); 224 else 225 bch2_trans_inconsistent(trans, "%s", buf.buf); 226 227 printbuf_exit(&buf); 228 } 229 230 struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, 231 struct bkey_s_c_backpointer bp, 232 struct btree_iter *iter, 233 unsigned iter_flags) 234 { 235 struct bch_fs *c = trans->c; 236 237 if (unlikely(bp.v->btree_id >= btree_id_nr_alive(c))) 238 return bkey_s_c_null; 239 240 if (likely(!bp.v->level)) { 241 bch2_trans_node_iter_init(trans, iter, 242 bp.v->btree_id, 243 bp.v->pos, 244 0, 0, 245 iter_flags); 246 struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); 247 if (bkey_err(k)) { 248 bch2_trans_iter_exit(trans, iter); 249 return k; 250 } 251 252 if (k.k && 253 extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp)) 254 return k; 255 256 bch2_trans_iter_exit(trans, iter); 257 backpointer_target_not_found(trans, bp, k); 258 return bkey_s_c_null; 259 } else { 260 struct btree *b = bch2_backpointer_get_node(trans, bp, iter); 261 262 if (IS_ERR_OR_NULL(b)) { 263 bch2_trans_iter_exit(trans, iter); 264 return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null; 265 } 266 return bkey_i_to_s_c(&b->key); 267 } 268 } 269 270 struct btree *bch2_backpointer_get_node(struct btree_trans *trans, 271 struct bkey_s_c_backpointer bp, 272 struct btree_iter *iter) 273 { 274 struct bch_fs *c = trans->c; 275 276 BUG_ON(!bp.v->level); 277 278 bch2_trans_node_iter_init(trans, iter, 279 bp.v->btree_id, 280 bp.v->pos, 281 0, 282 bp.v->level - 1, 283 0); 284 struct btree *b = bch2_btree_iter_peek_node(iter); 285 if (IS_ERR_OR_NULL(b)) 286 goto err; 287 288 BUG_ON(b->c.level != bp.v->level - 1); 289 290 if (extent_matches_bp(c, bp.v->btree_id, bp.v->level, 291 bkey_i_to_s_c(&b->key), bp)) 292 return b; 293 294 if (btree_node_will_make_reachable(b)) { 295 b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); 296 } else { 297 backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key)); 298 b = NULL; 299 } 300 err: 301 bch2_trans_iter_exit(trans, iter); 302 return b; 303 } 304 305 static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, struct bkey_s_c k, 306 struct bkey_buf *last_flushed) 307 { 308 if (k.k->type != KEY_TYPE_backpointer) 309 return 0; 310 311 struct bch_fs *c = trans->c; 312 struct btree_iter alloc_iter = { NULL }; 313 struct bkey_s_c alloc_k; 314 struct printbuf buf = PRINTBUF; 315 int ret = 0; 316 317 struct bpos bucket; 318 if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) { 319 ret = bch2_backpointers_maybe_flush(trans, k, last_flushed); 320 if (ret) 321 goto out; 322 323 if (fsck_err(trans, backpointer_to_missing_device, 324 "backpointer for missing device:\n%s", 325 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 326 ret = bch2_backpointer_del(trans, k.k->p); 327 goto out; 328 } 329 330 alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, 0); 331 ret = bkey_err(alloc_k); 332 if (ret) 333 goto out; 334 335 if (alloc_k.k->type != KEY_TYPE_alloc_v4) { 336 ret = bch2_backpointers_maybe_flush(trans, k, last_flushed); 337 if (ret) 338 goto out; 339 340 if (fsck_err(trans, backpointer_to_missing_alloc, 341 "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", 342 alloc_iter.pos.inode, alloc_iter.pos.offset, 343 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 344 ret = bch2_backpointer_del(trans, k.k->p); 345 } 346 out: 347 fsck_err: 348 bch2_trans_iter_exit(trans, &alloc_iter); 349 printbuf_exit(&buf); 350 return ret; 351 } 352 353 /* verify that every backpointer has a corresponding alloc key */ 354 int bch2_check_btree_backpointers(struct bch_fs *c) 355 { 356 struct bkey_buf last_flushed; 357 bch2_bkey_buf_init(&last_flushed); 358 bkey_init(&last_flushed.k->k); 359 360 int ret = bch2_trans_run(c, 361 for_each_btree_key_commit(trans, iter, 362 BTREE_ID_backpointers, POS_MIN, 0, k, 363 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 364 bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed))); 365 366 bch2_bkey_buf_exit(&last_flushed, c); 367 bch_err_fn(c, ret); 368 return ret; 369 } 370 371 struct extents_to_bp_state { 372 struct bpos bp_start; 373 struct bpos bp_end; 374 struct bkey_buf last_flushed; 375 }; 376 377 static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree, 378 struct bkey_s_c extent, unsigned dev) 379 { 380 struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent); 381 int ret = PTR_ERR_OR_ZERO(n); 382 if (ret) 383 return ret; 384 385 bch2_bkey_drop_device(bkey_i_to_s(n), dev); 386 return bch2_btree_insert_trans(trans, btree, n, 0); 387 } 388 389 static int check_extent_checksum(struct btree_trans *trans, 390 enum btree_id btree, struct bkey_s_c extent, 391 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev) 392 { 393 struct bch_fs *c = trans->c; 394 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent); 395 const union bch_extent_entry *entry; 396 struct extent_ptr_decoded p; 397 struct printbuf buf = PRINTBUF; 398 void *data_buf = NULL; 399 struct bio *bio = NULL; 400 size_t bytes; 401 int ret = 0; 402 403 if (bkey_is_btree_ptr(extent.k)) 404 return false; 405 406 bkey_for_each_ptr_decode(extent.k, ptrs, p, entry) 407 if (p.ptr.dev == dev) 408 goto found; 409 BUG(); 410 found: 411 if (!p.crc.csum_type) 412 return false; 413 414 bytes = p.crc.compressed_size << 9; 415 416 struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ); 417 if (!ca) 418 return false; 419 420 data_buf = kvmalloc(bytes, GFP_KERNEL); 421 if (!data_buf) { 422 ret = -ENOMEM; 423 goto err; 424 } 425 426 bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL); 427 bio->bi_iter.bi_sector = p.ptr.offset; 428 bch2_bio_map(bio, data_buf, bytes); 429 ret = submit_bio_wait(bio); 430 if (ret) 431 goto err; 432 433 prt_str(&buf, "extents pointing to same space, but first extent checksum bad:"); 434 prt_printf(&buf, "\n "); 435 bch2_btree_id_to_text(&buf, btree); 436 prt_str(&buf, " "); 437 bch2_bkey_val_to_text(&buf, c, extent); 438 prt_printf(&buf, "\n "); 439 bch2_btree_id_to_text(&buf, o_btree); 440 prt_str(&buf, " "); 441 bch2_bkey_val_to_text(&buf, c, extent2); 442 443 struct nonce nonce = extent_nonce(extent.k->bversion, p.crc); 444 struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes); 445 if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum), 446 trans, dup_backpointer_to_bad_csum_extent, 447 "%s", buf.buf)) 448 ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1; 449 fsck_err: 450 err: 451 if (bio) 452 bio_put(bio); 453 kvfree(data_buf); 454 percpu_ref_put(&ca->io_ref); 455 printbuf_exit(&buf); 456 return ret; 457 } 458 459 static int check_bp_exists(struct btree_trans *trans, 460 struct extents_to_bp_state *s, 461 struct bkey_i_backpointer *bp, 462 struct bkey_s_c orig_k) 463 { 464 struct bch_fs *c = trans->c; 465 struct btree_iter other_extent_iter = {}; 466 struct printbuf buf = PRINTBUF; 467 468 if (bpos_lt(bp->k.p, s->bp_start) || 469 bpos_gt(bp->k.p, s->bp_end)) 470 return 0; 471 472 struct btree_iter bp_iter; 473 struct bkey_s_c bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, bp->k.p, 0); 474 int ret = bkey_err(bp_k); 475 if (ret) 476 goto err; 477 478 if (bp_k.k->type != KEY_TYPE_backpointer || 479 memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp->v, sizeof(bp->v))) { 480 ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed); 481 if (ret) 482 goto err; 483 484 goto check_existing_bp; 485 } 486 out: 487 err: 488 fsck_err: 489 bch2_trans_iter_exit(trans, &other_extent_iter); 490 bch2_trans_iter_exit(trans, &bp_iter); 491 printbuf_exit(&buf); 492 return ret; 493 check_existing_bp: 494 /* Do we have a backpointer for a different extent? */ 495 if (bp_k.k->type != KEY_TYPE_backpointer) 496 goto missing; 497 498 struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k); 499 500 struct bkey_s_c other_extent = 501 bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0); 502 ret = bkey_err(other_extent); 503 if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) 504 ret = 0; 505 if (ret) 506 goto err; 507 508 if (!other_extent.k) 509 goto missing; 510 511 if (bch2_extents_match(orig_k, other_extent)) { 512 printbuf_reset(&buf); 513 prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n "); 514 bch2_bkey_val_to_text(&buf, c, orig_k); 515 prt_str(&buf, "\n "); 516 bch2_bkey_val_to_text(&buf, c, other_extent); 517 bch_err(c, "%s", buf.buf); 518 519 if (other_extent.k->size <= orig_k.k->size) { 520 ret = drop_dev_and_update(trans, other_bp.v->btree_id, 521 other_extent, bp->k.p.inode); 522 if (ret) 523 goto err; 524 goto out; 525 } else { 526 ret = drop_dev_and_update(trans, bp->v.btree_id, orig_k, bp->k.p.inode); 527 if (ret) 528 goto err; 529 goto missing; 530 } 531 } 532 533 ret = check_extent_checksum(trans, 534 other_bp.v->btree_id, other_extent, 535 bp->v.btree_id, orig_k, 536 bp->k.p.inode); 537 if (ret < 0) 538 goto err; 539 if (ret) { 540 ret = 0; 541 goto missing; 542 } 543 544 ret = check_extent_checksum(trans, bp->v.btree_id, orig_k, 545 other_bp.v->btree_id, other_extent, bp->k.p.inode); 546 if (ret < 0) 547 goto err; 548 if (ret) { 549 ret = 0; 550 goto out; 551 } 552 553 printbuf_reset(&buf); 554 prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bp->k.p.inode); 555 bch2_bkey_val_to_text(&buf, c, orig_k); 556 prt_str(&buf, "\n "); 557 bch2_bkey_val_to_text(&buf, c, other_extent); 558 bch_err(c, "%s", buf.buf); 559 ret = -BCH_ERR_fsck_repair_unimplemented; 560 goto err; 561 missing: 562 printbuf_reset(&buf); 563 prt_str(&buf, "missing backpointer "); 564 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i)); 565 prt_newline(&buf); 566 bch2_bkey_val_to_text(&buf, c, orig_k); 567 prt_printf(&buf, "\n got: "); 568 bch2_bkey_val_to_text(&buf, c, bp_k); 569 570 if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf)) 571 ret = bch2_bucket_backpointer_mod(trans, orig_k, bp, true); 572 573 goto out; 574 } 575 576 static int check_extent_to_backpointers(struct btree_trans *trans, 577 struct extents_to_bp_state *s, 578 enum btree_id btree, unsigned level, 579 struct bkey_s_c k) 580 { 581 struct bch_fs *c = trans->c; 582 struct bkey_ptrs_c ptrs; 583 const union bch_extent_entry *entry; 584 struct extent_ptr_decoded p; 585 int ret; 586 587 ptrs = bch2_bkey_ptrs_c(k); 588 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 589 struct bpos bucket_pos; 590 struct bkey_i_backpointer bp; 591 592 if (p.ptr.cached) 593 continue; 594 595 rcu_read_lock(); 596 struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev); 597 if (ca) 598 bch2_extent_ptr_to_bp(c, ca, btree, level, k, p, entry, &bucket_pos, &bp); 599 rcu_read_unlock(); 600 601 if (!ca) 602 continue; 603 604 ret = check_bp_exists(trans, s, &bp, k); 605 if (ret) 606 return ret; 607 } 608 609 return 0; 610 } 611 612 static int check_btree_root_to_backpointers(struct btree_trans *trans, 613 struct extents_to_bp_state *s, 614 enum btree_id btree_id, 615 int *level) 616 { 617 struct bch_fs *c = trans->c; 618 struct btree_iter iter; 619 struct btree *b; 620 struct bkey_s_c k; 621 int ret; 622 retry: 623 bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 624 0, bch2_btree_id_root(c, btree_id)->b->c.level, 0); 625 b = bch2_btree_iter_peek_node(&iter); 626 ret = PTR_ERR_OR_ZERO(b); 627 if (ret) 628 goto err; 629 630 if (b != btree_node_root(c, b)) { 631 bch2_trans_iter_exit(trans, &iter); 632 goto retry; 633 } 634 635 *level = b->c.level; 636 637 k = bkey_i_to_s_c(&b->key); 638 ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k); 639 err: 640 bch2_trans_iter_exit(trans, &iter); 641 return ret; 642 } 643 644 static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp) 645 { 646 return (struct bbpos) { 647 .btree = bp.btree_id, 648 .pos = bp.pos, 649 }; 650 } 651 652 static u64 mem_may_pin_bytes(struct bch_fs *c) 653 { 654 struct sysinfo i; 655 si_meminfo(&i); 656 657 u64 mem_bytes = i.totalram * i.mem_unit; 658 return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100); 659 } 660 661 static size_t btree_nodes_fit_in_ram(struct bch_fs *c) 662 { 663 return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size); 664 } 665 666 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, 667 u64 btree_leaf_mask, 668 u64 btree_interior_mask, 669 struct bbpos start, struct bbpos *end) 670 { 671 struct bch_fs *c = trans->c; 672 s64 mem_may_pin = mem_may_pin_bytes(c); 673 int ret = 0; 674 675 bch2_btree_cache_unpin(c); 676 677 btree_interior_mask |= btree_leaf_mask; 678 679 c->btree_cache.pinned_nodes_mask[0] = btree_leaf_mask; 680 c->btree_cache.pinned_nodes_mask[1] = btree_interior_mask; 681 c->btree_cache.pinned_nodes_start = start; 682 c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX; 683 684 for (enum btree_id btree = start.btree; 685 btree < BTREE_ID_NR && !ret; 686 btree++) { 687 unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1; 688 689 if (!(BIT_ULL(btree) & btree_leaf_mask) && 690 !(BIT_ULL(btree) & btree_interior_mask)) 691 continue; 692 693 ret = __for_each_btree_node(trans, iter, btree, 694 btree == start.btree ? start.pos : POS_MIN, 695 0, depth, BTREE_ITER_prefetch, b, ({ 696 mem_may_pin -= btree_buf_bytes(b); 697 if (mem_may_pin <= 0) { 698 c->btree_cache.pinned_nodes_end = *end = 699 BBPOS(btree, b->key.k.p); 700 break; 701 } 702 bch2_node_pin(c, b); 703 0; 704 })); 705 } 706 707 return ret; 708 } 709 710 struct progress_indicator_state { 711 unsigned long next_print; 712 u64 nodes_seen; 713 u64 nodes_total; 714 struct btree *last_node; 715 }; 716 717 static inline void progress_init(struct progress_indicator_state *s, 718 struct bch_fs *c, 719 u64 btree_id_mask) 720 { 721 memset(s, 0, sizeof(*s)); 722 723 s->next_print = jiffies + HZ * 10; 724 725 for (unsigned i = 0; i < BTREE_ID_NR; i++) { 726 if (!(btree_id_mask & BIT_ULL(i))) 727 continue; 728 729 struct disk_accounting_pos acc = { 730 .type = BCH_DISK_ACCOUNTING_btree, 731 .btree.id = i, 732 }; 733 734 u64 v; 735 bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1); 736 s->nodes_total += div64_ul(v, btree_sectors(c)); 737 } 738 } 739 740 static inline bool progress_update_p(struct progress_indicator_state *s) 741 { 742 bool ret = time_after_eq(jiffies, s->next_print); 743 744 if (ret) 745 s->next_print = jiffies + HZ * 10; 746 return ret; 747 } 748 749 static void progress_update_iter(struct btree_trans *trans, 750 struct progress_indicator_state *s, 751 struct btree_iter *iter, 752 const char *msg) 753 { 754 struct bch_fs *c = trans->c; 755 struct btree *b = path_l(btree_iter_path(trans, iter))->b; 756 757 s->nodes_seen += b != s->last_node; 758 s->last_node = b; 759 760 if (progress_update_p(s)) { 761 struct printbuf buf = PRINTBUF; 762 unsigned percent = s->nodes_total 763 ? div64_u64(s->nodes_seen * 100, s->nodes_total) 764 : 0; 765 766 prt_printf(&buf, "%s: %d%%, done %llu/%llu nodes, at ", 767 msg, percent, s->nodes_seen, s->nodes_total); 768 bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos)); 769 770 bch_info(c, "%s", buf.buf); 771 printbuf_exit(&buf); 772 } 773 } 774 775 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, 776 struct extents_to_bp_state *s) 777 { 778 struct bch_fs *c = trans->c; 779 struct progress_indicator_state progress; 780 int ret = 0; 781 782 progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink)); 783 784 for (enum btree_id btree_id = 0; 785 btree_id < btree_id_nr_alive(c); 786 btree_id++) { 787 int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; 788 789 ret = commit_do(trans, NULL, NULL, 790 BCH_TRANS_COMMIT_no_enospc, 791 check_btree_root_to_backpointers(trans, s, btree_id, &level)); 792 if (ret) 793 return ret; 794 795 while (level >= depth) { 796 struct btree_iter iter; 797 bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, level, 798 BTREE_ITER_prefetch); 799 800 ret = for_each_btree_key_continue(trans, iter, 0, k, ({ 801 progress_update_iter(trans, &progress, &iter, "extents_to_backpointers"); 802 check_extent_to_backpointers(trans, s, btree_id, level, k) ?: 803 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 804 })); 805 if (ret) 806 return ret; 807 808 --level; 809 } 810 } 811 812 return 0; 813 } 814 815 int bch2_check_extents_to_backpointers(struct bch_fs *c) 816 { 817 struct btree_trans *trans = bch2_trans_get(c); 818 struct extents_to_bp_state s = { .bp_start = POS_MIN }; 819 int ret; 820 821 bch2_bkey_buf_init(&s.last_flushed); 822 bkey_init(&s.last_flushed.k->k); 823 824 while (1) { 825 struct bbpos end; 826 ret = bch2_get_btree_in_memory_pos(trans, 827 BIT_ULL(BTREE_ID_backpointers), 828 BIT_ULL(BTREE_ID_backpointers), 829 BBPOS(BTREE_ID_backpointers, s.bp_start), &end); 830 if (ret) 831 break; 832 833 s.bp_end = end.pos; 834 835 if ( bpos_eq(s.bp_start, POS_MIN) && 836 !bpos_eq(s.bp_end, SPOS_MAX)) 837 bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass", 838 __func__, btree_nodes_fit_in_ram(c)); 839 840 if (!bpos_eq(s.bp_start, POS_MIN) || 841 !bpos_eq(s.bp_end, SPOS_MAX)) { 842 struct printbuf buf = PRINTBUF; 843 844 prt_str(&buf, "check_extents_to_backpointers(): "); 845 bch2_bpos_to_text(&buf, s.bp_start); 846 prt_str(&buf, "-"); 847 bch2_bpos_to_text(&buf, s.bp_end); 848 849 bch_verbose(c, "%s", buf.buf); 850 printbuf_exit(&buf); 851 } 852 853 ret = bch2_check_extents_to_backpointers_pass(trans, &s); 854 if (ret || bpos_eq(s.bp_end, SPOS_MAX)) 855 break; 856 857 s.bp_start = bpos_successor(s.bp_end); 858 } 859 bch2_trans_put(trans); 860 bch2_bkey_buf_exit(&s.last_flushed, c); 861 862 bch2_btree_cache_unpin(c); 863 864 bch_err_fn(c, ret); 865 return ret; 866 } 867 868 static int check_one_backpointer(struct btree_trans *trans, 869 struct bbpos start, 870 struct bbpos end, 871 struct bkey_s_c bp_k, 872 struct bkey_buf *last_flushed) 873 { 874 if (bp_k.k->type != KEY_TYPE_backpointer) 875 return 0; 876 877 struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); 878 struct bch_fs *c = trans->c; 879 struct bbpos pos = bp_to_bbpos(*bp.v); 880 struct printbuf buf = PRINTBUF; 881 882 if (bbpos_cmp(pos, start) < 0 || 883 bbpos_cmp(pos, end) > 0) 884 return 0; 885 886 struct btree_iter iter; 887 struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0); 888 int ret = bkey_err(k); 889 if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) 890 return 0; 891 if (ret) 892 return ret; 893 894 if (!k.k) { 895 ret = bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed); 896 if (ret) 897 goto out; 898 899 if (fsck_err(trans, backpointer_to_missing_ptr, 900 "backpointer for missing %s\n %s", 901 bp.v->level ? "btree node" : "extent", 902 (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { 903 ret = bch2_backpointer_del(trans, bp.k->p); 904 goto out; 905 } 906 } 907 out: 908 fsck_err: 909 bch2_trans_iter_exit(trans, &iter); 910 printbuf_exit(&buf); 911 return ret; 912 } 913 914 static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, 915 struct bbpos start, 916 struct bbpos end) 917 { 918 struct bch_fs *c = trans->c; 919 struct bkey_buf last_flushed; 920 struct progress_indicator_state progress; 921 922 bch2_bkey_buf_init(&last_flushed); 923 bkey_init(&last_flushed.k->k); 924 progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers)); 925 926 int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, 927 POS_MIN, BTREE_ITER_prefetch, k, 928 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ 929 progress_update_iter(trans, &progress, &iter, "backpointers_to_extents"); 930 check_one_backpointer(trans, start, end, k, &last_flushed); 931 })); 932 933 bch2_bkey_buf_exit(&last_flushed, c); 934 return ret; 935 } 936 937 int bch2_check_backpointers_to_extents(struct bch_fs *c) 938 { 939 struct btree_trans *trans = bch2_trans_get(c); 940 struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end; 941 int ret; 942 943 while (1) { 944 ret = bch2_get_btree_in_memory_pos(trans, 945 BIT_ULL(BTREE_ID_extents)| 946 BIT_ULL(BTREE_ID_reflink), 947 ~0, 948 start, &end); 949 if (ret) 950 break; 951 952 if (!bbpos_cmp(start, BBPOS_MIN) && 953 bbpos_cmp(end, BBPOS_MAX)) 954 bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass", 955 __func__, btree_nodes_fit_in_ram(c)); 956 957 if (bbpos_cmp(start, BBPOS_MIN) || 958 bbpos_cmp(end, BBPOS_MAX)) { 959 struct printbuf buf = PRINTBUF; 960 961 prt_str(&buf, "check_backpointers_to_extents(): "); 962 bch2_bbpos_to_text(&buf, start); 963 prt_str(&buf, "-"); 964 bch2_bbpos_to_text(&buf, end); 965 966 bch_verbose(c, "%s", buf.buf); 967 printbuf_exit(&buf); 968 } 969 970 ret = bch2_check_backpointers_to_extents_pass(trans, start, end); 971 if (ret || !bbpos_cmp(end, BBPOS_MAX)) 972 break; 973 974 start = bbpos_successor(end); 975 } 976 bch2_trans_put(trans); 977 978 bch2_btree_cache_unpin(c); 979 980 bch_err_fn(c, ret); 981 return ret; 982 } 983