// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bbpos.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "checksum.h"
#include "disk_accounting.h"
#include "error.h"

#include <linux/mm.h>

int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k,
			      struct bkey_validate_context from)
{
	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
	int ret = 0;

	bkey_fsck_err_on(bp.v->level > BTREE_MAX_DEPTH,
			 c, backpointer_level_bad,
			 "backpointer level bad: %u >= %u",
			 bp.v->level, BTREE_MAX_DEPTH);

	bkey_fsck_err_on(bp.k->p.inode == BCH_SB_MEMBER_INVALID,
			 c, backpointer_dev_bad,
			 "backpointer for BCH_SB_MEMBER_INVALID");

	rcu_read_lock();
	struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode);
	if (!ca) {
		/* these will be caught by fsck */
		rcu_read_unlock();
		return 0;
	}

	struct bpos bucket = bp_pos_to_bucket(ca, bp.k->p);
	struct bpos bp_pos = bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset);
	rcu_read_unlock();

	bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
			 !bpos_eq(bp.k->p, bp_pos),
			 c, backpointer_bucket_offset_wrong,
			 "backpointer bucket_offset wrong (%llu)", (u64) bp.v->bucket_offset);
fsck_err:
	return ret;
}

void bch2_backpointer_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

	rcu_read_lock();
	struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode);
	if (ca) {
		u32 bucket_offset;
		struct bpos bucket = bp_pos_to_bucket_and_offset(ca, bp.k->p, &bucket_offset);
		rcu_read_unlock();
		prt_printf(out, "bucket=%llu:%llu:%u", bucket.inode, bucket.offset, bucket_offset);
	} else {
		rcu_read_unlock();
		prt_printf(out, "sector=%llu:%llu", bp.k->p.inode, bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT);
	}

	bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level);
	prt_printf(out, " suboffset=%u len=%u pos=",
		   (u32) bp.k->p.offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
		   bp.v->bucket_len);
	bch2_bpos_to_text(out, bp.v->pos);
}

void bch2_backpointer_swab(struct bkey_s k)
{
	struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);

	bp.v->bucket_offset = swab40(bp.v->bucket_offset);
	bp.v->bucket_len = swab32(bp.v->bucket_len);
	bch2_bpos_swab(&bp.v->pos);
}

static bool extent_matches_bp(struct bch_fs *c,
			      enum btree_id btree_id, unsigned level,
			      struct bkey_s_c k,
			      struct bkey_s_c_backpointer bp)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	rcu_read_lock();
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bpos bucket2;
		struct bkey_i_backpointer bp2;

		if (p.ptr.cached)
			continue;

		struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
		if (!ca)
			continue;

		bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, &bucket2, &bp2);
		if (bpos_eq(bp.k->p, bp2.k.p) &&
		    !memcmp(bp.v, &bp2.v, sizeof(bp2.v))) {
			rcu_read_unlock();
			return true;
		}
	}
	rcu_read_unlock();

	return false;
}

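/*
 * Log a mismatch between an extent and the backpointers btree: an existing
 * backpointer was found where we were about to insert one, or the
 * backpointer we meant to delete wasn't what we found. Only treated as a
 * (filesystem-inconsistent) error once the extents <-> backpointers fsck
 * passes have completed.
 */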
static noinline int backpointer_mod_err(struct btree_trans *trans,
					struct bkey_s_c orig_k,
					struct bkey_i_backpointer *new_bp,
					struct bkey_s_c found_bp,
					bool insert)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;

	if (insert) {
		prt_printf(&buf, "existing backpointer found when inserting ");
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i));
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "found ");
		bch2_bkey_val_to_text(&buf, c, found_bp);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	} else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		prt_printf(&buf, "backpointer not found when deleting\n");
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "searching for ");
		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i));
		prt_newline(&buf);

		prt_printf(&buf, "got ");
		bch2_bkey_val_to_text(&buf, c, found_bp);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	}

	printbuf_exit(&buf);

	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
	} else {
		return 0;
	}
}

/*
 * Update a backpointer directly, bypassing the btree write buffer: the slow
 * path, used when bch2_backpointers_no_use_write_buffer is set.
 */
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
					      struct bkey_s_c orig_k,
					      struct bkey_i_backpointer *bp,
					      bool insert)
{
	struct btree_iter bp_iter;
	struct bkey_s_c k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
					       bp->k.p,
					       BTREE_ITER_intent|
					       BTREE_ITER_slots|
					       BTREE_ITER_with_updates);
	int ret = bkey_err(k);
	if (ret)
		return ret;

	if (insert
	    ? k.k->type
	    : (k.k->type != KEY_TYPE_backpointer ||
	       memcmp(bkey_s_c_to_backpointer(k).v, &bp->v, sizeof(bp->v)))) {
		ret = backpointer_mod_err(trans, orig_k, bp, k, insert);
		if (ret)
			goto err;
	}

	if (!insert) {
		bp->k.type = KEY_TYPE_deleted;
		set_bkey_val_u64s(&bp->k, 0);
	}

	ret = bch2_trans_update(trans, &bp_iter, &bp->k_i, 0);
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	return ret;
}

static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos)
{
	return likely(!bch2_backpointers_no_use_write_buffer)
		? bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos)
		: bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0);
}

static int bch2_backpointers_maybe_flush(struct btree_trans *trans,
					 struct bkey_s_c visiting_k,
					 struct bkey_buf *last_flushed)
{
	return likely(!bch2_backpointers_no_use_write_buffer)
		? bch2_btree_write_buffer_maybe_flush(trans, visiting_k, last_flushed)
		: 0;
}

static void backpointer_target_not_found(struct btree_trans *trans,
					 struct bkey_s_c_backpointer bp,
					 struct bkey_s_c target_k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;

	/*
	 * If we're using the btree write buffer, the backpointer we were
	 * looking at may have already been deleted - failure to find what it
	 * pointed to is not an error:
	 */
	if (likely(!bch2_backpointers_no_use_write_buffer))
		return;

	prt_printf(&buf, "backpointer doesn't match %s it points to:\n ",
		   bp.v->level ? "btree node" : "extent");
	bch2_bkey_val_to_text(&buf, c, bp.s_c);
	prt_printf(&buf, "\n ");

	bch2_bkey_val_to_text(&buf, c, target_k);
	if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
		bch_err_ratelimited(c, "%s", buf.buf);
	else
		bch2_trans_inconsistent(trans, "%s", buf.buf);

	printbuf_exit(&buf);
}

/*
 * Return the extent (or, for interior nodes, the btree node key) that a
 * backpointer points to, or bkey_s_c_null if the target wasn't found - which
 * is not on its own an error, since the backpointer may have been stale.
 */
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
					 struct bkey_s_c_backpointer bp,
					 struct btree_iter *iter,
					 unsigned iter_flags)
{
	struct bch_fs *c = trans->c;

	if (unlikely(bp.v->btree_id >= btree_id_nr_alive(c)))
		return bkey_s_c_null;

	if (likely(!bp.v->level)) {
		bch2_trans_node_iter_init(trans, iter,
					  bp.v->btree_id,
					  bp.v->pos,
					  0, 0,
					  iter_flags);
		struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
		if (bkey_err(k)) {
			bch2_trans_iter_exit(trans, iter);
			return k;
		}

		if (k.k &&
		    extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp))
			return k;

		bch2_trans_iter_exit(trans, iter);
		backpointer_target_not_found(trans, bp, k);
		return bkey_s_c_null;
	} else {
		struct btree *b = bch2_backpointer_get_node(trans, bp, iter);

		if (IS_ERR_OR_NULL(b)) {
			bch2_trans_iter_exit(trans, iter);
			return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
		}
		return bkey_i_to_s_c(&b->key);
	}
}

struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
					struct bkey_s_c_backpointer bp,
					struct btree_iter *iter)
{
	struct bch_fs *c = trans->c;

	BUG_ON(!bp.v->level);

	bch2_trans_node_iter_init(trans, iter,
				  bp.v->btree_id,
				  bp.v->pos,
				  0,
				  bp.v->level - 1,
				  0);
	struct btree *b = bch2_btree_iter_peek_node(iter);
	if (IS_ERR_OR_NULL(b))
		goto err;

	BUG_ON(b->c.level != bp.v->level - 1);

	if (extent_matches_bp(c, bp.v->btree_id, bp.v->level,
			      bkey_i_to_s_c(&b->key), bp))
		return b;

	if (btree_node_will_make_reachable(b)) {
		b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
	} else {
		backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key));
		b = NULL;
	}
err:
	bch2_trans_iter_exit(trans, iter);
	return b;
}

static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, struct bkey_s_c k,
						   struct bkey_buf *last_flushed)
{
	if (k.k->type != KEY_TYPE_backpointer)
		return 0;

	struct bch_fs *c = trans->c;
	struct btree_iter alloc_iter = { NULL };
	struct bkey_s_c alloc_k;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	struct bpos bucket;
	if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) {
		ret = bch2_backpointers_maybe_flush(trans, k, last_flushed);
		if (ret)
			goto out;

		if (fsck_err(trans, backpointer_to_missing_device,
			     "backpointer for missing device:\n%s",
			     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
			ret = bch2_backpointer_del(trans, k.k->p);
		goto out;
	}

	alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, 0);
	ret = bkey_err(alloc_k);
	if (ret)
		goto out;

	if (alloc_k.k->type != KEY_TYPE_alloc_v4) {
		ret = bch2_backpointers_maybe_flush(trans, k, last_flushed);
		if (ret)
			goto out;

		if (fsck_err(trans, backpointer_to_missing_alloc,
			     "backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
			     alloc_iter.pos.inode, alloc_iter.pos.offset,
			     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
			ret = bch2_backpointer_del(trans, k.k->p);
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
}

/* verify that every backpointer has a corresponding alloc key */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
	struct bkey_buf last_flushed;
	bch2_bkey_buf_init(&last_flushed);
	bkey_init(&last_flushed.k->k);

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
			BTREE_ID_backpointers, POS_MIN, 0, k,
			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		  bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed)));

	bch2_bkey_buf_exit(&last_flushed, c);
	bch_err_fn(c, ret);
	return ret;
}

struct extents_to_bp_state {
	struct bpos	bp_start;
	struct bpos	bp_end;
	struct bkey_buf	last_flushed;
};

static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
			       struct bkey_s_c extent, unsigned dev)
{
	struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
	int ret = PTR_ERR_OR_ZERO(n);
	if (ret)
		return ret;

	bch2_bkey_drop_device(bkey_i_to_s(n), dev);
	return bch2_btree_insert_trans(trans, btree, n, 0);
}

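/*
 * Two extents claim the same disk space: read back one of them and verify its
 * checksum to decide which is stale. Returns >0 if the checksum was bad and
 * the device pointer was dropped, 0 if it verified or couldn't be checked,
 * negative on error.
 */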
static int check_extent_checksum(struct btree_trans *trans,
				 enum btree_id btree, struct bkey_s_c extent,
				 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct printbuf buf = PRINTBUF;
	void *data_buf = NULL;
	struct bio *bio = NULL;
	size_t bytes;
	int ret = 0;

	if (bkey_is_btree_ptr(extent.k))
		return false;

	bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
		if (p.ptr.dev == dev)
			goto found;
	BUG();
found:
	if (!p.crc.csum_type)
		return false;

	bytes = p.crc.compressed_size << 9;

	struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ);
	if (!ca)
		return false;

	data_buf = kvmalloc(bytes, GFP_KERNEL);
	if (!data_buf) {
		ret = -ENOMEM;
		goto err;
	}

	bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL);
	bio->bi_iter.bi_sector = p.ptr.offset;
	bch2_bio_map(bio, data_buf, bytes);
	ret = submit_bio_wait(bio);
	if (ret)
		goto err;

	prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
	prt_printf(&buf, "\n ");
	bch2_btree_id_to_text(&buf, btree);
	prt_str(&buf, " ");
	bch2_bkey_val_to_text(&buf, c, extent);
	prt_printf(&buf, "\n ");
	bch2_btree_id_to_text(&buf, o_btree);
	prt_str(&buf, " ");
	bch2_bkey_val_to_text(&buf, c, extent2);

	struct nonce nonce = extent_nonce(extent.k->bversion, p.crc);
	struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
	if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
			trans, dup_backpointer_to_bad_csum_extent,
			"%s", buf.buf))
		ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
fsck_err:
err:
	if (bio)
		bio_put(bio);
	kvfree(data_buf);
	percpu_ref_put(&ca->io_ref);
	printbuf_exit(&buf);
	return ret;
}

static int check_bp_exists(struct btree_trans *trans,
			   struct extents_to_bp_state *s,
			   struct bkey_i_backpointer *bp,
			   struct bkey_s_c orig_k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter other_extent_iter = {};
	struct printbuf buf = PRINTBUF;

	if (bpos_lt(bp->k.p, s->bp_start) ||
	    bpos_gt(bp->k.p, s->bp_end))
		return 0;

	struct btree_iter bp_iter;
	struct bkey_s_c bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, bp->k.p, 0);
	int ret = bkey_err(bp_k);
	if (ret)
		goto err;

	if (bp_k.k->type != KEY_TYPE_backpointer ||
	    memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp->v, sizeof(bp->v))) {
		ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed);
		if (ret)
			goto err;

		goto check_existing_bp;
	}
out:
err:
fsck_err:
	bch2_trans_iter_exit(trans, &other_extent_iter);
	bch2_trans_iter_exit(trans, &bp_iter);
	printbuf_exit(&buf);
	return ret;
check_existing_bp:
	/* Do we have a backpointer for a different extent? */
	if (bp_k.k->type != KEY_TYPE_backpointer)
		goto missing;

	struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k);

	struct bkey_s_c other_extent =
		bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0);
	ret = bkey_err(other_extent);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		ret = 0;
	if (ret)
		goto err;

	if (!other_extent.k)
		goto missing;

	if (bch2_extents_match(orig_k, other_extent)) {
		printbuf_reset(&buf);
		prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n ");
		bch2_bkey_val_to_text(&buf, c, orig_k);
		prt_str(&buf, "\n ");
		bch2_bkey_val_to_text(&buf, c, other_extent);
		bch_err(c, "%s", buf.buf);

		if (other_extent.k->size <= orig_k.k->size) {
			ret = drop_dev_and_update(trans, other_bp.v->btree_id,
						  other_extent, bp->k.p.inode);
			if (ret)
				goto err;
			goto out;
		} else {
			ret = drop_dev_and_update(trans, bp->v.btree_id, orig_k, bp->k.p.inode);
			if (ret)
				goto err;
			goto missing;
		}
	}

	ret = check_extent_checksum(trans,
				    other_bp.v->btree_id, other_extent,
				    bp->v.btree_id, orig_k,
				    bp->k.p.inode);
	if (ret < 0)
		goto err;
	if (ret) {
		ret = 0;
		goto missing;
	}

	ret = check_extent_checksum(trans, bp->v.btree_id, orig_k,
				    other_bp.v->btree_id, other_extent, bp->k.p.inode);
	if (ret < 0)
		goto err;
	if (ret) {
		ret = 0;
		goto out;
	}

	printbuf_reset(&buf);
	prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bp->k.p.inode);
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_str(&buf, "\n ");
	bch2_bkey_val_to_text(&buf, c, other_extent);
	bch_err(c, "%s", buf.buf);
	ret = -BCH_ERR_fsck_repair_unimplemented;
	goto err;
missing:
	printbuf_reset(&buf);
	prt_str(&buf, "missing backpointer ");
	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i));
	prt_newline(&buf);
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_printf(&buf, "\n got: ");
	bch2_bkey_val_to_text(&buf, c, bp_k);

	if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf))
		ret = bch2_bucket_backpointer_mod(trans, orig_k, bp, true);

	goto out;
}

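/*
 * Forward check: every non-cached pointer in an extent (or btree node key)
 * must have a matching backpointer in the bucket it points into.
 */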
static int check_extent_to_backpointers(struct btree_trans *trans,
					struct extents_to_bp_state *s,
					enum btree_id btree, unsigned level,
					struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs;
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	int ret;

	ptrs = bch2_bkey_ptrs_c(k);
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bpos bucket_pos;
		struct bkey_i_backpointer bp;

		if (p.ptr.cached)
			continue;

		rcu_read_lock();
		struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev);
		if (ca)
			bch2_extent_ptr_to_bp(c, ca, btree, level, k, p, entry, &bucket_pos, &bp);
		rcu_read_unlock();

		if (!ca)
			continue;

		ret = check_bp_exists(trans, s, &bp, k);
		if (ret)
			return ret;
	}

	return 0;
}

static int check_btree_root_to_backpointers(struct btree_trans *trans,
					    struct extents_to_bp_state *s,
					    enum btree_id btree_id,
					    int *level)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct btree *b;
	struct bkey_s_c k;
	int ret;
retry:
	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
				  0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
	b = bch2_btree_iter_peek_node(&iter);
	ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto err;

	if (b != btree_node_root(c, b)) {
		bch2_trans_iter_exit(trans, &iter);
		goto retry;
	}

	*level = b->c.level;

	k = bkey_i_to_s_c(&b->key);
	ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
{
	return (struct bbpos) {
		.btree	= bp.btree_id,
		.pos	= bp.pos,
	};
}

static u64 mem_may_pin_bytes(struct bch_fs *c)
{
	struct sysinfo i;
	si_meminfo(&i);

	u64 mem_bytes = i.totalram * i.mem_unit;
	return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
}

static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
{
	return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
}

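/*
 * Pin btree nodes in the btree node cache, starting from @start, until the
 * memory budget (fsck_memory_usage_percent of total RAM) is used up; *end is
 * set to where the budget ran out, so callers can run their checks in
 * multiple passes over manageable ranges.
 */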
static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
					u64 btree_leaf_mask,
					u64 btree_interior_mask,
					struct bbpos start, struct bbpos *end)
{
	struct bch_fs *c = trans->c;
	s64 mem_may_pin = mem_may_pin_bytes(c);
	int ret = 0;

	bch2_btree_cache_unpin(c);

	btree_interior_mask |= btree_leaf_mask;

	c->btree_cache.pinned_nodes_mask[0] = btree_leaf_mask;
	c->btree_cache.pinned_nodes_mask[1] = btree_interior_mask;
	c->btree_cache.pinned_nodes_start = start;
	c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX;

	for (enum btree_id btree = start.btree;
	     btree < BTREE_ID_NR && !ret;
	     btree++) {
		unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1;

		if (!(BIT_ULL(btree) & btree_leaf_mask) &&
		    !(BIT_ULL(btree) & btree_interior_mask))
			continue;

		ret = __for_each_btree_node(trans, iter, btree,
					    btree == start.btree ? start.pos : POS_MIN,
					    0, depth, BTREE_ITER_prefetch, b, ({
			mem_may_pin -= btree_buf_bytes(b);
			if (mem_may_pin <= 0) {
				c->btree_cache.pinned_nodes_end = *end =
					BBPOS(btree, b->key.k.p);
				break;
			}
			bch2_node_pin(c, b);
			0;
		}));
	}

	return ret;
}

struct progress_indicator_state {
	unsigned long	next_print;
	u64		nodes_seen;
	u64		nodes_total;
	struct btree	*last_node;
};

static inline void progress_init(struct progress_indicator_state *s,
				 struct bch_fs *c,
				 u64 btree_id_mask)
{
	memset(s, 0, sizeof(*s));

	s->next_print = jiffies + HZ * 10;

	for (unsigned i = 0; i < BTREE_ID_NR; i++) {
		if (!(btree_id_mask & BIT_ULL(i)))
			continue;

		struct disk_accounting_pos acc = {
			.type		= BCH_DISK_ACCOUNTING_btree,
			.btree.id	= i,
		};

		u64 v;
		bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1);
		s->nodes_total += div64_ul(v, btree_sectors(c));
	}
}

static inline bool progress_update_p(struct progress_indicator_state *s)
{
	bool ret = time_after_eq(jiffies, s->next_print);

	if (ret)
		s->next_print = jiffies + HZ * 10;
	return ret;
}

static void progress_update_iter(struct btree_trans *trans,
				 struct progress_indicator_state *s,
				 struct btree_iter *iter,
				 const char *msg)
{
	struct bch_fs *c = trans->c;
	struct btree *b = path_l(btree_iter_path(trans, iter))->b;

	s->nodes_seen += b != s->last_node;
	s->last_node = b;

	if (progress_update_p(s)) {
		struct printbuf buf = PRINTBUF;
		unsigned percent = s->nodes_total
			? div64_u64(s->nodes_seen * 100, s->nodes_total)
			: 0;

		prt_printf(&buf, "%s: %d%%, done %llu/%llu nodes, at ",
			   msg, percent, s->nodes_seen, s->nodes_total);
		bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos));

		bch_info(c, "%s", buf.buf);
		printbuf_exit(&buf);
	}
}

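/*
 * One pass of the extents -> backpointers check: walk every btree that has
 * pointers, roots and interior nodes included, checking each key against the
 * portion of the backpointers btree currently pinned in memory
 * ([bp_start, bp_end]).
 */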
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
						   struct extents_to_bp_state *s)
{
	struct bch_fs *c = trans->c;
	struct progress_indicator_state progress;
	int ret = 0;

	progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink));

	for (enum btree_id btree_id = 0;
	     btree_id < btree_id_nr_alive(c);
	     btree_id++) {
		int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;

		ret = commit_do(trans, NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
				check_btree_root_to_backpointers(trans, s, btree_id, &level));
		if (ret)
			return ret;

		while (level >= depth) {
			struct btree_iter iter;
			bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, level,
						  BTREE_ITER_prefetch);

			ret = for_each_btree_key_continue(trans, iter, 0, k, ({
				progress_update_iter(trans, &progress, &iter, "extents_to_backpointers");
				check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
				bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
			}));
			if (ret)
				return ret;

			--level;
		}
	}

	return 0;
}

int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct extents_to_bp_state s = { .bp_start = POS_MIN };
	int ret;

	bch2_bkey_buf_init(&s.last_flushed);
	bkey_init(&s.last_flushed.k->k);

	while (1) {
		struct bbpos end;
		ret = bch2_get_btree_in_memory_pos(trans,
				BIT_ULL(BTREE_ID_backpointers),
				BIT_ULL(BTREE_ID_backpointers),
				BBPOS(BTREE_ID_backpointers, s.bp_start), &end);
		if (ret)
			break;

		s.bp_end = end.pos;

		if ( bpos_eq(s.bp_start, POS_MIN) &&
		    !bpos_eq(s.bp_end, SPOS_MAX))
			bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (!bpos_eq(s.bp_start, POS_MIN) ||
		    !bpos_eq(s.bp_end, SPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_extents_to_backpointers(): ");
			bch2_bpos_to_text(&buf, s.bp_start);
			prt_str(&buf, "-");
			bch2_bpos_to_text(&buf, s.bp_end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_extents_to_backpointers_pass(trans, &s);
		if (ret || bpos_eq(s.bp_end, SPOS_MAX))
			break;

		s.bp_start = bpos_successor(s.bp_end);
	}
	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&s.last_flushed, c);

	bch2_btree_cache_unpin(c);

	bch_err_fn(c, ret);
	return ret;
}

/*
 * Reverse check: a backpointer within [start, end) must resolve to a live
 * extent or btree node; the write buffer is flushed before deleting one, in
 * case the mismatch is explained by an update we haven't yet seen.
 */
static int check_one_backpointer(struct btree_trans *trans,
				 struct bbpos start,
				 struct bbpos end,
				 struct bkey_s_c bp_k,
				 struct bkey_buf *last_flushed)
{
	if (bp_k.k->type != KEY_TYPE_backpointer)
		return 0;

	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k);
	struct bch_fs *c = trans->c;
	struct bbpos pos = bp_to_bbpos(*bp.v);
	struct printbuf buf = PRINTBUF;

	if (bbpos_cmp(pos, start) < 0 ||
	    bbpos_cmp(pos, end) > 0)
		return 0;

	struct btree_iter iter;
	struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0);
	int ret = bkey_err(k);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		return 0;
	if (ret)
		return ret;

	if (!k.k) {
		ret = bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed);
		if (ret)
			goto out;

		if (fsck_err(trans, backpointer_to_missing_ptr,
			     "backpointer for missing %s\n %s",
			     bp.v->level ? "btree node" : "extent",
			     (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
			ret = bch2_backpointer_del(trans, bp.k->p);
			goto out;
		}
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
}

static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
						   struct bbpos start,
						   struct bbpos end)
{
	struct bch_fs *c = trans->c;
	struct bkey_buf last_flushed;
	struct progress_indicator_state progress;

	bch2_bkey_buf_init(&last_flushed);
	bkey_init(&last_flushed.k->k);
	progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers));

	int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
				  POS_MIN, BTREE_ITER_prefetch, k,
				  NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
			progress_update_iter(trans, &progress, &iter, "backpointers_to_extents");
			check_one_backpointer(trans, start, end, k, &last_flushed);
	}));

	bch2_bkey_buf_exit(&last_flushed, c);
	return ret;
}

int bch2_check_backpointers_to_extents(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
	int ret;

	while (1) {
		ret = bch2_get_btree_in_memory_pos(trans,
						   BIT_ULL(BTREE_ID_extents)|
						   BIT_ULL(BTREE_ID_reflink),
						   ~0,
						   start, &end);
		if (ret)
			break;

		if (!bbpos_cmp(start, BBPOS_MIN) &&
		     bbpos_cmp(end, BBPOS_MAX))
			bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (bbpos_cmp(start, BBPOS_MIN) ||
		    bbpos_cmp(end, BBPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_backpointers_to_extents(): ");
			bch2_bbpos_to_text(&buf, start);
			prt_str(&buf, "-");
			bch2_bbpos_to_text(&buf, end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
		if (ret || !bbpos_cmp(end, BBPOS_MAX))
			break;

		start = bbpos_successor(end);
	}
	bch2_trans_put(trans);

	bch2_btree_cache_unpin(c);

	bch_err_fn(c, ret);
	return ret;
}