1 // SPDX-License-Identifier: GPL-2.0 2 #include "bcachefs.h" 3 #include "bbpos.h" 4 #include "alloc_background.h" 5 #include "backpointers.h" 6 #include "bkey_buf.h" 7 #include "btree_cache.h" 8 #include "btree_update.h" 9 #include "btree_update_interior.h" 10 #include "btree_write_buffer.h" 11 #include "checksum.h" 12 #include "disk_accounting.h" 13 #include "error.h" 14 #include "progress.h" 15 16 #include <linux/mm.h> 17 18 int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, 19 struct bkey_validate_context from) 20 { 21 struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); 22 int ret = 0; 23 24 bkey_fsck_err_on(bp.v->level > BTREE_MAX_DEPTH, 25 c, backpointer_level_bad, 26 "backpointer level bad: %u >= %u", 27 bp.v->level, BTREE_MAX_DEPTH); 28 29 bkey_fsck_err_on(bp.k->p.inode == BCH_SB_MEMBER_INVALID, 30 c, backpointer_dev_bad, 31 "backpointer for BCH_SB_MEMBER_INVALID"); 32 fsck_err: 33 return ret; 34 } 35 36 void bch2_backpointer_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) 37 { 38 struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); 39 40 rcu_read_lock(); 41 struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode); 42 if (ca) { 43 u32 bucket_offset; 44 struct bpos bucket = bp_pos_to_bucket_and_offset(ca, bp.k->p, &bucket_offset); 45 rcu_read_unlock(); 46 prt_printf(out, "bucket=%llu:%llu:%u ", bucket.inode, bucket.offset, bucket_offset); 47 } else { 48 rcu_read_unlock(); 49 prt_printf(out, "sector=%llu:%llu ", bp.k->p.inode, bp.k->p.offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT); 50 } 51 52 bch2_btree_id_level_to_text(out, bp.v->btree_id, bp.v->level); 53 prt_printf(out, " suboffset=%u len=%u gen=%u pos=", 54 (u32) bp.k->p.offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), 55 bp.v->bucket_len, 56 bp.v->bucket_gen); 57 bch2_bpos_to_text(out, bp.v->pos); 58 } 59 60 void bch2_backpointer_swab(struct bkey_s k) 61 { 62 struct bkey_s_backpointer bp = bkey_s_to_backpointer(k); 63 64 bp.v->bucket_len = swab32(bp.v->bucket_len); 65 bch2_bpos_swab(&bp.v->pos); 66 } 67 68 static bool extent_matches_bp(struct bch_fs *c, 69 enum btree_id btree_id, unsigned level, 70 struct bkey_s_c k, 71 struct bkey_s_c_backpointer bp) 72 { 73 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 74 const union bch_extent_entry *entry; 75 struct extent_ptr_decoded p; 76 77 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 78 struct bkey_i_backpointer bp2; 79 bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bp2); 80 81 if (bpos_eq(bp.k->p, bp2.k.p) && 82 !memcmp(bp.v, &bp2.v, sizeof(bp2.v))) 83 return true; 84 } 85 86 return false; 87 } 88 89 static noinline int backpointer_mod_err(struct btree_trans *trans, 90 struct bkey_s_c orig_k, 91 struct bkey_i_backpointer *new_bp, 92 struct bkey_s_c found_bp, 93 bool insert) 94 { 95 struct bch_fs *c = trans->c; 96 struct printbuf buf = PRINTBUF; 97 98 if (insert) { 99 prt_printf(&buf, "existing backpointer found when inserting "); 100 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i)); 101 prt_newline(&buf); 102 printbuf_indent_add(&buf, 2); 103 104 prt_printf(&buf, "found "); 105 bch2_bkey_val_to_text(&buf, c, found_bp); 106 prt_newline(&buf); 107 108 prt_printf(&buf, "for "); 109 bch2_bkey_val_to_text(&buf, c, orig_k); 110 111 bch_err(c, "%s", buf.buf); 112 } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { 113 prt_printf(&buf, "backpointer not found when deleting\n"); 114 printbuf_indent_add(&buf, 2); 115 116 prt_printf(&buf, "searching for "); 117 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&new_bp->k_i)); 118 prt_newline(&buf); 119 120 prt_printf(&buf, "got "); 121 bch2_bkey_val_to_text(&buf, c, found_bp); 122 prt_newline(&buf); 123 124 prt_printf(&buf, "for "); 125 bch2_bkey_val_to_text(&buf, c, orig_k); 126 127 bch_err(c, "%s", buf.buf); 128 } 129 130 printbuf_exit(&buf); 131 132 if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { 133 return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0; 134 } else { 135 return 0; 136 } 137 } 138 139 int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, 140 struct bkey_s_c orig_k, 141 struct bkey_i_backpointer *bp, 142 bool insert) 143 { 144 struct btree_iter bp_iter; 145 struct bkey_s_c k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, 146 bp->k.p, 147 BTREE_ITER_intent| 148 BTREE_ITER_slots| 149 BTREE_ITER_with_updates); 150 int ret = bkey_err(k); 151 if (ret) 152 return ret; 153 154 if (insert 155 ? k.k->type 156 : (k.k->type != KEY_TYPE_backpointer || 157 memcmp(bkey_s_c_to_backpointer(k).v, &bp->v, sizeof(bp->v)))) { 158 ret = backpointer_mod_err(trans, orig_k, bp, k, insert); 159 if (ret) 160 goto err; 161 } 162 163 if (!insert) { 164 bp->k.type = KEY_TYPE_deleted; 165 set_bkey_val_u64s(&bp->k, 0); 166 } 167 168 ret = bch2_trans_update(trans, &bp_iter, &bp->k_i, 0); 169 err: 170 bch2_trans_iter_exit(trans, &bp_iter); 171 return ret; 172 } 173 174 static int bch2_backpointer_del(struct btree_trans *trans, struct bpos pos) 175 { 176 return (likely(!bch2_backpointers_no_use_write_buffer) 177 ? bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, pos) 178 : bch2_btree_delete(trans, BTREE_ID_backpointers, pos, 0)) ?: 179 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 180 } 181 182 static inline int bch2_backpointers_maybe_flush(struct btree_trans *trans, 183 struct bkey_s_c visiting_k, 184 struct bkey_buf *last_flushed) 185 { 186 return likely(!bch2_backpointers_no_use_write_buffer) 187 ? bch2_btree_write_buffer_maybe_flush(trans, visiting_k, last_flushed) 188 : 0; 189 } 190 191 static int backpointer_target_not_found(struct btree_trans *trans, 192 struct bkey_s_c_backpointer bp, 193 struct bkey_s_c target_k, 194 struct bkey_buf *last_flushed) 195 { 196 struct bch_fs *c = trans->c; 197 struct printbuf buf = PRINTBUF; 198 int ret = 0; 199 200 /* 201 * If we're using the btree write buffer, the backpointer we were 202 * looking at may have already been deleted - failure to find what it 203 * pointed to is not an error: 204 */ 205 ret = last_flushed 206 ? bch2_backpointers_maybe_flush(trans, bp.s_c, last_flushed) 207 : 0; 208 if (ret) 209 return ret; 210 211 prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", 212 bp.v->level ? "btree node" : "extent"); 213 bch2_bkey_val_to_text(&buf, c, bp.s_c); 214 215 prt_printf(&buf, "\n "); 216 bch2_bkey_val_to_text(&buf, c, target_k); 217 218 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(target_k); 219 const union bch_extent_entry *entry; 220 struct extent_ptr_decoded p; 221 bkey_for_each_ptr_decode(target_k.k, ptrs, p, entry) 222 if (p.ptr.dev == bp.k->p.inode) { 223 prt_printf(&buf, "\n "); 224 struct bkey_i_backpointer bp2; 225 bch2_extent_ptr_to_bp(c, bp.v->btree_id, bp.v->level, target_k, p, entry, &bp2); 226 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp2.k_i)); 227 } 228 229 if (fsck_err(trans, backpointer_to_missing_ptr, 230 "%s", buf.buf)) 231 ret = bch2_backpointer_del(trans, bp.k->p); 232 fsck_err: 233 printbuf_exit(&buf); 234 return ret; 235 } 236 237 struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, 238 struct bkey_s_c_backpointer bp, 239 struct btree_iter *iter, 240 unsigned iter_flags, 241 struct bkey_buf *last_flushed) 242 { 243 struct bch_fs *c = trans->c; 244 245 if (unlikely(bp.v->btree_id >= btree_id_nr_alive(c))) 246 return bkey_s_c_null; 247 248 bch2_trans_node_iter_init(trans, iter, 249 bp.v->btree_id, 250 bp.v->pos, 251 0, 252 bp.v->level, 253 iter_flags); 254 struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); 255 if (bkey_err(k)) { 256 bch2_trans_iter_exit(trans, iter); 257 return k; 258 } 259 260 if (k.k && 261 extent_matches_bp(c, bp.v->btree_id, bp.v->level, k, bp)) 262 return k; 263 264 bch2_trans_iter_exit(trans, iter); 265 266 if (!bp.v->level) { 267 int ret = backpointer_target_not_found(trans, bp, k, last_flushed); 268 return ret ? bkey_s_c_err(ret) : bkey_s_c_null; 269 } else { 270 struct btree *b = bch2_backpointer_get_node(trans, bp, iter, last_flushed); 271 if (b == ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node)) 272 return bkey_s_c_null; 273 if (IS_ERR_OR_NULL(b)) 274 return ((struct bkey_s_c) { .k = ERR_CAST(b) }); 275 276 return bkey_i_to_s_c(&b->key); 277 } 278 } 279 280 struct btree *bch2_backpointer_get_node(struct btree_trans *trans, 281 struct bkey_s_c_backpointer bp, 282 struct btree_iter *iter, 283 struct bkey_buf *last_flushed) 284 { 285 struct bch_fs *c = trans->c; 286 287 BUG_ON(!bp.v->level); 288 289 bch2_trans_node_iter_init(trans, iter, 290 bp.v->btree_id, 291 bp.v->pos, 292 0, 293 bp.v->level - 1, 294 0); 295 struct btree *b = bch2_btree_iter_peek_node(iter); 296 if (IS_ERR_OR_NULL(b)) 297 goto err; 298 299 BUG_ON(b->c.level != bp.v->level - 1); 300 301 if (extent_matches_bp(c, bp.v->btree_id, bp.v->level, 302 bkey_i_to_s_c(&b->key), bp)) 303 return b; 304 305 if (btree_node_will_make_reachable(b)) { 306 b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); 307 } else { 308 int ret = backpointer_target_not_found(trans, bp, bkey_i_to_s_c(&b->key), last_flushed); 309 b = ret ? ERR_PTR(ret) : NULL; 310 } 311 err: 312 bch2_trans_iter_exit(trans, iter); 313 return b; 314 } 315 316 static int bch2_check_backpointer_has_valid_bucket(struct btree_trans *trans, struct bkey_s_c k, 317 struct bkey_buf *last_flushed) 318 { 319 if (k.k->type != KEY_TYPE_backpointer) 320 return 0; 321 322 struct bch_fs *c = trans->c; 323 struct btree_iter alloc_iter = { NULL }; 324 struct bkey_s_c alloc_k; 325 struct printbuf buf = PRINTBUF; 326 int ret = 0; 327 328 struct bpos bucket; 329 if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) { 330 ret = bch2_backpointers_maybe_flush(trans, k, last_flushed); 331 if (ret) 332 goto out; 333 334 if (fsck_err(trans, backpointer_to_missing_device, 335 "backpointer for missing device:\n%s", 336 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 337 ret = bch2_backpointer_del(trans, k.k->p); 338 goto out; 339 } 340 341 alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, 0); 342 ret = bkey_err(alloc_k); 343 if (ret) 344 goto out; 345 346 if (alloc_k.k->type != KEY_TYPE_alloc_v4) { 347 ret = bch2_backpointers_maybe_flush(trans, k, last_flushed); 348 if (ret) 349 goto out; 350 351 if (fsck_err(trans, backpointer_to_missing_alloc, 352 "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", 353 alloc_iter.pos.inode, alloc_iter.pos.offset, 354 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 355 ret = bch2_backpointer_del(trans, k.k->p); 356 } 357 out: 358 fsck_err: 359 bch2_trans_iter_exit(trans, &alloc_iter); 360 printbuf_exit(&buf); 361 return ret; 362 } 363 364 /* verify that every backpointer has a corresponding alloc key */ 365 int bch2_check_btree_backpointers(struct bch_fs *c) 366 { 367 struct bkey_buf last_flushed; 368 bch2_bkey_buf_init(&last_flushed); 369 bkey_init(&last_flushed.k->k); 370 371 int ret = bch2_trans_run(c, 372 for_each_btree_key_commit(trans, iter, 373 BTREE_ID_backpointers, POS_MIN, 0, k, 374 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 375 bch2_check_backpointer_has_valid_bucket(trans, k, &last_flushed))); 376 377 bch2_bkey_buf_exit(&last_flushed, c); 378 bch_err_fn(c, ret); 379 return ret; 380 } 381 382 struct extents_to_bp_state { 383 struct bpos bp_start; 384 struct bpos bp_end; 385 struct bkey_buf last_flushed; 386 }; 387 388 static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree, 389 struct bkey_s_c extent, unsigned dev) 390 { 391 struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent); 392 int ret = PTR_ERR_OR_ZERO(n); 393 if (ret) 394 return ret; 395 396 bch2_bkey_drop_device(bkey_i_to_s(n), dev); 397 return bch2_btree_insert_trans(trans, btree, n, 0); 398 } 399 400 static int check_extent_checksum(struct btree_trans *trans, 401 enum btree_id btree, struct bkey_s_c extent, 402 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev) 403 { 404 struct bch_fs *c = trans->c; 405 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent); 406 const union bch_extent_entry *entry; 407 struct extent_ptr_decoded p; 408 struct printbuf buf = PRINTBUF; 409 void *data_buf = NULL; 410 struct bio *bio = NULL; 411 size_t bytes; 412 int ret = 0; 413 414 if (bkey_is_btree_ptr(extent.k)) 415 return false; 416 417 bkey_for_each_ptr_decode(extent.k, ptrs, p, entry) 418 if (p.ptr.dev == dev) 419 goto found; 420 BUG(); 421 found: 422 if (!p.crc.csum_type) 423 return false; 424 425 bytes = p.crc.compressed_size << 9; 426 427 struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ); 428 if (!ca) 429 return false; 430 431 data_buf = kvmalloc(bytes, GFP_KERNEL); 432 if (!data_buf) { 433 ret = -ENOMEM; 434 goto err; 435 } 436 437 bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL); 438 bio->bi_iter.bi_sector = p.ptr.offset; 439 bch2_bio_map(bio, data_buf, bytes); 440 ret = submit_bio_wait(bio); 441 if (ret) 442 goto err; 443 444 prt_str(&buf, "extents pointing to same space, but first extent checksum bad:"); 445 prt_printf(&buf, "\n "); 446 bch2_btree_id_to_text(&buf, btree); 447 prt_str(&buf, " "); 448 bch2_bkey_val_to_text(&buf, c, extent); 449 prt_printf(&buf, "\n "); 450 bch2_btree_id_to_text(&buf, o_btree); 451 prt_str(&buf, " "); 452 bch2_bkey_val_to_text(&buf, c, extent2); 453 454 struct nonce nonce = extent_nonce(extent.k->bversion, p.crc); 455 struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes); 456 if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum), 457 trans, dup_backpointer_to_bad_csum_extent, 458 "%s", buf.buf)) 459 ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1; 460 fsck_err: 461 err: 462 if (bio) 463 bio_put(bio); 464 kvfree(data_buf); 465 percpu_ref_put(&ca->io_ref); 466 printbuf_exit(&buf); 467 return ret; 468 } 469 470 static int check_bp_exists(struct btree_trans *trans, 471 struct extents_to_bp_state *s, 472 struct bkey_i_backpointer *bp, 473 struct bkey_s_c orig_k) 474 { 475 struct bch_fs *c = trans->c; 476 struct btree_iter other_extent_iter = {}; 477 struct printbuf buf = PRINTBUF; 478 479 if (bpos_lt(bp->k.p, s->bp_start) || 480 bpos_gt(bp->k.p, s->bp_end)) 481 return 0; 482 483 struct btree_iter bp_iter; 484 struct bkey_s_c bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, bp->k.p, 0); 485 int ret = bkey_err(bp_k); 486 if (ret) 487 goto err; 488 489 if (bp_k.k->type != KEY_TYPE_backpointer || 490 memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp->v, sizeof(bp->v))) { 491 ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed); 492 if (ret) 493 goto err; 494 495 goto check_existing_bp; 496 } 497 out: 498 err: 499 fsck_err: 500 bch2_trans_iter_exit(trans, &other_extent_iter); 501 bch2_trans_iter_exit(trans, &bp_iter); 502 printbuf_exit(&buf); 503 return ret; 504 check_existing_bp: 505 /* Do we have a backpointer for a different extent? */ 506 if (bp_k.k->type != KEY_TYPE_backpointer) 507 goto missing; 508 509 struct bkey_s_c_backpointer other_bp = bkey_s_c_to_backpointer(bp_k); 510 511 struct bkey_s_c other_extent = 512 bch2_backpointer_get_key(trans, other_bp, &other_extent_iter, 0, NULL); 513 ret = bkey_err(other_extent); 514 if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) 515 ret = 0; 516 if (ret) 517 goto err; 518 519 if (!other_extent.k) 520 goto missing; 521 522 rcu_read_lock(); 523 struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp->k.p.inode); 524 if (ca) { 525 struct bkey_ptrs_c other_extent_ptrs = bch2_bkey_ptrs_c(other_extent); 526 bkey_for_each_ptr(other_extent_ptrs, ptr) 527 if (ptr->dev == bp->k.p.inode && 528 dev_ptr_stale_rcu(ca, ptr)) { 529 ret = drop_dev_and_update(trans, other_bp.v->btree_id, 530 other_extent, bp->k.p.inode); 531 if (ret) 532 goto err; 533 goto out; 534 } 535 } 536 rcu_read_unlock(); 537 538 if (bch2_extents_match(orig_k, other_extent)) { 539 printbuf_reset(&buf); 540 prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n "); 541 bch2_bkey_val_to_text(&buf, c, orig_k); 542 prt_str(&buf, "\n "); 543 bch2_bkey_val_to_text(&buf, c, other_extent); 544 bch_err(c, "%s", buf.buf); 545 546 if (other_extent.k->size <= orig_k.k->size) { 547 ret = drop_dev_and_update(trans, other_bp.v->btree_id, 548 other_extent, bp->k.p.inode); 549 if (ret) 550 goto err; 551 goto out; 552 } else { 553 ret = drop_dev_and_update(trans, bp->v.btree_id, orig_k, bp->k.p.inode); 554 if (ret) 555 goto err; 556 goto missing; 557 } 558 } 559 560 ret = check_extent_checksum(trans, 561 other_bp.v->btree_id, other_extent, 562 bp->v.btree_id, orig_k, 563 bp->k.p.inode); 564 if (ret < 0) 565 goto err; 566 if (ret) { 567 ret = 0; 568 goto missing; 569 } 570 571 ret = check_extent_checksum(trans, bp->v.btree_id, orig_k, 572 other_bp.v->btree_id, other_extent, bp->k.p.inode); 573 if (ret < 0) 574 goto err; 575 if (ret) { 576 ret = 0; 577 goto out; 578 } 579 580 printbuf_reset(&buf); 581 prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bp->k.p.inode); 582 bch2_bkey_val_to_text(&buf, c, orig_k); 583 prt_str(&buf, "\n "); 584 bch2_bkey_val_to_text(&buf, c, other_extent); 585 bch_err(c, "%s", buf.buf); 586 ret = -BCH_ERR_fsck_repair_unimplemented; 587 goto err; 588 missing: 589 printbuf_reset(&buf); 590 prt_str(&buf, "missing backpointer\n for: "); 591 bch2_bkey_val_to_text(&buf, c, orig_k); 592 prt_printf(&buf, "\n want: "); 593 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&bp->k_i)); 594 prt_printf(&buf, "\n got: "); 595 bch2_bkey_val_to_text(&buf, c, bp_k); 596 597 if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf)) 598 ret = bch2_bucket_backpointer_mod(trans, orig_k, bp, true); 599 600 goto out; 601 } 602 603 static int check_extent_to_backpointers(struct btree_trans *trans, 604 struct extents_to_bp_state *s, 605 enum btree_id btree, unsigned level, 606 struct bkey_s_c k) 607 { 608 struct bch_fs *c = trans->c; 609 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 610 const union bch_extent_entry *entry; 611 struct extent_ptr_decoded p; 612 613 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 614 if (p.ptr.cached) 615 continue; 616 617 if (p.ptr.dev == BCH_SB_MEMBER_INVALID) 618 continue; 619 620 rcu_read_lock(); 621 struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev); 622 bool check = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_mismatches); 623 bool empty = ca && test_bit(PTR_BUCKET_NR(ca, &p.ptr), ca->bucket_backpointer_empty); 624 rcu_read_unlock(); 625 626 if (check || empty) { 627 struct bkey_i_backpointer bp; 628 bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bp); 629 630 int ret = check 631 ? check_bp_exists(trans, s, &bp, k) 632 : bch2_bucket_backpointer_mod(trans, k, &bp, true); 633 if (ret) 634 return ret; 635 } 636 } 637 638 return 0; 639 } 640 641 static int check_btree_root_to_backpointers(struct btree_trans *trans, 642 struct extents_to_bp_state *s, 643 enum btree_id btree_id, 644 int *level) 645 { 646 struct bch_fs *c = trans->c; 647 struct btree_iter iter; 648 struct btree *b; 649 struct bkey_s_c k; 650 int ret; 651 retry: 652 bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 653 0, bch2_btree_id_root(c, btree_id)->b->c.level, 0); 654 b = bch2_btree_iter_peek_node(&iter); 655 ret = PTR_ERR_OR_ZERO(b); 656 if (ret) 657 goto err; 658 659 if (b != btree_node_root(c, b)) { 660 bch2_trans_iter_exit(trans, &iter); 661 goto retry; 662 } 663 664 *level = b->c.level; 665 666 k = bkey_i_to_s_c(&b->key); 667 ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k); 668 err: 669 bch2_trans_iter_exit(trans, &iter); 670 return ret; 671 } 672 673 static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp) 674 { 675 return (struct bbpos) { 676 .btree = bp.btree_id, 677 .pos = bp.pos, 678 }; 679 } 680 681 static u64 mem_may_pin_bytes(struct bch_fs *c) 682 { 683 struct sysinfo i; 684 si_meminfo(&i); 685 686 u64 mem_bytes = i.totalram * i.mem_unit; 687 return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100); 688 } 689 690 static size_t btree_nodes_fit_in_ram(struct bch_fs *c) 691 { 692 return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size); 693 } 694 695 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, 696 u64 btree_leaf_mask, 697 u64 btree_interior_mask, 698 struct bbpos start, struct bbpos *end) 699 { 700 struct bch_fs *c = trans->c; 701 s64 mem_may_pin = mem_may_pin_bytes(c); 702 int ret = 0; 703 704 bch2_btree_cache_unpin(c); 705 706 btree_interior_mask |= btree_leaf_mask; 707 708 c->btree_cache.pinned_nodes_mask[0] = btree_leaf_mask; 709 c->btree_cache.pinned_nodes_mask[1] = btree_interior_mask; 710 c->btree_cache.pinned_nodes_start = start; 711 c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX; 712 713 for (enum btree_id btree = start.btree; 714 btree < BTREE_ID_NR && !ret; 715 btree++) { 716 unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1; 717 718 if (!(BIT_ULL(btree) & btree_leaf_mask) && 719 !(BIT_ULL(btree) & btree_interior_mask)) 720 continue; 721 722 ret = __for_each_btree_node(trans, iter, btree, 723 btree == start.btree ? start.pos : POS_MIN, 724 0, depth, BTREE_ITER_prefetch, b, ({ 725 mem_may_pin -= btree_buf_bytes(b); 726 if (mem_may_pin <= 0) { 727 c->btree_cache.pinned_nodes_end = *end = 728 BBPOS(btree, b->key.k.p); 729 break; 730 } 731 bch2_node_pin(c, b); 732 0; 733 })); 734 } 735 736 return ret; 737 } 738 739 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, 740 struct extents_to_bp_state *s) 741 { 742 struct bch_fs *c = trans->c; 743 struct progress_indicator_state progress; 744 int ret = 0; 745 746 bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink)); 747 748 for (enum btree_id btree_id = 0; 749 btree_id < btree_id_nr_alive(c); 750 btree_id++) { 751 int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; 752 753 ret = commit_do(trans, NULL, NULL, 754 BCH_TRANS_COMMIT_no_enospc, 755 check_btree_root_to_backpointers(trans, s, btree_id, &level)); 756 if (ret) 757 return ret; 758 759 while (level >= depth) { 760 struct btree_iter iter; 761 bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, level, 762 BTREE_ITER_prefetch); 763 764 ret = for_each_btree_key_continue(trans, iter, 0, k, ({ 765 bch2_progress_update_iter(trans, &progress, &iter, "extents_to_backpointers"); 766 check_extent_to_backpointers(trans, s, btree_id, level, k) ?: 767 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 768 })); 769 if (ret) 770 return ret; 771 772 --level; 773 } 774 } 775 776 return 0; 777 } 778 779 enum alloc_sector_counter { 780 ALLOC_dirty, 781 ALLOC_cached, 782 ALLOC_stripe, 783 ALLOC_SECTORS_NR 784 }; 785 786 static enum alloc_sector_counter data_type_to_alloc_counter(enum bch_data_type t) 787 { 788 switch (t) { 789 case BCH_DATA_btree: 790 case BCH_DATA_user: 791 return ALLOC_dirty; 792 case BCH_DATA_cached: 793 return ALLOC_cached; 794 case BCH_DATA_stripe: 795 return ALLOC_stripe; 796 default: 797 BUG(); 798 } 799 } 800 801 static int check_bucket_backpointers_to_extents(struct btree_trans *, struct bch_dev *, struct bpos); 802 803 static int check_bucket_backpointer_mismatch(struct btree_trans *trans, struct bkey_s_c alloc_k, 804 struct bkey_buf *last_flushed) 805 { 806 struct bch_fs *c = trans->c; 807 struct bch_alloc_v4 a_convert; 808 const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); 809 bool need_commit = false; 810 811 if (a->data_type == BCH_DATA_sb || 812 a->data_type == BCH_DATA_journal || 813 a->data_type == BCH_DATA_parity) 814 return 0; 815 816 u32 sectors[ALLOC_SECTORS_NR]; 817 memset(sectors, 0, sizeof(sectors)); 818 819 struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(trans->c, alloc_k.k->p); 820 if (!ca) 821 return 0; 822 823 struct btree_iter iter; 824 struct bkey_s_c bp_k; 825 int ret = 0; 826 for_each_btree_key_max_norestart(trans, iter, BTREE_ID_backpointers, 827 bucket_pos_to_bp_start(ca, alloc_k.k->p), 828 bucket_pos_to_bp_end(ca, alloc_k.k->p), 0, bp_k, ret) { 829 if (bp_k.k->type != KEY_TYPE_backpointer) 830 continue; 831 832 struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); 833 834 if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen && 835 (bp.v->bucket_gen != a->gen || 836 bp.v->pad)) { 837 ret = bch2_backpointer_del(trans, bp_k.k->p); 838 if (ret) 839 break; 840 841 need_commit = true; 842 continue; 843 } 844 845 if (bp.v->bucket_gen != a->gen) 846 continue; 847 848 sectors[data_type_to_alloc_counter(bp.v->data_type)] += bp.v->bucket_len; 849 }; 850 bch2_trans_iter_exit(trans, &iter); 851 if (ret) 852 goto err; 853 854 if (need_commit) { 855 ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 856 if (ret) 857 goto err; 858 } 859 860 /* Cached pointers don't have backpointers: */ 861 862 if (sectors[ALLOC_dirty] != a->dirty_sectors || 863 sectors[ALLOC_stripe] != a->stripe_sectors) { 864 if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_backpointer_bucket_gen) { 865 ret = bch2_backpointers_maybe_flush(trans, alloc_k, last_flushed); 866 if (ret) 867 goto err; 868 } 869 870 if (sectors[ALLOC_dirty] > a->dirty_sectors || 871 sectors[ALLOC_stripe] > a->stripe_sectors) { 872 ret = check_bucket_backpointers_to_extents(trans, ca, alloc_k.k->p) ?: 873 -BCH_ERR_transaction_restart_nested; 874 goto err; 875 } 876 877 if (!sectors[ALLOC_dirty] && 878 !sectors[ALLOC_stripe]) 879 __set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_empty); 880 else 881 __set_bit(alloc_k.k->p.offset, ca->bucket_backpointer_mismatches); 882 } 883 err: 884 bch2_dev_put(ca); 885 return ret; 886 } 887 888 static bool backpointer_node_has_missing(struct bch_fs *c, struct bkey_s_c k) 889 { 890 switch (k.k->type) { 891 case KEY_TYPE_btree_ptr_v2: { 892 bool ret = false; 893 894 rcu_read_lock(); 895 struct bpos pos = bkey_s_c_to_btree_ptr_v2(k).v->min_key; 896 while (pos.inode <= k.k->p.inode) { 897 if (pos.inode >= c->sb.nr_devices) 898 break; 899 900 struct bch_dev *ca = bch2_dev_rcu_noerror(c, pos.inode); 901 if (!ca) 902 goto next; 903 904 struct bpos bucket = bp_pos_to_bucket(ca, pos); 905 bucket.offset = find_next_bit(ca->bucket_backpointer_mismatches, 906 ca->mi.nbuckets, bucket.offset); 907 if (bucket.offset == ca->mi.nbuckets) 908 goto next; 909 910 ret = bpos_le(bucket_pos_to_bp_end(ca, bucket), k.k->p); 911 if (ret) 912 break; 913 next: 914 pos = SPOS(pos.inode + 1, 0, 0); 915 } 916 rcu_read_unlock(); 917 918 return ret; 919 } 920 case KEY_TYPE_btree_ptr: 921 return true; 922 default: 923 return false; 924 } 925 } 926 927 static int btree_node_get_and_pin(struct btree_trans *trans, struct bkey_i *k, 928 enum btree_id btree, unsigned level) 929 { 930 struct btree_iter iter; 931 bch2_trans_node_iter_init(trans, &iter, btree, k->k.p, 0, level, 0); 932 struct btree *b = bch2_btree_iter_peek_node(&iter); 933 int ret = PTR_ERR_OR_ZERO(b); 934 if (ret) 935 goto err; 936 937 if (b) 938 bch2_node_pin(trans->c, b); 939 err: 940 bch2_trans_iter_exit(trans, &iter); 941 return ret; 942 } 943 944 static int bch2_pin_backpointer_nodes_with_missing(struct btree_trans *trans, 945 struct bpos start, struct bpos *end) 946 { 947 struct bch_fs *c = trans->c; 948 int ret = 0; 949 950 struct bkey_buf tmp; 951 bch2_bkey_buf_init(&tmp); 952 953 bch2_btree_cache_unpin(c); 954 955 *end = SPOS_MAX; 956 957 s64 mem_may_pin = mem_may_pin_bytes(c); 958 struct btree_iter iter; 959 bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start, 960 0, 1, BTREE_ITER_prefetch); 961 ret = for_each_btree_key_continue(trans, iter, 0, k, ({ 962 if (!backpointer_node_has_missing(c, k)) 963 continue; 964 965 mem_may_pin -= c->opts.btree_node_size; 966 if (mem_may_pin <= 0) 967 break; 968 969 bch2_bkey_buf_reassemble(&tmp, c, k); 970 struct btree_path *path = btree_iter_path(trans, &iter); 971 972 BUG_ON(path->level != 1); 973 974 bch2_btree_node_prefetch(trans, path, tmp.k, path->btree_id, path->level - 1); 975 })); 976 if (ret) 977 return ret; 978 979 struct bpos pinned = SPOS_MAX; 980 mem_may_pin = mem_may_pin_bytes(c); 981 bch2_trans_node_iter_init(trans, &iter, BTREE_ID_backpointers, start, 982 0, 1, BTREE_ITER_prefetch); 983 ret = for_each_btree_key_continue(trans, iter, 0, k, ({ 984 if (!backpointer_node_has_missing(c, k)) 985 continue; 986 987 mem_may_pin -= c->opts.btree_node_size; 988 if (mem_may_pin <= 0) { 989 *end = pinned; 990 break; 991 } 992 993 bch2_bkey_buf_reassemble(&tmp, c, k); 994 struct btree_path *path = btree_iter_path(trans, &iter); 995 996 BUG_ON(path->level != 1); 997 998 int ret2 = btree_node_get_and_pin(trans, tmp.k, path->btree_id, path->level - 1); 999 1000 if (!ret2) 1001 pinned = tmp.k->k.p; 1002 1003 ret; 1004 })); 1005 if (ret) 1006 return ret; 1007 1008 return ret; 1009 } 1010 1011 int bch2_check_extents_to_backpointers(struct bch_fs *c) 1012 { 1013 int ret = 0; 1014 1015 /* 1016 * Can't allow devices to come/go/resize while we have bucket bitmaps 1017 * allocated 1018 */ 1019 lockdep_assert_held(&c->state_lock); 1020 1021 for_each_member_device(c, ca) { 1022 BUG_ON(ca->bucket_backpointer_mismatches); 1023 ca->bucket_backpointer_mismatches = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets), 1024 sizeof(unsigned long), 1025 GFP_KERNEL); 1026 ca->bucket_backpointer_empty = kvcalloc(BITS_TO_LONGS(ca->mi.nbuckets), 1027 sizeof(unsigned long), 1028 GFP_KERNEL); 1029 if (!ca->bucket_backpointer_mismatches || 1030 !ca->bucket_backpointer_empty) { 1031 bch2_dev_put(ca); 1032 ret = -BCH_ERR_ENOMEM_backpointer_mismatches_bitmap; 1033 goto err_free_bitmaps; 1034 } 1035 } 1036 1037 struct btree_trans *trans = bch2_trans_get(c); 1038 struct extents_to_bp_state s = { .bp_start = POS_MIN }; 1039 1040 bch2_bkey_buf_init(&s.last_flushed); 1041 bkey_init(&s.last_flushed.k->k); 1042 1043 ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, 1044 POS_MIN, BTREE_ITER_prefetch, k, ({ 1045 check_bucket_backpointer_mismatch(trans, k, &s.last_flushed); 1046 })); 1047 if (ret) 1048 goto err; 1049 1050 u64 nr_buckets = 0, nr_mismatches = 0, nr_empty = 0; 1051 for_each_member_device(c, ca) { 1052 nr_buckets += ca->mi.nbuckets; 1053 nr_mismatches += bitmap_weight(ca->bucket_backpointer_mismatches, ca->mi.nbuckets); 1054 nr_empty += bitmap_weight(ca->bucket_backpointer_empty, ca->mi.nbuckets); 1055 } 1056 1057 if (!nr_mismatches && !nr_empty) 1058 goto err; 1059 1060 bch_info(c, "scanning for missing backpointers in %llu/%llu buckets", 1061 nr_mismatches + nr_empty, nr_buckets); 1062 1063 while (1) { 1064 ret = bch2_pin_backpointer_nodes_with_missing(trans, s.bp_start, &s.bp_end); 1065 if (ret) 1066 break; 1067 1068 if ( bpos_eq(s.bp_start, POS_MIN) && 1069 !bpos_eq(s.bp_end, SPOS_MAX)) 1070 bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass", 1071 __func__, btree_nodes_fit_in_ram(c)); 1072 1073 if (!bpos_eq(s.bp_start, POS_MIN) || 1074 !bpos_eq(s.bp_end, SPOS_MAX)) { 1075 struct printbuf buf = PRINTBUF; 1076 1077 prt_str(&buf, "check_extents_to_backpointers(): "); 1078 bch2_bpos_to_text(&buf, s.bp_start); 1079 prt_str(&buf, "-"); 1080 bch2_bpos_to_text(&buf, s.bp_end); 1081 1082 bch_verbose(c, "%s", buf.buf); 1083 printbuf_exit(&buf); 1084 } 1085 1086 ret = bch2_check_extents_to_backpointers_pass(trans, &s); 1087 if (ret || bpos_eq(s.bp_end, SPOS_MAX)) 1088 break; 1089 1090 s.bp_start = bpos_successor(s.bp_end); 1091 } 1092 err: 1093 bch2_trans_put(trans); 1094 bch2_bkey_buf_exit(&s.last_flushed, c); 1095 bch2_btree_cache_unpin(c); 1096 err_free_bitmaps: 1097 for_each_member_device(c, ca) { 1098 kvfree(ca->bucket_backpointer_empty); 1099 ca->bucket_backpointer_empty = NULL; 1100 kvfree(ca->bucket_backpointer_mismatches); 1101 ca->bucket_backpointer_mismatches = NULL; 1102 } 1103 1104 bch_err_fn(c, ret); 1105 return ret; 1106 } 1107 1108 static int check_one_backpointer(struct btree_trans *trans, 1109 struct bbpos start, 1110 struct bbpos end, 1111 struct bkey_s_c bp_k, 1112 struct bkey_buf *last_flushed) 1113 { 1114 if (bp_k.k->type != KEY_TYPE_backpointer) 1115 return 0; 1116 1117 struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); 1118 struct bbpos pos = bp_to_bbpos(*bp.v); 1119 1120 if (bbpos_cmp(pos, start) < 0 || 1121 bbpos_cmp(pos, end) > 0) 1122 return 0; 1123 1124 struct btree_iter iter; 1125 struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0, last_flushed); 1126 int ret = bkey_err(k); 1127 if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) 1128 return 0; 1129 if (ret) 1130 return ret; 1131 1132 bch2_trans_iter_exit(trans, &iter); 1133 return ret; 1134 } 1135 1136 static int check_bucket_backpointers_to_extents(struct btree_trans *trans, 1137 struct bch_dev *ca, struct bpos bucket) 1138 { 1139 u32 restart_count = trans->restart_count; 1140 struct bkey_buf last_flushed; 1141 bch2_bkey_buf_init(&last_flushed); 1142 bkey_init(&last_flushed.k->k); 1143 1144 int ret = for_each_btree_key_max(trans, iter, BTREE_ID_backpointers, 1145 bucket_pos_to_bp_start(ca, bucket), 1146 bucket_pos_to_bp_end(ca, bucket), 1147 0, k, 1148 check_one_backpointer(trans, BBPOS_MIN, BBPOS_MAX, k, &last_flushed) 1149 ); 1150 1151 bch2_bkey_buf_exit(&last_flushed, trans->c); 1152 return ret ?: trans_was_restarted(trans, restart_count); 1153 } 1154 1155 static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, 1156 struct bbpos start, 1157 struct bbpos end) 1158 { 1159 struct bch_fs *c = trans->c; 1160 struct bkey_buf last_flushed; 1161 struct progress_indicator_state progress; 1162 1163 bch2_bkey_buf_init(&last_flushed); 1164 bkey_init(&last_flushed.k->k); 1165 bch2_progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers)); 1166 1167 int ret = for_each_btree_key(trans, iter, BTREE_ID_backpointers, 1168 POS_MIN, BTREE_ITER_prefetch, k, ({ 1169 bch2_progress_update_iter(trans, &progress, &iter, "backpointers_to_extents"); 1170 check_one_backpointer(trans, start, end, k, &last_flushed); 1171 })); 1172 1173 bch2_bkey_buf_exit(&last_flushed, c); 1174 return ret; 1175 } 1176 1177 int bch2_check_backpointers_to_extents(struct bch_fs *c) 1178 { 1179 struct btree_trans *trans = bch2_trans_get(c); 1180 struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end; 1181 int ret; 1182 1183 while (1) { 1184 ret = bch2_get_btree_in_memory_pos(trans, 1185 BIT_ULL(BTREE_ID_extents)| 1186 BIT_ULL(BTREE_ID_reflink), 1187 ~0, 1188 start, &end); 1189 if (ret) 1190 break; 1191 1192 if (!bbpos_cmp(start, BBPOS_MIN) && 1193 bbpos_cmp(end, BBPOS_MAX)) 1194 bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass", 1195 __func__, btree_nodes_fit_in_ram(c)); 1196 1197 if (bbpos_cmp(start, BBPOS_MIN) || 1198 bbpos_cmp(end, BBPOS_MAX)) { 1199 struct printbuf buf = PRINTBUF; 1200 1201 prt_str(&buf, "check_backpointers_to_extents(): "); 1202 bch2_bbpos_to_text(&buf, start); 1203 prt_str(&buf, "-"); 1204 bch2_bbpos_to_text(&buf, end); 1205 1206 bch_verbose(c, "%s", buf.buf); 1207 printbuf_exit(&buf); 1208 } 1209 1210 ret = bch2_check_backpointers_to_extents_pass(trans, start, end); 1211 if (ret || !bbpos_cmp(end, BBPOS_MAX)) 1212 break; 1213 1214 start = bbpos_successor(end); 1215 } 1216 bch2_trans_put(trans); 1217 1218 bch2_btree_cache_unpin(c); 1219 1220 bch_err_fn(c, ret); 1221 return ret; 1222 } 1223