1 // SPDX-License-Identifier: GPL-2.0 2 #include "bcachefs.h" 3 #include "alloc_background.h" 4 #include "alloc_foreground.h" 5 #include "backpointers.h" 6 #include "bkey_buf.h" 7 #include "btree_cache.h" 8 #include "btree_io.h" 9 #include "btree_key_cache.h" 10 #include "btree_update.h" 11 #include "btree_update_interior.h" 12 #include "btree_gc.h" 13 #include "btree_write_buffer.h" 14 #include "buckets.h" 15 #include "buckets_waiting_for_journal.h" 16 #include "clock.h" 17 #include "debug.h" 18 #include "disk_accounting.h" 19 #include "ec.h" 20 #include "error.h" 21 #include "lru.h" 22 #include "recovery.h" 23 #include "trace.h" 24 #include "varint.h" 25 26 #include <linux/kthread.h> 27 #include <linux/math64.h> 28 #include <linux/random.h> 29 #include <linux/rculist.h> 30 #include <linux/rcupdate.h> 31 #include <linux/sched/task.h> 32 #include <linux/sort.h> 33 #include <linux/jiffies.h> 34 35 static void bch2_discard_one_bucket_fast(struct bch_dev *, u64); 36 37 /* Persistent alloc info: */ 38 39 static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = { 40 #define x(name, bits) [BCH_ALLOC_FIELD_V1_##name] = bits / 8, 41 BCH_ALLOC_FIELDS_V1() 42 #undef x 43 }; 44 45 struct bkey_alloc_unpacked { 46 u64 journal_seq; 47 u8 gen; 48 u8 oldest_gen; 49 u8 data_type; 50 bool need_discard:1; 51 bool need_inc_gen:1; 52 #define x(_name, _bits) u##_bits _name; 53 BCH_ALLOC_FIELDS_V2() 54 #undef x 55 }; 56 57 static inline u64 alloc_field_v1_get(const struct bch_alloc *a, 58 const void **p, unsigned field) 59 { 60 unsigned bytes = BCH_ALLOC_V1_FIELD_BYTES[field]; 61 u64 v; 62 63 if (!(a->fields & (1 << field))) 64 return 0; 65 66 switch (bytes) { 67 case 1: 68 v = *((const u8 *) *p); 69 break; 70 case 2: 71 v = le16_to_cpup(*p); 72 break; 73 case 4: 74 v = le32_to_cpup(*p); 75 break; 76 case 8: 77 v = le64_to_cpup(*p); 78 break; 79 default: 80 BUG(); 81 } 82 83 *p += bytes; 84 return v; 85 } 86 87 static void bch2_alloc_unpack_v1(struct bkey_alloc_unpacked *out, 88 struct bkey_s_c k) 89 { 90 const struct bch_alloc *in = bkey_s_c_to_alloc(k).v; 91 const void *d = in->data; 92 unsigned idx = 0; 93 94 out->gen = in->gen; 95 96 #define x(_name, _bits) out->_name = alloc_field_v1_get(in, &d, idx++); 97 BCH_ALLOC_FIELDS_V1() 98 #undef x 99 } 100 101 static int bch2_alloc_unpack_v2(struct bkey_alloc_unpacked *out, 102 struct bkey_s_c k) 103 { 104 struct bkey_s_c_alloc_v2 a = bkey_s_c_to_alloc_v2(k); 105 const u8 *in = a.v->data; 106 const u8 *end = bkey_val_end(a); 107 unsigned fieldnr = 0; 108 int ret; 109 u64 v; 110 111 out->gen = a.v->gen; 112 out->oldest_gen = a.v->oldest_gen; 113 out->data_type = a.v->data_type; 114 115 #define x(_name, _bits) \ 116 if (fieldnr < a.v->nr_fields) { \ 117 ret = bch2_varint_decode_fast(in, end, &v); \ 118 if (ret < 0) \ 119 return ret; \ 120 in += ret; \ 121 } else { \ 122 v = 0; \ 123 } \ 124 out->_name = v; \ 125 if (v != out->_name) \ 126 return -1; \ 127 fieldnr++; 128 129 BCH_ALLOC_FIELDS_V2() 130 #undef x 131 return 0; 132 } 133 134 static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out, 135 struct bkey_s_c k) 136 { 137 struct bkey_s_c_alloc_v3 a = bkey_s_c_to_alloc_v3(k); 138 const u8 *in = a.v->data; 139 const u8 *end = bkey_val_end(a); 140 unsigned fieldnr = 0; 141 int ret; 142 u64 v; 143 144 out->gen = a.v->gen; 145 out->oldest_gen = a.v->oldest_gen; 146 out->data_type = a.v->data_type; 147 out->need_discard = BCH_ALLOC_V3_NEED_DISCARD(a.v); 148 out->need_inc_gen = BCH_ALLOC_V3_NEED_INC_GEN(a.v); 149 out->journal_seq = le64_to_cpu(a.v->journal_seq); 150 151 
#define x(_name, _bits) \ 152 if (fieldnr < a.v->nr_fields) { \ 153 ret = bch2_varint_decode_fast(in, end, &v); \ 154 if (ret < 0) \ 155 return ret; \ 156 in += ret; \ 157 } else { \ 158 v = 0; \ 159 } \ 160 out->_name = v; \ 161 if (v != out->_name) \ 162 return -1; \ 163 fieldnr++; 164 165 BCH_ALLOC_FIELDS_V2() 166 #undef x 167 return 0; 168 } 169 170 static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k) 171 { 172 struct bkey_alloc_unpacked ret = { .gen = 0 }; 173 174 switch (k.k->type) { 175 case KEY_TYPE_alloc: 176 bch2_alloc_unpack_v1(&ret, k); 177 break; 178 case KEY_TYPE_alloc_v2: 179 bch2_alloc_unpack_v2(&ret, k); 180 break; 181 case KEY_TYPE_alloc_v3: 182 bch2_alloc_unpack_v3(&ret, k); 183 break; 184 } 185 186 return ret; 187 } 188 189 static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) 190 { 191 unsigned i, bytes = offsetof(struct bch_alloc, data); 192 193 for (i = 0; i < ARRAY_SIZE(BCH_ALLOC_V1_FIELD_BYTES); i++) 194 if (a->fields & (1 << i)) 195 bytes += BCH_ALLOC_V1_FIELD_BYTES[i]; 196 197 return DIV_ROUND_UP(bytes, sizeof(u64)); 198 } 199 200 int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k, 201 struct bkey_validate_context from) 202 { 203 struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); 204 int ret = 0; 205 206 /* allow for unknown fields */ 207 bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), 208 c, alloc_v1_val_size_bad, 209 "incorrect value size (%zu < %u)", 210 bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v)); 211 fsck_err: 212 return ret; 213 } 214 215 int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k, 216 struct bkey_validate_context from) 217 { 218 struct bkey_alloc_unpacked u; 219 int ret = 0; 220 221 bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), 222 c, alloc_v2_unpack_error, 223 "unpack error"); 224 fsck_err: 225 return ret; 226 } 227 228 int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k, 229 struct bkey_validate_context from) 230 { 231 struct bkey_alloc_unpacked u; 232 int ret = 0; 233 234 bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), 235 c, alloc_v2_unpack_error, 236 "unpack error"); 237 fsck_err: 238 return ret; 239 } 240 241 int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, 242 struct bkey_validate_context from) 243 { 244 struct bch_alloc_v4 a; 245 int ret = 0; 246 247 bkey_val_copy(&a, bkey_s_c_to_alloc_v4(k)); 248 249 bkey_fsck_err_on(alloc_v4_u64s_noerror(&a) > bkey_val_u64s(k.k), 250 c, alloc_v4_val_size_bad, 251 "bad val size (%u > %zu)", 252 alloc_v4_u64s_noerror(&a), bkey_val_u64s(k.k)); 253 254 bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(&a) && 255 BCH_ALLOC_V4_NR_BACKPOINTERS(&a), 256 c, alloc_v4_backpointers_start_bad, 257 "invalid backpointers_start"); 258 259 bkey_fsck_err_on(alloc_data_type(a, a.data_type) != a.data_type, 260 c, alloc_key_data_type_bad, 261 "invalid data type (got %u should be %u)", 262 a.data_type, alloc_data_type(a, a.data_type)); 263 264 for (unsigned i = 0; i < 2; i++) 265 bkey_fsck_err_on(a.io_time[i] > LRU_TIME_MAX, 266 c, alloc_key_io_time_bad, 267 "invalid io_time[%s]: %llu, max %llu", 268 i == READ ? "read" : "write", 269 a.io_time[i], LRU_TIME_MAX); 270 271 unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(&a) * sizeof(u64) > 272 offsetof(struct bch_alloc_v4, stripe_sectors) 273 ? 
a.stripe_sectors 274 : 0; 275 276 switch (a.data_type) { 277 case BCH_DATA_free: 278 case BCH_DATA_need_gc_gens: 279 case BCH_DATA_need_discard: 280 bkey_fsck_err_on(stripe_sectors || 281 a.dirty_sectors || 282 a.cached_sectors || 283 a.stripe, 284 c, alloc_key_empty_but_have_data, 285 "empty data type free but have data %u.%u.%u %u", 286 stripe_sectors, 287 a.dirty_sectors, 288 a.cached_sectors, 289 a.stripe); 290 break; 291 case BCH_DATA_sb: 292 case BCH_DATA_journal: 293 case BCH_DATA_btree: 294 case BCH_DATA_user: 295 case BCH_DATA_parity: 296 bkey_fsck_err_on(!a.dirty_sectors && 297 !stripe_sectors, 298 c, alloc_key_dirty_sectors_0, 299 "data_type %s but dirty_sectors==0", 300 bch2_data_type_str(a.data_type)); 301 break; 302 case BCH_DATA_cached: 303 bkey_fsck_err_on(!a.cached_sectors || 304 a.dirty_sectors || 305 stripe_sectors || 306 a.stripe, 307 c, alloc_key_cached_inconsistency, 308 "data type inconsistency"); 309 310 bkey_fsck_err_on(!a.io_time[READ] && 311 c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs, 312 c, alloc_key_cached_but_read_time_zero, 313 "cached bucket with read_time == 0"); 314 break; 315 case BCH_DATA_stripe: 316 break; 317 } 318 fsck_err: 319 return ret; 320 } 321 322 void bch2_alloc_v4_swab(struct bkey_s k) 323 { 324 struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v; 325 326 a->journal_seq = swab64(a->journal_seq); 327 a->flags = swab32(a->flags); 328 a->dirty_sectors = swab32(a->dirty_sectors); 329 a->cached_sectors = swab32(a->cached_sectors); 330 a->io_time[0] = swab64(a->io_time[0]); 331 a->io_time[1] = swab64(a->io_time[1]); 332 a->stripe = swab32(a->stripe); 333 a->nr_external_backpointers = swab32(a->nr_external_backpointers); 334 a->stripe_sectors = swab32(a->stripe_sectors); 335 } 336 337 void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) 338 { 339 struct bch_alloc_v4 _a; 340 const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); 341 struct bch_dev *ca = c ? 
bch2_dev_bucket_tryget_noerror(c, k.k->p) : NULL; 342 343 prt_newline(out); 344 printbuf_indent_add(out, 2); 345 346 prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen); 347 bch2_prt_data_type(out, a->data_type); 348 prt_newline(out); 349 prt_printf(out, "journal_seq %llu\n", a->journal_seq); 350 prt_printf(out, "need_discard %llu\n", BCH_ALLOC_V4_NEED_DISCARD(a)); 351 prt_printf(out, "need_inc_gen %llu\n", BCH_ALLOC_V4_NEED_INC_GEN(a)); 352 prt_printf(out, "dirty_sectors %u\n", a->dirty_sectors); 353 prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors); 354 prt_printf(out, "cached_sectors %u\n", a->cached_sectors); 355 prt_printf(out, "stripe %u\n", a->stripe); 356 prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy); 357 prt_printf(out, "io_time[READ] %llu\n", a->io_time[READ]); 358 prt_printf(out, "io_time[WRITE] %llu\n", a->io_time[WRITE]); 359 360 if (ca) 361 prt_printf(out, "fragmentation %llu\n", alloc_lru_idx_fragmentation(*a, ca)); 362 prt_printf(out, "bp_start %llu\n", BCH_ALLOC_V4_BACKPOINTERS_START(a)); 363 printbuf_indent_sub(out, 2); 364 365 bch2_dev_put(ca); 366 } 367 368 void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) 369 { 370 if (k.k->type == KEY_TYPE_alloc_v4) { 371 void *src, *dst; 372 373 *out = *bkey_s_c_to_alloc_v4(k).v; 374 375 src = alloc_v4_backpointers(out); 376 SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s); 377 dst = alloc_v4_backpointers(out); 378 379 if (src < dst) 380 memset(src, 0, dst - src); 381 382 SET_BCH_ALLOC_V4_NR_BACKPOINTERS(out, 0); 383 } else { 384 struct bkey_alloc_unpacked u = bch2_alloc_unpack(k); 385 386 *out = (struct bch_alloc_v4) { 387 .journal_seq = u.journal_seq, 388 .flags = u.need_discard, 389 .gen = u.gen, 390 .oldest_gen = u.oldest_gen, 391 .data_type = u.data_type, 392 .stripe_redundancy = u.stripe_redundancy, 393 .dirty_sectors = u.dirty_sectors, 394 .cached_sectors = u.cached_sectors, 395 .io_time[READ] = u.read_time, 396 .io_time[WRITE] = u.write_time, 397 .stripe = u.stripe, 398 }; 399 400 SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s); 401 } 402 } 403 404 static noinline struct bkey_i_alloc_v4 * 405 __bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k) 406 { 407 struct bkey_i_alloc_v4 *ret; 408 409 ret = bch2_trans_kmalloc(trans, max(bkey_bytes(k.k), sizeof(struct bkey_i_alloc_v4))); 410 if (IS_ERR(ret)) 411 return ret; 412 413 if (k.k->type == KEY_TYPE_alloc_v4) { 414 void *src, *dst; 415 416 bkey_reassemble(&ret->k_i, k); 417 418 src = alloc_v4_backpointers(&ret->v); 419 SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s); 420 dst = alloc_v4_backpointers(&ret->v); 421 422 if (src < dst) 423 memset(src, 0, dst - src); 424 425 SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v, 0); 426 set_alloc_v4_u64s(ret); 427 } else { 428 bkey_alloc_v4_init(&ret->k_i); 429 ret->k.p = k.k->p; 430 bch2_alloc_to_v4(k, &ret->v); 431 } 432 return ret; 433 } 434 435 static inline struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut_inlined(struct btree_trans *trans, struct bkey_s_c k) 436 { 437 struct bkey_s_c_alloc_v4 a; 438 439 if (likely(k.k->type == KEY_TYPE_alloc_v4) && 440 ((a = bkey_s_c_to_alloc_v4(k), true) && 441 BCH_ALLOC_V4_NR_BACKPOINTERS(a.v) == 0)) 442 return bch2_bkey_make_mut_noupdate_typed(trans, k, alloc_v4); 443 444 return __bch2_alloc_to_v4_mut(trans, k); 445 } 446 447 struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k) 448 { 449 return bch2_alloc_to_v4_mut_inlined(trans, k); 450 } 451 452 struct 
bkey_i_alloc_v4 * 453 bch2_trans_start_alloc_update_noupdate(struct btree_trans *trans, struct btree_iter *iter, 454 struct bpos pos) 455 { 456 struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_alloc, pos, 457 BTREE_ITER_with_updates| 458 BTREE_ITER_cached| 459 BTREE_ITER_intent); 460 int ret = bkey_err(k); 461 if (unlikely(ret)) 462 return ERR_PTR(ret); 463 464 struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut_inlined(trans, k); 465 ret = PTR_ERR_OR_ZERO(a); 466 if (unlikely(ret)) 467 goto err; 468 return a; 469 err: 470 bch2_trans_iter_exit(trans, iter); 471 return ERR_PTR(ret); 472 } 473 474 __flatten 475 struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, struct bpos pos, 476 enum btree_iter_update_trigger_flags flags) 477 { 478 struct btree_iter iter; 479 struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update_noupdate(trans, &iter, pos); 480 int ret = PTR_ERR_OR_ZERO(a); 481 if (ret) 482 return ERR_PTR(ret); 483 484 ret = bch2_trans_update(trans, &iter, &a->k_i, flags); 485 bch2_trans_iter_exit(trans, &iter); 486 return unlikely(ret) ? ERR_PTR(ret) : a; 487 } 488 489 static struct bpos alloc_gens_pos(struct bpos pos, unsigned *offset) 490 { 491 *offset = pos.offset & KEY_TYPE_BUCKET_GENS_MASK; 492 493 pos.offset >>= KEY_TYPE_BUCKET_GENS_BITS; 494 return pos; 495 } 496 497 static struct bpos bucket_gens_pos_to_alloc(struct bpos pos, unsigned offset) 498 { 499 pos.offset <<= KEY_TYPE_BUCKET_GENS_BITS; 500 pos.offset += offset; 501 return pos; 502 } 503 504 static unsigned alloc_gen(struct bkey_s_c k, unsigned offset) 505 { 506 return k.k->type == KEY_TYPE_bucket_gens 507 ? bkey_s_c_to_bucket_gens(k).v->gens[offset] 508 : 0; 509 } 510 511 int bch2_bucket_gens_validate(struct bch_fs *c, struct bkey_s_c k, 512 struct bkey_validate_context from) 513 { 514 int ret = 0; 515 516 bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), 517 c, bucket_gens_val_size_bad, 518 "bad val size (%zu != %zu)", 519 bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens)); 520 fsck_err: 521 return ret; 522 } 523 524 void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) 525 { 526 struct bkey_s_c_bucket_gens g = bkey_s_c_to_bucket_gens(k); 527 unsigned i; 528 529 for (i = 0; i < ARRAY_SIZE(g.v->gens); i++) { 530 if (i) 531 prt_char(out, ' '); 532 prt_printf(out, "%u", g.v->gens[i]); 533 } 534 } 535 536 int bch2_bucket_gens_init(struct bch_fs *c) 537 { 538 struct btree_trans *trans = bch2_trans_get(c); 539 struct bkey_i_bucket_gens g; 540 bool have_bucket_gens_key = false; 541 int ret; 542 543 ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, 544 BTREE_ITER_prefetch, k, ({ 545 /* 546 * Not a fsck error because this is checked/repaired by 547 * bch2_check_alloc_key() which runs later: 548 */ 549 if (!bch2_dev_bucket_exists(c, k.k->p)) 550 continue; 551 552 struct bch_alloc_v4 a; 553 u8 gen = bch2_alloc_to_v4(k, &a)->gen; 554 unsigned offset; 555 struct bpos pos = alloc_gens_pos(iter.pos, &offset); 556 int ret2 = 0; 557 558 if (have_bucket_gens_key && !bkey_eq(g.k.p, pos)) { 559 ret2 = bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?: 560 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 561 if (ret2) 562 goto iter_err; 563 have_bucket_gens_key = false; 564 } 565 566 if (!have_bucket_gens_key) { 567 bkey_bucket_gens_init(&g.k_i); 568 g.k.p = pos; 569 have_bucket_gens_key = true; 570 } 571 572 g.v.gens[offset] = gen; 573 iter_err: 574 ret2; 575 })); 576 577 if 
(have_bucket_gens_key && !ret) 578 ret = commit_do(trans, NULL, NULL, 579 BCH_TRANS_COMMIT_no_enospc, 580 bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0)); 581 582 bch2_trans_put(trans); 583 584 bch_err_fn(c, ret); 585 return ret; 586 } 587 588 int bch2_alloc_read(struct bch_fs *c) 589 { 590 struct btree_trans *trans = bch2_trans_get(c); 591 struct bch_dev *ca = NULL; 592 int ret; 593 594 if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) { 595 ret = for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN, 596 BTREE_ITER_prefetch, k, ({ 597 u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset; 598 u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset; 599 600 if (k.k->type != KEY_TYPE_bucket_gens) 601 continue; 602 603 ca = bch2_dev_iterate(c, ca, k.k->p.inode); 604 /* 605 * Not a fsck error because this is checked/repaired by 606 * bch2_check_alloc_key() which runs later: 607 */ 608 if (!ca) { 609 bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0)); 610 continue; 611 } 612 613 const struct bch_bucket_gens *g = bkey_s_c_to_bucket_gens(k).v; 614 615 for (u64 b = max_t(u64, ca->mi.first_bucket, start); 616 b < min_t(u64, ca->mi.nbuckets, end); 617 b++) 618 *bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK]; 619 0; 620 })); 621 } else { 622 ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, 623 BTREE_ITER_prefetch, k, ({ 624 ca = bch2_dev_iterate(c, ca, k.k->p.inode); 625 /* 626 * Not a fsck error because this is checked/repaired by 627 * bch2_check_alloc_key() which runs later: 628 */ 629 if (!ca) { 630 bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0)); 631 continue; 632 } 633 634 if (k.k->p.offset < ca->mi.first_bucket) { 635 bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode, ca->mi.first_bucket)); 636 continue; 637 } 638 639 if (k.k->p.offset >= ca->mi.nbuckets) { 640 bch2_btree_iter_set_pos(&iter, POS(k.k->p.inode + 1, 0)); 641 continue; 642 } 643 644 struct bch_alloc_v4 a; 645 *bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen; 646 0; 647 })); 648 } 649 650 bch2_dev_put(ca); 651 bch2_trans_put(trans); 652 653 bch_err_fn(c, ret); 654 return ret; 655 } 656 657 /* Free space/discard btree: */ 658 659 static int __need_discard_or_freespace_err(struct btree_trans *trans, 660 struct bkey_s_c alloc_k, 661 bool set, bool discard, bool repair) 662 { 663 struct bch_fs *c = trans->c; 664 enum bch_fsck_flags flags = FSCK_CAN_IGNORE|(repair ? FSCK_CAN_FIX : 0); 665 enum bch_sb_error_id err_id = discard 666 ? BCH_FSCK_ERR_need_discard_key_wrong 667 : BCH_FSCK_ERR_freespace_key_wrong; 668 enum btree_id btree = discard ? BTREE_ID_need_discard : BTREE_ID_freespace; 669 struct printbuf buf = PRINTBUF; 670 671 bch2_bkey_val_to_text(&buf, c, alloc_k); 672 673 int ret = __bch2_fsck_err(NULL, trans, flags, err_id, 674 "bucket incorrectly %sset in %s btree\n" 675 " %s", 676 set ? "" : "un", 677 bch2_btree_id_str(btree), 678 buf.buf); 679 if (ret == -BCH_ERR_fsck_ignore || 680 ret == -BCH_ERR_fsck_errors_not_fixed) 681 ret = 0; 682 683 printbuf_exit(&buf); 684 return ret; 685 } 686 687 #define need_discard_or_freespace_err(...) \ 688 fsck_err_wrap(__need_discard_or_freespace_err(__VA_ARGS__)) 689 690 #define need_discard_or_freespace_err_on(cond, ...) \ 691 (unlikely(cond) ? 
need_discard_or_freespace_err(__VA_ARGS__) : false) 692 693 static int bch2_bucket_do_index(struct btree_trans *trans, 694 struct bch_dev *ca, 695 struct bkey_s_c alloc_k, 696 const struct bch_alloc_v4 *a, 697 bool set) 698 { 699 enum btree_id btree; 700 struct bpos pos; 701 702 if (a->data_type != BCH_DATA_free && 703 a->data_type != BCH_DATA_need_discard) 704 return 0; 705 706 switch (a->data_type) { 707 case BCH_DATA_free: 708 btree = BTREE_ID_freespace; 709 pos = alloc_freespace_pos(alloc_k.k->p, *a); 710 break; 711 case BCH_DATA_need_discard: 712 btree = BTREE_ID_need_discard; 713 pos = alloc_k.k->p; 714 break; 715 default: 716 return 0; 717 } 718 719 struct btree_iter iter; 720 struct bkey_s_c old = bch2_bkey_get_iter(trans, &iter, btree, pos, BTREE_ITER_intent); 721 int ret = bkey_err(old); 722 if (ret) 723 return ret; 724 725 need_discard_or_freespace_err_on(ca->mi.freespace_initialized && 726 !old.k->type != set, 727 trans, alloc_k, set, 728 btree == BTREE_ID_need_discard, false); 729 730 ret = bch2_btree_bit_mod_iter(trans, &iter, set); 731 fsck_err: 732 bch2_trans_iter_exit(trans, &iter); 733 return ret; 734 } 735 736 static noinline int bch2_bucket_gen_update(struct btree_trans *trans, 737 struct bpos bucket, u8 gen) 738 { 739 struct btree_iter iter; 740 unsigned offset; 741 struct bpos pos = alloc_gens_pos(bucket, &offset); 742 struct bkey_i_bucket_gens *g; 743 struct bkey_s_c k; 744 int ret; 745 746 g = bch2_trans_kmalloc(trans, sizeof(*g)); 747 ret = PTR_ERR_OR_ZERO(g); 748 if (ret) 749 return ret; 750 751 k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_bucket_gens, pos, 752 BTREE_ITER_intent| 753 BTREE_ITER_with_updates); 754 ret = bkey_err(k); 755 if (ret) 756 return ret; 757 758 if (k.k->type != KEY_TYPE_bucket_gens) { 759 bkey_bucket_gens_init(&g->k_i); 760 g->k.p = iter.pos; 761 } else { 762 bkey_reassemble(&g->k_i, k); 763 } 764 765 g->v.gens[offset] = gen; 766 767 ret = bch2_trans_update(trans, &iter, &g->k_i, 0); 768 bch2_trans_iter_exit(trans, &iter); 769 return ret; 770 } 771 772 static inline int bch2_dev_data_type_accounting_mod(struct btree_trans *trans, struct bch_dev *ca, 773 enum bch_data_type data_type, 774 s64 delta_buckets, 775 s64 delta_sectors, 776 s64 delta_fragmented, unsigned flags) 777 { 778 struct disk_accounting_pos acc = { 779 .type = BCH_DISK_ACCOUNTING_dev_data_type, 780 .dev_data_type.dev = ca->dev_idx, 781 .dev_data_type.data_type = data_type, 782 }; 783 s64 d[3] = { delta_buckets, delta_sectors, delta_fragmented }; 784 785 return bch2_disk_accounting_mod(trans, &acc, d, 3, flags & BTREE_TRIGGER_gc); 786 } 787 788 int bch2_alloc_key_to_dev_counters(struct btree_trans *trans, struct bch_dev *ca, 789 const struct bch_alloc_v4 *old, 790 const struct bch_alloc_v4 *new, 791 unsigned flags) 792 { 793 s64 old_sectors = bch2_bucket_sectors(*old); 794 s64 new_sectors = bch2_bucket_sectors(*new); 795 if (old->data_type != new->data_type) { 796 int ret = bch2_dev_data_type_accounting_mod(trans, ca, new->data_type, 797 1, new_sectors, bch2_bucket_sectors_fragmented(ca, *new), flags) ?: 798 bch2_dev_data_type_accounting_mod(trans, ca, old->data_type, 799 -1, -old_sectors, -bch2_bucket_sectors_fragmented(ca, *old), flags); 800 if (ret) 801 return ret; 802 } else if (old_sectors != new_sectors) { 803 int ret = bch2_dev_data_type_accounting_mod(trans, ca, new->data_type, 804 0, 805 new_sectors - old_sectors, 806 bch2_bucket_sectors_fragmented(ca, *new) - 807 bch2_bucket_sectors_fragmented(ca, *old), flags); 808 if (ret) 809 return ret; 810 } 811 812 s64 
old_unstriped = bch2_bucket_sectors_unstriped(*old); 813 s64 new_unstriped = bch2_bucket_sectors_unstriped(*new); 814 if (old_unstriped != new_unstriped) { 815 int ret = bch2_dev_data_type_accounting_mod(trans, ca, BCH_DATA_unstriped, 816 !!new_unstriped - !!old_unstriped, 817 new_unstriped - old_unstriped, 818 0, 819 flags); 820 if (ret) 821 return ret; 822 } 823 824 return 0; 825 } 826 827 int bch2_trigger_alloc(struct btree_trans *trans, 828 enum btree_id btree, unsigned level, 829 struct bkey_s_c old, struct bkey_s new, 830 enum btree_iter_update_trigger_flags flags) 831 { 832 struct bch_fs *c = trans->c; 833 struct printbuf buf = PRINTBUF; 834 int ret = 0; 835 836 struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p); 837 if (!ca) 838 return -EIO; 839 840 struct bch_alloc_v4 old_a_convert; 841 const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert); 842 843 struct bch_alloc_v4 *new_a; 844 if (likely(new.k->type == KEY_TYPE_alloc_v4)) { 845 new_a = bkey_s_to_alloc_v4(new).v; 846 } else { 847 BUG_ON(!(flags & (BTREE_TRIGGER_gc|BTREE_TRIGGER_check_repair))); 848 849 struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c); 850 ret = PTR_ERR_OR_ZERO(new_ka); 851 if (unlikely(ret)) 852 goto err; 853 new_a = &new_ka->v; 854 } 855 856 if (flags & BTREE_TRIGGER_transactional) { 857 alloc_data_type_set(new_a, new_a->data_type); 858 859 if (bch2_bucket_sectors_total(*new_a) > bch2_bucket_sectors_total(*old_a)) { 860 new_a->io_time[READ] = bch2_current_io_time(c, READ); 861 new_a->io_time[WRITE]= bch2_current_io_time(c, WRITE); 862 SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true); 863 SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true); 864 } 865 866 if (data_type_is_empty(new_a->data_type) && 867 BCH_ALLOC_V4_NEED_INC_GEN(new_a) && 868 !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) { 869 new_a->gen++; 870 SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false); 871 alloc_data_type_set(new_a, new_a->data_type); 872 } 873 874 if (old_a->data_type != new_a->data_type || 875 (new_a->data_type == BCH_DATA_free && 876 alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) { 877 ret = bch2_bucket_do_index(trans, ca, old, old_a, false) ?: 878 bch2_bucket_do_index(trans, ca, new.s_c, new_a, true); 879 if (ret) 880 goto err; 881 } 882 883 if (new_a->data_type == BCH_DATA_cached && 884 !new_a->io_time[READ]) 885 new_a->io_time[READ] = bch2_current_io_time(c, READ); 886 887 u64 old_lru = alloc_lru_idx_read(*old_a); 888 u64 new_lru = alloc_lru_idx_read(*new_a); 889 if (old_lru != new_lru) { 890 ret = bch2_lru_change(trans, new.k->p.inode, 891 bucket_to_u64(new.k->p), 892 old_lru, new_lru); 893 if (ret) 894 goto err; 895 } 896 897 old_lru = alloc_lru_idx_fragmentation(*old_a, ca); 898 new_lru = alloc_lru_idx_fragmentation(*new_a, ca); 899 if (old_lru != new_lru) { 900 ret = bch2_lru_change(trans, 901 BCH_LRU_FRAGMENTATION_START, 902 bucket_to_u64(new.k->p), 903 old_lru, new_lru); 904 if (ret) 905 goto err; 906 } 907 908 if (old_a->gen != new_a->gen) { 909 ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen); 910 if (ret) 911 goto err; 912 } 913 914 if ((flags & BTREE_TRIGGER_bucket_invalidate) && 915 old_a->cached_sectors) { 916 ret = bch2_mod_dev_cached_sectors(trans, ca->dev_idx, 917 -((s64) old_a->cached_sectors), 918 flags & BTREE_TRIGGER_gc); 919 if (ret) 920 goto err; 921 } 922 923 ret = bch2_alloc_key_to_dev_counters(trans, ca, old_a, new_a, flags); 924 if (ret) 925 goto err; 926 } 927 928 if ((flags & BTREE_TRIGGER_atomic) && (flags & 
BTREE_TRIGGER_insert)) { 929 u64 transaction_seq = trans->journal_res.seq; 930 931 if (log_fsck_err_on(transaction_seq && new_a->journal_seq > transaction_seq, 932 trans, alloc_key_journal_seq_in_future, 933 "bucket journal seq in future (currently at %llu)\n%s", 934 journal_cur_seq(&c->journal), 935 (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf))) 936 new_a->journal_seq = transaction_seq; 937 938 int is_empty_delta = (int) data_type_is_empty(new_a->data_type) - 939 (int) data_type_is_empty(old_a->data_type); 940 941 /* Record journal sequence number of empty -> nonempty transition: */ 942 if (is_empty_delta < 0) 943 new_a->journal_seq = max(new_a->journal_seq, transaction_seq); 944 945 /* 946 * Bucket becomes empty: mark it as waiting for a journal flush, 947 * unless updates since empty -> nonempty transition were never 948 * flushed - we may need to ask the journal not to flush 949 * intermediate sequence numbers: 950 */ 951 if (is_empty_delta > 0) { 952 if (new_a->journal_seq == transaction_seq || 953 bch2_journal_noflush_seq(&c->journal, new_a->journal_seq)) 954 new_a->journal_seq = 0; 955 else { 956 new_a->journal_seq = transaction_seq; 957 958 ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, 959 c->journal.flushed_seq_ondisk, 960 new.k->p.inode, new.k->p.offset, 961 transaction_seq); 962 if (bch2_fs_fatal_err_on(ret, c, 963 "setting bucket_needs_journal_commit: %s", bch2_err_str(ret))) 964 goto err; 965 } 966 } 967 968 if (new_a->gen != old_a->gen) { 969 rcu_read_lock(); 970 u8 *gen = bucket_gen(ca, new.k->p.offset); 971 if (unlikely(!gen)) { 972 rcu_read_unlock(); 973 goto invalid_bucket; 974 } 975 *gen = new_a->gen; 976 rcu_read_unlock(); 977 } 978 979 #define eval_state(_a, expr) ({ const struct bch_alloc_v4 *a = _a; expr; }) 980 #define statechange(expr) !eval_state(old_a, expr) && eval_state(new_a, expr) 981 #define bucket_flushed(a) (!a->journal_seq || a->journal_seq <= c->journal.flushed_seq_ondisk) 982 983 if (statechange(a->data_type == BCH_DATA_free) && 984 bucket_flushed(new_a)) 985 closure_wake_up(&c->freelist_wait); 986 987 if (statechange(a->data_type == BCH_DATA_need_discard) && 988 !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) && 989 bucket_flushed(new_a)) 990 bch2_discard_one_bucket_fast(ca, new.k->p.offset); 991 992 if (statechange(a->data_type == BCH_DATA_cached) && 993 !bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) && 994 should_invalidate_buckets(ca, bch2_dev_usage_read(ca))) 995 bch2_dev_do_invalidates(ca); 996 997 if (statechange(a->data_type == BCH_DATA_need_gc_gens)) 998 bch2_gc_gens_async(c); 999 } 1000 1001 if ((flags & BTREE_TRIGGER_gc) && (flags & BTREE_TRIGGER_insert)) { 1002 rcu_read_lock(); 1003 struct bucket *g = gc_bucket(ca, new.k->p.offset); 1004 if (unlikely(!g)) { 1005 rcu_read_unlock(); 1006 goto invalid_bucket; 1007 } 1008 g->gen_valid = 1; 1009 g->gen = new_a->gen; 1010 rcu_read_unlock(); 1011 } 1012 err: 1013 fsck_err: 1014 printbuf_exit(&buf); 1015 bch2_dev_put(ca); 1016 return ret; 1017 invalid_bucket: 1018 bch2_fs_inconsistent(c, "reference to invalid bucket\n %s", 1019 (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf)); 1020 ret = -EIO; 1021 goto err; 1022 } 1023 1024 /* 1025 * This synthesizes deleted extents for holes, similar to BTREE_ITER_slots for 1026 * extents style btrees, but works on non-extents btrees: 1027 */ 1028 static struct bkey_s_c bch2_get_key_or_hole(struct btree_iter *iter, struct bpos end, struct bkey *hole) 1029 { 1030 struct bkey_s_c k = 
bch2_btree_iter_peek_slot(iter); 1031 1032 if (bkey_err(k)) 1033 return k; 1034 1035 if (k.k->type) { 1036 return k; 1037 } else { 1038 struct btree_iter iter2; 1039 struct bpos next; 1040 1041 bch2_trans_copy_iter(&iter2, iter); 1042 1043 struct btree_path *path = btree_iter_path(iter->trans, iter); 1044 if (!bpos_eq(path->l[0].b->key.k.p, SPOS_MAX)) 1045 end = bkey_min(end, bpos_nosnap_successor(path->l[0].b->key.k.p)); 1046 1047 end = bkey_min(end, POS(iter->pos.inode, iter->pos.offset + U32_MAX - 1)); 1048 1049 /* 1050 * btree node min/max is a closed interval, upto takes a half 1051 * open interval: 1052 */ 1053 k = bch2_btree_iter_peek_max(&iter2, end); 1054 next = iter2.pos; 1055 bch2_trans_iter_exit(iter->trans, &iter2); 1056 1057 BUG_ON(next.offset >= iter->pos.offset + U32_MAX); 1058 1059 if (bkey_err(k)) 1060 return k; 1061 1062 bkey_init(hole); 1063 hole->p = iter->pos; 1064 1065 bch2_key_resize(hole, next.offset - iter->pos.offset); 1066 return (struct bkey_s_c) { hole, NULL }; 1067 } 1068 } 1069 1070 static bool next_bucket(struct bch_fs *c, struct bch_dev **ca, struct bpos *bucket) 1071 { 1072 if (*ca) { 1073 if (bucket->offset < (*ca)->mi.first_bucket) 1074 bucket->offset = (*ca)->mi.first_bucket; 1075 1076 if (bucket->offset < (*ca)->mi.nbuckets) 1077 return true; 1078 1079 bch2_dev_put(*ca); 1080 *ca = NULL; 1081 bucket->inode++; 1082 bucket->offset = 0; 1083 } 1084 1085 rcu_read_lock(); 1086 *ca = __bch2_next_dev_idx(c, bucket->inode, NULL); 1087 if (*ca) { 1088 *bucket = POS((*ca)->dev_idx, (*ca)->mi.first_bucket); 1089 bch2_dev_get(*ca); 1090 } 1091 rcu_read_unlock(); 1092 1093 return *ca != NULL; 1094 } 1095 1096 static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_iter *iter, 1097 struct bch_dev **ca, struct bkey *hole) 1098 { 1099 struct bch_fs *c = iter->trans->c; 1100 struct bkey_s_c k; 1101 again: 1102 k = bch2_get_key_or_hole(iter, POS_MAX, hole); 1103 if (bkey_err(k)) 1104 return k; 1105 1106 *ca = bch2_dev_iterate_noerror(c, *ca, k.k->p.inode); 1107 1108 if (!k.k->type) { 1109 struct bpos hole_start = bkey_start_pos(k.k); 1110 1111 if (!*ca || !bucket_valid(*ca, hole_start.offset)) { 1112 if (!next_bucket(c, ca, &hole_start)) 1113 return bkey_s_c_null; 1114 1115 bch2_btree_iter_set_pos(iter, hole_start); 1116 goto again; 1117 } 1118 1119 if (k.k->p.offset > (*ca)->mi.nbuckets) 1120 bch2_key_resize(hole, (*ca)->mi.nbuckets - hole_start.offset); 1121 } 1122 1123 return k; 1124 } 1125 1126 static noinline_for_stack 1127 int bch2_check_alloc_key(struct btree_trans *trans, 1128 struct bkey_s_c alloc_k, 1129 struct btree_iter *alloc_iter, 1130 struct btree_iter *discard_iter, 1131 struct btree_iter *freespace_iter, 1132 struct btree_iter *bucket_gens_iter) 1133 { 1134 struct bch_fs *c = trans->c; 1135 struct bch_alloc_v4 a_convert; 1136 const struct bch_alloc_v4 *a; 1137 unsigned gens_offset; 1138 struct bkey_s_c k; 1139 struct printbuf buf = PRINTBUF; 1140 int ret = 0; 1141 1142 struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_k.k->p); 1143 if (fsck_err_on(!ca, 1144 trans, alloc_key_to_missing_dev_bucket, 1145 "alloc key for invalid device:bucket %llu:%llu", 1146 alloc_k.k->p.inode, alloc_k.k->p.offset)) 1147 ret = bch2_btree_delete_at(trans, alloc_iter, 0); 1148 if (!ca) 1149 return ret; 1150 1151 if (!ca->mi.freespace_initialized) 1152 goto out; 1153 1154 a = bch2_alloc_to_v4(alloc_k, &a_convert); 1155 1156 bch2_btree_iter_set_pos(discard_iter, alloc_k.k->p); 1157 k = bch2_btree_iter_peek_slot(discard_iter); 1158 ret = bkey_err(k); 
1159 if (ret) 1160 goto err; 1161 1162 bool is_discarded = a->data_type == BCH_DATA_need_discard; 1163 if (need_discard_or_freespace_err_on(!!k.k->type != is_discarded, 1164 trans, alloc_k, !is_discarded, true, true)) { 1165 ret = bch2_btree_bit_mod_iter(trans, discard_iter, is_discarded); 1166 if (ret) 1167 goto err; 1168 } 1169 1170 bch2_btree_iter_set_pos(freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a)); 1171 k = bch2_btree_iter_peek_slot(freespace_iter); 1172 ret = bkey_err(k); 1173 if (ret) 1174 goto err; 1175 1176 bool is_free = a->data_type == BCH_DATA_free; 1177 if (need_discard_or_freespace_err_on(!!k.k->type != is_free, 1178 trans, alloc_k, !is_free, false, true)) { 1179 ret = bch2_btree_bit_mod_iter(trans, freespace_iter, is_free); 1180 if (ret) 1181 goto err; 1182 } 1183 1184 bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset)); 1185 k = bch2_btree_iter_peek_slot(bucket_gens_iter); 1186 ret = bkey_err(k); 1187 if (ret) 1188 goto err; 1189 1190 if (fsck_err_on(a->gen != alloc_gen(k, gens_offset), 1191 trans, bucket_gens_key_wrong, 1192 "incorrect gen in bucket_gens btree (got %u should be %u)\n" 1193 " %s", 1194 alloc_gen(k, gens_offset), a->gen, 1195 (printbuf_reset(&buf), 1196 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { 1197 struct bkey_i_bucket_gens *g = 1198 bch2_trans_kmalloc(trans, sizeof(*g)); 1199 1200 ret = PTR_ERR_OR_ZERO(g); 1201 if (ret) 1202 goto err; 1203 1204 if (k.k->type == KEY_TYPE_bucket_gens) { 1205 bkey_reassemble(&g->k_i, k); 1206 } else { 1207 bkey_bucket_gens_init(&g->k_i); 1208 g->k.p = alloc_gens_pos(alloc_k.k->p, &gens_offset); 1209 } 1210 1211 g->v.gens[gens_offset] = a->gen; 1212 1213 ret = bch2_trans_update(trans, bucket_gens_iter, &g->k_i, 0); 1214 if (ret) 1215 goto err; 1216 } 1217 out: 1218 err: 1219 fsck_err: 1220 bch2_dev_put(ca); 1221 printbuf_exit(&buf); 1222 return ret; 1223 } 1224 1225 static noinline_for_stack 1226 int bch2_check_alloc_hole_freespace(struct btree_trans *trans, 1227 struct bch_dev *ca, 1228 struct bpos start, 1229 struct bpos *end, 1230 struct btree_iter *freespace_iter) 1231 { 1232 struct bkey_s_c k; 1233 struct printbuf buf = PRINTBUF; 1234 int ret; 1235 1236 if (!ca->mi.freespace_initialized) 1237 return 0; 1238 1239 bch2_btree_iter_set_pos(freespace_iter, start); 1240 1241 k = bch2_btree_iter_peek_slot(freespace_iter); 1242 ret = bkey_err(k); 1243 if (ret) 1244 goto err; 1245 1246 *end = bkey_min(k.k->p, *end); 1247 1248 if (fsck_err_on(k.k->type != KEY_TYPE_set, 1249 trans, freespace_hole_missing, 1250 "hole in alloc btree missing in freespace btree\n" 1251 " device %llu buckets %llu-%llu", 1252 freespace_iter->pos.inode, 1253 freespace_iter->pos.offset, 1254 end->offset)) { 1255 struct bkey_i *update = 1256 bch2_trans_kmalloc(trans, sizeof(*update)); 1257 1258 ret = PTR_ERR_OR_ZERO(update); 1259 if (ret) 1260 goto err; 1261 1262 bkey_init(&update->k); 1263 update->k.type = KEY_TYPE_set; 1264 update->k.p = freespace_iter->pos; 1265 bch2_key_resize(&update->k, 1266 min_t(u64, U32_MAX, end->offset - 1267 freespace_iter->pos.offset)); 1268 1269 ret = bch2_trans_update(trans, freespace_iter, update, 0); 1270 if (ret) 1271 goto err; 1272 } 1273 err: 1274 fsck_err: 1275 printbuf_exit(&buf); 1276 return ret; 1277 } 1278 1279 static noinline_for_stack 1280 int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans, 1281 struct bpos start, 1282 struct bpos *end, 1283 struct btree_iter *bucket_gens_iter) 1284 { 1285 struct bkey_s_c k; 1286 struct printbuf buf = PRINTBUF; 
1287 unsigned i, gens_offset, gens_end_offset; 1288 int ret; 1289 1290 bch2_btree_iter_set_pos(bucket_gens_iter, alloc_gens_pos(start, &gens_offset)); 1291 1292 k = bch2_btree_iter_peek_slot(bucket_gens_iter); 1293 ret = bkey_err(k); 1294 if (ret) 1295 goto err; 1296 1297 if (bkey_cmp(alloc_gens_pos(start, &gens_offset), 1298 alloc_gens_pos(*end, &gens_end_offset))) 1299 gens_end_offset = KEY_TYPE_BUCKET_GENS_NR; 1300 1301 if (k.k->type == KEY_TYPE_bucket_gens) { 1302 struct bkey_i_bucket_gens g; 1303 bool need_update = false; 1304 1305 bkey_reassemble(&g.k_i, k); 1306 1307 for (i = gens_offset; i < gens_end_offset; i++) { 1308 if (fsck_err_on(g.v.gens[i], trans, 1309 bucket_gens_hole_wrong, 1310 "hole in alloc btree at %llu:%llu with nonzero gen in bucket_gens btree (%u)", 1311 bucket_gens_pos_to_alloc(k.k->p, i).inode, 1312 bucket_gens_pos_to_alloc(k.k->p, i).offset, 1313 g.v.gens[i])) { 1314 g.v.gens[i] = 0; 1315 need_update = true; 1316 } 1317 } 1318 1319 if (need_update) { 1320 struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g)); 1321 1322 ret = PTR_ERR_OR_ZERO(u); 1323 if (ret) 1324 goto err; 1325 1326 memcpy(u, &g, sizeof(g)); 1327 1328 ret = bch2_trans_update(trans, bucket_gens_iter, u, 0); 1329 if (ret) 1330 goto err; 1331 } 1332 } 1333 1334 *end = bkey_min(*end, bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0)); 1335 err: 1336 fsck_err: 1337 printbuf_exit(&buf); 1338 return ret; 1339 } 1340 1341 struct check_discard_freespace_key_async { 1342 struct work_struct work; 1343 struct bch_fs *c; 1344 struct bbpos pos; 1345 }; 1346 1347 static int bch2_recheck_discard_freespace_key(struct btree_trans *trans, struct bbpos pos) 1348 { 1349 struct btree_iter iter; 1350 struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, pos.btree, pos.pos, 0); 1351 int ret = bkey_err(k); 1352 if (ret) 1353 return ret; 1354 1355 u8 gen; 1356 ret = k.k->type != KEY_TYPE_set 1357 ? bch2_check_discard_freespace_key(trans, &iter, &gen, false) 1358 : 0; 1359 bch2_trans_iter_exit(trans, &iter); 1360 return ret; 1361 } 1362 1363 static void check_discard_freespace_key_work(struct work_struct *work) 1364 { 1365 struct check_discard_freespace_key_async *w = 1366 container_of(work, struct check_discard_freespace_key_async, work); 1367 1368 bch2_trans_do(w->c, bch2_recheck_discard_freespace_key(trans, w->pos)); 1369 bch2_write_ref_put(w->c, BCH_WRITE_REF_check_discard_freespace_key); 1370 kfree(w); 1371 } 1372 1373 int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen, 1374 bool async_repair) 1375 { 1376 struct bch_fs *c = trans->c; 1377 enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard 1378 ? 
BCH_DATA_need_discard 1379 : BCH_DATA_free; 1380 struct printbuf buf = PRINTBUF; 1381 1382 struct bpos bucket = iter->pos; 1383 bucket.offset &= ~(~0ULL << 56); 1384 u64 genbits = iter->pos.offset & (~0ULL << 56); 1385 1386 struct btree_iter alloc_iter; 1387 struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, 1388 BTREE_ID_alloc, bucket, BTREE_ITER_cached); 1389 int ret = bkey_err(alloc_k); 1390 if (ret) 1391 return ret; 1392 1393 if (!bch2_dev_bucket_exists(c, bucket)) { 1394 if (fsck_err(trans, need_discard_freespace_key_to_invalid_dev_bucket, 1395 "entry in %s btree for nonexistant dev:bucket %llu:%llu", 1396 bch2_btree_id_str(iter->btree_id), bucket.inode, bucket.offset)) 1397 goto delete; 1398 ret = 1; 1399 goto out; 1400 } 1401 1402 struct bch_alloc_v4 a_convert; 1403 const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert); 1404 1405 if (a->data_type != state || 1406 (state == BCH_DATA_free && 1407 genbits != alloc_freespace_genbits(*a))) { 1408 if (fsck_err(trans, need_discard_freespace_key_bad, 1409 "%s\n incorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)", 1410 (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf), 1411 bch2_btree_id_str(iter->btree_id), 1412 iter->pos.inode, 1413 iter->pos.offset, 1414 a->data_type == state, 1415 genbits >> 56, alloc_freespace_genbits(*a) >> 56)) 1416 goto delete; 1417 ret = 1; 1418 goto out; 1419 } 1420 1421 *gen = a->gen; 1422 out: 1423 fsck_err: 1424 bch2_set_btree_iter_dontneed(&alloc_iter); 1425 bch2_trans_iter_exit(trans, &alloc_iter); 1426 printbuf_exit(&buf); 1427 return ret; 1428 delete: 1429 if (!async_repair) { 1430 ret = bch2_btree_bit_mod_iter(trans, iter, false) ?: 1431 bch2_trans_commit(trans, NULL, NULL, 1432 BCH_TRANS_COMMIT_no_enospc) ?: 1433 -BCH_ERR_transaction_restart_commit; 1434 goto out; 1435 } else { 1436 /* 1437 * We can't repair here when called from the allocator path: the 1438 * commit will recurse back into the allocator 1439 */ 1440 struct check_discard_freespace_key_async *w = 1441 kzalloc(sizeof(*w), GFP_KERNEL); 1442 if (!w) 1443 goto out; 1444 1445 if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_check_discard_freespace_key)) { 1446 kfree(w); 1447 goto out; 1448 } 1449 1450 INIT_WORK(&w->work, check_discard_freespace_key_work); 1451 w->c = c; 1452 w->pos = BBPOS(iter->btree_id, iter->pos); 1453 queue_work(c->write_ref_wq, &w->work); 1454 goto out; 1455 } 1456 } 1457 1458 static int bch2_check_discard_freespace_key_fsck(struct btree_trans *trans, struct btree_iter *iter) 1459 { 1460 u8 gen; 1461 int ret = bch2_check_discard_freespace_key(trans, iter, &gen, false); 1462 return ret < 0 ? ret : 0; 1463 } 1464 1465 /* 1466 * We've already checked that generation numbers in the bucket_gens btree are 1467 * valid for buckets that exist; this just checks for keys for nonexistent 1468 * buckets. 
1469 */ 1470 static noinline_for_stack 1471 int bch2_check_bucket_gens_key(struct btree_trans *trans, 1472 struct btree_iter *iter, 1473 struct bkey_s_c k) 1474 { 1475 struct bch_fs *c = trans->c; 1476 struct bkey_i_bucket_gens g; 1477 u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset; 1478 u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset; 1479 u64 b; 1480 bool need_update = false; 1481 struct printbuf buf = PRINTBUF; 1482 int ret = 0; 1483 1484 BUG_ON(k.k->type != KEY_TYPE_bucket_gens); 1485 bkey_reassemble(&g.k_i, k); 1486 1487 struct bch_dev *ca = bch2_dev_tryget_noerror(c, k.k->p.inode); 1488 if (!ca) { 1489 if (fsck_err(trans, bucket_gens_to_invalid_dev, 1490 "bucket_gens key for invalid device:\n %s", 1491 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1492 ret = bch2_btree_delete_at(trans, iter, 0); 1493 goto out; 1494 } 1495 1496 if (fsck_err_on(end <= ca->mi.first_bucket || 1497 start >= ca->mi.nbuckets, 1498 trans, bucket_gens_to_invalid_buckets, 1499 "bucket_gens key for invalid buckets:\n %s", 1500 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 1501 ret = bch2_btree_delete_at(trans, iter, 0); 1502 goto out; 1503 } 1504 1505 for (b = start; b < ca->mi.first_bucket; b++) 1506 if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], 1507 trans, bucket_gens_nonzero_for_invalid_buckets, 1508 "bucket_gens key has nonzero gen for invalid bucket")) { 1509 g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0; 1510 need_update = true; 1511 } 1512 1513 for (b = ca->mi.nbuckets; b < end; b++) 1514 if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK], 1515 trans, bucket_gens_nonzero_for_invalid_buckets, 1516 "bucket_gens key has nonzero gen for invalid bucket")) { 1517 g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0; 1518 need_update = true; 1519 } 1520 1521 if (need_update) { 1522 struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g)); 1523 1524 ret = PTR_ERR_OR_ZERO(u); 1525 if (ret) 1526 goto out; 1527 1528 memcpy(u, &g, sizeof(g)); 1529 ret = bch2_trans_update(trans, iter, u, 0); 1530 } 1531 out: 1532 fsck_err: 1533 bch2_dev_put(ca); 1534 printbuf_exit(&buf); 1535 return ret; 1536 } 1537 1538 int bch2_check_alloc_info(struct bch_fs *c) 1539 { 1540 struct btree_trans *trans = bch2_trans_get(c); 1541 struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter; 1542 struct bch_dev *ca = NULL; 1543 struct bkey hole; 1544 struct bkey_s_c k; 1545 int ret = 0; 1546 1547 bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN, 1548 BTREE_ITER_prefetch); 1549 bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN, 1550 BTREE_ITER_prefetch); 1551 bch2_trans_iter_init(trans, &freespace_iter, BTREE_ID_freespace, POS_MIN, 1552 BTREE_ITER_prefetch); 1553 bch2_trans_iter_init(trans, &bucket_gens_iter, BTREE_ID_bucket_gens, POS_MIN, 1554 BTREE_ITER_prefetch); 1555 1556 while (1) { 1557 struct bpos next; 1558 1559 bch2_trans_begin(trans); 1560 1561 k = bch2_get_key_or_real_bucket_hole(&iter, &ca, &hole); 1562 ret = bkey_err(k); 1563 if (ret) 1564 goto bkey_err; 1565 1566 if (!k.k) 1567 break; 1568 1569 if (k.k->type) { 1570 next = bpos_nosnap_successor(k.k->p); 1571 1572 ret = bch2_check_alloc_key(trans, 1573 k, &iter, 1574 &discard_iter, 1575 &freespace_iter, 1576 &bucket_gens_iter); 1577 if (ret) 1578 goto bkey_err; 1579 } else { 1580 next = k.k->p; 1581 1582 ret = bch2_check_alloc_hole_freespace(trans, ca, 1583 bkey_start_pos(k.k), 1584 &next, 1585 &freespace_iter) ?: 1586 bch2_check_alloc_hole_bucket_gens(trans, 1587 bkey_start_pos(k.k), 1588 
&next, 1589 &bucket_gens_iter); 1590 if (ret) 1591 goto bkey_err; 1592 } 1593 1594 ret = bch2_trans_commit(trans, NULL, NULL, 1595 BCH_TRANS_COMMIT_no_enospc); 1596 if (ret) 1597 goto bkey_err; 1598 1599 bch2_btree_iter_set_pos(&iter, next); 1600 bkey_err: 1601 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) 1602 continue; 1603 if (ret) 1604 break; 1605 } 1606 bch2_trans_iter_exit(trans, &bucket_gens_iter); 1607 bch2_trans_iter_exit(trans, &freespace_iter); 1608 bch2_trans_iter_exit(trans, &discard_iter); 1609 bch2_trans_iter_exit(trans, &iter); 1610 bch2_dev_put(ca); 1611 ca = NULL; 1612 1613 if (ret < 0) 1614 goto err; 1615 1616 ret = for_each_btree_key(trans, iter, 1617 BTREE_ID_need_discard, POS_MIN, 1618 BTREE_ITER_prefetch, k, 1619 bch2_check_discard_freespace_key_fsck(trans, &iter)); 1620 if (ret) 1621 goto err; 1622 1623 bch2_trans_iter_init(trans, &iter, BTREE_ID_freespace, POS_MIN, 1624 BTREE_ITER_prefetch); 1625 while (1) { 1626 bch2_trans_begin(trans); 1627 k = bch2_btree_iter_peek(&iter); 1628 if (!k.k) 1629 break; 1630 1631 ret = bkey_err(k) ?: 1632 bch2_check_discard_freespace_key_fsck(trans, &iter); 1633 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { 1634 ret = 0; 1635 continue; 1636 } 1637 if (ret) { 1638 struct printbuf buf = PRINTBUF; 1639 bch2_bkey_val_to_text(&buf, c, k); 1640 1641 bch_err(c, "while checking %s", buf.buf); 1642 printbuf_exit(&buf); 1643 break; 1644 } 1645 1646 bch2_btree_iter_set_pos(&iter, bpos_nosnap_successor(iter.pos)); 1647 } 1648 bch2_trans_iter_exit(trans, &iter); 1649 if (ret) 1650 goto err; 1651 1652 ret = for_each_btree_key_commit(trans, iter, 1653 BTREE_ID_bucket_gens, POS_MIN, 1654 BTREE_ITER_prefetch, k, 1655 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 1656 bch2_check_bucket_gens_key(trans, &iter, k)); 1657 err: 1658 bch2_trans_put(trans); 1659 bch_err_fn(c, ret); 1660 return ret; 1661 } 1662 1663 static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, 1664 struct btree_iter *alloc_iter, 1665 struct bkey_buf *last_flushed) 1666 { 1667 struct bch_fs *c = trans->c; 1668 struct bch_alloc_v4 a_convert; 1669 const struct bch_alloc_v4 *a; 1670 struct bkey_s_c alloc_k; 1671 struct printbuf buf = PRINTBUF; 1672 int ret; 1673 1674 alloc_k = bch2_btree_iter_peek(alloc_iter); 1675 if (!alloc_k.k) 1676 return 0; 1677 1678 ret = bkey_err(alloc_k); 1679 if (ret) 1680 return ret; 1681 1682 struct bch_dev *ca = bch2_dev_tryget_noerror(c, alloc_k.k->p.inode); 1683 if (!ca) 1684 return 0; 1685 1686 a = bch2_alloc_to_v4(alloc_k, &a_convert); 1687 1688 u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca); 1689 if (lru_idx) { 1690 ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START, 1691 lru_idx, alloc_k, last_flushed); 1692 if (ret) 1693 goto err; 1694 } 1695 1696 if (a->data_type != BCH_DATA_cached) 1697 goto err; 1698 1699 if (fsck_err_on(!a->io_time[READ], 1700 trans, alloc_key_cached_but_read_time_zero, 1701 "cached bucket with read_time 0\n" 1702 " %s", 1703 (printbuf_reset(&buf), 1704 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) { 1705 struct bkey_i_alloc_v4 *a_mut = 1706 bch2_alloc_to_v4_mut(trans, alloc_k); 1707 ret = PTR_ERR_OR_ZERO(a_mut); 1708 if (ret) 1709 goto err; 1710 1711 a_mut->v.io_time[READ] = bch2_current_io_time(c, READ); 1712 ret = bch2_trans_update(trans, alloc_iter, 1713 &a_mut->k_i, BTREE_TRIGGER_norun); 1714 if (ret) 1715 goto err; 1716 1717 a = &a_mut->v; 1718 } 1719 1720 ret = bch2_lru_check_set(trans, alloc_k.k->p.inode, a->io_time[READ], 1721 alloc_k, last_flushed); 1722 if (ret) 1723 goto 
err; 1724 err: 1725 fsck_err: 1726 bch2_dev_put(ca); 1727 printbuf_exit(&buf); 1728 return ret; 1729 } 1730 1731 int bch2_check_alloc_to_lru_refs(struct bch_fs *c) 1732 { 1733 struct bkey_buf last_flushed; 1734 1735 bch2_bkey_buf_init(&last_flushed); 1736 bkey_init(&last_flushed.k->k); 1737 1738 int ret = bch2_trans_run(c, 1739 for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, 1740 POS_MIN, BTREE_ITER_prefetch, k, 1741 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 1742 bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))); 1743 1744 bch2_bkey_buf_exit(&last_flushed, c); 1745 bch_err_fn(c, ret); 1746 return ret; 1747 } 1748 1749 static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress) 1750 { 1751 int ret; 1752 1753 mutex_lock(&ca->discard_buckets_in_flight_lock); 1754 darray_for_each(ca->discard_buckets_in_flight, i) 1755 if (i->bucket == bucket) { 1756 ret = -BCH_ERR_EEXIST_discard_in_flight_add; 1757 goto out; 1758 } 1759 1760 ret = darray_push(&ca->discard_buckets_in_flight, ((struct discard_in_flight) { 1761 .in_progress = in_progress, 1762 .bucket = bucket, 1763 })); 1764 out: 1765 mutex_unlock(&ca->discard_buckets_in_flight_lock); 1766 return ret; 1767 } 1768 1769 static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket) 1770 { 1771 mutex_lock(&ca->discard_buckets_in_flight_lock); 1772 darray_for_each(ca->discard_buckets_in_flight, i) 1773 if (i->bucket == bucket) { 1774 BUG_ON(!i->in_progress); 1775 darray_remove_item(&ca->discard_buckets_in_flight, i); 1776 goto found; 1777 } 1778 BUG(); 1779 found: 1780 mutex_unlock(&ca->discard_buckets_in_flight_lock); 1781 } 1782 1783 struct discard_buckets_state { 1784 u64 seen; 1785 u64 open; 1786 u64 need_journal_commit; 1787 u64 discarded; 1788 u64 need_journal_commit_this_dev; 1789 }; 1790 1791 static int bch2_discard_one_bucket(struct btree_trans *trans, 1792 struct bch_dev *ca, 1793 struct btree_iter *need_discard_iter, 1794 struct bpos *discard_pos_done, 1795 struct discard_buckets_state *s, 1796 bool fastpath) 1797 { 1798 struct bch_fs *c = trans->c; 1799 struct bpos pos = need_discard_iter->pos; 1800 struct btree_iter iter = { NULL }; 1801 struct bkey_s_c k; 1802 struct bkey_i_alloc_v4 *a; 1803 struct printbuf buf = PRINTBUF; 1804 bool discard_locked = false; 1805 int ret = 0; 1806 1807 if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) { 1808 s->open++; 1809 goto out; 1810 } 1811 1812 if (bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal, 1813 c->journal.flushed_seq_ondisk, 1814 pos.inode, pos.offset)) { 1815 s->need_journal_commit++; 1816 s->need_journal_commit_this_dev++; 1817 goto out; 1818 } 1819 1820 k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, 1821 need_discard_iter->pos, 1822 BTREE_ITER_cached); 1823 ret = bkey_err(k); 1824 if (ret) 1825 goto out; 1826 1827 a = bch2_alloc_to_v4_mut(trans, k); 1828 ret = PTR_ERR_OR_ZERO(a); 1829 if (ret) 1830 goto out; 1831 1832 if (a->v.data_type != BCH_DATA_need_discard) { 1833 if (need_discard_or_freespace_err(trans, k, true, true, true)) { 1834 ret = bch2_btree_bit_mod_iter(trans, need_discard_iter, false); 1835 if (ret) 1836 goto out; 1837 goto commit; 1838 } 1839 1840 goto out; 1841 } 1842 1843 if (a->v.journal_seq > c->journal.flushed_seq_ondisk) { 1844 if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info, 1845 trans, "clearing need_discard but journal_seq %llu > flushed_seq %llu\n%s", 1846 a->v.journal_seq, 1847 c->journal.flushed_seq_ondisk, 1848 (bch2_bkey_val_to_text(&buf, c, k), 
buf.buf))) 1849 ret = -EIO; 1850 goto out; 1851 } 1852 1853 if (!fastpath) { 1854 if (discard_in_flight_add(ca, iter.pos.offset, true)) 1855 goto out; 1856 1857 discard_locked = true; 1858 } 1859 1860 if (!bkey_eq(*discard_pos_done, iter.pos) && 1861 ca->mi.discard && !c->opts.nochanges) { 1862 /* 1863 * This works without any other locks because this is the only 1864 * thread that removes items from the need_discard tree 1865 */ 1866 bch2_trans_unlock_long(trans); 1867 blkdev_issue_discard(ca->disk_sb.bdev, 1868 k.k->p.offset * ca->mi.bucket_size, 1869 ca->mi.bucket_size, 1870 GFP_KERNEL); 1871 *discard_pos_done = iter.pos; 1872 s->discarded++; 1873 1874 ret = bch2_trans_relock_notrace(trans); 1875 if (ret) 1876 goto out; 1877 } 1878 1879 SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false); 1880 alloc_data_type_set(&a->v, a->v.data_type); 1881 1882 ret = bch2_trans_update(trans, &iter, &a->k_i, 0); 1883 if (ret) 1884 goto out; 1885 commit: 1886 ret = bch2_trans_commit(trans, NULL, NULL, 1887 BCH_WATERMARK_btree| 1888 BCH_TRANS_COMMIT_no_enospc); 1889 if (ret) 1890 goto out; 1891 1892 count_event(c, bucket_discard); 1893 out: 1894 fsck_err: 1895 if (discard_locked) 1896 discard_in_flight_remove(ca, iter.pos.offset); 1897 if (!ret) 1898 s->seen++; 1899 bch2_trans_iter_exit(trans, &iter); 1900 printbuf_exit(&buf); 1901 return ret; 1902 } 1903 1904 static void bch2_do_discards_work(struct work_struct *work) 1905 { 1906 struct bch_dev *ca = container_of(work, struct bch_dev, discard_work); 1907 struct bch_fs *c = ca->fs; 1908 struct discard_buckets_state s = {}; 1909 struct bpos discard_pos_done = POS_MAX; 1910 int ret; 1911 1912 /* 1913 * We're doing the commit in bch2_discard_one_bucket instead of using 1914 * for_each_btree_key_commit() so that we can increment counters after 1915 * successful commit: 1916 */ 1917 ret = bch2_trans_run(c, 1918 for_each_btree_key_max(trans, iter, 1919 BTREE_ID_need_discard, 1920 POS(ca->dev_idx, 0), 1921 POS(ca->dev_idx, U64_MAX), 0, k, 1922 bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s, false))); 1923 1924 trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, 1925 bch2_err_str(ret)); 1926 1927 percpu_ref_put(&ca->io_ref); 1928 bch2_write_ref_put(c, BCH_WRITE_REF_discard); 1929 } 1930 1931 void bch2_dev_do_discards(struct bch_dev *ca) 1932 { 1933 struct bch_fs *c = ca->fs; 1934 1935 if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard)) 1936 return; 1937 1938 if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) 1939 goto put_write_ref; 1940 1941 if (queue_work(c->write_ref_wq, &ca->discard_work)) 1942 return; 1943 1944 percpu_ref_put(&ca->io_ref); 1945 put_write_ref: 1946 bch2_write_ref_put(c, BCH_WRITE_REF_discard); 1947 } 1948 1949 void bch2_do_discards(struct bch_fs *c) 1950 { 1951 for_each_member_device(c, ca) 1952 bch2_dev_do_discards(ca); 1953 } 1954 1955 static int bch2_do_discards_fast_one(struct btree_trans *trans, 1956 struct bch_dev *ca, 1957 u64 bucket, 1958 struct bpos *discard_pos_done, 1959 struct discard_buckets_state *s) 1960 { 1961 struct btree_iter need_discard_iter; 1962 struct bkey_s_c discard_k = bch2_bkey_get_iter(trans, &need_discard_iter, 1963 BTREE_ID_need_discard, POS(ca->dev_idx, bucket), 0); 1964 int ret = bkey_err(discard_k); 1965 if (ret) 1966 return ret; 1967 1968 if (log_fsck_err_on(discard_k.k->type != KEY_TYPE_set, 1969 trans, discarding_bucket_not_in_need_discard_btree, 1970 "attempting to discard bucket %u:%llu not in need_discard btree", 1971 ca->dev_idx, bucket)) 1972 goto out; 1973 1974 ret = 
bch2_discard_one_bucket(trans, ca, &need_discard_iter, discard_pos_done, s, true); 1975 out: 1976 fsck_err: 1977 bch2_trans_iter_exit(trans, &need_discard_iter); 1978 return ret; 1979 } 1980 1981 static void bch2_do_discards_fast_work(struct work_struct *work) 1982 { 1983 struct bch_dev *ca = container_of(work, struct bch_dev, discard_fast_work); 1984 struct bch_fs *c = ca->fs; 1985 struct discard_buckets_state s = {}; 1986 struct bpos discard_pos_done = POS_MAX; 1987 struct btree_trans *trans = bch2_trans_get(c); 1988 int ret = 0; 1989 1990 while (1) { 1991 bool got_bucket = false; 1992 u64 bucket; 1993 1994 mutex_lock(&ca->discard_buckets_in_flight_lock); 1995 darray_for_each(ca->discard_buckets_in_flight, i) { 1996 if (i->in_progress) 1997 continue; 1998 1999 got_bucket = true; 2000 bucket = i->bucket; 2001 i->in_progress = true; 2002 break; 2003 } 2004 mutex_unlock(&ca->discard_buckets_in_flight_lock); 2005 2006 if (!got_bucket) 2007 break; 2008 2009 ret = lockrestart_do(trans, 2010 bch2_do_discards_fast_one(trans, ca, bucket, &discard_pos_done, &s)); 2011 bch_err_fn(c, ret); 2012 2013 discard_in_flight_remove(ca, bucket); 2014 2015 if (ret) 2016 break; 2017 } 2018 2019 trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); 2020 2021 bch2_trans_put(trans); 2022 percpu_ref_put(&ca->io_ref); 2023 bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); 2024 } 2025 2026 static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket) 2027 { 2028 struct bch_fs *c = ca->fs; 2029 2030 if (discard_in_flight_add(ca, bucket, false)) 2031 return; 2032 2033 if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast)) 2034 return; 2035 2036 if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) 2037 goto put_ref; 2038 2039 if (queue_work(c->write_ref_wq, &ca->discard_fast_work)) 2040 return; 2041 2042 percpu_ref_put(&ca->io_ref); 2043 put_ref: 2044 bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast); 2045 } 2046 2047 static int invalidate_one_bucket(struct btree_trans *trans, 2048 struct btree_iter *lru_iter, 2049 struct bkey_s_c lru_k, 2050 s64 *nr_to_invalidate) 2051 { 2052 struct bch_fs *c = trans->c; 2053 struct bkey_i_alloc_v4 *a = NULL; 2054 struct printbuf buf = PRINTBUF; 2055 struct bpos bucket = u64_to_bucket(lru_k.k->p.offset); 2056 unsigned cached_sectors; 2057 int ret = 0; 2058 2059 if (*nr_to_invalidate <= 0) 2060 return 1; 2061 2062 if (!bch2_dev_bucket_exists(c, bucket)) { 2063 if (fsck_err(trans, lru_entry_to_invalid_bucket, 2064 "lru key points to nonexistent device:bucket %llu:%llu", 2065 bucket.inode, bucket.offset)) 2066 return bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false); 2067 goto out; 2068 } 2069 2070 if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset)) 2071 return 0; 2072 2073 a = bch2_trans_start_alloc_update(trans, bucket, BTREE_TRIGGER_bucket_invalidate); 2074 ret = PTR_ERR_OR_ZERO(a); 2075 if (ret) 2076 goto out; 2077 2078 /* We expect harmless races here due to the btree write buffer: */ 2079 if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(a->v)) 2080 goto out; 2081 2082 BUG_ON(a->v.data_type != BCH_DATA_cached); 2083 BUG_ON(a->v.dirty_sectors); 2084 2085 if (!a->v.cached_sectors) 2086 bch_err(c, "invalidating empty bucket, confused"); 2087 2088 cached_sectors = a->v.cached_sectors; 2089 2090 SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); 2091 a->v.gen++; 2092 a->v.data_type = 0; 2093 a->v.dirty_sectors = 0; 2094 a->v.stripe_sectors = 0; 2095 a->v.cached_sectors = 0; 2096 a->v.io_time[READ] = 

static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter *iter,
				    struct bch_dev *ca, bool *wrapped)
{
	struct bkey_s_c k;
again:
	k = bch2_btree_iter_peek_max(iter, lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX));
	if (!k.k && !*wrapped) {
		bch2_btree_iter_set_pos(iter, lru_pos(ca->dev_idx, 0, 0));
		*wrapped = true;
		goto again;
	}

	return k;
}

static void bch2_do_invalidates_work(struct work_struct *work)
{
	struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work);
	struct bch_fs *c = ca->fs;
	struct btree_trans *trans = bch2_trans_get(c);
	int ret = 0;

	ret = bch2_btree_write_buffer_tryflush(trans);
	if (ret)
		goto err;

	s64 nr_to_invalidate =
		should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
	struct btree_iter iter;
	bool wrapped = false;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
			     lru_pos(ca->dev_idx, 0,
				     ((bch2_current_io_time(c, READ) + U32_MAX) &
				      LRU_TIME_MAX)), 0);

	while (true) {
		bch2_trans_begin(trans);

		struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
		ret = bkey_err(k);
		if (ret)
			goto restart_err;
		if (!k.k)
			break;

		ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
restart_err:
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;

		bch2_btree_iter_advance(&iter);
	}
	bch2_trans_iter_exit(trans, &iter);
err:
	bch2_trans_put(trans);
	percpu_ref_put(&ca->io_ref);
	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
}

void bch2_dev_do_invalidates(struct bch_dev *ca)
{
	struct bch_fs *c = ca->fs;

	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
		return;

	if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
		goto put_ref;

	if (queue_work(c->write_ref_wq, &ca->invalidate_work))
		return;

	percpu_ref_put(&ca->io_ref);
put_ref:
	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
}

void bch2_do_invalidates(struct bch_fs *c)
{
	for_each_member_device(c, ca)
		bch2_dev_do_invalidates(ca);
}
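
/*
 * bch2_dev_freespace_init() walks the alloc btree for one device and
 * (re)creates the corresponding freespace/need_discard index entries; ranges
 * of empty buckets become a single KEY_TYPE_set extent in the freespace
 * btree. Once a device has been fully processed,
 * BCH_MEMBER_FREESPACE_INITIALIZED is set so that bch2_fs_freespace_init()
 * can skip it on subsequent mounts:
 */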

int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
			    u64 bucket_start, u64 bucket_end)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter;
	struct bkey_s_c k;
	struct bkey hole;
	struct bpos end = POS(ca->dev_idx, bucket_end);
	struct bch_member *m;
	unsigned long last_updated = jiffies;
	int ret;

	BUG_ON(bucket_start > bucket_end);
	BUG_ON(bucket_end > ca->mi.nbuckets);

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
			     POS(ca->dev_idx, max_t(u64, ca->mi.first_bucket, bucket_start)),
			     BTREE_ITER_prefetch);
	/*
	 * Scan the alloc btree for every bucket on @ca, and add buckets to the
	 * freespace/need_discard/need_gc_gens btrees as needed:
	 */
	while (1) {
		if (time_after(jiffies, last_updated + HZ * 10)) {
			bch_info(ca, "%s: currently at %llu/%llu",
				 __func__, iter.pos.offset, ca->mi.nbuckets);
			last_updated = jiffies;
		}

		bch2_trans_begin(trans);

		if (bkey_ge(iter.pos, end)) {
			ret = 0;
			break;
		}

		k = bch2_get_key_or_hole(&iter, end, &hole);
		ret = bkey_err(k);
		if (ret)
			goto bkey_err;

		if (k.k->type) {
			/*
			 * We process live keys in the alloc btree one at a
			 * time:
			 */
			struct bch_alloc_v4 a_convert;
			const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);

			ret = bch2_bucket_do_index(trans, ca, k, a, true) ?:
				bch2_trans_commit(trans, NULL, NULL,
						  BCH_TRANS_COMMIT_no_enospc);
			if (ret)
				goto bkey_err;

			bch2_btree_iter_advance(&iter);
		} else {
			struct bkey_i *freespace;

			freespace = bch2_trans_kmalloc(trans, sizeof(*freespace));
			ret = PTR_ERR_OR_ZERO(freespace);
			if (ret)
				goto bkey_err;

			bkey_init(&freespace->k);
			freespace->k.type = KEY_TYPE_set;
			freespace->k.p = k.k->p;
			freespace->k.size = k.k->size;

			ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace, freespace, 0) ?:
				bch2_trans_commit(trans, NULL, NULL,
						  BCH_TRANS_COMMIT_no_enospc);
			if (ret)
				goto bkey_err;

			bch2_btree_iter_set_pos(&iter, k.k->p);
		}
bkey_err:
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;
	}

	bch2_trans_iter_exit(trans, &iter);
	bch2_trans_put(trans);

	if (ret < 0) {
		bch_err_msg(ca, ret, "initializing free space");
		return ret;
	}

	mutex_lock(&c->sb_lock);
	m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
	SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true);
	mutex_unlock(&c->sb_lock);

	return 0;
}

int bch2_fs_freespace_init(struct bch_fs *c)
{
	int ret = 0;
	bool doing_init = false;

	/*
	 * We can crash during the device add path, so we need to check this on
	 * every mount:
	 */

	for_each_member_device(c, ca) {
		if (ca->mi.freespace_initialized)
			continue;

		if (!doing_init) {
			bch_info(c, "initializing freespace");
			doing_init = true;
		}

		ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
		if (ret) {
			bch2_dev_put(ca);
			bch_err_fn(c, ret);
			return ret;
		}
	}

	if (doing_init) {
		mutex_lock(&c->sb_lock);
		bch2_write_super(c);
		mutex_unlock(&c->sb_lock);
		bch_verbose(c, "done initializing freespace");
	}

	return 0;
}

/* device removal */

int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
{
	struct bpos start = POS(ca->dev_idx, 0);
	struct bpos end = POS(ca->dev_idx, U64_MAX);
	int ret;

	/*
	 * We clear the LRU and need_discard btrees first so that we don't race
	 * with bch2_do_invalidates() and bch2_do_discards()
	 */
	ret = bch2_dev_remove_stripes(c, ca->dev_idx) ?:
		bch2_btree_delete_range(c, BTREE_ID_lru, start, end,
					BTREE_TRIGGER_norun, NULL) ?:
		bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end,
					BTREE_TRIGGER_norun, NULL) ?:
		bch2_btree_delete_range(c, BTREE_ID_freespace, start, end,
					BTREE_TRIGGER_norun, NULL) ?:
		bch2_btree_delete_range(c, BTREE_ID_backpointers, start, end,
					BTREE_TRIGGER_norun, NULL) ?:
		bch2_btree_delete_range(c, BTREE_ID_bucket_gens, start, end,
					BTREE_TRIGGER_norun, NULL) ?:
		bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
					BTREE_TRIGGER_norun, NULL) ?:
		bch2_dev_usage_remove(c, ca->dev_idx);
	bch_err_msg(ca, ret, "removing dev alloc info");
	return ret;
}

/* Bucket IO clocks: */
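
/*
 * A bucket's alloc key records the IO clock value of its most recent read and
 * write; io_time[READ] feeds the LRU index checked via alloc_lru_idx_read()
 * in invalidate_one_bucket() above. The helper below only commits an update
 * when the clock has actually moved since the last reset:
 */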

static int __bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
				       size_t bucket_nr, int rw)
{
	struct bch_fs *c = trans->c;

	struct btree_iter iter;
	struct bkey_i_alloc_v4 *a =
		bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(dev, bucket_nr));
	int ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		return ret;

	u64 now = bch2_current_io_time(c, rw);
	if (a->v.io_time[rw] == now)
		goto out;

	a->v.io_time[rw] = now;

	ret = bch2_trans_update(trans, &iter, &a->k_i, 0) ?:
		bch2_trans_commit(trans, NULL, NULL, 0);
out:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
			      size_t bucket_nr, int rw)
{
	if (bch2_trans_relock(trans))
		bch2_trans_begin(trans);

	return nested_lockrestart_do(trans, __bch2_bucket_io_time_reset(trans, dev, bucket_nr, rw));
}

/* Startup/shutdown (ro/rw): */
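
/*
 * Rough sketch of the reserve sizing below, with illustrative numbers only
 * (not taken from any real device): for a device with 2^20 buckets and
 * nr_btree_reserve == 512, the per-device reserve works out to
 *
 *	512 * 2   = 1024	btree node reserve
 *	2^20 >> 6 = 16384	copygc reserve
 *	1 + 1 + 1 = 3		btree/copygc/rebalance write points
 *	total     = 17411	buckets
 *
 * which is converted to sectors and doubled before being added to
 * reserved_sectors. The filesystem-wide reserve is then the larger of that
 * sum and the gc reserve (gc_reserve_bytes, or gc_reserve_percent of
 * capacity), clamped to the total capacity:
 */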

void bch2_recalc_capacity(struct bch_fs *c)
{
	u64 capacity = 0, reserved_sectors = 0, gc_reserve;
	unsigned bucket_size_max = 0;
	unsigned long ra_pages = 0;

	lockdep_assert_held(&c->state_lock);

	for_each_online_member(c, ca) {
		struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_disk->bdi;

		ra_pages += bdi->ra_pages;
	}

	bch2_set_ra_pages(c, ra_pages);

	for_each_rw_member(c, ca) {
		u64 dev_reserve = 0;

		/*
		 * We need to reserve buckets (from the number
		 * of currently available buckets) against
		 * foreground writes so that mainly copygc can
		 * make forward progress.
		 *
		 * We need enough to refill the various reserves
		 * from scratch - copygc will use its entire
		 * reserve all at once, then run again when
		 * its reserve is refilled (from the formerly
		 * available buckets).
		 *
		 * This reserve is just used when considering if
		 * allocations for foreground writes must wait -
		 * not -ENOSPC calculations.
		 */

		dev_reserve += ca->nr_btree_reserve * 2;
		dev_reserve += ca->mi.nbuckets >> 6; /* copygc reserve */

		dev_reserve += 1;	/* btree write point */
		dev_reserve += 1;	/* copygc write point */
		dev_reserve += 1;	/* rebalance write point */

		dev_reserve *= ca->mi.bucket_size;

		capacity += bucket_to_sector(ca, ca->mi.nbuckets -
					     ca->mi.first_bucket);

		reserved_sectors += dev_reserve * 2;

		bucket_size_max = max_t(unsigned, bucket_size_max,
					ca->mi.bucket_size);
	}

	gc_reserve = c->opts.gc_reserve_bytes
		? c->opts.gc_reserve_bytes >> 9
		: div64_u64(capacity * c->opts.gc_reserve_percent, 100);

	reserved_sectors = max(gc_reserve, reserved_sectors);

	reserved_sectors = min(reserved_sectors, capacity);

	c->reserved = reserved_sectors;
	c->capacity = capacity - reserved_sectors;

	c->bucket_size_max = bucket_size_max;

	/* Wake up in case someone was waiting for buckets */
	closure_wake_up(&c->freelist_wait);
}

u64 bch2_min_rw_member_capacity(struct bch_fs *c)
{
	u64 ret = U64_MAX;

	for_each_rw_member(c, ca)
		ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size);
	return ret;
}

static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
{
	struct open_bucket *ob;
	bool ret = false;

	for (ob = c->open_buckets;
	     ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
	     ob++) {
		spin_lock(&ob->lock);
		if (ob->valid && !ob->on_partial_list &&
		    ob->dev == ca->dev_idx)
			ret = true;
		spin_unlock(&ob->lock);
	}

	return ret;
}

/* device goes ro: */
void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
{
	lockdep_assert_held(&c->state_lock);

	/* First, remove device from allocation groups: */

	for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
		clear_bit(ca->dev_idx, c->rw_devs[i].d);

	c->rw_devs_change_count++;

	/*
	 * Capacity is calculated based on devices in allocation groups:
	 */
	bch2_recalc_capacity(c);

	bch2_open_buckets_stop(c, ca, false);

	/*
	 * Wake up threads that were blocked on allocation, so they can notice
	 * the device can no longer be removed and the capacity has changed:
	 */
	closure_wake_up(&c->freelist_wait);

	/*
	 * journal_res_get() can block waiting for free space in the journal -
	 * it needs to notice there may not be devices to allocate from anymore:
	 */
	wake_up(&c->journal.wait);

	/* Now wait for any in flight writes: */

	closure_wait_event(&c->open_buckets_wait,
			   !bch2_dev_has_open_write_point(c, ca));
}

/* device goes rw: */
void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
{
	lockdep_assert_held(&c->state_lock);

	for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
		if (ca->mi.data_allowed & (1 << i))
			set_bit(ca->dev_idx, c->rw_devs[i].d);

	c->rw_devs_change_count++;
}

void bch2_dev_allocator_background_exit(struct bch_dev *ca)
{
	darray_exit(&ca->discard_buckets_in_flight);
}

void bch2_dev_allocator_background_init(struct bch_dev *ca)
{
	mutex_init(&ca->discard_buckets_in_flight_lock);
	INIT_WORK(&ca->discard_work, bch2_do_discards_work);
	INIT_WORK(&ca->discard_fast_work, bch2_do_discards_fast_work);
	INIT_WORK(&ca->invalidate_work, bch2_do_invalidates_work);
}

void bch2_fs_allocator_background_init(struct bch_fs *c)
{
	spin_lock_init(&c->freelist_lock);
}