17b3f84eaSKent Overstreet // SPDX-License-Identifier: GPL-2.0
27b3f84eaSKent Overstreet #include "bcachefs.h"
37b3f84eaSKent Overstreet #include "alloc_background.h"
47b3f84eaSKent Overstreet #include "alloc_foreground.h"
5a8c752bbSKent Overstreet #include "backpointers.h"
6d39881d2SKent Overstreet #include "bkey_buf.h"
77b3f84eaSKent Overstreet #include "btree_cache.h"
87b3f84eaSKent Overstreet #include "btree_io.h"
95d20ba48SKent Overstreet #include "btree_key_cache.h"
107b3f84eaSKent Overstreet #include "btree_update.h"
117b3f84eaSKent Overstreet #include "btree_update_interior.h"
127b3f84eaSKent Overstreet #include "btree_gc.h"
131b30ed5fSKent Overstreet #include "btree_write_buffer.h"
147b3f84eaSKent Overstreet #include "buckets.h"
1521aec962SKent Overstreet #include "buckets_waiting_for_journal.h"
167b3f84eaSKent Overstreet #include "clock.h"
177b3f84eaSKent Overstreet #include "debug.h"
181d16c605SKent Overstreet #include "disk_accounting.h"
19cd575ddfSKent Overstreet #include "ec.h"
207b3f84eaSKent Overstreet #include "error.h"
21c6b2826cSKent Overstreet #include "lru.h"
22d0734356SKent Overstreet #include "recovery.h"
237b3f84eaSKent Overstreet #include "trace.h"
247f4e1d5dSKent Overstreet #include "varint.h"
257b3f84eaSKent Overstreet
267b3f84eaSKent Overstreet #include <linux/kthread.h>
277b3f84eaSKent Overstreet #include <linux/math64.h>
287b3f84eaSKent Overstreet #include <linux/random.h>
297b3f84eaSKent Overstreet #include <linux/rculist.h>
307b3f84eaSKent Overstreet #include <linux/rcupdate.h>
317b3f84eaSKent Overstreet #include <linux/sched/task.h>
327b3f84eaSKent Overstreet #include <linux/sort.h>
33848c3ff8SChen Yufan #include <linux/jiffies.h>
347b3f84eaSKent Overstreet
3564ee1431SKent Overstreet static void bch2_discard_one_bucket_fast(struct bch_dev *, u64);
36a393f331SKent Overstreet
373d48a7f8SKent Overstreet /* Persistent alloc info: */
383d48a7f8SKent Overstreet
397f4e1d5dSKent Overstreet static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
407f4e1d5dSKent Overstreet #define x(name, bits) [BCH_ALLOC_FIELD_V1_##name] = bits / 8,
417f4e1d5dSKent Overstreet BCH_ALLOC_FIELDS_V1()
4290541a74SKent Overstreet #undef x
4390541a74SKent Overstreet };
4490541a74SKent Overstreet
453d48a7f8SKent Overstreet struct bkey_alloc_unpacked {
463d48a7f8SKent Overstreet u64 journal_seq;
473d48a7f8SKent Overstreet u8 gen;
483d48a7f8SKent Overstreet u8 oldest_gen;
493d48a7f8SKent Overstreet u8 data_type;
503d48a7f8SKent Overstreet bool need_discard:1;
513d48a7f8SKent Overstreet bool need_inc_gen:1;
523d48a7f8SKent Overstreet #define x(_name, _bits) u##_bits _name;
533d48a7f8SKent Overstreet BCH_ALLOC_FIELDS_V2()
543d48a7f8SKent Overstreet #undef x
553d48a7f8SKent Overstreet };
567b3f84eaSKent Overstreet
alloc_field_v1_get(const struct bch_alloc * a,const void ** p,unsigned field)577f4e1d5dSKent Overstreet static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
5890541a74SKent Overstreet const void **p, unsigned field)
5990541a74SKent Overstreet {
607f4e1d5dSKent Overstreet unsigned bytes = BCH_ALLOC_V1_FIELD_BYTES[field];
6190541a74SKent Overstreet u64 v;
6290541a74SKent Overstreet
6390541a74SKent Overstreet if (!(a->fields & (1 << field)))
6490541a74SKent Overstreet return 0;
6590541a74SKent Overstreet
6690541a74SKent Overstreet switch (bytes) {
6790541a74SKent Overstreet case 1:
6890541a74SKent Overstreet v = *((const u8 *) *p);
6990541a74SKent Overstreet break;
7090541a74SKent Overstreet case 2:
7190541a74SKent Overstreet v = le16_to_cpup(*p);
7290541a74SKent Overstreet break;
7390541a74SKent Overstreet case 4:
7490541a74SKent Overstreet v = le32_to_cpup(*p);
7590541a74SKent Overstreet break;
7690541a74SKent Overstreet case 8:
7790541a74SKent Overstreet v = le64_to_cpup(*p);
7890541a74SKent Overstreet break;
7990541a74SKent Overstreet default:
8090541a74SKent Overstreet BUG();
8190541a74SKent Overstreet }
8290541a74SKent Overstreet
8390541a74SKent Overstreet *p += bytes;
8490541a74SKent Overstreet return v;
8590541a74SKent Overstreet }
8690541a74SKent Overstreet
bch2_alloc_unpack_v1(struct bkey_alloc_unpacked * out,struct bkey_s_c k)877f4e1d5dSKent Overstreet static void bch2_alloc_unpack_v1(struct bkey_alloc_unpacked *out,
887f4e1d5dSKent Overstreet struct bkey_s_c k)
898fe826f9SKent Overstreet {
907f4e1d5dSKent Overstreet const struct bch_alloc *in = bkey_s_c_to_alloc(k).v;
917f4e1d5dSKent Overstreet const void *d = in->data;
928fe826f9SKent Overstreet unsigned idx = 0;
938fe826f9SKent Overstreet
947f4e1d5dSKent Overstreet out->gen = in->gen;
95460651eeSKent Overstreet
967f4e1d5dSKent Overstreet #define x(_name, _bits) out->_name = alloc_field_v1_get(in, &d, idx++);
977f4e1d5dSKent Overstreet BCH_ALLOC_FIELDS_V1()
988fe826f9SKent Overstreet #undef x
99460651eeSKent Overstreet }
1007f4e1d5dSKent Overstreet
bch2_alloc_unpack_v2(struct bkey_alloc_unpacked * out,struct bkey_s_c k)1017f4e1d5dSKent Overstreet static int bch2_alloc_unpack_v2(struct bkey_alloc_unpacked *out,
1027f4e1d5dSKent Overstreet struct bkey_s_c k)
1037f4e1d5dSKent Overstreet {
1047f4e1d5dSKent Overstreet struct bkey_s_c_alloc_v2 a = bkey_s_c_to_alloc_v2(k);
1057f4e1d5dSKent Overstreet const u8 *in = a.v->data;
1067f4e1d5dSKent Overstreet const u8 *end = bkey_val_end(a);
1077f4e1d5dSKent Overstreet unsigned fieldnr = 0;
1087f4e1d5dSKent Overstreet int ret;
1097f4e1d5dSKent Overstreet u64 v;
1107f4e1d5dSKent Overstreet
1117f4e1d5dSKent Overstreet out->gen = a.v->gen;
1127f4e1d5dSKent Overstreet out->oldest_gen = a.v->oldest_gen;
1137f4e1d5dSKent Overstreet out->data_type = a.v->data_type;
1147f4e1d5dSKent Overstreet
1157f4e1d5dSKent Overstreet #define x(_name, _bits) \
1167f4e1d5dSKent Overstreet if (fieldnr < a.v->nr_fields) { \
1178d344587SKent Overstreet ret = bch2_varint_decode_fast(in, end, &v); \
1187f4e1d5dSKent Overstreet if (ret < 0) \
1197f4e1d5dSKent Overstreet return ret; \
1207f4e1d5dSKent Overstreet in += ret; \
1217f4e1d5dSKent Overstreet } else { \
1227f4e1d5dSKent Overstreet v = 0; \
1237f4e1d5dSKent Overstreet } \
1247f4e1d5dSKent Overstreet out->_name = v; \
1257f4e1d5dSKent Overstreet if (v != out->_name) \
1267f4e1d5dSKent Overstreet return -1; \
1277f4e1d5dSKent Overstreet fieldnr++;
1287f4e1d5dSKent Overstreet
1297f4e1d5dSKent Overstreet BCH_ALLOC_FIELDS_V2()
1307f4e1d5dSKent Overstreet #undef x
1317f4e1d5dSKent Overstreet return 0;
1327f4e1d5dSKent Overstreet }
1337f4e1d5dSKent Overstreet
bch2_alloc_unpack_v3(struct bkey_alloc_unpacked * out,struct bkey_s_c k)1343e52c222SKent Overstreet static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out,
1353e52c222SKent Overstreet struct bkey_s_c k)
1363e52c222SKent Overstreet {
1373e52c222SKent Overstreet struct bkey_s_c_alloc_v3 a = bkey_s_c_to_alloc_v3(k);
1383e52c222SKent Overstreet const u8 *in = a.v->data;
1393e52c222SKent Overstreet const u8 *end = bkey_val_end(a);
1403e52c222SKent Overstreet unsigned fieldnr = 0;
1413e52c222SKent Overstreet int ret;
1423e52c222SKent Overstreet u64 v;
1433e52c222SKent Overstreet
1443e52c222SKent Overstreet out->gen = a.v->gen;
1453e52c222SKent Overstreet out->oldest_gen = a.v->oldest_gen;
1463e52c222SKent Overstreet out->data_type = a.v->data_type;
1473d48a7f8SKent Overstreet out->need_discard = BCH_ALLOC_V3_NEED_DISCARD(a.v);
1483d48a7f8SKent Overstreet out->need_inc_gen = BCH_ALLOC_V3_NEED_INC_GEN(a.v);
1493e52c222SKent Overstreet out->journal_seq = le64_to_cpu(a.v->journal_seq);
1503e52c222SKent Overstreet
1513e52c222SKent Overstreet #define x(_name, _bits) \
1523e52c222SKent Overstreet if (fieldnr < a.v->nr_fields) { \
1533e52c222SKent Overstreet ret = bch2_varint_decode_fast(in, end, &v); \
1543e52c222SKent Overstreet if (ret < 0) \
1553e52c222SKent Overstreet return ret; \
1563e52c222SKent Overstreet in += ret; \
1573e52c222SKent Overstreet } else { \
1583e52c222SKent Overstreet v = 0; \
1593e52c222SKent Overstreet } \
1603e52c222SKent Overstreet out->_name = v; \
1613e52c222SKent Overstreet if (v != out->_name) \
1623e52c222SKent Overstreet return -1; \
1633e52c222SKent Overstreet fieldnr++;
1643e52c222SKent Overstreet
1653e52c222SKent Overstreet BCH_ALLOC_FIELDS_V2()
1663e52c222SKent Overstreet #undef x
1673e52c222SKent Overstreet return 0;
1683e52c222SKent Overstreet }
1693e52c222SKent Overstreet
bch2_alloc_unpack(struct bkey_s_c k)1703d48a7f8SKent Overstreet static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
1717f4e1d5dSKent Overstreet {
17219a614d2SKent Overstreet struct bkey_alloc_unpacked ret = { .gen = 0 };
1737f4e1d5dSKent Overstreet
1743e52c222SKent Overstreet switch (k.k->type) {
1753e52c222SKent Overstreet case KEY_TYPE_alloc:
1767f4e1d5dSKent Overstreet bch2_alloc_unpack_v1(&ret, k);
1773e52c222SKent Overstreet break;
1783e52c222SKent Overstreet case KEY_TYPE_alloc_v2:
1793e52c222SKent Overstreet bch2_alloc_unpack_v2(&ret, k);
1803e52c222SKent Overstreet break;
1813e52c222SKent Overstreet case KEY_TYPE_alloc_v3:
1823e52c222SKent Overstreet bch2_alloc_unpack_v3(&ret, k);
1833e52c222SKent Overstreet break;
1843e52c222SKent Overstreet }
1857f4e1d5dSKent Overstreet
1868fe826f9SKent Overstreet return ret;
1878fe826f9SKent Overstreet }
1888fe826f9SKent Overstreet
bch_alloc_v1_val_u64s(const struct bch_alloc * a)1896ad060b0SKent Overstreet static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
1907b3f84eaSKent Overstreet {
19190541a74SKent Overstreet unsigned i, bytes = offsetof(struct bch_alloc, data);
1927b3f84eaSKent Overstreet
1937f4e1d5dSKent Overstreet for (i = 0; i < ARRAY_SIZE(BCH_ALLOC_V1_FIELD_BYTES); i++)
19490541a74SKent Overstreet if (a->fields & (1 << i))
1957f4e1d5dSKent Overstreet bytes += BCH_ALLOC_V1_FIELD_BYTES[i];
1967b3f84eaSKent Overstreet
1977b3f84eaSKent Overstreet return DIV_ROUND_UP(bytes, sizeof(u64));
1987b3f84eaSKent Overstreet }
1997b3f84eaSKent Overstreet
bch2_alloc_v1_validate(struct bch_fs * c,struct bkey_s_c k,struct bkey_validate_context from)200d97de0d0SKent Overstreet int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k,
201a6f4794fSKent Overstreet struct bkey_validate_context from)
2027b3f84eaSKent Overstreet {
20326609b61SKent Overstreet struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
204b65db750SKent Overstreet int ret = 0;
20526609b61SKent Overstreet
206e8897337SKent Overstreet /* allow for unknown fields */
207d97de0d0SKent Overstreet bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v),
208d97de0d0SKent Overstreet c, alloc_v1_val_size_bad,
209b65db750SKent Overstreet "incorrect value size (%zu < %u)",
210f0ac7df2SKent Overstreet bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v));
211b65db750SKent Overstreet fsck_err:
212b65db750SKent Overstreet return ret;
2137b3f84eaSKent Overstreet }
2147b3f84eaSKent Overstreet
bch2_alloc_v2_validate(struct bch_fs * c,struct bkey_s_c k,struct bkey_validate_context from)215d97de0d0SKent Overstreet int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k,
216a6f4794fSKent Overstreet struct bkey_validate_context from)
2177f4e1d5dSKent Overstreet {
2187f4e1d5dSKent Overstreet struct bkey_alloc_unpacked u;
219b65db750SKent Overstreet int ret = 0;
2207f4e1d5dSKent Overstreet
221d97de0d0SKent Overstreet bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k),
222d97de0d0SKent Overstreet c, alloc_v2_unpack_error,
223b65db750SKent Overstreet "unpack error");
224b65db750SKent Overstreet fsck_err:
225b65db750SKent Overstreet return ret;
226f0ac7df2SKent Overstreet }
227f0ac7df2SKent Overstreet
bch2_alloc_v3_validate(struct bch_fs * c,struct bkey_s_c k,struct bkey_validate_context from)228d97de0d0SKent Overstreet int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k,
229a6f4794fSKent Overstreet struct bkey_validate_context from)
2303e52c222SKent Overstreet {
2313e52c222SKent Overstreet struct bkey_alloc_unpacked u;
232b65db750SKent Overstreet int ret = 0;
2333e52c222SKent Overstreet
234d97de0d0SKent Overstreet bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k),
235bafd41b4SThorsten Blum c, alloc_v3_unpack_error,
236b65db750SKent Overstreet "unpack error");
237b65db750SKent Overstreet fsck_err:
238b65db750SKent Overstreet return ret;
239f0ac7df2SKent Overstreet }
240f0ac7df2SKent Overstreet
bch2_alloc_v4_validate(struct bch_fs * c,struct bkey_s_c k,struct bkey_validate_context from)241d97de0d0SKent Overstreet int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k,
242a6f4794fSKent Overstreet struct bkey_validate_context from)
2437b3f84eaSKent Overstreet {
2448ed823b1SKent Overstreet struct bch_alloc_v4 a;
245b65db750SKent Overstreet int ret = 0;
24611c7d3e8SKent Overstreet
2478ed823b1SKent Overstreet bkey_val_copy(&a, bkey_s_c_to_alloc_v4(k));
2488ed823b1SKent Overstreet
2498ed823b1SKent Overstreet bkey_fsck_err_on(alloc_v4_u64s_noerror(&a) > bkey_val_u64s(k.k),
250d97de0d0SKent Overstreet c, alloc_v4_val_size_bad,
251b65db750SKent Overstreet "bad val size (%u > %zu)",
2528ed823b1SKent Overstreet alloc_v4_u64s_noerror(&a), bkey_val_u64s(k.k));
2533d48a7f8SKent Overstreet
2548ed823b1SKent Overstreet bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(&a) &&
2558ed823b1SKent Overstreet BCH_ALLOC_V4_NR_BACKPOINTERS(&a),
256d97de0d0SKent Overstreet c, alloc_v4_backpointers_start_bad,
257b65db750SKent Overstreet "invalid backpointers_start");
258a8c752bbSKent Overstreet
2598ed823b1SKent Overstreet bkey_fsck_err_on(alloc_data_type(a, a.data_type) != a.data_type,
260d97de0d0SKent Overstreet c, alloc_key_data_type_bad,
261b65db750SKent Overstreet "invalid data type (got %u should be %u)",
2628ed823b1SKent Overstreet a.data_type, alloc_data_type(a, a.data_type));
26362491956SKent Overstreet
264cff07e27SKent Overstreet for (unsigned i = 0; i < 2; i++)
2658ed823b1SKent Overstreet bkey_fsck_err_on(a.io_time[i] > LRU_TIME_MAX,
266d97de0d0SKent Overstreet c, alloc_key_io_time_bad,
267cff07e27SKent Overstreet "invalid io_time[%s]: %llu, max %llu",
268cff07e27SKent Overstreet i == READ ? "read" : "write",
2698ed823b1SKent Overstreet a.io_time[i], LRU_TIME_MAX);
270cff07e27SKent Overstreet
2718ed823b1SKent Overstreet unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(&a) * sizeof(u64) >
27255f7962dSKent Overstreet offsetof(struct bch_alloc_v4, stripe_sectors)
2738ed823b1SKent Overstreet ? a.stripe_sectors
27455f7962dSKent Overstreet : 0;
27555f7962dSKent Overstreet
2768ed823b1SKent Overstreet switch (a.data_type) {
277822835ffSKent Overstreet case BCH_DATA_free:
278822835ffSKent Overstreet case BCH_DATA_need_gc_gens:
279822835ffSKent Overstreet case BCH_DATA_need_discard:
28055f7962dSKent Overstreet bkey_fsck_err_on(stripe_sectors ||
2818ed823b1SKent Overstreet a.dirty_sectors ||
2828ed823b1SKent Overstreet a.cached_sectors ||
2838ed823b1SKent Overstreet a.stripe,
284d97de0d0SKent Overstreet c, alloc_key_empty_but_have_data,
28555f7962dSKent Overstreet "empty data type free but have data %u.%u.%u %u",
28655f7962dSKent Overstreet stripe_sectors,
2878ed823b1SKent Overstreet a.dirty_sectors,
2888ed823b1SKent Overstreet a.cached_sectors,
2898ed823b1SKent Overstreet a.stripe);
290822835ffSKent Overstreet break;
291822835ffSKent Overstreet case BCH_DATA_sb:
292822835ffSKent Overstreet case BCH_DATA_journal:
293822835ffSKent Overstreet case BCH_DATA_btree:
294822835ffSKent Overstreet case BCH_DATA_user:
295822835ffSKent Overstreet case BCH_DATA_parity:
2968ed823b1SKent Overstreet bkey_fsck_err_on(!a.dirty_sectors &&
29755f7962dSKent Overstreet !stripe_sectors,
298d97de0d0SKent Overstreet c, alloc_key_dirty_sectors_0,
299b65db750SKent Overstreet "data_type %s but dirty_sectors==0",
3008ed823b1SKent Overstreet bch2_data_type_str(a.data_type));
301822835ffSKent Overstreet break;
302822835ffSKent Overstreet case BCH_DATA_cached:
3038ed823b1SKent Overstreet bkey_fsck_err_on(!a.cached_sectors ||
3048ed823b1SKent Overstreet a.dirty_sectors ||
30555f7962dSKent Overstreet stripe_sectors ||
3068ed823b1SKent Overstreet a.stripe,
307d97de0d0SKent Overstreet c, alloc_key_cached_inconsistency,
308b65db750SKent Overstreet "data type inconsistency");
309822835ffSKent Overstreet
3108ed823b1SKent Overstreet bkey_fsck_err_on(!a.io_time[READ] &&
311b65db750SKent Overstreet c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs,
312d97de0d0SKent Overstreet c, alloc_key_cached_but_read_time_zero,
313b65db750SKent Overstreet "cached bucket with read_time == 0");
314822835ffSKent Overstreet break;
315822835ffSKent Overstreet case BCH_DATA_stripe:
316822835ffSKent Overstreet break;
317822835ffSKent Overstreet }
318b65db750SKent Overstreet fsck_err:
319b65db750SKent Overstreet return ret;
3203d48a7f8SKent Overstreet }
3213d48a7f8SKent Overstreet
bch2_alloc_v4_swab(struct bkey_s k)3223d48a7f8SKent Overstreet void bch2_alloc_v4_swab(struct bkey_s k)
3233d48a7f8SKent Overstreet {
3243d48a7f8SKent Overstreet struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v;
3253d48a7f8SKent Overstreet
3269e779f3fSKent Overstreet a->journal_seq_nonempty = swab64(a->journal_seq_nonempty);
3279e779f3fSKent Overstreet a->journal_seq_empty = swab64(a->journal_seq_empty);
3283d48a7f8SKent Overstreet a->flags = swab32(a->flags);
3293d48a7f8SKent Overstreet a->dirty_sectors = swab32(a->dirty_sectors);
3303d48a7f8SKent Overstreet a->cached_sectors = swab32(a->cached_sectors);
3313d48a7f8SKent Overstreet a->io_time[0] = swab64(a->io_time[0]);
3323d48a7f8SKent Overstreet a->io_time[1] = swab64(a->io_time[1]);
3333d48a7f8SKent Overstreet a->stripe = swab32(a->stripe);
3343d48a7f8SKent Overstreet a->nr_external_backpointers = swab32(a->nr_external_backpointers);
33555f7962dSKent Overstreet a->stripe_sectors = swab32(a->stripe_sectors);
3363d48a7f8SKent Overstreet }
3373d48a7f8SKent Overstreet
bch2_alloc_to_text(struct printbuf * out,struct bch_fs * c,struct bkey_s_c k)3383d48a7f8SKent Overstreet void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
3393d48a7f8SKent Overstreet {
34019a614d2SKent Overstreet struct bch_alloc_v4 _a;
34119a614d2SKent Overstreet const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
342260af156SKent Overstreet struct bch_dev *ca = c ? bch2_dev_bucket_tryget_noerror(c, k.k->p) : NULL;
3433d48a7f8SKent Overstreet
34419a614d2SKent Overstreet prt_newline(out);
34519a614d2SKent Overstreet printbuf_indent_add(out, 2);
3463d48a7f8SKent Overstreet
347e58f963cSKent Overstreet prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen);
348e58f963cSKent Overstreet bch2_prt_data_type(out, a->data_type);
34919a614d2SKent Overstreet prt_newline(out);
3509e779f3fSKent Overstreet prt_printf(out, "journal_seq_nonempty %llu\n", a->journal_seq_nonempty);
3519e779f3fSKent Overstreet prt_printf(out, "journal_seq_empty %llu\n", a->journal_seq_empty);
3527423330eSKent Overstreet prt_printf(out, "need_discard %llu\n", BCH_ALLOC_V4_NEED_DISCARD(a));
3537423330eSKent Overstreet prt_printf(out, "need_inc_gen %llu\n", BCH_ALLOC_V4_NEED_INC_GEN(a));
3547423330eSKent Overstreet prt_printf(out, "dirty_sectors %u\n", a->dirty_sectors);
35555f7962dSKent Overstreet prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors);
3567423330eSKent Overstreet prt_printf(out, "cached_sectors %u\n", a->cached_sectors);
3577423330eSKent Overstreet prt_printf(out, "stripe %u\n", a->stripe);
3587423330eSKent Overstreet prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy);
3597423330eSKent Overstreet prt_printf(out, "io_time[READ] %llu\n", a->io_time[READ]);
3607423330eSKent Overstreet prt_printf(out, "io_time[WRITE] %llu\n", a->io_time[WRITE]);
361260af156SKent Overstreet
362260af156SKent Overstreet if (ca)
363260af156SKent Overstreet prt_printf(out, "fragmentation %llu\n", alloc_lru_idx_fragmentation(*a, ca));
3647423330eSKent Overstreet prt_printf(out, "bp_start %llu\n", BCH_ALLOC_V4_BACKPOINTERS_START(a));
365a8c752bbSKent Overstreet printbuf_indent_sub(out, 2);
366260af156SKent Overstreet
367260af156SKent Overstreet bch2_dev_put(ca);
36819a614d2SKent Overstreet }
36919a614d2SKent Overstreet
__bch2_alloc_to_v4(struct bkey_s_c k,struct bch_alloc_v4 * out)37019a614d2SKent Overstreet void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
37119a614d2SKent Overstreet {
37219a614d2SKent Overstreet if (k.k->type == KEY_TYPE_alloc_v4) {
37319a614d2SKent Overstreet void *src, *dst;
37419a614d2SKent Overstreet
37519a614d2SKent Overstreet *out = *bkey_s_c_to_alloc_v4(k).v;
37619a614d2SKent Overstreet
37719a614d2SKent Overstreet src = alloc_v4_backpointers(out);
37819a614d2SKent Overstreet SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
37919a614d2SKent Overstreet dst = alloc_v4_backpointers(out);
38019a614d2SKent Overstreet
38119a614d2SKent Overstreet if (src < dst)
38219a614d2SKent Overstreet memset(src, 0, dst - src);
38362a03559SKent Overstreet
38462a03559SKent Overstreet SET_BCH_ALLOC_V4_NR_BACKPOINTERS(out, 0);
38519a614d2SKent Overstreet } else {
38619a614d2SKent Overstreet struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
38719a614d2SKent Overstreet
38819a614d2SKent Overstreet *out = (struct bch_alloc_v4) {
3899e779f3fSKent Overstreet .journal_seq_nonempty = u.journal_seq,
39019a614d2SKent Overstreet .flags = u.need_discard,
39119a614d2SKent Overstreet .gen = u.gen,
39219a614d2SKent Overstreet .oldest_gen = u.oldest_gen,
39319a614d2SKent Overstreet .data_type = u.data_type,
39419a614d2SKent Overstreet .stripe_redundancy = u.stripe_redundancy,
39519a614d2SKent Overstreet .dirty_sectors = u.dirty_sectors,
39619a614d2SKent Overstreet .cached_sectors = u.cached_sectors,
39719a614d2SKent Overstreet .io_time[READ] = u.read_time,
39819a614d2SKent Overstreet .io_time[WRITE] = u.write_time,
39919a614d2SKent Overstreet .stripe = u.stripe,
40019a614d2SKent Overstreet };
40119a614d2SKent Overstreet
40219a614d2SKent Overstreet SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
40319a614d2SKent Overstreet }
40419a614d2SKent Overstreet }
40519a614d2SKent Overstreet
40619a614d2SKent Overstreet static noinline struct bkey_i_alloc_v4 *
__bch2_alloc_to_v4_mut(struct btree_trans * trans,struct bkey_s_c k)40719a614d2SKent Overstreet __bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
40819a614d2SKent Overstreet {
40919a614d2SKent Overstreet struct bkey_i_alloc_v4 *ret;
41019a614d2SKent Overstreet
411615fccadSKent Overstreet ret = bch2_trans_kmalloc(trans, max(bkey_bytes(k.k), sizeof(struct bkey_i_alloc_v4)));
41219a614d2SKent Overstreet if (IS_ERR(ret))
41319a614d2SKent Overstreet return ret;
41419a614d2SKent Overstreet
41562a03559SKent Overstreet if (k.k->type == KEY_TYPE_alloc_v4) {
41662a03559SKent Overstreet void *src, *dst;
41762a03559SKent Overstreet
41819a614d2SKent Overstreet bkey_reassemble(&ret->k_i, k);
41919a614d2SKent Overstreet
42019a614d2SKent Overstreet src = alloc_v4_backpointers(&ret->v);
42119a614d2SKent Overstreet SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
42219a614d2SKent Overstreet dst = alloc_v4_backpointers(&ret->v);
42319a614d2SKent Overstreet
42419a614d2SKent Overstreet if (src < dst)
42519a614d2SKent Overstreet memset(src, 0, dst - src);
42662a03559SKent Overstreet
42762a03559SKent Overstreet SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v, 0);
42819a614d2SKent Overstreet set_alloc_v4_u64s(ret);
42919a614d2SKent Overstreet } else {
43019a614d2SKent Overstreet bkey_alloc_v4_init(&ret->k_i);
43119a614d2SKent Overstreet ret->k.p = k.k->p;
43219a614d2SKent Overstreet bch2_alloc_to_v4(k, &ret->v);
43319a614d2SKent Overstreet }
43419a614d2SKent Overstreet return ret;
43519a614d2SKent Overstreet }
43619a614d2SKent Overstreet
bch2_alloc_to_v4_mut_inlined(struct btree_trans * trans,struct bkey_s_c k)43719a614d2SKent Overstreet static inline struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut_inlined(struct btree_trans *trans, struct bkey_s_c k)
43819a614d2SKent Overstreet {
43962a03559SKent Overstreet struct bkey_s_c_alloc_v4 a;
44062a03559SKent Overstreet
44119a614d2SKent Overstreet if (likely(k.k->type == KEY_TYPE_alloc_v4) &&
44262a03559SKent Overstreet ((a = bkey_s_c_to_alloc_v4(k), true) &&
44334dfa5dbSKent Overstreet BCH_ALLOC_V4_NR_BACKPOINTERS(a.v) == 0))
444dbda63bbSKent Overstreet return bch2_bkey_make_mut_noupdate_typed(trans, k, alloc_v4);
44519a614d2SKent Overstreet
44619a614d2SKent Overstreet return __bch2_alloc_to_v4_mut(trans, k);
44719a614d2SKent Overstreet }
44819a614d2SKent Overstreet
/*
 * Out-of-line entry point for bch2_alloc_to_v4_mut_inlined(): returns
 * whatever that helper returns for @trans and @k.
 */
struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
{
	struct bkey_i_alloc_v4 *mut = bch2_alloc_to_v4_mut_inlined(trans, k);

	return mut;
}
45319a614d2SKent Overstreet
45419a614d2SKent Overstreet struct bkey_i_alloc_v4 *
bch2_trans_start_alloc_update_noupdate(struct btree_trans * trans,struct btree_iter * iter,struct bpos pos)455abe2f470SKent Overstreet bch2_trans_start_alloc_update_noupdate(struct btree_trans *trans, struct btree_iter *iter,
45619a614d2SKent Overstreet struct bpos pos)
45719a614d2SKent Overstreet {
458abe2f470SKent Overstreet struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_alloc, pos,
4595dd8c60eSKent Overstreet BTREE_ITER_with_updates|
4605dd8c60eSKent Overstreet BTREE_ITER_cached|
4615dd8c60eSKent Overstreet BTREE_ITER_intent);
462abe2f470SKent Overstreet int ret = bkey_err(k);
46319a614d2SKent Overstreet if (unlikely(ret))
464bcb79a51SKent Overstreet return ERR_PTR(ret);
46519a614d2SKent Overstreet
466abe2f470SKent Overstreet struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut_inlined(trans, k);
46719a614d2SKent Overstreet ret = PTR_ERR_OR_ZERO(a);
46819a614d2SKent Overstreet if (unlikely(ret))
46919a614d2SKent Overstreet goto err;
47019a614d2SKent Overstreet return a;
47119a614d2SKent Overstreet err:
47219a614d2SKent Overstreet bch2_trans_iter_exit(trans, iter);
47319a614d2SKent Overstreet return ERR_PTR(ret);
4747b3f84eaSKent Overstreet }
4757b3f84eaSKent Overstreet
476abe2f470SKent Overstreet __flatten
bch2_trans_start_alloc_update(struct btree_trans * trans,struct bpos pos,enum btree_iter_update_trigger_flags flags)477e0d5bc6aSKent Overstreet struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, struct bpos pos,
478e0d5bc6aSKent Overstreet enum btree_iter_update_trigger_flags flags)
479abe2f470SKent Overstreet {
480abe2f470SKent Overstreet struct btree_iter iter;
481abe2f470SKent Overstreet struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update_noupdate(trans, &iter, pos);
482abe2f470SKent Overstreet int ret = PTR_ERR_OR_ZERO(a);
483abe2f470SKent Overstreet if (ret)
484abe2f470SKent Overstreet return ERR_PTR(ret);
485abe2f470SKent Overstreet
486e0d5bc6aSKent Overstreet ret = bch2_trans_update(trans, &iter, &a->k_i, flags);
487abe2f470SKent Overstreet bch2_trans_iter_exit(trans, &iter);
488abe2f470SKent Overstreet return unlikely(ret) ? ERR_PTR(ret) : a;
489abe2f470SKent Overstreet }
490abe2f470SKent Overstreet
alloc_gens_pos(struct bpos pos,unsigned * offset)4915250b74dSKent Overstreet static struct bpos alloc_gens_pos(struct bpos pos, unsigned *offset)
4925250b74dSKent Overstreet {
4935250b74dSKent Overstreet *offset = pos.offset & KEY_TYPE_BUCKET_GENS_MASK;
4945250b74dSKent Overstreet
4955250b74dSKent Overstreet pos.offset >>= KEY_TYPE_BUCKET_GENS_BITS;
4965250b74dSKent Overstreet return pos;
4975250b74dSKent Overstreet }
4985250b74dSKent Overstreet
bucket_gens_pos_to_alloc(struct bpos pos,unsigned offset)4995250b74dSKent Overstreet static struct bpos bucket_gens_pos_to_alloc(struct bpos pos, unsigned offset)
5005250b74dSKent Overstreet {
5015250b74dSKent Overstreet pos.offset <<= KEY_TYPE_BUCKET_GENS_BITS;
5025250b74dSKent Overstreet pos.offset += offset;
5035250b74dSKent Overstreet return pos;
5045250b74dSKent Overstreet }
5055250b74dSKent Overstreet
alloc_gen(struct bkey_s_c k,unsigned offset)5065250b74dSKent Overstreet static unsigned alloc_gen(struct bkey_s_c k, unsigned offset)
5075250b74dSKent Overstreet {
5085250b74dSKent Overstreet return k.k->type == KEY_TYPE_bucket_gens
5095250b74dSKent Overstreet ? bkey_s_c_to_bucket_gens(k).v->gens[offset]
5105250b74dSKent Overstreet : 0;
5115250b74dSKent Overstreet }
5125250b74dSKent Overstreet
bch2_bucket_gens_validate(struct bch_fs * c,struct bkey_s_c k,struct bkey_validate_context from)513d97de0d0SKent Overstreet int bch2_bucket_gens_validate(struct bch_fs *c, struct bkey_s_c k,
514a6f4794fSKent Overstreet struct bkey_validate_context from)
5155250b74dSKent Overstreet {
516b65db750SKent Overstreet int ret = 0;
5175250b74dSKent Overstreet
518d97de0d0SKent Overstreet bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens),
519d97de0d0SKent Overstreet c, bucket_gens_val_size_bad,
520b65db750SKent Overstreet "bad val size (%zu != %zu)",
521b65db750SKent Overstreet bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens));
522b65db750SKent Overstreet fsck_err:
523b65db750SKent Overstreet return ret;
5245250b74dSKent Overstreet }
5255250b74dSKent Overstreet
bch2_bucket_gens_to_text(struct printbuf * out,struct bch_fs * c,struct bkey_s_c k)5265250b74dSKent Overstreet void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
5275250b74dSKent Overstreet {
5285250b74dSKent Overstreet struct bkey_s_c_bucket_gens g = bkey_s_c_to_bucket_gens(k);
5295250b74dSKent Overstreet unsigned i;
5305250b74dSKent Overstreet
5315250b74dSKent Overstreet for (i = 0; i < ARRAY_SIZE(g.v->gens); i++) {
5325250b74dSKent Overstreet if (i)
5335250b74dSKent Overstreet prt_char(out, ' ');
5345250b74dSKent Overstreet prt_printf(out, "%u", g.v->gens[i]);
5355250b74dSKent Overstreet }
5365250b74dSKent Overstreet }
5375250b74dSKent Overstreet
bch2_bucket_gens_init(struct bch_fs * c)5385250b74dSKent Overstreet int bch2_bucket_gens_init(struct bch_fs *c)
5395250b74dSKent Overstreet {
5406bd68ec2SKent Overstreet struct btree_trans *trans = bch2_trans_get(c);
5415250b74dSKent Overstreet struct bkey_i_bucket_gens g;
5425250b74dSKent Overstreet bool have_bucket_gens_key = false;
5435250b74dSKent Overstreet int ret;
5445250b74dSKent Overstreet
5455028b907SKent Overstreet ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
5465dd8c60eSKent Overstreet BTREE_ITER_prefetch, k, ({
5475250b74dSKent Overstreet /*
5485250b74dSKent Overstreet * Not a fsck error because this is checked/repaired by
5495250b74dSKent Overstreet * bch2_check_alloc_key() which runs later:
5505250b74dSKent Overstreet */
5515250b74dSKent Overstreet if (!bch2_dev_bucket_exists(c, k.k->p))
5525250b74dSKent Overstreet continue;
5535250b74dSKent Overstreet
55480eab7a7SKent Overstreet struct bch_alloc_v4 a;
55580eab7a7SKent Overstreet u8 gen = bch2_alloc_to_v4(k, &a)->gen;
55680eab7a7SKent Overstreet unsigned offset;
55780eab7a7SKent Overstreet struct bpos pos = alloc_gens_pos(iter.pos, &offset);
5581ba6f48fSKent Overstreet int ret2 = 0;
5595250b74dSKent Overstreet
560c2a503f3SKent Overstreet if (have_bucket_gens_key && !bkey_eq(g.k.p, pos)) {
5611ba6f48fSKent Overstreet ret2 = bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?:
5621ba6f48fSKent Overstreet bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
5631ba6f48fSKent Overstreet if (ret2)
5641ba6f48fSKent Overstreet goto iter_err;
5655250b74dSKent Overstreet have_bucket_gens_key = false;
5665250b74dSKent Overstreet }
5675250b74dSKent Overstreet
5685250b74dSKent Overstreet if (!have_bucket_gens_key) {
5695250b74dSKent Overstreet bkey_bucket_gens_init(&g.k_i);
5705250b74dSKent Overstreet g.k.p = pos;
5715250b74dSKent Overstreet have_bucket_gens_key = true;
5725250b74dSKent Overstreet }
5735250b74dSKent Overstreet
5745250b74dSKent Overstreet g.v.gens[offset] = gen;
5751ba6f48fSKent Overstreet iter_err:
5761ba6f48fSKent Overstreet ret2;
57727b2df98SKent Overstreet }));
5785250b74dSKent Overstreet
5795250b74dSKent Overstreet if (have_bucket_gens_key && !ret)
5806bd68ec2SKent Overstreet ret = commit_do(trans, NULL, NULL,
5813f0e297dSKent Overstreet BCH_TRANS_COMMIT_no_enospc,
5826bd68ec2SKent Overstreet bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
5835250b74dSKent Overstreet
5846bd68ec2SKent Overstreet bch2_trans_put(trans);
5855250b74dSKent Overstreet
5861bb3c2a9SKent Overstreet bch_err_fn(c, ret);
5875250b74dSKent Overstreet return ret;
5885250b74dSKent Overstreet }
5895250b74dSKent Overstreet
bch2_alloc_read(struct bch_fs * c)59010a6ced2SKent Overstreet int bch2_alloc_read(struct bch_fs *c)
5915250b74dSKent Overstreet {
5922dd202dbSKent Overstreet down_read(&c->state_lock);
5932dd202dbSKent Overstreet
5946bd68ec2SKent Overstreet struct btree_trans *trans = bch2_trans_get(c);
595fa6cce09SKent Overstreet struct bch_dev *ca = NULL;
5965250b74dSKent Overstreet int ret;
5975250b74dSKent Overstreet
59810a6ced2SKent Overstreet if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) {
5995028b907SKent Overstreet ret = for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
6005dd8c60eSKent Overstreet BTREE_ITER_prefetch, k, ({
6015250b74dSKent Overstreet u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
6025250b74dSKent Overstreet u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
6035250b74dSKent Overstreet
6045250b74dSKent Overstreet if (k.k->type != KEY_TYPE_bucket_gens)
6055250b74dSKent Overstreet continue;
6065250b74dSKent Overstreet
607fa6cce09SKent Overstreet ca = bch2_dev_iterate(c, ca, k.k->p.inode);
6085250b74dSKent Overstreet /*
6095250b74dSKent Overstreet * Not a fsck error because this is checked/repaired by
6105250b74dSKent Overstreet * bch2_check_alloc_key() which runs later:
6115250b74dSKent Overstreet */
612fa6cce09SKent Overstreet if (!ca) {
6139180ad2eSKent Overstreet bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
6145250b74dSKent Overstreet continue;
615fa6cce09SKent Overstreet }
6165250b74dSKent Overstreet
617fa6cce09SKent Overstreet const struct bch_bucket_gens *g = bkey_s_c_to_bucket_gens(k).v;
6185250b74dSKent Overstreet
61980eab7a7SKent Overstreet for (u64 b = max_t(u64, ca->mi.first_bucket, start);
6205250b74dSKent Overstreet b < min_t(u64, ca->mi.nbuckets, end);
6215250b74dSKent Overstreet b++)
6225250b74dSKent Overstreet *bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK];
62327b2df98SKent Overstreet 0;
62427b2df98SKent Overstreet }));
62510a6ced2SKent Overstreet } else {
6265028b907SKent Overstreet ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
6275dd8c60eSKent Overstreet BTREE_ITER_prefetch, k, ({
628fa6cce09SKent Overstreet ca = bch2_dev_iterate(c, ca, k.k->p.inode);
62910a6ced2SKent Overstreet /*
63010a6ced2SKent Overstreet * Not a fsck error because this is checked/repaired by
63110a6ced2SKent Overstreet * bch2_check_alloc_key() which runs later:
63210a6ced2SKent Overstreet */
633fa6cce09SKent Overstreet if (!ca) {
6349180ad2eSKent Overstreet bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
63510a6ced2SKent Overstreet continue;
636fa6cce09SKent Overstreet }
63710a6ced2SKent Overstreet
638a319aeaeSKent Overstreet if (k.k->p.offset < ca->mi.first_bucket) {
6399180ad2eSKent Overstreet bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode, ca->mi.first_bucket));
640a319aeaeSKent Overstreet continue;
641a319aeaeSKent Overstreet }
642a319aeaeSKent Overstreet
643a319aeaeSKent Overstreet if (k.k->p.offset >= ca->mi.nbuckets) {
6449180ad2eSKent Overstreet bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
645a319aeaeSKent Overstreet continue;
646a319aeaeSKent Overstreet }
647a319aeaeSKent Overstreet
64880eab7a7SKent Overstreet struct bch_alloc_v4 a;
64910a6ced2SKent Overstreet *bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen;
65027b2df98SKent Overstreet 0;
65127b2df98SKent Overstreet }));
65210a6ced2SKent Overstreet }
6535250b74dSKent Overstreet
654fa6cce09SKent Overstreet bch2_dev_put(ca);
6556bd68ec2SKent Overstreet bch2_trans_put(trans);
6565250b74dSKent Overstreet
6572dd202dbSKent Overstreet up_read(&c->state_lock);
6581bb3c2a9SKent Overstreet bch_err_fn(c, ret);
6595250b74dSKent Overstreet return ret;
6605250b74dSKent Overstreet }
6615250b74dSKent Overstreet
662c6b2826cSKent Overstreet /* Free space/discard btree: */
663c6b2826cSKent Overstreet
/*
 * Report that the need_discard or freespace btree disagrees with an alloc key.
 *
 * @alloc_k:	alloc key, printed as part of the message
 * @set:	only affects the wording: "incorrectly set" when true,
 *		"incorrectly unset" when false
 * @discard:	true selects the need_discard btree and error ID, false the
 *		freespace ones
 * @repair:	when true FSCK_CAN_FIX is added to the fsck flags
 *
 * Returns what __bch2_fsck_err() returned, except that
 * -BCH_ERR_fsck_ignore and -BCH_ERR_fsck_errors_not_fixed are turned into 0.
 */
static int __need_discard_or_freespace_err(struct btree_trans *trans,
					   struct bkey_s_c alloc_k,
					   bool set, bool discard, bool repair)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	enum bch_sb_error_id err_id;
	enum btree_id btree;

	if (discard) {
		err_id	= BCH_FSCK_ERR_need_discard_key_wrong;
		btree	= BTREE_ID_need_discard;
	} else {
		err_id	= BCH_FSCK_ERR_freespace_key_wrong;
		btree	= BTREE_ID_freespace;
	}

	enum bch_fsck_flags flags = FSCK_CAN_IGNORE;
	if (repair)
		flags |= FSCK_CAN_FIX;

	bch2_bkey_val_to_text(&buf, c, alloc_k);

	int ret = __bch2_fsck_err(NULL, trans, flags, err_id,
				  "bucket incorrectly %sset in %s btree\n%s",
				  set ? "" : "un",
				  bch2_btree_id_str(btree),
				  buf.buf);

	/* Neither of these two outcomes is treated as a failure by callers */
	switch (ret) {
	case -BCH_ERR_fsck_ignore:
	case -BCH_ERR_fsck_errors_not_fixed:
		ret = 0;
		break;
	default:
		break;
	}

	printbuf_exit(&buf);
	return ret;
}

/*
 * Convenience wrappers: run __need_discard_or_freespace_err() through
 * fsck_err_wrap(), optionally only when @cond holds (the _on variant evaluates
 * to false otherwise).
 *
 * NOTE(review): fsck_err_wrap() is defined elsewhere; the user in this file
 * provides a local `ret` and an `fsck_err:` label, so it presumably needs
 * both - confirm against its definition.
 */
#define need_discard_or_freespace_err(...)		\
	fsck_err_wrap(__need_discard_or_freespace_err(__VA_ARGS__))

#define need_discard_or_freespace_err_on(cond, ...)		\
	(unlikely(cond) ? need_discard_or_freespace_err(__VA_ARGS__) : false)
696c8e58813SKent Overstreet
/*
 * Set or clear the index entry that mirrors an alloc key: a key in the
 * freespace btree for BCH_DATA_free buckets, or in the need_discard btree for
 * BCH_DATA_need_discard buckets.  Any other data type has no index entry and
 * the function returns 0 without doing anything.
 *
 * @ca:		device the bucket belongs to (consulted for
 *		mi.freespace_initialized only)
 * @alloc_k:	the alloc key; supplies the bucket position
 * @a:		@alloc_k in v4 form
 * @set:	true to set the index bit, false to clear it
 *
 * Returns 0 or a negative error code.
 */
static int bch2_bucket_do_index(struct btree_trans *trans,
				struct bch_dev *ca,
				struct bkey_s_c alloc_k,
				const struct bch_alloc_v4 *a,
				bool set)
{
	enum btree_id btree;
	struct bpos pos;

	switch (a->data_type) {
	case BCH_DATA_free:
		btree	= BTREE_ID_freespace;
		pos	= alloc_freespace_pos(alloc_k.k->p, *a);
		break;
	case BCH_DATA_need_discard:
		btree	= BTREE_ID_need_discard;
		pos	= alloc_k.k->p;
		break;
	default:
		/* Not an indexed data type */
		return 0;
	}

	struct btree_iter iter;
	struct bkey_s_c cur = bch2_bkey_get_iter(trans, &iter, btree, pos, BTREE_ITER_intent);
	int ret = bkey_err(cur);
	if (ret)
		return ret;

	/*
	 * Setting an entry that already exists, or clearing one that does not,
	 * means the index was out of sync with the alloc key.  This is only
	 * reported once the device's freespace has been initialized, and it is
	 * reported as not repairable from here (repair == false).
	 */
	bool slot_empty = !cur.k->type;

	need_discard_or_freespace_err_on(ca->mi.freespace_initialized &&
					 slot_empty != set,
					 trans, alloc_k, set,
					 btree == BTREE_ID_need_discard, false);

	ret = bch2_btree_bit_mod_iter(trans, &iter, set);
fsck_err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
739c6b2826cSKent Overstreet
bch2_bucket_gen_update(struct btree_trans * trans,struct bpos bucket,u8 gen)7405250b74dSKent Overstreet static noinline int bch2_bucket_gen_update(struct btree_trans *trans,
7415250b74dSKent Overstreet struct bpos bucket, u8 gen)
7425250b74dSKent Overstreet {
7435250b74dSKent Overstreet struct btree_iter iter;
7445250b74dSKent Overstreet unsigned offset;
7455250b74dSKent Overstreet struct bpos pos = alloc_gens_pos(bucket, &offset);
7465250b74dSKent Overstreet struct bkey_i_bucket_gens *g;
7475250b74dSKent Overstreet struct bkey_s_c k;
7485250b74dSKent Overstreet int ret;
7495250b74dSKent Overstreet
7505250b74dSKent Overstreet g = bch2_trans_kmalloc(trans, sizeof(*g));
7515250b74dSKent Overstreet ret = PTR_ERR_OR_ZERO(g);
7525250b74dSKent Overstreet if (ret)
7535250b74dSKent Overstreet return ret;
7545250b74dSKent Overstreet
755bcb79a51SKent Overstreet k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_bucket_gens, pos,
7565dd8c60eSKent Overstreet BTREE_ITER_intent|
7575dd8c60eSKent Overstreet BTREE_ITER_with_updates);
7585250b74dSKent Overstreet ret = bkey_err(k);
7595250b74dSKent Overstreet if (ret)
760bcb79a51SKent Overstreet return ret;
7615250b74dSKent Overstreet
7625250b74dSKent Overstreet if (k.k->type != KEY_TYPE_bucket_gens) {
7635250b74dSKent Overstreet bkey_bucket_gens_init(&g->k_i);
7645250b74dSKent Overstreet g->k.p = iter.pos;
7655250b74dSKent Overstreet } else {
7665250b74dSKent Overstreet bkey_reassemble(&g->k_i, k);
7675250b74dSKent Overstreet }
7685250b74dSKent Overstreet
7695250b74dSKent Overstreet g->v.gens[offset] = gen;
7705250b74dSKent Overstreet
7715250b74dSKent Overstreet ret = bch2_trans_update(trans, &iter, &g->k_i, 0);
7725250b74dSKent Overstreet bch2_trans_iter_exit(trans, &iter);
7735250b74dSKent Overstreet return ret;
7745250b74dSKent Overstreet }
7755250b74dSKent Overstreet
/*
 * Apply a delta to the per-device, per-data-type accounting counters.
 *
 * The three deltas are passed on as one array in the order
 * { buckets, sectors, fragmented }.  The BTREE_TRIGGER_gc bit of @flags is
 * forwarded as the gc argument of bch2_disk_accounting_mod2().
 */
static inline int bch2_dev_data_type_accounting_mod(struct btree_trans *trans, struct bch_dev *ca,
						    enum bch_data_type data_type,
						    s64 delta_buckets,
						    s64 delta_sectors,
						    s64 delta_fragmented, unsigned flags)
{
	s64 deltas[3] = {
		delta_buckets,
		delta_sectors,
		delta_fragmented,
	};

	return bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc,
					 deltas, dev_data_type,
					 .dev		= ca->dev_idx,
					 .data_type	= data_type);
}
7891d16c605SKent Overstreet
bch2_alloc_key_to_dev_counters(struct btree_trans * trans,struct bch_dev * ca,const struct bch_alloc_v4 * old,const struct bch_alloc_v4 * new,unsigned flags)7901d16c605SKent Overstreet int bch2_alloc_key_to_dev_counters(struct btree_trans *trans, struct bch_dev *ca,
7911d16c605SKent Overstreet const struct bch_alloc_v4 *old,
7921d16c605SKent Overstreet const struct bch_alloc_v4 *new,
7931d16c605SKent Overstreet unsigned flags)
7941d16c605SKent Overstreet {
7951d16c605SKent Overstreet s64 old_sectors = bch2_bucket_sectors(*old);
7961d16c605SKent Overstreet s64 new_sectors = bch2_bucket_sectors(*new);
7971d16c605SKent Overstreet if (old->data_type != new->data_type) {
7981d16c605SKent Overstreet int ret = bch2_dev_data_type_accounting_mod(trans, ca, new->data_type,
7991d16c605SKent Overstreet 1, new_sectors, bch2_bucket_sectors_fragmented(ca, *new), flags) ?:
8001d16c605SKent Overstreet bch2_dev_data_type_accounting_mod(trans, ca, old->data_type,
8011d16c605SKent Overstreet -1, -old_sectors, -bch2_bucket_sectors_fragmented(ca, *old), flags);
8021d16c605SKent Overstreet if (ret)
8031d16c605SKent Overstreet return ret;
8041d16c605SKent Overstreet } else if (old_sectors != new_sectors) {
8051d16c605SKent Overstreet int ret = bch2_dev_data_type_accounting_mod(trans, ca, new->data_type,
8061d16c605SKent Overstreet 0,
8071d16c605SKent Overstreet new_sectors - old_sectors,
8081d16c605SKent Overstreet bch2_bucket_sectors_fragmented(ca, *new) -
8091d16c605SKent Overstreet bch2_bucket_sectors_fragmented(ca, *old), flags);
8101d16c605SKent Overstreet if (ret)
8111d16c605SKent Overstreet return ret;
8121d16c605SKent Overstreet }
8131d16c605SKent Overstreet
8141d16c605SKent Overstreet s64 old_unstriped = bch2_bucket_sectors_unstriped(*old);
8151d16c605SKent Overstreet s64 new_unstriped = bch2_bucket_sectors_unstriped(*new);
8161d16c605SKent Overstreet if (old_unstriped != new_unstriped) {
8171d16c605SKent Overstreet int ret = bch2_dev_data_type_accounting_mod(trans, ca, BCH_DATA_unstriped,
8181d16c605SKent Overstreet !!new_unstriped - !!old_unstriped,
8191d16c605SKent Overstreet new_unstriped - old_unstriped,
8201d16c605SKent Overstreet 0,
8211d16c605SKent Overstreet flags);
8221d16c605SKent Overstreet if (ret)
8231d16c605SKent Overstreet return ret;
8241d16c605SKent Overstreet }
8251d16c605SKent Overstreet
8261d16c605SKent Overstreet return 0;
8271d16c605SKent Overstreet }
8281d16c605SKent Overstreet
bch2_trigger_alloc(struct btree_trans * trans,enum btree_id btree,unsigned level,struct bkey_s_c old,struct bkey_s new,enum btree_iter_update_trigger_flags flags)829153d1c63SKent Overstreet int bch2_trigger_alloc(struct btree_trans *trans,
830153d1c63SKent Overstreet enum btree_id btree, unsigned level,
831717296c3SKent Overstreet struct bkey_s_c old, struct bkey_s new,
8322d288745SNathan Chancellor enum btree_iter_update_trigger_flags flags)
833c6b2826cSKent Overstreet {
834c6b2826cSKent Overstreet struct bch_fs *c = trans->c;
8359432e90dSKent Overstreet struct printbuf buf = PRINTBUF;
836c6b2826cSKent Overstreet int ret = 0;
837c6b2826cSKent Overstreet
838a7f1c26fSKent Overstreet struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p);
839a7f1c26fSKent Overstreet if (!ca)
8408a9f3d05SKent Overstreet return -BCH_ERR_trigger_alloc;
841c6b2826cSKent Overstreet
842153d1c63SKent Overstreet struct bch_alloc_v4 old_a_convert;
843153d1c63SKent Overstreet const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert);
844bd864bc2SKent Overstreet
845bd864bc2SKent Overstreet struct bch_alloc_v4 *new_a;
846bd864bc2SKent Overstreet if (likely(new.k->type == KEY_TYPE_alloc_v4)) {
847bd864bc2SKent Overstreet new_a = bkey_s_to_alloc_v4(new).v;
848bd864bc2SKent Overstreet } else {
849e150a7e8SKent Overstreet BUG_ON(!(flags & (BTREE_TRIGGER_gc|BTREE_TRIGGER_check_repair)));
850bd864bc2SKent Overstreet
851bd864bc2SKent Overstreet struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c);
852bd864bc2SKent Overstreet ret = PTR_ERR_OR_ZERO(new_ka);
853bd864bc2SKent Overstreet if (unlikely(ret))
854bd864bc2SKent Overstreet goto err;
855bd864bc2SKent Overstreet new_a = &new_ka->v;
856bd864bc2SKent Overstreet }
857c6b2826cSKent Overstreet
85838ad9dc8SKent Overstreet if (flags & BTREE_TRIGGER_transactional) {
859fa9bb741SKent Overstreet alloc_data_type_set(new_a, new_a->data_type);
860822835ffSKent Overstreet
861be565740SKent Overstreet int is_empty_delta = (int) data_type_is_empty(new_a->data_type) -
862be565740SKent Overstreet (int) data_type_is_empty(old_a->data_type);
863be565740SKent Overstreet
864be565740SKent Overstreet if (is_empty_delta < 0) {
865cff07e27SKent Overstreet new_a->io_time[READ] = bch2_current_io_time(c, READ);
866cff07e27SKent Overstreet new_a->io_time[WRITE]= bch2_current_io_time(c, WRITE);
867c6b2826cSKent Overstreet SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
86859cc38b8SKent Overstreet SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
869c6b2826cSKent Overstreet }
870c6b2826cSKent Overstreet
871822835ffSKent Overstreet if (data_type_is_empty(new_a->data_type) &&
872822835ffSKent Overstreet BCH_ALLOC_V4_NEED_INC_GEN(new_a) &&
873717296c3SKent Overstreet !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) {
87469bd8a92SKent Overstreet if (new_a->oldest_gen == new_a->gen &&
87569bd8a92SKent Overstreet !bch2_bucket_sectors_total(*new_a))
87669bd8a92SKent Overstreet new_a->oldest_gen++;
877c6b2826cSKent Overstreet new_a->gen++;
878c6b2826cSKent Overstreet SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
879f648b6c1SKent Overstreet alloc_data_type_set(new_a, new_a->data_type);
880c6b2826cSKent Overstreet }
881c6b2826cSKent Overstreet
88219a614d2SKent Overstreet if (old_a->data_type != new_a->data_type ||
883822835ffSKent Overstreet (new_a->data_type == BCH_DATA_free &&
88419a614d2SKent Overstreet alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) {
885267039d0SKent Overstreet ret = bch2_bucket_do_index(trans, ca, old, old_a, false) ?:
886267039d0SKent Overstreet bch2_bucket_do_index(trans, ca, new.s_c, new_a, true);
887c6b2826cSKent Overstreet if (ret)
888a7f1c26fSKent Overstreet goto err;
889c6b2826cSKent Overstreet }
890c6b2826cSKent Overstreet
8917003589dSKent Overstreet if (new_a->data_type == BCH_DATA_cached &&
8927003589dSKent Overstreet !new_a->io_time[READ])
893cff07e27SKent Overstreet new_a->io_time[READ] = bch2_current_io_time(c, READ);
8947003589dSKent Overstreet
895717296c3SKent Overstreet ret = bch2_lru_change(trans, new.k->p.inode,
896717296c3SKent Overstreet bucket_to_u64(new.k->p),
897e1304967SKent Overstreet alloc_lru_idx_read(*old_a),
898e1304967SKent Overstreet alloc_lru_idx_read(*new_a));
899c6b2826cSKent Overstreet if (ret)
900a7f1c26fSKent Overstreet goto err;
901c6b2826cSKent Overstreet
90280c33085SKent Overstreet ret = bch2_lru_change(trans,
903b8e37c16SKent Overstreet BCH_LRU_BUCKET_FRAGMENTATION,
904717296c3SKent Overstreet bucket_to_u64(new.k->p),
905e1304967SKent Overstreet alloc_lru_idx_fragmentation(*old_a, ca),
906e1304967SKent Overstreet alloc_lru_idx_fragmentation(*new_a, ca));
90780c33085SKent Overstreet if (ret)
908a7f1c26fSKent Overstreet goto err;
90980c33085SKent Overstreet
9105250b74dSKent Overstreet if (old_a->gen != new_a->gen) {
911717296c3SKent Overstreet ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen);
9125250b74dSKent Overstreet if (ret)
913a7f1c26fSKent Overstreet goto err;
9145250b74dSKent Overstreet }
9155250b74dSKent Overstreet
9165dd8c60eSKent Overstreet if ((flags & BTREE_TRIGGER_bucket_invalidate) &&
91725f64e99SKent Overstreet old_a->cached_sectors) {
9181d16c605SKent Overstreet ret = bch2_mod_dev_cached_sectors(trans, ca->dev_idx,
919fb23d57aSKent Overstreet -((s64) old_a->cached_sectors),
920fb23d57aSKent Overstreet flags & BTREE_TRIGGER_gc);
92125f64e99SKent Overstreet if (ret)
922a7f1c26fSKent Overstreet goto err;
92325f64e99SKent Overstreet }
9241d16c605SKent Overstreet
9251d16c605SKent Overstreet ret = bch2_alloc_key_to_dev_counters(trans, ca, old_a, new_a, flags);
9261d16c605SKent Overstreet if (ret)
9271d16c605SKent Overstreet goto err;
928c6b2826cSKent Overstreet }
929c6b2826cSKent Overstreet
9305dd8c60eSKent Overstreet if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) {
9310eafe758SKent Overstreet u64 transaction_seq = trans->journal_res.seq;
9329e779f3fSKent Overstreet BUG_ON(!transaction_seq);
9336820ac2cSKent Overstreet
9349e779f3fSKent Overstreet if (log_fsck_err_on(transaction_seq && new_a->journal_seq_nonempty > transaction_seq,
9350eafe758SKent Overstreet trans, alloc_key_journal_seq_in_future,
9360eafe758SKent Overstreet "bucket journal seq in future (currently at %llu)\n%s",
9370eafe758SKent Overstreet journal_cur_seq(&c->journal),
9380eafe758SKent Overstreet (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf)))
9399e779f3fSKent Overstreet new_a->journal_seq_nonempty = transaction_seq;
9400eafe758SKent Overstreet
9410eafe758SKent Overstreet int is_empty_delta = (int) data_type_is_empty(new_a->data_type) -
9420eafe758SKent Overstreet (int) data_type_is_empty(old_a->data_type);
9430eafe758SKent Overstreet
9449e779f3fSKent Overstreet /*
9459e779f3fSKent Overstreet * Record journal sequence number of empty -> nonempty transition:
9469e779f3fSKent Overstreet * Note that there may be multiple empty -> nonempty
9479e779f3fSKent Overstreet * transitions, data in a bucket may be overwritten while we're
9489e779f3fSKent Overstreet * still writing to it - so be careful to only record the first:
9499e779f3fSKent Overstreet * */
9509e779f3fSKent Overstreet if (is_empty_delta < 0 &&
9519e779f3fSKent Overstreet new_a->journal_seq_empty <= c->journal.flushed_seq_ondisk) {
9529e779f3fSKent Overstreet new_a->journal_seq_nonempty = transaction_seq;
9539e779f3fSKent Overstreet new_a->journal_seq_empty = 0;
9549e779f3fSKent Overstreet }
9556820ac2cSKent Overstreet
9566820ac2cSKent Overstreet /*
9570eafe758SKent Overstreet * Bucket becomes empty: mark it as waiting for a journal flush,
9580eafe758SKent Overstreet * unless updates since empty -> nonempty transition were never
9590eafe758SKent Overstreet * flushed - we may need to ask the journal not to flush
9600eafe758SKent Overstreet * intermediate sequence numbers:
9616820ac2cSKent Overstreet */
9620eafe758SKent Overstreet if (is_empty_delta > 0) {
9639e779f3fSKent Overstreet if (new_a->journal_seq_nonempty == transaction_seq ||
96489e74eccSKent Overstreet bch2_journal_noflush_seq(&c->journal,
9659e779f3fSKent Overstreet new_a->journal_seq_nonempty,
9669e779f3fSKent Overstreet transaction_seq)) {
9679e779f3fSKent Overstreet new_a->journal_seq_nonempty = new_a->journal_seq_empty = 0;
9689e779f3fSKent Overstreet } else {
9699e779f3fSKent Overstreet new_a->journal_seq_empty = transaction_seq;
9706820ac2cSKent Overstreet
9716820ac2cSKent Overstreet ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
9726820ac2cSKent Overstreet c->journal.flushed_seq_ondisk,
9736820ac2cSKent Overstreet new.k->p.inode, new.k->p.offset,
9740eafe758SKent Overstreet transaction_seq);
97538ad9dc8SKent Overstreet if (bch2_fs_fatal_err_on(ret, c,
9769e779f3fSKent Overstreet "setting bucket_needs_journal_commit: %s",
9779e779f3fSKent Overstreet bch2_err_str(ret)))
978a7f1c26fSKent Overstreet goto err;
9796820ac2cSKent Overstreet }
9800eafe758SKent Overstreet }
9816820ac2cSKent Overstreet
9829432e90dSKent Overstreet if (new_a->gen != old_a->gen) {
9831d16c605SKent Overstreet rcu_read_lock();
9849432e90dSKent Overstreet u8 *gen = bucket_gen(ca, new.k->p.offset);
9859432e90dSKent Overstreet if (unlikely(!gen)) {
9861d16c605SKent Overstreet rcu_read_unlock();
9879432e90dSKent Overstreet goto invalid_bucket;
9889432e90dSKent Overstreet }
9899432e90dSKent Overstreet *gen = new_a->gen;
9901d16c605SKent Overstreet rcu_read_unlock();
9919432e90dSKent Overstreet }
9926820ac2cSKent Overstreet
9936e9d0558SKent Overstreet #define eval_state(_a, expr) ({ const struct bch_alloc_v4 *a = _a; expr; })
9946e9d0558SKent Overstreet #define statechange(expr) !eval_state(old_a, expr) && eval_state(new_a, expr)
9959e779f3fSKent Overstreet #define bucket_flushed(a) (a->journal_seq_empty <= c->journal.flushed_seq_ondisk)
9966e9d0558SKent Overstreet
997a393f331SKent Overstreet if (statechange(a->data_type == BCH_DATA_free) &&
998a393f331SKent Overstreet bucket_flushed(new_a))
9996820ac2cSKent Overstreet closure_wake_up(&c->freelist_wait);
10006820ac2cSKent Overstreet
1001a393f331SKent Overstreet if (statechange(a->data_type == BCH_DATA_need_discard) &&
10023727ca56SKent Overstreet !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) &&
1003a393f331SKent Overstreet bucket_flushed(new_a))
100464ee1431SKent Overstreet bch2_discard_one_bucket_fast(ca, new.k->p.offset);
10056820ac2cSKent Overstreet
10066e9d0558SKent Overstreet if (statechange(a->data_type == BCH_DATA_cached) &&
10076e9d0558SKent Overstreet !bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
10086820ac2cSKent Overstreet should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
100964ee1431SKent Overstreet bch2_dev_do_invalidates(ca);
10106820ac2cSKent Overstreet
10116e9d0558SKent Overstreet if (statechange(a->data_type == BCH_DATA_need_gc_gens))
101210330402SKent Overstreet bch2_gc_gens_async(c);
1013153d1c63SKent Overstreet }
101438ad9dc8SKent Overstreet
101538ad9dc8SKent Overstreet if ((flags & BTREE_TRIGGER_gc) && (flags & BTREE_TRIGGER_insert)) {
101638ad9dc8SKent Overstreet rcu_read_lock();
101738ad9dc8SKent Overstreet struct bucket *g = gc_bucket(ca, new.k->p.offset);
101838ad9dc8SKent Overstreet if (unlikely(!g)) {
101938ad9dc8SKent Overstreet rcu_read_unlock();
102038ad9dc8SKent Overstreet goto invalid_bucket;
102138ad9dc8SKent Overstreet }
102238ad9dc8SKent Overstreet g->gen_valid = 1;
102338ad9dc8SKent Overstreet g->gen = new_a->gen;
102438ad9dc8SKent Overstreet rcu_read_unlock();
102538ad9dc8SKent Overstreet }
1026a7f1c26fSKent Overstreet err:
10270eafe758SKent Overstreet fsck_err:
10289432e90dSKent Overstreet printbuf_exit(&buf);
1029a7f1c26fSKent Overstreet bch2_dev_put(ca);
1030a7f1c26fSKent Overstreet return ret;
10319432e90dSKent Overstreet invalid_bucket:
10329432e90dSKent Overstreet bch2_fs_inconsistent(c, "reference to invalid bucket\n%s",
10339432e90dSKent Overstreet (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf));
10348a9f3d05SKent Overstreet ret = -BCH_ERR_trigger_alloc;
10359432e90dSKent Overstreet goto err;
10366820ac2cSKent Overstreet }
10376820ac2cSKent Overstreet
1038d23124c7SKent Overstreet /*
10395dd8c60eSKent Overstreet * This synthesizes deleted extents for holes, similar to BTREE_ITER_slots for
1040d23124c7SKent Overstreet * extents style btrees, but works on non-extents btrees:
1041d23124c7SKent Overstreet */
bch2_get_key_or_hole(struct btree_trans * trans,struct btree_iter * iter,struct bpos end,struct bkey * hole)10429180ad2eSKent Overstreet static struct bkey_s_c bch2_get_key_or_hole(struct btree_trans *trans, struct btree_iter *iter,
10439180ad2eSKent Overstreet struct bpos end, struct bkey *hole)
1044d23124c7SKent Overstreet {
10459180ad2eSKent Overstreet struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter);
1046d23124c7SKent Overstreet
1047d23124c7SKent Overstreet if (bkey_err(k))
1048d23124c7SKent Overstreet return k;
1049d23124c7SKent Overstreet
1050d23124c7SKent Overstreet if (k.k->type) {
1051d23124c7SKent Overstreet return k;
1052d23124c7SKent Overstreet } else {
1053d23124c7SKent Overstreet struct btree_iter iter2;
1054d23124c7SKent Overstreet struct bpos next;
1055d23124c7SKent Overstreet
10569180ad2eSKent Overstreet bch2_trans_copy_iter(trans, &iter2, iter);
10571546cf97SKent Overstreet
10589180ad2eSKent Overstreet struct btree_path *path = btree_iter_path(trans, iter);
105907f383c7SKent Overstreet if (!bpos_eq(path->l[0].b->key.k.p, SPOS_MAX))
106007f383c7SKent Overstreet end = bkey_min(end, bpos_nosnap_successor(path->l[0].b->key.k.p));
10611546cf97SKent Overstreet
10621546cf97SKent Overstreet end = bkey_min(end, POS(iter->pos.inode, iter->pos.offset + U32_MAX - 1));
10631546cf97SKent Overstreet
10641546cf97SKent Overstreet /*
10651546cf97SKent Overstreet * btree node min/max is a closed interval, upto takes a half
10661546cf97SKent Overstreet * open interval:
10671546cf97SKent Overstreet */
10689180ad2eSKent Overstreet k = bch2_btree_iter_peek_max(trans, &iter2, end);
1069d23124c7SKent Overstreet next = iter2.pos;
10709180ad2eSKent Overstreet bch2_trans_iter_exit(trans, &iter2);
1071d23124c7SKent Overstreet
1072d23124c7SKent Overstreet BUG_ON(next.offset >= iter->pos.offset + U32_MAX);
1073d23124c7SKent Overstreet
1074d23124c7SKent Overstreet if (bkey_err(k))
1075d23124c7SKent Overstreet return k;
1076d23124c7SKent Overstreet
1077d23124c7SKent Overstreet bkey_init(hole);
1078d23124c7SKent Overstreet hole->p = iter->pos;
1079d23124c7SKent Overstreet
1080d23124c7SKent Overstreet bch2_key_resize(hole, next.offset - iter->pos.offset);
1081d23124c7SKent Overstreet return (struct bkey_s_c) { hole, NULL };
1082d23124c7SKent Overstreet }
1083d23124c7SKent Overstreet }
1084d23124c7SKent Overstreet
next_bucket(struct bch_fs * c,struct bch_dev ** ca,struct bpos * bucket)1085bc3204c8SKent Overstreet static bool next_bucket(struct bch_fs *c, struct bch_dev **ca, struct bpos *bucket)
1086d23124c7SKent Overstreet {
1087bc3204c8SKent Overstreet if (*ca) {
1088bc3204c8SKent Overstreet if (bucket->offset < (*ca)->mi.first_bucket)
1089bc3204c8SKent Overstreet bucket->offset = (*ca)->mi.first_bucket;
1090d23124c7SKent Overstreet
1091bc3204c8SKent Overstreet if (bucket->offset < (*ca)->mi.nbuckets)
1092d23124c7SKent Overstreet return true;
1093d23124c7SKent Overstreet
1094bc3204c8SKent Overstreet bch2_dev_put(*ca);
1095bc3204c8SKent Overstreet *ca = NULL;
1096d23124c7SKent Overstreet bucket->inode++;
1097d23124c7SKent Overstreet bucket->offset = 0;
1098d23124c7SKent Overstreet }
1099d23124c7SKent Overstreet
1100d23124c7SKent Overstreet rcu_read_lock();
1101bc3204c8SKent Overstreet *ca = __bch2_next_dev_idx(c, bucket->inode, NULL);
1102bc3204c8SKent Overstreet if (*ca) {
1103bc3204c8SKent Overstreet *bucket = POS((*ca)->dev_idx, (*ca)->mi.first_bucket);
1104bc3204c8SKent Overstreet bch2_dev_get(*ca);
1105bc3204c8SKent Overstreet }
1106d23124c7SKent Overstreet rcu_read_unlock();
1107d23124c7SKent Overstreet
1108bc3204c8SKent Overstreet return *ca != NULL;
1109d23124c7SKent Overstreet }
1110d23124c7SKent Overstreet
bch2_get_key_or_real_bucket_hole(struct btree_trans * trans,struct btree_iter * iter,struct bch_dev ** ca,struct bkey * hole)11119180ad2eSKent Overstreet static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_trans *trans,
11129180ad2eSKent Overstreet struct btree_iter *iter,
1113bc3204c8SKent Overstreet struct bch_dev **ca, struct bkey *hole)
1114d23124c7SKent Overstreet {
11159180ad2eSKent Overstreet struct bch_fs *c = trans->c;
1116d23124c7SKent Overstreet struct bkey_s_c k;
1117d23124c7SKent Overstreet again:
11189180ad2eSKent Overstreet k = bch2_get_key_or_hole(trans, iter, POS_MAX, hole);
1119d23124c7SKent Overstreet if (bkey_err(k))
1120d23124c7SKent Overstreet return k;
1121d23124c7SKent Overstreet
1122bc3204c8SKent Overstreet *ca = bch2_dev_iterate_noerror(c, *ca, k.k->p.inode);
1123d23124c7SKent Overstreet
1124bc3204c8SKent Overstreet if (!k.k->type) {
1125bc3204c8SKent Overstreet struct bpos hole_start = bkey_start_pos(k.k);
1126bc3204c8SKent Overstreet
1127bc3204c8SKent Overstreet if (!*ca || !bucket_valid(*ca, hole_start.offset)) {
1128bc3204c8SKent Overstreet if (!next_bucket(c, ca, &hole_start))
1129d23124c7SKent Overstreet return bkey_s_c_null;
1130d23124c7SKent Overstreet
11319180ad2eSKent Overstreet bch2_btree_iter_set_pos(trans, iter, hole_start);
1132d23124c7SKent Overstreet goto again;
1133d23124c7SKent Overstreet }
1134d23124c7SKent Overstreet
1135bc3204c8SKent Overstreet if (k.k->p.offset > (*ca)->mi.nbuckets)
1136bc3204c8SKent Overstreet bch2_key_resize(hole, (*ca)->mi.nbuckets - hole_start.offset);
1137d23124c7SKent Overstreet }
1138d23124c7SKent Overstreet
1139d23124c7SKent Overstreet return k;
1140d23124c7SKent Overstreet }
1141d23124c7SKent Overstreet
1142298ac24eSKent Overstreet static noinline_for_stack
bch2_check_alloc_key(struct btree_trans * trans,struct bkey_s_c alloc_k,struct btree_iter * alloc_iter,struct btree_iter * discard_iter,struct btree_iter * freespace_iter,struct btree_iter * bucket_gens_iter)1143298ac24eSKent Overstreet int bch2_check_alloc_key(struct btree_trans *trans,
1144d23124c7SKent Overstreet struct bkey_s_c alloc_k,
1145e34da43eSKent Overstreet struct btree_iter *alloc_iter,
1146e34da43eSKent Overstreet struct btree_iter *discard_iter,
11475250b74dSKent Overstreet struct btree_iter *freespace_iter,
11485250b74dSKent Overstreet struct btree_iter *bucket_gens_iter)
11495add07d5SKent Overstreet {
11505add07d5SKent Overstreet struct bch_fs *c = trans->c;
115119a614d2SKent Overstreet struct bch_alloc_v4 a_convert;
115219a614d2SKent Overstreet const struct bch_alloc_v4 *a;
11535250b74dSKent Overstreet unsigned gens_offset;
1154d23124c7SKent Overstreet struct bkey_s_c k;
11555add07d5SKent Overstreet struct printbuf buf = PRINTBUF;
11569b3059a1SKent Overstreet int ret = 0;
11575add07d5SKent Overstreet
11589b3059a1SKent Overstreet struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_k.k->p);
11599b3059a1SKent Overstreet if (fsck_err_on(!ca,
1160a850bde6SKent Overstreet trans, alloc_key_to_missing_dev_bucket,
1161a9c0a4cbSKent Overstreet "alloc key for invalid device:bucket %llu:%llu",
1162a9c0a4cbSKent Overstreet alloc_k.k->p.inode, alloc_k.k->p.offset))
11639b3059a1SKent Overstreet ret = bch2_btree_delete_at(trans, alloc_iter, 0);
11649b3059a1SKent Overstreet if (!ca)
11659b3059a1SKent Overstreet return ret;
1166e1effd42SKent Overstreet
1167e1effd42SKent Overstreet if (!ca->mi.freespace_initialized)
11689b3059a1SKent Overstreet goto out;
1169e1effd42SKent Overstreet
117019a614d2SKent Overstreet a = bch2_alloc_to_v4(alloc_k, &a_convert);
1171e1effd42SKent Overstreet
11729180ad2eSKent Overstreet bch2_btree_iter_set_pos(trans, discard_iter, alloc_k.k->p);
11739180ad2eSKent Overstreet k = bch2_btree_iter_peek_slot(trans, discard_iter);
11745add07d5SKent Overstreet ret = bkey_err(k);
11755add07d5SKent Overstreet if (ret)
11765add07d5SKent Overstreet goto err;
11775add07d5SKent Overstreet
1178c8e58813SKent Overstreet bool is_discarded = a->data_type == BCH_DATA_need_discard;
1179c8e58813SKent Overstreet if (need_discard_or_freespace_err_on(!!k.k->type != is_discarded,
1180c8e58813SKent Overstreet trans, alloc_k, !is_discarded, true, true)) {
1181c8e58813SKent Overstreet ret = bch2_btree_bit_mod_iter(trans, discard_iter, is_discarded);
11825add07d5SKent Overstreet if (ret)
11835add07d5SKent Overstreet goto err;
11845add07d5SKent Overstreet }
11855add07d5SKent Overstreet
11869180ad2eSKent Overstreet bch2_btree_iter_set_pos(trans, freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a));
11879180ad2eSKent Overstreet k = bch2_btree_iter_peek_slot(trans, freespace_iter);
11885add07d5SKent Overstreet ret = bkey_err(k);
11895add07d5SKent Overstreet if (ret)
11905add07d5SKent Overstreet goto err;
11915add07d5SKent Overstreet
1192c8e58813SKent Overstreet bool is_free = a->data_type == BCH_DATA_free;
1193c8e58813SKent Overstreet if (need_discard_or_freespace_err_on(!!k.k->type != is_free,
1194c8e58813SKent Overstreet trans, alloc_k, !is_free, false, true)) {
1195c8e58813SKent Overstreet ret = bch2_btree_bit_mod_iter(trans, freespace_iter, is_free);
11965add07d5SKent Overstreet if (ret)
11975add07d5SKent Overstreet goto err;
11985add07d5SKent Overstreet }
11995250b74dSKent Overstreet
12009180ad2eSKent Overstreet bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset));
12019180ad2eSKent Overstreet k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter);
12025250b74dSKent Overstreet ret = bkey_err(k);
12035250b74dSKent Overstreet if (ret)
12045250b74dSKent Overstreet goto err;
12055250b74dSKent Overstreet
1206cdce1094SKent Overstreet if (fsck_err_on(a->gen != alloc_gen(k, gens_offset),
1207a850bde6SKent Overstreet trans, bucket_gens_key_wrong,
12081ece5323SKent Overstreet "incorrect gen in bucket_gens btree (got %u should be %u)\n%s",
12095250b74dSKent Overstreet alloc_gen(k, gens_offset), a->gen,
12105250b74dSKent Overstreet (printbuf_reset(&buf),
1211cdce1094SKent Overstreet bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
12125250b74dSKent Overstreet struct bkey_i_bucket_gens *g =
12135250b74dSKent Overstreet bch2_trans_kmalloc(trans, sizeof(*g));
12145250b74dSKent Overstreet
12155250b74dSKent Overstreet ret = PTR_ERR_OR_ZERO(g);
12165250b74dSKent Overstreet if (ret)
12175250b74dSKent Overstreet goto err;
12185250b74dSKent Overstreet
12195250b74dSKent Overstreet if (k.k->type == KEY_TYPE_bucket_gens) {
12205250b74dSKent Overstreet bkey_reassemble(&g->k_i, k);
12215250b74dSKent Overstreet } else {
12225250b74dSKent Overstreet bkey_bucket_gens_init(&g->k_i);
12235250b74dSKent Overstreet g->k.p = alloc_gens_pos(alloc_k.k->p, &gens_offset);
12245250b74dSKent Overstreet }
12255250b74dSKent Overstreet
12265250b74dSKent Overstreet g->v.gens[gens_offset] = a->gen;
12275250b74dSKent Overstreet
12285250b74dSKent Overstreet ret = bch2_trans_update(trans, bucket_gens_iter, &g->k_i, 0);
12295250b74dSKent Overstreet if (ret)
12305250b74dSKent Overstreet goto err;
12315250b74dSKent Overstreet }
12329b3059a1SKent Overstreet out:
12335add07d5SKent Overstreet err:
12345add07d5SKent Overstreet fsck_err:
12359b3059a1SKent Overstreet bch2_dev_put(ca);
12365add07d5SKent Overstreet printbuf_exit(&buf);
12375add07d5SKent Overstreet return ret;
12385add07d5SKent Overstreet }
12395add07d5SKent Overstreet
1240298ac24eSKent Overstreet static noinline_for_stack
bch2_check_alloc_hole_freespace(struct btree_trans * trans,struct bch_dev * ca,struct bpos start,struct bpos * end,struct btree_iter * freespace_iter)1241298ac24eSKent Overstreet int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
1242bc3204c8SKent Overstreet struct bch_dev *ca,
1243d23124c7SKent Overstreet struct bpos start,
1244d23124c7SKent Overstreet struct bpos *end,
1245d23124c7SKent Overstreet struct btree_iter *freespace_iter)
1246d23124c7SKent Overstreet {
1247d23124c7SKent Overstreet struct bkey_s_c k;
1248d23124c7SKent Overstreet struct printbuf buf = PRINTBUF;
1249d23124c7SKent Overstreet int ret;
1250d23124c7SKent Overstreet
1251d23124c7SKent Overstreet if (!ca->mi.freespace_initialized)
1252d23124c7SKent Overstreet return 0;
1253d23124c7SKent Overstreet
12549180ad2eSKent Overstreet bch2_btree_iter_set_pos(trans, freespace_iter, start);
1255d23124c7SKent Overstreet
12569180ad2eSKent Overstreet k = bch2_btree_iter_peek_slot(trans, freespace_iter);
1257d23124c7SKent Overstreet ret = bkey_err(k);
1258d23124c7SKent Overstreet if (ret)
1259d23124c7SKent Overstreet goto err;
1260d23124c7SKent Overstreet
1261d23124c7SKent Overstreet *end = bkey_min(k.k->p, *end);
1262d23124c7SKent Overstreet
1263cdce1094SKent Overstreet if (fsck_err_on(k.k->type != KEY_TYPE_set,
1264a850bde6SKent Overstreet trans, freespace_hole_missing,
1265b65db750SKent Overstreet "hole in alloc btree missing in freespace btree\n"
1266d23124c7SKent Overstreet "device %llu buckets %llu-%llu",
1267d23124c7SKent Overstreet freespace_iter->pos.inode,
1268d23124c7SKent Overstreet freespace_iter->pos.offset,
1269cdce1094SKent Overstreet end->offset)) {
1270d23124c7SKent Overstreet struct bkey_i *update =
1271d23124c7SKent Overstreet bch2_trans_kmalloc(trans, sizeof(*update));
1272d23124c7SKent Overstreet
1273d23124c7SKent Overstreet ret = PTR_ERR_OR_ZERO(update);
1274d23124c7SKent Overstreet if (ret)
1275d23124c7SKent Overstreet goto err;
1276d23124c7SKent Overstreet
1277d23124c7SKent Overstreet bkey_init(&update->k);
1278d23124c7SKent Overstreet update->k.type = KEY_TYPE_set;
1279d23124c7SKent Overstreet update->k.p = freespace_iter->pos;
1280d23124c7SKent Overstreet bch2_key_resize(&update->k,
1281d23124c7SKent Overstreet min_t(u64, U32_MAX, end->offset -
1282d23124c7SKent Overstreet freespace_iter->pos.offset));
1283d23124c7SKent Overstreet
1284d23124c7SKent Overstreet ret = bch2_trans_update(trans, freespace_iter, update, 0);
1285d23124c7SKent Overstreet if (ret)
1286d23124c7SKent Overstreet goto err;
1287d23124c7SKent Overstreet }
1288d23124c7SKent Overstreet err:
1289d23124c7SKent Overstreet fsck_err:
1290d23124c7SKent Overstreet printbuf_exit(&buf);
1291d23124c7SKent Overstreet return ret;
1292d23124c7SKent Overstreet }
1293d23124c7SKent Overstreet
1294298ac24eSKent Overstreet static noinline_for_stack
bch2_check_alloc_hole_bucket_gens(struct btree_trans * trans,struct bpos start,struct bpos * end,struct btree_iter * bucket_gens_iter)1295298ac24eSKent Overstreet int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
12965250b74dSKent Overstreet struct bpos start,
12975250b74dSKent Overstreet struct bpos *end,
12985250b74dSKent Overstreet struct btree_iter *bucket_gens_iter)
12995250b74dSKent Overstreet {
13005250b74dSKent Overstreet struct bkey_s_c k;
13015250b74dSKent Overstreet struct printbuf buf = PRINTBUF;
13025250b74dSKent Overstreet unsigned i, gens_offset, gens_end_offset;
13035250b74dSKent Overstreet int ret;
13045250b74dSKent Overstreet
13059180ad2eSKent Overstreet bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(start, &gens_offset));
13065250b74dSKent Overstreet
13079180ad2eSKent Overstreet k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter);
13085250b74dSKent Overstreet ret = bkey_err(k);
13095250b74dSKent Overstreet if (ret)
13105250b74dSKent Overstreet goto err;
13115250b74dSKent Overstreet
13125250b74dSKent Overstreet if (bkey_cmp(alloc_gens_pos(start, &gens_offset),
13135250b74dSKent Overstreet alloc_gens_pos(*end, &gens_end_offset)))
13145250b74dSKent Overstreet gens_end_offset = KEY_TYPE_BUCKET_GENS_NR;
13155250b74dSKent Overstreet
13165250b74dSKent Overstreet if (k.k->type == KEY_TYPE_bucket_gens) {
13175250b74dSKent Overstreet struct bkey_i_bucket_gens g;
13185250b74dSKent Overstreet bool need_update = false;
13195250b74dSKent Overstreet
13205250b74dSKent Overstreet bkey_reassemble(&g.k_i, k);
13215250b74dSKent Overstreet
13225250b74dSKent Overstreet for (i = gens_offset; i < gens_end_offset; i++) {
1323a850bde6SKent Overstreet if (fsck_err_on(g.v.gens[i], trans,
1324b65db750SKent Overstreet bucket_gens_hole_wrong,
13255250b74dSKent Overstreet "hole in alloc btree at %llu:%llu with nonzero gen in bucket_gens btree (%u)",
13265250b74dSKent Overstreet bucket_gens_pos_to_alloc(k.k->p, i).inode,
13275250b74dSKent Overstreet bucket_gens_pos_to_alloc(k.k->p, i).offset,
13285250b74dSKent Overstreet g.v.gens[i])) {
13295250b74dSKent Overstreet g.v.gens[i] = 0;
13305250b74dSKent Overstreet need_update = true;
13315250b74dSKent Overstreet }
13325250b74dSKent Overstreet }
13335250b74dSKent Overstreet
13345250b74dSKent Overstreet if (need_update) {
133596dea3d5SKent Overstreet struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));
13365250b74dSKent Overstreet
133796dea3d5SKent Overstreet ret = PTR_ERR_OR_ZERO(u);
13385250b74dSKent Overstreet if (ret)
13395250b74dSKent Overstreet goto err;
13405250b74dSKent Overstreet
134196dea3d5SKent Overstreet memcpy(u, &g, sizeof(g));
13425250b74dSKent Overstreet
134396dea3d5SKent Overstreet ret = bch2_trans_update(trans, bucket_gens_iter, u, 0);
13445250b74dSKent Overstreet if (ret)
13455250b74dSKent Overstreet goto err;
13465250b74dSKent Overstreet }
13475250b74dSKent Overstreet }
13485250b74dSKent Overstreet
13495250b74dSKent Overstreet *end = bkey_min(*end, bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0));
13505250b74dSKent Overstreet err:
13515250b74dSKent Overstreet fsck_err:
13525250b74dSKent Overstreet printbuf_exit(&buf);
13535250b74dSKent Overstreet return ret;
13545250b74dSKent Overstreet }
13555250b74dSKent Overstreet
13562cd85feaSKent Overstreet struct check_discard_freespace_key_async {
13572cd85feaSKent Overstreet struct work_struct work;
13582cd85feaSKent Overstreet struct bch_fs *c;
13592cd85feaSKent Overstreet struct bbpos pos;
13602cd85feaSKent Overstreet };
13612cd85feaSKent Overstreet
bch2_recheck_discard_freespace_key(struct btree_trans * trans,struct bbpos pos)13622cd85feaSKent Overstreet static int bch2_recheck_discard_freespace_key(struct btree_trans *trans, struct bbpos pos)
13632cd85feaSKent Overstreet {
13642cd85feaSKent Overstreet struct btree_iter iter;
13652cd85feaSKent Overstreet struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, pos.btree, pos.pos, 0);
13662cd85feaSKent Overstreet int ret = bkey_err(k);
13672cd85feaSKent Overstreet if (ret)
13682cd85feaSKent Overstreet return ret;
13692cd85feaSKent Overstreet
13702cd85feaSKent Overstreet u8 gen;
13712cd85feaSKent Overstreet ret = k.k->type != KEY_TYPE_set
13722cd85feaSKent Overstreet ? bch2_check_discard_freespace_key(trans, &iter, &gen, false)
13732cd85feaSKent Overstreet : 0;
13742cd85feaSKent Overstreet bch2_trans_iter_exit(trans, &iter);
13752cd85feaSKent Overstreet return ret;
13762cd85feaSKent Overstreet }
13772cd85feaSKent Overstreet
check_discard_freespace_key_work(struct work_struct * work)13782cd85feaSKent Overstreet static void check_discard_freespace_key_work(struct work_struct *work)
13792cd85feaSKent Overstreet {
13802cd85feaSKent Overstreet struct check_discard_freespace_key_async *w =
13812cd85feaSKent Overstreet container_of(work, struct check_discard_freespace_key_async, work);
13822cd85feaSKent Overstreet
13832cd85feaSKent Overstreet bch2_trans_do(w->c, bch2_recheck_discard_freespace_key(trans, w->pos));
13842cd85feaSKent Overstreet bch2_write_ref_put(w->c, BCH_WRITE_REF_check_discard_freespace_key);
13852cd85feaSKent Overstreet kfree(w);
13862cd85feaSKent Overstreet }
13872cd85feaSKent Overstreet
/*
 * Check that the need_discard or freespace key @iter points at agrees with the
 * alloc key for the same bucket.
 *
 * The top 8 bits of the key's offset are split off as "genbits"; the low 56
 * bits are the bucket offset used to look up the alloc key.
 *
 * @gen:	 set to the alloc key's generation, but only when the key
 *		 checks out (return value 0).
 * @async_repair: when false, a bad key is deleted and committed here; when
 *		 true the repair is handed to a workqueue instead, because
 *		 committing from the allocator path would recurse back into the
 *		 allocator.
 *
 * Returns:
 *   0    the key is consistent and *@gen is valid
 *   > 0  the key is bad and was not repaired by this call; *@gen is NOT set
 *   < 0  error; after a successful synchronous repair this is
 *        -BCH_ERR_transaction_restart_commit
 */
int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen,
				     bool async_repair)
{
	struct bch_fs *c = trans->c;
	enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard
		? BCH_DATA_need_discard
		: BCH_DATA_free;
	struct printbuf buf = PRINTBUF;

	struct bpos bucket = iter->pos;
	bucket.offset &= ~(~0ULL << 56);
	u64 genbits = iter->pos.offset & (~0ULL << 56);

	struct btree_iter alloc_iter;
	struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter,
						     BTREE_ID_alloc, bucket,
						     async_repair ? BTREE_ITER_cached : 0);
	int ret = bkey_err(alloc_k);
	if (ret)
		return ret;

	if (!bch2_dev_bucket_exists(c, bucket)) {
		if (fsck_err(trans, need_discard_freespace_key_to_invalid_dev_bucket,
			     "entry in %s btree for nonexistant dev:bucket %llu:%llu",
			     bch2_btree_id_str(iter->btree_id), bucket.inode, bucket.offset))
			goto delete;
		ret = 1;
		goto out;
	}

	struct bch_alloc_v4 a_convert;
	const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);

	/* genbits are only compared for freespace keys, not need_discard */
	if (a->data_type != state ||
	    (state == BCH_DATA_free &&
	     genbits != alloc_freespace_genbits(*a))) {
		if (fsck_err(trans, need_discard_freespace_key_bad,
			     "%s\nincorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)",
			     (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
			     bch2_btree_id_str(iter->btree_id),
			     iter->pos.inode,
			     iter->pos.offset,
			     a->data_type == state,
			     genbits >> 56, alloc_freespace_genbits(*a) >> 56))
			goto delete;
		ret = 1;
		goto out;
	}

	*gen = a->gen;
out:
fsck_err:
	bch2_set_btree_iter_dontneed(trans, &alloc_iter);
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
delete:
	if (!async_repair) {
		ret =   bch2_btree_bit_mod_iter(trans, iter, false) ?:
			bch2_trans_commit(trans, NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc) ?:
			-BCH_ERR_transaction_restart_commit;
		goto out;
	} else {
		/*
		 * We can't repair here when called from the allocator path: the
		 * commit will recurse back into the allocator
		 */

		/*
		 * The key is known to be bad and *gen has not been set, so
		 * report it the same way as the other "bad, not repaired here"
		 * paths above - whether or not the deferred repair could be
		 * queued. Returning 0 would tell the caller the key (and *gen)
		 * can be trusted.
		 */
		ret = 1;

		struct check_discard_freespace_key_async *w =
			kzalloc(sizeof(*w), GFP_KERNEL);
		if (!w)
			goto out;

		if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_check_discard_freespace_key)) {
			kfree(w);
			goto out;
		}

		INIT_WORK(&w->work, check_discard_freespace_key_work);
		w->c = c;
		w->pos = BBPOS(iter->btree_id, iter->pos);
		queue_work(c->write_ref_wq, &w->work);
		goto out;
	}
}
14735add07d5SKent Overstreet
bch2_check_discard_freespace_key_fsck(struct btree_trans * trans,struct btree_iter * iter)1474c97118f1SKent Overstreet static int bch2_check_discard_freespace_key_fsck(struct btree_trans *trans, struct btree_iter *iter)
1475c97118f1SKent Overstreet {
1476c97118f1SKent Overstreet u8 gen;
14772cd85feaSKent Overstreet int ret = bch2_check_discard_freespace_key(trans, iter, &gen, false);
1478c97118f1SKent Overstreet return ret < 0 ? ret : 0;
1479c97118f1SKent Overstreet }
1480c97118f1SKent Overstreet
14815250b74dSKent Overstreet /*
14825250b74dSKent Overstreet * We've already checked that generation numbers in the bucket_gens btree are
14835250b74dSKent Overstreet * valid for buckets that exist; this just checks for keys for nonexistent
14845250b74dSKent Overstreet * buckets.
14855250b74dSKent Overstreet */
1486298ac24eSKent Overstreet static noinline_for_stack
bch2_check_bucket_gens_key(struct btree_trans * trans,struct btree_iter * iter,struct bkey_s_c k)1487298ac24eSKent Overstreet int bch2_check_bucket_gens_key(struct btree_trans *trans,
14885250b74dSKent Overstreet struct btree_iter *iter,
14895250b74dSKent Overstreet struct bkey_s_c k)
14905250b74dSKent Overstreet {
14915250b74dSKent Overstreet struct bch_fs *c = trans->c;
14925250b74dSKent Overstreet struct bkey_i_bucket_gens g;
14935250b74dSKent Overstreet u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
14945250b74dSKent Overstreet u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
14955250b74dSKent Overstreet u64 b;
1496bc3204c8SKent Overstreet bool need_update = false;
14975250b74dSKent Overstreet struct printbuf buf = PRINTBUF;
14985250b74dSKent Overstreet int ret = 0;
14995250b74dSKent Overstreet
15005250b74dSKent Overstreet BUG_ON(k.k->type != KEY_TYPE_bucket_gens);
15015250b74dSKent Overstreet bkey_reassemble(&g.k_i, k);
15025250b74dSKent Overstreet
1503bc3204c8SKent Overstreet struct bch_dev *ca = bch2_dev_tryget_noerror(c, k.k->p.inode);
1504bc3204c8SKent Overstreet if (!ca) {
1505a850bde6SKent Overstreet if (fsck_err(trans, bucket_gens_to_invalid_dev,
15065250b74dSKent Overstreet "bucket_gens key for invalid device:\n%s",
1507bc3204c8SKent Overstreet (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
15085250b74dSKent Overstreet ret = bch2_btree_delete_at(trans, iter, 0);
15095250b74dSKent Overstreet goto out;
15105250b74dSKent Overstreet }
15115250b74dSKent Overstreet
15125250b74dSKent Overstreet if (fsck_err_on(end <= ca->mi.first_bucket ||
1513a850bde6SKent Overstreet start >= ca->mi.nbuckets,
1514a850bde6SKent Overstreet trans, bucket_gens_to_invalid_buckets,
15155250b74dSKent Overstreet "bucket_gens key for invalid buckets:\n%s",
15165250b74dSKent Overstreet (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
15175250b74dSKent Overstreet ret = bch2_btree_delete_at(trans, iter, 0);
15185250b74dSKent Overstreet goto out;
15195250b74dSKent Overstreet }
15205250b74dSKent Overstreet
15215250b74dSKent Overstreet for (b = start; b < ca->mi.first_bucket; b++)
1522a850bde6SKent Overstreet if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK],
1523a850bde6SKent Overstreet trans, bucket_gens_nonzero_for_invalid_buckets,
15245250b74dSKent Overstreet "bucket_gens key has nonzero gen for invalid bucket")) {
15255250b74dSKent Overstreet g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0;
15265250b74dSKent Overstreet need_update = true;
15275250b74dSKent Overstreet }
15285250b74dSKent Overstreet
15295250b74dSKent Overstreet for (b = ca->mi.nbuckets; b < end; b++)
1530a850bde6SKent Overstreet if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK],
1531a850bde6SKent Overstreet trans, bucket_gens_nonzero_for_invalid_buckets,
15325250b74dSKent Overstreet "bucket_gens key has nonzero gen for invalid bucket")) {
15335250b74dSKent Overstreet g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0;
15345250b74dSKent Overstreet need_update = true;
15355250b74dSKent Overstreet }
15365250b74dSKent Overstreet
15375250b74dSKent Overstreet if (need_update) {
153896dea3d5SKent Overstreet struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));
15395250b74dSKent Overstreet
154096dea3d5SKent Overstreet ret = PTR_ERR_OR_ZERO(u);
15415250b74dSKent Overstreet if (ret)
15425250b74dSKent Overstreet goto out;
15435250b74dSKent Overstreet
154496dea3d5SKent Overstreet memcpy(u, &g, sizeof(g));
154596dea3d5SKent Overstreet ret = bch2_trans_update(trans, iter, u, 0);
15465250b74dSKent Overstreet }
15475250b74dSKent Overstreet out:
15485250b74dSKent Overstreet fsck_err:
1549bc3204c8SKent Overstreet bch2_dev_put(ca);
15505250b74dSKent Overstreet printbuf_exit(&buf);
15515250b74dSKent Overstreet return ret;
15525250b74dSKent Overstreet }
15535250b74dSKent Overstreet
bch2_check_alloc_info(struct bch_fs * c)1554e1effd42SKent Overstreet int bch2_check_alloc_info(struct bch_fs *c)
15555add07d5SKent Overstreet {
15566bd68ec2SKent Overstreet struct btree_trans *trans = bch2_trans_get(c);
15575250b74dSKent Overstreet struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter;
1558bc3204c8SKent Overstreet struct bch_dev *ca = NULL;
1559d23124c7SKent Overstreet struct bkey hole;
1560445d184aSKent Overstreet struct bkey_s_c k;
1561e1effd42SKent Overstreet int ret = 0;
15625add07d5SKent Overstreet
15636bd68ec2SKent Overstreet bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN,
15645dd8c60eSKent Overstreet BTREE_ITER_prefetch);
15656bd68ec2SKent Overstreet bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN,
15665dd8c60eSKent Overstreet BTREE_ITER_prefetch);
15676bd68ec2SKent Overstreet bch2_trans_iter_init(trans, &freespace_iter, BTREE_ID_freespace, POS_MIN,
15685dd8c60eSKent Overstreet BTREE_ITER_prefetch);
15696bd68ec2SKent Overstreet bch2_trans_iter_init(trans, &bucket_gens_iter, BTREE_ID_bucket_gens, POS_MIN,
15705dd8c60eSKent Overstreet BTREE_ITER_prefetch);
1571d23124c7SKent Overstreet
1572e34da43eSKent Overstreet while (1) {
1573d23124c7SKent Overstreet struct bpos next;
1574d23124c7SKent Overstreet
15756bd68ec2SKent Overstreet bch2_trans_begin(trans);
1576d23124c7SKent Overstreet
15779180ad2eSKent Overstreet k = bch2_get_key_or_real_bucket_hole(trans, &iter, &ca, &hole);
1578d23124c7SKent Overstreet ret = bkey_err(k);
15795add07d5SKent Overstreet if (ret)
1580d23124c7SKent Overstreet goto bkey_err;
1581d23124c7SKent Overstreet
1582d23124c7SKent Overstreet if (!k.k)
15835add07d5SKent Overstreet break;
1584e34da43eSKent Overstreet
1585d23124c7SKent Overstreet if (k.k->type) {
1586d23124c7SKent Overstreet next = bpos_nosnap_successor(k.k->p);
1587d23124c7SKent Overstreet
15886bd68ec2SKent Overstreet ret = bch2_check_alloc_key(trans,
1589d23124c7SKent Overstreet k, &iter,
1590d23124c7SKent Overstreet &discard_iter,
15915250b74dSKent Overstreet &freespace_iter,
15925250b74dSKent Overstreet &bucket_gens_iter);
1593d23124c7SKent Overstreet if (ret)
15945250b74dSKent Overstreet goto bkey_err;
1595d23124c7SKent Overstreet } else {
1596d23124c7SKent Overstreet next = k.k->p;
1597d23124c7SKent Overstreet
1598bc3204c8SKent Overstreet ret = bch2_check_alloc_hole_freespace(trans, ca,
1599d23124c7SKent Overstreet bkey_start_pos(k.k),
1600d23124c7SKent Overstreet &next,
16015250b74dSKent Overstreet &freespace_iter) ?:
16026bd68ec2SKent Overstreet bch2_check_alloc_hole_bucket_gens(trans,
16035250b74dSKent Overstreet bkey_start_pos(k.k),
16045250b74dSKent Overstreet &next,
16055250b74dSKent Overstreet &bucket_gens_iter);
1606d23124c7SKent Overstreet if (ret)
1607d23124c7SKent Overstreet goto bkey_err;
1608d23124c7SKent Overstreet }
1609d23124c7SKent Overstreet
16106bd68ec2SKent Overstreet ret = bch2_trans_commit(trans, NULL, NULL,
16113f0e297dSKent Overstreet BCH_TRANS_COMMIT_no_enospc);
1612d23124c7SKent Overstreet if (ret)
1613d23124c7SKent Overstreet goto bkey_err;
1614d23124c7SKent Overstreet
16159180ad2eSKent Overstreet bch2_btree_iter_set_pos(trans, &iter, next);
1616d23124c7SKent Overstreet bkey_err:
1617d23124c7SKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
1618d23124c7SKent Overstreet continue;
1619d23124c7SKent Overstreet if (ret)
1620d23124c7SKent Overstreet break;
16215add07d5SKent Overstreet }
16226bd68ec2SKent Overstreet bch2_trans_iter_exit(trans, &bucket_gens_iter);
16236bd68ec2SKent Overstreet bch2_trans_iter_exit(trans, &freespace_iter);
16246bd68ec2SKent Overstreet bch2_trans_iter_exit(trans, &discard_iter);
16256bd68ec2SKent Overstreet bch2_trans_iter_exit(trans, &iter);
1626bc3204c8SKent Overstreet bch2_dev_put(ca);
1627bc3204c8SKent Overstreet ca = NULL;
16285add07d5SKent Overstreet
1629e34da43eSKent Overstreet if (ret < 0)
16305add07d5SKent Overstreet goto err;
16315add07d5SKent Overstreet
16325028b907SKent Overstreet ret = for_each_btree_key(trans, iter,
1633445d184aSKent Overstreet BTREE_ID_need_discard, POS_MIN,
16345dd8c60eSKent Overstreet BTREE_ITER_prefetch, k,
1635c97118f1SKent Overstreet bch2_check_discard_freespace_key_fsck(trans, &iter));
16367d9ae04eSKent Overstreet if (ret)
16377d9ae04eSKent Overstreet goto err;
16387d9ae04eSKent Overstreet
16397d9ae04eSKent Overstreet bch2_trans_iter_init(trans, &iter, BTREE_ID_freespace, POS_MIN,
16405dd8c60eSKent Overstreet BTREE_ITER_prefetch);
16417d9ae04eSKent Overstreet while (1) {
16427d9ae04eSKent Overstreet bch2_trans_begin(trans);
16439180ad2eSKent Overstreet k = bch2_btree_iter_peek(trans, &iter);
16447d9ae04eSKent Overstreet if (!k.k)
16457d9ae04eSKent Overstreet break;
16467d9ae04eSKent Overstreet
16477d9ae04eSKent Overstreet ret = bkey_err(k) ?:
1648c97118f1SKent Overstreet bch2_check_discard_freespace_key_fsck(trans, &iter);
16497d9ae04eSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
16507d9ae04eSKent Overstreet ret = 0;
16517d9ae04eSKent Overstreet continue;
16527d9ae04eSKent Overstreet }
16537d9ae04eSKent Overstreet if (ret) {
16547d9ae04eSKent Overstreet struct printbuf buf = PRINTBUF;
16557d9ae04eSKent Overstreet bch2_bkey_val_to_text(&buf, c, k);
16567d9ae04eSKent Overstreet
16577d9ae04eSKent Overstreet bch_err(c, "while checking %s", buf.buf);
16587d9ae04eSKent Overstreet printbuf_exit(&buf);
16597d9ae04eSKent Overstreet break;
16607d9ae04eSKent Overstreet }
16617d9ae04eSKent Overstreet
16629180ad2eSKent Overstreet bch2_btree_iter_set_pos(trans, &iter, bpos_nosnap_successor(iter.pos));
16637d9ae04eSKent Overstreet }
16647d9ae04eSKent Overstreet bch2_trans_iter_exit(trans, &iter);
16657d9ae04eSKent Overstreet if (ret)
16667d9ae04eSKent Overstreet goto err;
16677d9ae04eSKent Overstreet
16687d9ae04eSKent Overstreet ret = for_each_btree_key_commit(trans, iter,
16695250b74dSKent Overstreet BTREE_ID_bucket_gens, POS_MIN,
16705dd8c60eSKent Overstreet BTREE_ITER_prefetch, k,
16713f0e297dSKent Overstreet NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
16726bd68ec2SKent Overstreet bch2_check_bucket_gens_key(trans, &iter, k));
16735add07d5SKent Overstreet err:
16746bd68ec2SKent Overstreet bch2_trans_put(trans);
16751bb3c2a9SKent Overstreet bch_err_fn(c, ret);
16761bb3c2a9SKent Overstreet return ret;
16775add07d5SKent Overstreet }
16785add07d5SKent Overstreet
bch2_check_alloc_to_lru_ref(struct btree_trans * trans,struct btree_iter * alloc_iter,struct bkey_buf * last_flushed)16795add07d5SKent Overstreet static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
1680d39881d2SKent Overstreet struct btree_iter *alloc_iter,
1681d39881d2SKent Overstreet struct bkey_buf *last_flushed)
16825add07d5SKent Overstreet {
16835add07d5SKent Overstreet struct bch_fs *c = trans->c;
168419a614d2SKent Overstreet struct bch_alloc_v4 a_convert;
168519a614d2SKent Overstreet const struct bch_alloc_v4 *a;
1686d39881d2SKent Overstreet struct bkey_s_c alloc_k;
16875add07d5SKent Overstreet struct printbuf buf = PRINTBUF;
16885add07d5SKent Overstreet int ret;
16895add07d5SKent Overstreet
16909180ad2eSKent Overstreet alloc_k = bch2_btree_iter_peek(trans, alloc_iter);
16915add07d5SKent Overstreet if (!alloc_k.k)
16925add07d5SKent Overstreet return 0;
16935add07d5SKent Overstreet
16945add07d5SKent Overstreet ret = bkey_err(alloc_k);
16955add07d5SKent Overstreet if (ret)
16965add07d5SKent Overstreet return ret;
16975add07d5SKent Overstreet
1698260af156SKent Overstreet struct bch_dev *ca = bch2_dev_tryget_noerror(c, alloc_k.k->p.inode);
1699260af156SKent Overstreet if (!ca)
1700260af156SKent Overstreet return 0;
1701260af156SKent Overstreet
170219a614d2SKent Overstreet a = bch2_alloc_to_v4(alloc_k, &a_convert);
17035add07d5SKent Overstreet
1704260af156SKent Overstreet u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca);
1705260af156SKent Overstreet if (lru_idx) {
1706b8e37c16SKent Overstreet ret = bch2_lru_check_set(trans, BCH_LRU_BUCKET_FRAGMENTATION,
17073aff608bSKent Overstreet bucket_to_u64(alloc_k.k->p),
1708260af156SKent Overstreet lru_idx, alloc_k, last_flushed);
1709d39881d2SKent Overstreet if (ret)
1710260af156SKent Overstreet goto err;
1711d39881d2SKent Overstreet }
1712d39881d2SKent Overstreet
171319a614d2SKent Overstreet if (a->data_type != BCH_DATA_cached)
1714260af156SKent Overstreet goto err;
17155add07d5SKent Overstreet
1716a850bde6SKent Overstreet if (fsck_err_on(!a->io_time[READ],
1717a850bde6SKent Overstreet trans, alloc_key_cached_but_read_time_zero,
17181ece5323SKent Overstreet "cached bucket with read_time 0\n%s",
17193f59547eSKent Overstreet (printbuf_reset(&buf),
17203f59547eSKent Overstreet bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
17213f59547eSKent Overstreet struct bkey_i_alloc_v4 *a_mut =
17223f59547eSKent Overstreet bch2_alloc_to_v4_mut(trans, alloc_k);
17233f59547eSKent Overstreet ret = PTR_ERR_OR_ZERO(a_mut);
17243f59547eSKent Overstreet if (ret)
17253f59547eSKent Overstreet goto err;
17263f59547eSKent Overstreet
1727cff07e27SKent Overstreet a_mut->v.io_time[READ] = bch2_current_io_time(c, READ);
17283f59547eSKent Overstreet ret = bch2_trans_update(trans, alloc_iter,
17295dd8c60eSKent Overstreet &a_mut->k_i, BTREE_TRIGGER_norun);
17303f59547eSKent Overstreet if (ret)
17313f59547eSKent Overstreet goto err;
17323f59547eSKent Overstreet
17333f59547eSKent Overstreet a = &a_mut->v;
17343f59547eSKent Overstreet }
17353f59547eSKent Overstreet
17363aff608bSKent Overstreet ret = bch2_lru_check_set(trans, alloc_k.k->p.inode,
17373aff608bSKent Overstreet bucket_to_u64(alloc_k.k->p),
17383aff608bSKent Overstreet a->io_time[READ],
1739d39881d2SKent Overstreet alloc_k, last_flushed);
17405add07d5SKent Overstreet if (ret)
17415add07d5SKent Overstreet goto err;
17425add07d5SKent Overstreet err:
17435add07d5SKent Overstreet fsck_err:
1744260af156SKent Overstreet bch2_dev_put(ca);
17455add07d5SKent Overstreet printbuf_exit(&buf);
17465add07d5SKent Overstreet return ret;
17475add07d5SKent Overstreet }
17485add07d5SKent Overstreet
bch2_check_alloc_to_lru_refs(struct bch_fs * c)17495add07d5SKent Overstreet int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
17505add07d5SKent Overstreet {
1751d39881d2SKent Overstreet struct bkey_buf last_flushed;
1752d39881d2SKent Overstreet
1753d39881d2SKent Overstreet bch2_bkey_buf_init(&last_flushed);
1754d39881d2SKent Overstreet bkey_init(&last_flushed.k->k);
1755d39881d2SKent Overstreet
17563f59547eSKent Overstreet int ret = bch2_trans_run(c,
17576bd68ec2SKent Overstreet for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
17585dd8c60eSKent Overstreet POS_MIN, BTREE_ITER_prefetch, k,
17593f0e297dSKent Overstreet NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
17606756e385SKent Overstreet bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))) ?:
17616756e385SKent Overstreet bch2_check_stripe_to_lru_refs(c);
1762d39881d2SKent Overstreet
1763d39881d2SKent Overstreet bch2_bkey_buf_exit(&last_flushed, c);
17641bb3c2a9SKent Overstreet bch_err_fn(c, ret);
17651bb3c2a9SKent Overstreet return ret;
17665add07d5SKent Overstreet }
17675add07d5SKent Overstreet
/*
 * Record @bucket in @ca's list of in-flight discards.
 *
 * Returns -BCH_ERR_EEXIST_discard_in_flight_add if the bucket is already on
 * the list, otherwise the result of darray_push(). The list is protected by
 * ca->discard_buckets_in_flight_lock, which is held for the whole operation.
 */
static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress)
{
	struct discard_in_flight item = {
		.in_progress	= in_progress,
		.bucket		= bucket,
	};
	/* Assume a duplicate until the scan below proves otherwise */
	int ret = -BCH_ERR_EEXIST_discard_in_flight_add;

	mutex_lock(&ca->discard_buckets_in_flight_lock);

	darray_for_each(ca->discard_buckets_in_flight, entry)
		if (entry->bucket == bucket)
			goto unlock;

	ret = darray_push(&ca->discard_buckets_in_flight, item);
unlock:
	mutex_unlock(&ca->discard_buckets_in_flight_lock);
	return ret;
}
1787a393f331SKent Overstreet
/*
 * Drop @bucket from @ca's list of in-flight discards.
 *
 * The bucket must be on the list and must be marked in_progress; either
 * condition failing is treated as a fatal bug.
 */
static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket)
{
	bool removed = false;

	mutex_lock(&ca->discard_buckets_in_flight_lock);

	darray_for_each(ca->discard_buckets_in_flight, entry) {
		if (entry->bucket != bucket)
			continue;

		BUG_ON(!entry->in_progress);
		darray_remove_item(&ca->discard_buckets_in_flight, entry);
		removed = true;
		break;
	}

	/* Removing a bucket that was never added is a caller bug */
	BUG_ON(!removed);

	mutex_unlock(&ca->discard_buckets_in_flight_lock);
}
1801a393f331SKent Overstreet
1802a6548c8bSKent Overstreet struct discard_buckets_state {
1803a6548c8bSKent Overstreet u64 seen;
1804a6548c8bSKent Overstreet u64 open;
1805a6548c8bSKent Overstreet u64 need_journal_commit;
1806a6548c8bSKent Overstreet u64 discarded;
1807a6548c8bSKent Overstreet };
1808a6548c8bSKent Overstreet
180980be08cdSKent Overstreet /*
181080be08cdSKent Overstreet * This is needed because discard is both a filesystem option and a device
181180be08cdSKent Overstreet * option, and mount options are supposed to apply to that mount and not be
181280be08cdSKent Overstreet * persisted, i.e. if it's set as a mount option we can't propagate it to the
181380be08cdSKent Overstreet * device.
181480be08cdSKent Overstreet */
discard_opt_enabled(struct bch_fs * c,struct bch_dev * ca)181580be08cdSKent Overstreet static inline bool discard_opt_enabled(struct bch_fs *c, struct bch_dev *ca)
181680be08cdSKent Overstreet {
181780be08cdSKent Overstreet return test_bit(BCH_FS_discard_mount_opt_set, &c->flags)
181880be08cdSKent Overstreet ? c->opts.discard
181980be08cdSKent Overstreet : ca->mi.discard;
182080be08cdSKent Overstreet }
182180be08cdSKent Overstreet
/*
 * Handle a single entry of the need_discard btree.
 *
 * @trans:		btree transaction to operate in
 * @ca:			device the bucket lives on
 * @need_discard_iter:	iterator positioned at the need_discard entry
 * @discard_pos_done:	position of the last bucket that reached the discard
 *			step; used so a retried call does not discard (or count)
 *			the same bucket twice
 * @s:			statistics for the current pass
 * @fastpath:		true when called from the fast discard worker, which
 *			manages the in-flight list itself
 *
 * Buckets that are still open, or that are waiting on a journal sequence
 * number that has not reached disk yet, are skipped. Otherwise the alloc key
 * is looked up, the block-layer discard is issued if discards are enabled,
 * the NEED_DISCARD flag is cleared and the transaction is committed here so
 * that event counters are only bumped after a successful commit.
 *
 * Returns 0 on success (including the "skipped" cases) or an error code.
 */
static int bch2_discard_one_bucket(struct btree_trans *trans,
				   struct bch_dev *ca,
				   struct btree_iter *need_discard_iter,
				   struct bpos *discard_pos_done,
				   struct discard_buckets_state *s,
				   bool fastpath)
{
	struct bch_fs *c = trans->c;
	struct bpos pos = need_discard_iter->pos;
	struct btree_iter iter = {};
	struct bkey_s_c k;
	struct bkey_i_alloc_v4 *a;
	struct printbuf buf = PRINTBUF;
	bool in_flight_added = false;
	int ret = 0;

	if (bch2_bucket_is_open_safe(c, pos.inode, pos.offset)) {
		s->open++;
		goto out;
	}

	u64 journal_seq_ready =
		bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal,
					      pos.inode, pos.offset);
	if (journal_seq_ready > c->journal.flushed_seq_ondisk) {
		/* Only count it if no flush covering this seq is underway yet */
		if (journal_seq_ready > c->journal.flushing_seq)
			s->need_journal_commit++;
		goto out;
	}

	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, pos,
			       BTREE_ITER_cached);
	ret = bkey_err(k);
	if (ret)
		goto out;

	a = bch2_alloc_to_v4_mut(trans, k);
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		goto out;

	if (a->v.data_type != BCH_DATA_need_discard) {
		/*
		 * The need_discard entry disagrees with the alloc key; if the
		 * error is to be fixed, delete the stale need_discard entry
		 * and commit just that.
		 */
		if (!need_discard_or_freespace_err(trans, k, true, true, true))
			goto out;

		ret = bch2_btree_bit_mod_iter(trans, need_discard_iter, false);
		if (ret)
			goto out;
		goto commit;
	}

	if (!fastpath) {
		/* Someone else is already discarding this bucket: skip it */
		if (discard_in_flight_add(ca, iter.pos.offset, true))
			goto out;

		in_flight_added = true;
	}

	if (!bkey_eq(*discard_pos_done, iter.pos)) {
		s->discarded++;
		*discard_pos_done = iter.pos;

		if (discard_opt_enabled(c, ca) && !c->opts.nochanges) {
			/*
			 * Dropping btree locks here is safe without further
			 * locking: this is the only thread that removes
			 * entries from the need_discard tree.
			 */
			bch2_trans_unlock_long(trans);
			blkdev_issue_discard(ca->disk_sb.bdev,
					     k.k->p.offset * ca->mi.bucket_size,
					     ca->mi.bucket_size,
					     GFP_KERNEL);
			ret = bch2_trans_relock_notrace(trans);
			if (ret)
				goto out;
		}
	}

	SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
	alloc_data_type_set(&a->v, a->v.data_type);

	ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
	if (ret)
		goto out;
commit:
	ret = bch2_trans_commit(trans, NULL, NULL,
				BCH_WATERMARK_btree|
				BCH_TRANS_COMMIT_no_enospc);
	if (ret)
		goto out;

	if (fastpath)
		count_event(c, bucket_discard_fast);
	else
		count_event(c, bucket_discard);
out:
fsck_err:
	if (in_flight_added)
		discard_in_flight_remove(ca, iter.pos.offset);
	if (!ret)
		s->seen++;
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
}
192859cc38b8SKent Overstreet
bch2_do_discards_work(struct work_struct * work)192959cc38b8SKent Overstreet static void bch2_do_discards_work(struct work_struct *work)
193059cc38b8SKent Overstreet {
193164ee1431SKent Overstreet struct bch_dev *ca = container_of(work, struct bch_dev, discard_work);
193264ee1431SKent Overstreet struct bch_fs *c = ca->fs;
1933a6548c8bSKent Overstreet struct discard_buckets_state s = {};
19344910a950SKent Overstreet struct bpos discard_pos_done = POS_MAX;
193559cc38b8SKent Overstreet int ret;
193659cc38b8SKent Overstreet
19374910a950SKent Overstreet /*
19384910a950SKent Overstreet * We're doing the commit in bch2_discard_one_bucket instead of using
19394910a950SKent Overstreet * for_each_btree_key_commit() so that we can increment counters after
19404910a950SKent Overstreet * successful commit:
19414910a950SKent Overstreet */
19426bd68ec2SKent Overstreet ret = bch2_trans_run(c,
1943000fe8d5SKent Overstreet for_each_btree_key_max(trans, iter,
194464ee1431SKent Overstreet BTREE_ID_need_discard,
194564ee1431SKent Overstreet POS(ca->dev_idx, 0),
194664ee1431SKent Overstreet POS(ca->dev_idx, U64_MAX), 0, k,
1947bb61afebSKent Overstreet bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s, false)));
1948a6548c8bSKent Overstreet
19499e903352SKent Overstreet if (s.need_journal_commit > dev_buckets_available(ca, BCH_WATERMARK_normal))
19509e903352SKent Overstreet bch2_journal_flush_async(&c->journal, NULL);
19519e903352SKent Overstreet
1952a6548c8bSKent Overstreet trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
1953a6548c8bSKent Overstreet bch2_err_str(ret));
195459cc38b8SKent Overstreet
1955dcffc3b1SKent Overstreet percpu_ref_put(&ca->io_ref[WRITE]);
19560b50b731SKent Overstreet bch2_write_ref_put(c, BCH_WRITE_REF_discard);
195764ee1431SKent Overstreet }
195864ee1431SKent Overstreet
bch2_dev_do_discards(struct bch_dev * ca)195964ee1431SKent Overstreet void bch2_dev_do_discards(struct bch_dev *ca)
196064ee1431SKent Overstreet {
196164ee1431SKent Overstreet struct bch_fs *c = ca->fs;
196264ee1431SKent Overstreet
19630b50b731SKent Overstreet if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
196464ee1431SKent Overstreet return;
196564ee1431SKent Overstreet
19660b50b731SKent Overstreet if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
19670b50b731SKent Overstreet goto put_write_ref;
196864ee1431SKent Overstreet
196964ee1431SKent Overstreet if (queue_work(c->write_ref_wq, &ca->discard_work))
197064ee1431SKent Overstreet return;
197164ee1431SKent Overstreet
1972dcffc3b1SKent Overstreet percpu_ref_put(&ca->io_ref[WRITE]);
19730b50b731SKent Overstreet put_write_ref:
19740b50b731SKent Overstreet bch2_write_ref_put(c, BCH_WRITE_REF_discard);
197559cc38b8SKent Overstreet }
197659cc38b8SKent Overstreet
bch2_do_discards(struct bch_fs * c)197759cc38b8SKent Overstreet void bch2_do_discards(struct bch_fs *c)
197859cc38b8SKent Overstreet {
197964ee1431SKent Overstreet for_each_member_device(c, ca)
198064ee1431SKent Overstreet bch2_dev_do_discards(ca);
198159cc38b8SKent Overstreet }
198259cc38b8SKent Overstreet
/*
 * Fast-path discard of a single, known bucket.
 *
 * Looks up @bucket on @ca in the need_discard btree; if the entry is not of
 * type KEY_TYPE_set this is reported via log_fsck_err_on() and, when that
 * evaluates true, the bucket is left alone. Otherwise the bucket is handed
 * to bch2_discard_one_bucket() with fastpath set.
 *
 * Returns 0 or an error code.
 */
static int bch2_do_discards_fast_one(struct btree_trans *trans,
				     struct bch_dev *ca,
				     u64 bucket,
				     struct bpos *discard_pos_done,
				     struct discard_buckets_state *s)
{
	struct btree_iter need_discard_iter;
	struct bkey_s_c discard_k =
		bch2_bkey_get_iter(trans, &need_discard_iter,
				   BTREE_ID_need_discard,
				   POS(ca->dev_idx, bucket), 0);
	int ret = bkey_err(discard_k);

	if (ret)
		return ret;

	if (!log_fsck_err_on(discard_k.k->type != KEY_TYPE_set,
			     trans, discarding_bucket_not_in_need_discard_btree,
			     "attempting to discard bucket %u:%llu not in need_discard btree",
			     ca->dev_idx, bucket))
		ret = bch2_discard_one_bucket(trans, ca, &need_discard_iter,
					      discard_pos_done, s, true);
fsck_err:
	bch2_trans_iter_exit(trans, &need_discard_iter);
	return ret;
}
2008a393f331SKent Overstreet
bch2_do_discards_fast_work(struct work_struct * work)2009a393f331SKent Overstreet static void bch2_do_discards_fast_work(struct work_struct *work)
2010a393f331SKent Overstreet {
201164ee1431SKent Overstreet struct bch_dev *ca = container_of(work, struct bch_dev, discard_fast_work);
201264ee1431SKent Overstreet struct bch_fs *c = ca->fs;
2013bb61afebSKent Overstreet struct discard_buckets_state s = {};
2014bb61afebSKent Overstreet struct bpos discard_pos_done = POS_MAX;
2015bb61afebSKent Overstreet struct btree_trans *trans = bch2_trans_get(c);
2016bb61afebSKent Overstreet int ret = 0;
2017a393f331SKent Overstreet
2018a393f331SKent Overstreet while (1) {
2019a393f331SKent Overstreet bool got_bucket = false;
202064ee1431SKent Overstreet u64 bucket;
2021a393f331SKent Overstreet
202264ee1431SKent Overstreet mutex_lock(&ca->discard_buckets_in_flight_lock);
202364ee1431SKent Overstreet darray_for_each(ca->discard_buckets_in_flight, i) {
202464ee1431SKent Overstreet if (i->in_progress)
2025a393f331SKent Overstreet continue;
2026a393f331SKent Overstreet
2027a393f331SKent Overstreet got_bucket = true;
202864ee1431SKent Overstreet bucket = i->bucket;
202964ee1431SKent Overstreet i->in_progress = true;
2030a393f331SKent Overstreet break;
2031a393f331SKent Overstreet }
203264ee1431SKent Overstreet mutex_unlock(&ca->discard_buckets_in_flight_lock);
2033a393f331SKent Overstreet
2034a393f331SKent Overstreet if (!got_bucket)
2035a393f331SKent Overstreet break;
2036a393f331SKent Overstreet
2037bb61afebSKent Overstreet ret = lockrestart_do(trans,
2038bb61afebSKent Overstreet bch2_do_discards_fast_one(trans, ca, bucket, &discard_pos_done, &s));
2039a393f331SKent Overstreet bch_err_fn(c, ret);
2040a393f331SKent Overstreet
204164ee1431SKent Overstreet discard_in_flight_remove(ca, bucket);
2042a393f331SKent Overstreet
2043a393f331SKent Overstreet if (ret)
2044a393f331SKent Overstreet break;
2045a393f331SKent Overstreet }
2046a393f331SKent Overstreet
20479e903352SKent Overstreet trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
2048bb61afebSKent Overstreet
2049bb61afebSKent Overstreet bch2_trans_put(trans);
2050dcffc3b1SKent Overstreet percpu_ref_put(&ca->io_ref[WRITE]);
20515a6e43afSKent Overstreet bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
2052a393f331SKent Overstreet }
2053a393f331SKent Overstreet
/*
 * Queue @bucket on @ca for the fast discard worker.
 *
 * The bucket is added to the in-flight list (not yet in progress); if it is
 * already there, or cannot be added, nothing else happens. Then the fast
 * discard work item is scheduled, holding a BCH_WRITE_REF_discard_fast write
 * ref and a device write io_ref on its behalf. If the work could not be
 * queued, the references taken here are released again.
 */
static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
{
	struct bch_fs *c = ca->fs;

	if (discard_in_flight_add(ca, bucket, false))
		return;

	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
		return;

	if (bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) {
		/* Queued: the worker now owns both references */
		if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
			return;

		percpu_ref_put(&ca->io_ref[WRITE]);
	}

	bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
}
2074a393f331SKent Overstreet
invalidate_one_bp(struct btree_trans * trans,struct bch_dev * ca,struct bkey_s_c_backpointer bp,struct bkey_buf * last_flushed)2075942a418cSKent Overstreet static int invalidate_one_bp(struct btree_trans *trans,
2076942a418cSKent Overstreet struct bch_dev *ca,
2077942a418cSKent Overstreet struct bkey_s_c_backpointer bp,
2078942a418cSKent Overstreet struct bkey_buf *last_flushed)
2079942a418cSKent Overstreet {
2080942a418cSKent Overstreet struct btree_iter extent_iter;
2081942a418cSKent Overstreet struct bkey_s_c extent_k =
2082942a418cSKent Overstreet bch2_backpointer_get_key(trans, bp, &extent_iter, 0, last_flushed);
2083942a418cSKent Overstreet int ret = bkey_err(extent_k);
2084942a418cSKent Overstreet if (ret)
2085942a418cSKent Overstreet return ret;
2086942a418cSKent Overstreet
2087*39ebd748SKent Overstreet if (!extent_k.k)
2088*39ebd748SKent Overstreet return 0;
2089*39ebd748SKent Overstreet
2090942a418cSKent Overstreet struct bkey_i *n =
2091942a418cSKent Overstreet bch2_bkey_make_mut(trans, &extent_iter, &extent_k,
2092942a418cSKent Overstreet BTREE_UPDATE_internal_snapshot_node);
2093942a418cSKent Overstreet ret = PTR_ERR_OR_ZERO(n);
2094942a418cSKent Overstreet if (ret)
2095942a418cSKent Overstreet goto err;
2096942a418cSKent Overstreet
2097942a418cSKent Overstreet bch2_bkey_drop_device(bkey_i_to_s(n), ca->dev_idx);
2098942a418cSKent Overstreet err:
2099942a418cSKent Overstreet bch2_trans_iter_exit(trans, &extent_iter);
2100942a418cSKent Overstreet return ret;
2101942a418cSKent Overstreet }
2102942a418cSKent Overstreet
/*
 * Invalidate everything stored in @bucket on @ca by walking the bucket's
 * range of the backpointers btree and calling invalidate_one_bp() on each
 * backpointer whose bucket_gen equals @gen, committing after each key.
 *
 * Returns 0 on success or an error code.
 */
static int invalidate_one_bucket_by_bps(struct btree_trans *trans,
					struct bch_dev *ca,
					struct bpos bucket,
					u8 gen,
					struct bkey_buf *last_flushed)
{
	struct bpos bp_start	= bucket_pos_to_bp_start(ca, bucket);
	struct bpos bp_end	= bucket_pos_to_bp_end(ca, bucket);

	int ret = for_each_btree_key_max_commit(trans, iter, BTREE_ID_backpointers,
			bp_start, bp_end, 0, k,
			NULL, NULL,
			BCH_WATERMARK_btree|
			BCH_TRANS_COMMIT_no_enospc, ({
		/* Only real backpointer keys are of interest */
		if (k.k->type != KEY_TYPE_backpointer)
			continue;

		struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

		/* filter out bps with gens that don't match */
		if (bp.v->bucket_gen != gen)
			continue;

		invalidate_one_bp(trans, ca, bp, last_flushed);
	}));

	return ret;
}
2130942a418cSKent Overstreet
2131942a418cSKent Overstreet noinline_for_stack
invalidate_one_bucket(struct btree_trans * trans,struct bch_dev * ca,struct btree_iter * lru_iter,struct bkey_s_c lru_k,struct bkey_buf * last_flushed,s64 * nr_to_invalidate)2132d04801a0SKent Overstreet static int invalidate_one_bucket(struct btree_trans *trans,
2133942a418cSKent Overstreet struct bch_dev *ca,
213483f33d68SKent Overstreet struct btree_iter *lru_iter,
2135629a21b6SKent Overstreet struct bkey_s_c lru_k,
2136942a418cSKent Overstreet struct bkey_buf *last_flushed,
213783f33d68SKent Overstreet s64 *nr_to_invalidate)
2138caece7feSKent Overstreet {
2139caece7feSKent Overstreet struct bch_fs *c = trans->c;
21409b93596cSKent Overstreet struct printbuf buf = PRINTBUF;
2141629a21b6SKent Overstreet struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
2142942a418cSKent Overstreet struct btree_iter alloc_iter = {};
2143d04801a0SKent Overstreet int ret = 0;
2144caece7feSKent Overstreet
214583f33d68SKent Overstreet if (*nr_to_invalidate <= 0)
2146d04801a0SKent Overstreet return 1;
2147caece7feSKent Overstreet
2148629a21b6SKent Overstreet if (!bch2_dev_bucket_exists(c, bucket)) {
2149724e49c6SKent Overstreet if (fsck_err(trans, lru_entry_to_invalid_bucket,
2150724e49c6SKent Overstreet "lru key points to nonexistent device:bucket %llu:%llu",
2151724e49c6SKent Overstreet bucket.inode, bucket.offset))
2152724e49c6SKent Overstreet return bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false);
2153724e49c6SKent Overstreet goto out;
2154629a21b6SKent Overstreet }
2155629a21b6SKent Overstreet
215684ddb8b9SKent Overstreet if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
215784ddb8b9SKent Overstreet return 0;
215884ddb8b9SKent Overstreet
2159942a418cSKent Overstreet struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter,
2160942a418cSKent Overstreet BTREE_ID_alloc, bucket,
2161942a418cSKent Overstreet BTREE_ITER_cached);
2162942a418cSKent Overstreet ret = bkey_err(alloc_k);
2163caece7feSKent Overstreet if (ret)
2164942a418cSKent Overstreet return ret;
2165942a418cSKent Overstreet
2166942a418cSKent Overstreet struct bch_alloc_v4 a_convert;
2167942a418cSKent Overstreet const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
2168caece7feSKent Overstreet
21691b30ed5fSKent Overstreet /* We expect harmless races here due to the btree write buffer: */
2170942a418cSKent Overstreet if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(*a))
21711b30ed5fSKent Overstreet goto out;
2172d04801a0SKent Overstreet
2173b7f648e2SKent Overstreet /*
2174b7f648e2SKent Overstreet * Impossible since alloc_lru_idx_read() only returns nonzero if the
2175b7f648e2SKent Overstreet * bucket is supposed to be on the cached bucket LRU (i.e.
2176b7f648e2SKent Overstreet * BCH_DATA_cached)
2177b7f648e2SKent Overstreet *
2178b7f648e2SKent Overstreet * bch2_lru_validate() also disallows lru keys with lru_pos_time() == 0
2179b7f648e2SKent Overstreet */
2180942a418cSKent Overstreet BUG_ON(a->data_type != BCH_DATA_cached);
2181942a418cSKent Overstreet BUG_ON(a->dirty_sectors);
2182caece7feSKent Overstreet
2183942a418cSKent Overstreet if (!a->cached_sectors)
218438585367SKent Overstreet bch_err(c, "invalidating empty bucket, confused");
218538585367SKent Overstreet
2186942a418cSKent Overstreet unsigned cached_sectors = a->cached_sectors;
2187942a418cSKent Overstreet u8 gen = a->gen;
218838585367SKent Overstreet
2189942a418cSKent Overstreet ret = invalidate_one_bucket_by_bps(trans, ca, bucket, gen, last_flushed);
21901f93726eSKent Overstreet if (ret)
21911f93726eSKent Overstreet goto out;
2192d04801a0SKent Overstreet
2193674cfc26SKent Overstreet trace_and_count(c, bucket_invalidate, c, bucket.inode, bucket.offset, cached_sectors);
2194d04801a0SKent Overstreet --*nr_to_invalidate;
2195caece7feSKent Overstreet out:
2196724e49c6SKent Overstreet fsck_err:
2197942a418cSKent Overstreet bch2_trans_iter_exit(trans, &alloc_iter);
21989b93596cSKent Overstreet printbuf_exit(&buf);
2199caece7feSKent Overstreet return ret;
2200caece7feSKent Overstreet }
2201caece7feSKent Overstreet
/*
 * Return the next key in @ca's range of the lru btree, starting at @iter's
 * current position.
 *
 * If the end of the device's range is reached and we have not wrapped yet,
 * the iterator is rewound to the start of the range, *@wrapped is set, and
 * the search is retried once. The result may be a null key (nothing left)
 * or an error key; callers check with bkey_err().
 */
static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter *iter,
				    struct bch_dev *ca, bool *wrapped)
{
	for (;;) {
		struct bkey_s_c k =
			bch2_btree_iter_peek_max(trans, iter,
					lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX));

		if (k.k || *wrapped)
			return k;

		/* Ran off the end: restart once from the beginning */
		bch2_btree_iter_set_pos(trans, iter, lru_pos(ca->dev_idx, 0, 0));
		*wrapped = true;
	}
}
22162e9940d4SKent Overstreet
bch2_do_invalidates_work(struct work_struct * work)2217caece7feSKent Overstreet static void bch2_do_invalidates_work(struct work_struct *work)
2218caece7feSKent Overstreet {
221964ee1431SKent Overstreet struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work);
222064ee1431SKent Overstreet struct bch_fs *c = ca->fs;
22216bd68ec2SKent Overstreet struct btree_trans *trans = bch2_trans_get(c);
2222caece7feSKent Overstreet int ret = 0;
2223caece7feSKent Overstreet
2224942a418cSKent Overstreet struct bkey_buf last_flushed;
2225942a418cSKent Overstreet bch2_bkey_buf_init(&last_flushed);
2226942a418cSKent Overstreet bkey_init(&last_flushed.k->k);
2227942a418cSKent Overstreet
2228cb13f471SKent Overstreet ret = bch2_btree_write_buffer_tryflush(trans);
22291b30ed5fSKent Overstreet if (ret)
22301b30ed5fSKent Overstreet goto err;
22311b30ed5fSKent Overstreet
2232822835ffSKent Overstreet s64 nr_to_invalidate =
2233822835ffSKent Overstreet should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
22342e9940d4SKent Overstreet struct btree_iter iter;
22352e9940d4SKent Overstreet bool wrapped = false;
2236822835ffSKent Overstreet
22372e9940d4SKent Overstreet bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
22382e9940d4SKent Overstreet lru_pos(ca->dev_idx, 0,
22392e9940d4SKent Overstreet ((bch2_current_io_time(c, READ) + U32_MAX) &
22402e9940d4SKent Overstreet LRU_TIME_MAX)), 0);
22412e9940d4SKent Overstreet
22422e9940d4SKent Overstreet while (true) {
22432e9940d4SKent Overstreet bch2_trans_begin(trans);
22442e9940d4SKent Overstreet
22452e9940d4SKent Overstreet struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
22462e9940d4SKent Overstreet ret = bkey_err(k);
22472e9940d4SKent Overstreet if (ret)
22487ee4be9cSKent Overstreet goto restart_err;
22492e9940d4SKent Overstreet if (!k.k)
22502e9940d4SKent Overstreet break;
22512e9940d4SKent Overstreet
2252942a418cSKent Overstreet ret = invalidate_one_bucket(trans, ca, &iter, k, &last_flushed, &nr_to_invalidate);
22537ee4be9cSKent Overstreet restart_err:
22547ee4be9cSKent Overstreet if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
22557ee4be9cSKent Overstreet continue;
22562e9940d4SKent Overstreet if (ret)
22572e9940d4SKent Overstreet break;
22582e9940d4SKent Overstreet
22599180ad2eSKent Overstreet bch2_btree_iter_advance(trans, &iter);
22602e9940d4SKent Overstreet }
22612e9940d4SKent Overstreet bch2_trans_iter_exit(trans, &iter);
22621b30ed5fSKent Overstreet err:
22636bd68ec2SKent Overstreet bch2_trans_put(trans);
2264dcffc3b1SKent Overstreet percpu_ref_put(&ca->io_ref[WRITE]);
2265942a418cSKent Overstreet bch2_bkey_buf_exit(&last_flushed, c);
22665a6e43afSKent Overstreet bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
226764ee1431SKent Overstreet }
226864ee1431SKent Overstreet
bch2_dev_do_invalidates(struct bch_dev * ca)226964ee1431SKent Overstreet void bch2_dev_do_invalidates(struct bch_dev *ca)
227064ee1431SKent Overstreet {
227164ee1431SKent Overstreet struct bch_fs *c = ca->fs;
227264ee1431SKent Overstreet
22735a6e43afSKent Overstreet if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
227464ee1431SKent Overstreet return;
227564ee1431SKent Overstreet
22765a6e43afSKent Overstreet if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
22775a6e43afSKent Overstreet goto put_ref;
227864ee1431SKent Overstreet
227964ee1431SKent Overstreet if (queue_work(c->write_ref_wq, &ca->invalidate_work))
228064ee1431SKent Overstreet return;
228164ee1431SKent Overstreet
2282dcffc3b1SKent Overstreet percpu_ref_put(&ca->io_ref[WRITE]);
22835a6e43afSKent Overstreet put_ref:
22845a6e43afSKent Overstreet bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
2285caece7feSKent Overstreet }
2286caece7feSKent Overstreet
bch2_do_invalidates(struct bch_fs * c)2287caece7feSKent Overstreet void bch2_do_invalidates(struct bch_fs *c)
2288caece7feSKent Overstreet {
228964ee1431SKent Overstreet for_each_member_device(c, ca)
229064ee1431SKent Overstreet bch2_dev_do_invalidates(ca);
2291caece7feSKent Overstreet }
2292caece7feSKent Overstreet
/*
 * Walk the alloc btree for device @ca, from
 * max(ca->mi.first_bucket, @bucket_start) up to (but not including)
 * @bucket_end, and add buckets to the freespace/need_discard/need_gc_gens
 * btrees as needed.
 *
 * Each key or hole is handled in its own transaction commit; a transaction
 * restart simply retries the current position.
 *
 * On success the FREESPACE_INITIALIZED flag of the member is set in the
 * superblock under c->sb_lock (this function does not call
 * bch2_write_super() itself) and 0 is returned.  If the scan ends with a
 * negative error it is logged and returned.
 */
int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
			    u64 bucket_start, u64 bucket_end)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bpos scan_end = POS(ca->dev_idx, bucket_end);
	unsigned long last_report = jiffies;
	struct btree_iter iter;
	struct bkey hole_key;
	struct bkey_s_c k;
	struct bch_member *m;
	int ret;

	BUG_ON(bucket_start > bucket_end);
	BUG_ON(bucket_end > ca->mi.nbuckets);

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
		POS(ca->dev_idx, max_t(u64, ca->mi.first_bucket, bucket_start)),
		BTREE_ITER_prefetch);

	for (;;) {
		/* Emit a progress line at most once every ten seconds: */
		if (time_after(jiffies, last_report + HZ * 10)) {
			bch_info(ca, "%s: currently at %llu/%llu",
				 __func__, iter.pos.offset, ca->mi.nbuckets);
			last_report = jiffies;
		}

		bch2_trans_begin(trans);

		/* Reached the end of the requested range - done: */
		if (bkey_ge(iter.pos, scan_end)) {
			ret = 0;
			break;
		}

		k = bch2_get_key_or_hole(trans, &iter, scan_end, &hole_key);
		ret = bkey_err(k);
		if (ret)
			goto check_ret;

		if (!k.k->type) {
			/*
			 * A hole in the alloc btree: insert one KEY_TYPE_set
			 * key into the freespace btree carrying the hole's
			 * position and size, then move the iterator to
			 * k.k->p.
			 */
			struct bkey_i *fs_key;

			fs_key = bch2_trans_kmalloc(trans, sizeof(*fs_key));
			ret = PTR_ERR_OR_ZERO(fs_key);
			if (ret)
				goto check_ret;

			bkey_init(&fs_key->k);
			fs_key->k.type	= KEY_TYPE_set;
			fs_key->k.p	= k.k->p;
			fs_key->k.size	= k.k->size;

			ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace, fs_key, 0);
			if (!ret)
				ret = bch2_trans_commit(trans, NULL, NULL,
							BCH_TRANS_COMMIT_no_enospc);
			if (ret)
				goto check_ret;

			bch2_btree_iter_set_pos(trans, &iter, k.k->p);
		} else {
			/* Live alloc keys are indexed one at a time: */
			struct bch_alloc_v4 a_scratch;
			const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_scratch);

			ret = bch2_bucket_do_index(trans, ca, k, a, true);
			if (!ret)
				ret = bch2_trans_commit(trans, NULL, NULL,
							BCH_TRANS_COMMIT_no_enospc);
			if (ret)
				goto check_ret;

			bch2_btree_iter_advance(trans, &iter);
		}
check_ret:
		/*
		 * Transaction restarts (and success) go around again; any
		 * other error terminates the scan.
		 */
		if (!bch2_err_matches(ret, BCH_ERR_transaction_restart) && ret)
			break;
	}

	bch2_trans_iter_exit(trans, &iter);
	bch2_trans_put(trans);

	if (ret < 0) {
		bch_err_msg(ca, ret, "initializing free space");
		return ret;
	}

	mutex_lock(&c->sb_lock);
	m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
	SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, true);
	mutex_unlock(&c->sb_lock);

	return 0;
}
2392c6b2826cSKent Overstreet
bch2_fs_freespace_init(struct bch_fs * c)2393c6b2826cSKent Overstreet int bch2_fs_freespace_init(struct bch_fs *c)
2394c6b2826cSKent Overstreet {
2395c6b2826cSKent Overstreet int ret = 0;
2396c6b2826cSKent Overstreet bool doing_init = false;
2397c6b2826cSKent Overstreet
2398c6b2826cSKent Overstreet /*
2399c6b2826cSKent Overstreet * We can crash during the device add path, so we need to check this on
2400c6b2826cSKent Overstreet * every mount:
2401c6b2826cSKent Overstreet */
2402c6b2826cSKent Overstreet
24039fea2274SKent Overstreet for_each_member_device(c, ca) {
2404c6b2826cSKent Overstreet if (ca->mi.freespace_initialized)
2405c6b2826cSKent Overstreet continue;
2406c6b2826cSKent Overstreet
2407c6b2826cSKent Overstreet if (!doing_init) {
2408c6b2826cSKent Overstreet bch_info(c, "initializing freespace");
2409c6b2826cSKent Overstreet doing_init = true;
2410c6b2826cSKent Overstreet }
2411c6b2826cSKent Overstreet
241269d1f052SKent Overstreet ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
2413c6b2826cSKent Overstreet if (ret) {
2414f295298bSKent Overstreet bch2_dev_put(ca);
24151bb3c2a9SKent Overstreet bch_err_fn(c, ret);
2416c6b2826cSKent Overstreet return ret;
2417c6b2826cSKent Overstreet }
2418c6b2826cSKent Overstreet }
2419c6b2826cSKent Overstreet
2420c6b2826cSKent Overstreet if (doing_init) {
2421c6b2826cSKent Overstreet mutex_lock(&c->sb_lock);
2422c6b2826cSKent Overstreet bch2_write_super(c);
2423c6b2826cSKent Overstreet mutex_unlock(&c->sb_lock);
2424c6b2826cSKent Overstreet bch_verbose(c, "done initializing freespace");
2425c6b2826cSKent Overstreet }
2426c6b2826cSKent Overstreet
24271bb3c2a9SKent Overstreet return 0;
2428c6b2826cSKent Overstreet }
2429c6b2826cSKent Overstreet
24306b812f1dSKent Overstreet /* device removal */
24316b812f1dSKent Overstreet
bch2_dev_remove_alloc(struct bch_fs * c,struct bch_dev * ca)24326b812f1dSKent Overstreet int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
24336b812f1dSKent Overstreet {
24346b812f1dSKent Overstreet struct bpos start = POS(ca->dev_idx, 0);
24356b812f1dSKent Overstreet struct bpos end = POS(ca->dev_idx, U64_MAX);
24366b812f1dSKent Overstreet int ret;
24376b812f1dSKent Overstreet
24386b812f1dSKent Overstreet /*
24396b812f1dSKent Overstreet * We clear the LRU and need_discard btrees first so that we don't race
24406b812f1dSKent Overstreet * with bch2_do_invalidates() and bch2_do_discards()
24416b812f1dSKent Overstreet */
2442ad8d1f77SKent Overstreet ret = bch2_dev_remove_stripes(c, ca->dev_idx) ?:
2443ad8d1f77SKent Overstreet bch2_btree_delete_range(c, BTREE_ID_lru, start, end,
24446b812f1dSKent Overstreet BTREE_TRIGGER_norun, NULL) ?:
24456b812f1dSKent Overstreet bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end,
24466b812f1dSKent Overstreet BTREE_TRIGGER_norun, NULL) ?:
24476b812f1dSKent Overstreet bch2_btree_delete_range(c, BTREE_ID_freespace, start, end,
24486b812f1dSKent Overstreet BTREE_TRIGGER_norun, NULL) ?:
24496b812f1dSKent Overstreet bch2_btree_delete_range(c, BTREE_ID_backpointers, start, end,
24506b812f1dSKent Overstreet BTREE_TRIGGER_norun, NULL) ?:
24516b812f1dSKent Overstreet bch2_btree_delete_range(c, BTREE_ID_bucket_gens, start, end,
24526b812f1dSKent Overstreet BTREE_TRIGGER_norun, NULL) ?:
2453ad8d1f77SKent Overstreet bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
2454ad8d1f77SKent Overstreet BTREE_TRIGGER_norun, NULL) ?:
24556b812f1dSKent Overstreet bch2_dev_usage_remove(c, ca->dev_idx);
24566b812f1dSKent Overstreet bch_err_msg(ca, ret, "removing dev alloc info");
24576b812f1dSKent Overstreet return ret;
24586b812f1dSKent Overstreet }
24596b812f1dSKent Overstreet
24607b3f84eaSKent Overstreet /* Bucket IO clocks: */
24617b3f84eaSKent Overstreet
/*
 * Set io_time[@rw] of the alloc key for bucket (@dev, @bucket_nr) to the
 * current IO time for @rw, and commit.
 *
 * If the stored time already equals the current one nothing is updated or
 * committed.  Returns 0 or the error from starting the update, queuing it or
 * committing it.
 */
static int __bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
				       size_t bucket_nr, int rw)
{
	struct btree_iter iter;
	struct bkey_i_alloc_v4 *a;
	u64 now;
	int ret;

	a = bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(dev, bucket_nr));
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		return ret;

	now = bch2_current_io_time(trans->c, rw);
	if (a->v.io_time[rw] != now) {
		a->v.io_time[rw] = now;

		ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
		if (!ret)
			ret = bch2_trans_commit(trans, NULL, NULL, 0);
	}

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
2486f30dd860SKent Overstreet
/*
 * Entry point for resetting a bucket's IO time from within an existing
 * transaction: if relocking @trans fails the transaction is begun afresh,
 * then __bch2_bucket_io_time_reset() is run under nested_lockrestart_do().
 */
int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
			      size_t bucket_nr, int rw)
{
	int ret;

	if (bch2_trans_relock(trans))
		bch2_trans_begin(trans);

	ret = nested_lockrestart_do(trans,
			__bch2_bucket_io_time_reset(trans, dev, bucket_nr, rw));
	return ret;
}
24956bee2a04SKent Overstreet
24967b3f84eaSKent Overstreet /* Startup/shutdown (ro/rw): */
24977b3f84eaSKent Overstreet
bch2_recalc_capacity(struct bch_fs * c)24987b3f84eaSKent Overstreet void bch2_recalc_capacity(struct bch_fs *c)
24997b3f84eaSKent Overstreet {
2500cb66fc5fSKent Overstreet u64 capacity = 0, reserved_sectors = 0, gc_reserve;
2501b092daddSKent Overstreet unsigned bucket_size_max = 0;
25027b3f84eaSKent Overstreet unsigned long ra_pages = 0;
25037b3f84eaSKent Overstreet
25047b3f84eaSKent Overstreet lockdep_assert_held(&c->state_lock);
25057b3f84eaSKent Overstreet
25069fea2274SKent Overstreet for_each_online_member(c, ca) {
25077b3f84eaSKent Overstreet struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_disk->bdi;
25087b3f84eaSKent Overstreet
25097b3f84eaSKent Overstreet ra_pages += bdi->ra_pages;
25107b3f84eaSKent Overstreet }
25117b3f84eaSKent Overstreet
25127b3f84eaSKent Overstreet bch2_set_ra_pages(c, ra_pages);
25137b3f84eaSKent Overstreet
2514dcffc3b1SKent Overstreet __for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
25157b3f84eaSKent Overstreet u64 dev_reserve = 0;
25167b3f84eaSKent Overstreet
25177b3f84eaSKent Overstreet /*
25187b3f84eaSKent Overstreet * We need to reserve buckets (from the number
25197b3f84eaSKent Overstreet * of currently available buckets) against
25207b3f84eaSKent Overstreet * foreground writes so that mainly copygc can
25217b3f84eaSKent Overstreet * make forward progress.
25227b3f84eaSKent Overstreet *
25237b3f84eaSKent Overstreet * We need enough to refill the various reserves
25247b3f84eaSKent Overstreet * from scratch - copygc will use its entire
25257b3f84eaSKent Overstreet * reserve all at once, then run against when
25267b3f84eaSKent Overstreet * its reserve is refilled (from the formerly
25277b3f84eaSKent Overstreet * available buckets).
25287b3f84eaSKent Overstreet *
25297b3f84eaSKent Overstreet * This reserve is just used when considering if
25307b3f84eaSKent Overstreet * allocations for foreground writes must wait -
25317b3f84eaSKent Overstreet * not -ENOSPC calculations.
25327b3f84eaSKent Overstreet */
2533f25d8215SKent Overstreet
2534f25d8215SKent Overstreet dev_reserve += ca->nr_btree_reserve * 2;
2535f25d8215SKent Overstreet dev_reserve += ca->mi.nbuckets >> 6; /* copygc reserve */
25367b3f84eaSKent Overstreet
25377b3f84eaSKent Overstreet dev_reserve += 1; /* btree write point */
25387b3f84eaSKent Overstreet dev_reserve += 1; /* copygc write point */
25397b3f84eaSKent Overstreet dev_reserve += 1; /* rebalance write point */
25407b3f84eaSKent Overstreet
25417b3f84eaSKent Overstreet dev_reserve *= ca->mi.bucket_size;
25427b3f84eaSKent Overstreet
25437b3f84eaSKent Overstreet capacity += bucket_to_sector(ca, ca->mi.nbuckets -
25447b3f84eaSKent Overstreet ca->mi.first_bucket);
25457b3f84eaSKent Overstreet
25467b3f84eaSKent Overstreet reserved_sectors += dev_reserve * 2;
2547b092daddSKent Overstreet
2548b092daddSKent Overstreet bucket_size_max = max_t(unsigned, bucket_size_max,
2549b092daddSKent Overstreet ca->mi.bucket_size);
25507b3f84eaSKent Overstreet }
25517b3f84eaSKent Overstreet
25527b3f84eaSKent Overstreet gc_reserve = c->opts.gc_reserve_bytes
25537b3f84eaSKent Overstreet ? c->opts.gc_reserve_bytes >> 9
25547b3f84eaSKent Overstreet : div64_u64(capacity * c->opts.gc_reserve_percent, 100);
25557b3f84eaSKent Overstreet
25567b3f84eaSKent Overstreet reserved_sectors = max(gc_reserve, reserved_sectors);
25577b3f84eaSKent Overstreet
25587b3f84eaSKent Overstreet reserved_sectors = min(reserved_sectors, capacity);
25597b3f84eaSKent Overstreet
256026a170aaSKent Overstreet c->reserved = reserved_sectors;
25617b3f84eaSKent Overstreet c->capacity = capacity - reserved_sectors;
25627b3f84eaSKent Overstreet
2563b092daddSKent Overstreet c->bucket_size_max = bucket_size_max;
2564b092daddSKent Overstreet
25657b3f84eaSKent Overstreet /* Wake up case someone was waiting for buckets */
25667b3f84eaSKent Overstreet closure_wake_up(&c->freelist_wait);
25677b3f84eaSKent Overstreet }
25687b3f84eaSKent Overstreet
bch2_min_rw_member_capacity(struct bch_fs * c)25691f7056b7SKent Overstreet u64 bch2_min_rw_member_capacity(struct bch_fs *c)
25701f7056b7SKent Overstreet {
25711f7056b7SKent Overstreet u64 ret = U64_MAX;
25721f7056b7SKent Overstreet
25739fea2274SKent Overstreet for_each_rw_member(c, ca)
25741f7056b7SKent Overstreet ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size);
25751f7056b7SKent Overstreet return ret;
25761f7056b7SKent Overstreet }
25771f7056b7SKent Overstreet
bch2_dev_has_open_write_point(struct bch_fs * c,struct bch_dev * ca)25787b3f84eaSKent Overstreet static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
25797b3f84eaSKent Overstreet {
25807b3f84eaSKent Overstreet struct open_bucket *ob;
25817b3f84eaSKent Overstreet bool ret = false;
25827b3f84eaSKent Overstreet
25837b3f84eaSKent Overstreet for (ob = c->open_buckets;
25847b3f84eaSKent Overstreet ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
25857b3f84eaSKent Overstreet ob++) {
25867b3f84eaSKent Overstreet spin_lock(&ob->lock);
25877b3f84eaSKent Overstreet if (ob->valid && !ob->on_partial_list &&
2588abe19d45SKent Overstreet ob->dev == ca->dev_idx)
25897b3f84eaSKent Overstreet ret = true;
25907b3f84eaSKent Overstreet spin_unlock(&ob->lock);
25917b3f84eaSKent Overstreet }
25927b3f84eaSKent Overstreet
25937b3f84eaSKent Overstreet return ret;
25947b3f84eaSKent Overstreet }
25957b3f84eaSKent Overstreet
25967b3f84eaSKent Overstreet /* device goes ro: */
bch2_dev_allocator_remove(struct bch_fs * c,struct bch_dev * ca)25977b3f84eaSKent Overstreet void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
25987b3f84eaSKent Overstreet {
259983ccd9b3SKent Overstreet lockdep_assert_held(&c->state_lock);
26007b3f84eaSKent Overstreet
26017b3f84eaSKent Overstreet /* First, remove device from allocation groups: */
26027b3f84eaSKent Overstreet
260383ccd9b3SKent Overstreet for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
26047b3f84eaSKent Overstreet clear_bit(ca->dev_idx, c->rw_devs[i].d);
26057b3f84eaSKent Overstreet
260683ccd9b3SKent Overstreet c->rw_devs_change_count++;
260783ccd9b3SKent Overstreet
26087b3f84eaSKent Overstreet /*
26097b3f84eaSKent Overstreet * Capacity is calculated based off of devices in allocation groups:
26107b3f84eaSKent Overstreet */
26117b3f84eaSKent Overstreet bch2_recalc_capacity(c);
26127b3f84eaSKent Overstreet
2613b40901b0SKent Overstreet bch2_open_buckets_stop(c, ca, false);
2614cd575ddfSKent Overstreet
26157b3f84eaSKent Overstreet /*
26167b3f84eaSKent Overstreet * Wake up threads that were blocked on allocation, so they can notice
26177b3f84eaSKent Overstreet * the device can no longer be removed and the capacity has changed:
26187b3f84eaSKent Overstreet */
26197b3f84eaSKent Overstreet closure_wake_up(&c->freelist_wait);
26207b3f84eaSKent Overstreet
26217b3f84eaSKent Overstreet /*
26227b3f84eaSKent Overstreet * journal_res_get() can block waiting for free space in the journal -
26237b3f84eaSKent Overstreet * it needs to notice there may not be devices to allocate from anymore:
26247b3f84eaSKent Overstreet */
26257b3f84eaSKent Overstreet wake_up(&c->journal.wait);
26267b3f84eaSKent Overstreet
26277b3f84eaSKent Overstreet /* Now wait for any in flight writes: */
26287b3f84eaSKent Overstreet
26297b3f84eaSKent Overstreet closure_wait_event(&c->open_buckets_wait,
26307b3f84eaSKent Overstreet !bch2_dev_has_open_write_point(c, ca));
26317b3f84eaSKent Overstreet }
26327b3f84eaSKent Overstreet
26337b3f84eaSKent Overstreet /* device goes rw: */
bch2_dev_allocator_add(struct bch_fs * c,struct bch_dev * ca)26347b3f84eaSKent Overstreet void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
26357b3f84eaSKent Overstreet {
263683ccd9b3SKent Overstreet lockdep_assert_held(&c->state_lock);
26377b3f84eaSKent Overstreet
263883ccd9b3SKent Overstreet for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
26397b3f84eaSKent Overstreet if (ca->mi.data_allowed & (1 << i))
26407b3f84eaSKent Overstreet set_bit(ca->dev_idx, c->rw_devs[i].d);
264183ccd9b3SKent Overstreet
264283ccd9b3SKent Overstreet c->rw_devs_change_count++;
26437b3f84eaSKent Overstreet }
26447b3f84eaSKent Overstreet
bch2_dev_allocator_background_exit(struct bch_dev * ca)264564ee1431SKent Overstreet void bch2_dev_allocator_background_exit(struct bch_dev *ca)
2646a393f331SKent Overstreet {
264764ee1431SKent Overstreet darray_exit(&ca->discard_buckets_in_flight);
264864ee1431SKent Overstreet }
264964ee1431SKent Overstreet
bch2_dev_allocator_background_init(struct bch_dev * ca)265064ee1431SKent Overstreet void bch2_dev_allocator_background_init(struct bch_dev *ca)
265164ee1431SKent Overstreet {
265264ee1431SKent Overstreet mutex_init(&ca->discard_buckets_in_flight_lock);
265364ee1431SKent Overstreet INIT_WORK(&ca->discard_work, bch2_do_discards_work);
265464ee1431SKent Overstreet INIT_WORK(&ca->discard_fast_work, bch2_do_discards_fast_work);
265564ee1431SKent Overstreet INIT_WORK(&ca->invalidate_work, bch2_do_invalidates_work);
2656a393f331SKent Overstreet }
2657a393f331SKent Overstreet
bch2_fs_allocator_background_init(struct bch_fs * c)2658b092daddSKent Overstreet void bch2_fs_allocator_background_init(struct bch_fs *c)
26597b3f84eaSKent Overstreet {
26607b3f84eaSKent Overstreet spin_lock_init(&c->freelist_lock);
26617b3f84eaSKent Overstreet }
2662