xref: /linux-6.15/fs/bcachefs/alloc_background.c (revision 39ebd748)
17b3f84eaSKent Overstreet // SPDX-License-Identifier: GPL-2.0
27b3f84eaSKent Overstreet #include "bcachefs.h"
37b3f84eaSKent Overstreet #include "alloc_background.h"
47b3f84eaSKent Overstreet #include "alloc_foreground.h"
5a8c752bbSKent Overstreet #include "backpointers.h"
6d39881d2SKent Overstreet #include "bkey_buf.h"
77b3f84eaSKent Overstreet #include "btree_cache.h"
87b3f84eaSKent Overstreet #include "btree_io.h"
95d20ba48SKent Overstreet #include "btree_key_cache.h"
107b3f84eaSKent Overstreet #include "btree_update.h"
117b3f84eaSKent Overstreet #include "btree_update_interior.h"
127b3f84eaSKent Overstreet #include "btree_gc.h"
131b30ed5fSKent Overstreet #include "btree_write_buffer.h"
147b3f84eaSKent Overstreet #include "buckets.h"
1521aec962SKent Overstreet #include "buckets_waiting_for_journal.h"
167b3f84eaSKent Overstreet #include "clock.h"
177b3f84eaSKent Overstreet #include "debug.h"
181d16c605SKent Overstreet #include "disk_accounting.h"
19cd575ddfSKent Overstreet #include "ec.h"
207b3f84eaSKent Overstreet #include "error.h"
21c6b2826cSKent Overstreet #include "lru.h"
22d0734356SKent Overstreet #include "recovery.h"
237b3f84eaSKent Overstreet #include "trace.h"
247f4e1d5dSKent Overstreet #include "varint.h"
257b3f84eaSKent Overstreet 
267b3f84eaSKent Overstreet #include <linux/kthread.h>
277b3f84eaSKent Overstreet #include <linux/math64.h>
287b3f84eaSKent Overstreet #include <linux/random.h>
297b3f84eaSKent Overstreet #include <linux/rculist.h>
307b3f84eaSKent Overstreet #include <linux/rcupdate.h>
317b3f84eaSKent Overstreet #include <linux/sched/task.h>
327b3f84eaSKent Overstreet #include <linux/sort.h>
33848c3ff8SChen Yufan #include <linux/jiffies.h>
347b3f84eaSKent Overstreet 
3564ee1431SKent Overstreet static void bch2_discard_one_bucket_fast(struct bch_dev *, u64);
36a393f331SKent Overstreet 
373d48a7f8SKent Overstreet /* Persistent alloc info: */
383d48a7f8SKent Overstreet 
397f4e1d5dSKent Overstreet static const unsigned BCH_ALLOC_V1_FIELD_BYTES[] = {
407f4e1d5dSKent Overstreet #define x(name, bits) [BCH_ALLOC_FIELD_V1_##name] = bits / 8,
417f4e1d5dSKent Overstreet 	BCH_ALLOC_FIELDS_V1()
4290541a74SKent Overstreet #undef x
4390541a74SKent Overstreet };
4490541a74SKent Overstreet 
453d48a7f8SKent Overstreet struct bkey_alloc_unpacked {
463d48a7f8SKent Overstreet 	u64		journal_seq;
473d48a7f8SKent Overstreet 	u8		gen;
483d48a7f8SKent Overstreet 	u8		oldest_gen;
493d48a7f8SKent Overstreet 	u8		data_type;
503d48a7f8SKent Overstreet 	bool		need_discard:1;
513d48a7f8SKent Overstreet 	bool		need_inc_gen:1;
523d48a7f8SKent Overstreet #define x(_name, _bits)	u##_bits _name;
533d48a7f8SKent Overstreet 	BCH_ALLOC_FIELDS_V2()
543d48a7f8SKent Overstreet #undef  x
553d48a7f8SKent Overstreet };
567b3f84eaSKent Overstreet 
alloc_field_v1_get(const struct bch_alloc * a,const void ** p,unsigned field)577f4e1d5dSKent Overstreet static inline u64 alloc_field_v1_get(const struct bch_alloc *a,
5890541a74SKent Overstreet 				     const void **p, unsigned field)
5990541a74SKent Overstreet {
607f4e1d5dSKent Overstreet 	unsigned bytes = BCH_ALLOC_V1_FIELD_BYTES[field];
6190541a74SKent Overstreet 	u64 v;
6290541a74SKent Overstreet 
6390541a74SKent Overstreet 	if (!(a->fields & (1 << field)))
6490541a74SKent Overstreet 		return 0;
6590541a74SKent Overstreet 
6690541a74SKent Overstreet 	switch (bytes) {
6790541a74SKent Overstreet 	case 1:
6890541a74SKent Overstreet 		v = *((const u8 *) *p);
6990541a74SKent Overstreet 		break;
7090541a74SKent Overstreet 	case 2:
7190541a74SKent Overstreet 		v = le16_to_cpup(*p);
7290541a74SKent Overstreet 		break;
7390541a74SKent Overstreet 	case 4:
7490541a74SKent Overstreet 		v = le32_to_cpup(*p);
7590541a74SKent Overstreet 		break;
7690541a74SKent Overstreet 	case 8:
7790541a74SKent Overstreet 		v = le64_to_cpup(*p);
7890541a74SKent Overstreet 		break;
7990541a74SKent Overstreet 	default:
8090541a74SKent Overstreet 		BUG();
8190541a74SKent Overstreet 	}
8290541a74SKent Overstreet 
8390541a74SKent Overstreet 	*p += bytes;
8490541a74SKent Overstreet 	return v;
8590541a74SKent Overstreet }
8690541a74SKent Overstreet 
bch2_alloc_unpack_v1(struct bkey_alloc_unpacked * out,struct bkey_s_c k)877f4e1d5dSKent Overstreet static void bch2_alloc_unpack_v1(struct bkey_alloc_unpacked *out,
887f4e1d5dSKent Overstreet 				 struct bkey_s_c k)
898fe826f9SKent Overstreet {
907f4e1d5dSKent Overstreet 	const struct bch_alloc *in = bkey_s_c_to_alloc(k).v;
917f4e1d5dSKent Overstreet 	const void *d = in->data;
928fe826f9SKent Overstreet 	unsigned idx = 0;
938fe826f9SKent Overstreet 
947f4e1d5dSKent Overstreet 	out->gen = in->gen;
95460651eeSKent Overstreet 
967f4e1d5dSKent Overstreet #define x(_name, _bits) out->_name = alloc_field_v1_get(in, &d, idx++);
977f4e1d5dSKent Overstreet 	BCH_ALLOC_FIELDS_V1()
988fe826f9SKent Overstreet #undef  x
99460651eeSKent Overstreet }
1007f4e1d5dSKent Overstreet 
bch2_alloc_unpack_v2(struct bkey_alloc_unpacked * out,struct bkey_s_c k)1017f4e1d5dSKent Overstreet static int bch2_alloc_unpack_v2(struct bkey_alloc_unpacked *out,
1027f4e1d5dSKent Overstreet 				struct bkey_s_c k)
1037f4e1d5dSKent Overstreet {
1047f4e1d5dSKent Overstreet 	struct bkey_s_c_alloc_v2 a = bkey_s_c_to_alloc_v2(k);
1057f4e1d5dSKent Overstreet 	const u8 *in = a.v->data;
1067f4e1d5dSKent Overstreet 	const u8 *end = bkey_val_end(a);
1077f4e1d5dSKent Overstreet 	unsigned fieldnr = 0;
1087f4e1d5dSKent Overstreet 	int ret;
1097f4e1d5dSKent Overstreet 	u64 v;
1107f4e1d5dSKent Overstreet 
1117f4e1d5dSKent Overstreet 	out->gen	= a.v->gen;
1127f4e1d5dSKent Overstreet 	out->oldest_gen	= a.v->oldest_gen;
1137f4e1d5dSKent Overstreet 	out->data_type	= a.v->data_type;
1147f4e1d5dSKent Overstreet 
1157f4e1d5dSKent Overstreet #define x(_name, _bits)							\
1167f4e1d5dSKent Overstreet 	if (fieldnr < a.v->nr_fields) {					\
1178d344587SKent Overstreet 		ret = bch2_varint_decode_fast(in, end, &v);		\
1187f4e1d5dSKent Overstreet 		if (ret < 0)						\
1197f4e1d5dSKent Overstreet 			return ret;					\
1207f4e1d5dSKent Overstreet 		in += ret;						\
1217f4e1d5dSKent Overstreet 	} else {							\
1227f4e1d5dSKent Overstreet 		v = 0;							\
1237f4e1d5dSKent Overstreet 	}								\
1247f4e1d5dSKent Overstreet 	out->_name = v;							\
1257f4e1d5dSKent Overstreet 	if (v != out->_name)						\
1267f4e1d5dSKent Overstreet 		return -1;						\
1277f4e1d5dSKent Overstreet 	fieldnr++;
1287f4e1d5dSKent Overstreet 
1297f4e1d5dSKent Overstreet 	BCH_ALLOC_FIELDS_V2()
1307f4e1d5dSKent Overstreet #undef  x
1317f4e1d5dSKent Overstreet 	return 0;
1327f4e1d5dSKent Overstreet }
1337f4e1d5dSKent Overstreet 
bch2_alloc_unpack_v3(struct bkey_alloc_unpacked * out,struct bkey_s_c k)1343e52c222SKent Overstreet static int bch2_alloc_unpack_v3(struct bkey_alloc_unpacked *out,
1353e52c222SKent Overstreet 				struct bkey_s_c k)
1363e52c222SKent Overstreet {
1373e52c222SKent Overstreet 	struct bkey_s_c_alloc_v3 a = bkey_s_c_to_alloc_v3(k);
1383e52c222SKent Overstreet 	const u8 *in = a.v->data;
1393e52c222SKent Overstreet 	const u8 *end = bkey_val_end(a);
1403e52c222SKent Overstreet 	unsigned fieldnr = 0;
1413e52c222SKent Overstreet 	int ret;
1423e52c222SKent Overstreet 	u64 v;
1433e52c222SKent Overstreet 
1443e52c222SKent Overstreet 	out->gen	= a.v->gen;
1453e52c222SKent Overstreet 	out->oldest_gen	= a.v->oldest_gen;
1463e52c222SKent Overstreet 	out->data_type	= a.v->data_type;
1473d48a7f8SKent Overstreet 	out->need_discard = BCH_ALLOC_V3_NEED_DISCARD(a.v);
1483d48a7f8SKent Overstreet 	out->need_inc_gen = BCH_ALLOC_V3_NEED_INC_GEN(a.v);
1493e52c222SKent Overstreet 	out->journal_seq = le64_to_cpu(a.v->journal_seq);
1503e52c222SKent Overstreet 
1513e52c222SKent Overstreet #define x(_name, _bits)							\
1523e52c222SKent Overstreet 	if (fieldnr < a.v->nr_fields) {					\
1533e52c222SKent Overstreet 		ret = bch2_varint_decode_fast(in, end, &v);		\
1543e52c222SKent Overstreet 		if (ret < 0)						\
1553e52c222SKent Overstreet 			return ret;					\
1563e52c222SKent Overstreet 		in += ret;						\
1573e52c222SKent Overstreet 	} else {							\
1583e52c222SKent Overstreet 		v = 0;							\
1593e52c222SKent Overstreet 	}								\
1603e52c222SKent Overstreet 	out->_name = v;							\
1613e52c222SKent Overstreet 	if (v != out->_name)						\
1623e52c222SKent Overstreet 		return -1;						\
1633e52c222SKent Overstreet 	fieldnr++;
1643e52c222SKent Overstreet 
1653e52c222SKent Overstreet 	BCH_ALLOC_FIELDS_V2()
1663e52c222SKent Overstreet #undef  x
1673e52c222SKent Overstreet 	return 0;
1683e52c222SKent Overstreet }
1693e52c222SKent Overstreet 
bch2_alloc_unpack(struct bkey_s_c k)1703d48a7f8SKent Overstreet static struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c k)
1717f4e1d5dSKent Overstreet {
17219a614d2SKent Overstreet 	struct bkey_alloc_unpacked ret = { .gen	= 0 };
1737f4e1d5dSKent Overstreet 
1743e52c222SKent Overstreet 	switch (k.k->type) {
1753e52c222SKent Overstreet 	case KEY_TYPE_alloc:
1767f4e1d5dSKent Overstreet 		bch2_alloc_unpack_v1(&ret, k);
1773e52c222SKent Overstreet 		break;
1783e52c222SKent Overstreet 	case KEY_TYPE_alloc_v2:
1793e52c222SKent Overstreet 		bch2_alloc_unpack_v2(&ret, k);
1803e52c222SKent Overstreet 		break;
1813e52c222SKent Overstreet 	case KEY_TYPE_alloc_v3:
1823e52c222SKent Overstreet 		bch2_alloc_unpack_v3(&ret, k);
1833e52c222SKent Overstreet 		break;
1843e52c222SKent Overstreet 	}
1857f4e1d5dSKent Overstreet 
1868fe826f9SKent Overstreet 	return ret;
1878fe826f9SKent Overstreet }
1888fe826f9SKent Overstreet 
bch_alloc_v1_val_u64s(const struct bch_alloc * a)1896ad060b0SKent Overstreet static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a)
1907b3f84eaSKent Overstreet {
19190541a74SKent Overstreet 	unsigned i, bytes = offsetof(struct bch_alloc, data);
1927b3f84eaSKent Overstreet 
1937f4e1d5dSKent Overstreet 	for (i = 0; i < ARRAY_SIZE(BCH_ALLOC_V1_FIELD_BYTES); i++)
19490541a74SKent Overstreet 		if (a->fields & (1 << i))
1957f4e1d5dSKent Overstreet 			bytes += BCH_ALLOC_V1_FIELD_BYTES[i];
1967b3f84eaSKent Overstreet 
1977b3f84eaSKent Overstreet 	return DIV_ROUND_UP(bytes, sizeof(u64));
1987b3f84eaSKent Overstreet }
1997b3f84eaSKent Overstreet 
bch2_alloc_v1_validate(struct bch_fs * c,struct bkey_s_c k,struct bkey_validate_context from)200d97de0d0SKent Overstreet int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k,
201a6f4794fSKent Overstreet 			   struct bkey_validate_context from)
2027b3f84eaSKent Overstreet {
20326609b61SKent Overstreet 	struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k);
204b65db750SKent Overstreet 	int ret = 0;
20526609b61SKent Overstreet 
206e8897337SKent Overstreet 	/* allow for unknown fields */
207d97de0d0SKent Overstreet 	bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v),
208d97de0d0SKent Overstreet 			 c, alloc_v1_val_size_bad,
209b65db750SKent Overstreet 			 "incorrect value size (%zu < %u)",
210f0ac7df2SKent Overstreet 			 bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v));
211b65db750SKent Overstreet fsck_err:
212b65db750SKent Overstreet 	return ret;
2137b3f84eaSKent Overstreet }
2147b3f84eaSKent Overstreet 
bch2_alloc_v2_validate(struct bch_fs * c,struct bkey_s_c k,struct bkey_validate_context from)215d97de0d0SKent Overstreet int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k,
216a6f4794fSKent Overstreet 			   struct bkey_validate_context from)
2177f4e1d5dSKent Overstreet {
2187f4e1d5dSKent Overstreet 	struct bkey_alloc_unpacked u;
219b65db750SKent Overstreet 	int ret = 0;
2207f4e1d5dSKent Overstreet 
221d97de0d0SKent Overstreet 	bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k),
222d97de0d0SKent Overstreet 			 c, alloc_v2_unpack_error,
223b65db750SKent Overstreet 			 "unpack error");
224b65db750SKent Overstreet fsck_err:
225b65db750SKent Overstreet 	return ret;
226f0ac7df2SKent Overstreet }
227f0ac7df2SKent Overstreet 
bch2_alloc_v3_validate(struct bch_fs * c,struct bkey_s_c k,struct bkey_validate_context from)228d97de0d0SKent Overstreet int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k,
229a6f4794fSKent Overstreet 			   struct bkey_validate_context from)
2303e52c222SKent Overstreet {
2313e52c222SKent Overstreet 	struct bkey_alloc_unpacked u;
232b65db750SKent Overstreet 	int ret = 0;
2333e52c222SKent Overstreet 
234d97de0d0SKent Overstreet 	bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k),
235bafd41b4SThorsten Blum 			 c, alloc_v3_unpack_error,
236b65db750SKent Overstreet 			 "unpack error");
237b65db750SKent Overstreet fsck_err:
238b65db750SKent Overstreet 	return ret;
239f0ac7df2SKent Overstreet }
240f0ac7df2SKent Overstreet 
bch2_alloc_v4_validate(struct bch_fs * c,struct bkey_s_c k,struct bkey_validate_context from)241d97de0d0SKent Overstreet int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k,
242a6f4794fSKent Overstreet 			   struct bkey_validate_context from)
2437b3f84eaSKent Overstreet {
2448ed823b1SKent Overstreet 	struct bch_alloc_v4 a;
245b65db750SKent Overstreet 	int ret = 0;
24611c7d3e8SKent Overstreet 
2478ed823b1SKent Overstreet 	bkey_val_copy(&a, bkey_s_c_to_alloc_v4(k));
2488ed823b1SKent Overstreet 
2498ed823b1SKent Overstreet 	bkey_fsck_err_on(alloc_v4_u64s_noerror(&a) > bkey_val_u64s(k.k),
250d97de0d0SKent Overstreet 			 c, alloc_v4_val_size_bad,
251b65db750SKent Overstreet 			 "bad val size (%u > %zu)",
2528ed823b1SKent Overstreet 			 alloc_v4_u64s_noerror(&a), bkey_val_u64s(k.k));
2533d48a7f8SKent Overstreet 
2548ed823b1SKent Overstreet 	bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(&a) &&
2558ed823b1SKent Overstreet 			 BCH_ALLOC_V4_NR_BACKPOINTERS(&a),
256d97de0d0SKent Overstreet 			 c, alloc_v4_backpointers_start_bad,
257b65db750SKent Overstreet 			 "invalid backpointers_start");
258a8c752bbSKent Overstreet 
2598ed823b1SKent Overstreet 	bkey_fsck_err_on(alloc_data_type(a, a.data_type) != a.data_type,
260d97de0d0SKent Overstreet 			 c, alloc_key_data_type_bad,
261b65db750SKent Overstreet 			 "invalid data type (got %u should be %u)",
2628ed823b1SKent Overstreet 			 a.data_type, alloc_data_type(a, a.data_type));
26362491956SKent Overstreet 
264cff07e27SKent Overstreet 	for (unsigned i = 0; i < 2; i++)
2658ed823b1SKent Overstreet 		bkey_fsck_err_on(a.io_time[i] > LRU_TIME_MAX,
266d97de0d0SKent Overstreet 				 c, alloc_key_io_time_bad,
267cff07e27SKent Overstreet 				 "invalid io_time[%s]: %llu, max %llu",
268cff07e27SKent Overstreet 				 i == READ ? "read" : "write",
2698ed823b1SKent Overstreet 				 a.io_time[i], LRU_TIME_MAX);
270cff07e27SKent Overstreet 
2718ed823b1SKent Overstreet 	unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(&a) * sizeof(u64) >
27255f7962dSKent Overstreet 		offsetof(struct bch_alloc_v4, stripe_sectors)
2738ed823b1SKent Overstreet 		? a.stripe_sectors
27455f7962dSKent Overstreet 		: 0;
27555f7962dSKent Overstreet 
2768ed823b1SKent Overstreet 	switch (a.data_type) {
277822835ffSKent Overstreet 	case BCH_DATA_free:
278822835ffSKent Overstreet 	case BCH_DATA_need_gc_gens:
279822835ffSKent Overstreet 	case BCH_DATA_need_discard:
28055f7962dSKent Overstreet 		bkey_fsck_err_on(stripe_sectors ||
2818ed823b1SKent Overstreet 				 a.dirty_sectors ||
2828ed823b1SKent Overstreet 				 a.cached_sectors ||
2838ed823b1SKent Overstreet 				 a.stripe,
284d97de0d0SKent Overstreet 				 c, alloc_key_empty_but_have_data,
28555f7962dSKent Overstreet 				 "empty data type free but have data %u.%u.%u %u",
28655f7962dSKent Overstreet 				 stripe_sectors,
2878ed823b1SKent Overstreet 				 a.dirty_sectors,
2888ed823b1SKent Overstreet 				 a.cached_sectors,
2898ed823b1SKent Overstreet 				 a.stripe);
290822835ffSKent Overstreet 		break;
291822835ffSKent Overstreet 	case BCH_DATA_sb:
292822835ffSKent Overstreet 	case BCH_DATA_journal:
293822835ffSKent Overstreet 	case BCH_DATA_btree:
294822835ffSKent Overstreet 	case BCH_DATA_user:
295822835ffSKent Overstreet 	case BCH_DATA_parity:
2968ed823b1SKent Overstreet 		bkey_fsck_err_on(!a.dirty_sectors &&
29755f7962dSKent Overstreet 				 !stripe_sectors,
298d97de0d0SKent Overstreet 				 c, alloc_key_dirty_sectors_0,
299b65db750SKent Overstreet 				 "data_type %s but dirty_sectors==0",
3008ed823b1SKent Overstreet 				 bch2_data_type_str(a.data_type));
301822835ffSKent Overstreet 		break;
302822835ffSKent Overstreet 	case BCH_DATA_cached:
3038ed823b1SKent Overstreet 		bkey_fsck_err_on(!a.cached_sectors ||
3048ed823b1SKent Overstreet 				 a.dirty_sectors ||
30555f7962dSKent Overstreet 				 stripe_sectors ||
3068ed823b1SKent Overstreet 				 a.stripe,
307d97de0d0SKent Overstreet 				 c, alloc_key_cached_inconsistency,
308b65db750SKent Overstreet 				 "data type inconsistency");
309822835ffSKent Overstreet 
3108ed823b1SKent Overstreet 		bkey_fsck_err_on(!a.io_time[READ] &&
311b65db750SKent Overstreet 				 c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs,
312d97de0d0SKent Overstreet 				 c, alloc_key_cached_but_read_time_zero,
313b65db750SKent Overstreet 				 "cached bucket with read_time == 0");
314822835ffSKent Overstreet 		break;
315822835ffSKent Overstreet 	case BCH_DATA_stripe:
316822835ffSKent Overstreet 		break;
317822835ffSKent Overstreet 	}
318b65db750SKent Overstreet fsck_err:
319b65db750SKent Overstreet 	return ret;
3203d48a7f8SKent Overstreet }
3213d48a7f8SKent Overstreet 
bch2_alloc_v4_swab(struct bkey_s k)3223d48a7f8SKent Overstreet void bch2_alloc_v4_swab(struct bkey_s k)
3233d48a7f8SKent Overstreet {
3243d48a7f8SKent Overstreet 	struct bch_alloc_v4 *a = bkey_s_to_alloc_v4(k).v;
3253d48a7f8SKent Overstreet 
3269e779f3fSKent Overstreet 	a->journal_seq_nonempty	= swab64(a->journal_seq_nonempty);
3279e779f3fSKent Overstreet 	a->journal_seq_empty	= swab64(a->journal_seq_empty);
3283d48a7f8SKent Overstreet 	a->flags		= swab32(a->flags);
3293d48a7f8SKent Overstreet 	a->dirty_sectors	= swab32(a->dirty_sectors);
3303d48a7f8SKent Overstreet 	a->cached_sectors	= swab32(a->cached_sectors);
3313d48a7f8SKent Overstreet 	a->io_time[0]		= swab64(a->io_time[0]);
3323d48a7f8SKent Overstreet 	a->io_time[1]		= swab64(a->io_time[1]);
3333d48a7f8SKent Overstreet 	a->stripe		= swab32(a->stripe);
3343d48a7f8SKent Overstreet 	a->nr_external_backpointers = swab32(a->nr_external_backpointers);
33555f7962dSKent Overstreet 	a->stripe_sectors	= swab32(a->stripe_sectors);
3363d48a7f8SKent Overstreet }
3373d48a7f8SKent Overstreet 
bch2_alloc_to_text(struct printbuf * out,struct bch_fs * c,struct bkey_s_c k)3383d48a7f8SKent Overstreet void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
3393d48a7f8SKent Overstreet {
34019a614d2SKent Overstreet 	struct bch_alloc_v4 _a;
34119a614d2SKent Overstreet 	const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
342260af156SKent Overstreet 	struct bch_dev *ca = c ? bch2_dev_bucket_tryget_noerror(c, k.k->p) : NULL;
3433d48a7f8SKent Overstreet 
34419a614d2SKent Overstreet 	prt_newline(out);
34519a614d2SKent Overstreet 	printbuf_indent_add(out, 2);
3463d48a7f8SKent Overstreet 
347e58f963cSKent Overstreet 	prt_printf(out, "gen %u oldest_gen %u data_type ", a->gen, a->oldest_gen);
348e58f963cSKent Overstreet 	bch2_prt_data_type(out, a->data_type);
34919a614d2SKent Overstreet 	prt_newline(out);
3509e779f3fSKent Overstreet 	prt_printf(out, "journal_seq_nonempty %llu\n",	a->journal_seq_nonempty);
3519e779f3fSKent Overstreet 	prt_printf(out, "journal_seq_empty    %llu\n",	a->journal_seq_empty);
3527423330eSKent Overstreet 	prt_printf(out, "need_discard         %llu\n",	BCH_ALLOC_V4_NEED_DISCARD(a));
3537423330eSKent Overstreet 	prt_printf(out, "need_inc_gen         %llu\n",	BCH_ALLOC_V4_NEED_INC_GEN(a));
3547423330eSKent Overstreet 	prt_printf(out, "dirty_sectors        %u\n",	a->dirty_sectors);
35555f7962dSKent Overstreet 	prt_printf(out, "stripe_sectors       %u\n",	a->stripe_sectors);
3567423330eSKent Overstreet 	prt_printf(out, "cached_sectors       %u\n",	a->cached_sectors);
3577423330eSKent Overstreet 	prt_printf(out, "stripe               %u\n",	a->stripe);
3587423330eSKent Overstreet 	prt_printf(out, "stripe_redundancy    %u\n",	a->stripe_redundancy);
3597423330eSKent Overstreet 	prt_printf(out, "io_time[READ]        %llu\n",	a->io_time[READ]);
3607423330eSKent Overstreet 	prt_printf(out, "io_time[WRITE]       %llu\n",	a->io_time[WRITE]);
361260af156SKent Overstreet 
362260af156SKent Overstreet 	if (ca)
363260af156SKent Overstreet 		prt_printf(out, "fragmentation     %llu\n",	alloc_lru_idx_fragmentation(*a, ca));
3647423330eSKent Overstreet 	prt_printf(out, "bp_start          %llu\n", BCH_ALLOC_V4_BACKPOINTERS_START(a));
365a8c752bbSKent Overstreet 	printbuf_indent_sub(out, 2);
366260af156SKent Overstreet 
367260af156SKent Overstreet 	bch2_dev_put(ca);
36819a614d2SKent Overstreet }
36919a614d2SKent Overstreet 
__bch2_alloc_to_v4(struct bkey_s_c k,struct bch_alloc_v4 * out)37019a614d2SKent Overstreet void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out)
37119a614d2SKent Overstreet {
37219a614d2SKent Overstreet 	if (k.k->type == KEY_TYPE_alloc_v4) {
37319a614d2SKent Overstreet 		void *src, *dst;
37419a614d2SKent Overstreet 
37519a614d2SKent Overstreet 		*out = *bkey_s_c_to_alloc_v4(k).v;
37619a614d2SKent Overstreet 
37719a614d2SKent Overstreet 		src = alloc_v4_backpointers(out);
37819a614d2SKent Overstreet 		SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
37919a614d2SKent Overstreet 		dst = alloc_v4_backpointers(out);
38019a614d2SKent Overstreet 
38119a614d2SKent Overstreet 		if (src < dst)
38219a614d2SKent Overstreet 			memset(src, 0, dst - src);
38362a03559SKent Overstreet 
38462a03559SKent Overstreet 		SET_BCH_ALLOC_V4_NR_BACKPOINTERS(out, 0);
38519a614d2SKent Overstreet 	} else {
38619a614d2SKent Overstreet 		struct bkey_alloc_unpacked u = bch2_alloc_unpack(k);
38719a614d2SKent Overstreet 
38819a614d2SKent Overstreet 		*out = (struct bch_alloc_v4) {
3899e779f3fSKent Overstreet 			.journal_seq_nonempty	= u.journal_seq,
39019a614d2SKent Overstreet 			.flags			= u.need_discard,
39119a614d2SKent Overstreet 			.gen			= u.gen,
39219a614d2SKent Overstreet 			.oldest_gen		= u.oldest_gen,
39319a614d2SKent Overstreet 			.data_type		= u.data_type,
39419a614d2SKent Overstreet 			.stripe_redundancy	= u.stripe_redundancy,
39519a614d2SKent Overstreet 			.dirty_sectors		= u.dirty_sectors,
39619a614d2SKent Overstreet 			.cached_sectors		= u.cached_sectors,
39719a614d2SKent Overstreet 			.io_time[READ]		= u.read_time,
39819a614d2SKent Overstreet 			.io_time[WRITE]		= u.write_time,
39919a614d2SKent Overstreet 			.stripe			= u.stripe,
40019a614d2SKent Overstreet 		};
40119a614d2SKent Overstreet 
40219a614d2SKent Overstreet 		SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s);
40319a614d2SKent Overstreet 	}
40419a614d2SKent Overstreet }
40519a614d2SKent Overstreet 
40619a614d2SKent Overstreet static noinline struct bkey_i_alloc_v4 *
__bch2_alloc_to_v4_mut(struct btree_trans * trans,struct bkey_s_c k)40719a614d2SKent Overstreet __bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
40819a614d2SKent Overstreet {
40919a614d2SKent Overstreet 	struct bkey_i_alloc_v4 *ret;
41019a614d2SKent Overstreet 
411615fccadSKent Overstreet 	ret = bch2_trans_kmalloc(trans, max(bkey_bytes(k.k), sizeof(struct bkey_i_alloc_v4)));
41219a614d2SKent Overstreet 	if (IS_ERR(ret))
41319a614d2SKent Overstreet 		return ret;
41419a614d2SKent Overstreet 
41562a03559SKent Overstreet 	if (k.k->type == KEY_TYPE_alloc_v4) {
41662a03559SKent Overstreet 		void *src, *dst;
41762a03559SKent Overstreet 
41819a614d2SKent Overstreet 		bkey_reassemble(&ret->k_i, k);
41919a614d2SKent Overstreet 
42019a614d2SKent Overstreet 		src = alloc_v4_backpointers(&ret->v);
42119a614d2SKent Overstreet 		SET_BCH_ALLOC_V4_BACKPOINTERS_START(&ret->v, BCH_ALLOC_V4_U64s);
42219a614d2SKent Overstreet 		dst = alloc_v4_backpointers(&ret->v);
42319a614d2SKent Overstreet 
42419a614d2SKent Overstreet 		if (src < dst)
42519a614d2SKent Overstreet 			memset(src, 0, dst - src);
42662a03559SKent Overstreet 
42762a03559SKent Overstreet 		SET_BCH_ALLOC_V4_NR_BACKPOINTERS(&ret->v, 0);
42819a614d2SKent Overstreet 		set_alloc_v4_u64s(ret);
42919a614d2SKent Overstreet 	} else {
43019a614d2SKent Overstreet 		bkey_alloc_v4_init(&ret->k_i);
43119a614d2SKent Overstreet 		ret->k.p = k.k->p;
43219a614d2SKent Overstreet 		bch2_alloc_to_v4(k, &ret->v);
43319a614d2SKent Overstreet 	}
43419a614d2SKent Overstreet 	return ret;
43519a614d2SKent Overstreet }
43619a614d2SKent Overstreet 
bch2_alloc_to_v4_mut_inlined(struct btree_trans * trans,struct bkey_s_c k)43719a614d2SKent Overstreet static inline struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut_inlined(struct btree_trans *trans, struct bkey_s_c k)
43819a614d2SKent Overstreet {
43962a03559SKent Overstreet 	struct bkey_s_c_alloc_v4 a;
44062a03559SKent Overstreet 
44119a614d2SKent Overstreet 	if (likely(k.k->type == KEY_TYPE_alloc_v4) &&
44262a03559SKent Overstreet 	    ((a = bkey_s_c_to_alloc_v4(k), true) &&
44334dfa5dbSKent Overstreet 	     BCH_ALLOC_V4_NR_BACKPOINTERS(a.v) == 0))
444dbda63bbSKent Overstreet 		return bch2_bkey_make_mut_noupdate_typed(trans, k, alloc_v4);
44519a614d2SKent Overstreet 
44619a614d2SKent Overstreet 	return __bch2_alloc_to_v4_mut(trans, k);
44719a614d2SKent Overstreet }
44819a614d2SKent Overstreet 
/* Out-of-line entry point for bch2_alloc_to_v4_mut_inlined(). */
struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *trans, struct bkey_s_c k)
{
	struct bkey_i_alloc_v4 *mut = bch2_alloc_to_v4_mut_inlined(trans, k);

	return mut;
}
45319a614d2SKent Overstreet 
45419a614d2SKent Overstreet struct bkey_i_alloc_v4 *
bch2_trans_start_alloc_update_noupdate(struct btree_trans * trans,struct btree_iter * iter,struct bpos pos)455abe2f470SKent Overstreet bch2_trans_start_alloc_update_noupdate(struct btree_trans *trans, struct btree_iter *iter,
45619a614d2SKent Overstreet 				       struct bpos pos)
45719a614d2SKent Overstreet {
458abe2f470SKent Overstreet 	struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_alloc, pos,
4595dd8c60eSKent Overstreet 					       BTREE_ITER_with_updates|
4605dd8c60eSKent Overstreet 					       BTREE_ITER_cached|
4615dd8c60eSKent Overstreet 					       BTREE_ITER_intent);
462abe2f470SKent Overstreet 	int ret = bkey_err(k);
46319a614d2SKent Overstreet 	if (unlikely(ret))
464bcb79a51SKent Overstreet 		return ERR_PTR(ret);
46519a614d2SKent Overstreet 
466abe2f470SKent Overstreet 	struct bkey_i_alloc_v4 *a = bch2_alloc_to_v4_mut_inlined(trans, k);
46719a614d2SKent Overstreet 	ret = PTR_ERR_OR_ZERO(a);
46819a614d2SKent Overstreet 	if (unlikely(ret))
46919a614d2SKent Overstreet 		goto err;
47019a614d2SKent Overstreet 	return a;
47119a614d2SKent Overstreet err:
47219a614d2SKent Overstreet 	bch2_trans_iter_exit(trans, iter);
47319a614d2SKent Overstreet 	return ERR_PTR(ret);
4747b3f84eaSKent Overstreet }
4757b3f84eaSKent Overstreet 
476abe2f470SKent Overstreet __flatten
bch2_trans_start_alloc_update(struct btree_trans * trans,struct bpos pos,enum btree_iter_update_trigger_flags flags)477e0d5bc6aSKent Overstreet struct bkey_i_alloc_v4 *bch2_trans_start_alloc_update(struct btree_trans *trans, struct bpos pos,
478e0d5bc6aSKent Overstreet 						      enum btree_iter_update_trigger_flags flags)
479abe2f470SKent Overstreet {
480abe2f470SKent Overstreet 	struct btree_iter iter;
481abe2f470SKent Overstreet 	struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update_noupdate(trans, &iter, pos);
482abe2f470SKent Overstreet 	int ret = PTR_ERR_OR_ZERO(a);
483abe2f470SKent Overstreet 	if (ret)
484abe2f470SKent Overstreet 		return ERR_PTR(ret);
485abe2f470SKent Overstreet 
486e0d5bc6aSKent Overstreet 	ret = bch2_trans_update(trans, &iter, &a->k_i, flags);
487abe2f470SKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
488abe2f470SKent Overstreet 	return unlikely(ret) ? ERR_PTR(ret) : a;
489abe2f470SKent Overstreet }
490abe2f470SKent Overstreet 
alloc_gens_pos(struct bpos pos,unsigned * offset)4915250b74dSKent Overstreet static struct bpos alloc_gens_pos(struct bpos pos, unsigned *offset)
4925250b74dSKent Overstreet {
4935250b74dSKent Overstreet 	*offset = pos.offset & KEY_TYPE_BUCKET_GENS_MASK;
4945250b74dSKent Overstreet 
4955250b74dSKent Overstreet 	pos.offset >>= KEY_TYPE_BUCKET_GENS_BITS;
4965250b74dSKent Overstreet 	return pos;
4975250b74dSKent Overstreet }
4985250b74dSKent Overstreet 
bucket_gens_pos_to_alloc(struct bpos pos,unsigned offset)4995250b74dSKent Overstreet static struct bpos bucket_gens_pos_to_alloc(struct bpos pos, unsigned offset)
5005250b74dSKent Overstreet {
5015250b74dSKent Overstreet 	pos.offset <<= KEY_TYPE_BUCKET_GENS_BITS;
5025250b74dSKent Overstreet 	pos.offset += offset;
5035250b74dSKent Overstreet 	return pos;
5045250b74dSKent Overstreet }
5055250b74dSKent Overstreet 
alloc_gen(struct bkey_s_c k,unsigned offset)5065250b74dSKent Overstreet static unsigned alloc_gen(struct bkey_s_c k, unsigned offset)
5075250b74dSKent Overstreet {
5085250b74dSKent Overstreet 	return k.k->type == KEY_TYPE_bucket_gens
5095250b74dSKent Overstreet 		? bkey_s_c_to_bucket_gens(k).v->gens[offset]
5105250b74dSKent Overstreet 		: 0;
5115250b74dSKent Overstreet }
5125250b74dSKent Overstreet 
bch2_bucket_gens_validate(struct bch_fs * c,struct bkey_s_c k,struct bkey_validate_context from)513d97de0d0SKent Overstreet int bch2_bucket_gens_validate(struct bch_fs *c, struct bkey_s_c k,
514a6f4794fSKent Overstreet 			      struct bkey_validate_context from)
5155250b74dSKent Overstreet {
516b65db750SKent Overstreet 	int ret = 0;
5175250b74dSKent Overstreet 
518d97de0d0SKent Overstreet 	bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens),
519d97de0d0SKent Overstreet 			 c, bucket_gens_val_size_bad,
520b65db750SKent Overstreet 			 "bad val size (%zu != %zu)",
521b65db750SKent Overstreet 			 bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens));
522b65db750SKent Overstreet fsck_err:
523b65db750SKent Overstreet 	return ret;
5245250b74dSKent Overstreet }
5255250b74dSKent Overstreet 
bch2_bucket_gens_to_text(struct printbuf * out,struct bch_fs * c,struct bkey_s_c k)5265250b74dSKent Overstreet void bch2_bucket_gens_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
5275250b74dSKent Overstreet {
5285250b74dSKent Overstreet 	struct bkey_s_c_bucket_gens g = bkey_s_c_to_bucket_gens(k);
5295250b74dSKent Overstreet 	unsigned i;
5305250b74dSKent Overstreet 
5315250b74dSKent Overstreet 	for (i = 0; i < ARRAY_SIZE(g.v->gens); i++) {
5325250b74dSKent Overstreet 		if (i)
5335250b74dSKent Overstreet 			prt_char(out, ' ');
5345250b74dSKent Overstreet 		prt_printf(out, "%u", g.v->gens[i]);
5355250b74dSKent Overstreet 	}
5365250b74dSKent Overstreet }
5375250b74dSKent Overstreet 
bch2_bucket_gens_init(struct bch_fs * c)5385250b74dSKent Overstreet int bch2_bucket_gens_init(struct bch_fs *c)
5395250b74dSKent Overstreet {
5406bd68ec2SKent Overstreet 	struct btree_trans *trans = bch2_trans_get(c);
5415250b74dSKent Overstreet 	struct bkey_i_bucket_gens g;
5425250b74dSKent Overstreet 	bool have_bucket_gens_key = false;
5435250b74dSKent Overstreet 	int ret;
5445250b74dSKent Overstreet 
5455028b907SKent Overstreet 	ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
5465dd8c60eSKent Overstreet 				 BTREE_ITER_prefetch, k, ({
5475250b74dSKent Overstreet 		/*
5485250b74dSKent Overstreet 		 * Not a fsck error because this is checked/repaired by
5495250b74dSKent Overstreet 		 * bch2_check_alloc_key() which runs later:
5505250b74dSKent Overstreet 		 */
5515250b74dSKent Overstreet 		if (!bch2_dev_bucket_exists(c, k.k->p))
5525250b74dSKent Overstreet 			continue;
5535250b74dSKent Overstreet 
55480eab7a7SKent Overstreet 		struct bch_alloc_v4 a;
55580eab7a7SKent Overstreet 		u8 gen = bch2_alloc_to_v4(k, &a)->gen;
55680eab7a7SKent Overstreet 		unsigned offset;
55780eab7a7SKent Overstreet 		struct bpos pos = alloc_gens_pos(iter.pos, &offset);
5581ba6f48fSKent Overstreet 		int ret2 = 0;
5595250b74dSKent Overstreet 
560c2a503f3SKent Overstreet 		if (have_bucket_gens_key && !bkey_eq(g.k.p, pos)) {
5611ba6f48fSKent Overstreet 			ret2 =  bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?:
5621ba6f48fSKent Overstreet 				bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
5631ba6f48fSKent Overstreet 			if (ret2)
5641ba6f48fSKent Overstreet 				goto iter_err;
5655250b74dSKent Overstreet 			have_bucket_gens_key = false;
5665250b74dSKent Overstreet 		}
5675250b74dSKent Overstreet 
5685250b74dSKent Overstreet 		if (!have_bucket_gens_key) {
5695250b74dSKent Overstreet 			bkey_bucket_gens_init(&g.k_i);
5705250b74dSKent Overstreet 			g.k.p = pos;
5715250b74dSKent Overstreet 			have_bucket_gens_key = true;
5725250b74dSKent Overstreet 		}
5735250b74dSKent Overstreet 
5745250b74dSKent Overstreet 		g.v.gens[offset] = gen;
5751ba6f48fSKent Overstreet iter_err:
5761ba6f48fSKent Overstreet 		ret2;
57727b2df98SKent Overstreet 	}));
5785250b74dSKent Overstreet 
5795250b74dSKent Overstreet 	if (have_bucket_gens_key && !ret)
5806bd68ec2SKent Overstreet 		ret = commit_do(trans, NULL, NULL,
5813f0e297dSKent Overstreet 				BCH_TRANS_COMMIT_no_enospc,
5826bd68ec2SKent Overstreet 			bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
5835250b74dSKent Overstreet 
5846bd68ec2SKent Overstreet 	bch2_trans_put(trans);
5855250b74dSKent Overstreet 
5861bb3c2a9SKent Overstreet 	bch_err_fn(c, ret);
5875250b74dSKent Overstreet 	return ret;
5885250b74dSKent Overstreet }
5895250b74dSKent Overstreet 
bch2_alloc_read(struct bch_fs * c)59010a6ced2SKent Overstreet int bch2_alloc_read(struct bch_fs *c)
5915250b74dSKent Overstreet {
5922dd202dbSKent Overstreet 	down_read(&c->state_lock);
5932dd202dbSKent Overstreet 
5946bd68ec2SKent Overstreet 	struct btree_trans *trans = bch2_trans_get(c);
595fa6cce09SKent Overstreet 	struct bch_dev *ca = NULL;
5965250b74dSKent Overstreet 	int ret;
5975250b74dSKent Overstreet 
59810a6ced2SKent Overstreet 	if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_bucket_gens) {
5995028b907SKent Overstreet 		ret = for_each_btree_key(trans, iter, BTREE_ID_bucket_gens, POS_MIN,
6005dd8c60eSKent Overstreet 					 BTREE_ITER_prefetch, k, ({
6015250b74dSKent Overstreet 			u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
6025250b74dSKent Overstreet 			u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
6035250b74dSKent Overstreet 
6045250b74dSKent Overstreet 			if (k.k->type != KEY_TYPE_bucket_gens)
6055250b74dSKent Overstreet 				continue;
6065250b74dSKent Overstreet 
607fa6cce09SKent Overstreet 			ca = bch2_dev_iterate(c, ca, k.k->p.inode);
6085250b74dSKent Overstreet 			/*
6095250b74dSKent Overstreet 			 * Not a fsck error because this is checked/repaired by
6105250b74dSKent Overstreet 			 * bch2_check_alloc_key() which runs later:
6115250b74dSKent Overstreet 			 */
612fa6cce09SKent Overstreet 			if (!ca) {
6139180ad2eSKent Overstreet 				bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
6145250b74dSKent Overstreet 				continue;
615fa6cce09SKent Overstreet 			}
6165250b74dSKent Overstreet 
617fa6cce09SKent Overstreet 			const struct bch_bucket_gens *g = bkey_s_c_to_bucket_gens(k).v;
6185250b74dSKent Overstreet 
61980eab7a7SKent Overstreet 			for (u64 b = max_t(u64, ca->mi.first_bucket, start);
6205250b74dSKent Overstreet 			     b < min_t(u64, ca->mi.nbuckets, end);
6215250b74dSKent Overstreet 			     b++)
6225250b74dSKent Overstreet 				*bucket_gen(ca, b) = g->gens[b & KEY_TYPE_BUCKET_GENS_MASK];
62327b2df98SKent Overstreet 			0;
62427b2df98SKent Overstreet 		}));
62510a6ced2SKent Overstreet 	} else {
6265028b907SKent Overstreet 		ret = for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN,
6275dd8c60eSKent Overstreet 					 BTREE_ITER_prefetch, k, ({
628fa6cce09SKent Overstreet 			ca = bch2_dev_iterate(c, ca, k.k->p.inode);
62910a6ced2SKent Overstreet 			/*
63010a6ced2SKent Overstreet 			 * Not a fsck error because this is checked/repaired by
63110a6ced2SKent Overstreet 			 * bch2_check_alloc_key() which runs later:
63210a6ced2SKent Overstreet 			 */
633fa6cce09SKent Overstreet 			if (!ca) {
6349180ad2eSKent Overstreet 				bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
63510a6ced2SKent Overstreet 				continue;
636fa6cce09SKent Overstreet 			}
63710a6ced2SKent Overstreet 
638a319aeaeSKent Overstreet 			if (k.k->p.offset < ca->mi.first_bucket) {
6399180ad2eSKent Overstreet 				bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode, ca->mi.first_bucket));
640a319aeaeSKent Overstreet 				continue;
641a319aeaeSKent Overstreet 			}
642a319aeaeSKent Overstreet 
643a319aeaeSKent Overstreet 			if (k.k->p.offset >= ca->mi.nbuckets) {
6449180ad2eSKent Overstreet 				bch2_btree_iter_set_pos(trans, &iter, POS(k.k->p.inode + 1, 0));
645a319aeaeSKent Overstreet 				continue;
646a319aeaeSKent Overstreet 			}
647a319aeaeSKent Overstreet 
64880eab7a7SKent Overstreet 			struct bch_alloc_v4 a;
64910a6ced2SKent Overstreet 			*bucket_gen(ca, k.k->p.offset) = bch2_alloc_to_v4(k, &a)->gen;
65027b2df98SKent Overstreet 			0;
65127b2df98SKent Overstreet 		}));
65210a6ced2SKent Overstreet 	}
6535250b74dSKent Overstreet 
654fa6cce09SKent Overstreet 	bch2_dev_put(ca);
6556bd68ec2SKent Overstreet 	bch2_trans_put(trans);
6565250b74dSKent Overstreet 
6572dd202dbSKent Overstreet 	up_read(&c->state_lock);
6581bb3c2a9SKent Overstreet 	bch_err_fn(c, ret);
6595250b74dSKent Overstreet 	return ret;
6605250b74dSKent Overstreet }
6615250b74dSKent Overstreet 
/* Free space/discard btree: */
663c6b2826cSKent Overstreet 
/*
 * Report that a bucket is incorrectly set/unset in the need_discard or
 * freespace btree.
 *
 * @alloc_k:	alloc key of the bucket, printed as part of the message
 * @set:	selects the "set"/"unset" wording of the message
 * @discard:	true for the need_discard btree, false for freespace
 * @repair:	when true the error is additionally flagged FSCK_CAN_FIX
 *
 * Returns whatever __bch2_fsck_err() returns, except that
 * -BCH_ERR_fsck_ignore and -BCH_ERR_fsck_errors_not_fixed are mapped to 0.
 */
static int __need_discard_or_freespace_err(struct btree_trans *trans,
					   struct bkey_s_c alloc_k,
					   bool set, bool discard, bool repair)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	enum bch_fsck_flags flags = FSCK_CAN_IGNORE;
	enum bch_sb_error_id err_id;
	enum btree_id btree;

	if (repair)
		flags |= FSCK_CAN_FIX;

	if (discard) {
		err_id	= BCH_FSCK_ERR_need_discard_key_wrong;
		btree	= BTREE_ID_need_discard;
	} else {
		err_id	= BCH_FSCK_ERR_freespace_key_wrong;
		btree	= BTREE_ID_freespace;
	}

	bch2_bkey_val_to_text(&buf, c, alloc_k);

	int ret = __bch2_fsck_err(NULL, trans, flags, err_id,
				  "bucket incorrectly %sset in %s btree\n%s",
				  set ? "" : "un",
				  bch2_btree_id_str(btree),
				  buf.buf);

	/* These two outcomes are not treated as failures by callers */
	if (ret == -BCH_ERR_fsck_ignore ||
	    ret == -BCH_ERR_fsck_errors_not_fixed)
		ret = 0;

	printbuf_exit(&buf);
	return ret;
}

/* Route the helper's return value through fsck_err_wrap() */
#define need_discard_or_freespace_err(...)		\
	fsck_err_wrap(__need_discard_or_freespace_err(__VA_ARGS__))

/* As above, but only evaluated when @cond holds; otherwise yields false */
#define need_discard_or_freespace_err_on(cond, ...)		\
	(unlikely(cond) ?  need_discard_or_freespace_err(__VA_ARGS__) : false)
696c8e58813SKent Overstreet 
/*
 * Set or clear the index entry for a bucket in the freespace or need_discard
 * btree, chosen by the bucket's data type.
 *
 * @alloc_k:	alloc key for the bucket; supplies the bucket position
 * @a:		alloc_v4 view of the bucket state the index entry is derived from
 * @set:	true to set the entry, false to clear it
 *
 * Buckets whose data type is neither BCH_DATA_free nor BCH_DATA_need_discard
 * have no index entry; 0 is returned without touching any btree.
 */
static int bch2_bucket_do_index(struct btree_trans *trans,
				struct bch_dev *ca,
				struct bkey_s_c alloc_k,
				const struct bch_alloc_v4 *a,
				bool set)
{
	enum btree_id btree;
	struct bpos pos;

	switch (a->data_type) {
	case BCH_DATA_free:
		btree	= BTREE_ID_freespace;
		pos	= alloc_freespace_pos(alloc_k.k->p, *a);
		break;
	case BCH_DATA_need_discard:
		btree	= BTREE_ID_need_discard;
		pos	= alloc_k.k->p;
		break;
	default:
		/* not an indexed data type */
		return 0;
	}

	struct btree_iter iter;
	struct bkey_s_c old = bch2_bkey_get_iter(trans, &iter, btree, pos, BTREE_ITER_intent);
	int ret = bkey_err(old);
	/*
	 * NOTE(review): returns without bch2_trans_iter_exit() - presumably
	 * bch2_bkey_get_iter() releases the iterator itself on error; confirm.
	 */
	if (ret)
		return ret;

	/*
	 * Once the device's freespace is initialized, the entry should be in
	 * the opposite state from the one we are about to write; complain if
	 * a key is already present when setting, or absent when clearing.
	 *
	 * The `ret` variable and `fsck_err` label are presumably used by the
	 * wrapper macro (no visible goto targets the label) - keep both names.
	 */
	need_discard_or_freespace_err_on(ca->mi.freespace_initialized &&
					 !!old.k->type == set,
					 trans, alloc_k, set,
					 btree == BTREE_ID_need_discard, false);

	ret = bch2_btree_bit_mod_iter(trans, &iter, set);
fsck_err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
739c6b2826cSKent Overstreet 
bch2_bucket_gen_update(struct btree_trans * trans,struct bpos bucket,u8 gen)7405250b74dSKent Overstreet static noinline int bch2_bucket_gen_update(struct btree_trans *trans,
7415250b74dSKent Overstreet 					   struct bpos bucket, u8 gen)
7425250b74dSKent Overstreet {
7435250b74dSKent Overstreet 	struct btree_iter iter;
7445250b74dSKent Overstreet 	unsigned offset;
7455250b74dSKent Overstreet 	struct bpos pos = alloc_gens_pos(bucket, &offset);
7465250b74dSKent Overstreet 	struct bkey_i_bucket_gens *g;
7475250b74dSKent Overstreet 	struct bkey_s_c k;
7485250b74dSKent Overstreet 	int ret;
7495250b74dSKent Overstreet 
7505250b74dSKent Overstreet 	g = bch2_trans_kmalloc(trans, sizeof(*g));
7515250b74dSKent Overstreet 	ret = PTR_ERR_OR_ZERO(g);
7525250b74dSKent Overstreet 	if (ret)
7535250b74dSKent Overstreet 		return ret;
7545250b74dSKent Overstreet 
755bcb79a51SKent Overstreet 	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_bucket_gens, pos,
7565dd8c60eSKent Overstreet 			       BTREE_ITER_intent|
7575dd8c60eSKent Overstreet 			       BTREE_ITER_with_updates);
7585250b74dSKent Overstreet 	ret = bkey_err(k);
7595250b74dSKent Overstreet 	if (ret)
760bcb79a51SKent Overstreet 		return ret;
7615250b74dSKent Overstreet 
7625250b74dSKent Overstreet 	if (k.k->type != KEY_TYPE_bucket_gens) {
7635250b74dSKent Overstreet 		bkey_bucket_gens_init(&g->k_i);
7645250b74dSKent Overstreet 		g->k.p = iter.pos;
7655250b74dSKent Overstreet 	} else {
7665250b74dSKent Overstreet 		bkey_reassemble(&g->k_i, k);
7675250b74dSKent Overstreet 	}
7685250b74dSKent Overstreet 
7695250b74dSKent Overstreet 	g->v.gens[offset] = gen;
7705250b74dSKent Overstreet 
7715250b74dSKent Overstreet 	ret = bch2_trans_update(trans, &iter, &g->k_i, 0);
7725250b74dSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
7735250b74dSKent Overstreet 	return ret;
7745250b74dSKent Overstreet }
7755250b74dSKent Overstreet 
/*
 * Apply a (buckets, sectors, fragmented) delta to the dev_data_type
 * accounting entry keyed by @ca's device index and @data_type.
 *
 * Only the BTREE_TRIGGER_gc bit of @flags is forwarded to the accounting
 * helper.
 */
static inline int bch2_dev_data_type_accounting_mod(struct btree_trans *trans, struct bch_dev *ca,
						    enum bch_data_type data_type,
						    s64 delta_buckets,
						    s64 delta_sectors,
						    s64 delta_fragmented, unsigned flags)
{
	s64 deltas[3] = {
		delta_buckets,
		delta_sectors,
		delta_fragmented,
	};

	return bch2_disk_accounting_mod2(trans, flags & BTREE_TRIGGER_gc,
					 deltas, dev_data_type,
					 .dev		= ca->dev_idx,
					 .data_type	= data_type);
}
7891d16c605SKent Overstreet 
bch2_alloc_key_to_dev_counters(struct btree_trans * trans,struct bch_dev * ca,const struct bch_alloc_v4 * old,const struct bch_alloc_v4 * new,unsigned flags)7901d16c605SKent Overstreet int bch2_alloc_key_to_dev_counters(struct btree_trans *trans, struct bch_dev *ca,
7911d16c605SKent Overstreet 				   const struct bch_alloc_v4 *old,
7921d16c605SKent Overstreet 				   const struct bch_alloc_v4 *new,
7931d16c605SKent Overstreet 				   unsigned flags)
7941d16c605SKent Overstreet {
7951d16c605SKent Overstreet 	s64 old_sectors = bch2_bucket_sectors(*old);
7961d16c605SKent Overstreet 	s64 new_sectors = bch2_bucket_sectors(*new);
7971d16c605SKent Overstreet 	if (old->data_type != new->data_type) {
7981d16c605SKent Overstreet 		int ret = bch2_dev_data_type_accounting_mod(trans, ca, new->data_type,
7991d16c605SKent Overstreet 				 1,  new_sectors,  bch2_bucket_sectors_fragmented(ca, *new), flags) ?:
8001d16c605SKent Overstreet 			  bch2_dev_data_type_accounting_mod(trans, ca, old->data_type,
8011d16c605SKent Overstreet 				-1, -old_sectors, -bch2_bucket_sectors_fragmented(ca, *old), flags);
8021d16c605SKent Overstreet 		if (ret)
8031d16c605SKent Overstreet 			return ret;
8041d16c605SKent Overstreet 	} else if (old_sectors != new_sectors) {
8051d16c605SKent Overstreet 		int ret = bch2_dev_data_type_accounting_mod(trans, ca, new->data_type,
8061d16c605SKent Overstreet 					 0,
8071d16c605SKent Overstreet 					 new_sectors - old_sectors,
8081d16c605SKent Overstreet 					 bch2_bucket_sectors_fragmented(ca, *new) -
8091d16c605SKent Overstreet 					 bch2_bucket_sectors_fragmented(ca, *old), flags);
8101d16c605SKent Overstreet 		if (ret)
8111d16c605SKent Overstreet 			return ret;
8121d16c605SKent Overstreet 	}
8131d16c605SKent Overstreet 
8141d16c605SKent Overstreet 	s64 old_unstriped = bch2_bucket_sectors_unstriped(*old);
8151d16c605SKent Overstreet 	s64 new_unstriped = bch2_bucket_sectors_unstriped(*new);
8161d16c605SKent Overstreet 	if (old_unstriped != new_unstriped) {
8171d16c605SKent Overstreet 		int ret = bch2_dev_data_type_accounting_mod(trans, ca, BCH_DATA_unstriped,
8181d16c605SKent Overstreet 					 !!new_unstriped - !!old_unstriped,
8191d16c605SKent Overstreet 					 new_unstriped - old_unstriped,
8201d16c605SKent Overstreet 					 0,
8211d16c605SKent Overstreet 					 flags);
8221d16c605SKent Overstreet 		if (ret)
8231d16c605SKent Overstreet 			return ret;
8241d16c605SKent Overstreet 	}
8251d16c605SKent Overstreet 
8261d16c605SKent Overstreet 	return 0;
8271d16c605SKent Overstreet }
8281d16c605SKent Overstreet 
bch2_trigger_alloc(struct btree_trans * trans,enum btree_id btree,unsigned level,struct bkey_s_c old,struct bkey_s new,enum btree_iter_update_trigger_flags flags)829153d1c63SKent Overstreet int bch2_trigger_alloc(struct btree_trans *trans,
830153d1c63SKent Overstreet 		       enum btree_id btree, unsigned level,
831717296c3SKent Overstreet 		       struct bkey_s_c old, struct bkey_s new,
8322d288745SNathan Chancellor 		       enum btree_iter_update_trigger_flags flags)
833c6b2826cSKent Overstreet {
834c6b2826cSKent Overstreet 	struct bch_fs *c = trans->c;
8359432e90dSKent Overstreet 	struct printbuf buf = PRINTBUF;
836c6b2826cSKent Overstreet 	int ret = 0;
837c6b2826cSKent Overstreet 
838a7f1c26fSKent Overstreet 	struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p);
839a7f1c26fSKent Overstreet 	if (!ca)
8408a9f3d05SKent Overstreet 		return -BCH_ERR_trigger_alloc;
841c6b2826cSKent Overstreet 
842153d1c63SKent Overstreet 	struct bch_alloc_v4 old_a_convert;
843153d1c63SKent Overstreet 	const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert);
844bd864bc2SKent Overstreet 
845bd864bc2SKent Overstreet 	struct bch_alloc_v4 *new_a;
846bd864bc2SKent Overstreet 	if (likely(new.k->type == KEY_TYPE_alloc_v4)) {
847bd864bc2SKent Overstreet 		new_a = bkey_s_to_alloc_v4(new).v;
848bd864bc2SKent Overstreet 	} else {
849e150a7e8SKent Overstreet 		BUG_ON(!(flags & (BTREE_TRIGGER_gc|BTREE_TRIGGER_check_repair)));
850bd864bc2SKent Overstreet 
851bd864bc2SKent Overstreet 		struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c);
852bd864bc2SKent Overstreet 		ret = PTR_ERR_OR_ZERO(new_ka);
853bd864bc2SKent Overstreet 		if (unlikely(ret))
854bd864bc2SKent Overstreet 			goto err;
855bd864bc2SKent Overstreet 		new_a = &new_ka->v;
856bd864bc2SKent Overstreet 	}
857c6b2826cSKent Overstreet 
85838ad9dc8SKent Overstreet 	if (flags & BTREE_TRIGGER_transactional) {
859fa9bb741SKent Overstreet 		alloc_data_type_set(new_a, new_a->data_type);
860822835ffSKent Overstreet 
861be565740SKent Overstreet 		int is_empty_delta = (int) data_type_is_empty(new_a->data_type) -
862be565740SKent Overstreet 				     (int) data_type_is_empty(old_a->data_type);
863be565740SKent Overstreet 
864be565740SKent Overstreet 		if (is_empty_delta < 0) {
865cff07e27SKent Overstreet 			new_a->io_time[READ] = bch2_current_io_time(c, READ);
866cff07e27SKent Overstreet 			new_a->io_time[WRITE]= bch2_current_io_time(c, WRITE);
867c6b2826cSKent Overstreet 			SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
86859cc38b8SKent Overstreet 			SET_BCH_ALLOC_V4_NEED_DISCARD(new_a, true);
869c6b2826cSKent Overstreet 		}
870c6b2826cSKent Overstreet 
871822835ffSKent Overstreet 		if (data_type_is_empty(new_a->data_type) &&
872822835ffSKent Overstreet 		    BCH_ALLOC_V4_NEED_INC_GEN(new_a) &&
873717296c3SKent Overstreet 		    !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset)) {
87469bd8a92SKent Overstreet 			if (new_a->oldest_gen == new_a->gen &&
87569bd8a92SKent Overstreet 			    !bch2_bucket_sectors_total(*new_a))
87669bd8a92SKent Overstreet 				new_a->oldest_gen++;
877c6b2826cSKent Overstreet 			new_a->gen++;
878c6b2826cSKent Overstreet 			SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, false);
879f648b6c1SKent Overstreet 			alloc_data_type_set(new_a, new_a->data_type);
880c6b2826cSKent Overstreet 		}
881c6b2826cSKent Overstreet 
88219a614d2SKent Overstreet 		if (old_a->data_type != new_a->data_type ||
883822835ffSKent Overstreet 		    (new_a->data_type == BCH_DATA_free &&
88419a614d2SKent Overstreet 		     alloc_freespace_genbits(*old_a) != alloc_freespace_genbits(*new_a))) {
885267039d0SKent Overstreet 			ret =   bch2_bucket_do_index(trans, ca, old, old_a, false) ?:
886267039d0SKent Overstreet 				bch2_bucket_do_index(trans, ca, new.s_c, new_a, true);
887c6b2826cSKent Overstreet 			if (ret)
888a7f1c26fSKent Overstreet 				goto err;
889c6b2826cSKent Overstreet 		}
890c6b2826cSKent Overstreet 
8917003589dSKent Overstreet 		if (new_a->data_type == BCH_DATA_cached &&
8927003589dSKent Overstreet 		    !new_a->io_time[READ])
893cff07e27SKent Overstreet 			new_a->io_time[READ] = bch2_current_io_time(c, READ);
8947003589dSKent Overstreet 
895717296c3SKent Overstreet 		ret = bch2_lru_change(trans, new.k->p.inode,
896717296c3SKent Overstreet 				      bucket_to_u64(new.k->p),
897e1304967SKent Overstreet 				      alloc_lru_idx_read(*old_a),
898e1304967SKent Overstreet 				      alloc_lru_idx_read(*new_a));
899c6b2826cSKent Overstreet 		if (ret)
900a7f1c26fSKent Overstreet 			goto err;
901c6b2826cSKent Overstreet 
90280c33085SKent Overstreet 		ret = bch2_lru_change(trans,
903b8e37c16SKent Overstreet 				      BCH_LRU_BUCKET_FRAGMENTATION,
904717296c3SKent Overstreet 				      bucket_to_u64(new.k->p),
905e1304967SKent Overstreet 				      alloc_lru_idx_fragmentation(*old_a, ca),
906e1304967SKent Overstreet 				      alloc_lru_idx_fragmentation(*new_a, ca));
90780c33085SKent Overstreet 		if (ret)
908a7f1c26fSKent Overstreet 			goto err;
90980c33085SKent Overstreet 
9105250b74dSKent Overstreet 		if (old_a->gen != new_a->gen) {
911717296c3SKent Overstreet 			ret = bch2_bucket_gen_update(trans, new.k->p, new_a->gen);
9125250b74dSKent Overstreet 			if (ret)
913a7f1c26fSKent Overstreet 				goto err;
9145250b74dSKent Overstreet 		}
9155250b74dSKent Overstreet 
9165dd8c60eSKent Overstreet 		if ((flags & BTREE_TRIGGER_bucket_invalidate) &&
91725f64e99SKent Overstreet 		    old_a->cached_sectors) {
9181d16c605SKent Overstreet 			ret = bch2_mod_dev_cached_sectors(trans, ca->dev_idx,
919fb23d57aSKent Overstreet 					 -((s64) old_a->cached_sectors),
920fb23d57aSKent Overstreet 					 flags & BTREE_TRIGGER_gc);
92125f64e99SKent Overstreet 			if (ret)
922a7f1c26fSKent Overstreet 				goto err;
92325f64e99SKent Overstreet 		}
9241d16c605SKent Overstreet 
9251d16c605SKent Overstreet 		ret = bch2_alloc_key_to_dev_counters(trans, ca, old_a, new_a, flags);
9261d16c605SKent Overstreet 		if (ret)
9271d16c605SKent Overstreet 			goto err;
928c6b2826cSKent Overstreet 	}
929c6b2826cSKent Overstreet 
9305dd8c60eSKent Overstreet 	if ((flags & BTREE_TRIGGER_atomic) && (flags & BTREE_TRIGGER_insert)) {
9310eafe758SKent Overstreet 		u64 transaction_seq = trans->journal_res.seq;
9329e779f3fSKent Overstreet 		BUG_ON(!transaction_seq);
9336820ac2cSKent Overstreet 
9349e779f3fSKent Overstreet 		if (log_fsck_err_on(transaction_seq && new_a->journal_seq_nonempty > transaction_seq,
9350eafe758SKent Overstreet 				    trans, alloc_key_journal_seq_in_future,
9360eafe758SKent Overstreet 				    "bucket journal seq in future (currently at %llu)\n%s",
9370eafe758SKent Overstreet 				    journal_cur_seq(&c->journal),
9380eafe758SKent Overstreet 				    (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf)))
9399e779f3fSKent Overstreet 			new_a->journal_seq_nonempty = transaction_seq;
9400eafe758SKent Overstreet 
9410eafe758SKent Overstreet 		int is_empty_delta = (int) data_type_is_empty(new_a->data_type) -
9420eafe758SKent Overstreet 				     (int) data_type_is_empty(old_a->data_type);
9430eafe758SKent Overstreet 
9449e779f3fSKent Overstreet 		/*
9459e779f3fSKent Overstreet 		 * Record journal sequence number of empty -> nonempty transition:
9469e779f3fSKent Overstreet 		 * Note that there may be multiple empty -> nonempty
9479e779f3fSKent Overstreet 		 * transitions, data in a bucket may be overwritten while we're
9489e779f3fSKent Overstreet 		 * still writing to it - so be careful to only record the first:
9499e779f3fSKent Overstreet 		 * */
9509e779f3fSKent Overstreet 		if (is_empty_delta < 0 &&
9519e779f3fSKent Overstreet 		    new_a->journal_seq_empty <= c->journal.flushed_seq_ondisk) {
9529e779f3fSKent Overstreet 			new_a->journal_seq_nonempty	= transaction_seq;
9539e779f3fSKent Overstreet 			new_a->journal_seq_empty	= 0;
9549e779f3fSKent Overstreet 		}
9556820ac2cSKent Overstreet 
9566820ac2cSKent Overstreet 		/*
9570eafe758SKent Overstreet 		 * Bucket becomes empty: mark it as waiting for a journal flush,
9580eafe758SKent Overstreet 		 * unless updates since empty -> nonempty transition were never
9590eafe758SKent Overstreet 		 * flushed - we may need to ask the journal not to flush
9600eafe758SKent Overstreet 		 * intermediate sequence numbers:
9616820ac2cSKent Overstreet 		 */
9620eafe758SKent Overstreet 		if (is_empty_delta > 0) {
9639e779f3fSKent Overstreet 			if (new_a->journal_seq_nonempty == transaction_seq ||
96489e74eccSKent Overstreet 			    bch2_journal_noflush_seq(&c->journal,
9659e779f3fSKent Overstreet 						     new_a->journal_seq_nonempty,
9669e779f3fSKent Overstreet 						     transaction_seq)) {
9679e779f3fSKent Overstreet 				new_a->journal_seq_nonempty = new_a->journal_seq_empty = 0;
9689e779f3fSKent Overstreet 			} else {
9699e779f3fSKent Overstreet 				new_a->journal_seq_empty = transaction_seq;
9706820ac2cSKent Overstreet 
9716820ac2cSKent Overstreet 				ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
9726820ac2cSKent Overstreet 									   c->journal.flushed_seq_ondisk,
9736820ac2cSKent Overstreet 									   new.k->p.inode, new.k->p.offset,
9740eafe758SKent Overstreet 									   transaction_seq);
97538ad9dc8SKent Overstreet 				if (bch2_fs_fatal_err_on(ret, c,
9769e779f3fSKent Overstreet 						"setting bucket_needs_journal_commit: %s",
9779e779f3fSKent Overstreet 						bch2_err_str(ret)))
978a7f1c26fSKent Overstreet 					goto err;
9796820ac2cSKent Overstreet 			}
9800eafe758SKent Overstreet 		}
9816820ac2cSKent Overstreet 
9829432e90dSKent Overstreet 		if (new_a->gen != old_a->gen) {
9831d16c605SKent Overstreet 			rcu_read_lock();
9849432e90dSKent Overstreet 			u8 *gen = bucket_gen(ca, new.k->p.offset);
9859432e90dSKent Overstreet 			if (unlikely(!gen)) {
9861d16c605SKent Overstreet 				rcu_read_unlock();
9879432e90dSKent Overstreet 				goto invalid_bucket;
9889432e90dSKent Overstreet 			}
9899432e90dSKent Overstreet 			*gen = new_a->gen;
9901d16c605SKent Overstreet 			rcu_read_unlock();
9919432e90dSKent Overstreet 		}
9926820ac2cSKent Overstreet 
9936e9d0558SKent Overstreet #define eval_state(_a, expr)		({ const struct bch_alloc_v4 *a = _a; expr; })
9946e9d0558SKent Overstreet #define statechange(expr)		!eval_state(old_a, expr) && eval_state(new_a, expr)
9959e779f3fSKent Overstreet #define bucket_flushed(a)		(a->journal_seq_empty <= c->journal.flushed_seq_ondisk)
9966e9d0558SKent Overstreet 
997a393f331SKent Overstreet 		if (statechange(a->data_type == BCH_DATA_free) &&
998a393f331SKent Overstreet 		    bucket_flushed(new_a))
9996820ac2cSKent Overstreet 			closure_wake_up(&c->freelist_wait);
10006820ac2cSKent Overstreet 
1001a393f331SKent Overstreet 		if (statechange(a->data_type == BCH_DATA_need_discard) &&
10023727ca56SKent Overstreet 		    !bch2_bucket_is_open_safe(c, new.k->p.inode, new.k->p.offset) &&
1003a393f331SKent Overstreet 		    bucket_flushed(new_a))
100464ee1431SKent Overstreet 			bch2_discard_one_bucket_fast(ca, new.k->p.offset);
10056820ac2cSKent Overstreet 
10066e9d0558SKent Overstreet 		if (statechange(a->data_type == BCH_DATA_cached) &&
10076e9d0558SKent Overstreet 		    !bch2_bucket_is_open(c, new.k->p.inode, new.k->p.offset) &&
10086820ac2cSKent Overstreet 		    should_invalidate_buckets(ca, bch2_dev_usage_read(ca)))
100964ee1431SKent Overstreet 			bch2_dev_do_invalidates(ca);
10106820ac2cSKent Overstreet 
10116e9d0558SKent Overstreet 		if (statechange(a->data_type == BCH_DATA_need_gc_gens))
101210330402SKent Overstreet 			bch2_gc_gens_async(c);
1013153d1c63SKent Overstreet 	}
101438ad9dc8SKent Overstreet 
101538ad9dc8SKent Overstreet 	if ((flags & BTREE_TRIGGER_gc) && (flags & BTREE_TRIGGER_insert)) {
101638ad9dc8SKent Overstreet 		rcu_read_lock();
101738ad9dc8SKent Overstreet 		struct bucket *g = gc_bucket(ca, new.k->p.offset);
101838ad9dc8SKent Overstreet 		if (unlikely(!g)) {
101938ad9dc8SKent Overstreet 			rcu_read_unlock();
102038ad9dc8SKent Overstreet 			goto invalid_bucket;
102138ad9dc8SKent Overstreet 		}
102238ad9dc8SKent Overstreet 		g->gen_valid	= 1;
102338ad9dc8SKent Overstreet 		g->gen		= new_a->gen;
102438ad9dc8SKent Overstreet 		rcu_read_unlock();
102538ad9dc8SKent Overstreet 	}
1026a7f1c26fSKent Overstreet err:
10270eafe758SKent Overstreet fsck_err:
10289432e90dSKent Overstreet 	printbuf_exit(&buf);
1029a7f1c26fSKent Overstreet 	bch2_dev_put(ca);
1030a7f1c26fSKent Overstreet 	return ret;
10319432e90dSKent Overstreet invalid_bucket:
10329432e90dSKent Overstreet 	bch2_fs_inconsistent(c, "reference to invalid bucket\n%s",
10339432e90dSKent Overstreet 			     (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf));
10348a9f3d05SKent Overstreet 	ret = -BCH_ERR_trigger_alloc;
10359432e90dSKent Overstreet 	goto err;
10366820ac2cSKent Overstreet }
10376820ac2cSKent Overstreet 
1038d23124c7SKent Overstreet /*
10395dd8c60eSKent Overstreet  * This synthesizes deleted extents for holes, similar to BTREE_ITER_slots for
1040d23124c7SKent Overstreet  * extents style btrees, but works on non-extents btrees:
1041d23124c7SKent Overstreet  */
bch2_get_key_or_hole(struct btree_trans * trans,struct btree_iter * iter,struct bpos end,struct bkey * hole)10429180ad2eSKent Overstreet static struct bkey_s_c bch2_get_key_or_hole(struct btree_trans *trans, struct btree_iter *iter,
10439180ad2eSKent Overstreet 					    struct bpos end, struct bkey *hole)
1044d23124c7SKent Overstreet {
10459180ad2eSKent Overstreet 	struct bkey_s_c k = bch2_btree_iter_peek_slot(trans, iter);
1046d23124c7SKent Overstreet 
1047d23124c7SKent Overstreet 	if (bkey_err(k))
1048d23124c7SKent Overstreet 		return k;
1049d23124c7SKent Overstreet 
1050d23124c7SKent Overstreet 	if (k.k->type) {
1051d23124c7SKent Overstreet 		return k;
1052d23124c7SKent Overstreet 	} else {
1053d23124c7SKent Overstreet 		struct btree_iter iter2;
1054d23124c7SKent Overstreet 		struct bpos next;
1055d23124c7SKent Overstreet 
10569180ad2eSKent Overstreet 		bch2_trans_copy_iter(trans, &iter2, iter);
10571546cf97SKent Overstreet 
10589180ad2eSKent Overstreet 		struct btree_path *path = btree_iter_path(trans, iter);
105907f383c7SKent Overstreet 		if (!bpos_eq(path->l[0].b->key.k.p, SPOS_MAX))
106007f383c7SKent Overstreet 			end = bkey_min(end, bpos_nosnap_successor(path->l[0].b->key.k.p));
10611546cf97SKent Overstreet 
10621546cf97SKent Overstreet 		end = bkey_min(end, POS(iter->pos.inode, iter->pos.offset + U32_MAX - 1));
10631546cf97SKent Overstreet 
10641546cf97SKent Overstreet 		/*
10651546cf97SKent Overstreet 		 * btree node min/max is a closed interval, upto takes a half
10661546cf97SKent Overstreet 		 * open interval:
10671546cf97SKent Overstreet 		 */
10689180ad2eSKent Overstreet 		k = bch2_btree_iter_peek_max(trans, &iter2, end);
1069d23124c7SKent Overstreet 		next = iter2.pos;
10709180ad2eSKent Overstreet 		bch2_trans_iter_exit(trans, &iter2);
1071d23124c7SKent Overstreet 
1072d23124c7SKent Overstreet 		BUG_ON(next.offset >= iter->pos.offset + U32_MAX);
1073d23124c7SKent Overstreet 
1074d23124c7SKent Overstreet 		if (bkey_err(k))
1075d23124c7SKent Overstreet 			return k;
1076d23124c7SKent Overstreet 
1077d23124c7SKent Overstreet 		bkey_init(hole);
1078d23124c7SKent Overstreet 		hole->p = iter->pos;
1079d23124c7SKent Overstreet 
1080d23124c7SKent Overstreet 		bch2_key_resize(hole, next.offset - iter->pos.offset);
1081d23124c7SKent Overstreet 		return (struct bkey_s_c) { hole, NULL };
1082d23124c7SKent Overstreet 	}
1083d23124c7SKent Overstreet }
1084d23124c7SKent Overstreet 
next_bucket(struct bch_fs * c,struct bch_dev ** ca,struct bpos * bucket)1085bc3204c8SKent Overstreet static bool next_bucket(struct bch_fs *c, struct bch_dev **ca, struct bpos *bucket)
1086d23124c7SKent Overstreet {
1087bc3204c8SKent Overstreet 	if (*ca) {
1088bc3204c8SKent Overstreet 		if (bucket->offset < (*ca)->mi.first_bucket)
1089bc3204c8SKent Overstreet 			bucket->offset = (*ca)->mi.first_bucket;
1090d23124c7SKent Overstreet 
1091bc3204c8SKent Overstreet 		if (bucket->offset < (*ca)->mi.nbuckets)
1092d23124c7SKent Overstreet 			return true;
1093d23124c7SKent Overstreet 
1094bc3204c8SKent Overstreet 		bch2_dev_put(*ca);
1095bc3204c8SKent Overstreet 		*ca = NULL;
1096d23124c7SKent Overstreet 		bucket->inode++;
1097d23124c7SKent Overstreet 		bucket->offset = 0;
1098d23124c7SKent Overstreet 	}
1099d23124c7SKent Overstreet 
1100d23124c7SKent Overstreet 	rcu_read_lock();
1101bc3204c8SKent Overstreet 	*ca = __bch2_next_dev_idx(c, bucket->inode, NULL);
1102bc3204c8SKent Overstreet 	if (*ca) {
1103bc3204c8SKent Overstreet 		*bucket = POS((*ca)->dev_idx, (*ca)->mi.first_bucket);
1104bc3204c8SKent Overstreet 		bch2_dev_get(*ca);
1105bc3204c8SKent Overstreet 	}
1106d23124c7SKent Overstreet 	rcu_read_unlock();
1107d23124c7SKent Overstreet 
1108bc3204c8SKent Overstreet 	return *ca != NULL;
1109d23124c7SKent Overstreet }
1110d23124c7SKent Overstreet 
bch2_get_key_or_real_bucket_hole(struct btree_trans * trans,struct btree_iter * iter,struct bch_dev ** ca,struct bkey * hole)11119180ad2eSKent Overstreet static struct bkey_s_c bch2_get_key_or_real_bucket_hole(struct btree_trans *trans,
11129180ad2eSKent Overstreet 							struct btree_iter *iter,
1113bc3204c8SKent Overstreet 							struct bch_dev **ca, struct bkey *hole)
1114d23124c7SKent Overstreet {
11159180ad2eSKent Overstreet 	struct bch_fs *c = trans->c;
1116d23124c7SKent Overstreet 	struct bkey_s_c k;
1117d23124c7SKent Overstreet again:
11189180ad2eSKent Overstreet 	k = bch2_get_key_or_hole(trans, iter, POS_MAX, hole);
1119d23124c7SKent Overstreet 	if (bkey_err(k))
1120d23124c7SKent Overstreet 		return k;
1121d23124c7SKent Overstreet 
1122bc3204c8SKent Overstreet 	*ca = bch2_dev_iterate_noerror(c, *ca, k.k->p.inode);
1123d23124c7SKent Overstreet 
1124bc3204c8SKent Overstreet 	if (!k.k->type) {
1125bc3204c8SKent Overstreet 		struct bpos hole_start = bkey_start_pos(k.k);
1126bc3204c8SKent Overstreet 
1127bc3204c8SKent Overstreet 		if (!*ca || !bucket_valid(*ca, hole_start.offset)) {
1128bc3204c8SKent Overstreet 			if (!next_bucket(c, ca, &hole_start))
1129d23124c7SKent Overstreet 				return bkey_s_c_null;
1130d23124c7SKent Overstreet 
11319180ad2eSKent Overstreet 			bch2_btree_iter_set_pos(trans, iter, hole_start);
1132d23124c7SKent Overstreet 			goto again;
1133d23124c7SKent Overstreet 		}
1134d23124c7SKent Overstreet 
1135bc3204c8SKent Overstreet 		if (k.k->p.offset > (*ca)->mi.nbuckets)
1136bc3204c8SKent Overstreet 			bch2_key_resize(hole, (*ca)->mi.nbuckets - hole_start.offset);
1137d23124c7SKent Overstreet 	}
1138d23124c7SKent Overstreet 
1139d23124c7SKent Overstreet 	return k;
1140d23124c7SKent Overstreet }
1141d23124c7SKent Overstreet 
1142298ac24eSKent Overstreet static noinline_for_stack
bch2_check_alloc_key(struct btree_trans * trans,struct bkey_s_c alloc_k,struct btree_iter * alloc_iter,struct btree_iter * discard_iter,struct btree_iter * freespace_iter,struct btree_iter * bucket_gens_iter)1143298ac24eSKent Overstreet int bch2_check_alloc_key(struct btree_trans *trans,
1144d23124c7SKent Overstreet 			 struct bkey_s_c alloc_k,
1145e34da43eSKent Overstreet 			 struct btree_iter *alloc_iter,
1146e34da43eSKent Overstreet 			 struct btree_iter *discard_iter,
11475250b74dSKent Overstreet 			 struct btree_iter *freespace_iter,
11485250b74dSKent Overstreet 			 struct btree_iter *bucket_gens_iter)
11495add07d5SKent Overstreet {
11505add07d5SKent Overstreet 	struct bch_fs *c = trans->c;
115119a614d2SKent Overstreet 	struct bch_alloc_v4 a_convert;
115219a614d2SKent Overstreet 	const struct bch_alloc_v4 *a;
11535250b74dSKent Overstreet 	unsigned gens_offset;
1154d23124c7SKent Overstreet 	struct bkey_s_c k;
11555add07d5SKent Overstreet 	struct printbuf buf = PRINTBUF;
11569b3059a1SKent Overstreet 	int ret = 0;
11575add07d5SKent Overstreet 
11589b3059a1SKent Overstreet 	struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_k.k->p);
11599b3059a1SKent Overstreet 	if (fsck_err_on(!ca,
1160a850bde6SKent Overstreet 			trans, alloc_key_to_missing_dev_bucket,
1161a9c0a4cbSKent Overstreet 			"alloc key for invalid device:bucket %llu:%llu",
1162a9c0a4cbSKent Overstreet 			alloc_k.k->p.inode, alloc_k.k->p.offset))
11639b3059a1SKent Overstreet 		ret = bch2_btree_delete_at(trans, alloc_iter, 0);
11649b3059a1SKent Overstreet 	if (!ca)
11659b3059a1SKent Overstreet 		return ret;
1166e1effd42SKent Overstreet 
1167e1effd42SKent Overstreet 	if (!ca->mi.freespace_initialized)
11689b3059a1SKent Overstreet 		goto out;
1169e1effd42SKent Overstreet 
117019a614d2SKent Overstreet 	a = bch2_alloc_to_v4(alloc_k, &a_convert);
1171e1effd42SKent Overstreet 
11729180ad2eSKent Overstreet 	bch2_btree_iter_set_pos(trans, discard_iter, alloc_k.k->p);
11739180ad2eSKent Overstreet 	k = bch2_btree_iter_peek_slot(trans, discard_iter);
11745add07d5SKent Overstreet 	ret = bkey_err(k);
11755add07d5SKent Overstreet 	if (ret)
11765add07d5SKent Overstreet 		goto err;
11775add07d5SKent Overstreet 
1178c8e58813SKent Overstreet 	bool is_discarded = a->data_type == BCH_DATA_need_discard;
1179c8e58813SKent Overstreet 	if (need_discard_or_freespace_err_on(!!k.k->type != is_discarded,
1180c8e58813SKent Overstreet 					     trans, alloc_k, !is_discarded, true, true)) {
1181c8e58813SKent Overstreet 		ret = bch2_btree_bit_mod_iter(trans, discard_iter, is_discarded);
11825add07d5SKent Overstreet 		if (ret)
11835add07d5SKent Overstreet 			goto err;
11845add07d5SKent Overstreet 	}
11855add07d5SKent Overstreet 
11869180ad2eSKent Overstreet 	bch2_btree_iter_set_pos(trans, freespace_iter, alloc_freespace_pos(alloc_k.k->p, *a));
11879180ad2eSKent Overstreet 	k = bch2_btree_iter_peek_slot(trans, freespace_iter);
11885add07d5SKent Overstreet 	ret = bkey_err(k);
11895add07d5SKent Overstreet 	if (ret)
11905add07d5SKent Overstreet 		goto err;
11915add07d5SKent Overstreet 
1192c8e58813SKent Overstreet 	bool is_free = a->data_type == BCH_DATA_free;
1193c8e58813SKent Overstreet 	if (need_discard_or_freespace_err_on(!!k.k->type != is_free,
1194c8e58813SKent Overstreet 					     trans, alloc_k, !is_free, false, true)) {
1195c8e58813SKent Overstreet 		ret = bch2_btree_bit_mod_iter(trans, freespace_iter, is_free);
11965add07d5SKent Overstreet 		if (ret)
11975add07d5SKent Overstreet 			goto err;
11985add07d5SKent Overstreet 	}
11995250b74dSKent Overstreet 
12009180ad2eSKent Overstreet 	bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(alloc_k.k->p, &gens_offset));
12019180ad2eSKent Overstreet 	k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter);
12025250b74dSKent Overstreet 	ret = bkey_err(k);
12035250b74dSKent Overstreet 	if (ret)
12045250b74dSKent Overstreet 		goto err;
12055250b74dSKent Overstreet 
1206cdce1094SKent Overstreet 	if (fsck_err_on(a->gen != alloc_gen(k, gens_offset),
1207a850bde6SKent Overstreet 			trans, bucket_gens_key_wrong,
12081ece5323SKent Overstreet 			"incorrect gen in bucket_gens btree (got %u should be %u)\n%s",
12095250b74dSKent Overstreet 			alloc_gen(k, gens_offset), a->gen,
12105250b74dSKent Overstreet 			(printbuf_reset(&buf),
1211cdce1094SKent Overstreet 			 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
12125250b74dSKent Overstreet 		struct bkey_i_bucket_gens *g =
12135250b74dSKent Overstreet 			bch2_trans_kmalloc(trans, sizeof(*g));
12145250b74dSKent Overstreet 
12155250b74dSKent Overstreet 		ret = PTR_ERR_OR_ZERO(g);
12165250b74dSKent Overstreet 		if (ret)
12175250b74dSKent Overstreet 			goto err;
12185250b74dSKent Overstreet 
12195250b74dSKent Overstreet 		if (k.k->type == KEY_TYPE_bucket_gens) {
12205250b74dSKent Overstreet 			bkey_reassemble(&g->k_i, k);
12215250b74dSKent Overstreet 		} else {
12225250b74dSKent Overstreet 			bkey_bucket_gens_init(&g->k_i);
12235250b74dSKent Overstreet 			g->k.p = alloc_gens_pos(alloc_k.k->p, &gens_offset);
12245250b74dSKent Overstreet 		}
12255250b74dSKent Overstreet 
12265250b74dSKent Overstreet 		g->v.gens[gens_offset] = a->gen;
12275250b74dSKent Overstreet 
12285250b74dSKent Overstreet 		ret = bch2_trans_update(trans, bucket_gens_iter, &g->k_i, 0);
12295250b74dSKent Overstreet 		if (ret)
12305250b74dSKent Overstreet 			goto err;
12315250b74dSKent Overstreet 	}
12329b3059a1SKent Overstreet out:
12335add07d5SKent Overstreet err:
12345add07d5SKent Overstreet fsck_err:
12359b3059a1SKent Overstreet 	bch2_dev_put(ca);
12365add07d5SKent Overstreet 	printbuf_exit(&buf);
12375add07d5SKent Overstreet 	return ret;
12385add07d5SKent Overstreet }
12395add07d5SKent Overstreet 
1240298ac24eSKent Overstreet static noinline_for_stack
bch2_check_alloc_hole_freespace(struct btree_trans * trans,struct bch_dev * ca,struct bpos start,struct bpos * end,struct btree_iter * freespace_iter)1241298ac24eSKent Overstreet int bch2_check_alloc_hole_freespace(struct btree_trans *trans,
1242bc3204c8SKent Overstreet 				    struct bch_dev *ca,
1243d23124c7SKent Overstreet 				    struct bpos start,
1244d23124c7SKent Overstreet 				    struct bpos *end,
1245d23124c7SKent Overstreet 				    struct btree_iter *freespace_iter)
1246d23124c7SKent Overstreet {
1247d23124c7SKent Overstreet 	struct bkey_s_c k;
1248d23124c7SKent Overstreet 	struct printbuf buf = PRINTBUF;
1249d23124c7SKent Overstreet 	int ret;
1250d23124c7SKent Overstreet 
1251d23124c7SKent Overstreet 	if (!ca->mi.freespace_initialized)
1252d23124c7SKent Overstreet 		return 0;
1253d23124c7SKent Overstreet 
12549180ad2eSKent Overstreet 	bch2_btree_iter_set_pos(trans, freespace_iter, start);
1255d23124c7SKent Overstreet 
12569180ad2eSKent Overstreet 	k = bch2_btree_iter_peek_slot(trans, freespace_iter);
1257d23124c7SKent Overstreet 	ret = bkey_err(k);
1258d23124c7SKent Overstreet 	if (ret)
1259d23124c7SKent Overstreet 		goto err;
1260d23124c7SKent Overstreet 
1261d23124c7SKent Overstreet 	*end = bkey_min(k.k->p, *end);
1262d23124c7SKent Overstreet 
1263cdce1094SKent Overstreet 	if (fsck_err_on(k.k->type != KEY_TYPE_set,
1264a850bde6SKent Overstreet 			trans, freespace_hole_missing,
1265b65db750SKent Overstreet 			"hole in alloc btree missing in freespace btree\n"
1266d23124c7SKent Overstreet 			"device %llu buckets %llu-%llu",
1267d23124c7SKent Overstreet 			freespace_iter->pos.inode,
1268d23124c7SKent Overstreet 			freespace_iter->pos.offset,
1269cdce1094SKent Overstreet 			end->offset)) {
1270d23124c7SKent Overstreet 		struct bkey_i *update =
1271d23124c7SKent Overstreet 			bch2_trans_kmalloc(trans, sizeof(*update));
1272d23124c7SKent Overstreet 
1273d23124c7SKent Overstreet 		ret = PTR_ERR_OR_ZERO(update);
1274d23124c7SKent Overstreet 		if (ret)
1275d23124c7SKent Overstreet 			goto err;
1276d23124c7SKent Overstreet 
1277d23124c7SKent Overstreet 		bkey_init(&update->k);
1278d23124c7SKent Overstreet 		update->k.type	= KEY_TYPE_set;
1279d23124c7SKent Overstreet 		update->k.p	= freespace_iter->pos;
1280d23124c7SKent Overstreet 		bch2_key_resize(&update->k,
1281d23124c7SKent Overstreet 				min_t(u64, U32_MAX, end->offset -
1282d23124c7SKent Overstreet 				      freespace_iter->pos.offset));
1283d23124c7SKent Overstreet 
1284d23124c7SKent Overstreet 		ret = bch2_trans_update(trans, freespace_iter, update, 0);
1285d23124c7SKent Overstreet 		if (ret)
1286d23124c7SKent Overstreet 			goto err;
1287d23124c7SKent Overstreet 	}
1288d23124c7SKent Overstreet err:
1289d23124c7SKent Overstreet fsck_err:
1290d23124c7SKent Overstreet 	printbuf_exit(&buf);
1291d23124c7SKent Overstreet 	return ret;
1292d23124c7SKent Overstreet }
1293d23124c7SKent Overstreet 
1294298ac24eSKent Overstreet static noinline_for_stack
bch2_check_alloc_hole_bucket_gens(struct btree_trans * trans,struct bpos start,struct bpos * end,struct btree_iter * bucket_gens_iter)1295298ac24eSKent Overstreet int bch2_check_alloc_hole_bucket_gens(struct btree_trans *trans,
12965250b74dSKent Overstreet 				      struct bpos start,
12975250b74dSKent Overstreet 				      struct bpos *end,
12985250b74dSKent Overstreet 				      struct btree_iter *bucket_gens_iter)
12995250b74dSKent Overstreet {
13005250b74dSKent Overstreet 	struct bkey_s_c k;
13015250b74dSKent Overstreet 	struct printbuf buf = PRINTBUF;
13025250b74dSKent Overstreet 	unsigned i, gens_offset, gens_end_offset;
13035250b74dSKent Overstreet 	int ret;
13045250b74dSKent Overstreet 
13059180ad2eSKent Overstreet 	bch2_btree_iter_set_pos(trans, bucket_gens_iter, alloc_gens_pos(start, &gens_offset));
13065250b74dSKent Overstreet 
13079180ad2eSKent Overstreet 	k = bch2_btree_iter_peek_slot(trans, bucket_gens_iter);
13085250b74dSKent Overstreet 	ret = bkey_err(k);
13095250b74dSKent Overstreet 	if (ret)
13105250b74dSKent Overstreet 		goto err;
13115250b74dSKent Overstreet 
13125250b74dSKent Overstreet 	if (bkey_cmp(alloc_gens_pos(start, &gens_offset),
13135250b74dSKent Overstreet 		     alloc_gens_pos(*end,  &gens_end_offset)))
13145250b74dSKent Overstreet 		gens_end_offset = KEY_TYPE_BUCKET_GENS_NR;
13155250b74dSKent Overstreet 
13165250b74dSKent Overstreet 	if (k.k->type == KEY_TYPE_bucket_gens) {
13175250b74dSKent Overstreet 		struct bkey_i_bucket_gens g;
13185250b74dSKent Overstreet 		bool need_update = false;
13195250b74dSKent Overstreet 
13205250b74dSKent Overstreet 		bkey_reassemble(&g.k_i, k);
13215250b74dSKent Overstreet 
13225250b74dSKent Overstreet 		for (i = gens_offset; i < gens_end_offset; i++) {
1323a850bde6SKent Overstreet 			if (fsck_err_on(g.v.gens[i], trans,
1324b65db750SKent Overstreet 					bucket_gens_hole_wrong,
13255250b74dSKent Overstreet 					"hole in alloc btree at %llu:%llu with nonzero gen in bucket_gens btree (%u)",
13265250b74dSKent Overstreet 					bucket_gens_pos_to_alloc(k.k->p, i).inode,
13275250b74dSKent Overstreet 					bucket_gens_pos_to_alloc(k.k->p, i).offset,
13285250b74dSKent Overstreet 					g.v.gens[i])) {
13295250b74dSKent Overstreet 				g.v.gens[i] = 0;
13305250b74dSKent Overstreet 				need_update = true;
13315250b74dSKent Overstreet 			}
13325250b74dSKent Overstreet 		}
13335250b74dSKent Overstreet 
13345250b74dSKent Overstreet 		if (need_update) {
133596dea3d5SKent Overstreet 			struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));
13365250b74dSKent Overstreet 
133796dea3d5SKent Overstreet 			ret = PTR_ERR_OR_ZERO(u);
13385250b74dSKent Overstreet 			if (ret)
13395250b74dSKent Overstreet 				goto err;
13405250b74dSKent Overstreet 
134196dea3d5SKent Overstreet 			memcpy(u, &g, sizeof(g));
13425250b74dSKent Overstreet 
134396dea3d5SKent Overstreet 			ret = bch2_trans_update(trans, bucket_gens_iter, u, 0);
13445250b74dSKent Overstreet 			if (ret)
13455250b74dSKent Overstreet 				goto err;
13465250b74dSKent Overstreet 		}
13475250b74dSKent Overstreet 	}
13485250b74dSKent Overstreet 
13495250b74dSKent Overstreet 	*end = bkey_min(*end, bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0));
13505250b74dSKent Overstreet err:
13515250b74dSKent Overstreet fsck_err:
13525250b74dSKent Overstreet 	printbuf_exit(&buf);
13535250b74dSKent Overstreet 	return ret;
13545250b74dSKent Overstreet }
13555250b74dSKent Overstreet 
13562cd85feaSKent Overstreet struct check_discard_freespace_key_async {
13572cd85feaSKent Overstreet 	struct work_struct	work;
13582cd85feaSKent Overstreet 	struct bch_fs		*c;
13592cd85feaSKent Overstreet 	struct bbpos		pos;
13602cd85feaSKent Overstreet };
13612cd85feaSKent Overstreet 
bch2_recheck_discard_freespace_key(struct btree_trans * trans,struct bbpos pos)13622cd85feaSKent Overstreet static int bch2_recheck_discard_freespace_key(struct btree_trans *trans, struct bbpos pos)
13632cd85feaSKent Overstreet {
13642cd85feaSKent Overstreet 	struct btree_iter iter;
13652cd85feaSKent Overstreet 	struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, pos.btree, pos.pos, 0);
13662cd85feaSKent Overstreet 	int ret = bkey_err(k);
13672cd85feaSKent Overstreet 	if (ret)
13682cd85feaSKent Overstreet 		return ret;
13692cd85feaSKent Overstreet 
13702cd85feaSKent Overstreet 	u8 gen;
13712cd85feaSKent Overstreet 	ret = k.k->type != KEY_TYPE_set
13722cd85feaSKent Overstreet 		? bch2_check_discard_freespace_key(trans, &iter, &gen, false)
13732cd85feaSKent Overstreet 		: 0;
13742cd85feaSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
13752cd85feaSKent Overstreet 	return ret;
13762cd85feaSKent Overstreet }
13772cd85feaSKent Overstreet 
check_discard_freespace_key_work(struct work_struct * work)13782cd85feaSKent Overstreet static void check_discard_freespace_key_work(struct work_struct *work)
13792cd85feaSKent Overstreet {
13802cd85feaSKent Overstreet 	struct check_discard_freespace_key_async *w =
13812cd85feaSKent Overstreet 		container_of(work, struct check_discard_freespace_key_async, work);
13822cd85feaSKent Overstreet 
13832cd85feaSKent Overstreet 	bch2_trans_do(w->c, bch2_recheck_discard_freespace_key(trans, w->pos));
13842cd85feaSKent Overstreet 	bch2_write_ref_put(w->c, BCH_WRITE_REF_check_discard_freespace_key);
13852cd85feaSKent Overstreet 	kfree(w);
13862cd85feaSKent Overstreet }
13872cd85feaSKent Overstreet 
/*
 * Check the need_discard or freespace key at @iter against the alloc btree.
 *
 * The btree is taken from @iter->btree_id: need_discard keys must point at a
 * bucket whose data_type is BCH_DATA_need_discard, anything else is treated
 * as a freespace key and must point at a BCH_DATA_free bucket whose genbits
 * (top 8 bits of the key offset) match alloc_freespace_genbits().
 *
 * @gen:		set to the bucket's generation, only when 0 is returned
 * @async_repair:	if true, the alloc key is read through the key cache
 *			and a bad key is not deleted here; a work item is
 *			queued to recheck it instead (committing here would
 *			recurse into the allocator)
 *
 * Returns:
 *   0   key is consistent, *@gen is valid
 *   >0  key is bad and the bucket must not be used; *@gen is NOT set
 *   <0  error, including -BCH_ERR_transaction_restart_commit after a
 *       successful synchronous delete
 */
int bch2_check_discard_freespace_key(struct btree_trans *trans, struct btree_iter *iter, u8 *gen,
				     bool async_repair)
{
	struct bch_fs *c = trans->c;
	enum bch_data_type state = iter->btree_id == BTREE_ID_need_discard
		? BCH_DATA_need_discard
		: BCH_DATA_free;
	struct printbuf buf = PRINTBUF;

	/* Low 56 bits of the offset are the bucket, high 8 bits the genbits */
	struct bpos bucket = iter->pos;
	bucket.offset &= ~(~0ULL << 56);
	u64 genbits = iter->pos.offset & (~0ULL << 56);

	struct btree_iter alloc_iter;
	struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter,
						     BTREE_ID_alloc, bucket,
						     async_repair ? BTREE_ITER_cached : 0);
	int ret = bkey_err(alloc_k);
	if (ret)
		return ret;

	if (!bch2_dev_bucket_exists(c, bucket)) {
		if (fsck_err(trans, need_discard_freespace_key_to_invalid_dev_bucket,
			     "entry in %s btree for nonexistant dev:bucket %llu:%llu",
			     bch2_btree_id_str(iter->btree_id), bucket.inode, bucket.offset))
			goto delete;
		ret = 1;
		goto out;
	}

	struct bch_alloc_v4 a_convert;
	const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);

	if (a->data_type != state ||
	    (state == BCH_DATA_free &&
	     genbits != alloc_freespace_genbits(*a))) {
		if (fsck_err(trans, need_discard_freespace_key_bad,
			     "%s\nincorrectly set at %s:%llu:%llu:0 (free %u, genbits %llu should be %llu)",
			     (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf),
			     bch2_btree_id_str(iter->btree_id),
			     iter->pos.inode,
			     iter->pos.offset,
			     a->data_type == state,
			     genbits >> 56, alloc_freespace_genbits(*a) >> 56))
			goto delete;
		ret = 1;
		goto out;
	}

	*gen = a->gen;
out:
fsck_err:
	bch2_set_btree_iter_dontneed(trans, &alloc_iter);
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
delete:
	if (!async_repair) {
		ret =   bch2_btree_bit_mod_iter(trans, iter, false) ?:
			bch2_trans_commit(trans, NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc) ?:
			-BCH_ERR_transaction_restart_commit;
		goto out;
	} else {
		/*
		 * The key is known to be bad and *gen has not been set: report
		 * that to the caller whether or not the repair can be
		 * scheduled, so it never treats this key as valid.
		 */
		ret = 1;

		/*
		 * We can't repair here when called from the allocator path: the
		 * commit will recurse back into the allocator
		 */
		struct check_discard_freespace_key_async *w =
			kzalloc(sizeof(*w), GFP_KERNEL);
		if (!w)
			goto out;

		if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_check_discard_freespace_key)) {
			kfree(w);
			goto out;
		}

		/* Ownership of w and of the write ref passes to the work item */
		INIT_WORK(&w->work, check_discard_freespace_key_work);
		w->c = c;
		w->pos = BBPOS(iter->btree_id, iter->pos);
		queue_work(c->write_ref_wq, &w->work);
		goto out;
	}
}
14735add07d5SKent Overstreet 
bch2_check_discard_freespace_key_fsck(struct btree_trans * trans,struct btree_iter * iter)1474c97118f1SKent Overstreet static int bch2_check_discard_freespace_key_fsck(struct btree_trans *trans, struct btree_iter *iter)
1475c97118f1SKent Overstreet {
1476c97118f1SKent Overstreet 	u8 gen;
14772cd85feaSKent Overstreet 	int ret = bch2_check_discard_freespace_key(trans, iter, &gen, false);
1478c97118f1SKent Overstreet 	return ret < 0 ? ret : 0;
1479c97118f1SKent Overstreet }
1480c97118f1SKent Overstreet 
14815250b74dSKent Overstreet /*
14825250b74dSKent Overstreet  * We've already checked that generation numbers in the bucket_gens btree are
14835250b74dSKent Overstreet  * valid for buckets that exist; this just checks for keys for nonexistent
14845250b74dSKent Overstreet  * buckets.
14855250b74dSKent Overstreet  */
1486298ac24eSKent Overstreet static noinline_for_stack
bch2_check_bucket_gens_key(struct btree_trans * trans,struct btree_iter * iter,struct bkey_s_c k)1487298ac24eSKent Overstreet int bch2_check_bucket_gens_key(struct btree_trans *trans,
14885250b74dSKent Overstreet 			       struct btree_iter *iter,
14895250b74dSKent Overstreet 			       struct bkey_s_c k)
14905250b74dSKent Overstreet {
14915250b74dSKent Overstreet 	struct bch_fs *c = trans->c;
14925250b74dSKent Overstreet 	struct bkey_i_bucket_gens g;
14935250b74dSKent Overstreet 	u64 start = bucket_gens_pos_to_alloc(k.k->p, 0).offset;
14945250b74dSKent Overstreet 	u64 end = bucket_gens_pos_to_alloc(bpos_nosnap_successor(k.k->p), 0).offset;
14955250b74dSKent Overstreet 	u64 b;
1496bc3204c8SKent Overstreet 	bool need_update = false;
14975250b74dSKent Overstreet 	struct printbuf buf = PRINTBUF;
14985250b74dSKent Overstreet 	int ret = 0;
14995250b74dSKent Overstreet 
15005250b74dSKent Overstreet 	BUG_ON(k.k->type != KEY_TYPE_bucket_gens);
15015250b74dSKent Overstreet 	bkey_reassemble(&g.k_i, k);
15025250b74dSKent Overstreet 
1503bc3204c8SKent Overstreet 	struct bch_dev *ca = bch2_dev_tryget_noerror(c, k.k->p.inode);
1504bc3204c8SKent Overstreet 	if (!ca) {
1505a850bde6SKent Overstreet 		if (fsck_err(trans, bucket_gens_to_invalid_dev,
15065250b74dSKent Overstreet 			     "bucket_gens key for invalid device:\n%s",
1507bc3204c8SKent Overstreet 			     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
15085250b74dSKent Overstreet 			ret = bch2_btree_delete_at(trans, iter, 0);
15095250b74dSKent Overstreet 		goto out;
15105250b74dSKent Overstreet 	}
15115250b74dSKent Overstreet 
15125250b74dSKent Overstreet 	if (fsck_err_on(end <= ca->mi.first_bucket ||
1513a850bde6SKent Overstreet 			start >= ca->mi.nbuckets,
1514a850bde6SKent Overstreet 			trans, bucket_gens_to_invalid_buckets,
15155250b74dSKent Overstreet 			"bucket_gens key for invalid buckets:\n%s",
15165250b74dSKent Overstreet 			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
15175250b74dSKent Overstreet 		ret = bch2_btree_delete_at(trans, iter, 0);
15185250b74dSKent Overstreet 		goto out;
15195250b74dSKent Overstreet 	}
15205250b74dSKent Overstreet 
15215250b74dSKent Overstreet 	for (b = start; b < ca->mi.first_bucket; b++)
1522a850bde6SKent Overstreet 		if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK],
1523a850bde6SKent Overstreet 				trans, bucket_gens_nonzero_for_invalid_buckets,
15245250b74dSKent Overstreet 				"bucket_gens key has nonzero gen for invalid bucket")) {
15255250b74dSKent Overstreet 			g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0;
15265250b74dSKent Overstreet 			need_update = true;
15275250b74dSKent Overstreet 		}
15285250b74dSKent Overstreet 
15295250b74dSKent Overstreet 	for (b = ca->mi.nbuckets; b < end; b++)
1530a850bde6SKent Overstreet 		if (fsck_err_on(g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK],
1531a850bde6SKent Overstreet 				trans, bucket_gens_nonzero_for_invalid_buckets,
15325250b74dSKent Overstreet 				"bucket_gens key has nonzero gen for invalid bucket")) {
15335250b74dSKent Overstreet 			g.v.gens[b & KEY_TYPE_BUCKET_GENS_MASK] = 0;
15345250b74dSKent Overstreet 			need_update = true;
15355250b74dSKent Overstreet 		}
15365250b74dSKent Overstreet 
15375250b74dSKent Overstreet 	if (need_update) {
153896dea3d5SKent Overstreet 		struct bkey_i *u = bch2_trans_kmalloc(trans, sizeof(g));
15395250b74dSKent Overstreet 
154096dea3d5SKent Overstreet 		ret = PTR_ERR_OR_ZERO(u);
15415250b74dSKent Overstreet 		if (ret)
15425250b74dSKent Overstreet 			goto out;
15435250b74dSKent Overstreet 
154496dea3d5SKent Overstreet 		memcpy(u, &g, sizeof(g));
154596dea3d5SKent Overstreet 		ret = bch2_trans_update(trans, iter, u, 0);
15465250b74dSKent Overstreet 	}
15475250b74dSKent Overstreet out:
15485250b74dSKent Overstreet fsck_err:
1549bc3204c8SKent Overstreet 	bch2_dev_put(ca);
15505250b74dSKent Overstreet 	printbuf_exit(&buf);
15515250b74dSKent Overstreet 	return ret;
15525250b74dSKent Overstreet }
15535250b74dSKent Overstreet 
bch2_check_alloc_info(struct bch_fs * c)1554e1effd42SKent Overstreet int bch2_check_alloc_info(struct bch_fs *c)
15555add07d5SKent Overstreet {
15566bd68ec2SKent Overstreet 	struct btree_trans *trans = bch2_trans_get(c);
15575250b74dSKent Overstreet 	struct btree_iter iter, discard_iter, freespace_iter, bucket_gens_iter;
1558bc3204c8SKent Overstreet 	struct bch_dev *ca = NULL;
1559d23124c7SKent Overstreet 	struct bkey hole;
1560445d184aSKent Overstreet 	struct bkey_s_c k;
1561e1effd42SKent Overstreet 	int ret = 0;
15625add07d5SKent Overstreet 
15636bd68ec2SKent Overstreet 	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, POS_MIN,
15645dd8c60eSKent Overstreet 			     BTREE_ITER_prefetch);
15656bd68ec2SKent Overstreet 	bch2_trans_iter_init(trans, &discard_iter, BTREE_ID_need_discard, POS_MIN,
15665dd8c60eSKent Overstreet 			     BTREE_ITER_prefetch);
15676bd68ec2SKent Overstreet 	bch2_trans_iter_init(trans, &freespace_iter, BTREE_ID_freespace, POS_MIN,
15685dd8c60eSKent Overstreet 			     BTREE_ITER_prefetch);
15696bd68ec2SKent Overstreet 	bch2_trans_iter_init(trans, &bucket_gens_iter, BTREE_ID_bucket_gens, POS_MIN,
15705dd8c60eSKent Overstreet 			     BTREE_ITER_prefetch);
1571d23124c7SKent Overstreet 
1572e34da43eSKent Overstreet 	while (1) {
1573d23124c7SKent Overstreet 		struct bpos next;
1574d23124c7SKent Overstreet 
15756bd68ec2SKent Overstreet 		bch2_trans_begin(trans);
1576d23124c7SKent Overstreet 
15779180ad2eSKent Overstreet 		k = bch2_get_key_or_real_bucket_hole(trans, &iter, &ca, &hole);
1578d23124c7SKent Overstreet 		ret = bkey_err(k);
15795add07d5SKent Overstreet 		if (ret)
1580d23124c7SKent Overstreet 			goto bkey_err;
1581d23124c7SKent Overstreet 
1582d23124c7SKent Overstreet 		if (!k.k)
15835add07d5SKent Overstreet 			break;
1584e34da43eSKent Overstreet 
1585d23124c7SKent Overstreet 		if (k.k->type) {
1586d23124c7SKent Overstreet 			next = bpos_nosnap_successor(k.k->p);
1587d23124c7SKent Overstreet 
15886bd68ec2SKent Overstreet 			ret = bch2_check_alloc_key(trans,
1589d23124c7SKent Overstreet 						   k, &iter,
1590d23124c7SKent Overstreet 						   &discard_iter,
15915250b74dSKent Overstreet 						   &freespace_iter,
15925250b74dSKent Overstreet 						   &bucket_gens_iter);
1593d23124c7SKent Overstreet 			if (ret)
15945250b74dSKent Overstreet 				goto bkey_err;
1595d23124c7SKent Overstreet 		} else {
1596d23124c7SKent Overstreet 			next = k.k->p;
1597d23124c7SKent Overstreet 
1598bc3204c8SKent Overstreet 			ret = bch2_check_alloc_hole_freespace(trans, ca,
1599d23124c7SKent Overstreet 						    bkey_start_pos(k.k),
1600d23124c7SKent Overstreet 						    &next,
16015250b74dSKent Overstreet 						    &freespace_iter) ?:
16026bd68ec2SKent Overstreet 				bch2_check_alloc_hole_bucket_gens(trans,
16035250b74dSKent Overstreet 						    bkey_start_pos(k.k),
16045250b74dSKent Overstreet 						    &next,
16055250b74dSKent Overstreet 						    &bucket_gens_iter);
1606d23124c7SKent Overstreet 			if (ret)
1607d23124c7SKent Overstreet 				goto bkey_err;
1608d23124c7SKent Overstreet 		}
1609d23124c7SKent Overstreet 
16106bd68ec2SKent Overstreet 		ret = bch2_trans_commit(trans, NULL, NULL,
16113f0e297dSKent Overstreet 					BCH_TRANS_COMMIT_no_enospc);
1612d23124c7SKent Overstreet 		if (ret)
1613d23124c7SKent Overstreet 			goto bkey_err;
1614d23124c7SKent Overstreet 
16159180ad2eSKent Overstreet 		bch2_btree_iter_set_pos(trans, &iter, next);
1616d23124c7SKent Overstreet bkey_err:
1617d23124c7SKent Overstreet 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
1618d23124c7SKent Overstreet 			continue;
1619d23124c7SKent Overstreet 		if (ret)
1620d23124c7SKent Overstreet 			break;
16215add07d5SKent Overstreet 	}
16226bd68ec2SKent Overstreet 	bch2_trans_iter_exit(trans, &bucket_gens_iter);
16236bd68ec2SKent Overstreet 	bch2_trans_iter_exit(trans, &freespace_iter);
16246bd68ec2SKent Overstreet 	bch2_trans_iter_exit(trans, &discard_iter);
16256bd68ec2SKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
1626bc3204c8SKent Overstreet 	bch2_dev_put(ca);
1627bc3204c8SKent Overstreet 	ca = NULL;
16285add07d5SKent Overstreet 
1629e34da43eSKent Overstreet 	if (ret < 0)
16305add07d5SKent Overstreet 		goto err;
16315add07d5SKent Overstreet 
16325028b907SKent Overstreet 	ret = for_each_btree_key(trans, iter,
1633445d184aSKent Overstreet 			BTREE_ID_need_discard, POS_MIN,
16345dd8c60eSKent Overstreet 			BTREE_ITER_prefetch, k,
1635c97118f1SKent Overstreet 		bch2_check_discard_freespace_key_fsck(trans, &iter));
16367d9ae04eSKent Overstreet 	if (ret)
16377d9ae04eSKent Overstreet 		goto err;
16387d9ae04eSKent Overstreet 
16397d9ae04eSKent Overstreet 	bch2_trans_iter_init(trans, &iter, BTREE_ID_freespace, POS_MIN,
16405dd8c60eSKent Overstreet 			     BTREE_ITER_prefetch);
16417d9ae04eSKent Overstreet 	while (1) {
16427d9ae04eSKent Overstreet 		bch2_trans_begin(trans);
16439180ad2eSKent Overstreet 		k = bch2_btree_iter_peek(trans, &iter);
16447d9ae04eSKent Overstreet 		if (!k.k)
16457d9ae04eSKent Overstreet 			break;
16467d9ae04eSKent Overstreet 
16477d9ae04eSKent Overstreet 		ret = bkey_err(k) ?:
1648c97118f1SKent Overstreet 			bch2_check_discard_freespace_key_fsck(trans, &iter);
16497d9ae04eSKent Overstreet 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
16507d9ae04eSKent Overstreet 			ret = 0;
16517d9ae04eSKent Overstreet 			continue;
16527d9ae04eSKent Overstreet 		}
16537d9ae04eSKent Overstreet 		if (ret) {
16547d9ae04eSKent Overstreet 			struct printbuf buf = PRINTBUF;
16557d9ae04eSKent Overstreet 			bch2_bkey_val_to_text(&buf, c, k);
16567d9ae04eSKent Overstreet 
16577d9ae04eSKent Overstreet 			bch_err(c, "while checking %s", buf.buf);
16587d9ae04eSKent Overstreet 			printbuf_exit(&buf);
16597d9ae04eSKent Overstreet 			break;
16607d9ae04eSKent Overstreet 		}
16617d9ae04eSKent Overstreet 
16629180ad2eSKent Overstreet 		bch2_btree_iter_set_pos(trans, &iter, bpos_nosnap_successor(iter.pos));
16637d9ae04eSKent Overstreet 	}
16647d9ae04eSKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
16657d9ae04eSKent Overstreet 	if (ret)
16667d9ae04eSKent Overstreet 		goto err;
16677d9ae04eSKent Overstreet 
16687d9ae04eSKent Overstreet 	ret = for_each_btree_key_commit(trans, iter,
16695250b74dSKent Overstreet 			BTREE_ID_bucket_gens, POS_MIN,
16705dd8c60eSKent Overstreet 			BTREE_ITER_prefetch, k,
16713f0e297dSKent Overstreet 			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
16726bd68ec2SKent Overstreet 		bch2_check_bucket_gens_key(trans, &iter, k));
16735add07d5SKent Overstreet err:
16746bd68ec2SKent Overstreet 	bch2_trans_put(trans);
16751bb3c2a9SKent Overstreet 	bch_err_fn(c, ret);
16761bb3c2a9SKent Overstreet 	return ret;
16775add07d5SKent Overstreet }
16785add07d5SKent Overstreet 
bch2_check_alloc_to_lru_ref(struct btree_trans * trans,struct btree_iter * alloc_iter,struct bkey_buf * last_flushed)16795add07d5SKent Overstreet static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
1680d39881d2SKent Overstreet 				       struct btree_iter *alloc_iter,
1681d39881d2SKent Overstreet 				       struct bkey_buf *last_flushed)
16825add07d5SKent Overstreet {
16835add07d5SKent Overstreet 	struct bch_fs *c = trans->c;
168419a614d2SKent Overstreet 	struct bch_alloc_v4 a_convert;
168519a614d2SKent Overstreet 	const struct bch_alloc_v4 *a;
1686d39881d2SKent Overstreet 	struct bkey_s_c alloc_k;
16875add07d5SKent Overstreet 	struct printbuf buf = PRINTBUF;
16885add07d5SKent Overstreet 	int ret;
16895add07d5SKent Overstreet 
16909180ad2eSKent Overstreet 	alloc_k = bch2_btree_iter_peek(trans, alloc_iter);
16915add07d5SKent Overstreet 	if (!alloc_k.k)
16925add07d5SKent Overstreet 		return 0;
16935add07d5SKent Overstreet 
16945add07d5SKent Overstreet 	ret = bkey_err(alloc_k);
16955add07d5SKent Overstreet 	if (ret)
16965add07d5SKent Overstreet 		return ret;
16975add07d5SKent Overstreet 
1698260af156SKent Overstreet 	struct bch_dev *ca = bch2_dev_tryget_noerror(c, alloc_k.k->p.inode);
1699260af156SKent Overstreet 	if (!ca)
1700260af156SKent Overstreet 		return 0;
1701260af156SKent Overstreet 
170219a614d2SKent Overstreet 	a = bch2_alloc_to_v4(alloc_k, &a_convert);
17035add07d5SKent Overstreet 
1704260af156SKent Overstreet 	u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca);
1705260af156SKent Overstreet 	if (lru_idx) {
1706b8e37c16SKent Overstreet 		ret = bch2_lru_check_set(trans, BCH_LRU_BUCKET_FRAGMENTATION,
17073aff608bSKent Overstreet 					 bucket_to_u64(alloc_k.k->p),
1708260af156SKent Overstreet 					 lru_idx, alloc_k, last_flushed);
1709d39881d2SKent Overstreet 		if (ret)
1710260af156SKent Overstreet 			goto err;
1711d39881d2SKent Overstreet 	}
1712d39881d2SKent Overstreet 
171319a614d2SKent Overstreet 	if (a->data_type != BCH_DATA_cached)
1714260af156SKent Overstreet 		goto err;
17155add07d5SKent Overstreet 
1716a850bde6SKent Overstreet 	if (fsck_err_on(!a->io_time[READ],
1717a850bde6SKent Overstreet 			trans, alloc_key_cached_but_read_time_zero,
17181ece5323SKent Overstreet 			"cached bucket with read_time 0\n%s",
17193f59547eSKent Overstreet 		(printbuf_reset(&buf),
17203f59547eSKent Overstreet 		 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
17213f59547eSKent Overstreet 		struct bkey_i_alloc_v4 *a_mut =
17223f59547eSKent Overstreet 			bch2_alloc_to_v4_mut(trans, alloc_k);
17233f59547eSKent Overstreet 		ret = PTR_ERR_OR_ZERO(a_mut);
17243f59547eSKent Overstreet 		if (ret)
17253f59547eSKent Overstreet 			goto err;
17263f59547eSKent Overstreet 
1727cff07e27SKent Overstreet 		a_mut->v.io_time[READ] = bch2_current_io_time(c, READ);
17283f59547eSKent Overstreet 		ret = bch2_trans_update(trans, alloc_iter,
17295dd8c60eSKent Overstreet 					&a_mut->k_i, BTREE_TRIGGER_norun);
17303f59547eSKent Overstreet 		if (ret)
17313f59547eSKent Overstreet 			goto err;
17323f59547eSKent Overstreet 
17333f59547eSKent Overstreet 		a = &a_mut->v;
17343f59547eSKent Overstreet 	}
17353f59547eSKent Overstreet 
17363aff608bSKent Overstreet 	ret = bch2_lru_check_set(trans, alloc_k.k->p.inode,
17373aff608bSKent Overstreet 				 bucket_to_u64(alloc_k.k->p),
17383aff608bSKent Overstreet 				 a->io_time[READ],
1739d39881d2SKent Overstreet 				 alloc_k, last_flushed);
17405add07d5SKent Overstreet 	if (ret)
17415add07d5SKent Overstreet 		goto err;
17425add07d5SKent Overstreet err:
17435add07d5SKent Overstreet fsck_err:
1744260af156SKent Overstreet 	bch2_dev_put(ca);
17455add07d5SKent Overstreet 	printbuf_exit(&buf);
17465add07d5SKent Overstreet 	return ret;
17475add07d5SKent Overstreet }
17485add07d5SKent Overstreet 
bch2_check_alloc_to_lru_refs(struct bch_fs * c)17495add07d5SKent Overstreet int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
17505add07d5SKent Overstreet {
1751d39881d2SKent Overstreet 	struct bkey_buf last_flushed;
1752d39881d2SKent Overstreet 
1753d39881d2SKent Overstreet 	bch2_bkey_buf_init(&last_flushed);
1754d39881d2SKent Overstreet 	bkey_init(&last_flushed.k->k);
1755d39881d2SKent Overstreet 
17563f59547eSKent Overstreet 	int ret = bch2_trans_run(c,
17576bd68ec2SKent Overstreet 		for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
17585dd8c60eSKent Overstreet 				POS_MIN, BTREE_ITER_prefetch, k,
17593f0e297dSKent Overstreet 				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
17606756e385SKent Overstreet 			bch2_check_alloc_to_lru_ref(trans, &iter, &last_flushed))) ?:
17616756e385SKent Overstreet 		bch2_check_stripe_to_lru_refs(c);
1762d39881d2SKent Overstreet 
1763d39881d2SKent Overstreet 	bch2_bkey_buf_exit(&last_flushed, c);
17641bb3c2a9SKent Overstreet 	bch_err_fn(c, ret);
17651bb3c2a9SKent Overstreet 	return ret;
17665add07d5SKent Overstreet }
17675add07d5SKent Overstreet 
/*
 * Record @bucket in @ca's list of in-flight discards, with the given
 * @in_progress state.
 *
 * Returns -BCH_ERR_EEXIST_discard_in_flight_add if the bucket is already
 * listed, otherwise the result of darray_push() (0 on success).
 *
 * Takes and releases ca->discard_buckets_in_flight_lock.
 */
static int discard_in_flight_add(struct bch_dev *ca, u64 bucket, bool in_progress)
{
	struct discard_in_flight entry = {
		.in_progress	= in_progress,
		.bucket		= bucket,
	};
	int ret = 0;

	mutex_lock(&ca->discard_buckets_in_flight_lock);

	darray_for_each(ca->discard_buckets_in_flight, d) {
		if (d->bucket == bucket) {
			ret = -BCH_ERR_EEXIST_discard_in_flight_add;
			break;
		}
	}

	/* Not a duplicate: append the new entry. */
	if (!ret)
		ret = darray_push(&ca->discard_buckets_in_flight, entry);

	mutex_unlock(&ca->discard_buckets_in_flight_lock);
	return ret;
}
1787a393f331SKent Overstreet 
/*
 * Drop @bucket from @ca's list of in-flight discards.
 *
 * The bucket must be on the list and must be marked in_progress; either
 * condition failing is treated as a fatal bug.
 *
 * Takes and releases ca->discard_buckets_in_flight_lock.
 */
static void discard_in_flight_remove(struct bch_dev *ca, u64 bucket)
{
	bool found = false;

	mutex_lock(&ca->discard_buckets_in_flight_lock);

	darray_for_each(ca->discard_buckets_in_flight, d) {
		if (d->bucket != bucket)
			continue;

		BUG_ON(!d->in_progress);
		darray_remove_item(&ca->discard_buckets_in_flight, d);
		found = true;
		break;
	}

	/* Removing a bucket that was never added is a caller bug. */
	BUG_ON(!found);

	mutex_unlock(&ca->discard_buckets_in_flight_lock);
}
1801a393f331SKent Overstreet 
1802a6548c8bSKent Overstreet struct discard_buckets_state {
1803a6548c8bSKent Overstreet 	u64		seen;
1804a6548c8bSKent Overstreet 	u64		open;
1805a6548c8bSKent Overstreet 	u64		need_journal_commit;
1806a6548c8bSKent Overstreet 	u64		discarded;
1807a6548c8bSKent Overstreet };
1808a6548c8bSKent Overstreet 
180980be08cdSKent Overstreet /*
181080be08cdSKent Overstreet  * This is needed because discard is both a filesystem option and a device
181180be08cdSKent Overstreet  * option, and mount options are supposed to apply to that mount and not be
181280be08cdSKent Overstreet  * persisted, i.e. if it's set as a mount option we can't propagate it to the
181380be08cdSKent Overstreet  * device.
181480be08cdSKent Overstreet  */
discard_opt_enabled(struct bch_fs * c,struct bch_dev * ca)181580be08cdSKent Overstreet static inline bool discard_opt_enabled(struct bch_fs *c, struct bch_dev *ca)
181680be08cdSKent Overstreet {
181780be08cdSKent Overstreet 	return test_bit(BCH_FS_discard_mount_opt_set, &c->flags)
181880be08cdSKent Overstreet 		? c->opts.discard
181980be08cdSKent Overstreet 		: ca->mi.discard;
182080be08cdSKent Overstreet }
182180be08cdSKent Overstreet 
/*
 * Process the need_discard entry that @need_discard_iter is positioned at.
 *
 * @ca:			device the bucket belongs to
 * @discard_pos_done:	position of the most recent bucket that went through
 *			the discard step; if it equals the current bucket the
 *			discard step (and the 'discarded' count) is skipped
 * @s:			counters for this pass
 * @fastpath:		when false, this function adds the bucket to the
 *			in-flight list itself and removes it again on exit;
 *			when true the in-flight list is left untouched
 *
 * Buckets that are open, or whose journal sequence is not yet flushed to
 * disk, are skipped with a return of 0. Otherwise the alloc key is looked up;
 * if it is not in the need_discard state an fsck error is raised and, when
 * the fix is accepted, the need_discard btree bit is cleared. In the normal
 * case the bucket is (optionally) discarded on the block device, the
 * NEED_DISCARD flag is cleared and the update committed.
 *
 * Returns 0 on success or skip, otherwise an error (including transaction
 * restarts).
 *
 * Note: need_discard_or_freespace_err() assigns to 'ret' and jumps to
 * 'fsck_err'.
 */
static int bch2_discard_one_bucket(struct btree_trans *trans,
				   struct bch_dev *ca,
				   struct btree_iter *need_discard_iter,
				   struct bpos *discard_pos_done,
				   struct discard_buckets_state *s,
				   bool fastpath)
{
	struct bch_fs *c = trans->c;
	struct bpos bucket_pos = need_discard_iter->pos;
	struct btree_iter iter = {};
	struct bkey_s_c alloc_k;
	struct bkey_i_alloc_v4 *a_mut;
	struct printbuf buf = PRINTBUF;
	bool in_flight_held = false;
	int ret = 0;

	if (bch2_bucket_is_open_safe(c, bucket_pos.inode, bucket_pos.offset)) {
		s->open++;
		goto out;
	}

	u64 seq_ready = bch2_bucket_journal_seq_ready(&c->buckets_waiting_for_journal,
						      bucket_pos.inode,
						      bucket_pos.offset);
	if (seq_ready > c->journal.flushed_seq_ondisk) {
		/* Only count it if a flush covering it isn't already underway. */
		if (seq_ready > c->journal.flushing_seq)
			s->need_journal_commit++;
		goto out;
	}

	alloc_k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc,
				     need_discard_iter->pos,
				     BTREE_ITER_cached);
	ret = bkey_err(alloc_k);
	if (ret)
		goto out;

	a_mut = bch2_alloc_to_v4_mut(trans, alloc_k);
	ret = PTR_ERR_OR_ZERO(a_mut);
	if (ret)
		goto out;

	if (a_mut->v.data_type != BCH_DATA_need_discard) {
		/* need_discard entry disagrees with the alloc key. */
		if (!need_discard_or_freespace_err(trans, alloc_k, true, true, true))
			goto out;

		ret = bch2_btree_bit_mod_iter(trans, need_discard_iter, false);
		if (ret)
			goto out;
		goto commit;
	}

	if (!fastpath) {
		/* Already in flight elsewhere: leave it alone, ret stays 0. */
		if (discard_in_flight_add(ca, iter.pos.offset, true))
			goto out;

		in_flight_held = true;
	}

	if (!bkey_eq(*discard_pos_done, iter.pos)) {
		s->discarded++;
		*discard_pos_done = iter.pos;

		if (discard_opt_enabled(c, ca) && !c->opts.nochanges) {
			/*
			 * No additional locking is needed while the btree
			 * locks are dropped: this is the only thread that
			 * removes items from the need_discard tree.
			 */
			bch2_trans_unlock_long(trans);
			blkdev_issue_discard(ca->disk_sb.bdev,
					     alloc_k.k->p.offset * ca->mi.bucket_size,
					     ca->mi.bucket_size,
					     GFP_KERNEL);
			ret = bch2_trans_relock_notrace(trans);
			if (ret)
				goto out;
		}
	}

	SET_BCH_ALLOC_V4_NEED_DISCARD(&a_mut->v, false);
	alloc_data_type_set(&a_mut->v, a_mut->v.data_type);

	ret = bch2_trans_update(trans, &iter, &a_mut->k_i, 0);
	if (ret)
		goto out;
commit:
	ret = bch2_trans_commit(trans, NULL, NULL,
				BCH_WATERMARK_btree|
				BCH_TRANS_COMMIT_no_enospc);
	if (ret)
		goto out;

	/* Event counters are bumped only after a successful commit. */
	if (fastpath)
		count_event(c, bucket_discard_fast);
	else
		count_event(c, bucket_discard);
out:
fsck_err:
	if (in_flight_held)
		discard_in_flight_remove(ca, iter.pos.offset);
	if (!ret)
		s->seen++;
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
}
192859cc38b8SKent Overstreet 
bch2_do_discards_work(struct work_struct * work)192959cc38b8SKent Overstreet static void bch2_do_discards_work(struct work_struct *work)
193059cc38b8SKent Overstreet {
193164ee1431SKent Overstreet 	struct bch_dev *ca = container_of(work, struct bch_dev, discard_work);
193264ee1431SKent Overstreet 	struct bch_fs *c = ca->fs;
1933a6548c8bSKent Overstreet 	struct discard_buckets_state s = {};
19344910a950SKent Overstreet 	struct bpos discard_pos_done = POS_MAX;
193559cc38b8SKent Overstreet 	int ret;
193659cc38b8SKent Overstreet 
19374910a950SKent Overstreet 	/*
19384910a950SKent Overstreet 	 * We're doing the commit in bch2_discard_one_bucket instead of using
19394910a950SKent Overstreet 	 * for_each_btree_key_commit() so that we can increment counters after
19404910a950SKent Overstreet 	 * successful commit:
19414910a950SKent Overstreet 	 */
19426bd68ec2SKent Overstreet 	ret = bch2_trans_run(c,
1943000fe8d5SKent Overstreet 		for_each_btree_key_max(trans, iter,
194464ee1431SKent Overstreet 				   BTREE_ID_need_discard,
194564ee1431SKent Overstreet 				   POS(ca->dev_idx, 0),
194664ee1431SKent Overstreet 				   POS(ca->dev_idx, U64_MAX), 0, k,
1947bb61afebSKent Overstreet 			bch2_discard_one_bucket(trans, ca, &iter, &discard_pos_done, &s, false)));
1948a6548c8bSKent Overstreet 
19499e903352SKent Overstreet 	if (s.need_journal_commit > dev_buckets_available(ca, BCH_WATERMARK_normal))
19509e903352SKent Overstreet 		bch2_journal_flush_async(&c->journal, NULL);
19519e903352SKent Overstreet 
1952a6548c8bSKent Overstreet 	trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded,
1953a6548c8bSKent Overstreet 			      bch2_err_str(ret));
195459cc38b8SKent Overstreet 
1955dcffc3b1SKent Overstreet 	percpu_ref_put(&ca->io_ref[WRITE]);
19560b50b731SKent Overstreet 	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
195764ee1431SKent Overstreet }
195864ee1431SKent Overstreet 
bch2_dev_do_discards(struct bch_dev * ca)195964ee1431SKent Overstreet void bch2_dev_do_discards(struct bch_dev *ca)
196064ee1431SKent Overstreet {
196164ee1431SKent Overstreet 	struct bch_fs *c = ca->fs;
196264ee1431SKent Overstreet 
19630b50b731SKent Overstreet 	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard))
196464ee1431SKent Overstreet 		return;
196564ee1431SKent Overstreet 
19660b50b731SKent Overstreet 	if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
19670b50b731SKent Overstreet 		goto put_write_ref;
196864ee1431SKent Overstreet 
196964ee1431SKent Overstreet 	if (queue_work(c->write_ref_wq, &ca->discard_work))
197064ee1431SKent Overstreet 		return;
197164ee1431SKent Overstreet 
1972dcffc3b1SKent Overstreet 	percpu_ref_put(&ca->io_ref[WRITE]);
19730b50b731SKent Overstreet put_write_ref:
19740b50b731SKent Overstreet 	bch2_write_ref_put(c, BCH_WRITE_REF_discard);
197559cc38b8SKent Overstreet }
197659cc38b8SKent Overstreet 
bch2_do_discards(struct bch_fs * c)197759cc38b8SKent Overstreet void bch2_do_discards(struct bch_fs *c)
197859cc38b8SKent Overstreet {
197964ee1431SKent Overstreet 	for_each_member_device(c, ca)
198064ee1431SKent Overstreet 		bch2_dev_do_discards(ca);
198159cc38b8SKent Overstreet }
198259cc38b8SKent Overstreet 
/*
 * Fast-path discard of a single @bucket on @ca.
 *
 * Looks up the bucket's slot in the need_discard btree. If the key there is
 * not KEY_TYPE_set this is logged as an fsck error and nothing further is
 * done; otherwise the bucket is handed to bch2_discard_one_bucket() with
 * fastpath == true.
 *
 * Returns the lookup error, the discard result, or 0.
 *
 * Note: log_fsck_err_on() may assign to 'ret' and jump to 'fsck_err'.
 */
static int bch2_do_discards_fast_one(struct btree_trans *trans,
				     struct bch_dev *ca,
				     u64 bucket,
				     struct bpos *discard_pos_done,
				     struct discard_buckets_state *s)
{
	struct btree_iter need_discard_iter;
	struct bkey_s_c discard_k;
	int ret;

	discard_k = bch2_bkey_get_iter(trans, &need_discard_iter,
				       BTREE_ID_need_discard,
				       POS(ca->dev_idx, bucket), 0);
	ret = bkey_err(discard_k);
	if (ret)
		return ret;

	if (!log_fsck_err_on(discard_k.k->type != KEY_TYPE_set,
			     trans, discarding_bucket_not_in_need_discard_btree,
			     "attempting to discard bucket %u:%llu not in need_discard btree",
			     ca->dev_idx, bucket))
		ret = bch2_discard_one_bucket(trans, ca, &need_discard_iter,
					      discard_pos_done, s, true);
fsck_err:
	bch2_trans_iter_exit(trans, &need_discard_iter);
	return ret;
}
2008a393f331SKent Overstreet 
bch2_do_discards_fast_work(struct work_struct * work)2009a393f331SKent Overstreet static void bch2_do_discards_fast_work(struct work_struct *work)
2010a393f331SKent Overstreet {
201164ee1431SKent Overstreet 	struct bch_dev *ca = container_of(work, struct bch_dev, discard_fast_work);
201264ee1431SKent Overstreet 	struct bch_fs *c = ca->fs;
2013bb61afebSKent Overstreet 	struct discard_buckets_state s = {};
2014bb61afebSKent Overstreet 	struct bpos discard_pos_done = POS_MAX;
2015bb61afebSKent Overstreet 	struct btree_trans *trans = bch2_trans_get(c);
2016bb61afebSKent Overstreet 	int ret = 0;
2017a393f331SKent Overstreet 
2018a393f331SKent Overstreet 	while (1) {
2019a393f331SKent Overstreet 		bool got_bucket = false;
202064ee1431SKent Overstreet 		u64 bucket;
2021a393f331SKent Overstreet 
202264ee1431SKent Overstreet 		mutex_lock(&ca->discard_buckets_in_flight_lock);
202364ee1431SKent Overstreet 		darray_for_each(ca->discard_buckets_in_flight, i) {
202464ee1431SKent Overstreet 			if (i->in_progress)
2025a393f331SKent Overstreet 				continue;
2026a393f331SKent Overstreet 
2027a393f331SKent Overstreet 			got_bucket = true;
202864ee1431SKent Overstreet 			bucket = i->bucket;
202964ee1431SKent Overstreet 			i->in_progress = true;
2030a393f331SKent Overstreet 			break;
2031a393f331SKent Overstreet 		}
203264ee1431SKent Overstreet 		mutex_unlock(&ca->discard_buckets_in_flight_lock);
2033a393f331SKent Overstreet 
2034a393f331SKent Overstreet 		if (!got_bucket)
2035a393f331SKent Overstreet 			break;
2036a393f331SKent Overstreet 
2037bb61afebSKent Overstreet 		ret = lockrestart_do(trans,
2038bb61afebSKent Overstreet 			bch2_do_discards_fast_one(trans, ca, bucket, &discard_pos_done, &s));
2039a393f331SKent Overstreet 		bch_err_fn(c, ret);
2040a393f331SKent Overstreet 
204164ee1431SKent Overstreet 		discard_in_flight_remove(ca, bucket);
2042a393f331SKent Overstreet 
2043a393f331SKent Overstreet 		if (ret)
2044a393f331SKent Overstreet 			break;
2045a393f331SKent Overstreet 	}
2046a393f331SKent Overstreet 
20479e903352SKent Overstreet 	trace_discard_buckets_fast(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret));
2048bb61afebSKent Overstreet 
2049bb61afebSKent Overstreet 	bch2_trans_put(trans);
2050dcffc3b1SKent Overstreet 	percpu_ref_put(&ca->io_ref[WRITE]);
20515a6e43afSKent Overstreet 	bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
2052a393f331SKent Overstreet }
2053a393f331SKent Overstreet 
/*
 * Request a fast-path discard of @bucket on @ca.
 *
 * The bucket is added to the in-flight list (not yet in progress); if it is
 * already listed, or the add fails, nothing else happens. Then a
 * BCH_WRITE_REF_discard_fast ref and a device write io ref are taken and the
 * fast discard worker is queued. If queueing succeeds the worker releases
 * both refs; otherwise whatever was taken is dropped here. The in-flight
 * entry is not removed on these failure paths.
 */
static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
{
	struct bch_fs *c = ca->fs;

	if (discard_in_flight_add(ca, bucket, false))
		return;

	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
		return;

	if (bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) {
		/* Queued: the worker now owns both references. */
		if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
			return;

		percpu_ref_put(&ca->io_ref[WRITE]);
	}

	bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
}
2074a393f331SKent Overstreet 
invalidate_one_bp(struct btree_trans * trans,struct bch_dev * ca,struct bkey_s_c_backpointer bp,struct bkey_buf * last_flushed)2075942a418cSKent Overstreet static int invalidate_one_bp(struct btree_trans *trans,
2076942a418cSKent Overstreet 			     struct bch_dev *ca,
2077942a418cSKent Overstreet 			     struct bkey_s_c_backpointer bp,
2078942a418cSKent Overstreet 			     struct bkey_buf *last_flushed)
2079942a418cSKent Overstreet {
2080942a418cSKent Overstreet 	struct btree_iter extent_iter;
2081942a418cSKent Overstreet 	struct bkey_s_c extent_k =
2082942a418cSKent Overstreet 		bch2_backpointer_get_key(trans, bp, &extent_iter, 0, last_flushed);
2083942a418cSKent Overstreet 	int ret = bkey_err(extent_k);
2084942a418cSKent Overstreet 	if (ret)
2085942a418cSKent Overstreet 		return ret;
2086942a418cSKent Overstreet 
2087*39ebd748SKent Overstreet 	if (!extent_k.k)
2088*39ebd748SKent Overstreet 		return 0;
2089*39ebd748SKent Overstreet 
2090942a418cSKent Overstreet 	struct bkey_i *n =
2091942a418cSKent Overstreet 		bch2_bkey_make_mut(trans, &extent_iter, &extent_k,
2092942a418cSKent Overstreet 				   BTREE_UPDATE_internal_snapshot_node);
2093942a418cSKent Overstreet 	ret = PTR_ERR_OR_ZERO(n);
2094942a418cSKent Overstreet 	if (ret)
2095942a418cSKent Overstreet 		goto err;
2096942a418cSKent Overstreet 
2097942a418cSKent Overstreet 	bch2_bkey_drop_device(bkey_i_to_s(n), ca->dev_idx);
2098942a418cSKent Overstreet err:
2099942a418cSKent Overstreet 	bch2_trans_iter_exit(trans, &extent_iter);
2100942a418cSKent Overstreet 	return ret;
2101942a418cSKent Overstreet }
2102942a418cSKent Overstreet 
/*
 * Invalidate the data in @bucket by walking its backpointers.
 *
 * Iterates the backpointers btree over the range that
 * bucket_pos_to_bp_start()/bucket_pos_to_bp_end() give for @bucket and calls
 * invalidate_one_bp() for each KEY_TYPE_backpointer key whose bucket_gen
 * equals @gen; other keys are skipped. Updates are committed by the
 * for_each_btree_key_max_commit() iteration.
 *
 * Returns the result of the iteration (0 on success).
 */
static int invalidate_one_bucket_by_bps(struct btree_trans *trans,
					struct bch_dev *ca,
					struct bpos bucket,
					u8 gen,
					struct bkey_buf *last_flushed)
{
	struct bpos first_bp	= bucket_pos_to_bp_start(ca, bucket);
	struct bpos last_bp	= bucket_pos_to_bp_end(ca, bucket);

	return for_each_btree_key_max_commit(trans, iter, BTREE_ID_backpointers,
				      first_bp, last_bp, 0, k,
				      NULL, NULL,
				      BCH_WATERMARK_btree|
				      BCH_TRANS_COMMIT_no_enospc, ({
		if (k.k->type != KEY_TYPE_backpointer)
			continue;

		struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

		/* filter out bps with gens that don't match */
		if (bp.v->bucket_gen != gen)
			continue;

		invalidate_one_bp(trans, ca, bp, last_flushed);
	}));
}
2130942a418cSKent Overstreet 
2131942a418cSKent Overstreet noinline_for_stack
invalidate_one_bucket(struct btree_trans * trans,struct bch_dev * ca,struct btree_iter * lru_iter,struct bkey_s_c lru_k,struct bkey_buf * last_flushed,s64 * nr_to_invalidate)2132d04801a0SKent Overstreet static int invalidate_one_bucket(struct btree_trans *trans,
2133942a418cSKent Overstreet 				 struct bch_dev *ca,
213483f33d68SKent Overstreet 				 struct btree_iter *lru_iter,
2135629a21b6SKent Overstreet 				 struct bkey_s_c lru_k,
2136942a418cSKent Overstreet 				 struct bkey_buf *last_flushed,
213783f33d68SKent Overstreet 				 s64 *nr_to_invalidate)
2138caece7feSKent Overstreet {
2139caece7feSKent Overstreet 	struct bch_fs *c = trans->c;
21409b93596cSKent Overstreet 	struct printbuf buf = PRINTBUF;
2141629a21b6SKent Overstreet 	struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
2142942a418cSKent Overstreet 	struct btree_iter alloc_iter = {};
2143d04801a0SKent Overstreet 	int ret = 0;
2144caece7feSKent Overstreet 
214583f33d68SKent Overstreet 	if (*nr_to_invalidate <= 0)
2146d04801a0SKent Overstreet 		return 1;
2147caece7feSKent Overstreet 
2148629a21b6SKent Overstreet 	if (!bch2_dev_bucket_exists(c, bucket)) {
2149724e49c6SKent Overstreet 		if (fsck_err(trans, lru_entry_to_invalid_bucket,
2150724e49c6SKent Overstreet 			     "lru key points to nonexistent device:bucket %llu:%llu",
2151724e49c6SKent Overstreet 			     bucket.inode, bucket.offset))
2152724e49c6SKent Overstreet 			return bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false);
2153724e49c6SKent Overstreet 		goto out;
2154629a21b6SKent Overstreet 	}
2155629a21b6SKent Overstreet 
215684ddb8b9SKent Overstreet 	if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
215784ddb8b9SKent Overstreet 		return 0;
215884ddb8b9SKent Overstreet 
2159942a418cSKent Overstreet 	struct bkey_s_c alloc_k = bch2_bkey_get_iter(trans, &alloc_iter,
2160942a418cSKent Overstreet 						     BTREE_ID_alloc, bucket,
2161942a418cSKent Overstreet 						     BTREE_ITER_cached);
2162942a418cSKent Overstreet 	ret = bkey_err(alloc_k);
2163caece7feSKent Overstreet 	if (ret)
2164942a418cSKent Overstreet 		return ret;
2165942a418cSKent Overstreet 
2166942a418cSKent Overstreet 	struct bch_alloc_v4 a_convert;
2167942a418cSKent Overstreet 	const struct bch_alloc_v4 *a = bch2_alloc_to_v4(alloc_k, &a_convert);
2168caece7feSKent Overstreet 
21691b30ed5fSKent Overstreet 	/* We expect harmless races here due to the btree write buffer: */
2170942a418cSKent Overstreet 	if (lru_pos_time(lru_iter->pos) != alloc_lru_idx_read(*a))
21711b30ed5fSKent Overstreet 		goto out;
2172d04801a0SKent Overstreet 
2173b7f648e2SKent Overstreet 	/*
2174b7f648e2SKent Overstreet 	 * Impossible since alloc_lru_idx_read() only returns nonzero if the
2175b7f648e2SKent Overstreet 	 * bucket is supposed to be on the cached bucket LRU (i.e.
2176b7f648e2SKent Overstreet 	 * BCH_DATA_cached)
2177b7f648e2SKent Overstreet 	 *
2178b7f648e2SKent Overstreet 	 * bch2_lru_validate() also disallows lru keys with lru_pos_time() == 0
2179b7f648e2SKent Overstreet 	 */
2180942a418cSKent Overstreet 	BUG_ON(a->data_type != BCH_DATA_cached);
2181942a418cSKent Overstreet 	BUG_ON(a->dirty_sectors);
2182caece7feSKent Overstreet 
2183942a418cSKent Overstreet 	if (!a->cached_sectors)
218438585367SKent Overstreet 		bch_err(c, "invalidating empty bucket, confused");
218538585367SKent Overstreet 
2186942a418cSKent Overstreet 	unsigned cached_sectors = a->cached_sectors;
2187942a418cSKent Overstreet 	u8 gen = a->gen;
218838585367SKent Overstreet 
2189942a418cSKent Overstreet 	ret = invalidate_one_bucket_by_bps(trans, ca, bucket, gen, last_flushed);
21901f93726eSKent Overstreet 	if (ret)
21911f93726eSKent Overstreet 		goto out;
2192d04801a0SKent Overstreet 
2193674cfc26SKent Overstreet 	trace_and_count(c, bucket_invalidate, c, bucket.inode, bucket.offset, cached_sectors);
2194d04801a0SKent Overstreet 	--*nr_to_invalidate;
2195caece7feSKent Overstreet out:
2196724e49c6SKent Overstreet fsck_err:
2197942a418cSKent Overstreet 	bch2_trans_iter_exit(trans, &alloc_iter);
21989b93596cSKent Overstreet 	printbuf_exit(&buf);
2199caece7feSKent Overstreet 	return ret;
2200caece7feSKent Overstreet }
2201caece7feSKent Overstreet 
/*
 * Return the next key in @ca's LRU range, wrapping around at most once.
 *
 * Peeks from @iter's current position up to
 * lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX). If nothing is found and
 * *@wrapped is still false, the iterator is reset to lru_pos(ca->dev_idx, 0, 0),
 * *@wrapped is set, and the peek is retried. The result of the final peek is
 * returned (its .k is NULL when no key was found).
 */
static struct bkey_s_c next_lru_key(struct btree_trans *trans, struct btree_iter *iter,
				    struct bch_dev *ca, bool *wrapped)
{
	for (;;) {
		struct bkey_s_c k =
			bch2_btree_iter_peek_max(trans, iter,
						 lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX));

		if (k.k || *wrapped)
			return k;

		/* Hit the end without wrapping yet: restart from the beginning. */
		bch2_btree_iter_set_pos(trans, iter, lru_pos(ca->dev_idx, 0, 0));
		*wrapped = true;
	}
}
22162e9940d4SKent Overstreet 
bch2_do_invalidates_work(struct work_struct * work)2217caece7feSKent Overstreet static void bch2_do_invalidates_work(struct work_struct *work)
2218caece7feSKent Overstreet {
221964ee1431SKent Overstreet 	struct bch_dev *ca = container_of(work, struct bch_dev, invalidate_work);
222064ee1431SKent Overstreet 	struct bch_fs *c = ca->fs;
22216bd68ec2SKent Overstreet 	struct btree_trans *trans = bch2_trans_get(c);
2222caece7feSKent Overstreet 	int ret = 0;
2223caece7feSKent Overstreet 
2224942a418cSKent Overstreet 	struct bkey_buf last_flushed;
2225942a418cSKent Overstreet 	bch2_bkey_buf_init(&last_flushed);
2226942a418cSKent Overstreet 	bkey_init(&last_flushed.k->k);
2227942a418cSKent Overstreet 
2228cb13f471SKent Overstreet 	ret = bch2_btree_write_buffer_tryflush(trans);
22291b30ed5fSKent Overstreet 	if (ret)
22301b30ed5fSKent Overstreet 		goto err;
22311b30ed5fSKent Overstreet 
2232822835ffSKent Overstreet 	s64 nr_to_invalidate =
2233822835ffSKent Overstreet 		should_invalidate_buckets(ca, bch2_dev_usage_read(ca));
22342e9940d4SKent Overstreet 	struct btree_iter iter;
22352e9940d4SKent Overstreet 	bool wrapped = false;
2236822835ffSKent Overstreet 
22372e9940d4SKent Overstreet 	bch2_trans_iter_init(trans, &iter, BTREE_ID_lru,
22382e9940d4SKent Overstreet 			     lru_pos(ca->dev_idx, 0,
22392e9940d4SKent Overstreet 				     ((bch2_current_io_time(c, READ) + U32_MAX) &
22402e9940d4SKent Overstreet 				      LRU_TIME_MAX)), 0);
22412e9940d4SKent Overstreet 
22422e9940d4SKent Overstreet 	while (true) {
22432e9940d4SKent Overstreet 		bch2_trans_begin(trans);
22442e9940d4SKent Overstreet 
22452e9940d4SKent Overstreet 		struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
22462e9940d4SKent Overstreet 		ret = bkey_err(k);
22472e9940d4SKent Overstreet 		if (ret)
22487ee4be9cSKent Overstreet 			goto restart_err;
22492e9940d4SKent Overstreet 		if (!k.k)
22502e9940d4SKent Overstreet 			break;
22512e9940d4SKent Overstreet 
2252942a418cSKent Overstreet 		ret = invalidate_one_bucket(trans, ca, &iter, k, &last_flushed, &nr_to_invalidate);
22537ee4be9cSKent Overstreet restart_err:
22547ee4be9cSKent Overstreet 		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
22557ee4be9cSKent Overstreet 			continue;
22562e9940d4SKent Overstreet 		if (ret)
22572e9940d4SKent Overstreet 			break;
22582e9940d4SKent Overstreet 
22599180ad2eSKent Overstreet 		bch2_btree_iter_advance(trans, &iter);
22602e9940d4SKent Overstreet 	}
22612e9940d4SKent Overstreet 	bch2_trans_iter_exit(trans, &iter);
22621b30ed5fSKent Overstreet err:
22636bd68ec2SKent Overstreet 	bch2_trans_put(trans);
2264dcffc3b1SKent Overstreet 	percpu_ref_put(&ca->io_ref[WRITE]);
2265942a418cSKent Overstreet 	bch2_bkey_buf_exit(&last_flushed, c);
22665a6e43afSKent Overstreet 	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
226764ee1431SKent Overstreet }
226864ee1431SKent Overstreet 
bch2_dev_do_invalidates(struct bch_dev * ca)226964ee1431SKent Overstreet void bch2_dev_do_invalidates(struct bch_dev *ca)
227064ee1431SKent Overstreet {
227164ee1431SKent Overstreet 	struct bch_fs *c = ca->fs;
227264ee1431SKent Overstreet 
22735a6e43afSKent Overstreet 	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
227464ee1431SKent Overstreet 		return;
227564ee1431SKent Overstreet 
22765a6e43afSKent Overstreet 	if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
22775a6e43afSKent Overstreet 		goto put_ref;
227864ee1431SKent Overstreet 
227964ee1431SKent Overstreet 	if (queue_work(c->write_ref_wq, &ca->invalidate_work))
228064ee1431SKent Overstreet 		return;
228164ee1431SKent Overstreet 
2282dcffc3b1SKent Overstreet 	percpu_ref_put(&ca->io_ref[WRITE]);
22835a6e43afSKent Overstreet put_ref:
22845a6e43afSKent Overstreet 	bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
2285caece7feSKent Overstreet }
2286caece7feSKent Overstreet 
bch2_do_invalidates(struct bch_fs * c)2287caece7feSKent Overstreet void bch2_do_invalidates(struct bch_fs *c)
2288caece7feSKent Overstreet {
228964ee1431SKent Overstreet 	for_each_member_device(c, ca)
229064ee1431SKent Overstreet 		bch2_dev_do_invalidates(ca);
2291caece7feSKent Overstreet }
2292caece7feSKent Overstreet 
/*
 * Initialize the free space indexes for buckets on @ca by scanning the alloc
 * btree from max(ca->mi.first_bucket, @bucket_start) up to @bucket_end.
 *
 * Existing alloc keys are processed one at a time through
 * bch2_bucket_do_index(); each hole reported by bch2_get_key_or_hole() gets a
 * single KEY_TYPE_set key in the freespace btree carrying the hole's position
 * and size. Every step is committed on its own, and transaction restarts
 * retry the current step.
 *
 * On success the member's FREESPACE_INITIALIZED flag is set in the in-memory
 * superblock under c->sb_lock; the superblock is not written out here.
 *
 * Returns 0 on success, or the negative error that stopped the scan.
 */
int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca,
			    u64 bucket_start, u64 bucket_end)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bpos end = POS(ca->dev_idx, bucket_end);
	unsigned long last_report = jiffies;
	struct btree_iter iter;
	struct bch_member *member;
	struct bkey hole;
	int ret;

	BUG_ON(bucket_start > bucket_end);
	BUG_ON(bucket_end > ca->mi.nbuckets);

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
		POS(ca->dev_idx, max_t(u64, ca->mi.first_bucket, bucket_start)),
		BTREE_ITER_prefetch);

	for (;;) {
		struct bkey_s_c k;

		/* Report progress at most once every ten seconds */
		if (time_after(jiffies, last_report + HZ * 10)) {
			bch_info(ca, "%s: currently at %llu/%llu",
				 __func__, iter.pos.offset, ca->mi.nbuckets);
			last_report = jiffies;
		}

		bch2_trans_begin(trans);

		/* Reached the end of the requested range */
		if (bkey_ge(iter.pos, end)) {
			ret = 0;
			break;
		}

		k = bch2_get_key_or_hole(trans, &iter, end, &hole);
		ret = bkey_err(k);
		if (ret)
			goto check_ret;

		if (k.k->type) {
			/* A live alloc key: index this one bucket, then step past it */
			struct bch_alloc_v4 a_convert;
			const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);

			ret = bch2_bucket_do_index(trans, ca, k, a, true);
			if (!ret)
				ret = bch2_trans_commit(trans, NULL, NULL,
							BCH_TRANS_COMMIT_no_enospc);
			if (!ret)
				bch2_btree_iter_advance(trans, &iter);
		} else {
			/* A hole: one freespace key with the hole's position and size */
			struct bkey_i *freespace =
				bch2_trans_kmalloc(trans, sizeof(*freespace));

			ret = PTR_ERR_OR_ZERO(freespace);
			if (ret)
				goto check_ret;

			bkey_init(&freespace->k);
			freespace->k.type	= KEY_TYPE_set;
			freespace->k.p		= k.k->p;
			freespace->k.size	= k.k->size;

			ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace,
						      freespace, 0);
			if (!ret)
				ret = bch2_trans_commit(trans, NULL, NULL,
							BCH_TRANS_COMMIT_no_enospc);
			if (!ret)
				bch2_btree_iter_set_pos(trans, &iter, k.k->p);
		}
check_ret:
		/* Restarts retry the current position; other errors end the scan */
		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
			continue;
		if (ret)
			break;
	}

	bch2_trans_iter_exit(trans, &iter);
	bch2_trans_put(trans);

	if (ret < 0) {
		bch_err_msg(ca, ret, "initializing free space");
		return ret;
	}

	mutex_lock(&c->sb_lock);
	member = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
	SET_BCH_MEMBER_FREESPACE_INITIALIZED(member, true);
	mutex_unlock(&c->sb_lock);

	return 0;
}
2392c6b2826cSKent Overstreet 
bch2_fs_freespace_init(struct bch_fs * c)2393c6b2826cSKent Overstreet int bch2_fs_freespace_init(struct bch_fs *c)
2394c6b2826cSKent Overstreet {
2395c6b2826cSKent Overstreet 	int ret = 0;
2396c6b2826cSKent Overstreet 	bool doing_init = false;
2397c6b2826cSKent Overstreet 
2398c6b2826cSKent Overstreet 	/*
2399c6b2826cSKent Overstreet 	 * We can crash during the device add path, so we need to check this on
2400c6b2826cSKent Overstreet 	 * every mount:
2401c6b2826cSKent Overstreet 	 */
2402c6b2826cSKent Overstreet 
24039fea2274SKent Overstreet 	for_each_member_device(c, ca) {
2404c6b2826cSKent Overstreet 		if (ca->mi.freespace_initialized)
2405c6b2826cSKent Overstreet 			continue;
2406c6b2826cSKent Overstreet 
2407c6b2826cSKent Overstreet 		if (!doing_init) {
2408c6b2826cSKent Overstreet 			bch_info(c, "initializing freespace");
2409c6b2826cSKent Overstreet 			doing_init = true;
2410c6b2826cSKent Overstreet 		}
2411c6b2826cSKent Overstreet 
241269d1f052SKent Overstreet 		ret = bch2_dev_freespace_init(c, ca, 0, ca->mi.nbuckets);
2413c6b2826cSKent Overstreet 		if (ret) {
2414f295298bSKent Overstreet 			bch2_dev_put(ca);
24151bb3c2a9SKent Overstreet 			bch_err_fn(c, ret);
2416c6b2826cSKent Overstreet 			return ret;
2417c6b2826cSKent Overstreet 		}
2418c6b2826cSKent Overstreet 	}
2419c6b2826cSKent Overstreet 
2420c6b2826cSKent Overstreet 	if (doing_init) {
2421c6b2826cSKent Overstreet 		mutex_lock(&c->sb_lock);
2422c6b2826cSKent Overstreet 		bch2_write_super(c);
2423c6b2826cSKent Overstreet 		mutex_unlock(&c->sb_lock);
2424c6b2826cSKent Overstreet 		bch_verbose(c, "done initializing freespace");
2425c6b2826cSKent Overstreet 	}
2426c6b2826cSKent Overstreet 
24271bb3c2a9SKent Overstreet 	return 0;
2428c6b2826cSKent Overstreet }
2429c6b2826cSKent Overstreet 
24306b812f1dSKent Overstreet /* device removal */
24316b812f1dSKent Overstreet 
bch2_dev_remove_alloc(struct bch_fs * c,struct bch_dev * ca)24326b812f1dSKent Overstreet int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)
24336b812f1dSKent Overstreet {
24346b812f1dSKent Overstreet 	struct bpos start	= POS(ca->dev_idx, 0);
24356b812f1dSKent Overstreet 	struct bpos end		= POS(ca->dev_idx, U64_MAX);
24366b812f1dSKent Overstreet 	int ret;
24376b812f1dSKent Overstreet 
24386b812f1dSKent Overstreet 	/*
24396b812f1dSKent Overstreet 	 * We clear the LRU and need_discard btrees first so that we don't race
24406b812f1dSKent Overstreet 	 * with bch2_do_invalidates() and bch2_do_discards()
24416b812f1dSKent Overstreet 	 */
2442ad8d1f77SKent Overstreet 	ret =   bch2_dev_remove_stripes(c, ca->dev_idx) ?:
2443ad8d1f77SKent Overstreet 		bch2_btree_delete_range(c, BTREE_ID_lru, start, end,
24446b812f1dSKent Overstreet 					BTREE_TRIGGER_norun, NULL) ?:
24456b812f1dSKent Overstreet 		bch2_btree_delete_range(c, BTREE_ID_need_discard, start, end,
24466b812f1dSKent Overstreet 					BTREE_TRIGGER_norun, NULL) ?:
24476b812f1dSKent Overstreet 		bch2_btree_delete_range(c, BTREE_ID_freespace, start, end,
24486b812f1dSKent Overstreet 					BTREE_TRIGGER_norun, NULL) ?:
24496b812f1dSKent Overstreet 		bch2_btree_delete_range(c, BTREE_ID_backpointers, start, end,
24506b812f1dSKent Overstreet 					BTREE_TRIGGER_norun, NULL) ?:
24516b812f1dSKent Overstreet 		bch2_btree_delete_range(c, BTREE_ID_bucket_gens, start, end,
24526b812f1dSKent Overstreet 					BTREE_TRIGGER_norun, NULL) ?:
2453ad8d1f77SKent Overstreet 		bch2_btree_delete_range(c, BTREE_ID_alloc, start, end,
2454ad8d1f77SKent Overstreet 					BTREE_TRIGGER_norun, NULL) ?:
24556b812f1dSKent Overstreet 		bch2_dev_usage_remove(c, ca->dev_idx);
24566b812f1dSKent Overstreet 	bch_err_msg(ca, ret, "removing dev alloc info");
24576b812f1dSKent Overstreet 	return ret;
24586b812f1dSKent Overstreet }
24596b812f1dSKent Overstreet 
24607b3f84eaSKent Overstreet /* Bucket IO clocks: */
24617b3f84eaSKent Overstreet 
/*
 * Set io_time[@rw] of bucket @dev:@bucket_nr to the current io clock value
 * for @rw, and commit the change.
 *
 * If the stored time already equals the current one, nothing is updated or
 * committed and 0 is returned. Otherwise returns the result of the
 * update/commit, or the error from starting the alloc update.
 */
static int __bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
				size_t bucket_nr, int rw)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_i_alloc_v4 *a;
	u64 now;
	int ret;

	a = bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(dev, bucket_nr));
	ret = PTR_ERR_OR_ZERO(a);
	if (ret)
		return ret;

	now = bch2_current_io_time(c, rw);

	/* Only touch the key when the timestamp actually changes */
	if (a->v.io_time[rw] != now) {
		a->v.io_time[rw] = now;

		ret = bch2_trans_update(trans, &iter, &a->k_i, 0);
		if (!ret)
			ret = bch2_trans_commit(trans, NULL, NULL, 0);
	}

	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
2486f30dd860SKent Overstreet 
/*
 * Reset the @rw io time of bucket @dev:@bucket_nr from within an existing
 * transaction.
 *
 * If bch2_trans_relock() returns nonzero the transaction is begun afresh;
 * the actual update then runs under nested_lockrestart_do(), whose result is
 * returned.
 */
int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
			      size_t bucket_nr, int rw)
{
	int ret;

	if (bch2_trans_relock(trans))
		bch2_trans_begin(trans);

	ret = nested_lockrestart_do(trans,
			__bch2_bucket_io_time_reset(trans, dev, bucket_nr, rw));
	return ret;
}
24956bee2a04SKent Overstreet 
24967b3f84eaSKent Overstreet /* Startup/shutdown (ro/rw): */
24977b3f84eaSKent Overstreet 
bch2_recalc_capacity(struct bch_fs * c)24987b3f84eaSKent Overstreet void bch2_recalc_capacity(struct bch_fs *c)
24997b3f84eaSKent Overstreet {
2500cb66fc5fSKent Overstreet 	u64 capacity = 0, reserved_sectors = 0, gc_reserve;
2501b092daddSKent Overstreet 	unsigned bucket_size_max = 0;
25027b3f84eaSKent Overstreet 	unsigned long ra_pages = 0;
25037b3f84eaSKent Overstreet 
25047b3f84eaSKent Overstreet 	lockdep_assert_held(&c->state_lock);
25057b3f84eaSKent Overstreet 
25069fea2274SKent Overstreet 	for_each_online_member(c, ca) {
25077b3f84eaSKent Overstreet 		struct backing_dev_info *bdi = ca->disk_sb.bdev->bd_disk->bdi;
25087b3f84eaSKent Overstreet 
25097b3f84eaSKent Overstreet 		ra_pages += bdi->ra_pages;
25107b3f84eaSKent Overstreet 	}
25117b3f84eaSKent Overstreet 
25127b3f84eaSKent Overstreet 	bch2_set_ra_pages(c, ra_pages);
25137b3f84eaSKent Overstreet 
2514dcffc3b1SKent Overstreet 	__for_each_online_member(c, ca, BIT(BCH_MEMBER_STATE_rw), READ) {
25157b3f84eaSKent Overstreet 		u64 dev_reserve = 0;
25167b3f84eaSKent Overstreet 
25177b3f84eaSKent Overstreet 		/*
25187b3f84eaSKent Overstreet 		 * We need to reserve buckets (from the number
25197b3f84eaSKent Overstreet 		 * of currently available buckets) against
25207b3f84eaSKent Overstreet 		 * foreground writes so that mainly copygc can
25217b3f84eaSKent Overstreet 		 * make forward progress.
25227b3f84eaSKent Overstreet 		 *
25237b3f84eaSKent Overstreet 		 * We need enough to refill the various reserves
25247b3f84eaSKent Overstreet 		 * from scratch - copygc will use its entire
25257b3f84eaSKent Overstreet 		 * reserve all at once, then run against when
25267b3f84eaSKent Overstreet 		 * its reserve is refilled (from the formerly
25277b3f84eaSKent Overstreet 		 * available buckets).
25287b3f84eaSKent Overstreet 		 *
25297b3f84eaSKent Overstreet 		 * This reserve is just used when considering if
25307b3f84eaSKent Overstreet 		 * allocations for foreground writes must wait -
25317b3f84eaSKent Overstreet 		 * not -ENOSPC calculations.
25327b3f84eaSKent Overstreet 		 */
2533f25d8215SKent Overstreet 
2534f25d8215SKent Overstreet 		dev_reserve += ca->nr_btree_reserve * 2;
2535f25d8215SKent Overstreet 		dev_reserve += ca->mi.nbuckets >> 6; /* copygc reserve */
25367b3f84eaSKent Overstreet 
25377b3f84eaSKent Overstreet 		dev_reserve += 1;	/* btree write point */
25387b3f84eaSKent Overstreet 		dev_reserve += 1;	/* copygc write point */
25397b3f84eaSKent Overstreet 		dev_reserve += 1;	/* rebalance write point */
25407b3f84eaSKent Overstreet 
25417b3f84eaSKent Overstreet 		dev_reserve *= ca->mi.bucket_size;
25427b3f84eaSKent Overstreet 
25437b3f84eaSKent Overstreet 		capacity += bucket_to_sector(ca, ca->mi.nbuckets -
25447b3f84eaSKent Overstreet 					     ca->mi.first_bucket);
25457b3f84eaSKent Overstreet 
25467b3f84eaSKent Overstreet 		reserved_sectors += dev_reserve * 2;
2547b092daddSKent Overstreet 
2548b092daddSKent Overstreet 		bucket_size_max = max_t(unsigned, bucket_size_max,
2549b092daddSKent Overstreet 					ca->mi.bucket_size);
25507b3f84eaSKent Overstreet 	}
25517b3f84eaSKent Overstreet 
25527b3f84eaSKent Overstreet 	gc_reserve = c->opts.gc_reserve_bytes
25537b3f84eaSKent Overstreet 		? c->opts.gc_reserve_bytes >> 9
25547b3f84eaSKent Overstreet 		: div64_u64(capacity * c->opts.gc_reserve_percent, 100);
25557b3f84eaSKent Overstreet 
25567b3f84eaSKent Overstreet 	reserved_sectors = max(gc_reserve, reserved_sectors);
25577b3f84eaSKent Overstreet 
25587b3f84eaSKent Overstreet 	reserved_sectors = min(reserved_sectors, capacity);
25597b3f84eaSKent Overstreet 
256026a170aaSKent Overstreet 	c->reserved = reserved_sectors;
25617b3f84eaSKent Overstreet 	c->capacity = capacity - reserved_sectors;
25627b3f84eaSKent Overstreet 
2563b092daddSKent Overstreet 	c->bucket_size_max = bucket_size_max;
2564b092daddSKent Overstreet 
25657b3f84eaSKent Overstreet 	/* Wake up case someone was waiting for buckets */
25667b3f84eaSKent Overstreet 	closure_wake_up(&c->freelist_wait);
25677b3f84eaSKent Overstreet }
25687b3f84eaSKent Overstreet 
bch2_min_rw_member_capacity(struct bch_fs * c)25691f7056b7SKent Overstreet u64 bch2_min_rw_member_capacity(struct bch_fs *c)
25701f7056b7SKent Overstreet {
25711f7056b7SKent Overstreet 	u64 ret = U64_MAX;
25721f7056b7SKent Overstreet 
25739fea2274SKent Overstreet 	for_each_rw_member(c, ca)
25741f7056b7SKent Overstreet 		ret = min(ret, ca->mi.nbuckets * ca->mi.bucket_size);
25751f7056b7SKent Overstreet 	return ret;
25761f7056b7SKent Overstreet }
25771f7056b7SKent Overstreet 
bch2_dev_has_open_write_point(struct bch_fs * c,struct bch_dev * ca)25787b3f84eaSKent Overstreet static bool bch2_dev_has_open_write_point(struct bch_fs *c, struct bch_dev *ca)
25797b3f84eaSKent Overstreet {
25807b3f84eaSKent Overstreet 	struct open_bucket *ob;
25817b3f84eaSKent Overstreet 	bool ret = false;
25827b3f84eaSKent Overstreet 
25837b3f84eaSKent Overstreet 	for (ob = c->open_buckets;
25847b3f84eaSKent Overstreet 	     ob < c->open_buckets + ARRAY_SIZE(c->open_buckets);
25857b3f84eaSKent Overstreet 	     ob++) {
25867b3f84eaSKent Overstreet 		spin_lock(&ob->lock);
25877b3f84eaSKent Overstreet 		if (ob->valid && !ob->on_partial_list &&
2588abe19d45SKent Overstreet 		    ob->dev == ca->dev_idx)
25897b3f84eaSKent Overstreet 			ret = true;
25907b3f84eaSKent Overstreet 		spin_unlock(&ob->lock);
25917b3f84eaSKent Overstreet 	}
25927b3f84eaSKent Overstreet 
25937b3f84eaSKent Overstreet 	return ret;
25947b3f84eaSKent Overstreet }
25957b3f84eaSKent Overstreet 
25967b3f84eaSKent Overstreet /* device goes ro: */
bch2_dev_allocator_remove(struct bch_fs * c,struct bch_dev * ca)25977b3f84eaSKent Overstreet void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
25987b3f84eaSKent Overstreet {
259983ccd9b3SKent Overstreet 	lockdep_assert_held(&c->state_lock);
26007b3f84eaSKent Overstreet 
26017b3f84eaSKent Overstreet 	/* First, remove device from allocation groups: */
26027b3f84eaSKent Overstreet 
260383ccd9b3SKent Overstreet 	for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
26047b3f84eaSKent Overstreet 		clear_bit(ca->dev_idx, c->rw_devs[i].d);
26057b3f84eaSKent Overstreet 
260683ccd9b3SKent Overstreet 	c->rw_devs_change_count++;
260783ccd9b3SKent Overstreet 
26087b3f84eaSKent Overstreet 	/*
26097b3f84eaSKent Overstreet 	 * Capacity is calculated based off of devices in allocation groups:
26107b3f84eaSKent Overstreet 	 */
26117b3f84eaSKent Overstreet 	bch2_recalc_capacity(c);
26127b3f84eaSKent Overstreet 
2613b40901b0SKent Overstreet 	bch2_open_buckets_stop(c, ca, false);
2614cd575ddfSKent Overstreet 
26157b3f84eaSKent Overstreet 	/*
26167b3f84eaSKent Overstreet 	 * Wake up threads that were blocked on allocation, so they can notice
26177b3f84eaSKent Overstreet 	 * the device can no longer be removed and the capacity has changed:
26187b3f84eaSKent Overstreet 	 */
26197b3f84eaSKent Overstreet 	closure_wake_up(&c->freelist_wait);
26207b3f84eaSKent Overstreet 
26217b3f84eaSKent Overstreet 	/*
26227b3f84eaSKent Overstreet 	 * journal_res_get() can block waiting for free space in the journal -
26237b3f84eaSKent Overstreet 	 * it needs to notice there may not be devices to allocate from anymore:
26247b3f84eaSKent Overstreet 	 */
26257b3f84eaSKent Overstreet 	wake_up(&c->journal.wait);
26267b3f84eaSKent Overstreet 
26277b3f84eaSKent Overstreet 	/* Now wait for any in flight writes: */
26287b3f84eaSKent Overstreet 
26297b3f84eaSKent Overstreet 	closure_wait_event(&c->open_buckets_wait,
26307b3f84eaSKent Overstreet 			   !bch2_dev_has_open_write_point(c, ca));
26317b3f84eaSKent Overstreet }
26327b3f84eaSKent Overstreet 
26337b3f84eaSKent Overstreet /* device goes rw: */
bch2_dev_allocator_add(struct bch_fs * c,struct bch_dev * ca)26347b3f84eaSKent Overstreet void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
26357b3f84eaSKent Overstreet {
263683ccd9b3SKent Overstreet 	lockdep_assert_held(&c->state_lock);
26377b3f84eaSKent Overstreet 
263883ccd9b3SKent Overstreet 	for (unsigned i = 0; i < ARRAY_SIZE(c->rw_devs); i++)
26397b3f84eaSKent Overstreet 		if (ca->mi.data_allowed & (1 << i))
26407b3f84eaSKent Overstreet 			set_bit(ca->dev_idx, c->rw_devs[i].d);
264183ccd9b3SKent Overstreet 
264283ccd9b3SKent Overstreet 	c->rw_devs_change_count++;
26437b3f84eaSKent Overstreet }
26447b3f84eaSKent Overstreet 
bch2_dev_allocator_background_exit(struct bch_dev * ca)264564ee1431SKent Overstreet void bch2_dev_allocator_background_exit(struct bch_dev *ca)
2646a393f331SKent Overstreet {
264764ee1431SKent Overstreet 	darray_exit(&ca->discard_buckets_in_flight);
264864ee1431SKent Overstreet }
264964ee1431SKent Overstreet 
bch2_dev_allocator_background_init(struct bch_dev * ca)265064ee1431SKent Overstreet void bch2_dev_allocator_background_init(struct bch_dev *ca)
265164ee1431SKent Overstreet {
265264ee1431SKent Overstreet 	mutex_init(&ca->discard_buckets_in_flight_lock);
265364ee1431SKent Overstreet 	INIT_WORK(&ca->discard_work, bch2_do_discards_work);
265464ee1431SKent Overstreet 	INIT_WORK(&ca->discard_fast_work, bch2_do_discards_fast_work);
265564ee1431SKent Overstreet 	INIT_WORK(&ca->invalidate_work, bch2_do_invalidates_work);
2656a393f331SKent Overstreet }
2657a393f331SKent Overstreet 
bch2_fs_allocator_background_init(struct bch_fs * c)2658b092daddSKent Overstreet void bch2_fs_allocator_background_init(struct bch_fs *c)
26597b3f84eaSKent Overstreet {
26607b3f84eaSKent Overstreet 	spin_lock_init(&c->freelist_lock);
26617b3f84eaSKent Overstreet }
2662