From 7773df19c35fabdcc8c36176a480a1b19ad32866 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 25 Apr 2024 20:45:00 -0400 Subject: [PATCH] bcachefs: metadata version bucket_stripe_sectors New on disk format version for bch_alloc->stripe_sectors and BCH_DATA_unstriped - accounting for unstriped data in stripe buckets. Upgrade/downgrade requires regenerating alloc info - but only if erasure coding is in use. Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/bcachefs_format.h | 3 +- fs/bcachefs/btree_iter.h | 8 ++++ fs/bcachefs/btree_update.h | 8 ---- fs/bcachefs/recovery.c | 5 ++ fs/bcachefs/sb-downgrade.c | 88 +++++++++++++++++++++++++++++++---- fs/bcachefs/sb-downgrade.h | 1 + 7 files changed, 96 insertions(+), 18 deletions(-) diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 4d93889f3bae..6eec526c45d4 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -592,6 +592,7 @@ struct bch_dev { #define BCH_FS_FLAGS() \ x(new_fs) \ x(started) \ + x(btree_running) \ x(may_go_rw) \ x(rw) \ x(was_rw) \ diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 1d580e529da5..cdcb9bc4cc14 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -723,7 +723,8 @@ struct bch_sb_field_ext { x(member_seq, BCH_VERSION(1, 4)) \ x(subvolume_fs_parent, BCH_VERSION(1, 5)) \ x(btree_subvolume_children, BCH_VERSION(1, 6)) \ - x(mi_btree_bitmap, BCH_VERSION(1, 7)) + x(mi_btree_bitmap, BCH_VERSION(1, 7)) \ + x(bucket_stripe_sectors, BCH_VERSION(1, 8)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 798eb1c47966..699c1b8ef112 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -866,6 +866,14 @@ __bch2_btree_iter_peek_and_restart(struct btree_trans *trans, _p; \ }) +#define bch2_trans_run(_c, _do) \ +({ \ + struct btree_trans *trans = bch2_trans_get(_c); \ + int _ret = (_do); \ + bch2_trans_put(trans); \ + _ret; \ +}) + void bch2_trans_updates_to_text(struct printbuf *, struct btree_trans *); void bch2_btree_path_to_text(struct printbuf *, struct btree_trans *, btree_path_idx_t); void bch2_trans_paths_to_text(struct printbuf *, struct btree_trans *); diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index b4894e4d5447..cbe0ee3c7168 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -178,14 +178,6 @@ static inline int bch2_trans_commit(struct btree_trans *trans, nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\ (_journal_seq), (_flags))) -#define bch2_trans_run(_c, _do) \ -({ \ - struct btree_trans *trans = bch2_trans_get(_c); \ - int _ret = (_do); \ - bch2_trans_put(trans); \ - _ret; \ -}) - #define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \ bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do)) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 1f9d044ed920..34c4899c4599 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -810,6 +810,10 @@ use_clean: if (ret) goto err; + set_bit(BCH_FS_btree_running, &c->flags); + + ret = bch2_sb_set_upgrade_extra(c); + ret = bch2_run_recovery_passes(c); if (ret) goto err; @@ -969,6 +973,7 @@ int bch2_fs_initialize(struct bch_fs *c) mutex_unlock(&c->sb_lock); c->curr_recovery_pass = BCH_RECOVERY_PASS_NR; + set_bit(BCH_FS_btree_running, &c->flags); set_bit(BCH_FS_may_go_rw, &c->flags); for (unsigned i = 0; i < BTREE_ID_NR; i++) diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index 4710b61631f0..9596f470e166 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -56,7 +56,9 @@ BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ BCH_FSCK_ERR_btree_bitmap_not_marked) -#define DOWNGRADE_TABLE() +#define DOWNGRADE_TABLE() \ + x(bucket_stripe_sectors, \ + 0) struct upgrade_downgrade_entry { u64 recovery_passes; @@ -80,6 +82,37 @@ UPGRADE_TABLE() #undef x }; +static int have_stripes(struct bch_fs *c) +{ + return !btree_node_fake(c->btree_roots_known[BTREE_ID_stripes].b); +} + +int bch2_sb_set_upgrade_extra(struct bch_fs *c) +{ + unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; + unsigned new_version = c->sb.version; + bool write_sb = false; + int ret = 0; + + mutex_lock(&c->sb_lock); + struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); + + if (old_version < bcachefs_metadata_version_bucket_stripe_sectors && + new_version >= bcachefs_metadata_version_bucket_stripe_sectors && + (ret = have_stripes(c) > 0)) { + __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_allocations, ext->recovery_passes_required); + __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); + __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_sectors_wrong, ext->errors_silent); + write_sb = true; + } + + if (write_sb) + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + + return ret < 0 ? ret : 0; +} + void bch2_sb_set_upgrade(struct bch_fs *c, unsigned old_version, unsigned new_version) @@ -101,16 +134,12 @@ void bch2_sb_set_upgrade(struct bch_fs *c, ext->recovery_passes_required[0] |= cpu_to_le64(bch2_recovery_passes_to_stable(passes)); - for (const u16 *e = i->errors; - e < i->errors + i->nr_errors; - e++) { - __set_bit(*e, c->sb.errors_silent); - ext->errors_silent[*e / 64] |= cpu_to_le64(BIT_ULL(*e % 64)); - } + for (const u16 *e = i->errors; e < i->errors + i->nr_errors; e++) + __set_bit_le64(*e, ext->errors_silent); } } -#define x(ver, passes, ...) static const u16 downgrade_ver_##errors[] = { __VA_ARGS__ }; +#define x(ver, passes, ...) static const u16 downgrade_##ver##_errors[] = { __VA_ARGS__ }; DOWNGRADE_TABLE() #undef x @@ -125,6 +154,37 @@ DOWNGRADE_TABLE() #undef x }; +static int downgrade_table_extra(struct bch_fs *c, darray_char *table) +{ + struct bch_sb_field_downgrade_entry *dst = (void *) &darray_top(*table); + unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors); + int ret = 0; + + unsigned nr_errors = le16_to_cpu(dst->nr_errors); + + switch (le16_to_cpu(dst->version)) { + case bcachefs_metadata_version_bucket_stripe_sectors: + if (have_stripes(c)) { + bytes += sizeof(dst->errors[0]) * 2; + + ret = darray_make_room(table, bytes); + if (ret) + return ret; + + /* open coded __set_bit_le64, as dst is packed and + * dst->recovery_passes is misaligned */ + unsigned b = BCH_RECOVERY_PASS_STABLE_check_allocations; + dst->recovery_passes[b / 64] |= cpu_to_le64(BIT_ULL(b % 64)); + + dst->errors[nr_errors++] = cpu_to_le16(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong); + } + break; + } + + dst->nr_errors = cpu_to_le16(nr_errors); + return ret; +} + static inline const struct bch_sb_field_downgrade_entry * downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e) { @@ -210,6 +270,9 @@ const struct bch_sb_field_ops bch_sb_field_ops_downgrade = { int bch2_sb_downgrade_update(struct bch_fs *c) { + if (!test_bit(BCH_FS_btree_running, &c->flags)) + return 0; + darray_char table = {}; int ret = 0; @@ -234,7 +297,14 @@ int bch2_sb_downgrade_update(struct bch_fs *c) for (unsigned i = 0; i < src->nr_errors; i++) dst->errors[i] = cpu_to_le16(src->errors[i]); - table.nr += bytes; + downgrade_table_extra(c, &table); + + if (!dst->recovery_passes[0] && + !dst->recovery_passes[1] && + !dst->nr_errors) + continue; + + table.nr += sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors); } struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade); diff --git a/fs/bcachefs/sb-downgrade.h b/fs/bcachefs/sb-downgrade.h index 57e6c916fc73..095b7cc9bb47 100644 --- a/fs/bcachefs/sb-downgrade.h +++ b/fs/bcachefs/sb-downgrade.h @@ -6,6 +6,7 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_downgrade; int bch2_sb_downgrade_update(struct bch_fs *); void bch2_sb_set_upgrade(struct bch_fs *, unsigned, unsigned); +int bch2_sb_set_upgrade_extra(struct bch_fs *); void bch2_sb_set_downgrade(struct bch_fs *, unsigned, unsigned); #endif /* _BCACHEFS_SB_DOWNGRADE_H */