bcachefs: Add code to scan for/rewrite old btree nodes

This adds a new data job type to scan for btree nodes in the old extent
format, and rewrite them.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
Kent Overstreet 2021-03-14 19:01:14 -04:00 committed by Kent Overstreet
parent 8567415457
commit 1889ad5a12
8 changed files with 132 additions and 38 deletions

View File

@ -171,10 +171,11 @@ struct bch_ioctl_disk_set_state {
};
enum bch_data_ops {
BCH_DATA_OP_SCRUB = 0,
BCH_DATA_OP_REREPLICATE = 1,
BCH_DATA_OP_MIGRATE = 2,
BCH_DATA_OP_NR = 3,
BCH_DATA_OP_SCRUB = 0,
BCH_DATA_OP_REREPLICATE = 1,
BCH_DATA_OP_MIGRATE = 2,
BCH_DATA_OP_REWRITE_OLD_NODES = 3,
BCH_DATA_OP_NR = 4,
};
/*
@ -187,11 +188,13 @@ enum bch_data_ops {
* job. The file descriptor is O_CLOEXEC.
*/
struct bch_ioctl_data {
__u32 op;
__u16 op;
__u8 start_btree;
__u8 end_btree;
__u32 flags;
struct bpos start;
struct bpos end;
struct bpos start_pos;
struct bpos end_pos;
union {
struct {

View File

@ -920,6 +920,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
unsigned u64s;
int ret, retry_read = 0, write = READ;
b->version_ondisk = U16_MAX;
iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
sort_iter_init(iter, b);
iter->size = (btree_blocks(c) + 1) * 2;
@ -1000,6 +1002,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
sectors = vstruct_sectors(bne, c->block_bits);
}
b->version_ondisk = min(b->version_ondisk,
le16_to_cpu(i->version));
ret = validate_bset(c, ca, b, i, sectors,
READ, have_retry);
if (ret)

View File

@ -76,6 +76,7 @@ struct btree {
u16 written;
u8 nsets;
u8 nr_key_bits;
u16 version_ondisk;
struct bkey_format format;

View File

@ -286,6 +286,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
bch2_bset_init_first(b, &b->data->keys);
b->c.level = level;
b->c.btree_id = as->btree_id;
b->version_ondisk = c->sb.version;
memset(&b->nr, 0, sizeof(b->nr));
b->data->magic = cpu_to_le64(bset_magic(c));

View File

@ -531,7 +531,7 @@ static int __bch2_move_data(struct bch_fs *c,
stats->data_type = BCH_DATA_user;
stats->btree_id = btree_id;
stats->pos = POS_MIN;
stats->pos = start;
iter = bch2_trans_get_iter(&trans, btree_id, start,
BTREE_ITER_PREFETCH);
@ -646,14 +646,15 @@ out:
}
int bch2_move_data(struct bch_fs *c,
enum btree_id start_btree_id, struct bpos start_pos,
enum btree_id end_btree_id, struct bpos end_pos,
struct bch_ratelimit *rate,
struct write_point_specifier wp,
struct bpos start,
struct bpos end,
move_pred_fn pred, void *arg,
struct bch_move_stats *stats)
{
struct moving_context ctxt = { .stats = stats };
enum btree_id id;
int ret;
closure_init_stack(&ctxt.cl);
@ -662,10 +663,23 @@ int bch2_move_data(struct bch_fs *c,
stats->data_type = BCH_DATA_user;
ret = __bch2_move_data(c, &ctxt, rate, wp, start, end,
pred, arg, stats, BTREE_ID_EXTENTS) ?:
__bch2_move_data(c, &ctxt, rate, wp, start, end,
pred, arg, stats, BTREE_ID_REFLINK);
for (id = start_btree_id;
id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
id++) {
stats->btree_id = id;
if (id != BTREE_ID_EXTENTS &&
id != BTREE_ID_REFLINK)
continue;
ret = __bch2_move_data(c, &ctxt, rate, wp,
id == start_btree_id ? start_pos : POS_MIN,
id == end_btree_id ? end_pos : POS_MAX,
pred, arg, stats, id);
if (ret)
break;
}
move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
closure_sync(&ctxt.cl);
@ -679,16 +693,22 @@ int bch2_move_data(struct bch_fs *c,
return ret;
}
/*
 * Predicate type used by bch2_move_btree(): called once per btree node
 * visited, with the filesystem, the caller's opaque argument, the node
 * itself, and the io/data option structs. The returned data_cmd tells
 * the walker what to do with the node (e.g. DATA_SKIP, DATA_REWRITE).
 */
typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *,
struct btree *, struct bch_io_opts *,
struct data_opts *);
static int bch2_move_btree(struct bch_fs *c,
move_pred_fn pred,
void *arg,
enum btree_id start_btree_id, struct bpos start_pos,
enum btree_id end_btree_id, struct bpos end_pos,
move_btree_pred pred, void *arg,
struct bch_move_stats *stats)
{
bool kthread = (current->flags & PF_KTHREAD) != 0;
struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
struct btree_trans trans;
struct btree_iter *iter;
struct btree *b;
unsigned id;
enum btree_id id;
struct data_opts data_opts;
enum data_cmd cmd;
int ret = 0;
@ -697,16 +717,24 @@ static int bch2_move_btree(struct bch_fs *c,
stats->data_type = BCH_DATA_btree;
for (id = 0; id < BTREE_ID_NR; id++) {
for (id = start_btree_id;
id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
id++) {
stats->btree_id = id;
for_each_btree_node(&trans, iter, id, POS_MIN,
for_each_btree_node(&trans, iter, id,
id == start_btree_id ? start_pos : POS_MIN,
BTREE_ITER_PREFETCH, b) {
if (kthread && kthread_should_stop())
goto out;
if ((cmp_int(id, end_btree_id) ?:
bkey_cmp(b->key.k.p, end_pos)) > 0)
break;
stats->pos = iter->pos;
switch ((cmd = pred(c, arg,
bkey_i_to_s_c(&b->key),
&io_opts, &data_opts))) {
switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) {
case DATA_SKIP:
goto next;
case DATA_SCRUB:
@ -726,7 +754,7 @@ next:
ret = bch2_trans_iter_free(&trans, iter) ?: ret;
}
out:
bch2_trans_exit(&trans);
return ret;
@ -785,6 +813,38 @@ static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
return DATA_REWRITE;
}
static enum data_cmd rereplicate_btree_pred(struct bch_fs *c, void *arg,
struct btree *b,
struct bch_io_opts *io_opts,
struct data_opts *data_opts)
{
return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}
static enum data_cmd migrate_btree_pred(struct bch_fs *c, void *arg,
struct btree *b,
struct bch_io_opts *io_opts,
struct data_opts *data_opts)
{
return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
}
/*
 * Decide whether a btree node should be rewritten.
 *
 * A node is left alone (DATA_SKIP) only when its on-disk version equals
 * the superblock version and btree_node_need_rewrite() does not flag it.
 * Otherwise data_opts is filled in (no target, one replica, no extra
 * insert flags) and DATA_REWRITE is returned.
 *
 * @arg and @io_opts are not consulted.
 */
static enum data_cmd rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
					    struct btree *b,
					    struct bch_io_opts *io_opts,
					    struct data_opts *data_opts)
{
	/* Version check first; the rewrite flag is only tested on a match. */
	if (b->version_ondisk == c->sb.version &&
	    !btree_node_need_rewrite(b))
		return DATA_SKIP;

	data_opts->target		= 0;
	data_opts->nr_replicas		= 1;
	data_opts->btree_insert_flags	= 0;

	return DATA_REWRITE;
}
int bch2_data_job(struct bch_fs *c,
struct bch_move_stats *stats,
struct bch_ioctl_data op)
@ -796,17 +856,20 @@ int bch2_data_job(struct bch_fs *c,
stats->data_type = BCH_DATA_journal;
ret = bch2_journal_flush_device_pins(&c->journal, -1);
ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;
ret = bch2_move_btree(c,
op.start_btree, op.start_pos,
op.end_btree, op.end_pos,
rereplicate_btree_pred, c, stats) ?: ret;
closure_wait_event(&c->btree_interior_update_wait,
!bch2_btree_interior_updates_nr_pending(c));
ret = bch2_replicas_gc2(c) ?: ret;
ret = bch2_move_data(c, NULL,
writepoint_hashed((unsigned long) current),
op.start,
op.end,
ret = bch2_move_data(c,
op.start_btree, op.start_pos,
op.end_btree, op.end_pos,
NULL, writepoint_hashed((unsigned long) current),
rereplicate_pred, c, stats) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret;
break;
@ -817,16 +880,32 @@ int bch2_data_job(struct bch_fs *c,
stats->data_type = BCH_DATA_journal;
ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret;
ret = bch2_move_btree(c,
op.start_btree, op.start_pos,
op.end_btree, op.end_pos,
migrate_btree_pred, &op, stats) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret;
ret = bch2_move_data(c, NULL,
writepoint_hashed((unsigned long) current),
op.start,
op.end,
ret = bch2_move_data(c,
op.start_btree, op.start_pos,
op.end_btree, op.end_pos,
NULL, writepoint_hashed((unsigned long) current),
migrate_pred, &op, stats) ?: ret;
ret = bch2_replicas_gc2(c) ?: ret;
break;
case BCH_DATA_OP_REWRITE_OLD_NODES:
ret = bch2_move_btree(c,
op.start_btree, op.start_pos,
op.end_btree, op.end_pos,
rewrite_old_nodes_pred, &op, stats) ?: ret;
if (!ret) {
mutex_lock(&c->sb_lock);
c->disk_sb.sb->version_min = c->disk_sb.sb->version;
bch2_write_super(c);
mutex_unlock(&c->sb_lock);
}
break;
default:
ret = -EINVAL;
}

View File

@ -52,9 +52,11 @@ typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
struct bkey_s_c,
struct bch_io_opts *, struct data_opts *);
int bch2_move_data(struct bch_fs *, struct bch_ratelimit *,
int bch2_move_data(struct bch_fs *,
enum btree_id, struct bpos,
enum btree_id, struct bpos,
struct bch_ratelimit *,
struct write_point_specifier,
struct bpos, struct bpos,
move_pred_fn, void *,
struct bch_move_stats *);

View File

@ -219,9 +219,11 @@ static int bch2_copygc(struct bch_fs *c)
sizeof(h->data[0]),
bucket_offset_cmp, NULL);
ret = bch2_move_data(c, &c->copygc_pd.rate,
ret = bch2_move_data(c,
0, POS_MIN,
BTREE_ID_NR, POS_MAX,
&c->copygc_pd.rate,
writepoint_ptr(&c->copygc_write_point),
POS_MIN, POS_MAX,
copygc_pred, NULL,
&move_stats);

View File

@ -239,10 +239,11 @@ static int bch2_rebalance_thread(void *arg)
rebalance_work_reset(c);
bch2_move_data(c,
0, POS_MIN,
BTREE_ID_NR, POS_MAX,
/* ratelimiting disabled for now */
NULL, /* &r->pd.rate, */
writepoint_ptr(&c->rebalance_write_point),
POS_MIN, POS_MAX,
rebalance_pred, NULL,
&r->move_stats);
}