mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-15 16:24:13 +08:00
bcachefs: Fix overlapping extent repair
A number of smallish fixes for overlapping extent repair, and (part of) a new unit test. This fixes all the issues turned up by bhzhu203, in his filesystem image from running mongodb + snapshots. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
parent
b56b787c7d
commit
e2bd06178c
@ -408,6 +408,28 @@ static inline void snapshots_seen_init(struct snapshots_seen *s)
|
||||
memset(s, 0, sizeof(*s));
|
||||
}
|
||||
|
||||
static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id)
|
||||
{
|
||||
struct snapshots_seen_entry *i, n = {
|
||||
.id = id,
|
||||
.equiv = bch2_snapshot_equiv(c, id),
|
||||
};
|
||||
int ret = 0;
|
||||
|
||||
darray_for_each(s->ids, i) {
|
||||
if (i->id == id)
|
||||
return 0;
|
||||
if (i->id > id)
|
||||
break;
|
||||
}
|
||||
|
||||
ret = darray_insert_item(&s->ids, i - s->ids.data, n);
|
||||
if (ret)
|
||||
bch_err(c, "error reallocating snapshots_seen table (size %zu)",
|
||||
s->ids.size);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
|
||||
enum btree_id btree_id, struct bpos pos)
|
||||
{
|
||||
@ -1122,74 +1144,116 @@ static int extent_ends_at(struct bch_fs *c,
|
||||
|
||||
static int overlapping_extents_found(struct btree_trans *trans,
|
||||
enum btree_id btree,
|
||||
struct bpos pos1, struct bkey pos2,
|
||||
bool *fixed)
|
||||
struct bpos pos1, struct snapshots_seen *pos1_seen,
|
||||
struct bkey pos2,
|
||||
bool *fixed,
|
||||
struct extent_end *extent_end)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct printbuf buf = PRINTBUF;
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k;
|
||||
u32 snapshot = min(pos1.snapshot, pos2.p.snapshot);
|
||||
struct btree_iter iter1, iter2 = { NULL };
|
||||
struct bkey_s_c k1, k2;
|
||||
int ret;
|
||||
|
||||
BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2)));
|
||||
|
||||
bch2_trans_iter_init(trans, &iter, btree, SPOS(pos1.inode, pos1.offset - 1, snapshot), 0);
|
||||
k = bch2_btree_iter_peek_upto(&iter, POS(pos1.inode, U64_MAX));
|
||||
ret = bkey_err(k);
|
||||
bch2_trans_iter_init(trans, &iter1, btree, pos1,
|
||||
BTREE_ITER_ALL_SNAPSHOTS|
|
||||
BTREE_ITER_NOT_EXTENTS);
|
||||
k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX));
|
||||
ret = bkey_err(k1);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
prt_str(&buf, "\n ");
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
bch2_bkey_val_to_text(&buf, c, k1);
|
||||
|
||||
if (!bpos_eq(pos1, k.k->p)) {
|
||||
bch_err(c, "%s: error finding first overlapping extent when repairing%s",
|
||||
if (!bpos_eq(pos1, k1.k->p)) {
|
||||
prt_str(&buf, "\n wanted\n ");
|
||||
bch2_bpos_to_text(&buf, pos1);
|
||||
prt_str(&buf, "\n ");
|
||||
bch2_bkey_to_text(&buf, &pos2);
|
||||
|
||||
bch_err(c, "%s: error finding first overlapping extent when repairing, got%s",
|
||||
__func__, buf.buf);
|
||||
ret = -BCH_ERR_internal_fsck_err;
|
||||
goto err;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
bch2_btree_iter_advance(&iter);
|
||||
bch2_trans_copy_iter(&iter2, &iter1);
|
||||
|
||||
k = bch2_btree_iter_peek_upto(&iter, POS(pos1.inode, U64_MAX));
|
||||
ret = bkey_err(k);
|
||||
while (1) {
|
||||
bch2_btree_iter_advance(&iter2);
|
||||
|
||||
k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX));
|
||||
ret = bkey_err(k2);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (bkey_ge(k.k->p, pos2.p))
|
||||
if (bpos_ge(k2.k->p, pos2.p))
|
||||
break;
|
||||
|
||||
}
|
||||
|
||||
prt_str(&buf, "\n ");
|
||||
bch2_bkey_val_to_text(&buf, c, k);
|
||||
bch2_bkey_val_to_text(&buf, c, k2);
|
||||
|
||||
if (bkey_gt(k.k->p, pos2.p) ||
|
||||
pos2.size != k.k->size) {
|
||||
if (bpos_gt(k2.k->p, pos2.p) ||
|
||||
pos2.size != k2.k->size) {
|
||||
bch_err(c, "%s: error finding seconding overlapping extent when repairing%s",
|
||||
__func__, buf.buf);
|
||||
ret = -BCH_ERR_internal_fsck_err;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (fsck_err(c, "overlapping extents%s", buf.buf)) {
|
||||
struct bpos update_pos = pos1.snapshot < pos2.p.snapshot ? pos1 : pos2.p;
|
||||
struct btree_iter update_iter;
|
||||
prt_printf(&buf, "\n overwriting %s extent",
|
||||
pos1.snapshot >= pos2.p.snapshot ? "first" : "second");
|
||||
|
||||
struct bkey_i *update = bch2_bkey_get_mut(trans, &update_iter,
|
||||
btree, update_pos,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
|
||||
bch2_trans_iter_exit(trans, &update_iter);
|
||||
if ((ret = PTR_ERR_OR_ZERO(update)))
|
||||
if (fsck_err(c, "overlapping extents%s", buf.buf)) {
|
||||
struct btree_iter *old_iter = &iter1;
|
||||
struct disk_reservation res = { 0 };
|
||||
|
||||
if (pos1.snapshot < pos2.p.snapshot) {
|
||||
old_iter = &iter2;
|
||||
swap(k1, k2);
|
||||
}
|
||||
|
||||
trans->extra_journal_res += bch2_bkey_sectors_compressed(k2);
|
||||
|
||||
ret = bch2_trans_update_extent_overwrite(trans, old_iter,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE,
|
||||
k1, k2) ?:
|
||||
bch2_trans_commit(trans, &res, NULL,
|
||||
BTREE_INSERT_LAZY_RW|BTREE_INSERT_NOFAIL);
|
||||
bch2_disk_reservation_put(c, &res);
|
||||
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
*fixed = true;
|
||||
|
||||
if (pos1.snapshot == pos2.p.snapshot) {
|
||||
/*
|
||||
* We overwrote the first extent, and did the overwrite
|
||||
* in the same snapshot:
|
||||
*/
|
||||
extent_end->offset = bkey_start_offset(&pos2);
|
||||
} else if (pos1.snapshot > pos2.p.snapshot) {
|
||||
/*
|
||||
* We overwrote the first extent in pos2's snapshot:
|
||||
*/
|
||||
ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot);
|
||||
} else {
|
||||
/*
|
||||
* We overwrote the second extent - restart
|
||||
* check_extent() from the top:
|
||||
*/
|
||||
ret = -BCH_ERR_transaction_restart_nested;
|
||||
}
|
||||
}
|
||||
fsck_err:
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
bch2_trans_iter_exit(trans, &iter2);
|
||||
bch2_trans_iter_exit(trans, &iter1);
|
||||
printbuf_exit(&buf);
|
||||
return ret;
|
||||
}
|
||||
@ -1199,11 +1263,11 @@ static int check_overlapping_extents(struct btree_trans *trans,
|
||||
struct extent_ends *extent_ends,
|
||||
struct bkey_s_c k,
|
||||
u32 equiv,
|
||||
struct btree_iter *iter)
|
||||
struct btree_iter *iter,
|
||||
bool *fixed)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct extent_end *i;
|
||||
bool fixed = false;
|
||||
int ret = 0;
|
||||
|
||||
/* transaction restart, running again */
|
||||
@ -1226,7 +1290,8 @@ static int check_overlapping_extents(struct btree_trans *trans,
|
||||
SPOS(iter->pos.inode,
|
||||
i->offset,
|
||||
i->snapshot),
|
||||
*k.k, &fixed);
|
||||
&i->seen,
|
||||
*k.k, fixed, i);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
@ -1237,7 +1302,7 @@ static int check_overlapping_extents(struct btree_trans *trans,
|
||||
|
||||
extent_ends->last_pos = k.k->p;
|
||||
err:
|
||||
return ret ?: fixed;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
|
||||
@ -1292,13 +1357,10 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
|
||||
goto delete;
|
||||
|
||||
ret = check_overlapping_extents(trans, s, extent_ends, k,
|
||||
equiv.snapshot, iter);
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
|
||||
equiv.snapshot, iter,
|
||||
&inode->recalculate_sums);
|
||||
if (ret)
|
||||
inode->recalculate_sums = true;
|
||||
ret = 0;
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1373,7 +1435,7 @@ int bch2_check_extents(struct bch_fs *c)
|
||||
|
||||
snapshots_seen_init(&s);
|
||||
extent_ends_init(&extent_ends);
|
||||
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
|
||||
bch2_trans_init(&trans, c, BTREE_ITER_MAX, 4096);
|
||||
|
||||
ret = for_each_btree_key_commit(&trans, iter, BTREE_ID_extents,
|
||||
POS(BCACHEFS_ROOT_INO, 0),
|
||||
|
@ -503,6 +503,36 @@ static int test_extent_overwrite_all(struct bch_fs *c, u64 nr)
|
||||
__test_extent_overwrite(c, 32, 64, 32, 128);
|
||||
}
|
||||
|
||||
static int insert_test_overlapping_extent(struct bch_fs *c, u64 inum, u64 start, u32 len, u32 snapid)
|
||||
{
|
||||
struct bkey_i_cookie k;
|
||||
int ret;
|
||||
|
||||
bkey_cookie_init(&k.k_i);
|
||||
k.k_i.k.p.inode = inum;
|
||||
k.k_i.k.p.offset = start + len;
|
||||
k.k_i.k.p.snapshot = snapid;
|
||||
k.k_i.k.size = len;
|
||||
|
||||
ret = bch2_trans_do(c, NULL, NULL, 0,
|
||||
bch2_btree_insert_nonextent(&trans, BTREE_ID_extents, &k.k_i,
|
||||
BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE));
|
||||
if (ret)
|
||||
bch_err_fn(c, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int test_extent_create_overlapping(struct bch_fs *c, u64 inum)
|
||||
{
|
||||
return insert_test_overlapping_extent(c, inum, 0, 16, U32_MAX - 2) ?: /* overwrite entire */
|
||||
insert_test_overlapping_extent(c, inum, 2, 8, U32_MAX - 2) ?:
|
||||
insert_test_overlapping_extent(c, inum, 4, 4, U32_MAX) ?:
|
||||
insert_test_overlapping_extent(c, inum, 32, 8, U32_MAX - 2) ?: /* overwrite front/back */
|
||||
insert_test_overlapping_extent(c, inum, 36, 8, U32_MAX) ?:
|
||||
insert_test_overlapping_extent(c, inum, 60, 8, U32_MAX - 2) ?:
|
||||
insert_test_overlapping_extent(c, inum, 64, 8, U32_MAX);
|
||||
}
|
||||
|
||||
/* snapshot unit tests */
|
||||
|
||||
/* Test skipping over keys in unrelated snapshots: */
|
||||
@ -901,6 +931,7 @@ int bch2_btree_perf_test(struct bch_fs *c, const char *testname,
|
||||
perf_test(test_extent_overwrite_back);
|
||||
perf_test(test_extent_overwrite_middle);
|
||||
perf_test(test_extent_overwrite_all);
|
||||
perf_test(test_extent_create_overlapping);
|
||||
|
||||
perf_test(test_snapshots);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user