btrfs: convert BUG_ON()'s in select_reloc_root() to proper errors

We have several BUG_ON()'s in select_reloc_root() that can be tripped if
there is an extent tree corruption.  Convert these to ASSERT()'s, because
if we hit it during testing it really is bad, or could indicate a
problem with the backref walking code.

However if users hit these problems it generally indicates corruption,
I've hit a few machines in the fleet that trip over these with clearly
corrupted extent trees, so be nice and print out an error message and
return an error instead of bringing the whole box down.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Josef Bacik 2021-03-12 15:24:59 -05:00 committed by David Sterba
parent cbdc2ebc7c
commit 8ee66afe99

View File

@ -1994,8 +1994,33 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
cond_resched();
next = walk_up_backref(next, edges, &index);
root = next->root;
BUG_ON(!root);
BUG_ON(!test_bit(BTRFS_ROOT_SHAREABLE, &root->state));
/*
* If there is no root, then our references for this block are
* incomplete, as we should be able to walk all the way up to a
* block that is owned by a root.
*
* This path is only for SHAREABLE roots, so if we come upon a
* non-SHAREABLE root then we have backrefs that resolve
* improperly.
*
* Both of these cases indicate file system corruption, or a bug
* in the backref walking code.
*/
if (!root) {
ASSERT(0);
btrfs_err(trans->fs_info,
"bytenr %llu doesn't have a backref path ending in a root",
node->bytenr);
return ERR_PTR(-EUCLEAN);
}
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
ASSERT(0);
btrfs_err(trans->fs_info,
"bytenr %llu has multiple refs with one ending in a non-shareable root",
node->bytenr);
return ERR_PTR(-EUCLEAN);
}
if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
record_reloc_root_in_trans(trans, root);
@ -2006,8 +2031,22 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
root = root->reloc_root;
if (next->new_bytenr != root->node->start) {
BUG_ON(next->new_bytenr);
BUG_ON(!list_empty(&next->list));
/*
* We just created the reloc root, so we shouldn't have
* ->new_bytenr set and this shouldn't be in the changed
* list. If it is then we have multiple roots pointing
* at the same bytenr which indicates corruption, or
* we've made a mistake in the backref walking code.
*/
ASSERT(next->new_bytenr == 0);
ASSERT(list_empty(&next->list));
if (next->new_bytenr || !list_empty(&next->list)) {
btrfs_err(trans->fs_info,
"bytenr %llu possibly has multiple roots pointing at the same bytenr %llu",
node->bytenr, next->bytenr);
return ERR_PTR(-EUCLEAN);
}
next->new_bytenr = root->node->start;
btrfs_put_root(next->root);
next->root = btrfs_grab_root(root);