btrfs: scrub: introduce error reporting functionality for scrub_stripe

The new helper, scrub_stripe_report_errors(), will report the result of
the scrub to the system log.

The main reporting is done by introducing a new helper,
scrub_print_common_warning(), which has mostly the same content as
scrub_print_warning(), but without the need for a scrub_block.

Since we're reporting the errors, it's the perfect time to update the
scrub stats too.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Qu Wenruo 2023-03-20 10:12:56 +08:00 committed by David Sterba
parent 058e09e6fe
commit 0096580713

View File

@ -105,6 +105,7 @@ enum scrub_stripe_flags {
* Represent one contiguous range with a length of BTRFS_STRIPE_LEN.
*/
struct scrub_stripe {
struct scrub_ctx *sctx;
struct btrfs_block_group *bg;
struct page *pages[SCRUB_STRIPE_PAGES];
@ -119,6 +120,13 @@ struct scrub_stripe {
/* Should be BTRFS_STRIPE_LEN / sectorsize. */
u16 nr_sectors;
/*
* How many data/meta extents are in this stripe. Only for scrub status
* reporting purposes.
*/
u16 nr_data_extents;
u16 nr_meta_extents;
atomic_t pending_io;
wait_queue_head_t io_wait;
wait_queue_head_t repair_wait;
@ -377,6 +385,7 @@ static void release_scrub_stripe(struct scrub_stripe *stripe)
kfree(stripe->csums);
stripe->sectors = NULL;
stripe->csums = NULL;
stripe->sctx = NULL;
stripe->state = 0;
}
@ -1046,10 +1055,10 @@ err:
return 0;
}
static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
static void scrub_print_common_warning(const char *errstr, struct btrfs_device *dev,
bool is_super, u64 logical, u64 physical)
{
struct btrfs_device *dev;
struct btrfs_fs_info *fs_info;
struct btrfs_fs_info *fs_info = dev->fs_info;
struct btrfs_path *path;
struct btrfs_key found_key;
struct extent_buffer *eb;
@ -1062,22 +1071,18 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
u8 ref_level = 0;
int ret;
WARN_ON(sblock->sector_count < 1);
dev = sblock->dev;
fs_info = sblock->sctx->fs_info;
/* Super block error, no need to search extent tree. */
if (sblock->sectors[0]->flags & BTRFS_EXTENT_FLAG_SUPER) {
if (is_super) {
btrfs_warn_in_rcu(fs_info, "%s on device %s, physical %llu",
errstr, btrfs_dev_name(dev), sblock->physical);
errstr, btrfs_dev_name(dev), physical);
return;
}
path = btrfs_alloc_path();
if (!path)
return;
swarn.physical = sblock->physical;
swarn.logical = sblock->logical;
swarn.physical = physical;
swarn.logical = logical;
swarn.errstr = errstr;
swarn.dev = NULL;
@ -1126,6 +1131,13 @@ out:
btrfs_free_path(path);
}
static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
{
scrub_print_common_warning(errstr, sblock->dev,
sblock->sectors[0]->flags & BTRFS_EXTENT_FLAG_SUPER,
sblock->logical, sblock->physical);
}
static inline void scrub_get_recover(struct scrub_recover *recover)
{
refcount_inc(&recover->refs);
@ -2453,6 +2465,131 @@ static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe,
}
}
/*
 * Report the scrub result of @stripe to the system log and account it in the
 * scrub statistics of @sctx.
 *
 * For every sector covered by an extent (extent_sector_bitmap):
 * - count it as data/metadata (and as "no csum" for data without a csum);
 * - if it was bad initially (init_error_bitmap) but is no longer in
 *   error_bitmap, report it as fixed up;
 * - otherwise, if it was bad initially, report it as unrepairable and, when
 *   the device is known, emit the detailed i/o, checksum and header
 *   warnings (rate limited).
 *
 * All messages carry the logical/physical address of the affected sector,
 * i.e. the stripe start plus the sector's offset inside the stripe.
 *
 * The counters in sctx->stat are updated under sctx->stat_lock.
 */
static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
				       struct scrub_stripe *stripe)
{
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);
	struct btrfs_fs_info *fs_info = sctx->fs_info;
	struct btrfs_device *dev = NULL;
	/* Physical address matching stripe->logical, 0 if it was not looked up. */
	u64 stripe_physical = 0;
	int nr_data_sectors = 0;
	int nr_meta_sectors = 0;
	int nr_nodatacsum_sectors = 0;
	int nr_repaired_sectors = 0;
	int sector_nr;

	/*
	 * Init needed infos for error reporting.
	 *
	 * Although our scrub_stripe infrastructure is mostly based on
	 * btrfs_submit_bio() thus no need for dev/physical, error reporting
	 * still needs dev and physical.
	 */
	if (!bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors)) {
		u64 mapped_len = fs_info->sectorsize;
		struct btrfs_io_context *bioc = NULL;
		int stripe_index = stripe->mirror_num - 1;
		int ret;

		/* For scrub, our mirror_num should always start at 1. */
		ASSERT(stripe->mirror_num >= 1);
		ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
				       stripe->logical, &mapped_len, &bioc);
		/*
		 * If we failed, dev will be NULL, and later detailed reports
		 * will just be skipped.
		 */
		if (ret < 0)
			goto skip;
		stripe_physical = bioc->stripes[stripe_index].physical;
		dev = bioc->stripes[stripe_index].dev;
		btrfs_put_bioc(bioc);
	}

skip:
	for_each_set_bit(sector_nr, &stripe->extent_sector_bitmap, stripe->nr_sectors) {
		/*
		 * Address of this very sector. Reporting the stripe start
		 * instead would make every error inside the stripe show up
		 * at the same (wrong) location.
		 */
		const u64 offset = (u64)sector_nr << fs_info->sectorsize_bits;
		const u64 logical = stripe->logical + offset;
		const u64 physical = stripe_physical + offset;
		bool repaired = false;

		if (stripe->sectors[sector_nr].is_metadata) {
			nr_meta_sectors++;
		} else {
			nr_data_sectors++;
			if (!stripe->sectors[sector_nr].csum)
				nr_nodatacsum_sectors++;
		}

		if (test_bit(sector_nr, &stripe->init_error_bitmap) &&
		    !test_bit(sector_nr, &stripe->error_bitmap)) {
			nr_repaired_sectors++;
			repaired = true;
		}

		/* Good sector from the beginning, nothing needs to be done. */
		if (!test_bit(sector_nr, &stripe->init_error_bitmap))
			continue;

		/*
		 * Report error for the corrupted sectors. If repaired, just
		 * output the "fixed up" message.
		 */
		if (repaired) {
			if (dev) {
				btrfs_err_rl_in_rcu(fs_info,
			"fixed up error at logical %llu on dev %s physical %llu",
						    logical, btrfs_dev_name(dev),
						    physical);
			} else {
				btrfs_err_rl_in_rcu(fs_info,
			"fixed up error at logical %llu on mirror %u",
						    logical, stripe->mirror_num);
			}
			continue;
		}

		/* The remaining are all for unrepaired. */
		if (dev) {
			btrfs_err_rl_in_rcu(fs_info,
	"unable to fixup (regular) error at logical %llu on dev %s physical %llu",
					    logical, btrfs_dev_name(dev),
					    physical);
		} else {
			btrfs_err_rl_in_rcu(fs_info,
	"unable to fixup (regular) error at logical %llu on mirror %u",
					    logical, stripe->mirror_num);
		}

		/*
		 * The detailed warnings need a device. Bail out before
		 * touching the ratelimit state so that no budget is consumed
		 * for messages that would never be printed.
		 */
		if (!dev)
			continue;

		if (test_bit(sector_nr, &stripe->io_error_bitmap) &&
		    __ratelimit(&rs))
			scrub_print_common_warning("i/o error", dev, false,
						   logical, physical);
		if (test_bit(sector_nr, &stripe->csum_error_bitmap) &&
		    __ratelimit(&rs))
			scrub_print_common_warning("checksum error", dev, false,
						   logical, physical);
		if (test_bit(sector_nr, &stripe->meta_error_bitmap) &&
		    __ratelimit(&rs))
			scrub_print_common_warning("header error", dev, false,
						   logical, physical);
	}

	spin_lock(&sctx->stat_lock);
	sctx->stat.data_extents_scrubbed += stripe->nr_data_extents;
	sctx->stat.tree_extents_scrubbed += stripe->nr_meta_extents;
	sctx->stat.data_bytes_scrubbed += nr_data_sectors << fs_info->sectorsize_bits;
	sctx->stat.tree_bytes_scrubbed += nr_meta_sectors << fs_info->sectorsize_bits;
	sctx->stat.no_csum += nr_nodatacsum_sectors;
	sctx->stat.read_errors +=
		bitmap_weight(&stripe->io_error_bitmap, stripe->nr_sectors);
	sctx->stat.csum_errors +=
		bitmap_weight(&stripe->csum_error_bitmap, stripe->nr_sectors);
	sctx->stat.verify_errors +=
		bitmap_weight(&stripe->meta_error_bitmap, stripe->nr_sectors);
	sctx->stat.uncorrectable_errors +=
		bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors);
	sctx->stat.corrected_errors += nr_repaired_sectors;
	spin_unlock(&sctx->stat_lock);
}
/*
* The main entrance for all read related scrub work, including:
*
@ -2526,6 +2663,7 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
goto out;
}
out:
scrub_stripe_report_errors(stripe->sctx, stripe);
set_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state);
wake_up(&stripe->repair_wait);
}
@ -4189,6 +4327,10 @@ int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
if (ret)
goto out;
get_extent_info(&path, &extent_start, &extent_len, &extent_flags, &extent_gen);
if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
stripe->nr_meta_extents++;
if (extent_flags & BTRFS_EXTENT_FLAG_DATA)
stripe->nr_data_extents++;
cur_logical = max(extent_start, cur_logical);
/*
@ -4222,6 +4364,10 @@ int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
}
get_extent_info(&path, &extent_start, &extent_len,
&extent_flags, &extent_gen);
if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
stripe->nr_meta_extents++;
if (extent_flags & BTRFS_EXTENT_FLAG_DATA)
stripe->nr_data_extents++;
fill_one_extent_info(fs_info, stripe, extent_start, extent_len,
extent_flags, extent_gen);
cur_logical = extent_start + extent_len;