xfs: report btree block corruption errors to the health system

Whenever we encounter corrupt btree blocks, we should record that with the
health monitoring system so that it can be reported later.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
Darrick J. Wong 2024-02-22 12:32:09 -08:00
parent 1196f3f5ab
commit a78d10f45b
8 changed files with 81 additions and 5 deletions

View File

@ -275,6 +275,7 @@ xfs_alloc_complain_bad_rec(
xfs_warn(mp,
"start block 0x%x block count 0x%x", irec->ar_startblock,
irec->ar_blockcount);
xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
@ -2702,6 +2703,7 @@ xfs_exact_minlen_extent_available(
goto out;
if (*stat == 0) {
xfs_btree_mark_sick(cnt_cur);
error = -EFSCORRUPTED;
goto out;
}

View File

@ -368,6 +368,8 @@ xfs_bmap_check_leaf_extents(
error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (xfs_metadata_is_sick(error))
xfs_btree_mark_sick(cur);
if (error)
goto error_norelse;
}
@ -454,6 +456,8 @@ xfs_bmap_check_leaf_extents(
error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (xfs_metadata_is_sick(error))
xfs_btree_mark_sick(cur);
if (error)
goto error_norelse;
}
@ -568,6 +572,8 @@ xfs_bmap_btree_to_extents(
#endif
error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
if (xfs_metadata_is_sick(error))
xfs_btree_mark_sick(cur);
if (error)
return error;
cblock = XFS_BUF_TO_BLOCK(cbp);

View File

@ -27,6 +27,7 @@
#include "xfs_bmap_btree.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_health.h"
/*
* Btree magic numbers.
@ -177,6 +178,7 @@ xfs_btree_check_lblock(
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) {
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
return 0;
@ -243,6 +245,7 @@ xfs_btree_check_sblock(
XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) {
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
return 0;
@ -318,6 +321,7 @@ xfs_btree_check_ptr(
level, index);
}
xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
@ -498,6 +502,8 @@ xfs_btree_dup_cursor(
xfs_buf_daddr(bp), mp->m_bsize,
0, &bp,
cur->bc_ops->buf_ops);
if (xfs_metadata_is_sick(error))
xfs_btree_mark_sick(new);
if (error) {
xfs_btree_del_cursor(new, error);
*ncur = NULL;
@ -1351,6 +1357,8 @@ xfs_btree_read_buf_block(
error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
mp->m_bsize, flags, bpp,
cur->bc_ops->buf_ops);
if (xfs_metadata_is_sick(error))
xfs_btree_mark_sick(cur);
if (error)
return error;
@ -1661,6 +1669,7 @@ xfs_btree_increment(
if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
goto out0;
ASSERT(0);
xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@ -1754,6 +1763,7 @@ xfs_btree_decrement(
if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
goto out0;
ASSERT(0);
xfs_btree_mark_sick(cur);
error = -EFSCORRUPTED;
goto error0;
}
@ -1846,6 +1856,7 @@ out_bad:
*blkp = NULL;
xfs_buf_mark_corrupt(bp);
xfs_trans_brelse(cur->bc_tp, bp);
xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
@ -1892,8 +1903,10 @@ xfs_btree_lookup(
XFS_BTREE_STATS_INC(cur, lookup);
/* No such thing as a zero-level tree. */
if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0))
if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0)) {
xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
block = NULL;
keyno = 0;
@ -1936,6 +1949,7 @@ xfs_btree_lookup(
XFS_ERRLEVEL_LOW,
cur->bc_mp, block,
sizeof(*block));
xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
@ -4369,13 +4383,17 @@ xfs_btree_visit_block(
*/
if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
if (be64_to_cpu(rptr.l) == XFS_DADDR_TO_FSB(cur->bc_mp,
xfs_buf_daddr(bp)))
xfs_buf_daddr(bp))) {
xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
} else {
if (be32_to_cpu(rptr.s) == xfs_daddr_to_agbno(cur->bc_mp,
xfs_buf_daddr(bp)))
xfs_buf_daddr(bp))) {
xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
}
return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
}
@ -5233,6 +5251,7 @@ xfs_btree_goto_left_edge(
return error;
if (stat != 0) {
ASSERT(0);
xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}

View File

@ -37,6 +37,7 @@ struct xfs_mount;
struct xfs_perag;
struct xfs_inode;
struct xfs_fsop_geom;
struct xfs_btree_cur;
/* Observable health issues for metadata spanning the entire filesystem. */
#define XFS_SICK_FS_COUNTERS (1 << 0) /* summary counters */
@ -160,6 +161,7 @@ void xfs_inode_measure_sickness(struct xfs_inode *ip, unsigned int *sick,
void xfs_health_unmount(struct xfs_mount *mp);
void xfs_bmap_mark_sick(struct xfs_inode *ip, int whichfork);
void xfs_btree_mark_sick(struct xfs_btree_cur *cur);
/* Now some helpers. */

View File

@ -148,6 +148,7 @@ xfs_inobt_complain_bad_rec(
"start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x",
irec->ir_startino, irec->ir_count, irec->ir_freecount,
irec->ir_free, irec->ir_holemask);
xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}

View File

@ -23,6 +23,7 @@
#include "xfs_refcount.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_health.h"
struct kmem_cache *xfs_refcount_intent_cache;
@ -156,6 +157,7 @@ xfs_refcount_complain_bad_rec(
xfs_warn(mp,
"Start block 0x%x, block count 0x%x, references 0x%x",
irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
@ -1889,8 +1891,10 @@ xfs_refcount_recover_extent(
struct xfs_refcount_recovery *rr;
if (XFS_IS_CORRUPT(cur->bc_mp,
be32_to_cpu(rec->refc.rc_refcount) != 1))
be32_to_cpu(rec->refc.rc_refcount) != 1)) {
xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
rr = kmalloc(sizeof(struct xfs_refcount_recovery),
GFP_KERNEL | __GFP_NOFAIL);

View File

@ -23,6 +23,7 @@
#include "xfs_error.h"
#include "xfs_inode.h"
#include "xfs_ag.h"
#include "xfs_health.h"
struct kmem_cache *xfs_rmap_intent_cache;
@ -56,8 +57,10 @@ xfs_rmap_lookup_le(
error = xfs_rmap_get_rec(cur, irec, &get_stat);
if (error)
return error;
if (!get_stat)
if (!get_stat) {
xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}
return 0;
}
@ -277,6 +280,7 @@ xfs_rmap_complain_bad_rec(
"Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x",
irec->rm_owner, irec->rm_flags, irec->rm_startblock,
irec->rm_blockcount);
xfs_btree_mark_sick(cur);
return -EFSCORRUPTED;
}

View File

@ -14,6 +14,7 @@
#include "xfs_trace.h"
#include "xfs_health.h"
#include "xfs_ag.h"
#include "xfs_btree.h"
/*
* Warn about metadata corruption that we detected but haven't fixed, and
@ -507,3 +508,40 @@ xfs_bmap_mark_sick(
xfs_inode_mark_sick(ip, mask);
}
/*
 * Tell the health tracking system that corruption was observed in the btree
 * that @cur is walking.
 *
 * Bmap btree cursors are reported against the owning inode and fork via
 * xfs_bmap_mark_sick().  All other recognised btree types are translated into
 * the matching XFS_SICK_AG_* flag and reported against the cursor's perag.
 * An unrecognised btree type trips an ASSERT and records nothing.
 */
void
xfs_btree_mark_sick(
	struct xfs_btree_cur		*cur)
{
	unsigned int			sick_mask;

	/* Bmap btrees are reported per inode fork rather than per AG. */
	if (cur->bc_btnum == XFS_BTNUM_BMAP) {
		xfs_bmap_mark_sick(cur->bc_ino.ip, cur->bc_ino.whichfork);
		return;
	}

	/* Map each per-AG btree type onto its AG health flag. */
	switch (cur->bc_btnum) {
	case XFS_BTNUM_BNO:
		sick_mask = XFS_SICK_AG_BNOBT;
		break;
	case XFS_BTNUM_CNT:
		sick_mask = XFS_SICK_AG_CNTBT;
		break;
	case XFS_BTNUM_INO:
		sick_mask = XFS_SICK_AG_INOBT;
		break;
	case XFS_BTNUM_FINO:
		sick_mask = XFS_SICK_AG_FINOBT;
		break;
	case XFS_BTNUM_RMAP:
		sick_mask = XFS_SICK_AG_RMAPBT;
		break;
	case XFS_BTNUM_REFC:
		sick_mask = XFS_SICK_AG_REFCNTBT;
		break;
	default:
		/* Unknown btree type: nothing sensible to report. */
		ASSERT(0);
		return;
	}

	xfs_ag_mark_sick(cur->bc_ag.pag, sick_mask);
}