// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_refcount_btree.h"
#include "xfs_refcount.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_health.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"

static struct kmem_cache	*xfs_refcountbt_cur_cache;
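
/* Duplicate an existing refcount btree cursor. */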
static struct xfs_btree_cur *
xfs_refcountbt_dup_cursor(
	struct xfs_btree_cur	*cur)
{
	return xfs_refcountbt_init_cursor(cur->bc_mp, cur->bc_tp,
			cur->bc_ag.agbp, cur->bc_ag.pag);
}
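
/*
 * Install a new btree root block in the AGF and adjust the cached btree
 * height, both on disk and in the in-core perag.
 */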
STATIC void
xfs_refcountbt_set_root(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*ptr,
	int				inc)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_perag	*pag = agbp->b_pag;

	ASSERT(ptr->s != 0);

	agf->agf_refcount_root = ptr->s;
	be32_add_cpu(&agf->agf_refcount_level, inc);
	pag->pagf_refcount_level += inc;

	xfs_alloc_log_agf(cur->bc_tp, agbp,
			XFS_AGF_REFCOUNT_ROOT | XFS_AGF_REFCOUNT_LEVEL);
}
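
/*
 * Allocate a block for the btree out of this AG's metadata reservation,
 * aiming for somewhere near the other refcount btree blocks.
 */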
STATIC int
xfs_refcountbt_alloc_block(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_ptr	*start,
	union xfs_btree_ptr		*new,
	int				*stat)
{
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_alloc_arg	args;		/* block allocation args */
	int			error;		/* error return value */

	memset(&args, 0, sizeof(args));
	args.tp = cur->bc_tp;
	args.mp = cur->bc_mp;
	args.pag = cur->bc_ag.pag;
	args.oinfo = XFS_RMAP_OINFO_REFC;
	args.minlen = args.maxlen = args.prod = 1;
	args.resv = XFS_AG_RESV_METADATA;

	error = xfs_alloc_vextent_near_bno(&args,
			XFS_AGB_TO_FSB(args.mp, args.pag->pag_agno,
					xfs_refc_block(args.mp)));
	if (error)
		goto out_error;
	if (args.fsbno == NULLFSBLOCK) {
		*stat = 0;
		return 0;
	}
	ASSERT(args.agno == cur->bc_ag.pag->pag_agno);
	ASSERT(args.len == 1);

	new->s = cpu_to_be32(args.agbno);
	be32_add_cpu(&agf->agf_refcount_blocks, 1);
	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);

	*stat = 1;
	return 0;

out_error:
	return error;
}
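
/*
 * Give a btree block back to the metadata reservation; the actual free is
 * deferred until the transaction commits.
 */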
STATIC int
xfs_refcountbt_free_block(
	struct xfs_btree_cur	*cur,
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = cur->bc_mp;
	struct xfs_buf		*agbp = cur->bc_ag.agbp;
	struct xfs_agf		*agf = agbp->b_addr;
	xfs_fsblock_t		fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));

	be32_add_cpu(&agf->agf_refcount_blocks, -1);
	xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
	return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
			&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA, 0);
}
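
/*
 * Minimum and maximum records per block, precomputed at mount time; index 0
 * covers leaf blocks and index 1 covers node blocks.
 */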
STATIC int
xfs_refcountbt_get_minrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	return cur->bc_mp->m_refc_mnr[level != 0];
}

STATIC int
xfs_refcountbt_get_maxrecs(
	struct xfs_btree_cur	*cur,
	int			level)
{
	return cur->bc_mp->m_refc_mxr[level != 0];
}
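
/* The low key of a refcount record is its starting block. */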
STATIC void
xfs_refcountbt_init_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	key->refc.rc_startblock = rec->refc.rc_startblock;
}
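
/* The high key is the last block covered by the record. */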
STATIC void
xfs_refcountbt_init_high_key_from_rec(
	union xfs_btree_key		*key,
	const union xfs_btree_rec	*rec)
{
	__u32				x;

	x = be32_to_cpu(rec->refc.rc_startblock);
	x += be32_to_cpu(rec->refc.rc_blockcount) - 1;
	key->refc.rc_startblock = cpu_to_be32(x);
}
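
/*
 * Convert the in-core record to its on-disk form.  The cow/shared domain is
 * re-encoded into the high bit of rc_startblock; e.g. a CoW staging record
 * starting at agbno 100 is written as (1U << 31) | 100.
 */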
STATIC void
xfs_refcountbt_init_rec_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_rec	*rec)
{
	const struct xfs_refcount_irec *irec = &cur->bc_rec.rc;
	uint32_t		start;

	start = xfs_refcount_encode_startblock(irec->rc_startblock,
			irec->rc_domain);
	rec->refc.rc_startblock = cpu_to_be32(start);
	rec->refc.rc_blockcount = cpu_to_be32(cur->bc_rec.rc.rc_blockcount);
	rec->refc.rc_refcount = cpu_to_be32(cur->bc_rec.rc.rc_refcount);
}
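
/* Fetch the root block pointer of this btree from the AGF. */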
STATIC void
xfs_refcountbt_init_ptr_from_cur(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr)
{
	struct xfs_agf		*agf = cur->bc_ag.agbp->b_addr;

	ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));

	ptr->s = agf->agf_refcount_root;
}
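
/*
 * Compare the cursor's in-core record, with its domain re-encoded into the
 * startblock, against the given search key.
 */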
STATIC int64_t
xfs_refcountbt_key_diff(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key)
{
	const struct xfs_refcount_key	*kp = &key->refc;
	const struct xfs_refcount_irec	*irec = &cur->bc_rec.rc;
	uint32_t			start;

	start = xfs_refcount_encode_startblock(irec->rc_startblock,
			irec->rc_domain);
	return (int64_t)be32_to_cpu(kp->rc_startblock) - start;
}
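
/* Refcount keys consist only of a startblock, so any mask must cover it. */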
STATIC int64_t
xfs_refcountbt_diff_two_keys(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->refc.rc_startblock);

	return (int64_t)be32_to_cpu(k1->refc.rc_startblock) -
			be32_to_cpu(k2->refc.rc_startblock);
}
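
/*
 * Sanity-check an on-disk refcount btree block: magic number, v5 block
 * header, and a tree level within the bounds allowed for this AG.
 */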
STATIC xfs_failaddr_t
xfs_refcountbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;
	xfs_failaddr_t		fa;
	unsigned int		level;

	if (!xfs_verify_magic(bp, block->bb_magic))
		return __this_address;

	if (!xfs_has_reflink(mp))
		return __this_address;
	fa = xfs_btree_agblock_v5hdr_verify(bp);
	if (fa)
		return fa;

	level = be16_to_cpu(block->bb_level);
	if (pag && xfs_perag_initialised_agf(pag)) {
		unsigned int	maxlevel = pag->pagf_refcount_level;

#ifdef CONFIG_XFS_ONLINE_REPAIR
		/*
		 * Online repair could be rewriting the refcount btree, so
		 * we'll validate against the larger of either tree while this
		 * is going on.
		 */
		maxlevel = max_t(unsigned int, maxlevel,
				pag->pagf_repair_refcount_level);
#endif
		if (level >= maxlevel)
			return __this_address;
	} else if (level >= mp->m_refc_maxlevels)
		return __this_address;

	return xfs_btree_agblock_verify(bp, mp->m_refc_mxr[level != 0]);
}
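
/* CRC-check and structurally verify a refcount btree block read from disk. */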
STATIC void
xfs_refcountbt_read_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	if (!xfs_btree_agblock_verify_crc(bp))
		xfs_verifier_error(bp, -EFSBADCRC, __this_address);
	else {
		fa = xfs_refcountbt_verify(bp);
		if (fa)
			xfs_verifier_error(bp, -EFSCORRUPTED, fa);
	}

	if (bp->b_error)
		trace_xfs_btree_corrupt(bp, _RET_IP_);
}
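
/* Verify a refcount btree block and recompute its CRC before writeout. */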
STATIC void
xfs_refcountbt_write_verify(
	struct xfs_buf	*bp)
{
	xfs_failaddr_t	fa;

	fa = xfs_refcountbt_verify(bp);
	if (fa) {
		trace_xfs_btree_corrupt(bp, _RET_IP_);
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}
	xfs_btree_agblock_calc_crc(bp);
}

const struct xfs_buf_ops xfs_refcountbt_buf_ops = {
	.name			= "xfs_refcountbt",
	.magic			= { 0, cpu_to_be32(XFS_REFC_CRC_MAGIC) },
	.verify_read		= xfs_refcountbt_read_verify,
	.verify_write		= xfs_refcountbt_write_verify,
	.verify_struct		= xfs_refcountbt_verify,
};
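
/* Check that two keys are in ascending startblock order. */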
STATIC int
xfs_refcountbt_keys_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*k1,
	const union xfs_btree_key	*k2)
{
	return be32_to_cpu(k1->refc.rc_startblock) <
	       be32_to_cpu(k2->refc.rc_startblock);
}
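
/* Check that two records are in order and do not overlap. */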
STATIC int
xfs_refcountbt_recs_inorder(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_rec	*r1,
	const union xfs_btree_rec	*r2)
{
	return be32_to_cpu(r1->refc.rc_startblock) +
	       be32_to_cpu(r1->refc.rc_blockcount) <=
	       be32_to_cpu(r2->refc.rc_startblock);
}
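
/* Decide whether two keys describe adjacent parts of the keyspace. */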
STATIC enum xbtree_key_contig
xfs_refcountbt_keys_contiguous(
	struct xfs_btree_cur		*cur,
	const union xfs_btree_key	*key1,
	const union xfs_btree_key	*key2,
	const union xfs_btree_key	*mask)
{
	ASSERT(!mask || mask->refc.rc_startblock);

	return xbtree_key_contig(be32_to_cpu(key1->refc.rc_startblock),
				 be32_to_cpu(key2->refc.rc_startblock));
}

const struct xfs_btree_ops xfs_refcountbt_ops = {
	.name			= "refcount",
	.type			= XFS_BTREE_TYPE_AG,

	.rec_len		= sizeof(struct xfs_refcount_rec),
	.key_len		= sizeof(struct xfs_refcount_key),
	.ptr_len		= XFS_BTREE_SHORT_PTR_LEN,

	.lru_refs		= XFS_REFC_BTREE_REF,
	.statoff		= XFS_STATS_CALC_INDEX(xs_refcbt_2),
	.sick_mask		= XFS_SICK_AG_REFCNTBT,

	.dup_cursor		= xfs_refcountbt_dup_cursor,
	.set_root		= xfs_refcountbt_set_root,
	.alloc_block		= xfs_refcountbt_alloc_block,
	.free_block		= xfs_refcountbt_free_block,
	.get_minrecs		= xfs_refcountbt_get_minrecs,
	.get_maxrecs		= xfs_refcountbt_get_maxrecs,
	.init_key_from_rec	= xfs_refcountbt_init_key_from_rec,
	.init_high_key_from_rec	= xfs_refcountbt_init_high_key_from_rec,
	.init_rec_from_cur	= xfs_refcountbt_init_rec_from_cur,
	.init_ptr_from_cur	= xfs_refcountbt_init_ptr_from_cur,
	.key_diff		= xfs_refcountbt_key_diff,
	.buf_ops		= &xfs_refcountbt_buf_ops,
	.diff_two_keys		= xfs_refcountbt_diff_two_keys,
	.keys_inorder		= xfs_refcountbt_keys_inorder,
	.recs_inorder		= xfs_refcountbt_recs_inorder,
	.keys_contiguous	= xfs_refcountbt_keys_contiguous,
};

/*
 * Create a new refcount btree cursor.
 *
 * For staging cursors tp and agbp are NULL.
 */
struct xfs_btree_cur *
xfs_refcountbt_init_cursor(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_perag	*pag)
{
	struct xfs_btree_cur	*cur;

	ASSERT(pag->pag_agno < mp->m_sb.sb_agcount);

	cur = xfs_btree_alloc_cursor(mp, tp, &xfs_refcountbt_ops,
			mp->m_refc_maxlevels, xfs_refcountbt_cur_cache);
	cur->bc_ag.pag = xfs_perag_hold(pag);
	cur->bc_refc.nr_ops = 0;
	cur->bc_refc.shape_changes = 0;
	cur->bc_ag.agbp = agbp;
	if (agbp) {
		struct xfs_agf		*agf = agbp->b_addr;

		cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level);
	}
	return cur;
}

/*
 * Swap in the new btree root.  Once we pass this point the newly rebuilt btree
 * is in place and we have to kill off all the old btree blocks.
 */
void
xfs_refcountbt_commit_staged_btree(
	struct xfs_btree_cur	*cur,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp)
{
	struct xfs_agf		*agf = agbp->b_addr;
	struct xbtree_afakeroot	*afake = cur->bc_ag.afake;

	ASSERT(cur->bc_flags & XFS_BTREE_STAGING);

	agf->agf_refcount_root = cpu_to_be32(afake->af_root);
	agf->agf_refcount_level = cpu_to_be32(afake->af_levels);
	agf->agf_refcount_blocks = cpu_to_be32(afake->af_blocks);
	xfs_alloc_log_agf(tp, agbp, XFS_AGF_REFCOUNT_BLOCKS |
				    XFS_AGF_REFCOUNT_ROOT |
				    XFS_AGF_REFCOUNT_LEVEL);
	xfs_btree_commit_afakeroot(cur, tp, agbp);
}

/* Calculate number of records in a refcount btree block. */
static inline unsigned int
xfs_refcountbt_block_maxrecs(
	unsigned int		blocklen,
	bool			leaf)
{
	if (leaf)
		return blocklen / sizeof(struct xfs_refcount_rec);
	return blocklen / (sizeof(struct xfs_refcount_key) +
			   sizeof(xfs_refcount_ptr_t));
}

/*
 * Calculate the maximum number of records that fit in a refcount btree
 * block, accounting for the block header.
 */
unsigned int
xfs_refcountbt_maxrecs(
	struct xfs_mount	*mp,
	unsigned int		blocklen,
	bool			leaf)
{
	blocklen -= XFS_REFCOUNT_BLOCK_LEN;
	return xfs_refcountbt_block_maxrecs(blocklen, leaf);
}

/* Compute the max possible height of the maximally sized refcount btree. */
unsigned int
xfs_refcountbt_maxlevels_ondisk(void)
{
	unsigned int		minrecs[2];
	unsigned int		blocklen;

	blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_SBLOCK_CRC_LEN;

	minrecs[0] = xfs_refcountbt_block_maxrecs(blocklen, true) / 2;
	minrecs[1] = xfs_refcountbt_block_maxrecs(blocklen, false) / 2;

	return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_CRC_AG_BLOCKS);
}

/* Compute the maximum height of a refcount btree. */
void
xfs_refcountbt_compute_maxlevels(
	struct xfs_mount	*mp)
{
	if (!xfs_has_reflink(mp)) {
		mp->m_refc_maxlevels = 0;
		return;
	}

	mp->m_refc_maxlevels = xfs_btree_compute_maxlevels(
			mp->m_refc_mnr, mp->m_sb.sb_agblocks);
	ASSERT(mp->m_refc_maxlevels <= xfs_refcountbt_maxlevels_ondisk());
}

/* Calculate the refcount btree size for some records. */
xfs_extlen_t
xfs_refcountbt_calc_size(
	struct xfs_mount	*mp,
	unsigned long long	len)
{
	return xfs_btree_calc_size(mp->m_refc_mnr, len);
}

/*
 * Calculate the maximum refcount btree size.
 */
xfs_extlen_t
xfs_refcountbt_max_size(
	struct xfs_mount	*mp,
	xfs_agblock_t		agblocks)
{
	/* Bail out if we're uninitialized, which can happen in mkfs. */
	if (mp->m_refc_mxr[0] == 0)
		return 0;

	return xfs_refcountbt_calc_size(mp, agblocks);
}

/*
 * Figure out how many blocks to reserve and how many are used by this btree.
 */
int
xfs_refcountbt_calc_reserves(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	xfs_extlen_t		*ask,
	xfs_extlen_t		*used)
{
	struct xfs_buf		*agbp;
	struct xfs_agf		*agf;
	xfs_agblock_t		agblocks;
	xfs_extlen_t		tree_len;
	int			error;

	if (!xfs_has_reflink(mp))
		return 0;

	error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
	if (error)
		return error;

	agf = agbp->b_addr;
	agblocks = be32_to_cpu(agf->agf_length);
	tree_len = be32_to_cpu(agf->agf_refcount_blocks);
	xfs_trans_brelse(tp, agbp);

	/*
	 * The log is permanently allocated, so the space it occupies will
	 * never be available for the kinds of things that would require btree
	 * expansion.  We therefore can pretend the space isn't there.
	 */
	if (xfs_ag_contains_log(mp, pag->pag_agno))
		agblocks -= mp->m_sb.sb_logblocks;

	*ask += xfs_refcountbt_max_size(mp, agblocks);
	*used += tree_len;

	return error;
}
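
/*
 * Create the cursor cache; each cursor is sized to hold the tallest possible
 * on-disk btree.
 */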
int __init
xfs_refcountbt_init_cur_cache(void)
{
	xfs_refcountbt_cur_cache = kmem_cache_create("xfs_refcbt_cur",
			xfs_btree_cur_sizeof(xfs_refcountbt_maxlevels_ondisk()),
			0, 0, NULL);

	if (!xfs_refcountbt_cur_cache)
		return -ENOMEM;
	return 0;
}
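
/* Free the cursor cache. */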
void
xfs_refcountbt_destroy_cur_cache(void)
{
	kmem_cache_destroy(xfs_refcountbt_cur_cache);
	xfs_refcountbt_cur_cache = NULL;
}