gfs2: Add local resource group locking

Prepare for treating resource group glocks as exclusive among nodes but
shared among all tasks running on a node: introduce another layer of
node-specific locking that the local tasks can use to coordinate their
accesses.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
This commit is contained in:
Andreas Gruenbacher 2021-02-08 20:44:26 +01:00
parent 725d0e9d46
commit 9e514605c7
4 changed files with 59 additions and 7 deletions

View File

@ -20,6 +20,7 @@
#include <linux/percpu.h>
#include <linux/lockref.h>
#include <linux/rhashtable.h>
#include <linux/mutex.h>
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
@ -123,6 +124,7 @@ struct gfs2_rgrpd {
#define GFS2_RDF_PREFERRED 0x80000000 /* This rgrp is preferred */
#define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */
spinlock_t rd_rsspin; /* protects reservation related vars */
struct mutex rd_mutex;
struct rb_root rd_rstree; /* multi-block reservation tree */
};

View File

@ -76,8 +76,9 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
struct gfs2_bitmap *bi = rgd->rd_bits + index;
rgrp_lock_local(rgd);
if (bi->bi_clone == NULL)
return;
goto out;
if (sdp->sd_args.ar_discard)
gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
memcpy(bi->bi_clone + bi->bi_offset,
@ -86,6 +87,9 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
rgd->rd_free_clone = rgd->rd_free;
BUG_ON(rgd->rd_free_clone < rgd->rd_reserved);
rgd->rd_extfail_pt = rgd->rd_free;
out:
rgrp_unlock_local(rgd);
}
/**

View File

@ -920,6 +920,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
rgd->rd_data = be32_to_cpu(buf.ri_data);
rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
spin_lock_init(&rgd->rd_rsspin);
mutex_init(&rgd->rd_mutex);
error = compute_bitstructs(rgd);
if (error)
@ -1449,9 +1450,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
/* Trim each bitmap in the rgrp */
for (x = 0; x < rgd->rd_length; x++) {
struct gfs2_bitmap *bi = rgd->rd_bits + x;
rgrp_lock_local(rgd);
ret = gfs2_rgrp_send_discards(sdp,
rgd->rd_data0, NULL, bi, minlen,
&amt);
rgrp_unlock_local(rgd);
if (ret) {
gfs2_glock_dq_uninit(&gh);
goto out;
@ -1463,9 +1466,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
if (ret == 0) {
bh = rgd->rd_bits[0].bi_bh;
rgrp_lock_local(rgd);
rgd->rd_flags |= GFS2_RGF_TRIMMED;
gfs2_trans_add_meta(rgd->rd_gl, bh);
gfs2_rgrp_out(rgd, bh->b_data);
rgrp_unlock_local(rgd);
gfs2_trans_end(sdp);
}
}
@ -2050,7 +2055,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *begin = NULL;
struct gfs2_blkreserv *rs = &ip->i_res;
int error = 0, rg_locked, flags = 0;
int error = 0, flags = 0;
bool rg_locked;
u64 last_unlinked = NO_BLOCK;
u32 target = ap->target;
int loops = 0;
@ -2079,10 +2085,10 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
while (loops < 3) {
struct gfs2_rgrpd *rgd;
rg_locked = 1;
if (!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl)) {
rg_locked = 0;
rg_locked = gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl);
if (rg_locked) {
rgrp_lock_local(rs->rs_rgd);
} else {
if (skip && skip--)
goto next_rgrp;
if (!gfs2_rs_active(rs)) {
@ -2099,12 +2105,14 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
&ip->i_rgd_gh);
if (unlikely(error))
return error;
rgrp_lock_local(rs->rs_rgd);
if (!gfs2_rs_active(rs) && (loops < 2) &&
gfs2_rgrp_congested(rs->rs_rgd, loops))
goto skip_rgrp;
if (sdp->sd_args.ar_rgrplvb) {
error = update_rgrp_lvb(rs->rs_rgd);
if (unlikely(error)) {
rgrp_unlock_local(rs->rs_rgd);
gfs2_glock_dq_uninit(&ip->i_rgd_gh);
return error;
}
@ -2142,6 +2150,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
rs->rs_reserved = blocks_available;
rgd->rd_reserved += rs->rs_reserved;
spin_unlock(&rgd->rd_rsspin);
rgrp_unlock_local(rs->rs_rgd);
return 0;
check_rgrp:
/* Check for unlinked inodes which can be reclaimed */
@ -2149,6 +2158,8 @@ check_rgrp:
try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
ip->i_no_addr);
skip_rgrp:
rgrp_unlock_local(rs->rs_rgd);
/* Drop reservation, if we couldn't use reserved rgrp */
if (gfs2_rs_active(rs))
gfs2_rs_deltree(rs);
@ -2293,6 +2304,7 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
struct gfs2_blkreserv *trs;
const struct rb_node *n;
spin_lock(&rgd->rd_rsspin);
gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n",
fs_id_buf,
(unsigned long long)rgd->rd_addr, rgd->rd_flags,
@ -2306,7 +2318,6 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
be32_to_cpu(rgl->rl_free),
be32_to_cpu(rgl->rl_dinodes));
}
spin_lock(&rgd->rd_rsspin);
for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
dump_rs(seq, trs, fs_id_buf);
@ -2421,6 +2432,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
BUG_ON(ip->i_res.rs_reserved < *nblocks);
rgrp_lock_local(rbm.rgd);
if (gfs2_rs_active(&ip->i_res)) {
gfs2_set_alloc_start(&rbm, ip, dinode);
error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false);
@ -2477,6 +2489,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
rgrp_unlock_local(rbm.rgd);
gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
if (dinode)
@ -2490,6 +2503,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
return 0;
rgrp_error:
rgrp_unlock_local(rbm.rgd);
gfs2_rgrp_error(rbm.rgd);
return -EIO;
}
@ -2509,12 +2523,14 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
rgrp_lock_local(rgd);
rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE);
trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
rgd->rd_free += blen;
rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
rgrp_unlock_local(rgd);
/* Directories keep their data in the metadata address space */
if (meta || ip->i_depth || gfs2_is_jdata(ip))
@ -2550,17 +2566,20 @@ void gfs2_unlink_di(struct inode *inode)
rgd = gfs2_blk2rgrpd(sdp, blkno, true);
if (!rgd)
return;
rgrp_lock_local(rgd);
rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1);
rgrp_unlock_local(rgd);
}
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
rgrp_lock_local(rgd);
rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
if (!rgd->rd_dinodes)
gfs2_consist_rgrpd(rgd);
@ -2569,6 +2588,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
rgrp_unlock_local(rgd);
be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1);
gfs2_statfs_change(sdp, 0, +1, -1);
@ -2583,6 +2603,10 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
* @no_addr: The block number to check
* @type: The block type we are looking for
*
* The inode glock of @no_addr must be held. The @type to check for is either
* GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; checking for type GFS2_BLKST_FREE
* or GFS2_BLKST_USED would make no sense.
*
* Returns: 0 if the block type matches the expected type
* -ESTALE if it doesn't match
* or -ve errno if something went wrong while checking
@ -2606,6 +2630,13 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
rbm.rgd = rgd;
error = gfs2_rbm_from_block(&rbm, no_addr);
if (!WARN_ON_ONCE(error)) {
/*
* No need to take the local resource group lock here; the
* inode glock of @no_addr provides the necessary
* synchronization in case the block is an inode. (In case
* the block is not an inode, the block type will not match
* the @type we are looking for.)
*/
if (gfs2_testbit(&rbm, false) != type)
error = -ESTALE;
}
@ -2730,3 +2761,14 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
}
}
void rgrp_lock_local(struct gfs2_rgrpd *rgd)
{
BUG_ON(!gfs2_glock_is_held_excl(rgd->rd_gl) &&
!test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags));
mutex_lock(&rgd->rd_mutex);
}
/**
 * rgrp_unlock_local - release the node-local lock of a resource group
 * @rgd: the resource group to unlock
 *
 * Drops @rgd->rd_mutex, which rgrp_lock_local() acquires.
 */
void rgrp_unlock_local(struct gfs2_rgrpd *rgd)
{
	struct mutex *local_lock = &rgd->rd_mutex;

	mutex_unlock(local_lock);
}

View File

@ -88,4 +88,8 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
}
extern void check_and_update_goal(struct gfs2_inode *ip);
/* Node-local locking of a resource group: lock/unlock rgd->rd_mutex. */
extern void rgrp_lock_local(struct gfs2_rgrpd *rgd);
extern void rgrp_unlock_local(struct gfs2_rgrpd *rgd);
#endif /* __RGRP_DOT_H__ */