quota: Reduce contention on dq_data_lock

dq_data_lock is currently used to protect all modifications of quota
accounting information, the consistency of quota accounting with inode
usage, and the dquot pointers in the inode. As a result, contention on
the lock can be pretty heavy.

Reduce the contention on the lock by protecting quota accounting
information with a new dquot->dq_dqb_lock, and the consistency of quota
accounting with inode usage with inode->i_lock.

With user quota turned on, this change reduces the time needed for 50
different processes to create 500000 files in separate directories on
ext4 on a ramdisk by 6%. When those 50 processes belong to 50 different
users, the improvement is about 9%.

Signed-off-by: Jan Kara <jack@suse.cz>
This commit is contained in:
Jan Kara 2017-08-07 13:19:50 +02:00
parent f4a8116a4c
commit 7b9ca4c61b
6 changed files with 186 additions and 132 deletions

View File

@ -5194,7 +5194,7 @@ static int ext4_statfs_project(struct super_block *sb,
dquot = dqget(sb, qid); dquot = dqget(sb, qid);
if (IS_ERR(dquot)) if (IS_ERR(dquot))
return PTR_ERR(dquot); return PTR_ERR(dquot);
spin_lock(&dq_data_lock); spin_lock(&dquot->dq_dqb_lock);
limit = (dquot->dq_dqb.dqb_bsoftlimit ? limit = (dquot->dq_dqb.dqb_bsoftlimit ?
dquot->dq_dqb.dqb_bsoftlimit : dquot->dq_dqb.dqb_bsoftlimit :
@ -5217,7 +5217,7 @@ static int ext4_statfs_project(struct super_block *sb,
(buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0; (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
} }
spin_unlock(&dq_data_lock); spin_unlock(&dquot->dq_dqb_lock);
dqput(dquot); dqput(dquot);
return 0; return 0;
} }

View File

@ -504,7 +504,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
/* Update space and inode usage. Get also other information from /* Update space and inode usage. Get also other information from
* global quota file so that we don't overwrite any changes there. * global quota file so that we don't overwrite any changes there.
* We are */ * We are */
spin_lock(&dq_data_lock); spin_lock(&dquot->dq_dqb_lock);
spacechange = dquot->dq_dqb.dqb_curspace - spacechange = dquot->dq_dqb.dqb_curspace -
OCFS2_DQUOT(dquot)->dq_origspace; OCFS2_DQUOT(dquot)->dq_origspace;
inodechange = dquot->dq_dqb.dqb_curinodes - inodechange = dquot->dq_dqb.dqb_curinodes -
@ -560,7 +560,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
__clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); __clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
spin_unlock(&dq_data_lock); spin_unlock(&dquot->dq_dqb_lock);
err = ocfs2_qinfo_lock(info, freeing); err = ocfs2_qinfo_lock(info, freeing);
if (err < 0) { if (err < 0) {
mlog(ML_ERROR, "Failed to lock quota info, losing quota write" mlog(ML_ERROR, "Failed to lock quota info, losing quota write"
@ -924,10 +924,10 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
/* In case user set some limits, sync dquot immediately to global /* In case user set some limits, sync dquot immediately to global
* quota file so that information propagates quicker */ * quota file so that information propagates quicker */
spin_lock(&dq_data_lock); spin_lock(&dquot->dq_dqb_lock);
if (dquot->dq_flags & mask) if (dquot->dq_flags & mask)
sync = 1; sync = 1;
spin_unlock(&dq_data_lock); spin_unlock(&dquot->dq_dqb_lock);
/* This is a slight hack but we can't afford getting global quota /* This is a slight hack but we can't afford getting global quota
* lock if we already have a transaction started. */ * lock if we already have a transaction started. */
if (!sync || journal_current_handle()) { if (!sync || journal_current_handle()) {

View File

@ -521,7 +521,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
goto out_drop_lock; goto out_drop_lock;
} }
down_write(&sb_dqopt(sb)->dqio_sem); down_write(&sb_dqopt(sb)->dqio_sem);
spin_lock(&dq_data_lock); spin_lock(&dquot->dq_dqb_lock);
/* Add usage from quota entry into quota changes /* Add usage from quota entry into quota changes
* of our node. Auxiliary variables are important * of our node. Auxiliary variables are important
* due to signedness */ * due to signedness */
@ -529,7 +529,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
inodechange = le64_to_cpu(dqblk->dqb_inodemod); inodechange = le64_to_cpu(dqblk->dqb_inodemod);
dquot->dq_dqb.dqb_curspace += spacechange; dquot->dq_dqb.dqb_curspace += spacechange;
dquot->dq_dqb.dqb_curinodes += inodechange; dquot->dq_dqb.dqb_curinodes += inodechange;
spin_unlock(&dq_data_lock); spin_unlock(&dquot->dq_dqb_lock);
/* We want to drop reference held by the crashed /* We want to drop reference held by the crashed
* node. Since we have our own reference we know * node. Since we have our own reference we know
* global structure actually won't be freed. */ * global structure actually won't be freed. */
@ -877,12 +877,12 @@ static void olq_set_dquot(struct buffer_head *bh, void *private)
dqblk->dqb_id = cpu_to_le64(from_kqid(&init_user_ns, dqblk->dqb_id = cpu_to_le64(from_kqid(&init_user_ns,
od->dq_dquot.dq_id)); od->dq_dquot.dq_id));
spin_lock(&dq_data_lock); spin_lock(&od->dq_dquot.dq_dqb_lock);
dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace - dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace -
od->dq_origspace); od->dq_origspace);
dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes - dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes -
od->dq_originodes); od->dq_originodes);
spin_unlock(&dq_data_lock); spin_unlock(&od->dq_dquot.dq_dqb_lock);
trace_olq_set_dquot( trace_olq_set_dquot(
(unsigned long long)le64_to_cpu(dqblk->dqb_spacemod), (unsigned long long)le64_to_cpu(dqblk->dqb_spacemod),
(unsigned long long)le64_to_cpu(dqblk->dqb_inodemod), (unsigned long long)le64_to_cpu(dqblk->dqb_inodemod),

View File

@ -82,16 +82,19 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
/* /*
* There are three quota SMP locks. dq_list_lock protects all lists with quotas * There are five quota SMP locks:
* and quota formats. * * dq_list_lock protects all lists with quotas and quota formats.
* dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and * * dquot->dq_dqb_lock protects data from dq_dqb
* also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes. * * inode->i_lock protects inode->i_blocks, i_bytes and also guards
* i_blocks and i_bytes updates itself are guarded by i_lock acquired directly * consistency of dquot->dq_dqb with inode->i_blocks, i_bytes so that
* in inode_add_bytes() and inode_sub_bytes(). dq_state_lock protects * dquot_transfer() can stabilize amount it transfers
* modifications of quota state (on quotaon and quotaoff) and readers who care * * dq_data_lock protects mem_dqinfo structures and modifications of dquot
* about latest values take it as well. * pointers in the inode
* * dq_state_lock protects modifications of quota state (on quotaon and
* quotaoff) and readers who care about latest values take it as well.
* *
* The spinlock ordering is hence: dq_data_lock > dq_list_lock > i_lock, * The spinlock ordering is hence:
* dq_data_lock > dq_list_lock > i_lock > dquot->dq_dqb_lock,
* dq_list_lock > dq_state_lock * dq_list_lock > dq_state_lock
* *
* Note that some things (eg. sb pointer, type, id) doesn't change during * Note that some things (eg. sb pointer, type, id) doesn't change during
@ -246,6 +249,7 @@ struct dqstats dqstats;
EXPORT_SYMBOL(dqstats); EXPORT_SYMBOL(dqstats);
static qsize_t inode_get_rsv_space(struct inode *inode); static qsize_t inode_get_rsv_space(struct inode *inode);
static qsize_t __inode_get_rsv_space(struct inode *inode);
static int __dquot_initialize(struct inode *inode, int type); static int __dquot_initialize(struct inode *inode, int type);
static inline unsigned int static inline unsigned int
@ -816,6 +820,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
dquot->dq_sb = sb; dquot->dq_sb = sb;
dquot->dq_id = make_kqid_invalid(type); dquot->dq_id = make_kqid_invalid(type);
atomic_set(&dquot->dq_count, 1); atomic_set(&dquot->dq_count, 1);
spin_lock_init(&dquot->dq_dqb_lock);
return dquot; return dquot;
} }
@ -1073,21 +1078,6 @@ static void drop_dquot_ref(struct super_block *sb, int type)
} }
} }
static inline void dquot_incr_inodes(struct dquot *dquot, qsize_t number)
{
dquot->dq_dqb.dqb_curinodes += number;
}
static inline void dquot_incr_space(struct dquot *dquot, qsize_t number)
{
dquot->dq_dqb.dqb_curspace += number;
}
static inline void dquot_resv_space(struct dquot *dquot, qsize_t number)
{
dquot->dq_dqb.dqb_rsvspace += number;
}
static inline static inline
void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) void dquot_free_reserved_space(struct dquot *dquot, qsize_t number)
{ {
@ -1246,21 +1236,24 @@ static int ignore_hardlimit(struct dquot *dquot)
!(info->dqi_flags & DQF_ROOT_SQUASH)); !(info->dqi_flags & DQF_ROOT_SQUASH));
} }
/* needs dq_data_lock */ static int dquot_add_inodes(struct dquot *dquot, qsize_t inodes,
static int check_idq(struct dquot *dquot, qsize_t inodes, struct dquot_warn *warn)
struct dquot_warn *warn)
{ {
qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes; qsize_t newinodes;
int ret = 0;
spin_lock(&dquot->dq_dqb_lock);
newinodes = dquot->dq_dqb.dqb_curinodes + inodes;
if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type) || if (!sb_has_quota_limits_enabled(dquot->dq_sb, dquot->dq_id.type) ||
test_bit(DQ_FAKE_B, &dquot->dq_flags)) test_bit(DQ_FAKE_B, &dquot->dq_flags))
return 0; goto add;
if (dquot->dq_dqb.dqb_ihardlimit && if (dquot->dq_dqb.dqb_ihardlimit &&
newinodes > dquot->dq_dqb.dqb_ihardlimit && newinodes > dquot->dq_dqb.dqb_ihardlimit &&
!ignore_hardlimit(dquot)) { !ignore_hardlimit(dquot)) {
prepare_warning(warn, dquot, QUOTA_NL_IHARDWARN); prepare_warning(warn, dquot, QUOTA_NL_IHARDWARN);
return -EDQUOT; ret = -EDQUOT;
goto out;
} }
if (dquot->dq_dqb.dqb_isoftlimit && if (dquot->dq_dqb.dqb_isoftlimit &&
@ -1269,7 +1262,8 @@ static int check_idq(struct dquot *dquot, qsize_t inodes,
ktime_get_real_seconds() >= dquot->dq_dqb.dqb_itime && ktime_get_real_seconds() >= dquot->dq_dqb.dqb_itime &&
!ignore_hardlimit(dquot)) { !ignore_hardlimit(dquot)) {
prepare_warning(warn, dquot, QUOTA_NL_ISOFTLONGWARN); prepare_warning(warn, dquot, QUOTA_NL_ISOFTLONGWARN);
return -EDQUOT; ret = -EDQUOT;
goto out;
} }
if (dquot->dq_dqb.dqb_isoftlimit && if (dquot->dq_dqb.dqb_isoftlimit &&
@ -1279,30 +1273,40 @@ static int check_idq(struct dquot *dquot, qsize_t inodes,
dquot->dq_dqb.dqb_itime = ktime_get_real_seconds() + dquot->dq_dqb.dqb_itime = ktime_get_real_seconds() +
sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type].dqi_igrace; sb_dqopt(dquot->dq_sb)->info[dquot->dq_id.type].dqi_igrace;
} }
add:
dquot->dq_dqb.dqb_curinodes = newinodes;
return 0; out:
spin_unlock(&dquot->dq_dqb_lock);
return ret;
} }
/* needs dq_data_lock */ static int dquot_add_space(struct dquot *dquot, qsize_t space,
static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, qsize_t rsv_space, unsigned int flags,
struct dquot_warn *warn) struct dquot_warn *warn)
{ {
qsize_t tspace; qsize_t tspace;
struct super_block *sb = dquot->dq_sb; struct super_block *sb = dquot->dq_sb;
int ret = 0;
spin_lock(&dquot->dq_dqb_lock);
if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) || if (!sb_has_quota_limits_enabled(sb, dquot->dq_id.type) ||
test_bit(DQ_FAKE_B, &dquot->dq_flags)) test_bit(DQ_FAKE_B, &dquot->dq_flags))
return 0; goto add;
tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace tspace = dquot->dq_dqb.dqb_curspace + dquot->dq_dqb.dqb_rsvspace
+ space; + space + rsv_space;
if (flags & DQUOT_SPACE_NOFAIL)
goto add;
if (dquot->dq_dqb.dqb_bhardlimit && if (dquot->dq_dqb.dqb_bhardlimit &&
tspace > dquot->dq_dqb.dqb_bhardlimit && tspace > dquot->dq_dqb.dqb_bhardlimit &&
!ignore_hardlimit(dquot)) { !ignore_hardlimit(dquot)) {
if (!prealloc) if (flags & DQUOT_SPACE_WARN)
prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN); prepare_warning(warn, dquot, QUOTA_NL_BHARDWARN);
return -EDQUOT; ret = -EDQUOT;
goto out;
} }
if (dquot->dq_dqb.dqb_bsoftlimit && if (dquot->dq_dqb.dqb_bsoftlimit &&
@ -1310,28 +1314,34 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc,
dquot->dq_dqb.dqb_btime && dquot->dq_dqb.dqb_btime &&
ktime_get_real_seconds() >= dquot->dq_dqb.dqb_btime && ktime_get_real_seconds() >= dquot->dq_dqb.dqb_btime &&
!ignore_hardlimit(dquot)) { !ignore_hardlimit(dquot)) {
if (!prealloc) if (flags & DQUOT_SPACE_WARN)
prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN); prepare_warning(warn, dquot, QUOTA_NL_BSOFTLONGWARN);
return -EDQUOT; ret = -EDQUOT;
goto out;
} }
if (dquot->dq_dqb.dqb_bsoftlimit && if (dquot->dq_dqb.dqb_bsoftlimit &&
tspace > dquot->dq_dqb.dqb_bsoftlimit && tspace > dquot->dq_dqb.dqb_bsoftlimit &&
dquot->dq_dqb.dqb_btime == 0) { dquot->dq_dqb.dqb_btime == 0) {
if (!prealloc) { if (flags & DQUOT_SPACE_WARN) {
prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN); prepare_warning(warn, dquot, QUOTA_NL_BSOFTWARN);
dquot->dq_dqb.dqb_btime = ktime_get_real_seconds() + dquot->dq_dqb.dqb_btime = ktime_get_real_seconds() +
sb_dqopt(sb)->info[dquot->dq_id.type].dqi_bgrace; sb_dqopt(sb)->info[dquot->dq_id.type].dqi_bgrace;
} } else {
else
/* /*
* We don't allow preallocation to exceed softlimit so exceeding will * We don't allow preallocation to exceed softlimit so exceeding will
* be always printed * be always printed
*/ */
return -EDQUOT; ret = -EDQUOT;
goto out;
}
} }
add:
return 0; dquot->dq_dqb.dqb_rsvspace += rsv_space;
dquot->dq_dqb.dqb_curspace += space;
out:
spin_unlock(&dquot->dq_dqb_lock);
return ret;
} }
static int info_idq_free(struct dquot *dquot, qsize_t inodes) static int info_idq_free(struct dquot *dquot, qsize_t inodes)
@ -1466,8 +1476,15 @@ static int __dquot_initialize(struct inode *inode, int type)
* did a write before quota was turned on * did a write before quota was turned on
*/ */
rsv = inode_get_rsv_space(inode); rsv = inode_get_rsv_space(inode);
if (unlikely(rsv)) if (unlikely(rsv)) {
dquot_resv_space(dquots[cnt], rsv); spin_lock(&inode->i_lock);
/* Get reservation again under proper lock */
rsv = __inode_get_rsv_space(inode);
spin_lock(&dquots[cnt]->dq_dqb_lock);
dquots[cnt]->dq_dqb.dqb_rsvspace += rsv;
spin_unlock(&dquots[cnt]->dq_dqb_lock);
spin_unlock(&inode->i_lock);
}
} }
} }
out_lock: out_lock:
@ -1562,6 +1579,13 @@ static qsize_t *inode_reserved_space(struct inode * inode)
return inode->i_sb->dq_op->get_reserved_space(inode); return inode->i_sb->dq_op->get_reserved_space(inode);
} }
static qsize_t __inode_get_rsv_space(struct inode *inode)
{
if (!inode->i_sb->dq_op->get_reserved_space)
return 0;
return *inode_reserved_space(inode);
}
static qsize_t inode_get_rsv_space(struct inode *inode) static qsize_t inode_get_rsv_space(struct inode *inode)
{ {
qsize_t ret; qsize_t ret;
@ -1569,7 +1593,7 @@ static qsize_t inode_get_rsv_space(struct inode *inode)
if (!inode->i_sb->dq_op->get_reserved_space) if (!inode->i_sb->dq_op->get_reserved_space)
return 0; return 0;
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
ret = *inode_reserved_space(inode); ret = __inode_get_rsv_space(inode);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
return ret; return ret;
} }
@ -1610,33 +1634,41 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
dquots = i_dquot(inode); dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu); index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock); spin_lock(&inode->i_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) { for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!dquots[cnt]) if (!dquots[cnt])
continue; continue;
ret = check_bdq(dquots[cnt], number, if (flags & DQUOT_SPACE_RESERVE) {
!(flags & DQUOT_SPACE_WARN), &warn[cnt]); ret = dquot_add_space(dquots[cnt], 0, number, flags,
if (ret && !(flags & DQUOT_SPACE_NOFAIL)) { &warn[cnt]);
spin_unlock(&dq_data_lock); } else {
ret = dquot_add_space(dquots[cnt], number, 0, flags,
&warn[cnt]);
}
if (ret) {
/* Back out changes we already did */
for (cnt--; cnt >= 0; cnt--) {
if (!dquots[cnt])
continue;
spin_lock(&dquots[cnt]->dq_dqb_lock);
if (flags & DQUOT_SPACE_RESERVE) {
dquots[cnt]->dq_dqb.dqb_rsvspace -=
number;
} else {
dquots[cnt]->dq_dqb.dqb_curspace -=
number;
}
spin_unlock(&dquots[cnt]->dq_dqb_lock);
}
spin_unlock(&inode->i_lock);
goto out_flush_warn; goto out_flush_warn;
} }
} }
for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (reserve)
if (!dquots[cnt])
continue;
if (reserve)
dquot_resv_space(dquots[cnt], number);
else
dquot_incr_space(dquots[cnt], number);
}
if (reserve) {
spin_lock(&inode->i_lock);
*inode_reserved_space(inode) += number; *inode_reserved_space(inode) += number;
spin_unlock(&inode->i_lock); else
} else { __inode_add_bytes(inode, number);
inode_add_bytes(inode, number); spin_unlock(&inode->i_lock);
}
spin_unlock(&dq_data_lock);
if (reserve) if (reserve)
goto out_flush_warn; goto out_flush_warn;
@ -1665,23 +1697,26 @@ int dquot_alloc_inode(struct inode *inode)
dquots = i_dquot(inode); dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu); index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock); spin_lock(&inode->i_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) { for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!dquots[cnt]) if (!dquots[cnt])
continue; continue;
ret = check_idq(dquots[cnt], 1, &warn[cnt]); ret = dquot_add_inodes(dquots[cnt], 1, &warn[cnt]);
if (ret) if (ret) {
for (cnt--; cnt >= 0; cnt--) {
if (!dquots[cnt])
continue;
/* Back out changes we already did */
spin_lock(&dquots[cnt]->dq_dqb_lock);
dquots[cnt]->dq_dqb.dqb_curinodes--;
spin_unlock(&dquots[cnt]->dq_dqb_lock);
}
goto warn_put_all; goto warn_put_all;
} }
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!dquots[cnt])
continue;
dquot_incr_inodes(dquots[cnt], 1);
} }
warn_put_all: warn_put_all:
spin_unlock(&dq_data_lock); spin_unlock(&inode->i_lock);
if (ret == 0) if (ret == 0)
mark_all_dquot_dirty(dquots); mark_all_dquot_dirty(dquots);
srcu_read_unlock(&dquot_srcu, index); srcu_read_unlock(&dquot_srcu, index);
@ -1708,24 +1743,24 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
dquots = i_dquot(inode); dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu); index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock); spin_lock(&inode->i_lock);
/* Claim reserved quotas to allocated quotas */ /* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) { for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (dquots[cnt]) { if (dquots[cnt]) {
struct dquot *dquot = dquots[cnt]; struct dquot *dquot = dquots[cnt];
spin_lock(&dquot->dq_dqb_lock);
if (WARN_ON_ONCE(dquot->dq_dqb.dqb_rsvspace < number)) if (WARN_ON_ONCE(dquot->dq_dqb.dqb_rsvspace < number))
number = dquot->dq_dqb.dqb_rsvspace; number = dquot->dq_dqb.dqb_rsvspace;
dquot->dq_dqb.dqb_curspace += number; dquot->dq_dqb.dqb_curspace += number;
dquot->dq_dqb.dqb_rsvspace -= number; dquot->dq_dqb.dqb_rsvspace -= number;
spin_unlock(&dquot->dq_dqb_lock);
} }
} }
/* Update inode bytes */ /* Update inode bytes */
spin_lock(&inode->i_lock);
*inode_reserved_space(inode) -= number; *inode_reserved_space(inode) -= number;
__inode_add_bytes(inode, number); __inode_add_bytes(inode, number);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
spin_unlock(&dq_data_lock);
mark_all_dquot_dirty(dquots); mark_all_dquot_dirty(dquots);
srcu_read_unlock(&dquot_srcu, index); srcu_read_unlock(&dquot_srcu, index);
return 0; return 0;
@ -1750,24 +1785,24 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
dquots = i_dquot(inode); dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu); index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock); spin_lock(&inode->i_lock);
/* Claim reserved quotas to allocated quotas */ /* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) { for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (dquots[cnt]) { if (dquots[cnt]) {
struct dquot *dquot = dquots[cnt]; struct dquot *dquot = dquots[cnt];
spin_lock(&dquot->dq_dqb_lock);
if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number)) if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number))
number = dquot->dq_dqb.dqb_curspace; number = dquot->dq_dqb.dqb_curspace;
dquot->dq_dqb.dqb_rsvspace += number; dquot->dq_dqb.dqb_rsvspace += number;
dquot->dq_dqb.dqb_curspace -= number; dquot->dq_dqb.dqb_curspace -= number;
spin_unlock(&dquot->dq_dqb_lock);
} }
} }
/* Update inode bytes */ /* Update inode bytes */
spin_lock(&inode->i_lock);
*inode_reserved_space(inode) += number; *inode_reserved_space(inode) += number;
__inode_sub_bytes(inode, number); __inode_sub_bytes(inode, number);
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
spin_unlock(&dq_data_lock);
mark_all_dquot_dirty(dquots); mark_all_dquot_dirty(dquots);
srcu_read_unlock(&dquot_srcu, index); srcu_read_unlock(&dquot_srcu, index);
return; return;
@ -1797,13 +1832,14 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
dquots = i_dquot(inode); dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu); index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock); spin_lock(&inode->i_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) { for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
int wtype; int wtype;
warn[cnt].w_type = QUOTA_NL_NOWARN; warn[cnt].w_type = QUOTA_NL_NOWARN;
if (!dquots[cnt]) if (!dquots[cnt])
continue; continue;
spin_lock(&dquots[cnt]->dq_dqb_lock);
wtype = info_bdq_free(dquots[cnt], number); wtype = info_bdq_free(dquots[cnt], number);
if (wtype != QUOTA_NL_NOWARN) if (wtype != QUOTA_NL_NOWARN)
prepare_warning(&warn[cnt], dquots[cnt], wtype); prepare_warning(&warn[cnt], dquots[cnt], wtype);
@ -1811,15 +1847,13 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
dquot_free_reserved_space(dquots[cnt], number); dquot_free_reserved_space(dquots[cnt], number);
else else
dquot_decr_space(dquots[cnt], number); dquot_decr_space(dquots[cnt], number);
spin_unlock(&dquots[cnt]->dq_dqb_lock);
} }
if (reserve) { if (reserve)
spin_lock(&inode->i_lock);
*inode_reserved_space(inode) -= number; *inode_reserved_space(inode) -= number;
spin_unlock(&inode->i_lock); else
} else { __inode_sub_bytes(inode, number);
inode_sub_bytes(inode, number); spin_unlock(&inode->i_lock);
}
spin_unlock(&dq_data_lock);
if (reserve) if (reserve)
goto out_unlock; goto out_unlock;
@ -1845,19 +1879,21 @@ void dquot_free_inode(struct inode *inode)
dquots = i_dquot(inode); dquots = i_dquot(inode);
index = srcu_read_lock(&dquot_srcu); index = srcu_read_lock(&dquot_srcu);
spin_lock(&dq_data_lock); spin_lock(&inode->i_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) { for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
int wtype; int wtype;
warn[cnt].w_type = QUOTA_NL_NOWARN; warn[cnt].w_type = QUOTA_NL_NOWARN;
if (!dquots[cnt]) if (!dquots[cnt])
continue; continue;
spin_lock(&dquots[cnt]->dq_dqb_lock);
wtype = info_idq_free(dquots[cnt], 1); wtype = info_idq_free(dquots[cnt], 1);
if (wtype != QUOTA_NL_NOWARN) if (wtype != QUOTA_NL_NOWARN)
prepare_warning(&warn[cnt], dquots[cnt], wtype); prepare_warning(&warn[cnt], dquots[cnt], wtype);
dquot_decr_inodes(dquots[cnt], 1); dquot_decr_inodes(dquots[cnt], 1);
spin_unlock(&dquots[cnt]->dq_dqb_lock);
} }
spin_unlock(&dq_data_lock); spin_unlock(&inode->i_lock);
mark_all_dquot_dirty(dquots); mark_all_dquot_dirty(dquots);
srcu_read_unlock(&dquot_srcu, index); srcu_read_unlock(&dquot_srcu, index);
flush_warnings(warn); flush_warnings(warn);
@ -1878,7 +1914,7 @@ EXPORT_SYMBOL(dquot_free_inode);
*/ */
int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
{ {
qsize_t space, cur_space; qsize_t cur_space;
qsize_t rsv_space = 0; qsize_t rsv_space = 0;
qsize_t inode_usage = 1; qsize_t inode_usage = 1;
struct dquot *transfer_from[MAXQUOTAS] = {}; struct dquot *transfer_from[MAXQUOTAS] = {};
@ -1905,14 +1941,18 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
} }
spin_lock(&dq_data_lock); spin_lock(&dq_data_lock);
spin_lock(&inode->i_lock);
if (IS_NOQUOTA(inode)) { /* File without quota accounting? */ if (IS_NOQUOTA(inode)) { /* File without quota accounting? */
spin_unlock(&inode->i_lock);
spin_unlock(&dq_data_lock); spin_unlock(&dq_data_lock);
return 0; return 0;
} }
cur_space = inode_get_bytes(inode); cur_space = __inode_get_bytes(inode);
rsv_space = inode_get_rsv_space(inode); rsv_space = __inode_get_rsv_space(inode);
space = cur_space + rsv_space; /*
/* Build the transfer_from list and check the limits */ * Build the transfer_from list, check limits, and update usage in
* the target structures.
*/
for (cnt = 0; cnt < MAXQUOTAS; cnt++) { for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
/* /*
* Skip changes for same uid or gid or for turned off quota-type. * Skip changes for same uid or gid or for turned off quota-type.
@ -1924,28 +1964,33 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
continue; continue;
is_valid[cnt] = 1; is_valid[cnt] = 1;
transfer_from[cnt] = i_dquot(inode)[cnt]; transfer_from[cnt] = i_dquot(inode)[cnt];
ret = check_idq(transfer_to[cnt], inode_usage, &warn_to[cnt]); ret = dquot_add_inodes(transfer_to[cnt], inode_usage,
&warn_to[cnt]);
if (ret) if (ret)
goto over_quota; goto over_quota;
ret = check_bdq(transfer_to[cnt], space, 0, &warn_to[cnt]); ret = dquot_add_space(transfer_to[cnt], cur_space, rsv_space, 0,
if (ret) &warn_to[cnt]);
if (ret) {
dquot_decr_inodes(transfer_to[cnt], inode_usage);
goto over_quota; goto over_quota;
}
} }
/* /* Decrease usage for source structures and update quota pointers */
* Finally perform the needed transfer from transfer_from to transfer_to
*/
for (cnt = 0; cnt < MAXQUOTAS; cnt++) { for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
if (!is_valid[cnt]) if (!is_valid[cnt])
continue; continue;
/* Due to IO error we might not have transfer_from[] structure */ /* Due to IO error we might not have transfer_from[] structure */
if (transfer_from[cnt]) { if (transfer_from[cnt]) {
int wtype; int wtype;
spin_lock(&transfer_from[cnt]->dq_dqb_lock);
wtype = info_idq_free(transfer_from[cnt], inode_usage); wtype = info_idq_free(transfer_from[cnt], inode_usage);
if (wtype != QUOTA_NL_NOWARN) if (wtype != QUOTA_NL_NOWARN)
prepare_warning(&warn_from_inodes[cnt], prepare_warning(&warn_from_inodes[cnt],
transfer_from[cnt], wtype); transfer_from[cnt], wtype);
wtype = info_bdq_free(transfer_from[cnt], space); wtype = info_bdq_free(transfer_from[cnt],
cur_space + rsv_space);
if (wtype != QUOTA_NL_NOWARN) if (wtype != QUOTA_NL_NOWARN)
prepare_warning(&warn_from_space[cnt], prepare_warning(&warn_from_space[cnt],
transfer_from[cnt], wtype); transfer_from[cnt], wtype);
@ -1953,14 +1998,11 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
dquot_decr_space(transfer_from[cnt], cur_space); dquot_decr_space(transfer_from[cnt], cur_space);
dquot_free_reserved_space(transfer_from[cnt], dquot_free_reserved_space(transfer_from[cnt],
rsv_space); rsv_space);
spin_unlock(&transfer_from[cnt]->dq_dqb_lock);
} }
dquot_incr_inodes(transfer_to[cnt], inode_usage);
dquot_incr_space(transfer_to[cnt], cur_space);
dquot_resv_space(transfer_to[cnt], rsv_space);
i_dquot(inode)[cnt] = transfer_to[cnt]; i_dquot(inode)[cnt] = transfer_to[cnt];
} }
spin_unlock(&inode->i_lock);
spin_unlock(&dq_data_lock); spin_unlock(&dq_data_lock);
mark_all_dquot_dirty(transfer_from); mark_all_dquot_dirty(transfer_from);
@ -1974,6 +2016,17 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
transfer_to[cnt] = transfer_from[cnt]; transfer_to[cnt] = transfer_from[cnt];
return 0; return 0;
over_quota: over_quota:
/* Back out changes we already did */
for (cnt--; cnt >= 0; cnt--) {
if (!is_valid[cnt])
continue;
spin_lock(&transfer_to[cnt]->dq_dqb_lock);
dquot_decr_inodes(transfer_to[cnt], inode_usage);
dquot_decr_space(transfer_to[cnt], cur_space);
dquot_free_reserved_space(transfer_to[cnt], rsv_space);
spin_unlock(&transfer_to[cnt]->dq_dqb_lock);
}
spin_unlock(&inode->i_lock);
spin_unlock(&dq_data_lock); spin_unlock(&dq_data_lock);
flush_warnings(warn_to); flush_warnings(warn_to);
return ret; return ret;
@ -2524,7 +2577,7 @@ static void do_get_dqblk(struct dquot *dquot, struct qc_dqblk *di)
struct mem_dqblk *dm = &dquot->dq_dqb; struct mem_dqblk *dm = &dquot->dq_dqb;
memset(di, 0, sizeof(*di)); memset(di, 0, sizeof(*di));
spin_lock(&dq_data_lock); spin_lock(&dquot->dq_dqb_lock);
di->d_spc_hardlimit = dm->dqb_bhardlimit; di->d_spc_hardlimit = dm->dqb_bhardlimit;
di->d_spc_softlimit = dm->dqb_bsoftlimit; di->d_spc_softlimit = dm->dqb_bsoftlimit;
di->d_ino_hardlimit = dm->dqb_ihardlimit; di->d_ino_hardlimit = dm->dqb_ihardlimit;
@ -2533,7 +2586,7 @@ static void do_get_dqblk(struct dquot *dquot, struct qc_dqblk *di)
di->d_ino_count = dm->dqb_curinodes; di->d_ino_count = dm->dqb_curinodes;
di->d_spc_timer = dm->dqb_btime; di->d_spc_timer = dm->dqb_btime;
di->d_ino_timer = dm->dqb_itime; di->d_ino_timer = dm->dqb_itime;
spin_unlock(&dq_data_lock); spin_unlock(&dquot->dq_dqb_lock);
} }
int dquot_get_dqblk(struct super_block *sb, struct kqid qid, int dquot_get_dqblk(struct super_block *sb, struct kqid qid,
@ -2597,7 +2650,7 @@ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di)
(di->d_ino_hardlimit > dqi->dqi_max_ino_limit))) (di->d_ino_hardlimit > dqi->dqi_max_ino_limit)))
return -ERANGE; return -ERANGE;
spin_lock(&dq_data_lock); spin_lock(&dquot->dq_dqb_lock);
if (di->d_fieldmask & QC_SPACE) { if (di->d_fieldmask & QC_SPACE) {
dm->dqb_curspace = di->d_space - dm->dqb_rsvspace; dm->dqb_curspace = di->d_space - dm->dqb_rsvspace;
check_blim = 1; check_blim = 1;
@ -2663,7 +2716,7 @@ static int do_set_dqblk(struct dquot *dquot, struct qc_dqblk *di)
clear_bit(DQ_FAKE_B, &dquot->dq_flags); clear_bit(DQ_FAKE_B, &dquot->dq_flags);
else else
set_bit(DQ_FAKE_B, &dquot->dq_flags); set_bit(DQ_FAKE_B, &dquot->dq_flags);
spin_unlock(&dq_data_lock); spin_unlock(&dquot->dq_dqb_lock);
mark_dquot_dirty(dquot); mark_dquot_dirty(dquot);
return 0; return 0;

View File

@ -389,9 +389,9 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
return ret; return ret;
} }
} }
spin_lock(&dq_data_lock); spin_lock(&dquot->dq_dqb_lock);
info->dqi_ops->mem2disk_dqblk(ddquot, dquot); info->dqi_ops->mem2disk_dqblk(ddquot, dquot);
spin_unlock(&dq_data_lock); spin_unlock(&dquot->dq_dqb_lock);
ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size, ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size,
dquot->dq_off); dquot->dq_off);
if (ret != info->dqi_entry_size) { if (ret != info->dqi_entry_size) {
@ -649,14 +649,14 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
kfree(ddquot); kfree(ddquot);
goto out; goto out;
} }
spin_lock(&dq_data_lock); spin_lock(&dquot->dq_dqb_lock);
info->dqi_ops->disk2mem_dqblk(dquot, ddquot); info->dqi_ops->disk2mem_dqblk(dquot, ddquot);
if (!dquot->dq_dqb.dqb_bhardlimit && if (!dquot->dq_dqb.dqb_bhardlimit &&
!dquot->dq_dqb.dqb_bsoftlimit && !dquot->dq_dqb.dqb_bsoftlimit &&
!dquot->dq_dqb.dqb_ihardlimit && !dquot->dq_dqb.dqb_ihardlimit &&
!dquot->dq_dqb.dqb_isoftlimit) !dquot->dq_dqb.dqb_isoftlimit)
set_bit(DQ_FAKE_B, &dquot->dq_flags); set_bit(DQ_FAKE_B, &dquot->dq_flags);
spin_unlock(&dq_data_lock); spin_unlock(&dquot->dq_dqb_lock);
kfree(ddquot); kfree(ddquot);
out: out:
dqstats_inc(DQST_READS); dqstats_inc(DQST_READS);

View File

@ -298,12 +298,13 @@ struct dquot {
struct list_head dq_free; /* Free list element */ struct list_head dq_free; /* Free list element */
struct list_head dq_dirty; /* List of dirty dquots */ struct list_head dq_dirty; /* List of dirty dquots */
struct mutex dq_lock; /* dquot IO lock */ struct mutex dq_lock; /* dquot IO lock */
spinlock_t dq_dqb_lock; /* Lock protecting dq_dqb changes */
atomic_t dq_count; /* Use count */ atomic_t dq_count; /* Use count */
struct super_block *dq_sb; /* superblock this applies to */ struct super_block *dq_sb; /* superblock this applies to */
struct kqid dq_id; /* ID this applies to (uid, gid, projid) */ struct kqid dq_id; /* ID this applies to (uid, gid, projid) */
loff_t dq_off; /* Offset of dquot on disk */ loff_t dq_off; /* Offset of dquot on disk */
unsigned long dq_flags; /* See DQ_* */ unsigned long dq_flags; /* See DQ_* */
struct mem_dqblk dq_dqb; /* Diskquota usage */ struct mem_dqblk dq_dqb; /* Diskquota usage [dq_dqb_lock] */
}; };
/* Operations which must be implemented by each quota format */ /* Operations which must be implemented by each quota format */