mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-12-12 05:24:12 +08:00
1319ebefd6
The inode log item is kind of special in that it can be aggregating new changes in memory at the same time that existing changes are being written back to disk. This means there are fields in the log item that are accessed concurrently from contexts that don't share any locking at all. For example, updating ili_last_fields occurs under the ILOCK_EXCL and flush lock at flush time, under the flush lock at IO completion time, and is read under the ILOCK_EXCL when the inode is logged. Hence there is no actual serialisation between reading the field during logging of the inode in transactions vs clearing the field in IO completion. We currently get away with this by the fact that we are only clearing fields in IO completion, and nothing bad happens if we accidentally log more of the inode than we actually modify. Worst case is we consume a tiny bit more memory and log bandwidth. However, if we want to do more complex state manipulations on the log item that require updates at all three of these potential locations, we need to have some mechanism of serialising those operations. To do this, introduce a spinlock into the log item to serialise internal state. This could be done via the xfs_inode i_flags_lock, but this then leads to potential lock inversion issues where inode flag updates need to occur inside locks that best nest inside the inode log item locks (e.g. marking inodes stale during inode cluster freeing). Using a separate spinlock avoids these sorts of problems and simplifies future code. This does not touch the use of ili_fields in the item formatting code - that is entirely protected by the ILOCK_EXCL at this point in time, so it remains untouched. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
157 lines
4.2 KiB
C
157 lines
4.2 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (c) 2000,2005 Silicon Graphics, Inc.
|
|
* All Rights Reserved.
|
|
*/
|
|
#include "xfs.h"
|
|
#include "xfs_fs.h"
|
|
#include "xfs_shared.h"
|
|
#include "xfs_format.h"
|
|
#include "xfs_log_format.h"
|
|
#include "xfs_inode.h"
|
|
#include "xfs_trans.h"
|
|
#include "xfs_trans_priv.h"
|
|
#include "xfs_inode_item.h"
|
|
|
|
#include <linux/iversion.h>
|
|
|
|
/*
|
|
* Add a locked inode to the transaction.
|
|
*
|
|
* The inode must be locked, and it cannot be associated with any transaction.
|
|
* If lock_flags is non-zero the inode will be unlocked on transaction commit.
|
|
*/
|
|
void
|
|
xfs_trans_ijoin(
|
|
struct xfs_trans *tp,
|
|
struct xfs_inode *ip,
|
|
uint lock_flags)
|
|
{
|
|
struct xfs_inode_log_item *iip;
|
|
|
|
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
|
|
if (ip->i_itemp == NULL)
|
|
xfs_inode_item_init(ip, ip->i_mount);
|
|
iip = ip->i_itemp;
|
|
|
|
ASSERT(iip->ili_lock_flags == 0);
|
|
iip->ili_lock_flags = lock_flags;
|
|
ASSERT(!xfs_iflags_test(ip, XFS_ISTALE));
|
|
|
|
/*
|
|
* Get a log_item_desc to point at the new item.
|
|
*/
|
|
xfs_trans_add_item(tp, &iip->ili_item);
|
|
}
|
|
|
|
/*
|
|
* Transactional inode timestamp update. Requires the inode to be locked and
|
|
* joined to the transaction supplied. Relies on the transaction subsystem to
|
|
* track dirty state and update/writeback the inode accordingly.
|
|
*/
|
|
void
|
|
xfs_trans_ichgtime(
|
|
struct xfs_trans *tp,
|
|
struct xfs_inode *ip,
|
|
int flags)
|
|
{
|
|
struct inode *inode = VFS_I(ip);
|
|
struct timespec64 tv;
|
|
|
|
ASSERT(tp);
|
|
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
|
|
|
|
tv = current_time(inode);
|
|
|
|
if (flags & XFS_ICHGTIME_MOD)
|
|
inode->i_mtime = tv;
|
|
if (flags & XFS_ICHGTIME_CHG)
|
|
inode->i_ctime = tv;
|
|
if (flags & XFS_ICHGTIME_CREATE)
|
|
ip->i_d.di_crtime = tv;
|
|
}
|
|
|
|
/*
|
|
* This is called to mark the fields indicated in fieldmask as needing
|
|
* to be logged when the transaction is committed. The inode must
|
|
* already be associated with the given transaction.
|
|
*
|
|
* The values for fieldmask are defined in xfs_inode_item.h. We always
|
|
* log all of the core inode if any of it has changed, and we always log
|
|
* all of the inline data/extents/b-tree root if any of them has changed.
|
|
*/
|
|
void
|
|
xfs_trans_log_inode(
|
|
struct xfs_trans *tp,
|
|
struct xfs_inode *ip,
|
|
uint flags)
|
|
{
|
|
struct xfs_inode_log_item *iip = ip->i_itemp;
|
|
struct inode *inode = VFS_I(ip);
|
|
uint iversion_flags = 0;
|
|
|
|
ASSERT(iip);
|
|
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
|
|
ASSERT(!xfs_iflags_test(ip, XFS_ISTALE));
|
|
|
|
tp->t_flags |= XFS_TRANS_DIRTY;
|
|
|
|
/*
|
|
* Don't bother with i_lock for the I_DIRTY_TIME check here, as races
|
|
* don't matter - we either will need an extra transaction in 24 hours
|
|
* to log the timestamps, or will clear already cleared fields in the
|
|
* worst case.
|
|
*/
|
|
if (inode->i_state & (I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED)) {
|
|
spin_lock(&inode->i_lock);
|
|
inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
|
|
spin_unlock(&inode->i_lock);
|
|
}
|
|
|
|
/*
|
|
* First time we log the inode in a transaction, bump the inode change
|
|
* counter if it is configured for this to occur. While we have the
|
|
* inode locked exclusively for metadata modification, we can usually
|
|
* avoid setting XFS_ILOG_CORE if no one has queried the value since
|
|
* the last time it was incremented. If we have XFS_ILOG_CORE already
|
|
* set however, then go ahead and bump the i_version counter
|
|
* unconditionally.
|
|
*/
|
|
if (!test_and_set_bit(XFS_LI_DIRTY, &iip->ili_item.li_flags)) {
|
|
if (IS_I_VERSION(inode) &&
|
|
inode_maybe_inc_iversion(inode, flags & XFS_ILOG_CORE))
|
|
iversion_flags = XFS_ILOG_CORE;
|
|
}
|
|
|
|
/*
|
|
* Record the specific change for fdatasync optimisation. This allows
|
|
* fdatasync to skip log forces for inodes that are only timestamp
|
|
* dirty.
|
|
*/
|
|
spin_lock(&iip->ili_lock);
|
|
iip->ili_fsync_fields |= flags;
|
|
|
|
/*
|
|
* Always OR in the bits from the ili_last_fields field. This is to
|
|
* coordinate with the xfs_iflush() and xfs_iflush_done() routines in
|
|
* the eventual clearing of the ili_fields bits. See the big comment in
|
|
* xfs_iflush() for an explanation of this coordination mechanism.
|
|
*/
|
|
iip->ili_fields |= (flags | iip->ili_last_fields | iversion_flags);
|
|
spin_unlock(&iip->ili_lock);
|
|
}
|
|
|
|
int
|
|
xfs_trans_roll_inode(
|
|
struct xfs_trans **tpp,
|
|
struct xfs_inode *ip)
|
|
{
|
|
int error;
|
|
|
|
xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
|
|
error = xfs_trans_roll(tpp);
|
|
if (!error)
|
|
xfs_trans_ijoin(*tpp, ip, 0);
|
|
return error;
|
|
}
|