xfs: add missing ilock around dio write last extent alignment

The iomap codepath (via get_blocks()) acquires and release the inode
lock in the case of a direct write that requires block allocation. This
is because xfs_iomap_write_direct() allocates a transaction, which means
the ilock must be dropped and reacquired after the transaction is
allocated and reserved.

xfs_iomap_write_direct() invokes xfs_iomap_eof_align_last_fsb() before
the transaction is created and thus before the ilock is reacquired. This
can lead to calls to xfs_iread_extents() and reads of the in-core extent
list without any synchronization (via xfs_bmap_eof() and
xfs_bmap_last_extent()). xfs_iread_extents() assert fails if the ilock
is not held, but this is not currently seen in practice as the current
callers had already invoked xfs_bmapi_read().

What has been seen in practice are reports of crashes down in the
xfs_bmap_eof() codepath on direct writes due to seemingly bogus pointer
references from xfs_iext_get_ext(). While an explicit reproducer is not
currently available to confirm the cause of the problem, crash analysis
and code inspection from David Jeffrey had identified the insufficient
locking.

xfs_iomap_eof_align_last_fsb() is called from other contexts with the
inode lock already held, so we cannot acquire it therein.
__xfs_get_blocks() acquires and drops the ilock with variable flags to
cover the event that the extent list must be read in. The common case is
that __xfs_get_blocks() acquires the shared ilock. To provide locking
around the last extent alignment call without adding more lock cycles to
the dio path, update xfs_iomap_write_direct() to expect the shared ilock
held on entry and do the extent alignment under its protection. Demote
the lock, if necessary, from __xfs_get_blocks() and push the
xfs_qm_dqattach() call outside of the shared lock critical section.
Also, add an assert to document that the extent list is always expected
to be present in this path. Otherwise, we risk a call to
xfs_iread_extents() while under the shared ilock. This is safe as all
current callers have executed an xfs_bmapi_read() call under the current
iolock context.

Reported-by: David Jeffery <djeffery@redhat.com>
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
This commit is contained in:
Brian Foster 2015-10-12 15:34:20 +11:00 committed by Dave Chinner
parent 5cb13dcd0f
commit 009c6e871e
3 changed files with 36 additions and 12 deletions

View File

@ -1408,12 +1408,12 @@ __xfs_get_blocks(
imap.br_startblock == DELAYSTARTBLOCK))) {
if (direct || xfs_get_extsz_hint(ip)) {
/*
* Drop the ilock in preparation for starting the block
* allocation transaction. It will be retaken
* exclusively inside xfs_iomap_write_direct for the
* actual allocation.
* xfs_iomap_write_direct() expects the shared lock. It
* is unlocked on return.
*/
xfs_iunlock(ip, lockmode);
if (lockmode == XFS_ILOCK_EXCL)
xfs_ilock_demote(ip, lockmode);
error = xfs_iomap_write_direct(ip, offset, size,
&imap, nimaps);
if (error)

View File

@ -131,20 +131,29 @@ xfs_iomap_write_direct(
uint qblocks, resblks, resrtextents;
int committed;
int error;
error = xfs_qm_dqattach(ip, 0);
if (error)
return error;
int lockmode;
rt = XFS_IS_REALTIME_INODE(ip);
extsz = xfs_get_extsz_hint(ip);
lockmode = XFS_ILOCK_SHARED; /* locked by caller */
ASSERT(xfs_isilocked(ip, lockmode));
offset_fsb = XFS_B_TO_FSBT(mp, offset);
last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
if ((offset + count) > XFS_ISIZE(ip)) {
/*
* Assert that the in-core extent list is present since this can
* call xfs_iread_extents() and we only have the ilock shared.
* This should be safe because the lock was held around a bmapi
* call in the caller and we only need it to access the in-core
* list.
*/
ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags &
XFS_IFEXTENTS);
error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
if (error)
return error;
goto out_unlock;
} else {
if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
last_fsb = MIN(last_fsb, (xfs_fileoff_t)
@ -173,6 +182,15 @@ xfs_iomap_write_direct(
quota_flag = XFS_QMOPT_RES_REGBLKS;
}
/*
* Drop the shared lock acquired by the caller, attach the dquot if
* necessary and move on to transaction setup.
*/
xfs_iunlock(ip, lockmode);
error = xfs_qm_dqattach(ip, 0);
if (error)
return error;
/*
* Allocate and setup the transaction
*/
@ -187,7 +205,8 @@ xfs_iomap_write_direct(
return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
lockmode = XFS_ILOCK_EXCL;
xfs_ilock(ip, lockmode);
error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
if (error)
@ -229,7 +248,7 @@ xfs_iomap_write_direct(
error = xfs_alert_fsblock_zero(ip, imap);
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_iunlock(ip, lockmode);
return error;
out_bmap_cancel:

View File

@ -181,6 +181,11 @@ xfs_fs_map_blocks(
ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) {
/*
* xfs_iomap_write_direct() expects to take ownership of
* the shared ilock.
*/
xfs_ilock(ip, XFS_ILOCK_SHARED);
error = xfs_iomap_write_direct(ip, offset, length,
&imap, nimaps);
if (error)