NFS: Fix unstable write completion

We do want to respect the FLUSH_SYNC argument to nfs_commit_inode() to
ensure that all outstanding COMMIT requests to the inode in question are
complete. Currently we may exit early from both nfs_commit_inode() and
nfs_write_inode() even if there are COMMIT requests in flight, or unstable
writes on the commit list.

In order to get the right semantics w.r.t. sync_inode(), we don't need
to have nfs_commit_inode() reset the inode dirty flags when called from
nfs_wb_page() and/or nfs_wb_all(). We just need to ensure that
nfs_write_inode() leaves them in the right state if there are outstanding
commits, or stable pages.

Reported-by: Scott Mayhew <smayhew@redhat.com>
Fixes: dc4fd9ab01 ("nfs: don't wait on commit in nfs_commit_inode()...")
Cc: stable@vger.kernel.org # v4.14+
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
This commit is contained in:
Trond Myklebust 2018-03-07 15:22:31 -05:00
parent 9c6376ebdd
commit c4f24df942

View File

@ -1876,40 +1876,43 @@ int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
return status;
}
int nfs_commit_inode(struct inode *inode, int how)
static int __nfs_commit_inode(struct inode *inode, int how,
struct writeback_control *wbc)
{
LIST_HEAD(head);
struct nfs_commit_info cinfo;
int may_wait = how & FLUSH_SYNC;
int error = 0;
int res;
int ret, nscan;
nfs_init_cinfo_from_inode(&cinfo, inode);
nfs_commit_begin(cinfo.mds);
res = nfs_scan_commit(inode, &head, &cinfo);
if (res)
error = nfs_generic_commit_list(inode, &head, how, &cinfo);
for (;;) {
ret = nscan = nfs_scan_commit(inode, &head, &cinfo);
if (ret <= 0)
break;
ret = nfs_generic_commit_list(inode, &head, how, &cinfo);
if (ret < 0)
break;
ret = 0;
if (wbc && wbc->sync_mode == WB_SYNC_NONE) {
if (nscan < wbc->nr_to_write)
wbc->nr_to_write -= nscan;
else
wbc->nr_to_write = 0;
}
if (nscan < INT_MAX)
break;
cond_resched();
}
nfs_commit_end(cinfo.mds);
if (res == 0)
return res;
if (error < 0)
goto out_error;
if (!may_wait)
goto out_mark_dirty;
error = wait_on_commit(cinfo.mds);
if (error < 0)
return error;
return res;
out_error:
res = error;
/* Note: If we exit without ensuring that the commit is complete,
* we must mark the inode as dirty. Otherwise, future calls to
* sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
* that the data is on the disk.
*/
out_mark_dirty:
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return res;
if (ret || !may_wait)
return ret;
return wait_on_commit(cinfo.mds);
}
int nfs_commit_inode(struct inode *inode, int how)
{
return __nfs_commit_inode(inode, how, NULL);
}
EXPORT_SYMBOL_GPL(nfs_commit_inode);
@ -1919,11 +1922,11 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
int flags = FLUSH_SYNC;
int ret = 0;
/* no commits means nothing needs to be done */
if (!atomic_long_read(&nfsi->commit_info.ncommit))
return ret;
if (wbc->sync_mode == WB_SYNC_NONE) {
/* no commits means nothing needs to be done */
if (!atomic_long_read(&nfsi->commit_info.ncommit))
goto check_requests_outstanding;
/* Don't commit yet if this is a non-blocking flush and there
* are a lot of outstanding writes for this mapping.
*/
@ -1934,16 +1937,16 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
flags = 0;
}
ret = nfs_commit_inode(inode, flags);
if (ret >= 0) {
if (wbc->sync_mode == WB_SYNC_NONE) {
if (ret < wbc->nr_to_write)
wbc->nr_to_write -= ret;
else
wbc->nr_to_write = 0;
}
return 0;
}
ret = __nfs_commit_inode(inode, flags, wbc);
if (!ret) {
if (flags & FLUSH_SYNC)
return 0;
} else if (atomic_long_read(&nfsi->commit_info.ncommit))
goto out_mark_dirty;
check_requests_outstanding:
if (!atomic_read(&nfsi->commit_info.rpcs_out))
return ret;
out_mark_dirty:
__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
return ret;