diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a59443db1de9..c02a47453137 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1234,15 +1234,19 @@ xfs_vm_releasepage( } /* - * When we map a DIO buffer, we need to attach an ioend that describes the type - * of write IO we are doing. This passes to the completion function the - * operations it needs to perform. + * When we map a DIO buffer, we may need to attach an ioend that describes the + * type of write IO we are doing. This passes to the completion function the + * operations it needs to perform. If the mapping is for an overwrite wholly + * within the EOF then we don't need an ioend and so we don't allocate one. + * This avoids the unnecessary overhead of allocating and freeing ioends for + * workloads that don't require transactions on IO completion. * * If we get multiple mappings in a single IO, we might be mapping different * types. But because the direct IO can only have a single private pointer, we * need to ensure that: * - * a) the ioend spans the entire region of the IO; and + * a) i) the ioend spans the entire region of unwritten mappings; or + * ii) the ioend spans all the mappings that cross or are beyond EOF; and * b) if it contains unwritten extents, it is *permanently* marked as such * * We could do this by chaining ioends like buffered IO does, but we only @@ -1283,21 +1287,23 @@ xfs_map_direct( trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset, ioend->io_size, ioend->io_type, imap); - } else { + } else if (type == XFS_IO_UNWRITTEN || + offset + size > i_size_read(inode)) { ioend = xfs_alloc_ioend(inode, type); ioend->io_offset = offset; ioend->io_size = size; + bh_result->b_private = ioend; + set_buffer_defer_completion(bh_result); trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type, imap); + } else { + trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type, + imap); } - - if (ioend->io_type == XFS_IO_UNWRITTEN || xfs_ioend_is_append(ioend)) - set_buffer_defer_completion(bh_result); } - /* * If this is O_DIRECT or the mpage code calling tell them how large the mapping * is, so that we can avoid repeated get_blocks calls. @@ -1519,9 +1525,11 @@ xfs_get_blocks_direct( /* * Complete a direct I/O write request. * - * If the private argument is non-NULL __xfs_get_blocks signals us that we - * need to issue a transaction to convert the range from unwritten to written - * extents. + * The ioend structure is passed from __xfs_get_blocks() to tell us what to do. + * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite + * wholly within the EOF and so there is nothing for us to do. Note that in this + * case the completion can be called in interrupt context, whereas if we have an + * ioend we will always be called in task context (i.e. from a workqueue). */ STATIC void xfs_end_io_direct_write( @@ -1535,7 +1543,13 @@ xfs_end_io_direct_write( struct xfs_mount *mp = ip->i_mount; struct xfs_ioend *ioend = private; - trace_xfs_gbmap_direct_endio(ip, offset, size, ioend->io_type, NULL); + trace_xfs_gbmap_direct_endio(ip, offset, size, + ioend ? ioend->io_type : 0, NULL); + + if (!ioend) { + ASSERT(offset + size <= i_size_read(inode)); + return; + } if (XFS_FORCED_SHUTDOWN(mp)) goto out_end_io; @@ -1548,12 +1562,12 @@ xfs_end_io_direct_write( /* * The ioend only maps whole blocks, while the IO may be sector aligned. - * Hence the ioend offset/size may not match the IO offset/size exactly, - * but should span it completely. Write the IO sizes into the ioend so - * that completion processing does the right thing. + * Hence the ioend offset/size may not match the IO offset/size exactly. + * Because we don't map overwrites within EOF into the ioend, the offset + * may not match, but only if the endio spans EOF. Either way, write + * the IO sizes into the ioend so that completion processing does the + * right thing. */ - ASSERT(size <= ioend->io_size); - ASSERT(offset >= ioend->io_offset); ASSERT(offset + size <= ioend->io_offset + ioend->io_size); ioend->io_size = size; ioend->io_offset = offset; @@ -1562,20 +1576,15 @@ xfs_end_io_direct_write( * The ioend tells us whether we are doing unwritten extent conversion * or an append transaction that updates the on-disk file size. These * cases are the only cases where we should *potentially* be needing - * to update the VFS inode size. When the ioend indicates this, we - * are *guaranteed* to be running in non-interrupt context. + * to update the VFS inode size. * * We need to update the in-core inode size here so that we don't end up - * with the on-disk inode size being outside the in-core inode size. - * While we can do this in the process context after the IO has - * completed, this does not work for AIO and hence we always update - * the in-core inode size here if necessary. + * with the on-disk inode size being outside the in-core inode size. We + * have no other method of updating EOF for AIO, so always do it here + * if necessary. */ - if (ioend->io_type == XFS_IO_UNWRITTEN || xfs_ioend_is_append(ioend)) { - if (offset + size > i_size_read(inode)) - i_size_write(inode, offset + size); - } else - ASSERT(offset + size <= i_size_read(inode)); + if (offset + size > i_size_read(inode)) + i_size_write(inode, offset + size); /* * If we are doing an append IO that needs to update the EOF on disk, @@ -1584,7 +1593,7 @@ xfs_end_io_direct_write( * result in the ioend processing passing on the error if it is * possible as we can't return it from here. */ - if (ioend->io_type == XFS_IO_OVERWRITE && xfs_ioend_is_append(ioend)) + if (ioend->io_type == XFS_IO_OVERWRITE) ioend->io_error = xfs_setfilesize_trans_alloc(ioend); out_end_io: diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 0ae50e9847bb..4e0a5773eee4 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1220,6 +1220,7 @@ DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); DEFINE_IOMAP_EVENT(xfs_gbmap_direct); DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new); DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update); +DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none); DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio); DECLARE_EVENT_CLASS(xfs_simple_io_class,