Merge branch 'vfs.fallocate' into vfs.all

Signed-off-by: Christian Brauner <brauner@kernel.org>
This commit is contained in:
Christian Brauner 2024-08-28 19:08:47 +02:00
commit 70f023a608
No known key found for this signature in database
GPG Key ID: 91C61BC06578DCA2
7 changed files with 258 additions and 187 deletions

View File

@ -771,7 +771,7 @@ reexpand:
#define BLKDEV_FALLOC_FL_SUPPORTED \
(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
FALLOC_FL_ZERO_RANGE)
static long blkdev_fallocate(struct file *file, int mode, loff_t start,
loff_t len)
@ -830,14 +830,6 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
len >> SECTOR_SHIFT, GFP_KERNEL,
BLKDEV_ZERO_NOFALLBACK);
break;
case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
if (error)
goto fail;
error = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
len >> SECTOR_SHIFT, GFP_KERNEL);
break;
default:
error = -EOPNOTSUPP;
}

View File

@ -252,40 +252,39 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (offset < 0 || len <= 0)
return -EINVAL;
/* Return error if mode is not supported */
if (mode & ~FALLOC_FL_SUPPORTED_MASK)
if (mode & ~(FALLOC_FL_MODE_MASK | FALLOC_FL_KEEP_SIZE))
return -EOPNOTSUPP;
/* Punch hole and zero range are mutually exclusive */
if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
/*
* Modes are exclusive, even if that is not obvious from the encoding
* as bit masks and the mix with the flag in the same namespace.
*
* To make things even more complicated, FALLOC_FL_ALLOCATE_RANGE is
* encoded as no bit set.
*/
switch (mode & FALLOC_FL_MODE_MASK) {
case FALLOC_FL_ALLOCATE_RANGE:
case FALLOC_FL_UNSHARE_RANGE:
case FALLOC_FL_ZERO_RANGE:
break;
case FALLOC_FL_PUNCH_HOLE:
if (!(mode & FALLOC_FL_KEEP_SIZE))
return -EOPNOTSUPP;
break;
case FALLOC_FL_COLLAPSE_RANGE:
case FALLOC_FL_INSERT_RANGE:
if (mode & FALLOC_FL_KEEP_SIZE)
return -EOPNOTSUPP;
break;
default:
return -EOPNOTSUPP;
/* Punch hole must have keep size set */
if ((mode & FALLOC_FL_PUNCH_HOLE) &&
!(mode & FALLOC_FL_KEEP_SIZE))
return -EOPNOTSUPP;
/* Collapse range should only be used exclusively. */
if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
(mode & ~FALLOC_FL_COLLAPSE_RANGE))
return -EINVAL;
/* Insert range should only be used exclusively. */
if ((mode & FALLOC_FL_INSERT_RANGE) &&
(mode & ~FALLOC_FL_INSERT_RANGE))
return -EINVAL;
/* Unshare range should only be used with allocate mode. */
if ((mode & FALLOC_FL_UNSHARE_RANGE) &&
(mode & ~(FALLOC_FL_UNSHARE_RANGE | FALLOC_FL_KEEP_SIZE)))
return -EINVAL;
}
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
/*
* We can only allow pure fallocate on append only files
* On append-only files only space preallocation is supported.
*/
if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
return -EPERM;

View File

@ -653,6 +653,9 @@ xfs_alloc_file_space(
xfs_bmbt_irec_t imaps[1], *imapp;
int error;
if (xfs_is_always_cow_inode(ip))
return 0;
trace_xfs_alloc_file_space(ip);
if (xfs_is_shutdown(mp))
@ -848,6 +851,14 @@ xfs_free_file_space(
if (len <= 0) /* if nothing being freed */
return 0;
/*
* Now AIO and DIO has drained we flush and (if necessary) invalidate
* the cached range over the first operation we are about to run.
*/
error = xfs_flush_unmap_range(ip, offset, len);
if (error)
return error;
startoffset_fsb = XFS_B_TO_FSB(mp, offset);
endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);

View File

@ -852,6 +852,192 @@ static inline bool xfs_file_sync_writes(struct file *filp)
return false;
}
static int
xfs_falloc_newsize(
struct file *file,
int mode,
loff_t offset,
loff_t len,
loff_t *new_size)
{
struct inode *inode = file_inode(file);
if ((mode & FALLOC_FL_KEEP_SIZE) || offset + len <= i_size_read(inode))
return 0;
*new_size = offset + len;
return inode_newsize_ok(inode, *new_size);
}
static int
xfs_falloc_setsize(
struct file *file,
loff_t new_size)
{
struct iattr iattr = {
.ia_valid = ATTR_SIZE,
.ia_size = new_size,
};
if (!new_size)
return 0;
return xfs_vn_setattr_size(file_mnt_idmap(file), file_dentry(file),
&iattr);
}
static int
xfs_falloc_collapse_range(
struct file *file,
loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
loff_t new_size = i_size_read(inode) - len;
int error;
if (!xfs_is_falloc_aligned(XFS_I(inode), offset, len))
return -EINVAL;
/*
* There is no need to overlap collapse range with EOF, in which case it
* is effectively a truncate operation
*/
if (offset + len >= i_size_read(inode))
return -EINVAL;
error = xfs_collapse_file_space(XFS_I(inode), offset, len);
if (error)
return error;
return xfs_falloc_setsize(file, new_size);
}
static int
xfs_falloc_insert_range(
struct file *file,
loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
loff_t isize = i_size_read(inode);
int error;
if (!xfs_is_falloc_aligned(XFS_I(inode), offset, len))
return -EINVAL;
/*
* New inode size must not exceed ->s_maxbytes, accounting for
* possible signed overflow.
*/
if (inode->i_sb->s_maxbytes - isize < len)
return -EFBIG;
/* Offset should be less than i_size */
if (offset >= isize)
return -EINVAL;
error = xfs_falloc_setsize(file, isize + len);
if (error)
return error;
/*
* Perform hole insertion now that the file size has been updated so
* that if we crash during the operation we don't leave shifted extents
* past EOF and hence losing access to the data that is contained within
* them.
*/
return xfs_insert_file_space(XFS_I(inode), offset, len);
}
/*
* Punch a hole and prealloc the range. We use a hole punch rather than
* unwritten extent conversion for two reasons:
*
* 1.) Hole punch handles partial block zeroing for us.
* 2.) If prealloc returns ENOSPC, the file range is still zero-valued by
* virtue of the hole punch.
*/
static int
xfs_falloc_zero_range(
struct file *file,
int mode,
loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
unsigned int blksize = i_blocksize(inode);
loff_t new_size = 0;
int error;
trace_xfs_zero_file_space(XFS_I(inode));
error = xfs_falloc_newsize(file, mode, offset, len, &new_size);
if (error)
return error;
error = xfs_free_file_space(XFS_I(inode), offset, len);
if (error)
return error;
len = round_up(offset + len, blksize) - round_down(offset, blksize);
offset = round_down(offset, blksize);
error = xfs_alloc_file_space(XFS_I(inode), offset, len);
if (error)
return error;
return xfs_falloc_setsize(file, new_size);
}
static int
xfs_falloc_unshare_range(
struct file *file,
int mode,
loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
loff_t new_size = 0;
int error;
error = xfs_falloc_newsize(file, mode, offset, len, &new_size);
if (error)
return error;
error = xfs_reflink_unshare(XFS_I(inode), offset, len);
if (error)
return error;
error = xfs_alloc_file_space(XFS_I(inode), offset, len);
if (error)
return error;
return xfs_falloc_setsize(file, new_size);
}
static int
xfs_falloc_allocate_range(
struct file *file,
int mode,
loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
loff_t new_size = 0;
int error;
/*
* If always_cow mode we can't use preallocations and thus should not
* create them.
*/
if (xfs_is_always_cow_inode(XFS_I(inode)))
return -EOPNOTSUPP;
error = xfs_falloc_newsize(file, mode, offset, len, &new_size);
if (error)
return error;
error = xfs_alloc_file_space(XFS_I(inode), offset, len);
if (error)
return error;
return xfs_falloc_setsize(file, new_size);
}
#define XFS_FALLOC_FL_SUPPORTED \
(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \
@ -868,8 +1054,6 @@ xfs_file_fallocate(
struct xfs_inode *ip = XFS_I(inode);
long error;
uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
loff_t new_size = 0;
bool do_file_insert = false;
if (!S_ISREG(inode->i_mode))
return -EINVAL;
@ -890,156 +1074,35 @@ xfs_file_fallocate(
*/
inode_dio_wait(inode);
/*
* Now AIO and DIO has drained we flush and (if necessary) invalidate
* the cached range over the first operation we are about to run.
*
* We care about zero and collapse here because they both run a hole
* punch over the range first. Because that can zero data, and the range
* of invalidation for the shift operations is much larger, we still do
* the required flush for collapse in xfs_prepare_shift().
*
* Insert has the same range requirements as collapse, and we extend the
* file first which can zero data. Hence insert has the same
* flush/invalidate requirements as collapse and so they are both
* handled at the right time by xfs_prepare_shift().
*/
if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
FALLOC_FL_COLLAPSE_RANGE)) {
error = xfs_flush_unmap_range(ip, offset, len);
if (error)
goto out_unlock;
}
error = file_modified(file);
if (error)
goto out_unlock;
if (mode & FALLOC_FL_PUNCH_HOLE) {
switch (mode & FALLOC_FL_MODE_MASK) {
case FALLOC_FL_PUNCH_HOLE:
error = xfs_free_file_space(ip, offset, len);
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
if (!xfs_is_falloc_aligned(ip, offset, len)) {
error = -EINVAL;
goto out_unlock;
}
/*
* There is no need to overlap collapse range with EOF,
* in which case it is effectively a truncate operation
*/
if (offset + len >= i_size_read(inode)) {
error = -EINVAL;
goto out_unlock;
}
new_size = i_size_read(inode) - len;
error = xfs_collapse_file_space(ip, offset, len);
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_INSERT_RANGE) {
loff_t isize = i_size_read(inode);
if (!xfs_is_falloc_aligned(ip, offset, len)) {
error = -EINVAL;
goto out_unlock;
}
/*
* New inode size must not exceed ->s_maxbytes, accounting for
* possible signed overflow.
*/
if (inode->i_sb->s_maxbytes - isize < len) {
error = -EFBIG;
goto out_unlock;
}
new_size = isize + len;
/* Offset should be less than i_size */
if (offset >= isize) {
error = -EINVAL;
goto out_unlock;
}
do_file_insert = true;
} else {
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
offset + len > i_size_read(inode)) {
new_size = offset + len;
error = inode_newsize_ok(inode, new_size);
if (error)
goto out_unlock;
}
if (mode & FALLOC_FL_ZERO_RANGE) {
/*
* Punch a hole and prealloc the range. We use a hole
* punch rather than unwritten extent conversion for two
* reasons:
*
* 1.) Hole punch handles partial block zeroing for us.
* 2.) If prealloc returns ENOSPC, the file range is
* still zero-valued by virtue of the hole punch.
*/
unsigned int blksize = i_blocksize(inode);
trace_xfs_zero_file_space(ip);
error = xfs_free_file_space(ip, offset, len);
if (error)
goto out_unlock;
len = round_up(offset + len, blksize) -
round_down(offset, blksize);
offset = round_down(offset, blksize);
} else if (mode & FALLOC_FL_UNSHARE_RANGE) {
error = xfs_reflink_unshare(ip, offset, len);
if (error)
goto out_unlock;
} else {
/*
* If always_cow mode we can't use preallocations and
* thus should not create them.
*/
if (xfs_is_always_cow_inode(ip)) {
error = -EOPNOTSUPP;
goto out_unlock;
}
}
if (!xfs_is_always_cow_inode(ip)) {
error = xfs_alloc_file_space(ip, offset, len);
if (error)
goto out_unlock;
}
break;
case FALLOC_FL_COLLAPSE_RANGE:
error = xfs_falloc_collapse_range(file, offset, len);
break;
case FALLOC_FL_INSERT_RANGE:
error = xfs_falloc_insert_range(file, offset, len);
break;
case FALLOC_FL_ZERO_RANGE:
error = xfs_falloc_zero_range(file, mode, offset, len);
break;
case FALLOC_FL_UNSHARE_RANGE:
error = xfs_falloc_unshare_range(file, mode, offset, len);
break;
case FALLOC_FL_ALLOCATE_RANGE:
error = xfs_falloc_allocate_range(file, mode, offset, len);
break;
default:
error = -EOPNOTSUPP;
break;
}
/* Change file size if needed */
if (new_size) {
struct iattr iattr;
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = new_size;
error = xfs_vn_setattr_size(file_mnt_idmap(file),
file_dentry(file), &iattr);
if (error)
goto out_unlock;
}
/*
* Perform hole insertion now that the file size has been
* updated so that if we crash during the operation we don't
* leave shifted extents past EOF and hence losing access to
* the data that is contained within them.
*/
if (do_file_insert) {
error = xfs_insert_file_space(ip, offset, len);
if (error)
goto out_unlock;
}
if (xfs_file_sync_writes(file))
if (!error && xfs_file_sync_writes(file))
error = xfs_log_force_inode(ip);
out_unlock:

View File

@ -25,12 +25,18 @@ struct space_resv {
#define FS_IOC_UNRESVSP64 _IOW('X', 43, struct space_resv)
#define FS_IOC_ZERO_RANGE _IOW('X', 57, struct space_resv)
#define FALLOC_FL_SUPPORTED_MASK (FALLOC_FL_KEEP_SIZE | \
FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_COLLAPSE_RANGE | \
FALLOC_FL_ZERO_RANGE | \
FALLOC_FL_INSERT_RANGE | \
FALLOC_FL_UNSHARE_RANGE)
/*
* Mask of all supported fallocate modes. Only one can be set at a time.
*
* In addition to the mode bit, the mode argument can also encode flags.
* FALLOC_FL_KEEP_SIZE is the only supported flag so far.
*/
#define FALLOC_FL_MODE_MASK (FALLOC_FL_ALLOCATE_RANGE | \
FALLOC_FL_PUNCH_HOLE | \
FALLOC_FL_COLLAPSE_RANGE | \
FALLOC_FL_ZERO_RANGE | \
FALLOC_FL_INSERT_RANGE | \
FALLOC_FL_UNSHARE_RANGE)
/* on ia32 l_start is on a 32-bit boundary */
#if defined(CONFIG_X86_64)

View File

@ -91,7 +91,6 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
#define show_falloc_mode(mode) __print_flags(mode, "|", \
{ FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \
{ FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \
{ FALLOC_FL_NO_HIDE_STALE, "NO_HIDE_STALE"}, \
{ FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \
{ FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})

View File

@ -2,6 +2,7 @@
#ifndef _UAPI_FALLOC_H_
#define _UAPI_FALLOC_H_
#define FALLOC_FL_ALLOCATE_RANGE 0x00 /* allocate range */
#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
#define FALLOC_FL_NO_HIDE_STALE 0x04 /* reserved codepoint */