From 30460e1ea3e62f8457e087db9a309ed1031630da Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 9 Jan 2020 14:30:41 +0100 Subject: [PATCH 1/6] fs: Enable bmap() function to properly return errors By now, bmap() will either return the physical block number related to the requested file offset or 0 in case of error or the requested offset maps into a hole. This patch makes the needed changes to enable bmap() to proper return errors, using the return value as an error return, and now, a pointer must be passed to bmap() to be filled with the mapped physical block. It will change the behavior of bmap() on return: - negative value in case of error - zero on success or map fell into a hole In case of a hole, the *block will be zero too Since this is a prep patch, by now, the only error return is -EINVAL if ->bmap doesn't exist. Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino Signed-off-by: Al Viro --- drivers/md/md-bitmap.c | 16 ++++++++++------ fs/f2fs/data.c | 16 +++++++++++----- fs/inode.c | 30 ++++++++++++++++++------------ fs/jbd2/journal.c | 22 +++++++++++++++------- include/linux/fs.h | 9 ++++++++- mm/page_io.c | 11 +++++++---- 6 files changed, 69 insertions(+), 35 deletions(-) diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 3ad18246fcb3..92d3b515252d 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -364,7 +364,7 @@ static int read_page(struct file *file, unsigned long index, int ret = 0; struct inode *inode = file_inode(file); struct buffer_head *bh; - sector_t block; + sector_t block, blk_cur; pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, (unsigned long long)index << PAGE_SHIFT); @@ -375,17 +375,21 @@ static int read_page(struct file *file, unsigned long index, goto out; } attach_page_buffers(page, bh); - block = index << (PAGE_SHIFT - inode->i_blkbits); + blk_cur = index << (PAGE_SHIFT - inode->i_blkbits); while (bh) { + block = blk_cur; + if (count == 0) bh->b_blocknr = 0; else { - bh->b_blocknr = bmap(inode, block); - if (bh->b_blocknr == 0) { - /* Cannot use this file! */ + ret = bmap(inode, &block); + if (ret || !block) { ret = -EINVAL; + bh->b_blocknr = 0; goto out; } + + bh->b_blocknr = block; bh->b_bdev = inode->i_sb->s_bdev; if (count < (1<i_blkbits)) count = 0; @@ -399,7 +403,7 @@ static int read_page(struct file *file, unsigned long index, set_buffer_mapped(bh); submit_bh(REQ_OP_READ, 0, bh); } - block++; + blk_cur++; bh = bh->b_this_page; } page->index = index; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a034cd0ce021..e948902c4ec5 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3170,12 +3170,16 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) while ((probe_block + blocks_per_page) <= last_block && page_no < max) { unsigned block_in_page; sector_t first_block; + sector_t block = 0; + int err = 0; cond_resched(); - first_block = bmap(inode, probe_block); - if (first_block == 0) + block = probe_block; + err = bmap(inode, &block); + if (err || !block) goto bad_bmap; + first_block = block; /* * It must be PAGE_SIZE aligned on-disk @@ -3187,11 +3191,13 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) for (block_in_page = 1; block_in_page < blocks_per_page; block_in_page++) { - sector_t block; - block = bmap(inode, probe_block + block_in_page); - if (block == 0) + block = probe_block + block_in_page; + err = bmap(inode, &block); + + if (err || !block) goto bad_bmap; + if (block != first_block + block_in_page) { /* Discontiguity */ probe_block++; diff --git a/fs/inode.c b/fs/inode.c index 96d62d97694e..5dacfa1fb129 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1598,25 +1598,31 @@ retry: } EXPORT_SYMBOL(iput); +#ifdef CONFIG_BLOCK /** * bmap - find a block number in a file - * @inode: inode of file - * @block: block to find + * @inode: inode owning the block number being requested + * @block: pointer containing the block to find * - * Returns the block number on the device holding the inode that - * is the disk block number for the block of the file requested. - * That is, asked for block 4 of inode 1 the function will return the - * disk block relative to the disk start that holds that block of the - * file. + * Replaces the value in *block with the block number on the device holding + * corresponding to the requested block number in the file. + * That is, asked for block 4 of inode 1 the function will replace the + * 4 in *block, with disk block relative to the disk start that holds that + * block of the file. + * + * Returns -EINVAL in case of error, 0 otherwise. If mapping falls into a + * hole, returns 0 and *block is also set to 0. */ -sector_t bmap(struct inode *inode, sector_t block) +int bmap(struct inode *inode, sector_t *block) { - sector_t res = 0; - if (inode->i_mapping->a_ops->bmap) - res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block); - return res; + if (!inode->i_mapping->a_ops->bmap) + return -EINVAL; + + *block = inode->i_mapping->a_ops->bmap(inode->i_mapping, *block); + return 0; } EXPORT_SYMBOL(bmap); +#endif /* * With relative atime, only update atime if the previous atime is diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 5e408ee24a1a..01fa5d247e39 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -795,18 +795,23 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, { int err = 0; unsigned long long ret; + sector_t block = 0; if (journal->j_inode) { - ret = bmap(journal->j_inode, blocknr); - if (ret) - *retp = ret; - else { + block = blocknr; + ret = bmap(journal->j_inode, &block); + + if (ret || !block) { printk(KERN_ALERT "%s: journal block not found " "at offset %lu on %s\n", __func__, blocknr, journal->j_devname); err = -EIO; __journal_abort_soft(journal, err); + + } else { + *retp = block; } + } else { *retp = blocknr; /* +journal->j_blk_offset */ } @@ -1244,11 +1249,14 @@ journal_t *jbd2_journal_init_dev(struct block_device *bdev, journal_t *jbd2_journal_init_inode(struct inode *inode) { journal_t *journal; + sector_t blocknr; char *p; - unsigned long long blocknr; + int err = 0; - blocknr = bmap(inode, 0); - if (!blocknr) { + blocknr = 0; + err = bmap(inode, &blocknr); + + if (err || !blocknr) { pr_err("%s: Cannot locate journal superblock\n", __func__); return NULL; diff --git a/include/linux/fs.h b/include/linux/fs.h index 98e0349adb52..f762e07ea589 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2865,9 +2865,16 @@ static inline ssize_t generic_write_sync(struct kiocb *iocb, ssize_t count) extern void emergency_sync(void); extern void emergency_remount(void); + #ifdef CONFIG_BLOCK -extern sector_t bmap(struct inode *, sector_t); +extern int bmap(struct inode *inode, sector_t *block); +#else +static inline int bmap(struct inode *inode, sector_t *block) +{ + return -EINVAL; +} #endif + extern int notify_change(struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int); diff --git a/mm/page_io.c b/mm/page_io.c index 3a198deb8bb1..76965be1d40e 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -177,8 +177,9 @@ int generic_swapfile_activate(struct swap_info_struct *sis, cond_resched(); - first_block = bmap(inode, probe_block); - if (first_block == 0) + first_block = probe_block; + ret = bmap(inode, &first_block); + if (ret || !first_block) goto bad_bmap; /* @@ -193,9 +194,11 @@ int generic_swapfile_activate(struct swap_info_struct *sis, block_in_page++) { sector_t block; - block = bmap(inode, probe_block + block_in_page); - if (block == 0) + block = probe_block + block_in_page; + ret = bmap(inode, &block); + if (ret || !block) goto bad_bmap; + if (block != first_block + block_in_page) { /* Discontiguity */ probe_block++; From 10d83e11a582631a14579a087a55608075c6f539 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 9 Jan 2020 14:30:42 +0100 Subject: [PATCH 2/6] cachefiles: drop direct usage of ->bmap method. Replace the direct usage of ->bmap method by a bmap() call. Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino Signed-off-by: Al Viro --- fs/cachefiles/rdwr.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 44a3ce1e4ce4..1dc97f2d6201 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -396,7 +396,7 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, struct cachefiles_object *object; struct cachefiles_cache *cache; struct inode *inode; - sector_t block0, block; + sector_t block; unsigned shift; int ret; @@ -412,7 +412,6 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, inode = d_backing_inode(object->backer); ASSERT(S_ISREG(inode->i_mode)); - ASSERT(inode->i_mapping->a_ops->bmap); ASSERT(inode->i_mapping->a_ops->readpages); /* calculate the shift required to use bmap */ @@ -428,12 +427,14 @@ int cachefiles_read_or_alloc_page(struct fscache_retrieval *op, * enough for this as it doesn't indicate errors, but it's all we've * got for the moment */ - block0 = page->index; - block0 <<= shift; + block = page->index; + block <<= shift; + + ret = bmap(inode, &block); + ASSERT(ret < 0); - block = inode->i_mapping->a_ops->bmap(inode->i_mapping, block0); _debug("%llx -> %llx", - (unsigned long long) block0, + (unsigned long long) (page->index << shift), (unsigned long long) block); if (block) { @@ -711,7 +712,6 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, inode = d_backing_inode(object->backer); ASSERT(S_ISREG(inode->i_mode)); - ASSERT(inode->i_mapping->a_ops->bmap); ASSERT(inode->i_mapping->a_ops->readpages); /* calculate the shift required to use bmap */ @@ -728,7 +728,7 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, ret = space ? -ENODATA : -ENOBUFS; list_for_each_entry_safe(page, _n, pages, lru) { - sector_t block0, block; + sector_t block; /* we assume the absence or presence of the first block is a * good enough indication for the page as a whole @@ -736,13 +736,14 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op, * good enough for this as it doesn't indicate errors, but * it's all we've got for the moment */ - block0 = page->index; - block0 <<= shift; + block = page->index; + block <<= shift; + + ret = bmap(inode, &block); + ASSERT(!ret); - block = inode->i_mapping->a_ops->bmap(inode->i_mapping, - block0); _debug("%llx -> %llx", - (unsigned long long) block0, + (unsigned long long) (page->index << shift), (unsigned long long) block); if (block) { From 569d2056def7add85c0a9add03c19017e5525012 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 9 Jan 2020 14:30:43 +0100 Subject: [PATCH 3/6] ecryptfs: drop direct calls to ->bmap Replace direct ->bmap calls by bmap() method. Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino Signed-off-by: Al Viro --- fs/ecryptfs/mmap.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index cffa0c1ec829..019572c6b39a 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -524,16 +524,12 @@ out: static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block) { - int rc = 0; - struct inode *inode; - struct inode *lower_inode; + struct inode *lower_inode = ecryptfs_inode_to_lower(mapping->host); + int ret = bmap(lower_inode, &block); - inode = (struct inode *)mapping->host; - lower_inode = ecryptfs_inode_to_lower(inode); - if (lower_inode->i_mapping->a_ops->bmap) - rc = lower_inode->i_mapping->a_ops->bmap(lower_inode->i_mapping, - block); - return rc; + if (ret) + return 0; + return block; } const struct address_space_operations ecryptfs_aops = { From 0d89fdae2afe833dd3025edd8c8287675d45c74e Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 9 Jan 2020 14:30:44 +0100 Subject: [PATCH 4/6] fibmap: Use bmap instead of ->bmap method in ioctl_fibmap Now we have the possibility of proper error return in bmap, use bmap() function in ioctl_fibmap() instead of calling ->bmap method directly. Signed-off-by: Carlos Maiolino Signed-off-by: Al Viro --- fs/ioctl.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/fs/ioctl.c b/fs/ioctl.c index 2f5e4e5b97e1..13327862f278 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -54,19 +54,29 @@ EXPORT_SYMBOL(vfs_ioctl); static int ioctl_fibmap(struct file *filp, int __user *p) { - struct address_space *mapping = filp->f_mapping; - int res, block; + struct inode *inode = file_inode(filp); + int error, ur_block; + sector_t block; - /* do we support this mess? */ - if (!mapping->a_ops->bmap) - return -EINVAL; if (!capable(CAP_SYS_RAWIO)) return -EPERM; - res = get_user(block, p); - if (res) - return res; - res = mapping->a_ops->bmap(mapping, block); - return put_user(res, p); + + error = get_user(ur_block, p); + if (error) + return error; + + block = ur_block; + error = bmap(inode, &block); + + if (error) + ur_block = 0; + else + ur_block = block; + + if (put_user(ur_block, p)) + error = -EFAULT; + + return error; } /** From 324282c0252a44a97d628813e30ea7258940d469 Mon Sep 17 00:00:00 2001 From: Carlos Maiolino Date: Thu, 9 Jan 2020 14:30:45 +0100 Subject: [PATCH 5/6] fibmap: Reject negative block numbers FIBMAP receives an integer from userspace which is then implicitly converted into sector_t to be passed to bmap(). No check is made to ensure userspace didn't send a negative block number, which can end up in an underflow, and returning to userspace a corrupted block address. As a side-effect, the underflow caused by a negative block here, will trigger the WARN() in iomap_bmap_actor(), which is how this issue was first discovered. Reviewed-by: Christoph Hellwig Signed-off-by: Carlos Maiolino Signed-off-by: Al Viro --- fs/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ioctl.c b/fs/ioctl.c index 13327862f278..0be9bee9ff8f 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -65,6 +65,9 @@ static int ioctl_fibmap(struct file *filp, int __user *p) if (error) return error; + if (ur_block < 0) + return -EINVAL; + block = ur_block; error = bmap(inode, &block); From 12efec5602744c5a185049eb4fcfd9aebe01bd6f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Jan 2020 19:49:57 -0500 Subject: [PATCH 6/6] saner copy_mount_options() don't bother with the byte-by-byte loops, etc. Signed-off-by: Al Viro --- fs/namespace.c | 49 +++++++------------------------------------------ 1 file changed, 7 insertions(+), 42 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 5e1bf611a9eb..85b5f7bea82e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2979,39 +2979,10 @@ static void shrink_submounts(struct mount *mnt) } } -/* - * Some copy_from_user() implementations do not return the exact number of - * bytes remaining to copy on a fault. But copy_mount_options() requires that. - * Note that this function differs from copy_from_user() in that it will oops - * on bad values of `to', rather than returning a short copy. - */ -static long exact_copy_from_user(void *to, const void __user * from, - unsigned long n) -{ - char *t = to; - const char __user *f = from; - char c; - - if (!access_ok(from, n)) - return n; - - while (n) { - if (__get_user(c, f)) { - memset(t, 0, n); - break; - } - *t++ = c; - f++; - n--; - } - return n; -} - void *copy_mount_options(const void __user * data) { - int i; - unsigned long size; char *copy; + unsigned size; if (!data) return NULL; @@ -3020,22 +2991,16 @@ void *copy_mount_options(const void __user * data) if (!copy) return ERR_PTR(-ENOMEM); - /* We only care that *some* data at the address the user - * gave us is valid. Just in case, we'll zero - * the remainder of the page. - */ - /* copy_from_user cannot cross TASK_SIZE ! */ - size = TASK_SIZE - (unsigned long)untagged_addr(data); - if (size > PAGE_SIZE) - size = PAGE_SIZE; + size = PAGE_SIZE - offset_in_page(data); - i = size - exact_copy_from_user(copy, data, size); - if (!i) { + if (copy_from_user(copy, data, size)) { kfree(copy); return ERR_PTR(-EFAULT); } - if (i != PAGE_SIZE) - memset(copy + i, 0, PAGE_SIZE - i); + if (size != PAGE_SIZE) { + if (copy_from_user(copy + size, data + size, PAGE_SIZE - size)) + memset(copy + size, 0, PAGE_SIZE - size); + } return copy; }