From bfc4ee39fdbb2deb8864785d5e5bc5cdd3b31a69 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Wed, 3 May 2006 10:35:10 +0200
Subject: [PATCH 1/5] [PATCH] splice: fix unlocking of page on error
 ->prepare_write()

Looking at generic_file_buffered_write(), we need to unlock_page() if
prepare write fails and it isn't due to racing with truncate(). Also
trim the size if ->prepare_write() fails, if we have to.

Signed-off-by: Jens Axboe
---
 fs/splice.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/fs/splice.c b/fs/splice.c
index 7fb04970c72d..27f5e3738a7b 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -647,11 +647,24 @@ find_page:
 	}
 
 	ret = mapping->a_ops->prepare_write(file, page, offset, offset+this_len);
-	if (ret == AOP_TRUNCATED_PAGE) {
+	if (unlikely(ret)) {
+		loff_t isize = i_size_read(mapping->host);
+
+		if (ret != AOP_TRUNCATED_PAGE)
+			unlock_page(page);
 		page_cache_release(page);
-		goto find_page;
-	} else if (ret)
+		if (ret == AOP_TRUNCATED_PAGE)
+			goto find_page;
+
+		/*
+		 * prepare_write() may have instantiated a few blocks
+		 * outside i_size. Trim these off again.
+		 */
+		if (sd->pos + this_len > isize)
+			vmtruncate(mapping->host, isize);
+
 		goto out;
+	}
 
 	if (buf->page != page) {
 		/*

From 1432873af7ae29d4bb3c56114c05b539d078ca62 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Wed, 3 May 2006 10:35:26 +0200
Subject: [PATCH 2/5] [PATCH] splice: LRU fixups

Nick says that the current construct isn't safe. This goes back to the
original, but sets PIPE_BUF_FLAG_LRU on user pages as well as they all
seem to be on the LRU in the first place.

Signed-off-by: Jens Axboe
---
 fs/splice.c               | 33 +++++++++++----------------------
 include/linux/pipe_fs_i.h |  5 +++--
 2 files changed, 14 insertions(+), 24 deletions(-)

diff --git a/fs/splice.c b/fs/splice.c
index 27f5e3738a7b..0b202425b0b5 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -78,6 +78,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
 		return 1;
 	}
 
+	buf->flags |= PIPE_BUF_FLAG_LRU;
 	return 0;
 }
 
@@ -85,6 +86,7 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *info,
 					struct pipe_buffer *buf)
 {
 	page_cache_release(buf->page);
+	buf->flags &= ~PIPE_BUF_FLAG_LRU;
 }
 
 static int page_cache_pipe_buf_pin(struct pipe_inode_info *info,
@@ -141,6 +143,7 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
 	if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
 		return 1;
 
+	buf->flags |= PIPE_BUF_FLAG_LRU;
 	return generic_pipe_buf_steal(pipe, buf);
 }
 
@@ -566,37 +569,23 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 	 */
 	if ((sd->flags & SPLICE_F_MOVE) && this_len == PAGE_CACHE_SIZE) {
 		/*
-		 * If steal succeeds, buf->page is now pruned from the vm
-		 * side (page cache) and we can reuse it. The page will also
-		 * be locked on successful return.
+		 * If steal succeeds, buf->page is now pruned from the
+		 * pagecache and we can reuse it. The page will also be
+		 * locked on successful return.
 		 */
 		if (buf->ops->steal(info, buf))
 			goto find_page;
 
 		page = buf->page;
-		page_cache_get(page);
-
-		/*
-		 * page must be on the LRU for adding to the pagecache.
-		 * Check this without grabbing the zone lock, if it isn't
-		 * the do grab the zone lock, recheck, and add if necessary.
-		 */
-		if (!PageLRU(page)) {
-			struct zone *zone = page_zone(page);
-
-			spin_lock_irq(&zone->lru_lock);
-			if (!PageLRU(page)) {
-				SetPageLRU(page);
-				add_page_to_inactive_list(zone, page);
-			}
-			spin_unlock_irq(&zone->lru_lock);
-		}
-
 		if (add_to_page_cache(page, mapping, index, gfp_mask)) {
-			page_cache_release(page);
 			unlock_page(page);
 			goto find_page;
 		}
+
+		page_cache_get(page);
+
+		if (!(buf->flags & PIPE_BUF_FLAG_LRU))
+			lru_cache_add(page);
 	} else {
 find_page:
 		page = find_lock_page(mapping, index);
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index ba73108cbf8b..ea4f7cd7bfd8 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -5,8 +5,9 @@
 
 #define PIPE_BUFFERS (16)
 
-#define PIPE_BUF_FLAG_ATOMIC	0x01	/* was atomically mapped */
-#define PIPE_BUF_FLAG_GIFT	0x02	/* page is a gift */
+#define PIPE_BUF_FLAG_LRU	0x01	/* page is on the LRU */
+#define PIPE_BUF_FLAG_ATOMIC	0x02	/* was atomically mapped */
+#define PIPE_BUF_FLAG_GIFT	0x04	/* page is a gift */
 
 struct pipe_buffer {
 	struct page *page;

From 76ad4d11105ccd40a536db1057083f28326019fd Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Wed, 3 May 2006 10:41:33 +0200
Subject: [PATCH 3/5] [PATCH] splice: rename remaining info variables to pipe

Same thing was done in fs/pipe.c and most of fs/splice.c, but we had a
few missing still.

Signed-off-by: Jens Axboe
---
 fs/splice.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/splice.c b/fs/splice.c
index 0b202425b0b5..8fa9217ed585 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -51,7 +51,7 @@ struct splice_pipe_desc {
  * addition of remove_mapping(). If success is returned, the caller may
  * attempt to reuse this page for another destination.
  */
-static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
+static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
 				     struct pipe_buffer *buf)
 {
 	struct page *page = buf->page;
@@ -82,14 +82,14 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *info,
 	return 0;
 }
 
-static void page_cache_pipe_buf_release(struct pipe_inode_info *info,
+static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
 					struct pipe_buffer *buf)
 {
 	page_cache_release(buf->page);
 	buf->flags &= ~PIPE_BUF_FLAG_LRU;
 }
 
-static int page_cache_pipe_buf_pin(struct pipe_inode_info *info,
+static int page_cache_pipe_buf_pin(struct pipe_inode_info *pipe,
 				   struct pipe_buffer *buf)
 {
 	struct page *page = buf->page;
@@ -500,14 +500,14 @@ EXPORT_SYMBOL(generic_file_splice_read);
  * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
 * using sendpage(). Return the number of bytes sent.
  */
-static int pipe_to_sendpage(struct pipe_inode_info *info,
+static int pipe_to_sendpage(struct pipe_inode_info *pipe,
 			    struct pipe_buffer *buf, struct splice_desc *sd)
 {
 	struct file *file = sd->file;
 	loff_t pos = sd->pos;
 	int ret, more;
 
-	ret = buf->ops->pin(info, buf);
+	ret = buf->ops->pin(pipe, buf);
 	if (!ret) {
 		more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
 
@@ -538,7 +538,7 @@ static int pipe_to_sendpage(struct pipe_inode_info *info,
 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
 * a new page in the output file page cache and fill/dirty that.
 */
-static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
+static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
			struct splice_desc *sd)
{
	struct file *file = sd->file;
@@ -552,7 +552,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 	/*
 	 * make sure the data in this buffer is uptodate
 	 */
-	ret = buf->ops->pin(info, buf);
+	ret = buf->ops->pin(pipe, buf);
 	if (unlikely(ret))
 		return ret;
 
@@ -573,7 +573,7 @@ static int pipe_to_file(struct pipe_inode_info *info, struct pipe_buffer *buf,
 		 * pagecache and we can reuse it. The page will also be
 		 * locked on successful return.
 		 */
-		if (buf->ops->steal(info, buf))
+		if (buf->ops->steal(pipe, buf))
 			goto find_page;
 
 		page = buf->page;
@@ -659,13 +659,13 @@ find_page:
 		/*
 		 * Careful, ->map() uses KM_USER0!
 		 */
-		char *src = buf->ops->map(info, buf, 1);
+		char *src = buf->ops->map(pipe, buf, 1);
 		char *dst = kmap_atomic(page, KM_USER1);
 
 		memcpy(dst + offset, src + buf->offset, this_len);
 		flush_dcache_page(page);
 		kunmap_atomic(dst, KM_USER1);
-		buf->ops->unmap(info, buf, src);
+		buf->ops->unmap(pipe, buf, src);
 	}
 
 	ret = mapping->a_ops->commit_write(file, page, offset, offset+this_len);

From a0548871ed267ae12eb1c860c5aaebd9e466b34e Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Wed, 3 May 2006 10:58:22 +0200
Subject: [PATCH 4/5] [PATCH] splice: redo page lookup if add_to_page_cache()
 returns -EEXIST

This can happen quite easily, if several processes are trying to splice
the same file at the same time. It's not a failure, it just means
someone raced with us in allocating this file page. So just dump the
allocated page and relookup the original.

Signed-off-by: Jens Axboe
---
 fs/splice.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/splice.c b/fs/splice.c
index 8fa9217ed585..a285fd746dc0 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -324,6 +324,8 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
 					mapping_gfp_mask(mapping));
 			if (unlikely(error)) {
 				page_cache_release(page);
+				if (error == -EEXIST)
+					continue;
 				break;
 			}
 			/*

From 98232d504db0a1f91ecaa93686ed3bf61963103b Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Thu, 4 May 2006 09:13:49 +0200
Subject: [PATCH 5/5] [PATCH] compat_sys_vmsplice: one-off in UIO_MAXIOV check

nr_segs may not be > UIO_MAXIOV, however it may be equal to. This makes
the behaviour identical to the real sys_vmsplice(). The other foov
syscalls also agree that this is the way to go.

Signed-off-by: Jens Axboe
---
 fs/compat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/compat.c b/fs/compat.c
index 3f3e8f4d43d6..970888aad843 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1323,7 +1323,7 @@ compat_sys_vmsplice(int fd, const struct compat_iovec __user *iov32,
 {
 	unsigned i;
 	struct iovec *iov;
-	if (nr_segs >= UIO_MAXIOV)
+	if (nr_segs > UIO_MAXIOV)
 		return -EINVAL;
 	iov = compat_alloc_user_space(nr_segs * sizeof(struct iovec));
 	for (i = 0; i < nr_segs; i++) {
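A quick userspace illustration of the boundary that the last patch changes: with the
check relaxed from >= to >, a vmsplice() call is allowed to pass exactly UIO_MAXIOV
segments instead of being rejected with -EINVAL. The sketch below is an editorial
example, not part of the series; it assumes a Linux/glibc system where _GNU_SOURCE
exposes vmsplice(2) and where the userspace IOV_MAX constant matches the kernel's
UIO_MAXIOV (1024). Buffer contents and error handling are illustrative only, and to
exercise the compat entry point specifically it would need to be built as a 32-bit
binary running on a 64-bit kernel.

#define _GNU_SOURCE
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	/* One tiny segment per iovec slot; IOV_MAX of them is the boundary case. */
	static char bufs[IOV_MAX][1];
	struct iovec iov[IOV_MAX];
	int pfd[2];
	ssize_t n;
	int i;

	if (pipe(pfd) < 0) {
		perror("pipe");
		return 1;
	}

	for (i = 0; i < IOV_MAX; i++) {
		bufs[i][0] = 'x';
		iov[i].iov_base = bufs[i];
		iov[i].iov_len = sizeof(bufs[i]);
	}

	/*
	 * nr_segs == IOV_MAX: accepted by sys_vmsplice(), and by
	 * compat_sys_vmsplice() once its UIO_MAXIOV check uses > instead
	 * of >=. Passing IOV_MAX + 1 segments should still fail with EINVAL.
	 */
	n = vmsplice(pfd[1], iov, IOV_MAX, 0);
	if (n < 0)
		perror("vmsplice");
	else
		printf("spliced %zd bytes from %d segments\n", n, IOV_MAX);

	close(pfd[0]);
	close(pfd[1]);
	return 0;
}

The total payload here (IOV_MAX bytes) stays well under the default pipe capacity, so
the call should complete without blocking; with larger segments a short or blocking
splice would have to be handled.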