From d366d39885b4a56eb90c7670deb46d82501579ad Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 1 Jun 2007 14:54:11 +0200 Subject: [PATCH 1/6] splice: move inode size check into generic_file_splice_read() Signed-off-by: Jens Axboe --- fs/splice.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 12f28281d2b1..228a48799d11 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -478,10 +478,18 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, { ssize_t spliced; int ret; + loff_t isize, left; + + isize = i_size_read(in->f_mapping->host); + if (unlikely(*ppos >= isize)) + return 0; + + left = isize - *ppos; + if (unlikely(left < len)) + len = left; ret = 0; spliced = 0; - while (len) { ret = __generic_file_splice_read(in, ppos, pipe, len, flags); @@ -922,7 +930,6 @@ static long do_splice_to(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { - loff_t isize, left; int ret; if (unlikely(!in->f_op || !in->f_op->splice_read)) @@ -935,14 +942,6 @@ static long do_splice_to(struct file *in, loff_t *ppos, if (unlikely(ret < 0)) return ret; - isize = i_size_read(in->f_mapping->host); - if (unlikely(*ppos >= isize)) - return 0; - - left = isize - *ppos; - if (unlikely(left < len)) - len = left; - return in->f_op->splice_read(in, ppos, pipe, len, flags); } From 267adc3e66c3d3c2edb89dac9eddc20ac94d646b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 8 Jun 2007 08:33:41 +0200 Subject: [PATCH 2/6] splice: remove do_splice_direct() symbol export It's only supposed to be used by do_sendfile(), which is never modular. So kill the export. Signed-off-by: Jens Axboe --- fs/splice.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 228a48799d11..b78a7f057be1 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1057,8 +1057,6 @@ out_release: return ret; } -EXPORT_SYMBOL(do_splice_direct); - /* * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same * location, so checking ->i_pipe is not enough to verify that this is a From 17374ff1aa9ce2a0597416a16729474b538af443 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 4 Jun 2007 15:03:12 +0200 Subject: [PATCH 3/6] pipe: move pipe_inode_info structure decleration up before it's used There's really no reason it's below the first use of the pointer type, and it'll fail compilation for the network addition (for good reason). So move it up a bit. Signed-off-by: Jens Axboe --- include/linux/pipe_fs_i.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 8bcbc54e1b48..c8884f971228 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -16,6 +16,21 @@ struct pipe_buffer { unsigned int flags; }; +struct pipe_inode_info { + wait_queue_head_t wait; + unsigned int nrbufs, curbuf; + struct page *tmp_page; + unsigned int readers; + unsigned int writers; + unsigned int waiting_writers; + unsigned int r_counter; + unsigned int w_counter; + struct fasync_struct *fasync_readers; + struct fasync_struct *fasync_writers; + struct inode *inode; + struct pipe_buffer bufs[PIPE_BUFFERS]; +}; + /* * Note on the nesting of these functions: * @@ -38,21 +53,6 @@ struct pipe_buf_operations { void (*get)(struct pipe_inode_info *, struct pipe_buffer *); }; -struct pipe_inode_info { - wait_queue_head_t wait; - unsigned int nrbufs, curbuf; - struct page *tmp_page; - unsigned int readers; - unsigned int writers; - unsigned int waiting_writers; - unsigned int r_counter; - unsigned int w_counter; - struct fasync_struct *fasync_readers; - struct fasync_struct *fasync_writers; - struct inode *inode; - struct pipe_buffer bufs[PIPE_BUFFERS]; -}; - /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ #define PIPE_SIZE PAGE_SIZE From 20d698db67059a63d217030dfd02872cb5f88dfb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 5 Jun 2007 11:05:11 +0200 Subject: [PATCH 4/6] splice: move balance_dirty_pages_ratelimited() outside of splice actor I've seen inode related deadlocks, so move this call outside of the actor itself, which may hold the inode lock. Signed-off-by: Jens Axboe --- fs/splice.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/splice.c b/fs/splice.c index b78a7f057be1..6349d3189e3f 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -652,7 +652,6 @@ find_page: * accessed, we are now done! */ mark_page_accessed(page); - balance_dirty_pages_ratelimited(mapping); out: page_cache_release(page); unlock_page(page); @@ -823,6 +822,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, if (err) ret = err; } + balance_dirty_pages_ratelimited(mapping); } return ret; @@ -876,6 +876,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, if (err) ret = err; } + balance_dirty_pages_ratelimited(mapping); } return ret; From 475ecade683566b19ebb84972de864039ac5fce3 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 7 Jun 2007 09:36:00 +0200 Subject: [PATCH 5/6] splice: __generic_file_splice_read: fix i_size_read() length checks __generic_file_splice_read's partial page check, at eof after readpage, not only got its calculations wrong, but also reused the loff variable: causing data corruption when splicing from a non-0 offset in the file's last page (revealed by ext2 -b 1024 testing on a loop of a tmpfs file). Signed-off-by: Hugh Dickins Signed-off-by: Jens Axboe --- fs/splice.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 6349d3189e3f..123fcdb2e4d9 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -272,7 +272,6 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, struct page *page; pgoff_t index, end_index; loff_t isize; - size_t total_len; int error, page_nr; struct splice_pipe_desc spd = { .pages = pages, @@ -298,7 +297,6 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, * Now fill in the holes: */ error = 0; - total_len = 0; /* * Lookup the (hopefully) full range of pages we need. @@ -429,29 +427,33 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, * the length and stop */ if (end_index == index) { - loff = PAGE_CACHE_SIZE - (isize & ~PAGE_CACHE_MASK); - if (total_len + loff > isize) + unsigned int plen; + + /* + * max good bytes in this page + */ + plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; + if (plen <= loff) break; + /* * force quit after adding this page */ + this_len = min(this_len, plen - loff); len = this_len; - this_len = min(this_len, loff); - loff = 0; } } fill_it: partial[page_nr].offset = loff; partial[page_nr].len = this_len; len -= this_len; - total_len += this_len; loff = 0; spd.nr_pages++; index++; } /* - * Release any pages at the end, if we quit early. 'i' is how far + * Release any pages at the end, if we quit early. 'page_nr' is how far * we got, 'nr_pages' is how many pages are in the map. */ while (page_nr < nr_pages) From 620a324b744a7d66c3c45a83042f8e7fc9fc5a04 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 7 Jun 2007 09:39:42 +0200 Subject: [PATCH 6/6] splice: __generic_file_splice_read: fix read/truncate race Original patch and description from Neil Brown , merged and adapted to splice branch by me. Neils text follows: __generic_file_splice_read() currently samples the i_size at the start and doesn't do so again unless it needs to call ->readpage to load a page. After ->readpage it has to re-sample i_size as a truncate may have caused that page to be filled with zeros, and the read() call should not see these. However there are other activities that might cause ->readpage to be called on a page between the time that __generic_file_splice_read() samples i_size and when it finds that it has an uptodate page. These include at least read-ahead and possibly another thread performing a read So we must sample i_size *after* it has an uptodate page. Thus the current sampling at the start and after a read can be replaced with a sampling before page addition into spd. Signed-off-by: Jens Axboe --- fs/splice.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/fs/splice.c b/fs/splice.c index 123fcdb2e4d9..cb211360273a 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -413,37 +413,37 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, break; } + } +fill_it: + /* + * i_size must be checked after PageUptodate. + */ + isize = i_size_read(mapping->host); + end_index = (isize - 1) >> PAGE_CACHE_SHIFT; + if (unlikely(!isize || index > end_index)) + break; + + /* + * if this is the last page, see if we need to shrink + * the length and stop + */ + if (end_index == index) { + unsigned int plen; /* - * i_size must be checked after ->readpage(). + * max good bytes in this page */ - isize = i_size_read(mapping->host); - end_index = (isize - 1) >> PAGE_CACHE_SHIFT; - if (unlikely(!isize || index > end_index)) + plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; + if (plen <= loff) break; /* - * if this is the last page, see if we need to shrink - * the length and stop + * force quit after adding this page */ - if (end_index == index) { - unsigned int plen; - - /* - * max good bytes in this page - */ - plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; - if (plen <= loff) - break; - - /* - * force quit after adding this page - */ - this_len = min(this_len, plen - loff); - len = this_len; - } + this_len = min(this_len, plen - loff); + len = this_len; } -fill_it: + partial[page_nr].offset = loff; partial[page_nr].len = this_len; len -= this_len;