linux/fs
Andrei Vagin 3ef8040afc fs: sendfile handles O_NONBLOCK of out_fd
commit bdeb77bc2c upstream.

sendfile has to return EAGAIN if out_fd is nonblocking and the write into
it would block.

Here is a small reproducer for the problem:

#define _GNU_SOURCE /* See feature_test_macros(7) */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/sendfile.h>


#define FILE_SIZE (1UL << 30)
/*
 * Reproducer: sendfile() into a non-blocking pipe must fail with EAGAIN
 * instead of blocking when the write into the pipe would block.
 *
 * argv[1] is passed to open() together with O_TMPFILE, so it is expected to
 * name a directory in which an unnamed temporary file can be created.
 *
 * Prints "FAIL" to stderr for each sendfile() call that does not behave as
 * expected.  Returns 1 if the pipe or the temporary file cannot be set up,
 * 0 otherwise (including when "FAIL" was printed).
 */
int main(int argc, char **argv) {
        int p[2], fd;

        /* Both ends of the pipe are created with O_NONBLOCK set. */
        if (pipe2(p, O_NONBLOCK))
                return 1;

        /* Unnamed temporary file used as the sendfile() source. */
        fd = open(argv[1], O_RDWR | O_TMPFILE, 0666);
        if (fd < 0)
                return 1;
        /* Extend the file to FILE_SIZE (1 GiB); the result is not checked. */
        ftruncate(fd, FILE_SIZE);

        /*
         * First transfer into the pipe's write end: any result other than -1
         * is accepted.  Presumably this leaves the pipe full, since nothing
         * ever reads from p[0].
         */
        if (sendfile(p[1], fd, 0, FILE_SIZE) == -1) {
                fprintf(stderr, "FAIL\n");
        }
        /*
         * Second transfer: the only accepted outcome is -1 with errno set to
         * EAGAIN.  Per the commit message, an affected kernel gets stuck in
         * this call instead of returning.
         */
        if (sendfile(p[1], fd, 0, FILE_SIZE) != -1 || errno != EAGAIN) {
                fprintf(stderr, "FAIL\n");
        }
        return 0;
}

It worked before b964bf53e5, it is stuck after b964bf53e5, and it
works again with this fix.

This regression occurred because do_splice_direct() calls pipe_write(),
which handles O_NONBLOCK.  Here is a trace log from the reproducer:

 1)               |  __x64_sys_sendfile64() {
 1)               |    do_sendfile() {
 1)               |      __fdget()
 1)               |      rw_verify_area()
 1)               |      __fdget()
 1)               |      rw_verify_area()
 1)               |      do_splice_direct() {
 1)               |        rw_verify_area()
 1)               |        splice_direct_to_actor() {
 1)               |          do_splice_to() {
 1)               |            rw_verify_area()
 1)               |            generic_file_splice_read()
 1) + 74.153 us   |          }
 1)               |          direct_splice_actor() {
 1)               |            iter_file_splice_write() {
 1)               |              __kmalloc()
 1)   0.148 us    |              pipe_lock();
 1)   0.153 us    |              splice_from_pipe_next.part.0();
 1)   0.162 us    |              page_cache_pipe_buf_confirm();
... 16 times
 1)   0.159 us    |              page_cache_pipe_buf_confirm();
 1)               |              vfs_iter_write() {
 1)               |                do_iter_write() {
 1)               |                  rw_verify_area()
 1)               |                  do_iter_readv_writev() {
 1)               |                    pipe_write() {
 1)               |                      mutex_lock()
 1)   0.153 us    |                      mutex_unlock();
 1)   1.368 us    |                    }
 1)   1.686 us    |                  }
 1)   5.798 us    |                }
 1)   6.084 us    |              }
 1)   0.174 us    |              kfree();
 1)   0.152 us    |              pipe_unlock();
 1) + 14.461 us   |            }
 1) + 14.783 us   |          }
 1)   0.164 us    |          page_cache_pipe_buf_release();
... 16 times
 1)   0.161 us    |          page_cache_pipe_buf_release();
 1)               |          touch_atime()
 1) + 95.854 us   |        }
 1) + 99.784 us   |      }
 1) ! 107.393 us  |    }
 1) ! 107.699 us  |  }

Link: https://lkml.kernel.org/r/20220415005015.525191-1-avagin@gmail.com
Fixes: b964bf53e5 ("teach sendfile(2) to handle send-to-pipe directly")
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2022-08-03 12:03:41 +02:00
..
9p 9p: fix fid refcount leak in v9fs_vfs_get_link 2022-06-29 09:03:19 +02:00
adfs
affs
afs afs: Fix dynamic root getattr 2022-06-29 09:03:25 +02:00
autofs autofs: fix wait name hash calculation in autofs_wait() 2021-10-20 21:09:02 -04:00
befs isystem: ship and use stdarg.h 2021-08-19 09:02:55 +09:00
bfs
btrfs btrfs: zoned: fix a leaked bioc in read_zone_info 2022-07-21 21:24:32 +02:00
cachefiles fs: add is_idmapped_mnt() helper 2022-07-02 16:41:14 +02:00
ceph ceph: switch netfs read ops to use rreq->inode instead of rreq->mapping->host 2022-07-21 21:24:30 +02:00
cifs cifs: fix reconnect on smb3 mount types 2022-06-14 18:36:25 +02:00
coda
configfs configfs: fix a race in configfs_{,un}register_subsystem() 2022-03-02 11:48:02 +01:00
cramfs
crypto fscrypt: allow 256-bit master keys with AES-256-XTS 2021-11-18 19:16:11 +01:00
debugfs debugfs: lockdown: Allow reading debugfs files that are not world readable 2022-01-27 11:03:55 +01:00
devpts fsnotify: fix fsnotify hooks in pseudo filesystems 2022-02-01 17:27:01 +01:00
dlm dlm: fix pending remove if msg allocation fails 2022-07-29 17:25:24 +02:00
ecryptfs fs: add is_idmapped_mnt() helper 2022-07-02 16:41:14 +02:00
efivarfs
efs
erofs iomap: Add done_before argument to iomap_dio_rw 2022-05-01 17:22:32 +02:00
exfat exfat: use updated exfat_chain directly during renaming 2022-07-29 17:25:30 +02:00
exportfs exportfs: support idmapped mounts 2022-06-09 10:23:32 +02:00
ext2 ext2: correct max file size computing 2022-04-08 14:23:35 +02:00
ext4 ext4: add reserved GDT blocks check 2022-06-22 14:22:05 +02:00
f2fs f2fs: attach inline_data after setting compression 2022-06-29 09:03:27 +02:00
fat fat: add ratelimit to fat*_ent_bread() 2022-06-09 10:22:42 +02:00
freevxfs
fscache fscache: Remove an unused static variable 2021-10-04 22:13:12 +01:00
fuse iov_iter: Turn iov_iter_fault_in_readable into fault_in_iov_iter_readable 2022-05-01 17:22:28 +02:00
gfs2 gfs2: Fix gfs2_file_buffered_write endless loop workaround 2022-07-12 16:34:59 +02:00
hfs
hfsplus
hostfs hostfs: support splice_write 2021-08-26 22:28:02 +02:00
hpfs
hugetlbfs hugetlbfs: fix hugetlbfs_statfs() locking 2022-06-09 10:23:11 +02:00
iomap iomap: iomap_write_failed fix 2022-06-09 10:22:55 +02:00
isofs isofs: Fix out of bound access for corrupted isofs image 2021-11-12 15:05:50 +01:00
jbd2 jbd2: fix a potential race while discarding reserved buffers after an abort 2022-04-27 14:39:02 +02:00
jffs2 jffs2: fix memory leak in jffs2_do_fill_super 2022-06-14 18:36:10 +02:00
jfs fs: jfs: fix possible NULL pointer dereference in dbFree() 2022-06-09 10:22:41 +02:00
kernfs kernfs: Separate kernfs_pr_cont_buf and rename_lock. 2022-06-14 18:36:22 +02:00
ksmbd ksmbd: use SOCK_NONBLOCK type for kernel_accept() 2022-07-21 21:24:32 +02:00
lockd lockd: fix nlm_close_files 2022-07-21 21:24:24 +02:00
minix minix: fix bug when opening a file with O_DIRECT 2022-04-13 20:59:10 +02:00
netfs netfs: fix parameter of cleanup() 2021-12-29 12:28:59 +01:00
nfs pNFS: Avoid a live lock condition in pnfs_update_layout() 2022-06-22 14:21:59 +02:00
nfs_common nfs: Fix kerneldoc warning shown up by W=1 2021-10-04 22:02:17 +01:00
nfsd NFSD: COMMIT operations must not return NFS?ERR_INVAL 2022-07-12 16:35:01 +02:00
nilfs2 nilfs2: fix incorrect masking of permission flags for symlinks 2022-07-21 21:24:14 +02:00
nls
notify fsnotify: fix wrong lockdep annotations 2022-06-09 10:22:50 +02:00
ntfs ntfs: fix use-after-free in ntfs_ucsncmp() 2022-08-03 12:03:41 +02:00
ntfs3 fs/ntfs3: Fix invalid free in log_replay 2022-06-09 10:23:32 +02:00
ocfs2 Revert "ocfs2: mount shared volume without ha stack" 2022-08-03 12:03:41 +02:00
omfs
openpromfs
orangefs orangefs: Fix the size of a memory allocation in orangefs_bufmap_alloc() 2022-01-20 09:13:13 +01:00
overlayfs fs: add is_idmapped_mnt() helper 2022-07-02 16:41:14 +02:00
proc sysctl: move some boundary constants from sysctl.c to sysctl_vals 2022-07-29 17:25:11 +02:00
pstore pstore: Don't use semaphores in always-atomic-context code 2022-04-08 14:23:01 +02:00
qnx4 qnx4: work around gcc false positive warning bug 2021-09-21 08:36:48 -07:00
qnx6
quota quota: Prevent memory allocation recursion while holding dq_lock 2022-06-22 14:21:56 +02:00
ramfs
reiserfs Kbuild updates for v5.15 2021-09-03 15:33:47 -07:00
romfs
smbfs_common cifs: Fix crash on unload of cifs_arc4.ko 2021-12-14 10:57:12 +01:00
squashfs squashfs: use bvec_virt 2021-08-16 10:50:32 -06:00
sysfs
sysv
tracefs tracefs: Set the group ownership in apply_options() not parse_options() 2022-03-02 11:48:05 +01:00
ubifs ubifs: rename_whiteout: correct old_dir size computing 2022-04-08 14:24:08 +02:00
udf udf: Avoid using stale lengthOfImpUse 2022-05-15 20:18:52 +02:00
ufs isystem: ship and use stdarg.h 2021-08-19 09:02:55 +09:00
unicode
vboxsf vboxfs: fix broken legacy mount signature checking 2021-09-27 11:26:21 -07:00
verity fs-verity: fix signed integer overflow with i_size near S64_MAX 2021-09-22 10:56:34 -07:00
xfs xfs: prevent a WARN_ONCE() in xfs_ioc_attr_list() 2022-07-29 17:25:09 +02:00
zonefs zonefs: fix zonefs_iomap_begin() for reads 2022-06-25 15:18:40 +02:00
aio.c aio: Fix incorrect usage of eventfd_signal_allowed() 2021-12-14 10:57:22 +01:00
anon_inodes.c
attr.c fs: account for group membership 2022-07-02 16:41:17 +02:00
bad_inode.c vfs: add rcu argument to ->get_acl() callback 2021-08-18 22:08:24 +02:00
binfmt_aout.c binfmt: a.out: Fix bogus semicolon 2021-09-05 10:15:05 -07:00
binfmt_elf_fdpic.c coredump: Snapshot the vmas in do_coredump 2022-04-08 14:24:17 +02:00
binfmt_elf.c coredump: Use the vma snapshot in fill_files_note 2022-04-08 14:24:18 +02:00
binfmt_flat.c binfmt_flat: do not stop relocating GOT entries prematurely on riscv 2022-06-09 10:22:26 +02:00
binfmt_misc.c
binfmt_script.c
buffer.c mm: fs: fix lru_cache_disabled race in bh_lru 2022-04-08 14:22:54 +02:00
char_dev.c
compat_binfmt_elf.c
coredump.c coredump: Use the vma snapshot in fill_files_note 2022-04-08 14:24:18 +02:00
d_path.c d_path: make 'prepend()' fill up the buffer exactly on overflow 2021-09-02 10:07:29 -07:00
dax.c dax: fix cache flush on PMD-mapped pages 2022-06-09 10:23:09 +02:00
dcache.c
direct-io.c
drop_caches.c fs: drop_caches: fix skipping over shadow cache inodes 2021-09-03 09:58:10 -07:00
eventfd.c eventfd: Export eventfd_wake_count to modules 2021-09-06 07:20:56 -04:00
eventpoll.c ARM development updates for 5.15: 2021-09-09 13:25:49 -07:00
exec.c fix race between exit_itimers() and /proc/pid/timers 2022-07-21 21:24:11 +02:00
fcntl.c Merge branch 'akpm' (patches from Andrew) 2021-09-03 10:08:28 -07:00
fhandle.c
file_table.c SUNRPC: Ensure we flush any closed sockets before xs_xprt_free() 2022-05-18 10:26:57 +02:00
file.c fs: fix fd table size alignment properly 2022-04-08 14:23:54 +02:00
filesystems.c fs: simplify get_filesystem_list / get_all_fs_names 2021-08-23 01:25:40 -04:00
fs_context.c vfs: fs_context: fix up param length parsing in legacy_parse_param 2022-01-20 09:13:14 +01:00
fs_parser.c namei: Standardize callers of filename_lookup() 2021-09-07 16:07:47 -04:00
fs_pin.c
fs_struct.c
fs_types.c
fs-writeback.c writeback: Fix inode->i_io_list not be protected by inode->i_lock error 2022-06-14 18:36:26 +02:00
fsopen.c
init.c
inode.c writeback: Fix inode->i_io_list not be protected by inode->i_lock error 2022-06-14 18:36:26 +02:00
internal.h block: simplify the block device syncing code 2022-04-27 14:38:50 +02:00
io_uring.c io_uring: avoid io-wq -EAGAIN looping for !IOPOLL 2022-07-12 16:35:08 +02:00
io-wq.c io-wq: drop wqe lock before creating new worker 2021-12-22 09:32:51 +01:00
io-wq.h io-wq: provide a way to limit max number of workers 2021-08-29 07:55:55 -06:00
ioctl.c fs: fix an infinite loop in iomap_fiemap 2022-05-25 09:57:26 +02:00
Kconfig 4 cifs/smb3 fixes, one for DFS reconnect, and one to begin creating common headers for server and client and the other two to rename the cifs_common directory to smbfs_common to be more consistent ie change use of the name cifs to smb which is more accurate 2021-09-12 10:10:21 -07:00
Kconfig.binfmt
kernel_read_file.c vfs: check fd has read access in kernel_read_file_from_fd() 2021-10-18 20:22:03 -10:00
libfs.c
locks.c Revert "memcg: enable accounting for file lock caches" 2021-09-07 11:21:48 -07:00
Makefile 4 cifs/smb3 fixes, one for DFS reconnect, and one to begin creating common headers for server and client and the other two to rename the cifs_common directory to smbfs_common to be more consistent ie change use of the name cifs to smb which is more accurate 2021-09-12 10:10:21 -07:00
mbcache.c
mount.h
mpage.c
namei.c fs: add two trivial lookup helpers 2022-06-09 10:23:32 +02:00
namespace.c fs: support mapped mounts of mapped filesystems 2022-07-02 16:41:17 +02:00
no-block.c
nsfs.c
open.c fs: support mapped mounts of mapped filesystems 2022-07-02 16:41:17 +02:00
pipe.c pipe: Fix missing lock in pipe_resize_ring() 2022-06-06 08:43:37 +02:00
pnode.c
pnode.h
posix_acl.c fs: fix acl translation 2022-07-02 16:41:17 +02:00
proc_namespace.c fs: add is_idmapped_mnt() helper 2022-07-02 16:41:14 +02:00
read_write.c fs: sendfile handles O_NONBLOCK of out_fd 2022-08-03 12:03:41 +02:00
readdir.c
remap_range.c fs/remap: constrain dedupe of EOF blocks 2022-07-21 21:24:14 +02:00
select.c select: Fix indefinitely sleeping task in poll_schedule_timeout() 2022-01-29 10:58:25 +01:00
seq_file.c rxrpc: Fix locking issue 2022-07-12 16:35:08 +02:00
signalfd.c signalfd: use wake_up_pollfree() 2021-12-14 10:57:15 +01:00
splice.c
stack.c
stat.c stat: fix inconsistency between struct stat and struct compat_stat 2022-04-27 14:38:57 +02:00
statfs.c
super.c vfs: make freeze_super abort when sync_filesystem returns error 2022-02-23 12:03:05 +01:00
sync.c vfs: make sync_filesystem return errors from ->sync_fs 2022-04-27 14:38:50 +02:00
timerfd.c timerfd: Provide timerfd_resume() 2021-08-10 17:57:22 +02:00
userfaultfd.c userfaultfd: fix a race between writeprotect and exit_mmap() 2021-10-18 20:22:02 -10:00
utimes.c
xattr.c fs: fix acl translation 2022-07-02 16:41:17 +02:00