Reallocate fuse_session buffer transparently for extended max writes

A previous PR added support for extended max writes (e.g. write requests larger
than 1 MiB) by initializing the fuse session buffer size from the limit in
/proc/sys/fs/fuse/max_pages_limit. However, this is a huge problem for machines
where multiple fuse servers may be running but only one server needs large writes.
In this case, a lot of memory is wasted, which can lead to OOM issues.

This PR reallocates the session buffer transparently if the server sets
"se->conn.max_write" to a value larger than 1 MiB. This applies only to buffers that
are "owned" by libfuse - if the server wishes to provide its own allocated buffer
for receiving/processing requests, then it should ensure that buffer is allocated
with the proper size from the start.

Local testing showed:
echo 65535 | sudo tee /proc/sys/fs/fuse/max_pages_limit
dd if=/dev/urandom of=hello_file bs=6M count=2

write requests:
write request size is 5242880
write request size is 1048576
write request size is 5242880
write request size is 1048576
This commit is contained in:
Joanne Koong 2024-11-14 20:55:48 -08:00 committed by Bernd Schubert
parent 49f74c9b93
commit 0e0f43b79b
3 changed files with 41 additions and 31 deletions

View File

@ -868,6 +868,14 @@ struct fuse_buf {
* Used if FUSE_BUF_FD_SEEK flag is set. * Used if FUSE_BUF_FD_SEEK flag is set.
*/ */
off_t pos; off_t pos;
/**
* Size of memory pointer
*
* Used only if mem was internally allocated.
* Not used if mem was user-provided.
*/
size_t mem_size;
}; };
/** /**
@ -924,6 +932,7 @@ struct libfuse_version
/* .mem = */ NULL, \ /* .mem = */ NULL, \
/* .fd = */ -1, \ /* .fd = */ -1, \
/* .pos = */ 0, \ /* .pos = */ 0, \
/* .mem_size = */ 0, \
} } \ } } \
} ) } )

View File

@ -9,6 +9,8 @@
#include "fuse.h" #include "fuse.h"
#include "fuse_lowlevel.h" #include "fuse_lowlevel.h"
#include <stdbool.h>
#define MIN(a, b) \ #define MIN(a, b) \
({ \ ({ \
typeof(a) _a = (a); \ typeof(a) _a = (a); \
@ -77,6 +79,7 @@ struct fuse_session {
* a later version, to 'fix' it at run time. * a later version, to 'fix' it at run time.
*/ */
struct libfuse_version version; struct libfuse_version version;
bool buf_reallocable;
}; };
struct fuse_chan { struct fuse_chan {

View File

@ -1990,6 +1990,7 @@ void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
size_t outargsize = sizeof(outarg); size_t outargsize = sizeof(outarg);
uint64_t inargflags = 0; uint64_t inargflags = 0;
uint64_t outargflags = 0; uint64_t outargflags = 0;
bool buf_reallocable = se->buf_reallocable;
(void) nodeid; (void) nodeid;
if (se->debug) { if (se->debug) {
fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor); fuse_log(FUSE_LOG_DEBUG, "INIT: %u.%u\n", arg->major, arg->minor);
@ -2074,6 +2075,7 @@ void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
if (bufsize > max_bufsize) { if (bufsize > max_bufsize) {
bufsize = max_bufsize; bufsize = max_bufsize;
} }
buf_reallocable = false;
} }
if (inargflags & FUSE_DIRECT_IO_ALLOW_MMAP) if (inargflags & FUSE_DIRECT_IO_ALLOW_MMAP)
se->conn.capable |= FUSE_CAP_DIRECT_IO_ALLOW_MMAP; se->conn.capable |= FUSE_CAP_DIRECT_IO_ALLOW_MMAP;
@ -2161,6 +2163,8 @@ void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg)
bufsize = FUSE_MIN_READ_BUFFER; bufsize = FUSE_MIN_READ_BUFFER;
} }
if (buf_reallocable)
bufsize = UINT_MAX;
se->conn.max_write = MIN(se->conn.max_write, bufsize - FUSE_BUFFER_HEADER_SIZE); se->conn.max_write = MIN(se->conn.max_write, bufsize - FUSE_BUFFER_HEADER_SIZE);
se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE; se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE;
@ -2921,30 +2925,6 @@ static void fuse_ll_pipe_destructor(void *data)
fuse_ll_pipe_free(llp); fuse_ll_pipe_free(llp);
} }
/**
 * Read the FUSE max_pages limit from /proc/sys/fs/fuse/max_pages_limit.
 *
 * The file content is parsed as a number with libfuse_strtol().
 *
 * @return the parsed limit, or FUSE_DEFAULT_MAX_PAGES_LIMIT when the file
 *         cannot be opened or read, is empty, fails to parse, or holds a
 *         value that is not a positive number representable as unsigned int
 */
static unsigned int get_max_pages(void)
{
	char buf[32];
	ssize_t len;
	long limit = 0;
	int fd;

	fd = open("/proc/sys/fs/fuse/max_pages_limit", O_RDONLY);
	if (fd < 0)
		return FUSE_DEFAULT_MAX_PAGES_LIMIT;

	/* Leave room for the terminating NUL added below */
	len = read(fd, buf, sizeof(buf) - 1);
	close(fd);

	/* Error or empty file: nothing to parse */
	if (len <= 0)
		return FUSE_DEFAULT_MAX_PAGES_LIMIT;

	buf[len] = '\0';

	/*
	 * Strip trailing whitespace (the value is normally followed by a
	 * newline) so that only the digits reach the parser.
	 * NOTE(review): libfuse_strtol() is not visible here; if it rejects
	 * trailing characters the newline would make every parse fail and
	 * this function would always return the default - confirm.
	 */
	while (len > 0 && (buf[len - 1] == '\n' || buf[len - 1] == '\r' ||
			   buf[len - 1] == ' ' || buf[len - 1] == '\t'))
		buf[--len] = '\0';

	if (libfuse_strtol(buf, &limit) != 0)
		return FUSE_DEFAULT_MAX_PAGES_LIMIT;

	/* Refuse values that would be mangled by the conversion to unsigned int */
	if (limit <= 0 || (unsigned long)limit > UINT_MAX)
		return FUSE_DEFAULT_MAX_PAGES_LIMIT;

	return (unsigned int)limit;
}
int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf) int fuse_session_receive_buf(struct fuse_session *se, struct fuse_buf *buf)
{ {
return fuse_session_receive_buf_int(se, buf, NULL); return fuse_session_receive_buf_int(se, buf, NULL);
@ -2955,8 +2935,8 @@ int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf,
{ {
int err; int err;
ssize_t res; ssize_t res;
#ifdef HAVE_SPLICE
size_t bufsize = se->bufsize; size_t bufsize = se->bufsize;
#ifdef HAVE_SPLICE
struct fuse_ll_pipe *llp; struct fuse_ll_pipe *llp;
struct fuse_buf tmpbuf; struct fuse_buf tmpbuf;
@ -3036,6 +3016,8 @@ int fuse_session_receive_buf_int(struct fuse_session *se, struct fuse_buf *buf,
"fuse: failed to allocate read buffer\n"); "fuse: failed to allocate read buffer\n");
return -ENOMEM; return -ENOMEM;
} }
buf->mem_size = se->bufsize;
se->buf_reallocable = true;
} }
buf->size = se->bufsize; buf->size = se->bufsize;
buf->flags = 0; buf->flags = 0;
@ -3073,22 +3055,40 @@ fallback:
"fuse: failed to allocate read buffer\n"); "fuse: failed to allocate read buffer\n");
return -ENOMEM; return -ENOMEM;
} }
buf->mem_size = se->bufsize;
se->buf_reallocable = true;
} }
restart: restart:
if (se->buf_reallocable)
bufsize = buf->mem_size;
if (se->io != NULL) { if (se->io != NULL) {
/* se->io->read is never NULL if se->io is not NULL as /* se->io->read is never NULL if se->io is not NULL as
specified by fuse_session_custom_io()*/ specified by fuse_session_custom_io()*/
res = se->io->read(ch ? ch->fd : se->fd, buf->mem, se->bufsize, res = se->io->read(ch ? ch->fd : se->fd, buf->mem, bufsize,
se->userdata); se->userdata);
} else { } else {
res = read(ch ? ch->fd : se->fd, buf->mem, se->bufsize); res = read(ch ? ch->fd : se->fd, buf->mem, bufsize);
} }
err = errno; err = errno;
if (fuse_session_exited(se)) if (fuse_session_exited(se))
return 0; return 0;
if (res == -1) { if (res == -1) {
if (err == EINVAL && se->buf_reallocable && se->bufsize > buf->mem_size) {
void *newbuf = malloc(se->bufsize);
if (!newbuf) {
fuse_log(FUSE_LOG_ERR,
"fuse: failed to (re)allocate read buffer\n");
return -ENOMEM;
}
free(buf->mem);
buf->mem = newbuf;
buf->mem_size = se->bufsize;
se->buf_reallocable = true;
goto restart;
}
/* ENOENT means the operation was interrupted, it's safe /* ENOENT means the operation was interrupted, it's safe
to restart */ to restart */
if (err == ENOENT) if (err == ENOENT)
@ -3144,7 +3144,8 @@ struct fuse_session *_fuse_session_new_317(struct fuse_args *args,
goto out1; goto out1;
} }
se->fd = -1; se->fd = -1;
se->conn.max_write = UINT_MAX; se->conn.max_write = FUSE_DEFAULT_MAX_PAGES_LIMIT * getpagesize();
se->bufsize = se->conn.max_write + FUSE_BUFFER_HEADER_SIZE;
se->conn.max_readahead = UINT_MAX; se->conn.max_readahead = UINT_MAX;
/* Parse options */ /* Parse options */
@ -3180,9 +3181,6 @@ struct fuse_session *_fuse_session_new_317(struct fuse_args *args,
if (se->debug) if (se->debug)
fuse_log(FUSE_LOG_DEBUG, "FUSE library version: %s\n", PACKAGE_VERSION); fuse_log(FUSE_LOG_DEBUG, "FUSE library version: %s\n", PACKAGE_VERSION);
se->bufsize = get_max_pages() * getpagesize() +
FUSE_BUFFER_HEADER_SIZE;
list_init_req(&se->list); list_init_req(&se->list);
list_init_req(&se->interrupts); list_init_req(&se->interrupts);
list_init_nreq(&se->notify_list); list_init_nreq(&se->notify_list);