/* linux/fs/fuse/cuse.c */

/*
* CUSE: Character device in Userspace
*
* Copyright (C) 2008-2009 SUSE Linux Products GmbH
* Copyright (C) 2008-2009 Tejun Heo <tj@kernel.org>
*
* This file is released under the GPLv2.
*
* CUSE enables character devices to be implemented from userland much
* like FUSE allows filesystems. On initialization /dev/cuse is
* created. By opening the file and replying to the CUSE_INIT request
* userland CUSE server can create a character device. After that the
* operation is very similar to FUSE.
*
* A CUSE instance involves the following objects.
*
* cuse_conn : contains fuse_conn and serves as bonding structure
* channel : file handle connected to the userland CUSE server
* cdev : the implemented character device
* dev : generic device for cdev
*
* Note that 'channel' is what 'dev' is in FUSE. As CUSE deals with
* devices, it's called 'channel' to reduce confusion.
*
* channel determines when the character device dies. When channel is
* closed, everything begins to destruct. The cuse_conn is taken off
* the lookup table preventing further access from cdev, cdev and
* generic device are removed and the base reference of cuse_conn is
* put.
*
* On each open, the matching cuse_conn is looked up and if found an
* additional reference is taken which is released when the file is
* closed.
*/
#include <linux/fuse.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/kdev_t.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/magic.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
/*
 * slab.h is included directly: users of the slab API must not rely on
 * percpu.h pulling it in implicitly.
 */
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/module.h>
#include <linux/uio.h>
/*
 * user_namespace.h: the user namespace used for a CUSE connection is that
 * of the opener of /dev/cuse.
 */
#include <linux/user_namespace.h>
#include "fuse_i.h"
/* Number of list heads (buckets) in the cuse_conntbl lookup table. */
#define CUSE_CONNTBL_LEN 64
/*
 * Bonding structure for one CUSE instance: embeds the fuse connection and
 * points at the character device / generic device created for it.
 */
struct cuse_conn {
	struct list_head	list;	/* entry in a cuse_conntbl bucket */
	struct fuse_conn	fc;	/* embedded fuse connection */
	struct cdev		*cdev;	/* character device of this instance */
	struct device		*dev;	/* generic device representing @cdev */

	/* init parameters, written once while the init reply is processed */
	bool			unrestricted_ioctl;
};
static DEFINE_MUTEX(cuse_lock); /* protects registration and cuse_conntbl */
/* Lookup table of registered connections; bucket chosen by cuse_conntbl_head(). */
static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN];
/* Device class assigned to every CUSE generic device; created in cuse_init(). */
static struct class *cuse_class;
/* Map an embedded fuse_conn back to the cuse_conn that contains it. */
static struct cuse_conn *fc_to_cc(struct fuse_conn *fc)
{
	struct cuse_conn *cc = container_of(fc, struct cuse_conn, fc);

	return cc;
}
/* Return the cuse_conntbl bucket that device number @devt falls into. */
static struct list_head *cuse_conntbl_head(dev_t devt)
{
	unsigned int bucket = (MAJOR(devt) + MINOR(devt)) % CUSE_CONNTBL_LEN;

	return &cuse_conntbl[bucket];
}
/**************************************************************************
* CUSE frontend operations
*
* These are file operations for the character device.
*
* On open, CUSE opens a file from the FUSE mnt and stores it to
* private_data of the open file. All other ops call FUSE ops on the
* FUSE file.
*/
/*
 * read_iter of the CUSE character device.  The request is forwarded to
 * fuse_direct_io() with a FUSE_IO_PRIV_SYNC context; the position handed
 * down is always 0, the file position of @kiocb is not consulted.
 */
static ssize_t cuse_read_iter(struct kiocb *kiocb, struct iov_iter *to)
{
	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(kiocb);
	loff_t offset = 0;

	return fuse_direct_io(&io, to, &offset, FUSE_DIO_CUSE);
}
/*
 * write_iter of the CUSE character device.  Neither locking nor
 * generic_write_checks() is done here; the userland server is
 * responsible for locking and sanity checks.  The position handed down
 * is always 0.
 */
static ssize_t cuse_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
	struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(kiocb);
	int flags = FUSE_DIO_WRITE | FUSE_DIO_CUSE;
	loff_t offset = 0;

	return fuse_direct_io(&io, from, &offset, flags);
}
static int cuse_open(struct inode *inode, struct file *file)
{
dev_t devt = inode->i_cdev->dev;
struct cuse_conn *cc = NULL, *pos;
int rc;
/* look up and get the connection */
mutex_lock(&cuse_lock);
list_for_each_entry(pos, cuse_conntbl_head(devt), list)
if (pos->dev->devt == devt) {
fuse_conn_get(&pos->fc);
cc = pos;
break;
}
mutex_unlock(&cuse_lock);
/* dead? */
if (!cc)
return -ENODEV;
/*
* Generic permission check is already done against the chrdev
* file, proceed to open.
*/
rc = fuse_do_open(&cc->fc, 0, file, 0);
if (rc)
fuse_conn_put(&cc->fc);
return rc;
}
/*
 * release method of the CUSE character device: releases the FUSE file and
 * drops the connection reference taken in cuse_open().  Always returns 0.
 */
static int cuse_release(struct inode *inode, struct file *file)
{
	struct fuse_file *ff = file->private_data;
	/*
	 * Read the connection pointer before releasing @ff.
	 * NOTE(review): presumably @ff must not be touched after
	 * fuse_sync_release() - confirm.
	 */
	struct fuse_conn *conn = ff->fc;

	fuse_sync_release(ff, file->f_flags);
	fuse_conn_put(conn);

	return 0;
}
/*
 * unlocked_ioctl of the CUSE character device.  Forwards to fuse_do_ioctl(),
 * adding FUSE_IOCTL_UNRESTRICTED when the connection has unrestricted
 * ioctls enabled.
 */
static long cuse_file_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct fuse_file *ff = file->private_data;
	struct cuse_conn *cc = fc_to_cc(ff->fc);
	unsigned int flags;

	flags = cc->unrestricted_ioctl ? FUSE_IOCTL_UNRESTRICTED : 0;

	return fuse_do_ioctl(file, cmd, arg, flags);
}
/*
 * compat_ioctl of the CUSE character device.  Same as cuse_file_ioctl()
 * except that FUSE_IOCTL_COMPAT is always passed to fuse_do_ioctl().
 */
static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
				   unsigned long arg)
{
	struct fuse_file *ff = file->private_data;
	unsigned int flags = FUSE_IOCTL_COMPAT;

	if (fc_to_cc(ff->fc)->unrestricted_ioctl)
		flags = flags | FUSE_IOCTL_UNRESTRICTED;

	return fuse_do_ioctl(file, cmd, arg, flags);
}
static const struct file_operations cuse_frontend_fops = {
.owner = THIS_MODULE,
.read_iter = cuse_read_iter,
.write_iter = cuse_write_iter,
.open = cuse_open,
.release = cuse_release,
.unlocked_ioctl = cuse_file_ioctl,
.compat_ioctl = cuse_file_compat_ioctl,
.poll = fuse_file_poll,
llseek: automatically add .llseek fop All file_operations should get a .llseek operation so we can make nonseekable_open the default for future file operations without a .llseek pointer. The three cases that we can automatically detect are no_llseek, seq_lseek and default_llseek. For cases where we can we can automatically prove that the file offset is always ignored, we use noop_llseek, which maintains the current behavior of not returning an error from a seek. New drivers should normally not use noop_llseek but instead use no_llseek and call nonseekable_open at open time. Existing drivers can be converted to do the same when the maintainer knows for certain that no user code relies on calling seek on the device file. The generated code is often incorrectly indented and right now contains comments that clarify for each added line why a specific variant was chosen. In the version that gets submitted upstream, the comments will be gone and I will manually fix the indentation, because there does not seem to be a way to do that using coccinelle. Some amount of new code is currently sitting in linux-next that should get the same modifications, which I will do at the end of the merge window. Many thanks to Julia Lawall for helping me learn to write a semantic patch that does all this. ===== begin semantic patch ===== // This adds an llseek= method to all file operations, // as a preparation for making no_llseek the default. // // The rules are // - use no_llseek explicitly if we do nonseekable_open // - use seq_lseek for sequential files // - use default_llseek if we know we access f_pos // - use noop_llseek if we know we don't access f_pos, // but we still want to allow users to call lseek // @ open1 exists @ identifier nested_open; @@ nested_open(...) { <+... nonseekable_open(...) ...+> } @ open exists@ identifier open_f; identifier i, f; identifier open1.nested_open; @@ int open_f(struct inode *i, struct file *f) { <+... ( nonseekable_open(...) | nested_open(...) 
) ...+> } @ read disable optional_qualifier exists @ identifier read_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; expression E; identifier func; @@ ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off) { <+... ( *off = E | *off += E | func(..., off, ...) | E = *off ) ...+> } @ read_no_fpos disable optional_qualifier exists @ identifier read_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; @@ ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off) { ... when != off } @ write @ identifier write_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; expression E; identifier func; @@ ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off) { <+... ( *off = E | *off += E | func(..., off, ...) | E = *off ) ...+> } @ write_no_fpos @ identifier write_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; @@ ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off) { ... when != off } @ fops0 @ identifier fops; @@ struct file_operations fops = { ... }; @ has_llseek depends on fops0 @ identifier fops0.fops; identifier llseek_f; @@ struct file_operations fops = { ... .llseek = llseek_f, ... }; @ has_read depends on fops0 @ identifier fops0.fops; identifier read_f; @@ struct file_operations fops = { ... .read = read_f, ... }; @ has_write depends on fops0 @ identifier fops0.fops; identifier write_f; @@ struct file_operations fops = { ... .write = write_f, ... }; @ has_open depends on fops0 @ identifier fops0.fops; identifier open_f; @@ struct file_operations fops = { ... .open = open_f, ... }; // use no_llseek if we call nonseekable_open //////////////////////////////////////////// @ nonseekable1 depends on !has_llseek && has_open @ identifier fops0.fops; identifier nso ~= "nonseekable_open"; @@ struct file_operations fops = { ... .open = nso, ... 
+.llseek = no_llseek, /* nonseekable */ }; @ nonseekable2 depends on !has_llseek @ identifier fops0.fops; identifier open.open_f; @@ struct file_operations fops = { ... .open = open_f, ... +.llseek = no_llseek, /* open uses nonseekable */ }; // use seq_lseek for sequential files ///////////////////////////////////// @ seq depends on !has_llseek @ identifier fops0.fops; identifier sr ~= "seq_read"; @@ struct file_operations fops = { ... .read = sr, ... +.llseek = seq_lseek, /* we have seq_read */ }; // use default_llseek if there is a readdir /////////////////////////////////////////// @ fops1 depends on !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier readdir_e; @@ // any other fop is used that changes pos struct file_operations fops = { ... .readdir = readdir_e, ... +.llseek = default_llseek, /* readdir is present */ }; // use default_llseek if at least one of read/write touches f_pos ///////////////////////////////////////////////////////////////// @ fops2 depends on !fops1 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read.read_f; @@ // read fops use offset struct file_operations fops = { ... .read = read_f, ... +.llseek = default_llseek, /* read accesses f_pos */ }; @ fops3 depends on !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier write.write_f; @@ // write fops use offset struct file_operations fops = { ... .write = write_f, ... + .llseek = default_llseek, /* write accesses f_pos */ }; // Use noop_llseek if neither read nor write accesses f_pos /////////////////////////////////////////////////////////// @ fops4 depends on !fops1 && !fops2 && !fops3 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read_no_fpos.read_f; identifier write_no_fpos.write_f; @@ // write fops use offset struct file_operations fops = { ... .write = write_f, .read = read_f, ... 
+.llseek = noop_llseek, /* read and write both use no f_pos */ }; @ depends on has_write && !has_read && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier write_no_fpos.write_f; @@ struct file_operations fops = { ... .write = write_f, ... +.llseek = noop_llseek, /* write uses no f_pos */ }; @ depends on has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read_no_fpos.read_f; @@ struct file_operations fops = { ... .read = read_f, ... +.llseek = noop_llseek, /* read uses no f_pos */ }; @ depends on !has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; @@ struct file_operations fops = { ... +.llseek = noop_llseek, /* no read or write fn */ }; ===== End semantic patch ===== Signed-off-by: Arnd Bergmann <arnd@arndb.de> Cc: Julia Lawall <julia@diku.dk> Cc: Christoph Hellwig <hch@infradead.org>
2010-08-16 00:52:59 +08:00
.llseek = noop_llseek,
};
/**************************************************************************
* CUSE channel initialization and destruction
*/
/* Device info parsed out of the CUSE init reply. */
struct cuse_devinfo {
	const char	*name;	/* value of the "DEVNAME" key */
};
/**
 * cuse_parse_one - parse a single "key=value" entry
 * @pp: in/out cursor into the packed string buffer
 * @end: one past the last byte of the packed string buffer
 * @keyp: out parameter receiving the start of the key
 * @valp: out parameter receiving the start of the value, may be NULL
 *
 * The buffer at *@pp holds '\0'-separated entries of the form
 * "key0=val0\0key1=val1\0" and its last byte (@end - 1) must be '\0'.
 * Leading '\0' bytes are skipped, then one entry is consumed.  When @valp
 * is non-NULL the entry is split at the first '=' (the buffer is modified
 * in place so that the key is '\0'-terminated); an entry without '='
 * yields an empty value.  Key and value are both passed through
 * strstrip().  On success *@pp is moved to the terminator of the consumed
 * entry.
 *
 * RETURNS:
 * 1 when an entry was parsed, 0 when the buffer is exhausted, -EINVAL if
 * the buffer is not '\0'-terminated or the key is empty.
 */
static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
{
	char *cur = *pp;
	char *key, *val;

	/* skip the separators left over from the previous entry */
	for (; cur < end && *cur == '\0'; cur++)
		;

	if (cur == end)
		return 0;

	/* strlen() below relies on the buffer being terminated */
	if (end[-1] != '\0') {
		printk(KERN_ERR "CUSE: info not properly terminated\n");
		return -EINVAL;
	}

	key = cur;
	val = cur;
	cur += strlen(cur);

	if (valp) {
		strsep(&val, "=");
		/* no '=' present: value is the empty string after the key */
		if (!val)
			val = key + strlen(key);
	}

	key = strstrip(key);
	if (valp)
		val = strstrip(val);

	if (*key == '\0') {
		printk(KERN_ERR "CUSE: zero length info key specified\n");
		return -EINVAL;
	}

	*pp = cur;
	*keyp = key;
	if (valp)
		*valp = val;

	return 1;
}
/**
 * cuse_parse_devinfo - parse the device info string
 * @p: device info string (packed "key=value\0" entries)
 * @len: length of the device info string
 * @devinfo: out parameter receiving the parsed device info
 *
 * Walks all entries in @p with cuse_parse_one().  The value of "DEVNAME"
 * is stored in @devinfo->name; any other key only triggers a warning.
 * @p is modified while parsing and @devinfo ends up pointing into it, so
 * @p must stay allocated for as long as @devinfo is used.
 *
 * RETURNS:
 * 0 on success, the error from cuse_parse_one() on a parse failure, or
 * -EINVAL if no non-empty DEVNAME was supplied.
 */
static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
{
	char *end = p + len;
	char *uninitialized_var(key), *uninitialized_var(val);
	int rc;

	for (;;) {
		rc = cuse_parse_one(&p, end, &key, &val);
		if (rc <= 0)
			break;

		if (strcmp(key, "DEVNAME") != 0) {
			printk(KERN_WARNING "CUSE: unknown device info \"%s\"\n",
			       key);
			continue;
		}
		devinfo->name = val;
	}

	/* rc == 0 means end of input, anything negative is a parse error */
	if (rc < 0)
		return rc;

	if (!devinfo->name || devinfo->name[0] == '\0') {
		printk(KERN_ERR "CUSE: DEVNAME unspecified\n");
		return -EINVAL;
	}

	return 0;
}
/*
 * ->release callback of the generic device allocated in
 * cuse_process_init_reply(); frees the kzalloc()ed struct device.
 */
static void cuse_gendev_release(struct device *dev)
{
	kfree(dev);
}
/**
 * cuse_process_init_reply - finish initializing CUSE channel
 * @fc: fuse connection embedded in the cuse_conn being initialized
 * @req: the completed CUSE_INIT request
 *
 * Installed as the ->end callback of the CUSE_INIT request by
 * cuse_send_init().  This function creates the character device and sets
 * up all the required data structures for it.  Please read the comment at
 * the top of this file for high level overview.
 *
 * The reply buffers (the cuse_init_out argument and the info page) are
 * freed here on both the success and the failure path.  On any failure
 * the connection is aborted with fuse_abort_conn().
 */
static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
{
	struct cuse_conn *cc = fc_to_cc(fc), *pos;
	struct cuse_init_out *arg = req->out.args[0].value;
	struct page *page = req->pages[0];
	struct cuse_devinfo devinfo = { };
	struct device *dev;
	struct cdev *cdev;
	dev_t devt;
	int rc, i;

	if (req->out.h.error ||
	    arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
		goto err;
	}

	fc->minor = arg->minor;
	fc->max_read = max_t(unsigned, arg->max_read, 4096);
	fc->max_write = max_t(unsigned, arg->max_write, 4096);

	/* parse init reply */
	cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;

	rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size,
				&devinfo);
	if (rc)
		goto err;

	/* determine and reserve devt; major 0 requests dynamic allocation */
	devt = MKDEV(arg->dev_major, arg->dev_minor);
	if (!MAJOR(devt))
		rc = alloc_chrdev_region(&devt, MINOR(devt), 1, devinfo.name);
	else
		rc = register_chrdev_region(devt, 1, devinfo.name);
	if (rc) {
		printk(KERN_ERR "CUSE: failed to register chrdev region\n");
		goto err;
	}

	/* devt determined, create device */
	rc = -ENOMEM;
	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		goto err_region;

	device_initialize(dev);
	/* keep uevents quiet until the cdev is in place as well */
	dev_set_uevent_suppress(dev, 1);
	dev->class = cuse_class;
	dev->devt = devt;
	dev->release = cuse_gendev_release;
	dev_set_drvdata(dev, cc);
	dev_set_name(dev, "%s", devinfo.name);

	mutex_lock(&cuse_lock);

	/* make sure the device-name is unique */
	for (i = 0; i < CUSE_CONNTBL_LEN; ++i) {
		list_for_each_entry(pos, &cuse_conntbl[i], list)
			if (!strcmp(dev_name(pos->dev), dev_name(dev)))
				goto err_unlock;
	}

	rc = device_add(dev);
	if (rc)
		goto err_unlock;

	/* register cdev */
	rc = -ENOMEM;
	cdev = cdev_alloc();
	if (!cdev)
		goto err_device;

	cdev->owner = THIS_MODULE;
	cdev->ops = &cuse_frontend_fops;

	rc = cdev_add(cdev, devt, 1);
	if (rc)
		goto err_cdev;

	cc->dev = dev;
	cc->cdev = cdev;

	/* make the device available */
	list_add(&cc->list, cuse_conntbl_head(devt));
	mutex_unlock(&cuse_lock);

	/* announce device availability */
	dev_set_uevent_suppress(dev, 0);
	kobject_uevent(&dev->kobj, KOBJ_ADD);
out:
	kfree(arg);
	__free_page(page);
	return;

err_cdev:
	cdev_del(cdev);
err_device:
	/*
	 * device_add() succeeded, so the device has to be removed again
	 * with device_del(); put_device() alone only drops our reference.
	 */
	device_del(dev);
err_unlock:
	mutex_unlock(&cuse_lock);
	put_device(dev);
err_region:
	unregister_chrdev_region(devt, 1);
err:
	fuse_abort_conn(fc, false);
	goto out;
}
/*
 * Build the CUSE_INIT request for @cc and queue it as a background
 * request.  The reply consists of a struct cuse_init_out followed by up to
 * CUSE_INIT_INFO_MAX bytes of device info stored in a freshly allocated
 * page; both are handed to cuse_process_init_reply() via req->end.
 *
 * Returns 0 once the request has been queued, or a negative errno if the
 * request, the page or the reply buffer could not be allocated.
 */
static int cuse_send_init(struct cuse_conn *cc)
{
	struct fuse_conn *fc = &cc->fc;
	struct cuse_init_in *arg;
	struct fuse_req *req;
	struct page *page;
	void *outarg;
	int rc;

	/* the device info must fit in the single reply page */
	BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);

	req = fuse_get_req_for_background(fc, 1);
	if (IS_ERR(req))
		return PTR_ERR(req);

	rc = -ENOMEM;
	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
	if (!page)
		goto err_put_req;

	outarg = kzalloc(sizeof(struct cuse_init_out), GFP_KERNEL);
	if (!outarg)
		goto err_free_page;

	/* input: protocol version and supported flags */
	arg = &req->misc.cuse_init_in;
	arg->major = FUSE_KERNEL_VERSION;
	arg->minor = FUSE_KERNEL_MINOR_VERSION;
	arg->flags |= CUSE_UNRESTRICTED_IOCTL;

	req->in.h.opcode = CUSE_INIT;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(struct cuse_init_in);
	req->in.args[0].value = arg;

	/* output: fixed-size header followed by variable-size info page */
	req->out.numargs = 2;
	req->out.args[0].size = sizeof(struct cuse_init_out);
	req->out.args[0].value = outarg;
	req->out.args[1].size = CUSE_INIT_INFO_MAX;
	req->out.argvar = 1;
	req->out.argpages = 1;

	req->pages[0] = page;
	req->page_descs[0].length = req->out.args[1].size;
	req->num_pages = 1;

	req->end = cuse_process_init_reply;
	fuse_request_send_background(fc, req);

	return 0;

err_free_page:
	__free_page(page);
err_put_req:
	fuse_put_request(fc, req);
	return rc;
}
static void cuse_fc_release(struct fuse_conn *fc)
{
struct cuse_conn *cc = fc_to_cc(fc);
kfree_rcu(cc, fc.rcu);
}
/**
* cuse_channel_open - open method for /dev/cuse
* @inode: inode for /dev/cuse
* @file: file struct being opened
*
* Userland CUSE server can create a CUSE device by opening /dev/cuse
* and replying to the initialization request kernel sends. This
* function is responsible for handling CUSE device initialization.
* Because the fd opened by this function is used during
* initialization, this function only creates cuse_conn and sends
* init. The rest is delegated to a kthread.
*
* RETURNS:
* 0 on success, -errno on failure.
*/
static int cuse_channel_open(struct inode *inode, struct file *file)
{
struct fuse_dev *fud;
struct cuse_conn *cc;
int rc;
/* set up cuse_conn */
cc = kzalloc(sizeof(*cc), GFP_KERNEL);
if (!cc)
return -ENOMEM;
fuse: Support fuse filesystems outside of init_user_ns In order to support mounts from namespaces other than init_user_ns, fuse must translate uids and gids to/from the userns of the process servicing requests on /dev/fuse. This patch does that, with a couple of restrictions on the namespace: - The userns for the fuse connection is fixed to the namespace from which /dev/fuse is opened. - The namespace must be the same as s_user_ns. These restrictions simplify the implementation by avoiding the need to pass around userns references and by allowing fuse to rely on the checks in setattr_prepare for ownership changes. Either restriction could be relaxed in the future if needed. For cuse the userns used is the opener of /dev/cuse. Semantically the cuse support does not appear safe for unprivileged users. Practically the permissions on /dev/cuse only make it accessible to the global root user. If something slips through the cracks in a user namespace the only users who will be able to use the cuse device are those users mapped into the user namespace. Translation in the posix acl is updated to use the uuser namespace of the filesystem. Avoiding cases which might bypass this translation is handled in a following change. This change is stronlgy based on a similar change from Seth Forshee and Dongsu Park. Cc: Seth Forshee <seth.forshee@canonical.com> Cc: Dongsu Park <dongsu@kinvolk.io> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
2018-02-22 01:18:07 +08:00
/*
* Limit the cuse channel to requests that can
* be represented in file->f_cred->user_ns.
*/
fuse_conn_init(&cc->fc, file->f_cred->user_ns);
fud = fuse_dev_alloc(&cc->fc);
if (!fud) {
kfree(cc);
return -ENOMEM;
}
INIT_LIST_HEAD(&cc->list);
cc->fc.release = cuse_fc_release;
cc->fc.initialized = 1;
rc = cuse_send_init(cc);
if (rc) {
fuse_dev_free(fud);
return rc;
}
file->private_data = fud;
return 0;
}
/**
 * cuse_channel_release - release method for /dev/cuse
 * @inode: inode for /dev/cuse
 * @file: file struct being closed
 *
 * Takes the connection off the lookup table, unregisters the generic
 * device and the character device if they were created, and starts
 * destruction by putting the connection references.
 *
 * RETURNS:
 * The return value of fuse_dev_release().
 */
static int cuse_channel_release(struct inode *inode, struct file *file)
{
	struct fuse_dev *fud = file->private_data;
	struct cuse_conn *cc = fc_to_cc(fud->fc);

	/* unhash first so that cuse_open() can no longer find us */
	mutex_lock(&cuse_lock);
	list_del_init(&cc->list);
	mutex_unlock(&cuse_lock);

	/* tear down the devices; both are NULL if init never completed */
	if (cc->dev)
		device_unregister(cc->dev);
	if (cc->cdev) {
		unregister_chrdev_region(cc->cdev->dev, 1);
		cdev_del(cc->cdev);
	}

	/* Base reference is now owned by "fud" */
	fuse_conn_put(&cc->fc);

	/* puts the base reference */
	return fuse_dev_release(inode, file);
}
/*
 * File operations of /dev/cuse.  Filled in by cuse_init() as a copy of
 * fuse_dev_operations with owner, open and release overridden.
 */
static struct file_operations cuse_channel_fops; /* initialized during init */
/**************************************************************************
* Misc stuff and module initialization
*
* CUSE exports the same set of attributes to sysfs as fusectl.
*/
/*
 * sysfs "waiting" attribute (mode 0400): prints the connection's
 * num_waiting counter followed by a newline.
 */
static ssize_t cuse_class_waiting_show(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	struct cuse_conn *cc = dev_get_drvdata(dev);
	int waiting = atomic_read(&cc->fc.num_waiting);

	return sprintf(buf, "%d\n", waiting);
}
static DEVICE_ATTR(waiting, 0400, cuse_class_waiting_show, NULL);
/*
 * sysfs "abort" attribute (mode 0200): any write aborts the connection.
 * The written data is ignored and the whole write is reported as consumed.
 */
static ssize_t cuse_class_abort_store(struct device *dev,
				      struct device_attribute *attr,
				      const char *buf, size_t count)
{
	struct cuse_conn *cc = dev_get_drvdata(dev);
	struct fuse_conn *fc = &cc->fc;

	fuse_abort_conn(fc, false);

	return count;
}
static DEVICE_ATTR(abort, 0200, NULL, cuse_class_abort_store);
/* sysfs attributes attached to every CUSE device through cuse_class */
static struct attribute *cuse_class_dev_attrs[] = {
	&dev_attr_waiting.attr,
	&dev_attr_abort.attr,
	NULL,	/* terminator */
};
ATTRIBUTE_GROUPS(cuse_class_dev);
/* The /dev/cuse misc device through which servers create CUSE devices */
static struct miscdevice cuse_miscdev = {
	.minor		= CUSE_MINOR,
	.name		= "cuse",
	.fops		= &cuse_channel_fops,
};

MODULE_ALIAS_MISCDEV(CUSE_MINOR);
MODULE_ALIAS("devname:cuse");
/*
 * Module init: set up the connection table, derive cuse_channel_fops from
 * fuse_dev_operations, create the "cuse" class and register /dev/cuse.
 * Returns 0 on success or the error from class_create()/misc_register().
 */
static int __init cuse_init(void)
{
	int i, rc;

	/* init conntbl */
	for (i = 0; i < CUSE_CONNTBL_LEN; i++)
		INIT_LIST_HEAD(&cuse_conntbl[i]);

	/* inherit and extend fuse_dev_operations */
	cuse_channel_fops = fuse_dev_operations;
	cuse_channel_fops.owner = THIS_MODULE;
	cuse_channel_fops.open = cuse_channel_open;
	cuse_channel_fops.release = cuse_channel_release;

	cuse_class = class_create(THIS_MODULE, "cuse");
	if (IS_ERR(cuse_class))
		return PTR_ERR(cuse_class);

	cuse_class->dev_groups = cuse_class_dev_groups;

	rc = misc_register(&cuse_miscdev);
	if (rc)
		class_destroy(cuse_class);	/* undo class_create() */

	return rc;
}
/* Module exit: undo cuse_init() in reverse order. */
static void __exit cuse_exit(void)
{
	misc_deregister(&cuse_miscdev);
	class_destroy(cuse_class);
}
/* Module entry/exit points and metadata. */
module_init(cuse_init);
module_exit(cuse_exit);
MODULE_AUTHOR("Tejun Heo <tj@kernel.org>");
MODULE_DESCRIPTION("Character device in Userspace");
MODULE_LICENSE("GPL");