mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-11-15 06:05:13 +08:00
[PATCH] r/o bind mounts: elevate write count for open()s
This is the first really tricky patch in the series. It elevates the writer count on a mount each time a non-special file is opened for write.

We used to do this in may_open(), but Miklos pointed out that __dentry_open() is used as well to create filps. This will cover even those cases, while a call in may_open() would not have.

There is also an elevated count around the vfs_create() call in open_namei(). See the comments for more details, but we need this to fix a 'create, remount, fail r/w open()' race.

Some filesystems forgo the use of normal vfs calls to create struct files. Make sure that these users elevate the mnt writer count because they will get __fput(), and we need to make sure they're balanced.

Acked-by: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
parent
42a74f206b
commit
4a3fd211cc
@ -199,6 +199,17 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
|
|||||||
file->f_mapping = dentry->d_inode->i_mapping;
|
file->f_mapping = dentry->d_inode->i_mapping;
|
||||||
file->f_mode = mode;
|
file->f_mode = mode;
|
||||||
file->f_op = fop;
|
file->f_op = fop;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* These mounts don't really matter in practice
|
||||||
|
* for r/o bind mounts. They aren't userspace-
|
||||||
|
* visible. We do this for consistency, and so
|
||||||
|
* that we can do debugging checks at __fput()
|
||||||
|
*/
|
||||||
|
if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
|
||||||
|
error = mnt_want_write(mnt);
|
||||||
|
WARN_ON(error);
|
||||||
|
}
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL(init_file);
|
EXPORT_SYMBOL(init_file);
|
||||||
@ -221,10 +232,13 @@ EXPORT_SYMBOL(fput);
|
|||||||
*/
|
*/
|
||||||
void drop_file_write_access(struct file *file)
|
void drop_file_write_access(struct file *file)
|
||||||
{
|
{
|
||||||
|
struct vfsmount *mnt = file->f_path.mnt;
|
||||||
struct dentry *dentry = file->f_path.dentry;
|
struct dentry *dentry = file->f_path.dentry;
|
||||||
struct inode *inode = dentry->d_inode;
|
struct inode *inode = dentry->d_inode;
|
||||||
|
|
||||||
put_write_access(inode);
|
put_write_access(inode);
|
||||||
|
if (!special_file(inode->i_mode))
|
||||||
|
mnt_drop_write(mnt);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(drop_file_write_access);
|
EXPORT_SYMBOL_GPL(drop_file_write_access);
|
||||||
|
|
||||||
|
75
fs/namei.c
75
fs/namei.c
@ -1623,8 +1623,7 @@ int may_open(struct nameidata *nd, int acc_mode, int flag)
|
|||||||
return -EACCES;
|
return -EACCES;
|
||||||
|
|
||||||
flag &= ~O_TRUNC;
|
flag &= ~O_TRUNC;
|
||||||
} else if (IS_RDONLY(inode) && (acc_mode & MAY_WRITE))
|
}
|
||||||
return -EROFS;
|
|
||||||
|
|
||||||
error = vfs_permission(nd, acc_mode);
|
error = vfs_permission(nd, acc_mode);
|
||||||
if (error)
|
if (error)
|
||||||
@ -1724,18 +1723,32 @@ static inline int open_to_namei_flags(int flag)
|
|||||||
return flag;
|
return flag;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int open_will_write_to_fs(int flag, struct inode *inode)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We'll never write to the fs underlying
|
||||||
|
* a device file.
|
||||||
|
*/
|
||||||
|
if (special_file(inode->i_mode))
|
||||||
|
return 0;
|
||||||
|
return (flag & O_TRUNC);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note that the low bits of "flag" aren't the same as in the open
|
* Note that the low bits of the passed in "open_flag"
|
||||||
* system call. See open_to_namei_flags().
|
* are not the same as in the local variable "flag". See
|
||||||
|
* open_to_namei_flags() for more details.
|
||||||
*/
|
*/
|
||||||
struct file *do_filp_open(int dfd, const char *pathname,
|
struct file *do_filp_open(int dfd, const char *pathname,
|
||||||
int open_flag, int mode)
|
int open_flag, int mode)
|
||||||
{
|
{
|
||||||
|
struct file *filp;
|
||||||
struct nameidata nd;
|
struct nameidata nd;
|
||||||
int acc_mode, error;
|
int acc_mode, error;
|
||||||
struct path path;
|
struct path path;
|
||||||
struct dentry *dir;
|
struct dentry *dir;
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
int will_write;
|
||||||
int flag = open_to_namei_flags(open_flag);
|
int flag = open_to_namei_flags(open_flag);
|
||||||
|
|
||||||
acc_mode = ACC_MODE(flag);
|
acc_mode = ACC_MODE(flag);
|
||||||
@ -1791,17 +1804,30 @@ do_last:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (IS_ERR(nd.intent.open.file)) {
|
if (IS_ERR(nd.intent.open.file)) {
|
||||||
mutex_unlock(&dir->d_inode->i_mutex);
|
|
||||||
error = PTR_ERR(nd.intent.open.file);
|
error = PTR_ERR(nd.intent.open.file);
|
||||||
goto exit_dput;
|
goto exit_mutex_unlock;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Negative dentry, just create the file */
|
/* Negative dentry, just create the file */
|
||||||
if (!path.dentry->d_inode) {
|
if (!path.dentry->d_inode) {
|
||||||
error = __open_namei_create(&nd, &path, flag, mode);
|
/*
|
||||||
|
* This write is needed to ensure that a
|
||||||
|
* rw->ro transition does not occur between
|
||||||
|
* the time when the file is created and when
|
||||||
|
* a permanent write count is taken through
|
||||||
|
* the 'struct file' in nameidata_to_filp().
|
||||||
|
*/
|
||||||
|
error = mnt_want_write(nd.path.mnt);
|
||||||
if (error)
|
if (error)
|
||||||
|
goto exit_mutex_unlock;
|
||||||
|
error = __open_namei_create(&nd, &path, flag, mode);
|
||||||
|
if (error) {
|
||||||
|
mnt_drop_write(nd.path.mnt);
|
||||||
goto exit;
|
goto exit;
|
||||||
return nameidata_to_filp(&nd, open_flag);
|
}
|
||||||
|
filp = nameidata_to_filp(&nd, open_flag);
|
||||||
|
mnt_drop_write(nd.path.mnt);
|
||||||
|
return filp;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1831,11 +1857,40 @@ do_last:
|
|||||||
if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
|
if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
|
||||||
goto exit;
|
goto exit;
|
||||||
ok:
|
ok:
|
||||||
error = may_open(&nd, acc_mode, flag);
|
/*
|
||||||
|
* Consider:
|
||||||
|
* 1. may_open() truncates a file
|
||||||
|
* 2. a rw->ro mount transition occurs
|
||||||
|
* 3. nameidata_to_filp() fails due to
|
||||||
|
* the ro mount.
|
||||||
|
* That would be inconsistent, and should
|
||||||
|
* be avoided. Taking this mnt write here
|
||||||
|
* ensures that (2) can not occur.
|
||||||
|
*/
|
||||||
|
will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
|
||||||
|
if (will_write) {
|
||||||
|
error = mnt_want_write(nd.path.mnt);
|
||||||
if (error)
|
if (error)
|
||||||
goto exit;
|
goto exit;
|
||||||
return nameidata_to_filp(&nd, open_flag);
|
}
|
||||||
|
error = may_open(&nd, acc_mode, flag);
|
||||||
|
if (error) {
|
||||||
|
if (will_write)
|
||||||
|
mnt_drop_write(nd.path.mnt);
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
filp = nameidata_to_filp(&nd, open_flag);
|
||||||
|
/*
|
||||||
|
* It is now safe to drop the mnt write
|
||||||
|
* because the filp has had a write taken
|
||||||
|
* on its behalf.
|
||||||
|
*/
|
||||||
|
if (will_write)
|
||||||
|
mnt_drop_write(nd.path.mnt);
|
||||||
|
return filp;
|
||||||
|
|
||||||
|
exit_mutex_unlock:
|
||||||
|
mutex_unlock(&dir->d_inode->i_mutex);
|
||||||
exit_dput:
|
exit_dput:
|
||||||
path_put_conditional(&path, &nd);
|
path_put_conditional(&path, &nd);
|
||||||
exit:
|
exit:
|
||||||
|
36
fs/open.c
36
fs/open.c
@ -730,6 +730,35 @@ out:
|
|||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* You have to be very careful that these write
|
||||||
|
* counts get cleaned up in error cases and
|
||||||
|
* upon __fput(). This should probably never
|
||||||
|
* be called outside of __dentry_open().
|
||||||
|
*/
|
||||||
|
static inline int __get_file_write_access(struct inode *inode,
|
||||||
|
struct vfsmount *mnt)
|
||||||
|
{
|
||||||
|
int error;
|
||||||
|
error = get_write_access(inode);
|
||||||
|
if (error)
|
||||||
|
return error;
|
||||||
|
/*
|
||||||
|
* Do not take mount writer counts on
|
||||||
|
* special files since no writes to
|
||||||
|
* the mount itself will occur.
|
||||||
|
*/
|
||||||
|
if (!special_file(inode->i_mode)) {
|
||||||
|
/*
|
||||||
|
* Balanced in __fput()
|
||||||
|
*/
|
||||||
|
error = mnt_want_write(mnt);
|
||||||
|
if (error)
|
||||||
|
put_write_access(inode);
|
||||||
|
}
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
|
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
|
||||||
int flags, struct file *f,
|
int flags, struct file *f,
|
||||||
int (*open)(struct inode *, struct file *))
|
int (*open)(struct inode *, struct file *))
|
||||||
@ -742,7 +771,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
|
|||||||
FMODE_PREAD | FMODE_PWRITE;
|
FMODE_PREAD | FMODE_PWRITE;
|
||||||
inode = dentry->d_inode;
|
inode = dentry->d_inode;
|
||||||
if (f->f_mode & FMODE_WRITE) {
|
if (f->f_mode & FMODE_WRITE) {
|
||||||
error = get_write_access(inode);
|
error = __get_file_write_access(inode, mnt);
|
||||||
if (error)
|
if (error)
|
||||||
goto cleanup_file;
|
goto cleanup_file;
|
||||||
}
|
}
|
||||||
@ -784,8 +813,11 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
|
|||||||
|
|
||||||
cleanup_all:
|
cleanup_all:
|
||||||
fops_put(f->f_op);
|
fops_put(f->f_op);
|
||||||
if (f->f_mode & FMODE_WRITE)
|
if (f->f_mode & FMODE_WRITE) {
|
||||||
put_write_access(inode);
|
put_write_access(inode);
|
||||||
|
if (!special_file(inode->i_mode))
|
||||||
|
mnt_drop_write(mnt);
|
||||||
|
}
|
||||||
file_kill(f);
|
file_kill(f);
|
||||||
f->f_path.dentry = NULL;
|
f->f_path.dentry = NULL;
|
||||||
f->f_path.mnt = NULL;
|
f->f_path.mnt = NULL;
|
||||||
|
16
ipc/mqueue.c
16
ipc/mqueue.c
@ -598,6 +598,7 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
|
|||||||
int oflag, mode_t mode, struct mq_attr __user *u_attr)
|
int oflag, mode_t mode, struct mq_attr __user *u_attr)
|
||||||
{
|
{
|
||||||
struct mq_attr attr;
|
struct mq_attr attr;
|
||||||
|
struct file *result;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (u_attr) {
|
if (u_attr) {
|
||||||
@ -612,13 +613,24 @@ static struct file *do_create(struct dentry *dir, struct dentry *dentry,
|
|||||||
}
|
}
|
||||||
|
|
||||||
mode &= ~current->fs->umask;
|
mode &= ~current->fs->umask;
|
||||||
|
ret = mnt_want_write(mqueue_mnt);
|
||||||
|
if (ret)
|
||||||
|
goto out;
|
||||||
ret = vfs_create(dir->d_inode, dentry, mode, NULL);
|
ret = vfs_create(dir->d_inode, dentry, mode, NULL);
|
||||||
dentry->d_fsdata = NULL;
|
dentry->d_fsdata = NULL;
|
||||||
if (ret)
|
if (ret)
|
||||||
goto out;
|
goto out_drop_write;
|
||||||
|
|
||||||
return dentry_open(dentry, mqueue_mnt, oflag);
|
result = dentry_open(dentry, mqueue_mnt, oflag);
|
||||||
|
/*
|
||||||
|
* dentry_open() took a persistent mnt_want_write(),
|
||||||
|
* so we can now drop this one.
|
||||||
|
*/
|
||||||
|
mnt_drop_write(mqueue_mnt);
|
||||||
|
return result;
|
||||||
|
|
||||||
|
out_drop_write:
|
||||||
|
mnt_drop_write(mqueue_mnt);
|
||||||
out:
|
out:
|
||||||
dput(dentry);
|
dput(dentry);
|
||||||
mntput(mqueue_mnt);
|
mntput(mqueue_mnt);
|
||||||
|
Loading…
Reference in New Issue
Block a user