linux/fs/ceph/acl.c

266 lines
5.8 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/fs/ceph/acl.c
*
* Copyright (C) 2013 Guangliang Zhao, <lucienchao@gmail.com>
*/
#include <linux/ceph/ceph_debug.h>
#include <linux/fs.h>
#include <linux/string.h>
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
#include <linux/posix_acl.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include "super.h"
#include "mds_client.h"
/*
 * Update the VFS-level cached ACL of @type for @inode.
 *
 * The ACL is only cached while __ceph_caps_issued_mask_metric() reports
 * CEPH_CAP_XATTR_SHARED as issued (presumably: while this client may trust
 * its local view of the inode's xattrs -- confirm against the caps code).
 * Otherwise the cache slot is reset with forget_cached_acl(); a filesystem
 * that does not want an ACL returned from its get_acl operation to be
 * cached must call forget_cached_acl() to prevent the VFS from caching it.
 *
 * The caps check and the cache update are done together under
 * ci->i_ceph_lock.
 */
static inline void ceph_set_cached_acl(struct inode *inode,
					int type, struct posix_acl *acl)
{
	struct ceph_inode_info *ci = ceph_inode(inode);

	spin_lock(&ci->i_ceph_lock);
	if (__ceph_caps_issued_mask_metric(ci, CEPH_CAP_XATTR_SHARED, 0))
		set_cached_acl(inode, type, acl);
	else
		forget_cached_acl(inode, type);
	spin_unlock(&ci->i_ceph_lock);
}
/*
 * Read the POSIX ACL of @type from the inode's extended attributes.
 *
 * Returns:
 *   - ERR_PTR(-ECHILD) when @rcu is true (nothing is looked up);
 *   - ERR_PTR(-ENOMEM) when the value buffer cannot be allocated;
 *   - NULL when the xattr is missing (-ENODATA) or has zero length;
 *   - ERR_PTR(-EIO) for any other xattr read failure (logged, rate limited);
 *   - otherwise whatever posix_acl_from_xattr() produces for the value.
 *
 * Every non-error result, including NULL, is passed on to
 * ceph_set_cached_acl(). An unknown @type hits BUG().
 */
struct posix_acl *ceph_get_acl(struct inode *inode, int type, bool rcu)
{
	struct ceph_client *cl = ceph_inode_to_client(inode);
	unsigned int attempts = 0;
	struct posix_acl *acl;
	const char *xattr_name;
	char *buf = NULL;
	int len;

	if (rcu)
		return ERR_PTR(-ECHILD);

	switch (type) {
	case ACL_TYPE_ACCESS:
		xattr_name = XATTR_NAME_POSIX_ACL_ACCESS;
		break;
	case ACL_TYPE_DEFAULT:
		xattr_name = XATTR_NAME_POSIX_ACL_DEFAULT;
		break;
	default:
		BUG();
	}

	for (;;) {
		/* Zero-sized probe first: a positive result is the value length. */
		len = __ceph_getxattr(inode, xattr_name, "", 0);
		if (len > 0) {
			buf = kzalloc(len, GFP_NOFS);
			if (!buf)
				return ERR_PTR(-ENOMEM);
			len = __ceph_getxattr(inode, xattr_name, buf, len);
		}

		/*
		 * -ERANGE on the second read means the buffer sized by the
		 * probe was too small; start over, at most 10 times.
		 */
		if (len != -ERANGE || attempts >= 10)
			break;

		attempts++;
		kfree(buf);
		buf = NULL;
	}

	if (len > 0) {
		acl = posix_acl_from_xattr(&init_user_ns, buf, len);
	} else if (len == 0 || len == -ENODATA) {
		acl = NULL;
	} else {
		pr_err_ratelimited_client(cl, "%llx.%llx failed, err=%d\n",
					  ceph_vinop(inode), len);
		acl = ERR_PTR(-EIO);
	}

	kfree(buf);

	if (IS_ERR(acl))
		return acl;

	ceph_set_cached_acl(inode, type, acl);
	return acl;
}
int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
struct posix_acl *acl, int type)
{
int ret = 0, size = 0;
const char *name = NULL;
char *value = NULL;
struct iattr newattrs;
fs: pass dentry to set acl method The current way of setting and getting posix acls through the generic xattr interface is error prone and type unsafe. The vfs needs to interpret and fixup posix acls before storing or reporting it to userspace. Various hacks exist to make this work. The code is hard to understand and difficult to maintain in it's current form. Instead of making this work by hacking posix acls through xattr handlers we are building a dedicated posix acl api around the get and set inode operations. This removes a lot of hackiness and makes the codepaths easier to maintain. A lot of background can be found in [1]. Since some filesystem rely on the dentry being available to them when setting posix acls (e.g., 9p and cifs) they cannot rely on set acl inode operation. But since ->set_acl() is required in order to use the generic posix acl xattr handlers filesystems that do not implement this inode operation cannot use the handler and need to implement their own dedicated posix acl handlers. Update the ->set_acl() inode method to take a dentry argument. This allows all filesystems to rely on ->set_acl(). As far as I can tell all codepaths can be switched to rely on the dentry instead of just the inode. Note that the original motivation for passing the dentry separate from the inode instead of just the dentry in the xattr handlers was because of security modules that call security_d_instantiate(). This hook is called during d_instantiate_new(), d_add(), __d_instantiate_anon(), and d_splice_alias() to initialize the inode's security context and possibly to set security.* xattrs. Since this only affects security.* xattrs this is completely irrelevant for posix acls. Link: https://lore.kernel.org/all/20220801145520.1532837-1-brauner@kernel.org [1] Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
2022-09-23 16:29:39 +08:00
struct inode *inode = d_inode(dentry);
struct timespec64 old_ctime = inode_get_ctime(inode);
umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
if (ceph_snap(inode) != CEPH_NOSNAP) {
ret = -EROFS;
goto out;
}
switch (type) {
case ACL_TYPE_ACCESS:
name = XATTR_NAME_POSIX_ACL_ACCESS;
if (acl) {
ret = posix_acl_update_mode(idmap, inode,
acl: handle idmapped mounts The posix acl permission checking helpers determine whether a caller is privileged over an inode according to the acls associated with the inode. Add helpers that make it possible to handle acls on idmapped mounts. The vfs and the filesystems targeted by this first iteration make use of posix_acl_fix_xattr_from_user() and posix_acl_fix_xattr_to_user() to translate basic posix access and default permissions such as the ACL_USER and ACL_GROUP type according to the initial user namespace (or the superblock's user namespace) to and from the caller's current user namespace. Adapt these two helpers to handle idmapped mounts whereby we either map from or into the mount's user namespace depending on in which direction we're translating. Similarly, cap_convert_nscap() is used by the vfs to translate user namespace and non-user namespace aware filesystem capabilities from the superblock's user namespace to the caller's user namespace. Enable it to handle idmapped mounts by accounting for the mount's user namespace. In addition the fileystems targeted in the first iteration of this patch series make use of the posix_acl_chmod() and, posix_acl_update_mode() helpers. Both helpers perform permission checks on the target inode. Let them handle idmapped mounts. These two helpers are called when posix acls are set by the respective filesystems to handle this case we extend the ->set() method to take an additional user namespace argument to pass the mount's user namespace down. Link: https://lore.kernel.org/r/20210121131959.646623-9-christian.brauner@ubuntu.com Cc: Christoph Hellwig <hch@lst.de> Cc: David Howells <dhowells@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
2021-01-21 21:19:27 +08:00
&new_mode, &acl);
if (ret)
goto out;
}
break;
case ACL_TYPE_DEFAULT:
if (!S_ISDIR(inode->i_mode)) {
ret = acl ? -EINVAL : 0;
goto out;
}
name = XATTR_NAME_POSIX_ACL_DEFAULT;
break;
default:
ret = -EINVAL;
goto out;
}
if (acl) {
size = posix_acl_xattr_size(acl->a_count);
value = kmalloc(size, GFP_NOFS);
if (!value) {
ret = -ENOMEM;
goto out;
}
ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
if (ret < 0)
goto out_free;
}
if (new_mode != old_mode) {
newattrs.ia_ctime = current_time(inode);
newattrs.ia_mode = new_mode;
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
ret = __ceph_setattr(idmap, inode, &newattrs, NULL);
if (ret)
goto out_free;
}
ret = __ceph_setxattr(inode, name, value, size, 0);
if (ret) {
if (new_mode != old_mode) {
newattrs.ia_ctime = old_ctime;
newattrs.ia_mode = old_mode;
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
__ceph_setattr(idmap, inode, &newattrs, NULL);
}
goto out_free;
}
ceph_set_cached_acl(inode, type, acl);
out_free:
kfree(value);
out:
return ret;
}
/*
 * Work out the ACLs that posix_acl_create() derives from @dir and @mode and
 * stage them in @as_ctx, both as ACL objects and encoded into a pagelist.
 *
 * Pagelist layout as written here: a 32-bit count of xattrs (1 or 2), then
 * per xattr the name via ceph_pagelist_encode_string(), a 32-bit value
 * length, and the value bytes produced by posix_acl_to_xattr().
 *
 * Returns 0 on success -- including the case where there is nothing to
 * store, in which @as_ctx is left untouched -- or a negative errno. On
 * error both ACLs, the scratch buffer and the pagelist are released.
 */
int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
		       struct ceph_acl_sec_ctx *as_ctx)
{
	struct posix_acl *acl, *default_acl;
	struct ceph_pagelist *pl = NULL;
	size_t access_len = 0, default_len = 0;
	void *xattr_buf = NULL;
	int err;

	err = posix_acl_create(dir, mode, &default_acl, &acl);
	if (err)
		return err;

	if (acl) {
		err = posix_acl_equiv_mode(acl, mode);
		if (err < 0)
			goto out_err;
		/*
		 * A zero result drops the access ACL (presumably because
		 * @mode alone already expresses it -- confirm against
		 * posix_acl_equiv_mode()).
		 */
		if (!err) {
			posix_acl_release(acl);
			acl = NULL;
		}
	}

	if (!(acl || default_acl))
		return 0;

	if (acl)
		access_len = posix_acl_xattr_size(acl->a_count);
	if (default_acl)
		default_len = posix_acl_xattr_size(default_acl->a_count);

	/* One scratch buffer, large enough for either encoded ACL. */
	xattr_buf = kmalloc(max(access_len, default_len), GFP_KERNEL);
	if (!xattr_buf) {
		err = -ENOMEM;
		goto out_err;
	}

	pl = ceph_pagelist_alloc(GFP_KERNEL);
	if (!pl) {
		err = -ENOMEM;
		goto out_err;
	}

	err = ceph_pagelist_reserve(pl, PAGE_SIZE);
	if (err)
		goto out_err;

	/* At least one ACL is set at this point, so the count is 1 or 2. */
	ceph_pagelist_encode_32(pl, (acl ? 1 : 0) + (default_acl ? 1 : 0));

	if (acl) {
		size_t name_len = strlen(XATTR_NAME_POSIX_ACL_ACCESS);

		/* + 8: presumably the two 32-bit length prefixes. */
		err = ceph_pagelist_reserve(pl, name_len + access_len + 8);
		if (err)
			goto out_err;

		ceph_pagelist_encode_string(pl, XATTR_NAME_POSIX_ACL_ACCESS,
					    name_len);
		err = posix_acl_to_xattr(&init_user_ns, acl,
					 xattr_buf, access_len);
		if (err < 0)
			goto out_err;

		ceph_pagelist_encode_32(pl, access_len);
		ceph_pagelist_append(pl, xattr_buf, access_len);
	}

	if (default_acl) {
		size_t name_len = strlen(XATTR_NAME_POSIX_ACL_DEFAULT);

		err = ceph_pagelist_reserve(pl, name_len + default_len + 8);
		if (err)
			goto out_err;

		ceph_pagelist_encode_string(pl, XATTR_NAME_POSIX_ACL_DEFAULT,
					    name_len);
		err = posix_acl_to_xattr(&init_user_ns, default_acl,
					 xattr_buf, default_len);
		if (err < 0)
			goto out_err;

		ceph_pagelist_encode_32(pl, default_len);
		ceph_pagelist_append(pl, xattr_buf, default_len);
	}

	/* Success: the ACLs and the pagelist are handed over to as_ctx. */
	as_ctx->pagelist = pl;
	as_ctx->default_acl = default_acl;
	as_ctx->acl = acl;
	kfree(xattr_buf);
	return 0;

out_err:
	posix_acl_release(acl);
	posix_acl_release(default_acl);
	kfree(xattr_buf);
	if (pl)
		ceph_pagelist_release(pl);
	return err;
}
/*
 * Feed the access and default ACLs held in @as_ctx to ceph_set_cached_acl()
 * for @inode. Does nothing when @inode is NULL.
 */
void ceph_init_inode_acls(struct inode *inode, struct ceph_acl_sec_ctx *as_ctx)
{
	if (inode) {
		ceph_set_cached_acl(inode, ACL_TYPE_ACCESS, as_ctx->acl);
		ceph_set_cached_acl(inode, ACL_TYPE_DEFAULT,
				    as_ctx->default_acl);
	}
}