From cc3e8969ffb2ce2560d0206b688a59bb4bf0fcc3 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 15 Aug 2024 11:24:18 +0200 Subject: [PATCH 01/11] fs/namespace: introduce SB_I_NOIDMAP flag Right now we determine if filesystem support vfs idmappings or not basing on the FS_ALLOW_IDMAP flag presence. This "static" way works perfecly well for local filesystems like ext4, xfs, btrfs, etc. But for network-like filesystems like fuse, cephfs this approach is not ideal, because sometimes proper support of vfs idmaps requires some extensions for the on-wire protocol, which implies that changes have to be made not only in the Linux kernel code but also in the 3rd party components like libfuse, cephfs MDS server and so on. We have seen that issue during our work on cephfs idmapped mounts [1] with Christian, but right now I'm working on the idmapped mounts support for fuse/virtiofs and I think that it is a right time for this extension. [1] 5ccd8530dd7 ("ceph: handle idmapped mounts in create_request_message()") Cc: Christian Brauner Cc: Seth Forshee Cc: Miklos Szeredi Cc: Amir Goldstein Cc: Bernd Schubert Cc: linux-fsdevel@vger.kernel.org Suggested-by: Christian Brauner Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20240815092429.103356-2-aleksandr.mikhalitsyn@canonical.com Reviewed-by: Christian Brauner Signed-off-by: Christian Brauner --- fs/namespace.c | 4 ++++ include/linux/fs.h | 1 + 2 files changed, 5 insertions(+) diff --git a/fs/namespace.c b/fs/namespace.c index 328087a4df8a..d1702285c915 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4436,6 +4436,10 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP)) return -EINVAL; + /* The filesystem has turned off idmapped mounts. */ + if (m->mnt_sb->s_iflags & SB_I_NOIDMAP) + return -EINVAL; + /* We're not controlling the superblock. */ if (!ns_capable(fs_userns, CAP_SYS_ADMIN)) return -EPERM; diff --git a/include/linux/fs.h b/include/linux/fs.h index fd34b5755c0b..6ff547ef21f2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1189,6 +1189,7 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_TS_EXPIRY_WARNED 0x00000400 /* warned about timestamp range expiry */ #define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */ #define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ +#define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */ /* Possible states of 'frozen' field */ enum { From d2c5937035e563bf1bf5c1a8b0ef7a1958f0028e Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 15 Aug 2024 11:24:19 +0200 Subject: [PATCH 02/11] fs/fuse: add FUSE_OWNER_UID_GID_EXT extension To properly support vfs idmappings we need to provide a fuse daemon with the correct owner uid/gid for inode creation requests like mkdir, mknod, atomic_open, symlink. Right now, fuse daemons use req->in.h.uid/req->in.h.gid to set inode owner. These fields contain fsuid/fsgid of the syscall's caller. And that's perfectly fine, because inode owner have to be set to these values. But, for idmapped mounts it's not the case and caller fsuid/fsgid != inode owner, because idmapped mounts do nothing with the caller fsuid/fsgid, but affect inode owner uid/gid. It means that we can't apply vfsid mapping to caller fsuid/fsgid, but instead we have to introduce a new fields to store inode owner uid/gid which will be appropriately transformed. Christian and I have done the same to support idmapped mounts in the cephfs recently [1]. [1] 5ccd8530 ("ceph: handle idmapped mounts in create_request_message()") Cc: Miklos Szeredi Cc: Christian Brauner Cc: Seth Forshee Cc: Amir Goldstein Cc: Bernd Schubert Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20240815092429.103356-3-aleksandr.mikhalitsyn@canonical.com Reviewed-by: Christian Brauner Signed-off-by: Christian Brauner --- fs/fuse/dir.c | 34 +++++++++++++++++++++++++++++++--- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 4 +++- include/uapi/linux/fuse.h | 19 +++++++++++++++++++ 4 files changed, 56 insertions(+), 4 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 2b0d4781f394..30d27d4f3b5a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -572,7 +572,33 @@ static int get_create_supp_group(struct inode *dir, struct fuse_in_arg *ext) return 0; } -static int get_create_ext(struct fuse_args *args, +static int get_owner_uid_gid(struct mnt_idmap *idmap, struct fuse_conn *fc, struct fuse_in_arg *ext) +{ + struct fuse_ext_header *xh; + struct fuse_owner_uid_gid *owner_creds; + u32 owner_creds_len = fuse_ext_size(sizeof(*owner_creds)); + kuid_t owner_fsuid; + kgid_t owner_fsgid; + + xh = extend_arg(ext, owner_creds_len); + if (!xh) + return -ENOMEM; + + xh->size = owner_creds_len; + xh->type = FUSE_EXT_OWNER_UID_GID; + + owner_creds = (struct fuse_owner_uid_gid *) &xh[1]; + + owner_fsuid = mapped_fsuid(idmap, fc->user_ns); + owner_fsgid = mapped_fsgid(idmap, fc->user_ns); + owner_creds->uid = from_kuid(fc->user_ns, owner_fsuid); + owner_creds->gid = from_kgid(fc->user_ns, owner_fsgid); + + return 0; +} + +static int get_create_ext(struct mnt_idmap *idmap, + struct fuse_args *args, struct inode *dir, struct dentry *dentry, umode_t mode) { @@ -584,6 +610,8 @@ static int get_create_ext(struct fuse_args *args, err = get_security_context(dentry, mode, &ext); if (!err && fc->create_supp_group) err = get_create_supp_group(dir, &ext); + if (!err && fc->owner_uid_gid_ext) + err = get_owner_uid_gid(idmap, fc, &ext); if (!err && ext.size) { WARN_ON(args->in_numargs >= ARRAY_SIZE(args->in_args)); @@ -668,7 +696,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, args.out_args[1].size = sizeof(*outopenp); args.out_args[1].value = outopenp; - err = get_create_ext(&args, dir, entry, mode); + err = get_create_ext(&nop_mnt_idmap, &args, dir, entry, mode); if (err) goto out_put_forget_req; @@ -798,7 +826,7 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, args->out_args[0].value = &outarg; if (args->opcode != FUSE_LINK) { - err = get_create_ext(args, dir, entry, mode); + err = get_create_ext(&nop_mnt_idmap, args, dir, entry, mode); if (err) goto out_put_forget_req; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index f23919610313..d06934e70cc5 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -845,6 +845,9 @@ struct fuse_conn { /* Add supplementary group info when creating a new inode */ unsigned int create_supp_group:1; + /* Add owner_{u,g}id info when creating a new inode */ + unsigned int owner_uid_gid_ext:1; + /* Does the filesystem support per inode DAX? */ unsigned int inode_dax:1; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d8ab4e93916f..6c205731c844 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1343,6 +1343,8 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, } if (flags & FUSE_NO_EXPORT_SUPPORT) fm->sb->s_export_op = &fuse_export_fid_operations; + if (flags & FUSE_OWNER_UID_GID_EXT) + fc->owner_uid_gid_ext = 1; } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; @@ -1390,7 +1392,7 @@ void fuse_send_init(struct fuse_mount *fm) FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP | - FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND; + FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_OWNER_UID_GID_EXT; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) flags |= FUSE_MAP_ALIGNMENT; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index d08b99d60f6f..d9ecc17fd13b 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -217,6 +217,10 @@ * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag * - add FUSE_NO_EXPORT_SUPPORT init flag * - add FUSE_NOTIFY_RESEND, add FUSE_HAS_RESEND init flag + * + * 7.41 + * - add FUSE_EXT_OWNER_UID_GID + * - add FUSE_OWNER_UID_GID_EXT */ #ifndef _LINUX_FUSE_H @@ -421,6 +425,8 @@ struct fuse_file_lock { * FUSE_NO_EXPORT_SUPPORT: explicitly disable export support * FUSE_HAS_RESEND: kernel supports resending pending requests, and the high bit * of the request ID indicates resend requests + * FUSE_OWNER_UID_GID_EXT: add inode owner UID/GID info to create, mkdir, + * symlink and mknod */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -466,6 +472,7 @@ struct fuse_file_lock { /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP +#define FUSE_OWNER_UID_GID_EXT (1ULL << 40) /** * CUSE INIT request/reply flags @@ -575,11 +582,13 @@ struct fuse_file_lock { * extension type * FUSE_MAX_NR_SECCTX: maximum value of &fuse_secctx_header.nr_secctx * FUSE_EXT_GROUPS: &fuse_supp_groups extension + * FUSE_EXT_OWNER_UID_GID: &fuse_owner_uid_gid extension */ enum fuse_ext_type { /* Types 0..31 are reserved for fuse_secctx_header */ FUSE_MAX_NR_SECCTX = 31, FUSE_EXT_GROUPS = 32, + FUSE_EXT_OWNER_UID_GID = 33, }; enum fuse_opcode { @@ -1186,4 +1195,14 @@ struct fuse_supp_groups { uint32_t groups[]; }; +/** + * struct fuse_owner_uid_gid - Inode owner UID/GID extension + * @uid: inode owner UID + * @gid: inode owner GID + */ +struct fuse_owner_uid_gid { + uint32_t uid; + uint32_t gid; +}; + #endif /* _LINUX_FUSE_H */ From 9961d396252bd9e45ee883654626ad4e5d1953c5 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 15 Aug 2024 11:24:20 +0200 Subject: [PATCH 03/11] fs/fuse: support idmap for mkdir/mknod/symlink/create We have all the infrastructure in place, we just need to pass an idmapping here. Cc: Christian Brauner Cc: Seth Forshee Cc: Miklos Szeredi Cc: Amir Goldstein Cc: Bernd Schubert Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20240815092429.103356-4-aleksandr.mikhalitsyn@canonical.com Reviewed-by: Christian Brauner Signed-off-by: Christian Brauner --- fs/fuse/dir.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 30d27d4f3b5a..1e45c6157af4 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -637,9 +637,9 @@ static void free_ext_value(struct fuse_args *args) * If the filesystem doesn't support this, then fall back to separate * 'mknod' + 'open' requests. */ -static int fuse_create_open(struct inode *dir, struct dentry *entry, - struct file *file, unsigned int flags, - umode_t mode, u32 opcode) +static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir, + struct dentry *entry, struct file *file, + unsigned int flags, umode_t mode, u32 opcode) { int err; struct inode *inode; @@ -696,7 +696,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, args.out_args[1].size = sizeof(*outopenp); args.out_args[1].value = outopenp; - err = get_create_ext(&nop_mnt_idmap, &args, dir, entry, mode); + err = get_create_ext(idmap, &args, dir, entry, mode); if (err) goto out_put_forget_req; @@ -757,6 +757,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, umode_t mode) { int err; + struct mnt_idmap *idmap = file_mnt_idmap(file); struct fuse_conn *fc = get_fuse_conn(dir); struct dentry *res = NULL; @@ -781,7 +782,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, if (fc->no_create) goto mknod; - err = fuse_create_open(dir, entry, file, flags, mode, FUSE_CREATE); + err = fuse_create_open(idmap, dir, entry, file, flags, mode, FUSE_CREATE); if (err == -ENOSYS) { fc->no_create = 1; goto mknod; @@ -792,7 +793,7 @@ out_dput: return err; mknod: - err = fuse_mknod(&nop_mnt_idmap, dir, entry, mode, 0); + err = fuse_mknod(idmap, dir, entry, mode, 0); if (err) goto out_dput; no_open: @@ -802,9 +803,9 @@ no_open: /* * Code shared between mknod, mkdir, symlink and link */ -static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, - struct inode *dir, struct dentry *entry, - umode_t mode) +static int create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm, + struct fuse_args *args, struct inode *dir, + struct dentry *entry, umode_t mode) { struct fuse_entry_out outarg; struct inode *inode; @@ -826,7 +827,7 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args, args->out_args[0].value = &outarg; if (args->opcode != FUSE_LINK) { - err = get_create_ext(&nop_mnt_idmap, args, dir, entry, mode); + err = get_create_ext(idmap, args, dir, entry, mode); if (err) goto out_put_forget_req; } @@ -892,13 +893,13 @@ static int fuse_mknod(struct mnt_idmap *idmap, struct inode *dir, args.in_args[0].value = &inarg; args.in_args[1].size = entry->d_name.len + 1; args.in_args[1].value = entry->d_name.name; - return create_new_entry(fm, &args, dir, entry, mode); + return create_new_entry(idmap, fm, &args, dir, entry, mode); } static int fuse_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *entry, umode_t mode, bool excl) { - return fuse_mknod(&nop_mnt_idmap, dir, entry, mode, 0); + return fuse_mknod(idmap, dir, entry, mode, 0); } static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir, @@ -910,7 +911,7 @@ static int fuse_tmpfile(struct mnt_idmap *idmap, struct inode *dir, if (fc->no_tmpfile) return -EOPNOTSUPP; - err = fuse_create_open(dir, file->f_path.dentry, file, file->f_flags, mode, FUSE_TMPFILE); + err = fuse_create_open(idmap, dir, file->f_path.dentry, file, file->f_flags, mode, FUSE_TMPFILE); if (err == -ENOSYS) { fc->no_tmpfile = 1; err = -EOPNOTSUPP; @@ -937,7 +938,7 @@ static int fuse_mkdir(struct mnt_idmap *idmap, struct inode *dir, args.in_args[0].value = &inarg; args.in_args[1].size = entry->d_name.len + 1; args.in_args[1].value = entry->d_name.name; - return create_new_entry(fm, &args, dir, entry, S_IFDIR); + return create_new_entry(idmap, fm, &args, dir, entry, S_IFDIR); } static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir, @@ -953,7 +954,7 @@ static int fuse_symlink(struct mnt_idmap *idmap, struct inode *dir, args.in_args[0].value = entry->d_name.name; args.in_args[1].size = len; args.in_args[1].value = link; - return create_new_entry(fm, &args, dir, entry, S_IFLNK); + return create_new_entry(idmap, fm, &args, dir, entry, S_IFLNK); } void fuse_flush_time_update(struct inode *inode) @@ -1147,7 +1148,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, args.in_args[0].value = &inarg; args.in_args[1].size = newent->d_name.len + 1; args.in_args[1].value = newent->d_name.name; - err = create_new_entry(fm, &args, newdir, newent, inode->i_mode); + err = create_new_entry(&nop_mnt_idmap, fm, &args, newdir, newent, inode->i_mode); if (!err) fuse_update_ctime_in_cache(inode); else if (err == -EINTR) From 52dfd148ff75e75842e233135e128eb3fdc33cb2 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 15 Aug 2024 11:24:21 +0200 Subject: [PATCH 04/11] fs/fuse: support idmapped getattr inode op We have to: - pass an idmapping to the generic_fillattr() to properly handle UIG/GID mapping for the userspace. - pass -/- to fuse_fillattr() (analog of generic_fillattr() in fuse). Difference between these two is that generic_fillattr() takes all the stat() data from the inode directly, while fuse_fillattr() codepath takes a fresh data just from the userspace reply on the FUSE_GETATTR request. In some cases we can just pass &nop_mnt_idmap, because idmapping won't be used in these codepaths. For example, when 3rd argument of fuse_do_getattr() is NULL then idmap argument is not used. Cc: Christian Brauner Cc: Seth Forshee Cc: Miklos Szeredi Cc: Amir Goldstein Cc: Bernd Schubert Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20240815092429.103356-5-aleksandr.mikhalitsyn@canonical.com Reviewed-by: Christian Brauner Signed-off-by: Christian Brauner --- fs/fuse/dir.c | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 1e45c6157af4..a5bf8c18a0ae 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1157,18 +1157,22 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, return err; } -static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, - struct kstat *stat) +static void fuse_fillattr(struct mnt_idmap *idmap, struct inode *inode, + struct fuse_attr *attr, struct kstat *stat) { unsigned int blkbits; struct fuse_conn *fc = get_fuse_conn(inode); + vfsuid_t vfsuid = make_vfsuid(idmap, fc->user_ns, + make_kuid(fc->user_ns, attr->uid)); + vfsgid_t vfsgid = make_vfsgid(idmap, fc->user_ns, + make_kgid(fc->user_ns, attr->gid)); stat->dev = inode->i_sb->s_dev; stat->ino = attr->ino; stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); stat->nlink = attr->nlink; - stat->uid = make_kuid(fc->user_ns, attr->uid); - stat->gid = make_kgid(fc->user_ns, attr->gid); + stat->uid = vfsuid_into_kuid(vfsuid); + stat->gid = vfsgid_into_kgid(vfsgid); stat->rdev = inode->i_rdev; stat->atime.tv_sec = attr->atime; stat->atime.tv_nsec = attr->atimensec; @@ -1207,8 +1211,8 @@ static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr) attr->blksize = sx->blksize; } -static int fuse_do_statx(struct inode *inode, struct file *file, - struct kstat *stat) +static int fuse_do_statx(struct mnt_idmap *idmap, struct inode *inode, + struct file *file, struct kstat *stat) { int err; struct fuse_attr attr; @@ -1261,15 +1265,15 @@ static int fuse_do_statx(struct inode *inode, struct file *file, stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME); stat->btime.tv_sec = sx->btime.tv_sec; stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1); - fuse_fillattr(inode, &attr, stat); + fuse_fillattr(idmap, inode, &attr, stat); stat->result_mask |= STATX_TYPE; } return 0; } -static int fuse_do_getattr(struct inode *inode, struct kstat *stat, - struct file *file) +static int fuse_do_getattr(struct mnt_idmap *idmap, struct inode *inode, + struct kstat *stat, struct file *file) { int err; struct fuse_getattr_in inarg; @@ -1308,15 +1312,15 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, ATTR_TIMEOUT(&outarg), attr_version); if (stat) - fuse_fillattr(inode, &outarg.attr, stat); + fuse_fillattr(idmap, inode, &outarg.attr, stat); } } return err; } -static int fuse_update_get_attr(struct inode *inode, struct file *file, - struct kstat *stat, u32 request_mask, - unsigned int flags) +static int fuse_update_get_attr(struct mnt_idmap *idmap, struct inode *inode, + struct file *file, struct kstat *stat, + u32 request_mask, unsigned int flags) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = get_fuse_conn(inode); @@ -1347,17 +1351,17 @@ retry: forget_all_cached_acls(inode); /* Try statx if BTIME is requested */ if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) { - err = fuse_do_statx(inode, file, stat); + err = fuse_do_statx(idmap, inode, file, stat); if (err == -ENOSYS) { fc->no_statx = 1; err = 0; goto retry; } } else { - err = fuse_do_getattr(inode, stat, file); + err = fuse_do_getattr(idmap, inode, stat, file); } } else if (stat) { - generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); + generic_fillattr(idmap, request_mask, inode, stat); stat->mode = fi->orig_i_mode; stat->ino = fi->orig_ino; if (test_bit(FUSE_I_BTIME, &fi->state)) { @@ -1371,7 +1375,7 @@ retry: int fuse_update_attributes(struct inode *inode, struct file *file, u32 mask) { - return fuse_update_get_attr(inode, file, NULL, mask, 0); + return fuse_update_get_attr(&nop_mnt_idmap, inode, file, NULL, mask, 0); } int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, @@ -1515,7 +1519,7 @@ static int fuse_perm_getattr(struct inode *inode, int mask) return -ECHILD; forget_all_cached_acls(inode); - return fuse_do_getattr(inode, NULL, NULL); + return fuse_do_getattr(&nop_mnt_idmap, inode, NULL, NULL); } /* @@ -2094,7 +2098,7 @@ static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry, * ia_mode calculation may have used stale i_mode. * Refresh and recalculate. */ - ret = fuse_do_getattr(inode, NULL, file); + ret = fuse_do_getattr(idmap, inode, NULL, file); if (ret) return ret; @@ -2151,7 +2155,7 @@ static int fuse_getattr(struct mnt_idmap *idmap, return -EACCES; } - return fuse_update_get_attr(inode, NULL, stat, request_mask, flags); + return fuse_update_get_attr(idmap, inode, NULL, stat, request_mask, flags); } static const struct inode_operations fuse_dir_inode_operations = { From 34ddf0de71be7aa332af42bbfe0cd2a144c61bee Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 15 Aug 2024 11:24:22 +0200 Subject: [PATCH 05/11] fs/fuse: support idmapped ->permission inode op We only cover the case when "default_permissions" flag is used. A reason for that is that otherwise all the permission checks are done in the userspace and we have to deal with VFS idmapping in the userspace (which is bad), alternatively we have to provide the userspace with idmapped req->in.h.uid/req->in.h.gid which is also not align with VFS idmaps philosophy. Cc: Christian Brauner Cc: Seth Forshee Cc: Miklos Szeredi Cc: Amir Goldstein Cc: Bernd Schubert Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20240815092429.103356-6-aleksandr.mikhalitsyn@canonical.com Reviewed-by: Christian Brauner Signed-off-by: Christian Brauner --- fs/fuse/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index a5bf8c18a0ae..cd3b91b60cae 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1567,7 +1567,7 @@ static int fuse_permission(struct mnt_idmap *idmap, } if (fc->default_permissions) { - err = generic_permission(&nop_mnt_idmap, inode, mask); + err = generic_permission(idmap, inode, mask); /* If permission is denied, try to refresh file attributes. This is also needed, because the root @@ -1575,7 +1575,7 @@ static int fuse_permission(struct mnt_idmap *idmap, if (err == -EACCES && !refreshed) { err = fuse_perm_getattr(inode, mask); if (!err) - err = generic_permission(&nop_mnt_idmap, + err = generic_permission(idmap, inode, mask); } From 27b622529cdcbbcb37da189c20e8288741dc31ea Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 15 Aug 2024 11:24:23 +0200 Subject: [PATCH 06/11] fs/fuse: support idmapped ->setattr op Cc: Christian Brauner Cc: Seth Forshee Cc: Miklos Szeredi Cc: Amir Goldstein Cc: Bernd Schubert Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20240815092429.103356-7-aleksandr.mikhalitsyn@canonical.com Reviewed-by: Christian Brauner Signed-off-by: Christian Brauner --- fs/fuse/dir.c | 32 +++++++++++++++++++++----------- fs/fuse/file.c | 2 +- fs/fuse/fuse_i.h | 4 ++-- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index cd3b91b60cae..c50f951596dd 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1771,17 +1771,27 @@ static bool update_mtime(unsigned ivalid, bool trust_local_mtime) return true; } -static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr, - struct fuse_setattr_in *arg, bool trust_local_cmtime) +static void iattr_to_fattr(struct mnt_idmap *idmap, struct fuse_conn *fc, + struct iattr *iattr, struct fuse_setattr_in *arg, + bool trust_local_cmtime) { unsigned ivalid = iattr->ia_valid; if (ivalid & ATTR_MODE) arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; - if (ivalid & ATTR_UID) - arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid); - if (ivalid & ATTR_GID) - arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid); + + if (ivalid & ATTR_UID) { + kuid_t fsuid = from_vfsuid(idmap, fc->user_ns, iattr->ia_vfsuid); + arg->valid |= FATTR_UID; + arg->uid = from_kuid(fc->user_ns, fsuid); + } + + if (ivalid & ATTR_GID) { + kgid_t fsgid = from_vfsgid(idmap, fc->user_ns, iattr->ia_vfsgid); + arg->valid |= FATTR_GID; + arg->gid = from_kgid(fc->user_ns, fsgid); + } + if (ivalid & ATTR_SIZE) arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; if (ivalid & ATTR_ATIME) { @@ -1901,8 +1911,8 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff) * vmtruncate() doesn't allow for this case, so do the rlimit checking * and the actual truncation by hand. */ -int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, - struct file *file) +int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr, struct file *file) { struct inode *inode = d_inode(dentry); struct fuse_mount *fm = get_fuse_mount(inode); @@ -1922,7 +1932,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, if (!fc->default_permissions) attr->ia_valid |= ATTR_FORCE; - err = setattr_prepare(&nop_mnt_idmap, dentry, attr); + err = setattr_prepare(idmap, dentry, attr); if (err) return err; @@ -1981,7 +1991,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); - iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime); + iattr_to_fattr(idmap, fc, attr, &inarg, trust_local_cmtime); if (file) { struct fuse_file *ff = file->private_data; inarg.valid |= FATTR_FH; @@ -2116,7 +2126,7 @@ static int fuse_setattr(struct mnt_idmap *idmap, struct dentry *entry, if (!attr->ia_valid) return 0; - ret = fuse_do_setattr(entry, attr, file); + ret = fuse_do_setattr(idmap, entry, attr, file); if (!ret) { /* * If filesystem supports acls it may have updated acl xattrs in diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f39456c65ed7..562bdf8d5976 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2966,7 +2966,7 @@ static void fuse_do_truncate(struct file *file) attr.ia_file = file; attr.ia_valid |= ATTR_FILE; - fuse_do_setattr(file_dentry(file), &attr, file); + fuse_do_setattr(file_mnt_idmap(file), file_dentry(file), &attr, file); } static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index d06934e70cc5..883151a44d72 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -1333,8 +1333,8 @@ bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written); int fuse_flush_times(struct inode *inode, struct fuse_file *ff); int fuse_write_inode(struct inode *inode, struct writeback_control *wbc); -int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, - struct file *file); +int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry, + struct iattr *attr, struct file *file); void fuse_set_initialized(struct fuse_conn *fc); From 6d8f2f4fde1377605b0f1521e81ec31c135fe6c2 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 15 Aug 2024 11:24:24 +0200 Subject: [PATCH 07/11] fs/fuse: drop idmap argument from __fuse_get_acl We don't need to have idmap in the __fuse_get_acl as we don't have any use for it. In the current POSIX ACL implementation, idmapped mounts are taken into account on the userspace/kernel border (see vfs_set_acl_idmapped_mnt() and vfs_posix_acl_to_xattr()). Cc: Christian Brauner Cc: Seth Forshee Cc: Miklos Szeredi Cc: Amir Goldstein Cc: Bernd Schubert Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20240815092429.103356-8-aleksandr.mikhalitsyn@canonical.com Reviewed-by: Christian Brauner Signed-off-by: Christian Brauner --- fs/fuse/acl.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c index 04cfd8fee992..897d813c5e92 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -12,7 +12,6 @@ #include static struct posix_acl *__fuse_get_acl(struct fuse_conn *fc, - struct mnt_idmap *idmap, struct inode *inode, int type, bool rcu) { int size; @@ -74,7 +73,7 @@ struct posix_acl *fuse_get_acl(struct mnt_idmap *idmap, if (fuse_no_acl(fc, inode)) return ERR_PTR(-EOPNOTSUPP); - return __fuse_get_acl(fc, idmap, inode, type, false); + return __fuse_get_acl(fc, inode, type, false); } struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu) @@ -90,8 +89,7 @@ struct posix_acl *fuse_get_inode_acl(struct inode *inode, int type, bool rcu) */ if (!fc->posix_acl) return NULL; - - return __fuse_get_acl(fc, &nop_mnt_idmap, inode, type, rcu); + return __fuse_get_acl(fc, inode, type, rcu); } int fuse_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, From ab7c30987cbb50914ea9774fbe3713225748eed5 Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 15 Aug 2024 11:24:25 +0200 Subject: [PATCH 08/11] fs/fuse: support idmapped ->set_acl It's just a matter of adjusting a permission check condition for S_ISGID flag. All the rest is already handled in the generic VFS code. Notice that this permission check is the analog of what we have in posix_acl_update_mode() generic helper, but fuse doesn't use this helper as on the kernel side we don't care about ensuring that POSIX ACL and CHMOD permissions are in sync as it is a responsibility of a userspace daemon to handle that. For the same reason we don't have a calls to posix_acl_chmod(), while most of other filesystem do. Cc: Christian Brauner Cc: Seth Forshee Cc: Miklos Szeredi Cc: Amir Goldstein Cc: Bernd Schubert Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20240815092429.103356-9-aleksandr.mikhalitsyn@canonical.com Reviewed-by: Christian Brauner Signed-off-by: Christian Brauner --- fs/fuse/acl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c index 897d813c5e92..8f484b105f13 100644 --- a/fs/fuse/acl.c +++ b/fs/fuse/acl.c @@ -144,8 +144,8 @@ int fuse_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, * be stripped. */ if (fc->posix_acl && - !in_group_or_capable(&nop_mnt_idmap, inode, - i_gid_into_vfsgid(&nop_mnt_idmap, inode))) + !in_group_or_capable(idmap, inode, + i_gid_into_vfsgid(idmap, inode))) extra_flags |= FUSE_SETXATTR_ACL_KILL_SGID; ret = fuse_setxattr(inode, name, value, size, 0, extra_flags); From 76c0baad3782c20060eac12b630230cefd412fdb Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 15 Aug 2024 11:24:26 +0200 Subject: [PATCH 09/11] fs/fuse: properly handle idmapped ->rename op Support of RENAME_WHITEOUT with idmapped mounts requires an API extension for FUSE_RENAME2. Let's just forbid this combination for now. It's not critical at all as it's only needed for overlayfs on top of fuse/virtiofs. Choice of EINVAL is not random, we just simulate a standard behavior when RENAME_WHITEOUT flag is not supported. Cc: Christian Brauner Cc: Seth Forshee Cc: Miklos Szeredi Cc: Amir Goldstein Cc: Bernd Schubert Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20240815092429.103356-10-aleksandr.mikhalitsyn@canonical.com Reviewed-by: Christian Brauner Signed-off-by: Christian Brauner --- fs/fuse/dir.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index c50f951596dd..0cd01f25251f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1111,6 +1111,9 @@ static int fuse_rename2(struct mnt_idmap *idmap, struct inode *olddir, if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) return -EINVAL; + if ((flags & RENAME_WHITEOUT) && (idmap != &nop_mnt_idmap)) + return -EINVAL; + if (flags) { if (fc->no_rename2 || fc->minor < 23) return -EINVAL; From 9aace2eda1bd4b6213353b01a2728633b153a18a Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 15 Aug 2024 11:24:27 +0200 Subject: [PATCH 10/11] fs/fuse: allow idmapped mounts Now we have everything in place and we can allow idmapped mounts by setting the FS_ALLOW_IDMAP flag. Notice that real availability of idmapped mounts will depend on the fuse daemon. Fuse daemon have to set FUSE_ALLOW_IDMAP flag in the FUSE_INIT reply. To discuss: - we enable idmapped mounts support only if "default_permissions" mode is enabled, because otherwise we would need to deal with UID/GID mappings in the userspace side OR provide the userspace with idmapped req->in.h.uid/req->in.h.gid values which is not something that we probably want to. Idmapped mounts phylosophy is not about faking caller uid/gid. Some extra links and examples: - libfuse support https://github.com/mihalicyn/libfuse/commits/idmap_support - fuse-overlayfs support: https://github.com/mihalicyn/fuse-overlayfs/commits/idmap_support - cephfs-fuse conversion example https://github.com/mihalicyn/ceph/commits/fuse_idmap - glusterfs conversion example https://github.com/mihalicyn/glusterfs/commits/fuse_idmap Cc: Christian Brauner Cc: Seth Forshee Cc: Miklos Szeredi Cc: Amir Goldstein Cc: Bernd Schubert Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20240815092429.103356-11-aleksandr.mikhalitsyn@canonical.com Reviewed-by: Christian Brauner Signed-off-by: Christian Brauner --- fs/fuse/inode.c | 14 +++++++++++--- include/uapi/linux/fuse.h | 5 ++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6c205731c844..b840189ac8be 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -1345,6 +1345,12 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fm->sb->s_export_op = &fuse_export_fid_operations; if (flags & FUSE_OWNER_UID_GID_EXT) fc->owner_uid_gid_ext = 1; + if (flags & FUSE_ALLOW_IDMAP) { + if (fc->owner_uid_gid_ext && fc->default_permissions) + fm->sb->s_iflags &= ~SB_I_NOIDMAP; + else + ok = false; + } } else { ra_pages = fc->max_read / PAGE_SIZE; fc->no_lock = 1; @@ -1392,7 +1398,8 @@ void fuse_send_init(struct fuse_mount *fm) FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT | FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP | FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_ALLOW_MMAP | - FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_OWNER_UID_GID_EXT; + FUSE_NO_EXPORT_SUPPORT | FUSE_HAS_RESEND | FUSE_OWNER_UID_GID_EXT | + FUSE_ALLOW_IDMAP; #ifdef CONFIG_FUSE_DAX if (fm->fc->dax) flags |= FUSE_MAP_ALIGNMENT; @@ -1569,6 +1576,7 @@ static void fuse_sb_defaults(struct super_block *sb) sb->s_time_gran = 1; sb->s_export_op = &fuse_export_operations; sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE; + sb->s_iflags |= SB_I_NOIDMAP; if (sb->s_user_ns != &init_user_ns) sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); @@ -1981,7 +1989,7 @@ static void fuse_kill_sb_anon(struct super_block *sb) static struct file_system_type fuse_fs_type = { .owner = THIS_MODULE, .name = "fuse", - .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT, + .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT | FS_ALLOW_IDMAP, .init_fs_context = fuse_init_fs_context, .parameters = fuse_fs_parameters, .kill_sb = fuse_kill_sb_anon, @@ -2002,7 +2010,7 @@ static struct file_system_type fuseblk_fs_type = { .init_fs_context = fuse_init_fs_context, .parameters = fuse_fs_parameters, .kill_sb = fuse_kill_sb_blk, - .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, + .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE | FS_ALLOW_IDMAP, }; MODULE_ALIAS_FS("fuseblk"); diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index d9ecc17fd13b..b23e8247ce43 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -221,6 +221,7 @@ * 7.41 * - add FUSE_EXT_OWNER_UID_GID * - add FUSE_OWNER_UID_GID_EXT + * - add FUSE_ALLOW_IDMAP */ #ifndef _LINUX_FUSE_H @@ -256,7 +257,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 40 +#define FUSE_KERNEL_MINOR_VERSION 41 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -427,6 +428,7 @@ struct fuse_file_lock { * of the request ID indicates resend requests * FUSE_OWNER_UID_GID_EXT: add inode owner UID/GID info to create, mkdir, * symlink and mknod + * FUSE_ALLOW_IDMAP: allow creation of idmapped mounts */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -473,6 +475,7 @@ struct fuse_file_lock { /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP #define FUSE_OWNER_UID_GID_EXT (1ULL << 40) +#define FUSE_ALLOW_IDMAP (1ULL << 41) /** * CUSE INIT request/reply flags From 020a698f136c609c19d0ace16516c6e1066fa06b Mon Sep 17 00:00:00 2001 From: Alexander Mikhalitsyn Date: Thu, 15 Aug 2024 11:24:28 +0200 Subject: [PATCH 11/11] fs/fuse/virtio_fs: allow idmapped mounts Allow idmapped mounts for virtiofs. It's absolutely safe as for virtiofs we have the same feature negotiation mechanism as for classical fuse filesystems. This does not affect any existing setups anyhow. virtiofsd support: https://gitlab.com/virtio-fs/virtiofsd/-/merge_requests/245 Cc: Christian Brauner Cc: Seth Forshee Cc: Miklos Szeredi Cc: Vivek Goyal Cc: German Maglione Cc: Amir Goldstein Cc: Bernd Schubert Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20240815092429.103356-12-aleksandr.mikhalitsyn@canonical.com Reviewed-by: Christian Brauner Signed-off-by: Christian Brauner --- fs/fuse/virtio_fs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index dd5260141615..7e5bbaef6f76 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1628,6 +1628,7 @@ static struct file_system_type virtio_fs_type = { .name = "virtiofs", .init_fs_context = virtio_fs_init_fs_context, .kill_sb = virtio_kill_sb, + .fs_flags = FS_ALLOW_IDMAP, }; static int virtio_fs_uevent(const struct kobject *kobj, struct kobj_uevent_env *env)