// SPDX-License-Identifier: GPL-2.0 #ifndef NO_BCACHEFS_FS #include "bcachefs.h" #include "chardev.h" #include "dirent.h" #include "fs.h" #include "fs-common.h" #include "fs-ioctl.h" #include "quota.h" #include #include #include #include #include #include #define FS_IOC_GOINGDOWN _IOR('X', 125, __u32) #define FSOP_GOING_FLAGS_DEFAULT 0x0 /* going down */ #define FSOP_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */ #define FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ struct flags_set { unsigned mask; unsigned flags; unsigned projid; bool set_projinherit; bool projinherit; }; static int bch2_inode_flags_set(struct btree_trans *trans, struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { struct bch_fs *c = inode->v.i_sb->s_fs_info; /* * We're relying on btree locking here for exclusion with other ioctl * calls - use the flags in the btree (@bi), not inode->i_flags: */ struct flags_set *s = p; unsigned newflags = s->flags; unsigned oldflags = bi->bi_flags & s->mask; if (((newflags ^ oldflags) & (BCH_INODE_append|BCH_INODE_immutable)) && !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; if (!S_ISREG(bi->bi_mode) && !S_ISDIR(bi->bi_mode) && (newflags & (BCH_INODE_nodump|BCH_INODE_noatime)) != newflags) return -EINVAL; if (s->set_projinherit) { bi->bi_fields_set &= ~(1 << Inode_opt_project); bi->bi_fields_set |= ((int) s->projinherit << Inode_opt_project); } bi->bi_flags &= ~s->mask; bi->bi_flags |= newflags; bi->bi_ctime = timespec_to_bch2_time(c, current_time(&inode->v)); return 0; } static int bch2_ioc_getflags(struct bch_inode_info *inode, int __user *arg) { unsigned flags = map_flags(bch_flags_to_uflags, inode->ei_inode.bi_flags); return put_user(flags, arg); } static int bch2_ioc_setflags(struct bch_fs *c, struct file *file, struct bch_inode_info *inode, void __user *arg) { struct flags_set s = { .mask = map_defined(bch_flags_to_uflags) }; unsigned uflags; int ret; if (get_user(uflags, (int __user *) arg)) return -EFAULT; s.flags = map_flags_rev(bch_flags_to_uflags, uflags); if (uflags) return -EOPNOTSUPP; ret = mnt_want_write_file(file); if (ret) return ret; inode_lock(&inode->v); if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) { ret = -EACCES; goto setflags_out; } mutex_lock(&inode->ei_update_lock); ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: bch2_write_inode(c, inode, bch2_inode_flags_set, &s, ATTR_CTIME); mutex_unlock(&inode->ei_update_lock); setflags_out: inode_unlock(&inode->v); mnt_drop_write_file(file); return ret; } static int bch2_ioc_fsgetxattr(struct bch_inode_info *inode, struct fsxattr __user *arg) { struct fsxattr fa = { 0 }; fa.fsx_xflags = map_flags(bch_flags_to_xflags, inode->ei_inode.bi_flags); if (inode->ei_inode.bi_fields_set & (1 << Inode_opt_project)) fa.fsx_xflags |= FS_XFLAG_PROJINHERIT; fa.fsx_projid = inode->ei_qid.q[QTYP_PRJ]; if (copy_to_user(arg, &fa, sizeof(fa))) return -EFAULT; return 0; } static int fssetxattr_inode_update_fn(struct btree_trans *trans, struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { struct flags_set *s = p; if (s->projid != bi->bi_project) { bi->bi_fields_set |= 1U << Inode_opt_project; bi->bi_project = s->projid; } return bch2_inode_flags_set(trans, inode, bi, p); } static int bch2_ioc_fssetxattr(struct bch_fs *c, struct file *file, struct bch_inode_info *inode, struct fsxattr __user *arg) { struct flags_set s = { .mask = map_defined(bch_flags_to_xflags) }; struct fsxattr fa; int ret; if (copy_from_user(&fa, arg, sizeof(fa))) return -EFAULT; s.set_projinherit = true; s.projinherit = (fa.fsx_xflags & FS_XFLAG_PROJINHERIT) != 0; fa.fsx_xflags &= ~FS_XFLAG_PROJINHERIT; s.flags = map_flags_rev(bch_flags_to_xflags, fa.fsx_xflags); if (fa.fsx_xflags) return -EOPNOTSUPP; if (fa.fsx_projid >= U32_MAX) return -EINVAL; /* * inode fields accessible via the xattr interface are stored with a +1 * bias, so that 0 means unset: */ s.projid = fa.fsx_projid + 1; ret = mnt_want_write_file(file); if (ret) return ret; inode_lock(&inode->v); if (!inode_owner_or_capable(file_mnt_idmap(file), &inode->v)) { ret = -EACCES; goto err; } mutex_lock(&inode->ei_update_lock); ret = bch2_subvol_is_ro(c, inode->ei_inum.subvol) ?: bch2_set_projid(c, inode, fa.fsx_projid) ?: bch2_write_inode(c, inode, fssetxattr_inode_update_fn, &s, ATTR_CTIME); mutex_unlock(&inode->ei_update_lock); err: inode_unlock(&inode->v); mnt_drop_write_file(file); return ret; } static int bch2_reinherit_attrs_fn(struct btree_trans *trans, struct bch_inode_info *inode, struct bch_inode_unpacked *bi, void *p) { struct bch_inode_info *dir = p; return !bch2_reinherit_attrs(bi, &dir->ei_inode); } static int bch2_ioc_reinherit_attrs(struct bch_fs *c, struct file *file, struct bch_inode_info *src, const char __user *name) { struct bch_hash_info hash = bch2_hash_info_init(c, &src->ei_inode); struct bch_inode_info *dst; struct inode *vinode = NULL; char *kname = NULL; struct qstr qstr; int ret = 0; subvol_inum inum; kname = kmalloc(BCH_NAME_MAX + 1, GFP_KERNEL); if (!kname) return -ENOMEM; ret = strncpy_from_user(kname, name, BCH_NAME_MAX); if (unlikely(ret < 0)) goto err1; qstr.len = ret; qstr.name = kname; ret = bch2_dirent_lookup(c, inode_inum(src), &hash, &qstr, &inum); if (ret) goto err1; vinode = bch2_vfs_inode_get(c, inum); ret = PTR_ERR_OR_ZERO(vinode); if (ret) goto err1; dst = to_bch_ei(vinode); ret = mnt_want_write_file(file); if (ret) goto err2; bch2_lock_inodes(INODE_UPDATE_LOCK, src, dst); if (inode_attr_changing(src, dst, Inode_opt_project)) { ret = bch2_fs_quota_transfer(c, dst, src->ei_qid, 1 << QTYP_PRJ, KEY_TYPE_QUOTA_PREALLOC); if (ret) goto err3; } ret = bch2_write_inode(c, dst, bch2_reinherit_attrs_fn, src, 0); err3: bch2_unlock_inodes(INODE_UPDATE_LOCK, src, dst); /* return true if we did work */ if (ret >= 0) ret = !ret; mnt_drop_write_file(file); err2: iput(vinode); err1: kfree(kname); return ret; } static int bch2_ioc_getversion(struct bch_inode_info *inode, u32 __user *arg) { return put_user(inode->v.i_generation, arg); } static int bch2_ioc_getlabel(struct bch_fs *c, char __user *user_label) { int ret; size_t len; char label[BCH_SB_LABEL_SIZE]; BUILD_BUG_ON(BCH_SB_LABEL_SIZE >= FSLABEL_MAX); mutex_lock(&c->sb_lock); memcpy(label, c->disk_sb.sb->label, BCH_SB_LABEL_SIZE); mutex_unlock(&c->sb_lock); len = strnlen(label, BCH_SB_LABEL_SIZE); if (len == BCH_SB_LABEL_SIZE) { bch_warn(c, "label is too long, return the first %zu bytes", --len); } ret = copy_to_user(user_label, label, len); return ret ? -EFAULT : 0; } static int bch2_ioc_setlabel(struct bch_fs *c, struct file *file, struct bch_inode_info *inode, const char __user *user_label) { int ret; char label[BCH_SB_LABEL_SIZE]; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(label, user_label, sizeof(label))) return -EFAULT; if (strnlen(label, BCH_SB_LABEL_SIZE) == BCH_SB_LABEL_SIZE) { bch_err(c, "unable to set label with more than %d bytes", BCH_SB_LABEL_SIZE - 1); return -EINVAL; } ret = mnt_want_write_file(file); if (ret) return ret; mutex_lock(&c->sb_lock); strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE); ret = bch2_write_super(c); mutex_unlock(&c->sb_lock); mnt_drop_write_file(file); return ret; } static int bch2_ioc_goingdown(struct bch_fs *c, u32 __user *arg) { u32 flags; int ret = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(flags, arg)) return -EFAULT; bch_notice(c, "shutdown by ioctl type %u", flags); switch (flags) { case FSOP_GOING_FLAGS_DEFAULT: ret = bdev_freeze(c->vfs_sb->s_bdev); if (ret) break; bch2_journal_flush(&c->journal); bch2_fs_emergency_read_only(c); bdev_thaw(c->vfs_sb->s_bdev); break; case FSOP_GOING_FLAGS_LOGFLUSH: bch2_journal_flush(&c->journal); fallthrough; case FSOP_GOING_FLAGS_NOLOGFLUSH: bch2_fs_emergency_read_only(c); break; default: ret = -EINVAL; break; } return ret; } static long bch2_ioctl_subvolume_create(struct bch_fs *c, struct file *filp, struct bch_ioctl_subvolume arg) { struct inode *dir; struct bch_inode_info *inode; struct user_namespace *s_user_ns; struct dentry *dst_dentry; struct path src_path, dst_path; int how = LOOKUP_FOLLOW; int error; subvol_inum snapshot_src = { 0 }; unsigned lookup_flags = 0; unsigned create_flags = BCH_CREATE_SUBVOL; if (arg.flags & ~(BCH_SUBVOL_SNAPSHOT_CREATE| BCH_SUBVOL_SNAPSHOT_RO)) return -EINVAL; if (!(arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) && (arg.src_ptr || (arg.flags & BCH_SUBVOL_SNAPSHOT_RO))) return -EINVAL; if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) create_flags |= BCH_CREATE_SNAPSHOT; if (arg.flags & BCH_SUBVOL_SNAPSHOT_RO) create_flags |= BCH_CREATE_SNAPSHOT_RO; if (arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) { /* sync_inodes_sb enforce s_umount is locked */ down_read(&c->vfs_sb->s_umount); sync_inodes_sb(c->vfs_sb); up_read(&c->vfs_sb->s_umount); } retry: if (arg.src_ptr) { error = user_path_at(arg.dirfd, (const char __user *)(unsigned long)arg.src_ptr, how, &src_path); if (error) goto err1; if (src_path.dentry->d_sb->s_fs_info != c) { path_put(&src_path); error = -EXDEV; goto err1; } snapshot_src = inode_inum(to_bch_ei(src_path.dentry->d_inode)); } dst_dentry = user_path_create(arg.dirfd, (const char __user *)(unsigned long)arg.dst_ptr, &dst_path, lookup_flags); error = PTR_ERR_OR_ZERO(dst_dentry); if (error) goto err2; if (dst_dentry->d_sb->s_fs_info != c) { error = -EXDEV; goto err3; } if (dst_dentry->d_inode) { error = -BCH_ERR_EEXIST_subvolume_create; goto err3; } dir = dst_path.dentry->d_inode; if (IS_DEADDIR(dir)) { error = -BCH_ERR_ENOENT_directory_dead; goto err3; } s_user_ns = dir->i_sb->s_user_ns; if (!kuid_has_mapping(s_user_ns, current_fsuid()) || !kgid_has_mapping(s_user_ns, current_fsgid())) { error = -EOVERFLOW; goto err3; } error = inode_permission(file_mnt_idmap(filp), dir, MAY_WRITE | MAY_EXEC); if (error) goto err3; if (!IS_POSIXACL(dir)) arg.mode &= ~current_umask(); error = security_path_mkdir(&dst_path, dst_dentry, arg.mode); if (error) goto err3; if ((arg.flags & BCH_SUBVOL_SNAPSHOT_CREATE) && !arg.src_ptr) snapshot_src.subvol = inode_inum(to_bch_ei(dir)).subvol; down_write(&c->snapshot_create_lock); inode = __bch2_create(file_mnt_idmap(filp), to_bch_ei(dir), dst_dentry, arg.mode|S_IFDIR, 0, snapshot_src, create_flags); up_write(&c->snapshot_create_lock); error = PTR_ERR_OR_ZERO(inode); if (error) goto err3; d_instantiate(dst_dentry, &inode->v); fsnotify_mkdir(dir, dst_dentry); err3: done_path_create(&dst_path, dst_dentry); err2: if (arg.src_ptr) path_put(&src_path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; } err1: return error; } static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp, struct bch_ioctl_subvolume arg) { const char __user *name = (void __user *)(unsigned long)arg.dst_ptr; struct path path; struct inode *dir; struct dentry *victim; int ret = 0; if (arg.flags) return -EINVAL; victim = user_path_locked_at(arg.dirfd, name, &path); if (IS_ERR(victim)) return PTR_ERR(victim); dir = d_inode(path.dentry); if (victim->d_sb->s_fs_info != c) { ret = -EXDEV; goto err; } if (!d_is_positive(victim)) { ret = -ENOENT; goto err; } ret = __bch2_unlink(dir, victim, true); if (!ret) { fsnotify_rmdir(dir, victim); d_delete(victim); } err: inode_unlock(dir); dput(victim); path_put(&path); return ret; } long bch2_fs_file_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct bch_inode_info *inode = file_bch_inode(file); struct bch_fs *c = inode->v.i_sb->s_fs_info; long ret; switch (cmd) { case FS_IOC_GETFLAGS: ret = bch2_ioc_getflags(inode, (int __user *) arg); break; case FS_IOC_SETFLAGS: ret = bch2_ioc_setflags(c, file, inode, (int __user *) arg); break; case FS_IOC_FSGETXATTR: ret = bch2_ioc_fsgetxattr(inode, (void __user *) arg); break; case FS_IOC_FSSETXATTR: ret = bch2_ioc_fssetxattr(c, file, inode, (void __user *) arg); break; case BCHFS_IOC_REINHERIT_ATTRS: ret = bch2_ioc_reinherit_attrs(c, file, inode, (void __user *) arg); break; case FS_IOC_GETVERSION: ret = bch2_ioc_getversion(inode, (u32 __user *) arg); break; case FS_IOC_SETVERSION: ret = -ENOTTY; break; case FS_IOC_GETFSLABEL: ret = bch2_ioc_getlabel(c, (void __user *) arg); break; case FS_IOC_SETFSLABEL: ret = bch2_ioc_setlabel(c, file, inode, (const void __user *) arg); break; case FS_IOC_GOINGDOWN: ret = bch2_ioc_goingdown(c, (u32 __user *) arg); break; case BCH_IOCTL_SUBVOLUME_CREATE: { struct bch_ioctl_subvolume i; ret = copy_from_user(&i, (void __user *) arg, sizeof(i)) ? -EFAULT : bch2_ioctl_subvolume_create(c, file, i); break; } case BCH_IOCTL_SUBVOLUME_DESTROY: { struct bch_ioctl_subvolume i; ret = copy_from_user(&i, (void __user *) arg, sizeof(i)) ? -EFAULT : bch2_ioctl_subvolume_destroy(c, file, i); break; } default: ret = bch2_fs_ioctl(c, cmd, (void __user *) arg); break; } return bch2_err_class(ret); } #ifdef CONFIG_COMPAT long bch2_compat_fs_ioctl(struct file *file, unsigned cmd, unsigned long arg) { /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { case FS_IOC32_GETFLAGS: cmd = FS_IOC_GETFLAGS; break; case FS_IOC32_SETFLAGS: cmd = FS_IOC_SETFLAGS; break; case FS_IOC32_GETVERSION: cmd = FS_IOC_GETVERSION; break; case FS_IOC_GETFSLABEL: case FS_IOC_SETFSLABEL: break; default: return -ENOIOCTLCMD; } return bch2_fs_file_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); } #endif #endif /* NO_BCACHEFS_FS */