From cdda1f26e74bac732eca537a69f19f6a37b641be Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Thu, 10 Oct 2024 16:52:32 +0100 Subject: [PATCH] pidfd: add ioctl to retrieve pid info A common pattern when using pid fds is having to get information about the process, which currently requires /proc being mounted, resolving the fd to a pid, and then do manual string parsing of /proc/N/status and friends. This needs to be reimplemented over and over in all userspace projects (e.g.: I have reimplemented resolving in systemd, dbus, dbus-daemon, polkit so far), and requires additional care in checking that the fd is still valid after having parsed the data, to avoid races. Having a programmatic API that can be used directly removes all these requirements, including having /proc mounted. As discussed at LPC24, add an ioctl with an extensible struct so that more parameters can be added later if needed. Start with returning pid/tgid/ppid and creds unconditionally, and cgroupid optionally. Signed-off-by: Luca Boccassi Link: https://lore.kernel.org/r/20241010155401.2268522-1-luca.boccassi@gmail.com Signed-off-by: Christian Brauner --- fs/pidfs.c | 86 ++++++++++++++++++- include/uapi/linux/pidfd.h | 50 +++++++++++ .../testing/selftests/pidfd/pidfd_open_test.c | 82 +++++++++++++++++- 3 files changed, 214 insertions(+), 4 deletions(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index 80675b6bf884..618abb1fa1b8 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -114,6 +115,81 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) return poll_flags; } +static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long arg) +{ + struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg; + size_t usize = _IOC_SIZE(cmd); + struct pidfd_info kinfo = {}; + struct user_namespace *user_ns; + const struct cred *c; + __u64 mask; +#ifdef CONFIG_CGROUPS + struct cgroup *cgrp; +#endif + + if (!uinfo) + return -EINVAL; + if (usize < PIDFD_INFO_SIZE_VER0) + return -EINVAL; /* First version, no smaller struct possible */ + + if (copy_from_user(&mask, &uinfo->mask, sizeof(mask))) + return -EFAULT; + + c = get_task_cred(task); + if (!c) + return -ESRCH; + + /* Unconditionally return identifiers and credentials, the rest only on request */ + + user_ns = current_user_ns(); + kinfo.ruid = from_kuid_munged(user_ns, c->uid); + kinfo.rgid = from_kgid_munged(user_ns, c->gid); + kinfo.euid = from_kuid_munged(user_ns, c->euid); + kinfo.egid = from_kgid_munged(user_ns, c->egid); + kinfo.suid = from_kuid_munged(user_ns, c->suid); + kinfo.sgid = from_kgid_munged(user_ns, c->sgid); + kinfo.fsuid = from_kuid_munged(user_ns, c->fsuid); + kinfo.fsgid = from_kgid_munged(user_ns, c->fsgid); + kinfo.mask |= PIDFD_INFO_CREDS; + put_cred(c); + +#ifdef CONFIG_CGROUPS + rcu_read_lock(); + cgrp = task_dfl_cgroup(task); + kinfo.cgroupid = cgroup_id(cgrp); + kinfo.mask |= PIDFD_INFO_CGROUPID; + rcu_read_unlock(); +#endif + + /* + * Copy pid/tgid last, to reduce the chances the information might be + * stale. Note that it is not possible to ensure it will be valid as the + * task might return as soon as the copy_to_user finishes, but that's ok + * and userspace expects that might happen and can act accordingly, so + * this is just best-effort. What we can do however is checking that all + * the fields are set correctly, or return ESRCH to avoid providing + * incomplete information. */ + + kinfo.ppid = task_ppid_nr_ns(task, NULL); + kinfo.tgid = task_tgid_vnr(task); + kinfo.pid = task_pid_vnr(task); + kinfo.mask |= PIDFD_INFO_PID; + + if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1)) + return -ESRCH; + + /* + * If userspace and the kernel have the same struct size it can just + * be copied. If userspace provides an older struct, only the bits that + * userspace knows about will be copied. If userspace provides a new + * struct, only the bits that the kernel knows about will be copied. + */ + if (copy_to_user(uinfo, &kinfo, min(usize, sizeof(kinfo)))) + return -EFAULT; + + return 0; +} + static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct task_struct *task __free(put_task) = NULL; @@ -122,13 +198,17 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct ns_common *ns_common = NULL; struct pid_namespace *pid_ns; - if (arg) - return -EINVAL; - task = get_pid_task(pid, PIDTYPE_PID); if (!task) return -ESRCH; + /* Extensible IOCTL that does not open namespace FDs, take a shortcut */ + if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_INFO)) + return pidfd_info(task, cmd, arg); + + if (arg) + return -EINVAL; + scoped_guard(task_lock, task) { nsp = task->nsproxy; if (nsp) diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 565fc0629fff..4540f6301b8c 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -16,6 +16,55 @@ #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) +/* Flags for pidfd_info. */ +#define PIDFD_INFO_PID (1UL << 0) /* Always returned, even if not requested */ +#define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */ +#define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */ + +#define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */ + +struct pidfd_info { + /* + * This mask is similar to the request_mask in statx(2). + * + * Userspace indicates what extensions or expensive-to-calculate fields + * they want by setting the corresponding bits in mask. The kernel + * will ignore bits that it does not know about. + * + * When filling the structure, the kernel will only set bits + * corresponding to the fields that were actually filled by the kernel. + * This also includes any future extensions that might be automatically + * filled. If the structure size is too small to contain a field + * (requested or not), to avoid confusion the mask will not + * contain a bit for that field. + * + * As such, userspace MUST verify that mask contains the + * corresponding flags after the ioctl(2) returns to ensure that it is + * using valid data. + */ + __u64 mask; + /* + * The information contained in the following fields might be stale at the + * time it is received, as the target process might have exited as soon as + * the IOCTL was processed, and there is no way to avoid that. However, it + * is guaranteed that if the call was successful, then the information was + * correct and referred to the intended process at the time the work was + * performed. */ + __u64 cgroupid; + __u32 pid; + __u32 tgid; + __u32 ppid; + __u32 ruid; + __u32 rgid; + __u32 euid; + __u32 egid; + __u32 suid; + __u32 sgid; + __u32 fsuid; + __u32 fsgid; + __u32 spare0[1]; +}; + #define PIDFS_IOCTL_MAGIC 0xFF #define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) @@ -28,5 +77,6 @@ #define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) #define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) #define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) #endif /* _UAPI_LINUX_PIDFD_H */ diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c index c62564c264b1..ce413a221bac 100644 --- a/tools/testing/selftests/pidfd/pidfd_open_test.c +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -21,6 +22,32 @@ #include "pidfd.h" #include "../kselftest.h" +#ifndef PIDFS_IOCTL_MAGIC +#define PIDFS_IOCTL_MAGIC 0xFF +#endif + +#ifndef PIDFD_GET_INFO +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) +#define PIDFD_INFO_CGROUPID (1UL << 0) + +struct pidfd_info { + __u64 request_mask; + __u64 cgroupid; + __u32 pid; + __u32 tgid; + __u32 ppid; + __u32 ruid; + __u32 rgid; + __u32 euid; + __u32 egid; + __u32 suid; + __u32 sgid; + __u32 fsuid; + __u32 fsgid; + __u32 spare0[1]; +}; +#endif + static int safe_int(const char *numstr, int *converted) { char *err = NULL; @@ -120,10 +147,13 @@ out: int main(int argc, char **argv) { + struct pidfd_info info = { + .request_mask = PIDFD_INFO_CGROUPID, + }; int pidfd = -1, ret = 1; pid_t pid; - ksft_set_plan(3); + ksft_set_plan(4); pidfd = sys_pidfd_open(-1, 0); if (pidfd >= 0) { @@ -153,6 +183,56 @@ int main(int argc, char **argv) pid = get_pid_from_fdinfo_file(pidfd, "Pid:", sizeof("Pid:") - 1); ksft_print_msg("pidfd %d refers to process with pid %d\n", pidfd, pid); + if (ioctl(pidfd, PIDFD_GET_INFO, &info) < 0) { + ksft_print_msg("%s - failed to get info from pidfd\n", strerror(errno)); + goto on_error; + } + if (info.pid != pid) { + ksft_print_msg("pid from fdinfo file %d does not match pid from ioctl %d\n", + pid, info.pid); + goto on_error; + } + if (info.ppid != getppid()) { + ksft_print_msg("ppid %d does not match ppid from ioctl %d\n", + pid, info.pid); + goto on_error; + } + if (info.ruid != getuid()) { + ksft_print_msg("uid %d does not match uid from ioctl %d\n", + getuid(), info.ruid); + goto on_error; + } + if (info.rgid != getgid()) { + ksft_print_msg("gid %d does not match gid from ioctl %d\n", + getgid(), info.rgid); + goto on_error; + } + if (info.euid != geteuid()) { + ksft_print_msg("euid %d does not match euid from ioctl %d\n", + geteuid(), info.euid); + goto on_error; + } + if (info.egid != getegid()) { + ksft_print_msg("egid %d does not match egid from ioctl %d\n", + getegid(), info.egid); + goto on_error; + } + if (info.suid != geteuid()) { + ksft_print_msg("suid %d does not match suid from ioctl %d\n", + geteuid(), info.suid); + goto on_error; + } + if (info.sgid != getegid()) { + ksft_print_msg("sgid %d does not match sgid from ioctl %d\n", + getegid(), info.sgid); + goto on_error; + } + if ((info.request_mask & PIDFD_INFO_CGROUPID) && info.cgroupid == 0) { + ksft_print_msg("cgroupid should not be 0 when PIDFD_INFO_CGROUPID is set\n"); + goto on_error; + } + ksft_test_result_pass("get info from pidfd test: passed\n"); + ret = 0; on_error: