mirror of
https://github.com/edk2-porting/linux-next.git
synced 2025-01-18 18:43:59 +08:00
pidfd patches for v5.2-rc1
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE7btrcuORLb1XUhEwjrBW1T7ssS0FAlzReuoACgkQjrBW1T7s sS1uvBAA16pgnhRNxNTrp3LYft6lUWmF4n0baOTVtQNLhPjpwaOxHIrCBugkQCJB QcQ9IQSOvIkaEW0XAQoPBaeLviiKhHOFw1Fv89OtW6xUidSfSV15lcI9f1F2pCm2 4yCL/8XvL6M0NhxiwftJAkWOXeDNLfjFnLwyLxBfgg3EeyqMgUB8raeosEID0ORR gm2/g8DYS2r+KNqM/F4xvMSgabfi2bGk+8BtAaVnftJfstpRNrqKwWnSK3Wspj1l 5gkb8gSsiY6ns3V6RgNHrFlhevFg8V+VjcJt7FR+aUEjOkcoiXas/PhvamMzdsn/ FM1F/A0pM8FSybIUClhnnnxNPc+p8ZN/71YQAPs+Mnh3xvbtKea2lkhC+Xv4OpK3 edutSZWFaiIery82Rk00H3vqiSF1+kRIXSpZSS4mElk4FsVljkyH+nSP7rbmE2MR EQe+kKnZl8QzWrVbnODC+EVvvVpA2bXDvENJmvKqus+t2G0OdV7Iku3F5E3KjF8k S5RRV1zuBF3ugqnjmYrVmJtpEA8mxClmqvg6okru+qW6ngO5oOgVpPLjWn1CXcdj wcuQ6Pe1QwAHS54e9WSWgCHVssLvm9nCdCqypdNaoyGWmbTWntwlrY7Y0JUQnAbB 6/G/DQQiCWY9y8bMZlTEydhIpgcsdROuPYv+oHF5+eQQthsWwHc= =LH11 -----END PGP SIGNATURE----- Merge tag 'pidfd-v5.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux Pull pidfd updates from Christian Brauner: "This patchset makes it possible to retrieve pidfds at process creation time by introducing the new flag CLONE_PIDFD to the clone() system call. Linus originally suggested to implement this as a new flag to clone() instead of making it a separate system call. After a thorough review from Oleg CLONE_PIDFD returns pidfds in the parent_tidptr argument. This means we can give back the associated pid and the pidfd at the same time. Access to process metadata information thus becomes rather trivial. As has been agreed, CLONE_PIDFD creates file descriptors based on anonymous inodes similar to the new mount api. They are made unconditional by this patchset as they are now needed by core kernel code (vfs, pidfd) even more than they already were before (timerfd, signalfd, io_uring, epoll etc.). The core patchset is rather small. The bulky looking changelist is caused by David's very simple changes to Kconfig to make anon inodes unconditional. A pidfd comes with additional information in fdinfo if the kernel supports procfs. 
The fdinfo file contains the pid of the process in the caller's pid namespace in the same format as the procfs status file, i.e. "Pid:\t%d". To remove worries about missing metadata access, this patchset comes with a sample/test program that illustrates how a combination of CLONE_PIDFD and pidfd_send_signal() can be used to gain race-free access to process metadata through /proc/<pid>. Further work based on this patchset has been done by Joel. His work makes pidfds pollable. It finished too late for this merge window. I would prefer to have it sitting in linux-next for a while and send it for inclusion during the 5.3 merge window"
* tag 'pidfd-v5.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux:
  samples: show race-free pidfd metadata access
  signal: support CLONE_PIDFD with pidfd_send_signal
  clone: add CLONE_PIDFD
  Make anon_inodes unconditional
This commit is contained in:
commit
eac7078a0f
@ -22,7 +22,6 @@ config KVM
|
||||
bool "Kernel-based Virtual Machine (KVM) support"
|
||||
depends on MMU && OF
|
||||
select PREEMPT_NOTIFIERS
|
||||
select ANON_INODES
|
||||
select ARM_GIC
|
||||
select ARM_GIC_V3
|
||||
select ARM_GIC_V3_ITS
|
||||
|
@ -23,7 +23,6 @@ config KVM
|
||||
depends on OF
|
||||
select MMU_NOTIFIER
|
||||
select PREEMPT_NOTIFIERS
|
||||
select ANON_INODES
|
||||
select HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
|
||||
select KVM_MMIO
|
||||
|
@ -21,7 +21,6 @@ config KVM
|
||||
depends on MIPS_FP_SUPPORT
|
||||
select EXPORT_UASM
|
||||
select PREEMPT_NOTIFIERS
|
||||
select ANON_INODES
|
||||
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
|
||||
select HAVE_KVM_VCPU_ASYNC_IOCTL
|
||||
select KVM_MMIO
|
||||
|
@ -20,7 +20,6 @@ if VIRTUALIZATION
|
||||
config KVM
|
||||
bool
|
||||
select PREEMPT_NOTIFIERS
|
||||
select ANON_INODES
|
||||
select HAVE_KVM_EVENTFD
|
||||
select HAVE_KVM_VCPU_ASYNC_IOCTL
|
||||
select SRCU
|
||||
|
@ -21,7 +21,6 @@ config KVM
|
||||
prompt "Kernel-based Virtual Machine (KVM) support"
|
||||
depends on HAVE_KVM
|
||||
select PREEMPT_NOTIFIERS
|
||||
select ANON_INODES
|
||||
select HAVE_KVM_CPU_RELAX_INTERCEPT
|
||||
select HAVE_KVM_VCPU_ASYNC_IOCTL
|
||||
select HAVE_KVM_EVENTFD
|
||||
|
@ -44,7 +44,6 @@ config X86
|
||||
#
|
||||
select ACPI_LEGACY_TABLES_LOOKUP if ACPI
|
||||
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
|
||||
select ANON_INODES
|
||||
select ARCH_32BIT_OFF_T if X86_32
|
||||
select ARCH_CLOCKSOURCE_DATA
|
||||
select ARCH_CLOCKSOURCE_INIT
|
||||
|
@ -27,7 +27,6 @@ config KVM
|
||||
depends on X86_LOCAL_APIC
|
||||
select PREEMPT_NOTIFIERS
|
||||
select MMU_NOTIFIER
|
||||
select ANON_INODES
|
||||
select HAVE_KVM_IRQCHIP
|
||||
select HAVE_KVM_IRQFD
|
||||
select IRQ_BYPASS_MANAGER
|
||||
|
@ -174,7 +174,6 @@ source "drivers/base/regmap/Kconfig"
|
||||
config DMA_SHARED_BUFFER
|
||||
bool
|
||||
default n
|
||||
select ANON_INODES
|
||||
select IRQ_WORK
|
||||
help
|
||||
This option enables the framework for buffer-sharing between
|
||||
|
@ -157,7 +157,6 @@ config TCG_CRB
|
||||
config TCG_VTPM_PROXY
|
||||
tristate "VTPM Proxy Interface"
|
||||
depends on TCG_TPM
|
||||
select ANON_INODES
|
||||
---help---
|
||||
This driver proxies for an emulated TPM (vTPM) running in userspace.
|
||||
A device /dev/vtpmx is provided that creates a device pair
|
||||
|
@ -3,7 +3,6 @@ menu "DMABUF options"
|
||||
config SYNC_FILE
|
||||
bool "Explicit Synchronization Framework"
|
||||
default n
|
||||
select ANON_INODES
|
||||
select DMA_SHARED_BUFFER
|
||||
---help---
|
||||
The Sync File Framework adds explicit synchronization via
|
||||
|
@ -12,7 +12,6 @@ config ARCH_HAVE_CUSTOM_GPIO_H
|
||||
|
||||
menuconfig GPIOLIB
|
||||
bool "GPIO Support"
|
||||
select ANON_INODES
|
||||
help
|
||||
This enables GPIO support through the generic GPIO library.
|
||||
You only need to enable this, if you also want to enable
|
||||
|
@ -4,7 +4,6 @@
|
||||
|
||||
menuconfig IIO
|
||||
tristate "Industrial I/O support"
|
||||
select ANON_INODES
|
||||
help
|
||||
The industrial I/O subsystem provides a unified framework for
|
||||
drivers for many different types of embedded sensors using a
|
||||
|
@ -25,7 +25,6 @@ config INFINIBAND_USER_MAD
|
||||
|
||||
config INFINIBAND_USER_ACCESS
|
||||
tristate "InfiniBand userspace access (verbs and CM)"
|
||||
select ANON_INODES
|
||||
depends on MMU
|
||||
---help---
|
||||
Userspace InfiniBand access support. This enables the
|
||||
|
@ -22,7 +22,6 @@ menuconfig VFIO
|
||||
tristate "VFIO Non-Privileged userspace driver framework"
|
||||
depends on IOMMU_API
|
||||
select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM || ARM64)
|
||||
select ANON_INODES
|
||||
help
|
||||
VFIO provides a framework for secure userspace device drivers.
|
||||
See Documentation/vfio.txt for more details.
|
||||
|
@ -25,7 +25,7 @@ obj-$(CONFIG_PROC_FS) += proc_namespace.o
|
||||
|
||||
obj-y += notify/
|
||||
obj-$(CONFIG_EPOLL) += eventpoll.o
|
||||
obj-$(CONFIG_ANON_INODES) += anon_inodes.o
|
||||
obj-y += anon_inodes.o
|
||||
obj-$(CONFIG_SIGNALFD) += signalfd.o
|
||||
obj-$(CONFIG_TIMERFD) += timerfd.o
|
||||
obj-$(CONFIG_EVENTFD) += eventfd.o
|
||||
|
@ -1,7 +1,6 @@
|
||||
config FANOTIFY
|
||||
bool "Filesystem wide access notification"
|
||||
select FSNOTIFY
|
||||
select ANON_INODES
|
||||
select EXPORTFS
|
||||
default n
|
||||
---help---
|
||||
|
@ -1,6 +1,5 @@
|
||||
config INOTIFY_USER
|
||||
bool "Inotify support for userspace"
|
||||
select ANON_INODES
|
||||
select FSNOTIFY
|
||||
default y
|
||||
---help---
|
||||
|
@ -66,6 +66,8 @@ struct pid
|
||||
|
||||
extern struct pid init_struct_pid;
|
||||
|
||||
extern const struct file_operations pidfd_fops;
|
||||
|
||||
static inline struct pid *get_pid(struct pid *pid)
|
||||
{
|
||||
if (pid)
|
||||
|
@ -10,6 +10,7 @@
|
||||
#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
|
||||
#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
|
||||
#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
|
||||
#define CLONE_PIDFD 0x00001000 /* set if a pidfd should be placed in parent */
|
||||
#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
|
||||
#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
|
||||
#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
|
||||
|
10
init/Kconfig
10
init/Kconfig
@ -1171,9 +1171,6 @@ config LD_DEAD_CODE_DATA_ELIMINATION
|
||||
config SYSCTL
|
||||
bool
|
||||
|
||||
config ANON_INODES
|
||||
bool
|
||||
|
||||
config HAVE_UID16
|
||||
bool
|
||||
|
||||
@ -1378,14 +1375,12 @@ config HAVE_FUTEX_CMPXCHG
|
||||
config EPOLL
|
||||
bool "Enable eventpoll support" if EXPERT
|
||||
default y
|
||||
select ANON_INODES
|
||||
help
|
||||
Disabling this option will cause the kernel to be built without
|
||||
support for epoll family of system calls.
|
||||
|
||||
config SIGNALFD
|
||||
bool "Enable signalfd() system call" if EXPERT
|
||||
select ANON_INODES
|
||||
default y
|
||||
help
|
||||
Enable the signalfd() system call that allows to receive signals
|
||||
@ -1395,7 +1390,6 @@ config SIGNALFD
|
||||
|
||||
config TIMERFD
|
||||
bool "Enable timerfd() system call" if EXPERT
|
||||
select ANON_INODES
|
||||
default y
|
||||
help
|
||||
Enable the timerfd() system call that allows to receive timer
|
||||
@ -1405,7 +1399,6 @@ config TIMERFD
|
||||
|
||||
config EVENTFD
|
||||
bool "Enable eventfd() system call" if EXPERT
|
||||
select ANON_INODES
|
||||
default y
|
||||
help
|
||||
Enable the eventfd() system call that allows to receive both
|
||||
@ -1516,7 +1509,6 @@ config KALLSYMS_BASE_RELATIVE
|
||||
# syscall, maps, verifier
|
||||
config BPF_SYSCALL
|
||||
bool "Enable bpf() system call"
|
||||
select ANON_INODES
|
||||
select BPF
|
||||
select IRQ_WORK
|
||||
default n
|
||||
@ -1533,7 +1525,6 @@ config BPF_JIT_ALWAYS_ON
|
||||
|
||||
config USERFAULTFD
|
||||
bool "Enable userfaultfd() system call"
|
||||
select ANON_INODES
|
||||
depends on MMU
|
||||
help
|
||||
Enable the userfaultfd() system call that allows to intercept and
|
||||
@ -1600,7 +1591,6 @@ config PERF_EVENTS
|
||||
bool "Kernel performance events and counters"
|
||||
default y if PROFILING
|
||||
depends on HAVE_PERF_EVENTS
|
||||
select ANON_INODES
|
||||
select IRQ_WORK
|
||||
select SRCU
|
||||
help
|
||||
|
107
kernel/fork.c
107
kernel/fork.c
@ -11,6 +11,7 @@
|
||||
* management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
|
||||
*/
|
||||
|
||||
#include <linux/anon_inodes.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sched/autogroup.h>
|
||||
#include <linux/sched/mm.h>
|
||||
@ -21,6 +22,7 @@
|
||||
#include <linux/sched/task.h>
|
||||
#include <linux/sched/task_stack.h>
|
||||
#include <linux/sched/cputime.h>
|
||||
#include <linux/seq_file.h>
|
||||
#include <linux/rtmutex.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/unistd.h>
|
||||
@ -1670,6 +1672,58 @@ static inline void rcu_copy_process(struct task_struct *p)
|
||||
#endif /* #ifdef CONFIG_TASKS_RCU */
|
||||
}
|
||||
|
||||
static int pidfd_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct pid *pid = file->private_data;
|
||||
|
||||
file->private_data = NULL;
|
||||
put_pid(pid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_FS
/*
 * ->show_fdinfo handler for pidfd files.
 *
 * Writes a single "Pid:\t<nr>" line to @m, where <nr> is the number of the
 * struct pid held in @f->private_data as seen from the pid namespace
 * obtained via proc_pid_ns() for the inode behind the seq_file itself.
 */
static void pidfd_show_fdinfo(struct seq_file *m, struct file *f)
{
	struct pid *pid = f->private_data;
	struct pid_namespace *ns;

	ns = proc_pid_ns(file_inode(m->file));

	seq_put_decimal_ull(m, "Pid:\t", pid_nr_ns(pid, ns));
	seq_putc(m, '\n');
}
#endif
|
||||
|
||||
const struct file_operations pidfd_fops = {
|
||||
.release = pidfd_release,
|
||||
#ifdef CONFIG_PROC_FS
|
||||
.show_fdinfo = pidfd_show_fdinfo,
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
* pidfd_create() - Create a new pid file descriptor.
|
||||
*
|
||||
* @pid: struct pid that the pidfd will reference
|
||||
*
|
||||
* This creates a new pid file descriptor with the O_CLOEXEC flag set.
|
||||
*
|
||||
* Note, that this function can only be called after the fd table has
|
||||
* been unshared to avoid leaking the pidfd to the new process.
|
||||
*
|
||||
* Return: On success, a cloexec pidfd is returned.
|
||||
* On error, a negative errno number will be returned.
|
||||
*/
|
||||
static int pidfd_create(struct pid *pid)
|
||||
{
|
||||
int fd;
|
||||
|
||||
fd = anon_inode_getfd("[pidfd]", &pidfd_fops, get_pid(pid),
|
||||
O_RDWR | O_CLOEXEC);
|
||||
if (fd < 0)
|
||||
put_pid(pid);
|
||||
|
||||
return fd;
|
||||
}
|
||||
|
||||
/*
|
||||
* This creates a new process as a copy of the old one,
|
||||
* but does not actually start it yet.
|
||||
@ -1682,13 +1736,14 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
unsigned long clone_flags,
|
||||
unsigned long stack_start,
|
||||
unsigned long stack_size,
|
||||
int __user *parent_tidptr,
|
||||
int __user *child_tidptr,
|
||||
struct pid *pid,
|
||||
int trace,
|
||||
unsigned long tls,
|
||||
int node)
|
||||
{
|
||||
int retval;
|
||||
int pidfd = -1, retval;
|
||||
struct task_struct *p;
|
||||
struct multiprocess_signals delayed;
|
||||
|
||||
@ -1738,6 +1793,31 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
if (clone_flags & CLONE_PIDFD) {
|
||||
int reserved;
|
||||
|
||||
/*
|
||||
* - CLONE_PARENT_SETTID is useless for pidfds and also
|
||||
* parent_tidptr is used to return pidfds.
|
||||
* - CLONE_DETACHED is blocked so that we can potentially
|
||||
* reuse it later for CLONE_PIDFD.
|
||||
* - CLONE_THREAD is blocked until someone really needs it.
|
||||
*/
|
||||
if (clone_flags &
|
||||
(CLONE_DETACHED | CLONE_PARENT_SETTID | CLONE_THREAD))
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
/*
|
||||
* Verify that parent_tidptr is sane so we can potentially
|
||||
* reuse it later.
|
||||
*/
|
||||
if (get_user(reserved, parent_tidptr))
|
||||
return ERR_PTR(-EFAULT);
|
||||
|
||||
if (reserved != 0)
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Force any signals received before this point to be delivered
|
||||
* before the fork happens. Collect up signals sent to multiple
|
||||
@ -1944,6 +2024,22 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This has to happen after we've potentially unshared the file
|
||||
* descriptor table (so that the pidfd doesn't leak into the child
|
||||
* if the fd table isn't shared).
|
||||
*/
|
||||
if (clone_flags & CLONE_PIDFD) {
|
||||
retval = pidfd_create(pid);
|
||||
if (retval < 0)
|
||||
goto bad_fork_free_pid;
|
||||
|
||||
pidfd = retval;
|
||||
retval = put_user(pidfd, parent_tidptr);
|
||||
if (retval)
|
||||
goto bad_fork_put_pidfd;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLOCK
|
||||
p->plug = NULL;
|
||||
#endif
|
||||
@ -2004,7 +2100,7 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
*/
|
||||
retval = cgroup_can_fork(p);
|
||||
if (retval)
|
||||
goto bad_fork_free_pid;
|
||||
goto bad_fork_put_pidfd;
|
||||
|
||||
/*
|
||||
* From this point on we must avoid any synchronous user-space
|
||||
@ -2119,6 +2215,9 @@ bad_fork_cancel_cgroup:
|
||||
spin_unlock(¤t->sighand->siglock);
|
||||
write_unlock_irq(&tasklist_lock);
|
||||
cgroup_cancel_fork(p);
|
||||
bad_fork_put_pidfd:
|
||||
if (clone_flags & CLONE_PIDFD)
|
||||
ksys_close(pidfd);
|
||||
bad_fork_free_pid:
|
||||
cgroup_threadgroup_change_end(current);
|
||||
if (pid != &init_struct_pid)
|
||||
@ -2184,7 +2283,7 @@ static inline void init_idle_pids(struct task_struct *idle)
|
||||
struct task_struct *fork_idle(int cpu)
|
||||
{
|
||||
struct task_struct *task;
|
||||
task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0,
|
||||
task = copy_process(CLONE_VM, 0, 0, NULL, NULL, &init_struct_pid, 0, 0,
|
||||
cpu_to_node(cpu));
|
||||
if (!IS_ERR(task)) {
|
||||
init_idle_pids(task);
|
||||
@ -2236,7 +2335,7 @@ long _do_fork(unsigned long clone_flags,
|
||||
trace = 0;
|
||||
}
|
||||
|
||||
p = copy_process(clone_flags, stack_start, stack_size,
|
||||
p = copy_process(clone_flags, stack_start, stack_size, parent_tidptr,
|
||||
child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
|
||||
add_latent_entropy();
|
||||
|
||||
|
@ -3513,7 +3513,6 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
|
||||
return kill_something_info(sig, &info, pid);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_FS
|
||||
/*
|
||||
* Verify that the signaler and signalee either are in the same pid namespace
|
||||
* or that the signaler's pid namespace is an ancestor of the signalee's pid
|
||||
@ -3550,6 +3549,14 @@ static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, siginfo_t *info)
|
||||
return copy_siginfo_from_user(kinfo, info);
|
||||
}
|
||||
|
||||
static struct pid *pidfd_to_pid(const struct file *file)
|
||||
{
|
||||
if (file->f_op == &pidfd_fops)
|
||||
return file->private_data;
|
||||
|
||||
return tgid_pidfd_to_pid(file);
|
||||
}
|
||||
|
||||
/**
|
||||
* sys_pidfd_send_signal - send a signal to a process through a task file
|
||||
* descriptor
|
||||
@ -3586,7 +3593,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
|
||||
return -EBADF;
|
||||
|
||||
/* Is this a pidfd? */
|
||||
pid = tgid_pidfd_to_pid(f.file);
|
||||
pid = pidfd_to_pid(f.file);
|
||||
if (IS_ERR(pid)) {
|
||||
ret = PTR_ERR(pid);
|
||||
goto err;
|
||||
@ -3620,7 +3627,6 @@ err:
|
||||
fdput(f);
|
||||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
|
||||
static int
|
||||
do_send_specific(pid_t tgid, pid_t pid, int sig, struct kernel_siginfo *info)
|
||||
|
@ -167,9 +167,6 @@ COND_SYSCALL(syslog);
|
||||
|
||||
/* kernel/sched/core.c */
|
||||
|
||||
/* kernel/signal.c */
|
||||
COND_SYSCALL(pidfd_send_signal);
|
||||
|
||||
/* kernel/sys.c */
|
||||
COND_SYSCALL(setregid);
|
||||
COND_SYSCALL(setgid);
|
||||
|
@ -3,4 +3,4 @@
|
||||
obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \
|
||||
hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
|
||||
configfs/ connector/ v4l/ trace_printk/ \
|
||||
vfio-mdev/ statx/ qmi/ binderfs/
|
||||
vfio-mdev/ statx/ qmi/ binderfs/ pidfd/
|
||||
|
6
samples/pidfd/Makefile
Normal file
6
samples/pidfd/Makefile
Normal file
@ -0,0 +1,6 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
hostprogs-y := pidfd-metadata
|
||||
always := $(hostprogs-y)
|
||||
HOSTCFLAGS_pidfd-metadata.o += -I$(objtree)/usr/include
|
||||
all: pidfd-metadata
|
112
samples/pidfd/pidfd-metadata.c
Normal file
112
samples/pidfd/pidfd-metadata.c
Normal file
@ -0,0 +1,112 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include <err.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <inttypes.h>
|
||||
#include <limits.h>
|
||||
#include <sched.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#ifndef CLONE_PIDFD
|
||||
#define CLONE_PIDFD 0x00001000
|
||||
#endif
|
||||
|
||||
/*
 * Entry point of the cloned child: print our own pid on stdout and exit
 * with EXIT_SUCCESS.
 *
 * @args is unused (the parent passes NULL).
 *
 * stdout is flushed explicitly because _exit() terminates the process
 * without flushing stdio buffers; if stdout is a pipe or a regular file
 * (i.e. not line-buffered) the pid line would otherwise never be written.
 */
static int do_child(void *args)
{
	(void)args;

	printf("%d\n", getpid());
	fflush(stdout);
	_exit(EXIT_SUCCESS);
}
|
||||
|
||||
static pid_t pidfd_clone(int flags, int *pidfd)
|
||||
{
|
||||
size_t stack_size = 1024;
|
||||
char *stack[1024] = { 0 };
|
||||
|
||||
#ifdef __ia64__
|
||||
return __clone2(do_child, stack, stack_size, flags | SIGCHLD, NULL, pidfd);
|
||||
#else
|
||||
return clone(do_child, stack + stack_size, flags | SIGCHLD, NULL, pidfd);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Thin wrapper that invokes the pidfd_send_signal system call directly
 * through syscall(), forwarding all four arguments unchanged and
 * returning syscall()'s result.
 */
static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
					unsigned int flags)
{
	long rc = syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);

	return rc;
}
|
||||
|
||||
/*
 * Open /proc/<pid> and make sure it still describes the process that
 * @pidfd refers to.
 *
 * @pid:   pid of the process whose /proc directory should be opened
 * @pidfd: pidfd referring to the same process
 *
 * Returns an O_CLOEXEC directory file descriptor for /proc/<pid> on
 * success, or -1 after printing a diagnostic with warn().
 *
 * The format strings passed to warn() deliberately carry no trailing
 * newline: warn() itself appends ": <strerror(errno)>\n", so an embedded
 * "\n" would split the diagnostic across two lines.
 */
static int pidfd_metadata_fd(pid_t pid, int pidfd)
{
	int procfd, ret;
	char path[100];

	snprintf(path, sizeof(path), "/proc/%d", pid);
	procfd = open(path, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
	if (procfd < 0) {
		warn("Failed to open %s", path);
		return -1;
	}

	/*
	 * Verify that the pid has not been recycled and our /proc/<pid> handle
	 * is still valid: signal 0 is sent through the pidfd opened for the
	 * process we actually created.
	 */
	ret = sys_pidfd_send_signal(pidfd, 0, NULL, 0);
	if (ret < 0) {
		switch (errno) {
		case EPERM:
			/* Process exists, just not allowed to signal it. */
			break;
		default:
			/* warn() must run first: close() may overwrite errno. */
			warn("Failed to signal process");
			close(procfd);
			procfd = -1;
		}
	}

	return procfd;
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int pidfd = 0, ret = EXIT_FAILURE;
|
||||
char buf[4096] = { 0 };
|
||||
pid_t pid;
|
||||
int procfd, statusfd;
|
||||
ssize_t bytes;
|
||||
|
||||
pid = pidfd_clone(CLONE_PIDFD, &pidfd);
|
||||
if (pid < 0)
|
||||
exit(ret);
|
||||
|
||||
procfd = pidfd_metadata_fd(pid, pidfd);
|
||||
close(pidfd);
|
||||
if (procfd < 0)
|
||||
goto out;
|
||||
|
||||
statusfd = openat(procfd, "status", O_RDONLY | O_CLOEXEC);
|
||||
close(procfd);
|
||||
if (statusfd < 0)
|
||||
goto out;
|
||||
|
||||
bytes = read(statusfd, buf, sizeof(buf));
|
||||
if (bytes > 0)
|
||||
bytes = write(STDOUT_FILENO, buf, bytes);
|
||||
close(statusfd);
|
||||
ret = EXIT_SUCCESS;
|
||||
|
||||
out:
|
||||
(void)wait(NULL);
|
||||
|
||||
exit(ret);
|
||||
}
|
Loading…
Reference in New Issue
Block a user