diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst index 6efb41cc8072..d61219889e49 100644 --- a/Documentation/userspace-api/seccomp_filter.rst +++ b/Documentation/userspace-api/seccomp_filter.rst @@ -259,6 +259,18 @@ and ``ioctl(SECCOMP_IOCTL_NOTIF_SEND)`` a response, indicating what should be returned to userspace. The ``id`` member of ``struct seccomp_notif_resp`` should be the same ``id`` as in ``struct seccomp_notif``. +Userspace can also add file descriptors to the notifying process via +``ioctl(SECCOMP_IOCTL_NOTIF_ADDFD)``. The ``id`` member of +``struct seccomp_notif_addfd`` should be the same ``id`` as in +``struct seccomp_notif``. The ``newfd_flags`` flag may be used to set flags +like O_EXEC on the file descriptor in the notifying process. If the supervisor +wants to inject the file descriptor with a specific number, the +``SECCOMP_ADDFD_FLAG_SETFD`` flag can be used, and set the ``newfd`` member to +the specific number to use. If that file descriptor is already open in the +notifying process it will be replaced. The supervisor can also add an FD, and +respond atomically by using the ``SECCOMP_ADDFD_FLAG_SEND`` flag and the return +value will be the injected file descriptor number. + It is worth noting that ``struct seccomp_data`` contains the values of register arguments to the syscall, but does not contain pointers to memory. The task's memory is accessible to suitably privileged traces via ``ptrace()`` or diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 6ba18b82a02e..78074254ab98 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -115,6 +115,7 @@ struct seccomp_notif_resp { /* valid flags for seccomp_notif_addfd */ #define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ +#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */ /** * struct seccomp_notif_addfd diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 9f58049ac16d..057e17f3215d 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -107,6 +107,7 @@ struct seccomp_knotif { * installing process should allocate the fd as normal. * @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC * is allowed. + * @ioctl_flags: The flags used for the seccomp_addfd ioctl. * @ret: The return value of the installing process. It is set to the fd num * upon success (>= 0). * @completion: Indicates that the installing process has completed fd @@ -118,6 +119,7 @@ struct seccomp_kaddfd { struct file *file; int fd; unsigned int flags; + __u32 ioctl_flags; union { bool setfd; @@ -1065,18 +1067,37 @@ static u64 seccomp_next_notify_id(struct seccomp_filter *filter) return filter->notif->next_id++; } -static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd) +static void seccomp_handle_addfd(struct seccomp_kaddfd *addfd, struct seccomp_knotif *n) { + int fd; + /* * Remove the notification, and reset the list pointers, indicating * that it has been handled. */ list_del_init(&addfd->list); if (!addfd->setfd) - addfd->ret = receive_fd(addfd->file, addfd->flags); + fd = receive_fd(addfd->file, addfd->flags); else - addfd->ret = receive_fd_replace(addfd->fd, addfd->file, - addfd->flags); + fd = receive_fd_replace(addfd->fd, addfd->file, addfd->flags); + addfd->ret = fd; + + if (addfd->ioctl_flags & SECCOMP_ADDFD_FLAG_SEND) { + /* If we fail reset and return an error to the notifier */ + if (fd < 0) { + n->state = SECCOMP_NOTIFY_SENT; + } else { + /* Return the FD we just added */ + n->flags = 0; + n->error = 0; + n->val = fd; + } + } + + /* + * Mark the notification as completed. From this point, addfd mem + * might be invalidated and we can't safely read it anymore. + */ complete(&addfd->completion); } @@ -1120,7 +1141,7 @@ static int seccomp_do_user_notification(int this_syscall, struct seccomp_kaddfd, list); /* Check if we were woken up by a addfd message */ if (addfd) - seccomp_handle_addfd(addfd); + seccomp_handle_addfd(addfd, &n); } while (n.state != SECCOMP_NOTIFY_REPLIED); @@ -1581,7 +1602,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter, if (addfd.newfd_flags & ~O_CLOEXEC) return -EINVAL; - if (addfd.flags & ~SECCOMP_ADDFD_FLAG_SETFD) + if (addfd.flags & ~(SECCOMP_ADDFD_FLAG_SETFD | SECCOMP_ADDFD_FLAG_SEND)) return -EINVAL; if (addfd.newfd && !(addfd.flags & SECCOMP_ADDFD_FLAG_SETFD)) @@ -1591,6 +1612,7 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter, if (!kaddfd.file) return -EBADF; + kaddfd.ioctl_flags = addfd.flags; kaddfd.flags = addfd.newfd_flags; kaddfd.setfd = addfd.flags & SECCOMP_ADDFD_FLAG_SETFD; kaddfd.fd = addfd.newfd; @@ -1616,6 +1638,23 @@ static long seccomp_notify_addfd(struct seccomp_filter *filter, goto out_unlock; } + if (addfd.flags & SECCOMP_ADDFD_FLAG_SEND) { + /* + * Disallow queuing an atomic addfd + send reply while there are + * some addfd requests still to process. + * + * There is no clear reason to support it and allows us to keep + * the loop on the other side straight-forward. + */ + if (!list_empty(&knotif->addfd)) { + ret = -EBUSY; + goto out_unlock; + } + + /* Allow exactly only one reply */ + knotif->state = SECCOMP_NOTIFY_REPLIED; + } + list_add(&kaddfd.list, &knotif->addfd); complete(&knotif->ready); mutex_unlock(&filter->notify_lock);