iommufd: Add iommufd fault object

An iommufd fault object provides an interface for delivering I/O page
faults to user space. These objects are created and destroyed by user
space, and they can be associated with or dissociated from hardware page
table objects during page table allocation or destruction.

User space interacts with the fault object through a file interface. This
interface offers a straightforward and efficient way for user space to
handle page faults. It allows user space to read fault messages
sequentially and respond to them by writing to the same file. The file
interface supports reading messages in poll mode, so it's recommended that
user space applications use io_uring to enhance read and write efficiency.

A fault object can be associated with any iopf-capable iommufd_hw_pgtable
during the pgtable's allocation. All I/O page faults triggered by devices
when accessing the I/O addresses of an iommufd_hw_pgtable are routed
through the fault object to user space. Similarly, user space's responses
to these page faults are routed back to the iommu device driver through
the same fault object.

Link: https://lore.kernel.org/r/20240702063444.105814-7-baolu.lu@linux.intel.com
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
This commit is contained in:
Lu Baolu 2024-07-02 14:34:40 +08:00 committed by Jason Gunthorpe
parent c714f15860
commit 07838f7fd5
7 changed files with 287 additions and 0 deletions

View File

@ -110,6 +110,8 @@ static struct iopf_group *iopf_group_alloc(struct iommu_fault_param *iopf_param,
list_add(&group->pending_node, &iopf_param->faults); list_add(&group->pending_node, &iopf_param->faults);
mutex_unlock(&iopf_param->lock); mutex_unlock(&iopf_param->lock);
group->fault_count = list_count_nodes(&group->faults);
return group; return group;
} }

View File

@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only # SPDX-License-Identifier: GPL-2.0-only
iommufd-y := \ iommufd-y := \
device.o \ device.o \
fault.o \
hw_pagetable.o \ hw_pagetable.o \
io_pagetable.o \ io_pagetable.o \
ioas.o \ ioas.o \

View File

@ -0,0 +1,226 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2024 Intel Corporation
*/
#define pr_fmt(fmt) "iommufd: " fmt
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/iommufd.h>
#include <linux/poll.h>
#include <linux/string.h>
#include <linux/anon_inodes.h>
#include <uapi/linux/iommufd.h>
#include "../iommu-priv.h"
#include "iommufd_private.h"
/**
 * iommufd_fault_destroy - Tear down an iommufd fault object
 * @obj: The iommufd_object embedded in the struct iommufd_fault to destroy
 *
 * Every fault group still owned by the fault object is answered with
 * IOMMU_PAGE_RESP_INVALID and freed. This covers both groups that were never
 * read by user space (the deliver list) and groups that were read but never
 * responded to (the response xarray). The xarray and the mutex are then
 * destroyed.
 */
void iommufd_fault_destroy(struct iommufd_object *obj)
{
	struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj);
	struct iopf_group *group, *next;
	unsigned long index;

	/*
	 * The iommufd object's reference count is zero at this point.
	 * We can be confident that no other threads are currently
	 * accessing this pointer. Therefore, acquiring the mutex here
	 * is unnecessary.
	 */
	list_for_each_entry_safe(group, next, &fault->deliver, node) {
		list_del(&group->node);
		iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
		iopf_free_group(group);
	}

	/*
	 * Groups that were handed to user space by read() live only in the
	 * response xarray. Without this loop they would never be completed
	 * or freed once the object goes away.
	 */
	xa_for_each(&fault->response, index, group) {
		xa_erase(&fault->response, index);
		iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
		iopf_free_group(group);
	}

	xa_destroy(&fault->response);
	mutex_destroy(&fault->mutex);
}
/*
 * Translate one kernel page request into the message layout that is handed to
 * user space. The identifying fields (device object ID and group cookie) come
 * from the caller; everything else is taken from the page request, and the
 * length is always reported as 0.
 */
static void iommufd_compose_fault_message(struct iommu_fault *fault,
					  struct iommu_hwpt_pgfault *hwpt_fault,
					  struct iommufd_device *idev,
					  u32 cookie)
{
	const struct iommu_fault_page_request *req = &fault->prm;

	/* Caller-supplied identification. */
	hwpt_fault->dev_id = idev->obj.id;
	hwpt_fault->cookie = cookie;

	/* Fields copied verbatim from the page request. */
	hwpt_fault->flags = req->flags;
	hwpt_fault->pasid = req->pasid;
	hwpt_fault->grpid = req->grpid;
	hwpt_fault->perm = req->perm;
	hwpt_fault->addr = req->addr;

	hwpt_fault->length = 0;
}
/*
 * read() handler of the fault file: deliver pending fault groups to user space.
 *
 * @filep: fault file; ->private_data is the struct iommufd_fault
 * @buf:   user buffer receiving an array of struct iommu_hwpt_pgfault
 * @count: size of @buf in bytes; must be a multiple of the message size
 * @ppos:  file position; must be 0
 *
 * Groups are taken from the head of the deliver list for as long as a whole
 * group still fits into the remaining buffer space. Each delivered group gets
 * a cookie allocated in the response xarray, which user space later uses to
 * answer it via write().
 *
 * A group is either delivered completely or not at all. If copying any of its
 * messages fails, its cookie is released, the group stays at the head of the
 * deliver list for a later read, and none of its messages are counted in the
 * return value.
 *
 * Returns the number of bytes copied, or, when nothing was copied, 0 or a
 * negative errno (-ESPIPE for a bad position/size, -EFAULT for a bad buffer,
 * or the xa_alloc() error).
 */
static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
				       size_t count, loff_t *ppos)
{
	size_t fault_size = sizeof(struct iommu_hwpt_pgfault);
	struct iommufd_fault *fault = filep->private_data;
	struct iommu_hwpt_pgfault data;
	struct iommufd_device *idev;
	struct iopf_group *group;
	struct iopf_fault *iopf;
	size_t copied;
	size_t done = 0;
	int rc = 0;

	if (*ppos || count % fault_size)
		return -ESPIPE;

	mutex_lock(&fault->mutex);
	while (!list_empty(&fault->deliver) && count > done) {
		group = list_first_entry(&fault->deliver,
					 struct iopf_group, node);

		/* Never split a group across two reads. */
		if (group->fault_count * fault_size > count - done)
			break;

		rc = xa_alloc(&fault->response, &group->cookie, group,
			      xa_limit_32b, GFP_KERNEL);
		if (rc)
			break;

		idev = to_iommufd_handle(group->attach_handle)->idev;
		copied = 0;
		list_for_each_entry(iopf, &group->faults, list) {
			/*
			 * The message is copied to user space as raw bytes.
			 * Clear it first so that padding or any field not
			 * set by the compose helper cannot expose stale
			 * stack contents.
			 */
			memset(&data, 0, sizeof(data));
			iommufd_compose_fault_message(&iopf->fault,
						      &data, idev,
						      group->cookie);
			if (copy_to_user(buf + done + copied, &data,
					 fault_size)) {
				rc = -EFAULT;
				break;
			}
			copied += fault_size;
		}

		if (rc) {
			/*
			 * Undo the cookie allocation but keep the group on
			 * the deliver list. Removing it from both containers
			 * would leak it and leave the fault unanswered.
			 */
			xa_erase(&fault->response, group->cookie);
			break;
		}

		/* The whole group reached user space; commit it. */
		done += copied;
		list_del(&group->node);
	}
	mutex_unlock(&fault->mutex);

	return done == 0 ? rc : done;
}
/*
 * write() handler of the fault file: accept page fault responses.
 *
 * @filep: fault file; ->private_data is the struct iommufd_fault
 * @buf:   user buffer holding an array of struct iommu_hwpt_page_response
 * @count: size of @buf in bytes; must be a multiple of the response size
 * @ppos:  file position; must be 0
 *
 * Each response names a group by the cookie it was given on read(). The group
 * is removed from the response xarray, completed with the supplied response
 * code and freed. Processing stops at the first response that cannot be
 * copied in or whose cookie is unknown.
 *
 * Returns the number of bytes consumed, or, when nothing was consumed, 0 or a
 * negative errno (-ESPIPE for a bad position/size, -EFAULT for a bad buffer,
 * -EINVAL for an unknown cookie).
 */
static ssize_t iommufd_fault_fops_write(struct file *filep, const char __user *buf,
					size_t count, loff_t *ppos)
{
	size_t response_size = sizeof(struct iommu_hwpt_page_response);
	struct iommufd_fault *fault = filep->private_data;
	struct iommu_hwpt_page_response response;
	struct iopf_group *group;
	size_t done = 0;
	int rc = 0;

	if (*ppos || count % response_size)
		return -ESPIPE;

	mutex_lock(&fault->mutex);
	while (count > done) {
		/*
		 * copy_from_user() returns the number of bytes that could not
		 * be copied, not an errno. Returning that value directly would
		 * look like a successful short write to the caller.
		 */
		if (copy_from_user(&response, buf + done, response_size)) {
			rc = -EFAULT;
			break;
		}

		group = xa_erase(&fault->response, response.cookie);
		if (!group) {
			rc = -EINVAL;
			break;
		}

		iopf_group_response(group, response.code);
		iopf_free_group(group);
		done += response_size;
	}
	mutex_unlock(&fault->mutex);

	return done == 0 ? rc : done;
}
static __poll_t iommufd_fault_fops_poll(struct file *filep,
struct poll_table_struct *wait)
{
struct iommufd_fault *fault = filep->private_data;
__poll_t pollflags = EPOLLOUT;
poll_wait(filep, &fault->wait_queue, wait);
mutex_lock(&fault->mutex);
if (!list_empty(&fault->deliver))
pollflags |= EPOLLIN | EPOLLRDNORM;
mutex_unlock(&fault->mutex);
return pollflags;
}
/*
 * release() handler of the fault file. Drops the fault object user count and
 * the iommufd context reference that iommufd_fault_alloc() took on behalf of
 * the file.
 */
static int iommufd_fault_fops_release(struct inode *inode, struct file *filep)
{
	struct iommufd_fault *fault_obj = filep->private_data;
	struct iommufd_ctx *ictx = fault_obj->ictx;

	refcount_dec(&fault_obj->obj.users);
	iommufd_ctx_put(ictx);

	return 0;
}
/*
 * File operations of the fault file returned by iommufd_fault_alloc(): a
 * non-seekable file that is read for fault messages, written for responses
 * and can be polled.
 */
static const struct file_operations iommufd_fault_fops = {
	.owner		= THIS_MODULE,
	.open		= nonseekable_open,
	.release	= iommufd_fault_fops_release,
	.read		= iommufd_fault_fops_read,
	.write		= iommufd_fault_fops_write,
	.poll		= iommufd_fault_fops_poll,
	.llseek		= no_llseek,
};
/**
 * iommufd_fault_alloc - Handle ioctl(IOMMU_FAULT_QUEUE_ALLOC)
 * @ucmd: The ioctl command; ->cmd points to a struct iommu_fault_alloc
 *
 * Allocates a fault object, backs it with an anonymous-inode file and reports
 * the object ID and a new file descriptor to user space. The file holds one
 * user count on the object and one reference on the iommufd context; both are
 * dropped by the file's release handler.
 *
 * The object is finalized and the descriptor installed only after the
 * response has been copied to user space, so a failure at any earlier step
 * leaves nothing visible to the caller.
 *
 * Return: 0 on success, -EOPNOTSUPP if any flag is set, or the error of the
 * failing step.
 */
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
{
	struct iommu_fault_alloc *cmd = ucmd->cmd;
	struct iommufd_fault *fault;
	struct file *filep;
	int fdno;
	int rc;

	if (cmd->flags)
		return -EOPNOTSUPP;

	fault = iommufd_object_alloc(ucmd->ictx, fault, IOMMUFD_OBJ_FAULT);
	if (IS_ERR(fault))
		return PTR_ERR(fault);

	fault->ictx = ucmd->ictx;
	INIT_LIST_HEAD(&fault->deliver);
	xa_init_flags(&fault->response, XA_FLAGS_ALLOC1);
	mutex_init(&fault->mutex);
	init_waitqueue_head(&fault->wait_queue);

	filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops,
				   fault, O_RDWR);
	if (IS_ERR(filep)) {
		rc = PTR_ERR(filep);
		goto out_abort;
	}

	/* References owned by the file; released in its ->release handler. */
	refcount_inc(&fault->obj.users);
	iommufd_ctx_get(fault->ictx);
	fault->filep = filep;

	fdno = get_unused_fd_flags(O_CLOEXEC);
	if (fdno < 0) {
		rc = fdno;
		goto out_fput;
	}

	cmd->out_fault_id = fault->obj.id;
	cmd->out_fault_fd = fdno;

	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
	if (rc)
		goto out_put_fdno;
	iommufd_object_finalize(ucmd->ictx, &fault->obj);

	fd_install(fdno, fault->filep);

	return 0;
out_put_fdno:
	put_unused_fd(fdno);
out_fput:
	/*
	 * Dropping the last file reference invokes the ->release handler,
	 * which already drops the object user count and the ictx reference
	 * taken above. Dropping them here as well would unbalance both
	 * counters.
	 *
	 * NOTE(review): fput() may run ->release asynchronously; confirm the
	 * object cannot be freed by the abort below before ->release has run.
	 */
	fput(filep);
out_abort:
	iommufd_object_abort_and_destroy(ucmd->ictx, &fault->obj);

	return rc;
}

View File

@ -128,6 +128,7 @@ enum iommufd_object_type {
IOMMUFD_OBJ_HWPT_NESTED, IOMMUFD_OBJ_HWPT_NESTED,
IOMMUFD_OBJ_IOAS, IOMMUFD_OBJ_IOAS,
IOMMUFD_OBJ_ACCESS, IOMMUFD_OBJ_ACCESS,
IOMMUFD_OBJ_FAULT,
#ifdef CONFIG_IOMMUFD_TEST #ifdef CONFIG_IOMMUFD_TEST
IOMMUFD_OBJ_SELFTEST, IOMMUFD_OBJ_SELFTEST,
#endif #endif
@ -426,6 +427,35 @@ void iopt_remove_access(struct io_pagetable *iopt,
u32 iopt_access_list_id); u32 iopt_access_list_id);
void iommufd_access_destroy_object(struct iommufd_object *obj); void iommufd_access_destroy_object(struct iommufd_object *obj);
/*
 * An iommufd_fault object represents an interface to deliver I/O page faults
 * to the user space. These objects are created/destroyed by the user space and
 * associated with hardware page table objects during page-table allocation.
 *
 * User space reads fault messages from, and writes responses to, the file
 * referenced by @filep.
 */
struct iommufd_fault {
	/* Embedded base object; its ID is reported to user space on allocation. */
	struct iommufd_object obj;
	/* The iommufd context this fault object was allocated in. */
	struct iommufd_ctx *ictx;
	/* Anonymous-inode file exposing the fault interface to user space. */
	struct file *filep;

	/* The lists of outstanding faults protected by below mutex. */
	struct mutex mutex;
	/* Fault groups (struct iopf_group, linked via ->node) not yet read. */
	struct list_head deliver;
	/* Fault groups already read and awaiting a response, indexed by cookie. */
	struct xarray response;

	/* Wait queue that poll() on the fault file registers with. */
	struct wait_queue_head wait_queue;
};
/*
 * iommufd's wrapper around a core iommu attach handle. It lets code that is
 * handed a struct iommu_attach_handle find the iommufd device it belongs to.
 */
struct iommufd_attach_handle {
	/* Embedded core handle; must be the member passed to container_of(). */
	struct iommu_attach_handle handle;
	/* The iommufd device associated with this handle. */
	struct iommufd_device *idev;
};
/*
 * Convert an iommu attach handle to iommufd handle.
 *
 * @hdl must point to the 'handle' member embedded in a
 * struct iommufd_attach_handle.
 */
#define to_iommufd_handle(hdl)	container_of(hdl, struct iommufd_attach_handle, handle)
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
void iommufd_fault_destroy(struct iommufd_object *obj);
#ifdef CONFIG_IOMMUFD_TEST #ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd); int iommufd_test(struct iommufd_ucmd *ucmd);
void iommufd_selftest_destroy(struct iommufd_object *obj); void iommufd_selftest_destroy(struct iommufd_object *obj);

View File

@ -319,6 +319,7 @@ static int iommufd_option(struct iommufd_ucmd *ucmd)
union ucmd_buffer { union ucmd_buffer {
struct iommu_destroy destroy; struct iommu_destroy destroy;
struct iommu_fault_alloc fault;
struct iommu_hw_info info; struct iommu_hw_info info;
struct iommu_hwpt_alloc hwpt; struct iommu_hwpt_alloc hwpt;
struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap; struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap;
@ -355,6 +356,8 @@ struct iommufd_ioctl_op {
} }
static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id), IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id),
IOCTL_OP(IOMMU_FAULT_QUEUE_ALLOC, iommufd_fault_alloc, struct iommu_fault_alloc,
out_fault_fd),
IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info, IOCTL_OP(IOMMU_GET_HW_INFO, iommufd_get_hw_info, struct iommu_hw_info,
__reserved), __reserved),
IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc, IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc,
@ -513,6 +516,9 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
.destroy = iommufd_hwpt_nested_destroy, .destroy = iommufd_hwpt_nested_destroy,
.abort = iommufd_hwpt_nested_abort, .abort = iommufd_hwpt_nested_abort,
}, },
[IOMMUFD_OBJ_FAULT] = {
.destroy = iommufd_fault_destroy,
},
#ifdef CONFIG_IOMMUFD_TEST #ifdef CONFIG_IOMMUFD_TEST
[IOMMUFD_OBJ_SELFTEST] = { [IOMMUFD_OBJ_SELFTEST] = {
.destroy = iommufd_selftest_destroy, .destroy = iommufd_selftest_destroy,

View File

@ -124,12 +124,16 @@ struct iopf_fault {
struct iopf_group { struct iopf_group {
struct iopf_fault last_fault; struct iopf_fault last_fault;
struct list_head faults; struct list_head faults;
size_t fault_count;
/* list node for iommu_fault_param::faults */ /* list node for iommu_fault_param::faults */
struct list_head pending_node; struct list_head pending_node;
struct work_struct work; struct work_struct work;
struct iommu_attach_handle *attach_handle; struct iommu_attach_handle *attach_handle;
/* The device's fault data parameter. */ /* The device's fault data parameter. */
struct iommu_fault_param *fault_param; struct iommu_fault_param *fault_param;
/* Used by handler provider to hook the group on its own lists. */
struct list_head node;
u32 cookie;
}; };
/** /**

View File

@ -50,6 +50,7 @@ enum {
IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING, IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING,
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP, IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP,
IOMMUFD_CMD_HWPT_INVALIDATE, IOMMUFD_CMD_HWPT_INVALIDATE,
IOMMUFD_CMD_FAULT_QUEUE_ALLOC,
}; };
/** /**
@ -775,4 +776,21 @@ struct iommu_hwpt_page_response {
__u32 cookie; __u32 cookie;
__u32 code; __u32 code;
}; };
/**
 * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
 * @size: sizeof(struct iommu_fault_alloc)
 * @flags: Must be 0
 * @out_fault_id: The ID of the new FAULT
 * @out_fault_fd: The fd of the new FAULT
 *
 * Explicitly allocate a fault handling object.
 *
 * The returned file descriptor is the interface for the fault object: fault
 * messages are read from it and responses are written back to it.
 */
struct iommu_fault_alloc {
	__u32 size;
	__u32 flags;
	__u32 out_fault_id;
	__u32 out_fault_fd;
};
#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
#endif #endif