vfio: Replace the DMA unmapping notifier with a callback

Instead of having drivers register the notifier with explicit code just
have them provide a dma_unmap callback op in their driver ops and rely on
the core code to wire it up.

Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/1-v4-681e038e30fd+78-vfio_unmap_notif_jgg@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
This commit is contained in:
Jason Gunthorpe 2022-07-19 21:02:48 -03:00 committed by Alex Williamson
parent 2a8ed7ef00
commit ce4b4657ff
9 changed files with 86 additions and 240 deletions

View File

@ -226,7 +226,6 @@ struct intel_vgpu {
unsigned long nr_cache_entries;
struct mutex cache_lock;
struct notifier_block iommu_notifier;
atomic_t released;
struct kvm_page_track_notifier_node track_node;

View File

@ -729,34 +729,25 @@ int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num)
return ret;
}
static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
unsigned long action, void *data)
static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova,
u64 length)
{
struct intel_vgpu *vgpu =
container_of(nb, struct intel_vgpu, iommu_notifier);
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
struct gvt_dma *entry;
u64 iov_pfn = iova >> PAGE_SHIFT;
u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE;
if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
struct vfio_iommu_type1_dma_unmap *unmap = data;
struct gvt_dma *entry;
unsigned long iov_pfn, end_iov_pfn;
mutex_lock(&vgpu->cache_lock);
for (; iov_pfn < end_iov_pfn; iov_pfn++) {
entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
if (!entry)
continue;
iov_pfn = unmap->iova >> PAGE_SHIFT;
end_iov_pfn = iov_pfn + unmap->size / PAGE_SIZE;
mutex_lock(&vgpu->cache_lock);
for (; iov_pfn < end_iov_pfn; iov_pfn++) {
entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
if (!entry)
continue;
gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
entry->size);
__gvt_cache_remove_entry(vgpu, entry);
}
mutex_unlock(&vgpu->cache_lock);
gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
entry->size);
__gvt_cache_remove_entry(vgpu, entry);
}
return NOTIFY_OK;
mutex_unlock(&vgpu->cache_lock);
}
static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
@ -783,36 +774,20 @@ out:
static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
{
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
unsigned long events;
int ret;
vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
&vgpu->iommu_notifier);
if (ret != 0) {
gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
ret);
goto out;
}
ret = -EEXIST;
if (vgpu->attached)
goto undo_iommu;
return -EEXIST;
ret = -ESRCH;
if (!vgpu->vfio_device.kvm ||
vgpu->vfio_device.kvm->mm != current->mm) {
gvt_vgpu_err("KVM is required to use Intel vGPU\n");
goto undo_iommu;
return -ESRCH;
}
kvm_get_kvm(vgpu->vfio_device.kvm);
ret = -EEXIST;
if (__kvmgt_vgpu_exist(vgpu))
goto undo_iommu;
return -EEXIST;
vgpu->attached = true;
@ -831,12 +806,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
atomic_set(&vgpu->released, 0);
return 0;
undo_iommu:
vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
&vgpu->iommu_notifier);
out:
return ret;
}
static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
@ -853,8 +822,6 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
{
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
int ret;
if (!vgpu->attached)
return;
@ -864,11 +831,6 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
intel_gvt_release_vgpu(vgpu);
ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY,
&vgpu->iommu_notifier);
drm_WARN(&i915->drm, ret,
"vfio_unregister_notifier for iommu failed: %d\n", ret);
debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
@ -1610,6 +1572,7 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = {
.write = intel_vgpu_write,
.mmap = intel_vgpu_mmap,
.ioctl = intel_vgpu_ioctl,
.dma_unmap = intel_vgpu_dma_unmap,
};
static int intel_vgpu_probe(struct mdev_device *mdev)

View File

@ -33,30 +33,16 @@ static int vfio_ccw_mdev_reset(struct vfio_ccw_private *private)
return 0;
}
static int vfio_ccw_mdev_notifier(struct notifier_block *nb,
unsigned long action,
void *data)
static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
{
struct vfio_ccw_private *private =
container_of(nb, struct vfio_ccw_private, nb);
container_of(vdev, struct vfio_ccw_private, vdev);
/*
* Vendor drivers MUST unpin pages in response to an
* invalidation.
*/
if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
struct vfio_iommu_type1_dma_unmap *unmap = data;
/* Drivers MUST unpin pages in response to an invalidation. */
if (!cp_iova_pinned(&private->cp, iova))
return;
if (!cp_iova_pinned(&private->cp, unmap->iova))
return NOTIFY_OK;
if (vfio_ccw_mdev_reset(private))
return NOTIFY_BAD;
return NOTIFY_OK;
}
return NOTIFY_DONE;
vfio_ccw_mdev_reset(private);
}
static ssize_t name_show(struct mdev_type *mtype,
@ -154,23 +140,15 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
{
struct vfio_ccw_private *private =
container_of(vdev, struct vfio_ccw_private, vdev);
unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
int ret;
/* Device cannot simply be opened again from this state */
if (private->state == VFIO_CCW_STATE_NOT_OPER)
return -EINVAL;
private->nb.notifier_call = vfio_ccw_mdev_notifier;
ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY,
&events, &private->nb);
if (ret)
return ret;
ret = vfio_ccw_register_async_dev_regions(private);
if (ret)
goto out_unregister;
return ret;
ret = vfio_ccw_register_schib_dev_regions(private);
if (ret)
@ -190,7 +168,6 @@ static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
out_unregister:
vfio_ccw_unregister_dev_regions(private);
vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb);
return ret;
}
@ -201,7 +178,6 @@ static void vfio_ccw_mdev_close_device(struct vfio_device *vdev)
vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE);
vfio_ccw_unregister_dev_regions(private);
vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, &private->nb);
}
static ssize_t vfio_ccw_mdev_read_io_region(struct vfio_ccw_private *private,
@ -624,6 +600,7 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = {
.write = vfio_ccw_mdev_write,
.ioctl = vfio_ccw_mdev_ioctl,
.request = vfio_ccw_mdev_request,
.dma_unmap = vfio_ccw_dma_unmap,
};
struct mdev_driver vfio_ccw_mdev_driver = {

View File

@ -73,7 +73,6 @@ struct vfio_ccw_crw {
* @state: internal state of the device
* @completion: synchronization helper of the I/O completion
* @avail: available for creating a mediated device
* @nb: notifier for vfio events
* @io_region: MMIO region to input/output I/O arguments/results
* @io_mutex: protect against concurrent update of I/O regions
* @region: additional regions for other subchannel operations
@ -96,7 +95,6 @@ struct vfio_ccw_private {
int state;
struct completion *completion;
atomic_t avail;
struct notifier_block nb;
struct ccw_io_region *io_region;
struct mutex io_mutex;
struct vfio_ccw_region *region;

View File

@ -1226,34 +1226,14 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
return 0;
}
/**
* vfio_ap_mdev_iommu_notifier - IOMMU notifier callback
*
* @nb: The notifier block
* @action: Action to be taken
* @data: data associated with the request
*
* For an UNMAP request, unpin the guest IOVA (the NIB guest address we
* pinned before). Other requests are ignored.
*
* Return: for an UNMAP request, NOFITY_OK; otherwise NOTIFY_DONE.
*/
static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
unsigned long action, void *data)
static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova,
u64 length)
{
struct ap_matrix_mdev *matrix_mdev;
struct ap_matrix_mdev *matrix_mdev =
container_of(vdev, struct ap_matrix_mdev, vdev);
unsigned long g_pfn = iova >> PAGE_SHIFT;
matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
struct vfio_iommu_type1_dma_unmap *unmap = data;
unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
return NOTIFY_OK;
}
return NOTIFY_DONE;
vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
}
/**
@ -1380,27 +1360,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
{
struct ap_matrix_mdev *matrix_mdev =
container_of(vdev, struct ap_matrix_mdev, vdev);
unsigned long events;
int ret;
if (!vdev->kvm)
return -EINVAL;
ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
if (ret)
return ret;
matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
&matrix_mdev->iommu_notifier);
if (ret)
goto err_kvm;
return 0;
err_kvm:
vfio_ap_mdev_unset_kvm(matrix_mdev);
return ret;
return vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
}
static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
@ -1408,8 +1372,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
struct ap_matrix_mdev *matrix_mdev =
container_of(vdev, struct ap_matrix_mdev, vdev);
vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
&matrix_mdev->iommu_notifier);
vfio_ap_mdev_unset_kvm(matrix_mdev);
}
@ -1461,6 +1423,7 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
.open_device = vfio_ap_mdev_open_device,
.close_device = vfio_ap_mdev_close_device,
.ioctl = vfio_ap_mdev_ioctl,
.dma_unmap = vfio_ap_mdev_dma_unmap,
};
static struct mdev_driver vfio_ap_matrix_driver = {

View File

@ -81,8 +81,6 @@ struct ap_matrix {
* @node: allows the ap_matrix_mdev struct to be added to a list
* @matrix: the adapters, usage domains and control domains assigned to the
* mediated matrix device.
* @iommu_notifier: notifier block used for specifying callback function for
* handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even
* @kvm: the struct holding guest's state
* @pqap_hook: the function pointer to the interception handler for the
* PQAP(AQIC) instruction.
@ -92,7 +90,6 @@ struct ap_matrix_mdev {
struct vfio_device vdev;
struct list_head node;
struct ap_matrix matrix;
struct notifier_block iommu_notifier;
struct kvm *kvm;
crypto_hook pqap_hook;
struct mdev_device *mdev;

View File

@ -231,6 +231,9 @@ int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
{
struct vfio_iommu_driver *driver, *tmp;
if (WARN_ON(!ops->register_notifier != !ops->unregister_notifier))
return -EINVAL;
driver = kzalloc(sizeof(*driver), GFP_KERNEL);
if (!driver)
return -ENOMEM;
@ -1079,8 +1082,20 @@ static void vfio_device_unassign_container(struct vfio_device *device)
up_write(&device->group->group_rwsem);
}
static int vfio_iommu_notifier(struct notifier_block *nb, unsigned long action,
void *data)
{
struct vfio_device *vfio_device =
container_of(nb, struct vfio_device, iommu_nb);
struct vfio_iommu_type1_dma_unmap *unmap = data;
vfio_device->ops->dma_unmap(vfio_device, unmap->iova, unmap->size);
return NOTIFY_OK;
}
static struct file *vfio_device_open(struct vfio_device *device)
{
struct vfio_iommu_driver *iommu_driver;
struct file *filep;
int ret;
@ -1111,6 +1126,18 @@ static struct file *vfio_device_open(struct vfio_device *device)
if (ret)
goto err_undo_count;
}
iommu_driver = device->group->container->iommu_driver;
if (device->ops->dma_unmap && iommu_driver &&
iommu_driver->ops->register_notifier) {
unsigned long events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
device->iommu_nb.notifier_call = vfio_iommu_notifier;
iommu_driver->ops->register_notifier(
device->group->container->iommu_data, &events,
&device->iommu_nb);
}
up_read(&device->group->group_rwsem);
}
mutex_unlock(&device->dev_set->lock);
@ -1145,8 +1172,16 @@ static struct file *vfio_device_open(struct vfio_device *device)
err_close_device:
mutex_lock(&device->dev_set->lock);
down_read(&device->group->group_rwsem);
if (device->open_count == 1 && device->ops->close_device)
if (device->open_count == 1 && device->ops->close_device) {
device->ops->close_device(device);
iommu_driver = device->group->container->iommu_driver;
if (device->ops->dma_unmap && iommu_driver &&
iommu_driver->ops->unregister_notifier)
iommu_driver->ops->unregister_notifier(
device->group->container->iommu_data,
&device->iommu_nb);
}
err_undo_count:
up_read(&device->group->group_rwsem);
device->open_count--;
@ -1341,12 +1376,20 @@ static const struct file_operations vfio_group_fops = {
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
struct vfio_device *device = filep->private_data;
struct vfio_iommu_driver *iommu_driver;
mutex_lock(&device->dev_set->lock);
vfio_assert_device_open(device);
down_read(&device->group->group_rwsem);
if (device->open_count == 1 && device->ops->close_device)
device->ops->close_device(device);
iommu_driver = device->group->container->iommu_driver;
if (device->ops->dma_unmap && iommu_driver &&
iommu_driver->ops->unregister_notifier)
iommu_driver->ops->unregister_notifier(
device->group->container->iommu_data,
&device->iommu_nb);
up_read(&device->group->group_rwsem);
device->open_count--;
if (device->open_count == 0)
@ -2029,90 +2072,6 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, void *data,
}
EXPORT_SYMBOL(vfio_dma_rw);
static int vfio_register_iommu_notifier(struct vfio_group *group,
unsigned long *events,
struct notifier_block *nb)
{
struct vfio_container *container;
struct vfio_iommu_driver *driver;
int ret;
lockdep_assert_held_read(&group->group_rwsem);
container = group->container;
driver = container->iommu_driver;
if (likely(driver && driver->ops->register_notifier))
ret = driver->ops->register_notifier(container->iommu_data,
events, nb);
else
ret = -ENOTTY;
return ret;
}
static int vfio_unregister_iommu_notifier(struct vfio_group *group,
struct notifier_block *nb)
{
struct vfio_container *container;
struct vfio_iommu_driver *driver;
int ret;
lockdep_assert_held_read(&group->group_rwsem);
container = group->container;
driver = container->iommu_driver;
if (likely(driver && driver->ops->unregister_notifier))
ret = driver->ops->unregister_notifier(container->iommu_data,
nb);
else
ret = -ENOTTY;
return ret;
}
int vfio_register_notifier(struct vfio_device *device,
enum vfio_notify_type type, unsigned long *events,
struct notifier_block *nb)
{
struct vfio_group *group = device->group;
int ret;
if (!nb || !events || (*events == 0) ||
!vfio_assert_device_open(device))
return -EINVAL;
switch (type) {
case VFIO_IOMMU_NOTIFY:
ret = vfio_register_iommu_notifier(group, events, nb);
break;
default:
ret = -EINVAL;
}
return ret;
}
EXPORT_SYMBOL(vfio_register_notifier);
int vfio_unregister_notifier(struct vfio_device *device,
enum vfio_notify_type type,
struct notifier_block *nb)
{
struct vfio_group *group = device->group;
int ret;
if (!nb || !vfio_assert_device_open(device))
return -EINVAL;
switch (type) {
case VFIO_IOMMU_NOTIFY:
ret = vfio_unregister_iommu_notifier(group, nb);
break;
default:
ret = -EINVAL;
}
return ret;
}
EXPORT_SYMBOL(vfio_unregister_notifier);
/*
* Module/class support
*/

View File

@ -33,6 +33,9 @@ enum vfio_iommu_notify_type {
VFIO_IOMMU_CONTAINER_CLOSE = 0,
};
/* events for register_notifier() */
#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
/**
* struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
*/

View File

@ -49,6 +49,7 @@ struct vfio_device {
unsigned int open_count;
struct completion comp;
struct list_head group_next;
struct notifier_block iommu_nb;
};
/**
@ -65,6 +66,8 @@ struct vfio_device {
* @match: Optional device name match callback (return: 0 for no-match, >0 for
* match, -errno for abort (ex. match with insufficient or incorrect
* additional args)
* @dma_unmap: Called when userspace unmaps IOVA from the container
* this device is attached to.
* @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl
*/
struct vfio_device_ops {
@ -80,6 +83,7 @@ struct vfio_device_ops {
int (*mmap)(struct vfio_device *vdev, struct vm_area_struct *vma);
void (*request)(struct vfio_device *vdev, unsigned int count);
int (*match)(struct vfio_device *vdev, char *buf);
void (*dma_unmap)(struct vfio_device *vdev, u64 iova, u64 length);
int (*device_feature)(struct vfio_device *device, u32 flags,
void __user *arg, size_t argsz);
};
@ -164,23 +168,6 @@ int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn,
int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova,
void *data, size_t len, bool write);
/* each type has independent events */
enum vfio_notify_type {
VFIO_IOMMU_NOTIFY = 0,
};
/* events for VFIO_IOMMU_NOTIFY */
#define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0)
int vfio_register_notifier(struct vfio_device *device,
enum vfio_notify_type type,
unsigned long *required_events,
struct notifier_block *nb);
int vfio_unregister_notifier(struct vfio_device *device,
enum vfio_notify_type type,
struct notifier_block *nb);
/*
* Sub-module helpers
*/