vfio/pci: Support dynamic MSI-X

pci_msix_alloc_irq_at() enables an individual MSI-X interrupt to be
allocated after MSI-X enabling.

Use dynamic MSI-X (if supported by the device) to allocate an interrupt
after MSI-X is enabled. An MSI-X interrupt is dynamically allocated at
the time a valid eventfd is assigned. This is different behavior from
a range provided during MSI-X enabling where interrupts are allocated
for the entire range whether a valid eventfd is provided for each
interrupt or not.

The PCI-MSIX API requires that some number of irqs are allocated for
an initial set of vectors when enabling MSI-X on the device. When
dynamic MSI-X allocation is not supported, the vector table, and thus
the allocated irq set can only be resized by disabling and re-enabling
MSI-X with a different range. In that case the irq allocation is
essentially a cache for configuring vectors within the previously
allocated vector range. When dynamic MSI-X allocation is supported,
the API still requires some initial set of irqs to be allocated, but
also supports allocating and freeing specific irq vectors both
within and beyond the initially allocated range.

For consistency between modes, to reduce latency and improve the
reliability of allocations, and for simplicity, this implementation
only releases irqs via pci_free_irq_vectors() when either the interrupt
mode changes or the device is released.

Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Link: https://lore.kernel.org/lkml/20230403211841.0e206b67.alex.williamson@redhat.com/
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/956c47057ae9fd45591feaa82e9ae20929889249.1683740667.git.reinette.chatre@intel.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
This commit is contained in:
Reinette Chatre 2023-05-11 08:44:37 -07:00 committed by Alex Williamson
parent dd27a70700
commit e4163438e0

View File

@ -381,27 +381,55 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
return 0;
}
/*
 * vfio_msi_alloc_irq() - look up, or allocate on demand, the Linux IRQ
 * number backing an MSI or MSI-X device interrupt vector.
 *
 * @vdev:   vfio PCI device owning the interrupt
 * @vector: index of the MSI/MSI-X vector
 * @msix:   true when the device is operating in MSI-X mode
 *
 * An existing Linux IRQ number for @vector is returned as-is. When none is
 * available, a new interrupt is allocated only if this is MSI-X and the
 * device supports dynamic MSI-X; otherwise the negative lookup result is
 * handed back to the caller unchanged.
 *
 * There is intentionally no matching vfio_msi_free_irq(): interrupts that
 * have been allocated are kept around as a cache for later requests and are
 * only released through pci_free_irq_vectors() when MSI/MSI-X is disabled.
 *
 * Return: Linux IRQ number (> 0) on success, negative errno on failure.
 */
static int vfio_msi_alloc_irq(struct vfio_pci_core_device *vdev,
			      unsigned int vector, bool msix)
{
	struct pci_dev *pdev = vdev->pdev;
	int irq = pci_irq_vector(pdev, vector);
	struct msi_map map;
	u16 cmd;

	/* A lookup result of 0 is unexpected: warn once and reject it. */
	if (WARN_ON_ONCE(!irq))
		return -EINVAL;

	/* Vector already has a Linux IRQ: reuse it. */
	if (irq > 0)
		return irq;

	/* No IRQ and no way to allocate one now: propagate the lookup error. */
	if (!(msix && vdev->has_dyn_msix))
		return irq;

	/*
	 * Allocate the vector dynamically. The allocation is bracketed by the
	 * memory lock/enable and unlock/restore helpers, and that order must
	 * be kept.
	 */
	cmd = vfio_pci_memory_lock_and_enable(vdev);
	map = pci_msix_alloc_irq_at(pdev, vector, NULL);
	vfio_pci_memory_unlock_and_restore(vdev, cmd);

	/* A negative index carries the error code; otherwise virq is valid. */
	if (map.index < 0)
		return map.index;

	return map.virq;
}
static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
unsigned int vector, int fd, bool msix)
{
struct pci_dev *pdev = vdev->pdev;
struct vfio_pci_irq_ctx *ctx;
struct eventfd_ctx *trigger;
int irq, ret;
int irq = -EINVAL, ret;
u16 cmd;
irq = pci_irq_vector(pdev, vector);
if (irq < 0)
return -EINVAL;
ctx = vfio_irq_ctx_get(vdev, vector);
if (ctx) {
irq_bypass_unregister_producer(&ctx->producer);
irq = pci_irq_vector(pdev, vector);
cmd = vfio_pci_memory_lock_and_enable(vdev);
free_irq(irq, ctx->trigger);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
/* Interrupt stays allocated, will be freed at MSI-X disable. */
kfree(ctx->name);
eventfd_ctx_put(ctx->trigger);
vfio_irq_ctx_free(vdev, ctx, vector);
@ -410,6 +438,13 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
if (fd < 0)
return 0;
if (irq == -EINVAL) {
/* Interrupt stays allocated, will be freed at MSI-X disable. */
irq = vfio_msi_alloc_irq(vdev, vector, msix);
if (irq < 0)
return irq;
}
ctx = vfio_irq_ctx_alloc(vdev, vector);
if (!ctx)
return -ENOMEM;