From 8138dabbab269d9d6ba2ffa17aad03aff22cfdb6 Mon Sep 17 00:00:00 2001 From: Wei Jiangang Date: Wed, 17 Aug 2016 14:37:05 +0800 Subject: [PATCH 1/5] vfio/pci: Fix typos in comments Signed-off-by: Wei Jiangang Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_config.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 688691d9058d..c4f235452d81 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -70,7 +70,7 @@ static const u8 pci_cap_length[PCI_CAP_ID_MAX + 1] = { /* * Lengths of PCIe/PCI-X Extended Config Capabilities - * 0: Removed or masked from the user visible capabilty list + * 0: Removed or masked from the user visible capability list * FF: Variable length */ static const u16 pci_ext_cap_length[PCI_EXT_CAP_ID_MAX + 1] = { @@ -355,7 +355,7 @@ static int alloc_perm_bits(struct perm_bits *perm, int size) * ignore whether a read/write exceeds the defined capability * structure. We can do this because: * - Standard config space is already dword aligned - * - Capabilities are all dword alinged (bits 0:1 of next reserved) + * - Capabilities are all dword aligned (bits 0:1 of next reserved) * - Express capabilities defined as dword aligned */ size = round_up(size, 4); @@ -1516,10 +1516,10 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev) * space which tracks reads and writes to bits that we emulate for * the user. Initial values filled from device. * - * Using shared stuct perm_bits between all vfio-pci devices saves + * Using shared struct perm_bits between all vfio-pci devices saves * us from allocating cfg_size buffers for virt and write for every * device. We could remove vconfig and allocate individual buffers - * for each area requring emulated bits, but the array of pointers + * for each area requiring emulated bits, but the array of pointers * would be comparable in size (at least for standard config space). */ int vfio_config_init(struct vfio_pci_device *vdev) From 2e06285655b59362847b610a7cfad204fee9640b Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Thu, 1 Sep 2016 19:15:35 +0800 Subject: [PATCH 2/5] vfio: platform: mark symbols static where possible We get a few warnings when building kernel with W=1: drivers/vfio/platform/vfio_platform_common.c:76:5: warning: no previous prototype for 'vfio_platform_acpi_call_reset' [-Wmissing-prototypes] drivers/vfio/platform/vfio_platform_common.c:98:6: warning: no previous prototype for 'vfio_platform_acpi_has_reset' [-Wmissing-prototypes] drivers/vfio/platform/vfio_platform_common.c:640:5: warning: no previous prototype for 'vfio_platform_of_probe' [-Wmissing-prototypes] drivers/vfio/platform/reset/vfio_platform_amdxgbe.c:59:5: warning: no previous prototype for 'vfio_platform_amdxgbe_reset' [-Wmissing-prototypes] drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c:60:5: warning: no previous prototype for 'vfio_platform_calxedaxgmac_reset' [-Wmissing-prototypes] .... In fact, these functions are only used in the file in which they are declared and don't need a declaration, but can be made static. so this patch marks these functions with 'static'. Signed-off-by: Baoyou Xie Acked-by: Arnd Bergmann Reviewed-by: Eric Auger Reviewed-by: Baptiste Reynal Signed-off-by: Alex Williamson --- drivers/vfio/platform/reset/vfio_platform_amdxgbe.c | 2 +- drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c | 2 +- drivers/vfio/platform/vfio_platform_common.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c b/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c index d4030d0c38e9..bcd419cfd79c 100644 --- a/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c +++ b/drivers/vfio/platform/reset/vfio_platform_amdxgbe.c @@ -56,7 +56,7 @@ static void xmdio_write(void *ioaddr, unsigned int mmd, iowrite32(value, ioaddr + ((mmd_address & 0xff) << 2)); } -int vfio_platform_amdxgbe_reset(struct vfio_platform_device *vdev) +static int vfio_platform_amdxgbe_reset(struct vfio_platform_device *vdev) { struct vfio_platform_region *xgmac_regs = &vdev->regions[0]; struct vfio_platform_region *xpcs_regs = &vdev->regions[1]; diff --git a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c index e3d3d948e661..49e5df6e8f29 100644 --- a/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c +++ b/drivers/vfio/platform/reset/vfio_platform_calxedaxgmac.c @@ -57,7 +57,7 @@ static inline void xgmac_mac_disable(void __iomem *ioaddr) writel(value, ioaddr + XGMAC_CONTROL); } -int vfio_platform_calxedaxgmac_reset(struct vfio_platform_device *vdev) +static int vfio_platform_calxedaxgmac_reset(struct vfio_platform_device *vdev) { struct vfio_platform_region *reg = &vdev->regions[0]; diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 1cf2d462b53d..d78142830754 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -73,7 +73,7 @@ static int vfio_platform_acpi_probe(struct vfio_platform_device *vdev, return WARN_ON(!vdev->acpihid) ? -EINVAL : 0; } -int vfio_platform_acpi_call_reset(struct vfio_platform_device *vdev, +static int vfio_platform_acpi_call_reset(struct vfio_platform_device *vdev, const char **extra_dbg) { #ifdef CONFIG_ACPI @@ -95,7 +95,7 @@ int vfio_platform_acpi_call_reset(struct vfio_platform_device *vdev, #endif } -bool vfio_platform_acpi_has_reset(struct vfio_platform_device *vdev) +static bool vfio_platform_acpi_has_reset(struct vfio_platform_device *vdev) { #ifdef CONFIG_ACPI struct device *dev = vdev->device; @@ -637,7 +637,7 @@ static const struct vfio_device_ops vfio_platform_ops = { .mmap = vfio_platform_mmap, }; -int vfio_platform_of_probe(struct vfio_platform_device *vdev, +static int vfio_platform_of_probe(struct vfio_platform_device *vdev, struct device *dev) { int ret; From ddf9dc0eb5314d6dac8b19b1cc37c739c6896e7e Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 26 Sep 2016 13:52:16 -0600 Subject: [PATCH 3/5] vfio-pci: Virtualize PCIe & AF FLR We use a BAR restore trick to try to detect when a user has performed a device reset, possibly through FLR or other backdoors, to put things back into a working state. This is important for backdoor resets, but we can actually just virtualize the "front door" resets provided via PCIe and AF FLR. Set these bits as virtualized + writable, allowing the default write to set them in vconfig, then we can simply check the bit, perform an FLR of our own, and clear the bit. We don't actually have the granularity in PCI to specify the type of reset we want to do, but generally devices don't implement both PCIe and AF FLR and we'll favor these over other types of reset, so we should generally lineup. We do test whether the device provides the requested FLR type to stay consistent with hardware capabilities though. This seems to fix several instance of devices getting into bad states with userspace drivers, like dpdk, running inside a VM. Signed-off-by: Alex Williamson Reviewed-by: Greg Rose --- drivers/vfio/pci/vfio_pci_config.c | 82 ++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 5 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index c4f235452d81..65d4a3015542 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -804,6 +804,40 @@ static int __init init_pci_cap_pcix_perm(struct perm_bits *perm) return 0; } +static int vfio_exp_config_write(struct vfio_pci_device *vdev, int pos, + int count, struct perm_bits *perm, + int offset, __le32 val) +{ + __le16 *ctrl = (__le16 *)(vdev->vconfig + pos - + offset + PCI_EXP_DEVCTL); + + count = vfio_default_config_write(vdev, pos, count, perm, offset, val); + if (count < 0) + return count; + + /* + * The FLR bit is virtualized, if set and the device supports PCIe + * FLR, issue a reset_function. Regardless, clear the bit, the spec + * requires it to be always read as zero. NB, reset_function might + * not use a PCIe FLR, we don't have that level of granularity. + */ + if (*ctrl & cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR)) { + u32 cap; + int ret; + + *ctrl &= ~cpu_to_le16(PCI_EXP_DEVCTL_BCR_FLR); + + ret = pci_user_read_config_dword(vdev->pdev, + pos - offset + PCI_EXP_DEVCAP, + &cap); + + if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) + pci_try_reset_function(vdev->pdev); + } + + return count; +} + /* Permissions for PCI Express capability */ static int __init init_pci_cap_exp_perm(struct perm_bits *perm) { @@ -811,26 +845,64 @@ static int __init init_pci_cap_exp_perm(struct perm_bits *perm) if (alloc_perm_bits(perm, PCI_CAP_EXP_ENDPOINT_SIZEOF_V2)) return -ENOMEM; + perm->writefn = vfio_exp_config_write; + p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); /* - * Allow writes to device control fields (includes FLR!) - * but not to devctl_phantom which could confuse IOMMU - * or to the ARI bit in devctl2 which is set at probe time + * Allow writes to device control fields, except devctl_phantom, + * which could confuse IOMMU, and the ARI bit in devctl2, which + * is set at probe time. FLR gets virtualized via our writefn. */ - p_setw(perm, PCI_EXP_DEVCTL, NO_VIRT, ~PCI_EXP_DEVCTL_PHANTOM); + p_setw(perm, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_BCR_FLR, ~PCI_EXP_DEVCTL_PHANTOM); p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI); return 0; } +static int vfio_af_config_write(struct vfio_pci_device *vdev, int pos, + int count, struct perm_bits *perm, + int offset, __le32 val) +{ + u8 *ctrl = vdev->vconfig + pos - offset + PCI_AF_CTRL; + + count = vfio_default_config_write(vdev, pos, count, perm, offset, val); + if (count < 0) + return count; + + /* + * The FLR bit is virtualized, if set and the device supports AF + * FLR, issue a reset_function. Regardless, clear the bit, the spec + * requires it to be always read as zero. NB, reset_function might + * not use an AF FLR, we don't have that level of granularity. + */ + if (*ctrl & PCI_AF_CTRL_FLR) { + u8 cap; + int ret; + + *ctrl &= ~PCI_AF_CTRL_FLR; + + ret = pci_user_read_config_byte(vdev->pdev, + pos - offset + PCI_AF_CAP, + &cap); + + if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) + pci_try_reset_function(vdev->pdev); + } + + return count; +} + /* Permissions for Advanced Function capability */ static int __init init_pci_cap_af_perm(struct perm_bits *perm) { if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_AF])) return -ENOMEM; + perm->writefn = vfio_af_config_write; + p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); - p_setb(perm, PCI_AF_CTRL, NO_VIRT, PCI_AF_CTRL_FLR); + p_setb(perm, PCI_AF_CTRL, PCI_AF_CTRL_FLR, PCI_AF_CTRL_FLR); return 0; } From c93a97ee0583cd65adaf872e7cc402493eae92a7 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 26 Sep 2016 13:52:19 -0600 Subject: [PATCH 4/5] vfio-pci: Disable INTx after MSI/X teardown The MSI/X shutdown path can gratuitously enable INTx, which is not something we want to happen if we're dealing with broken INTx device. Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_intrs.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 152b43822ef1..aa23e9a4a22f 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -414,6 +414,13 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) } else pci_disable_msi(pdev); + /* + * Both disable paths above use pci_intx_for_msi() to clear DisINTx + * via their shutdown paths. Restore for NoINTx devices. + */ + if (vdev->nointx) + pci_intx(pdev, 0); + vdev->irq_type = VFIO_PCI_NUM_IRQS; vdev->num_ctx = 0; kfree(vdev->ctx); From 61771468e0a567f007fc450725063bb9cf7eb199 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 11 Sep 2016 15:31:26 +0200 Subject: [PATCH 5/5] vfio_pci: use pci_alloc_irq_vectors Simplify the interrupt setup by using the new PCI layer helpers. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_intrs.c | 45 +++++++---------------------- drivers/vfio/pci/vfio_pci_private.h | 1 - 2 files changed, 10 insertions(+), 36 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index aa23e9a4a22f..c2e60893cd09 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -250,6 +250,7 @@ static irqreturn_t vfio_msihandler(int irq, void *arg) static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix) { struct pci_dev *pdev = vdev->pdev; + unsigned int flag = msix ? PCI_IRQ_MSIX : PCI_IRQ_MSI; int ret; if (!is_irq_none(vdev)) @@ -259,35 +260,13 @@ static int vfio_msi_enable(struct vfio_pci_device *vdev, int nvec, bool msix) if (!vdev->ctx) return -ENOMEM; - if (msix) { - int i; - - vdev->msix = kzalloc(nvec * sizeof(struct msix_entry), - GFP_KERNEL); - if (!vdev->msix) { - kfree(vdev->ctx); - return -ENOMEM; - } - - for (i = 0; i < nvec; i++) - vdev->msix[i].entry = i; - - ret = pci_enable_msix_range(pdev, vdev->msix, 1, nvec); - if (ret < nvec) { - if (ret > 0) - pci_disable_msix(pdev); - kfree(vdev->msix); - kfree(vdev->ctx); - return ret; - } - } else { - ret = pci_enable_msi_range(pdev, 1, nvec); - if (ret < nvec) { - if (ret > 0) - pci_disable_msi(pdev); - kfree(vdev->ctx); - return ret; - } + /* return the number of supported vectors if we can't get all: */ + ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag); + if (ret < nvec) { + if (ret > 0) + pci_free_irq_vectors(pdev); + kfree(vdev->ctx); + return ret; } vdev->num_ctx = nvec; @@ -315,7 +294,7 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, if (vector < 0 || vector >= vdev->num_ctx) return -EINVAL; - irq = msix ? vdev->msix[vector].vector : pdev->irq + vector; + irq = pci_irq_vector(pdev, vector); if (vdev->ctx[vector].trigger) { free_irq(irq, vdev->ctx[vector].trigger); @@ -408,11 +387,7 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); - if (msix) { - pci_disable_msix(vdev->pdev); - kfree(vdev->msix); - } else - pci_disable_msi(pdev); + pci_free_irq_vectors(pdev); /* * Both disable paths above use pci_intx_for_msi() to clear DisINTx diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 2128de86c80d..f561ac1c78a0 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -72,7 +72,6 @@ struct vfio_pci_device { struct perm_bits *msi_perm; spinlock_t irqlock; struct mutex igate; - struct msix_entry *msix; struct vfio_pci_irq_ctx *ctx; int num_ctx; int irq_type;