From 0dad3ce523c2917b1912fbde047207533e9f1eeb Mon Sep 17 00:00:00 2001 From: Raphael Norwitz Date: Thu, 8 Apr 2021 18:23:40 +0000 Subject: [PATCH 01/48] PCI: Add pci_reset_bus_function() Secondary Bus Reset interface pci_parent_bus_reset() resets a device by performing a Secondary Bus Reset on a PCI-to-PCI bridge leading to the device. pci_dev_reset_slot_function() does the same, except that it uses a hotplug driver to keep the reset from looking like a hot-remove followed by a hot-add. Add a pci_reset_bus_function() wrapper, which attempts the hotplug driver slot reset and falls back to the parent bus reset if that fails. This provides a single interface for performing a Secondary Bus Reset. [bhelgaas: commit log, don't expose yet] Suggested-by: Alex Williamson Link: https://lore.kernel.org/r/20210323100625.0021a943@omen.home.shazbot.org/ Link: https://lore.kernel.org/r/20210408182328.12323-1-raphael.norwitz@nutanix.com Signed-off-by: Amey Narkhede Signed-off-by: Raphael Norwitz Signed-off-by: Bjorn Helgaas Reviewed-by: Leon Romanovsky --- drivers/pci/pci.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index b717680377a9..452351025a09 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5020,6 +5020,16 @@ static int pci_dev_reset_slot_function(struct pci_dev *dev, int probe) return pci_reset_hotplug_slot(dev->slot->hotplug, probe); } +static int pci_reset_bus_function(struct pci_dev *dev, int probe) +{ + int rc; + + rc = pci_dev_reset_slot_function(dev, probe); + if (rc != -ENOTTY) + return rc; + return pci_parent_bus_reset(dev, probe); +} + static void pci_dev_lock(struct pci_dev *dev) { pci_cfg_access_lock(dev); @@ -5140,10 +5150,7 @@ int __pci_reset_function_locked(struct pci_dev *dev) rc = pci_pm_reset(dev, 0); if (rc != -ENOTTY) return rc; - rc = pci_dev_reset_slot_function(dev, 0); - if (rc != -ENOTTY) - return rc; - return pci_parent_bus_reset(dev, 0); + return pci_reset_bus_function(dev, 0); } EXPORT_SYMBOL_GPL(__pci_reset_function_locked); @@ -5173,13 +5180,10 @@ int pci_probe_reset_function(struct pci_dev *dev) if (rc != -ENOTTY) return rc; rc = pci_pm_reset(dev, 1); - if (rc != -ENOTTY) - return rc; - rc = pci_dev_reset_slot_function(dev, 1); if (rc != -ENOTTY) return rc; - return pci_parent_bus_reset(dev, 1); + return pci_reset_bus_function(dev, 1); } /** From 411e2a43d210e98730713acf6d01dcf823ee35e3 Mon Sep 17 00:00:00 2001 From: Chiqijun Date: Mon, 24 May 2021 17:44:07 -0500 Subject: [PATCH 02/48] PCI: Work around Huawei Intelligent NIC VF FLR erratum pcie_flr() starts a Function Level Reset (FLR), waits 100ms (the maximum time allowed for FLR completion by PCIe r5.0, sec 6.6.2), and waits for the FLR to complete. It assumes the FLR is complete when a config read returns valid data. When we do an FLR on several Huawei Intelligent NIC VFs at the same time, firmware on the NIC processes them serially. The VF may respond to config reads before the firmware has completed its reset processing. If we bind a driver to the VF (e.g., by assigning the VF to a virtual machine) in the interval between the successful config read and completion of the firmware reset processing, the NIC VF driver may fail to load. Prevent this driver failure by waiting for the NIC firmware to complete its reset processing. Not all NIC firmware supports this feature. [bhelgaas: commit log] Link: https://support.huawei.com/enterprise/en/doc/EDOC1100063073/87950645/vm-oss-occasionally-fail-to-load-the-in200-driver-when-the-vf-performs-flr Link: https://lore.kernel.org/r/20210414132301.1793-1-chiqijun@huawei.com Signed-off-by: Chiqijun Signed-off-by: Bjorn Helgaas Reviewed-by: Alex Williamson --- drivers/pci/quirks.c | 65 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index dcb229de1acb..d85914afe65a 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3901,6 +3901,69 @@ static int delay_250ms_after_flr(struct pci_dev *dev, int probe) return 0; } +#define PCI_DEVICE_ID_HINIC_VF 0x375E +#define HINIC_VF_FLR_TYPE 0x1000 +#define HINIC_VF_FLR_CAP_BIT (1UL << 30) +#define HINIC_VF_OP 0xE80 +#define HINIC_VF_FLR_PROC_BIT (1UL << 18) +#define HINIC_OPERATION_TIMEOUT 15000 /* 15 seconds */ + +/* Device-specific reset method for Huawei Intelligent NIC virtual functions */ +static int reset_hinic_vf_dev(struct pci_dev *pdev, int probe) +{ + unsigned long timeout; + void __iomem *bar; + u32 val; + + if (probe) + return 0; + + bar = pci_iomap(pdev, 0, 0); + if (!bar) + return -ENOTTY; + + /* Get and check firmware capabilities */ + val = ioread32be(bar + HINIC_VF_FLR_TYPE); + if (!(val & HINIC_VF_FLR_CAP_BIT)) { + pci_iounmap(pdev, bar); + return -ENOTTY; + } + + /* Set HINIC_VF_FLR_PROC_BIT for the start of FLR */ + val = ioread32be(bar + HINIC_VF_OP); + val = val | HINIC_VF_FLR_PROC_BIT; + iowrite32be(val, bar + HINIC_VF_OP); + + pcie_flr(pdev); + + /* + * The device must recapture its Bus and Device Numbers after FLR + * in order generate Completions. Issue a config write to let the + * device capture this information. + */ + pci_write_config_word(pdev, PCI_VENDOR_ID, 0); + + /* Firmware clears HINIC_VF_FLR_PROC_BIT when reset is complete */ + timeout = jiffies + msecs_to_jiffies(HINIC_OPERATION_TIMEOUT); + do { + val = ioread32be(bar + HINIC_VF_OP); + if (!(val & HINIC_VF_FLR_PROC_BIT)) + goto reset_complete; + msleep(20); + } while (time_before(jiffies, timeout)); + + val = ioread32be(bar + HINIC_VF_OP); + if (!(val & HINIC_VF_FLR_PROC_BIT)) + goto reset_complete; + + pci_warn(pdev, "Reset dev timeout, FLR ack reg: %#010x\n", val); + +reset_complete: + pci_iounmap(pdev, bar); + + return 0; +} + static const struct pci_dev_reset_methods pci_dev_reset_methods[] = { { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82599_SFP_VF, reset_intel_82599_sfp_virtfn }, @@ -3913,6 +3976,8 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = { { PCI_VENDOR_ID_INTEL, 0x0a54, delay_250ms_after_flr }, { PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID, reset_chelsio_generic_dev }, + { PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_HINIC_VF, + reset_hinic_vf_dev }, { 0 } }; From 95ea95396135f3c44ff6265f29343407b4ccf365 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Mon, 29 Mar 2021 19:02:01 +0800 Subject: [PATCH 03/48] PCI/AER: Use consistent format when printing PCI device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We use format domain:bus:slot.function when printing PCI device. Use consistent format in AER messages. [bhelgaas: also drop "AER recover:" prefix since we already have an "AER:" prefix from pr_fmt()] Link: https://lore.kernel.org/r/1617015721-51701-1-git-send-email-yangyicong@hisilicon.com Signed-off-by: Yicong Yang Signed-off-by: Bjorn Helgaas Reviewed-by: Krzysztof Wilczyński --- drivers/pci/pcie/aer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index ec943cee5ecc..9ae012ef9266 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -983,7 +983,7 @@ static void aer_recover_work_func(struct work_struct *work) pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus, entry.devfn); if (!pdev) { - pr_err("AER recover: Can not find pci_dev for %04x:%02x:%02x:%x\n", + pr_err("no pci_dev for %04x:%02x:%02x.%x\n", entry.domain, entry.bus, PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); continue; @@ -1022,7 +1022,7 @@ void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, &aer_recover_ring_lock)) schedule_work(&aer_recover_work); else - pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n", + pr_err("buffer overflow in recovery for %04x:%02x:%02x.%x\n", domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); } EXPORT_SYMBOL_GPL(aer_recover_queue); From 0a470c843d233c2f6b68ae65357a246d9fb66178 Mon Sep 17 00:00:00 2001 From: Yang Li Date: Mon, 15 Mar 2021 15:40:00 +0800 Subject: [PATCH 04/48] x86/pci: Return true/false (not 1/0) from bool functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return boolean values ("true" or "false") instead of 1 or 0 from bool functions. This fixes the following warnings from coccicheck: ./arch/x86/pci/mmconfig-shared.c:464:9-10: WARNING: return of 0/1 in function 'is_mmconf_reserved' with return type bool ./arch/x86/pci/mmconfig-shared.c:493:5-6: WARNING: return of 0/1 in function 'is_mmconf_reserved' with return type bool ./arch/x86/pci/mmconfig-shared.c:501:9-10: WARNING: return of 0/1 in function 'is_mmconf_reserved' with return type bool ./arch/x86/pci/mmconfig-shared.c:522:5-6: WARNING: return of 0/1 in function 'is_mmconf_reserved' with return type bool Link: https://lore.kernel.org/r/1615794000-102771-1-git-send-email-yang.lee@linux.alibaba.com Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Bjorn Helgaas Reviewed-by: Krzysztof Wilczyński --- arch/x86/pci/mmconfig-shared.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index de6bf0e7e8f8..758cbfe55daa 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -461,7 +461,7 @@ static bool __ref is_mmconf_reserved(check_reserved_t is_reserved, } if (size < (16UL<<20) && size != old_size) - return 0; + return false; if (dev) dev_info(dev, "MMCONFIG at %pR reserved in %s\n", @@ -493,7 +493,7 @@ static bool __ref is_mmconf_reserved(check_reserved_t is_reserved, &cfg->res, (unsigned long) cfg->address); } - return 1; + return true; } static bool __ref @@ -501,7 +501,7 @@ pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int e { if (!early && !acpi_disabled) { if (is_mmconf_reserved(is_acpi_reserved, cfg, dev, 0)) - return 1; + return true; if (dev) dev_info(dev, FW_INFO @@ -522,14 +522,14 @@ pci_mmcfg_check_reserved(struct device *dev, struct pci_mmcfg_region *cfg, int e * _CBA method, just assume it's reserved. */ if (pci_mmcfg_running_state) - return 1; + return true; /* Don't try to do this check unless configuration type 1 is available. how about type 2 ?*/ if (raw_pci_ops) return is_mmconf_reserved(e820__mapped_all, cfg, dev, 1); - return 0; + return false; } static void __init pci_mmcfg_reject_broken(int early) From ea4aae05974334e9837d86ff1cb716bad36b3ca8 Mon Sep 17 00:00:00 2001 From: Niklas Schnelle Date: Thu, 11 Mar 2021 14:23:12 +0100 Subject: [PATCH 05/48] PCI: Print a debug message on PCI device release Commit 62795041418d ("PCI: enhance physical slot debug information") added a debug print on releasing the PCI slot and another message on destroying it. There is however no debug print on releasing the PCI device structure itself and even with closely looking at the kernel log during hotplug testing, I overlooked several missing pci_dev_put() calls for way too long. Add a debug print in pci_release_dev() making it much easier to spot when the PCI device structure is not released when it is supposed to be. Link: https://lore.kernel.org/r/20210311132312.2882425-1-schnelle@linux.ibm.com Signed-off-by: Niklas Schnelle Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 3a62d09b8869..7d9eddc2c913 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2226,6 +2226,7 @@ static void pci_release_dev(struct device *dev) pci_bus_put(pci_dev->bus); kfree(pci_dev->driver_override); bitmap_free(pci_dev->dma_alias_mask); + dev_dbg(dev, "device released\n"); kfree(pci_dev); } From 8e3237989b0d38176a3603422777ac7da6bfab2b Mon Sep 17 00:00:00 2001 From: Wesley Sheng Date: Mon, 31 May 2021 16:12:15 +0800 Subject: [PATCH 06/48] Documentation: PCI: Fix typo in pci-error-recovery.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace "It" with "If", since it is a conditional statement. Link: https://lore.kernel.org/r/20210531081215.43507-1-wesley.sheng@amd.com Signed-off-by: Wesley Sheng Signed-off-by: Bjorn Helgaas Reviewed-by: Krzysztof Wilczyński --- Documentation/PCI/pci-error-recovery.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst index 84ceebb08cac..187f43a03200 100644 --- a/Documentation/PCI/pci-error-recovery.rst +++ b/Documentation/PCI/pci-error-recovery.rst @@ -295,7 +295,7 @@ and let the driver restart normal I/O processing. A driver can still return a critical failure for this function if it can't get the device operational after reset. If the platform previously tried a soft reset, it might now try a hard reset (power -cycle) and then call slot_reset() again. It the device still can't +cycle) and then call slot_reset() again. If the device still can't be recovered, there is nothing more that can be done; the platform will typically report a "permanent failure" in such a case. The device will be considered "dead" in this case. From 1243106474294ea4ea95d9fc076549817814ce1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Sun, 9 May 2021 04:19:32 +0000 Subject: [PATCH 07/48] PCI: microchip: Make the struct event_descs static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The struct event_descs does not have any users outside the pcie-microchip-host.c file, and has no previous declaration, thus it can be made static. This resolves the following sparse warning: drivers/pci/controller/pcie-microchip-host.c:352:3: warning: symbol 'event_descs' was not declared. Should it be static? Link: https://lore.kernel.org/r/20210509041932.560340-1-kw@linux.com Signed-off-by: Krzysztof Wilczyński Signed-off-by: Lorenzo Pieralisi --- drivers/pci/controller/pcie-microchip-host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-microchip-host.c b/drivers/pci/controller/pcie-microchip-host.c index 89c68c56d93b..fdab8202ae5d 100644 --- a/drivers/pci/controller/pcie-microchip-host.c +++ b/drivers/pci/controller/pcie-microchip-host.c @@ -341,7 +341,7 @@ static struct event_map local_status_to_event[] = { LOCAL_STATUS_TO_EVENT_MAP(PM_MSI_INT_SYS_ERR), }; -struct { +static struct { u32 base; u32 offset; u32 mask; From 42d7a8dc195f99e2e99d8f38a683e0852a29f6af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Mon, 10 May 2021 02:30:32 +0000 Subject: [PATCH 08/48] PCI: mobiveil: Remove unused readl and writel functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The PCIe host controller driver for Layerscape 4th generation SoC was added in the commit d29ad70a813b ("PCI: mobiveil: Add PCIe Gen4 RC driver for Layerscape SoCs"). At this time two static functions were introduced that appear to currently have no users. Since nothing is using neither of these functions at the moment they can be safely removed. This resolves the following build time warnings: drivers/pci/controller/mobiveil/pcie-layerscape-gen4.c:45:19: warning: unused function 'ls_pcie_g4_lut_readl' [-Wunused-function] drivers/pci/controller/mobiveil/pcie-layerscape-gen4.c:50:20: warning: unused function 'ls_pcie_g4_lut_writel' [-Wunused-function] Link: https://lore.kernel.org/r/20210510023032.3063932-1-kw@linux.com Signed-off-by: Krzysztof Wilczyński Signed-off-by: Lorenzo Pieralisi --- .../pci/controller/mobiveil/pcie-layerscape-gen4.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/pci/controller/mobiveil/pcie-layerscape-gen4.c b/drivers/pci/controller/mobiveil/pcie-layerscape-gen4.c index ee0156921ebc..306950272fd6 100644 --- a/drivers/pci/controller/mobiveil/pcie-layerscape-gen4.c +++ b/drivers/pci/controller/mobiveil/pcie-layerscape-gen4.c @@ -42,17 +42,6 @@ struct ls_pcie_g4 { int irq; }; -static inline u32 ls_pcie_g4_lut_readl(struct ls_pcie_g4 *pcie, u32 off) -{ - return ioread32(pcie->pci.csr_axi_slave_base + PCIE_LUT_OFF + off); -} - -static inline void ls_pcie_g4_lut_writel(struct ls_pcie_g4 *pcie, - u32 off, u32 val) -{ - iowrite32(val, pcie->pci.csr_axi_slave_base + PCIE_LUT_OFF + off); -} - static inline u32 ls_pcie_g4_pf_readl(struct ls_pcie_g4 *pcie, u32 off) { return ioread32(pcie->pci.csr_axi_slave_base + PCIE_PF_OFF + off); From 5be967d5016ac5ffb9c4d0df51b48441ee4d5ed1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 17 May 2021 16:41:17 -0700 Subject: [PATCH 09/48] PCI: ftpci100: Rename macro name collision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCI_IOSIZE is defined in mach-loongson64/spaces.h, so change the name of the PCI_* macros in pci-ftpci100.c to use FTPCI_* so that they are more localized and won't conflict with other drivers or arches. ../drivers/pci/controller/pci-ftpci100.c:37: warning: "PCI_IOSIZE" redefined 37 | #define PCI_IOSIZE 0x00 | In file included from ../arch/mips/include/asm/addrspace.h:13, ... from ../drivers/pci/controller/pci-ftpci100.c:15: arch/mips/include/asm/mach-loongson64/spaces.h:11: note: this is the location of the previous definition 11 | #define PCI_IOSIZE SZ_16M Suggested-by: Linus Walleij Link: https://lore.kernel.org/r/20210517234117.3660-1-rdunlap@infradead.org Reported-by: kernel test robot Signed-off-by: Randy Dunlap Signed-off-by: Lorenzo Pieralisi Reviewed-by: Linus Walleij Cc: Jiaxun Yang Cc: Linus Walleij Cc: Krzysztof Wilczyński Cc: Thomas Bogendoerfer Cc: linux-mips@vger.kernel.org --- drivers/pci/controller/pci-ftpci100.c | 30 +++++++++++++-------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/pci/controller/pci-ftpci100.c b/drivers/pci/controller/pci-ftpci100.c index da3cd216da00..aefef1986201 100644 --- a/drivers/pci/controller/pci-ftpci100.c +++ b/drivers/pci/controller/pci-ftpci100.c @@ -34,12 +34,12 @@ * Special configuration registers directly in the first few words * in I/O space. */ -#define PCI_IOSIZE 0x00 -#define PCI_PROT 0x04 /* AHB protection */ -#define PCI_CTRL 0x08 /* PCI control signal */ -#define PCI_SOFTRST 0x10 /* Soft reset counter and response error enable */ -#define PCI_CONFIG 0x28 /* PCI configuration command register */ -#define PCI_DATA 0x2C +#define FTPCI_IOSIZE 0x00 +#define FTPCI_PROT 0x04 /* AHB protection */ +#define FTPCI_CTRL 0x08 /* PCI control signal */ +#define FTPCI_SOFTRST 0x10 /* Soft reset counter and response error enable */ +#define FTPCI_CONFIG 0x28 /* PCI configuration command register */ +#define FTPCI_DATA 0x2C #define FARADAY_PCI_STATUS_CMD 0x04 /* Status and command */ #define FARADAY_PCI_PMC 0x40 /* Power management control */ @@ -195,9 +195,9 @@ static int faraday_raw_pci_read_config(struct faraday_pci *p, int bus_number, PCI_CONF_FUNCTION(PCI_FUNC(fn)) | PCI_CONF_WHERE(config) | PCI_CONF_ENABLE, - p->base + PCI_CONFIG); + p->base + FTPCI_CONFIG); - *value = readl(p->base + PCI_DATA); + *value = readl(p->base + FTPCI_DATA); if (size == 1) *value = (*value >> (8 * (config & 3))) & 0xFF; @@ -230,17 +230,17 @@ static int faraday_raw_pci_write_config(struct faraday_pci *p, int bus_number, PCI_CONF_FUNCTION(PCI_FUNC(fn)) | PCI_CONF_WHERE(config) | PCI_CONF_ENABLE, - p->base + PCI_CONFIG); + p->base + FTPCI_CONFIG); switch (size) { case 4: - writel(value, p->base + PCI_DATA); + writel(value, p->base + FTPCI_DATA); break; case 2: - writew(value, p->base + PCI_DATA + (config & 3)); + writew(value, p->base + FTPCI_DATA + (config & 3)); break; case 1: - writeb(value, p->base + PCI_DATA + (config & 3)); + writeb(value, p->base + FTPCI_DATA + (config & 3)); break; default: ret = PCIBIOS_BAD_REGISTER_NUMBER; @@ -469,7 +469,7 @@ static int faraday_pci_probe(struct platform_device *pdev) if (!faraday_res_to_memcfg(io->start - win->offset, resource_size(io), &val)) { /* setup I/O space size */ - writel(val, p->base + PCI_IOSIZE); + writel(val, p->base + FTPCI_IOSIZE); } else { dev_err(dev, "illegal IO mem size\n"); return -EINVAL; @@ -477,11 +477,11 @@ static int faraday_pci_probe(struct platform_device *pdev) } /* Setup hostbridge */ - val = readl(p->base + PCI_CTRL); + val = readl(p->base + FTPCI_CTRL); val |= PCI_COMMAND_IO; val |= PCI_COMMAND_MEMORY; val |= PCI_COMMAND_MASTER; - writel(val, p->base + PCI_CTRL); + writel(val, p->base + FTPCI_CTRL); /* Mask and clear all interrupts */ faraday_raw_pci_write_config(p, 0, 0, FARADAY_PCI_CTRL2 + 2, 2, 0xF000); if (variant->cascaded_irq) { From 28bba1e220775e41dbddda715892aa0a497fe835 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 11 May 2021 20:24:53 +0800 Subject: [PATCH 10/48] PCI: mediatek: Remove redundant error printing in mtk_pcie_subsys_powerup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When devm_ioremap_resource() fails, a clear enough error message will be printed by its subfunction __devm_ioremap_resource(). The error information contains the device name, failure cause, and possibly resource information. Therefore, remove the error printing here to simplify code and reduce the binary size. Link: https://lore.kernel.org/r/20210511122453.6052-1-thunder.leizhen@huawei.com Reported-by: Hulk Robot Signed-off-by: Zhen Lei Signed-off-by: Lorenzo Pieralisi Reviewed-by: Krzysztof Wilczyński --- drivers/pci/controller/pcie-mediatek.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/pci/controller/pcie-mediatek.c b/drivers/pci/controller/pcie-mediatek.c index 62a042e75d9a..25bee693834f 100644 --- a/drivers/pci/controller/pcie-mediatek.c +++ b/drivers/pci/controller/pcie-mediatek.c @@ -991,10 +991,8 @@ static int mtk_pcie_subsys_powerup(struct mtk_pcie *pcie) regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "subsys"); if (regs) { pcie->base = devm_ioremap_resource(dev, regs); - if (IS_ERR(pcie->base)) { - dev_err(dev, "failed to map shared register\n"); + if (IS_ERR(pcie->base)) return PTR_ERR(pcie->base); - } } pcie->free_ck = devm_clk_get(dev, "free_ck"); From 94d22763207ac6633612b8d8e0ca4fba0f7aa139 Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 12 May 2021 01:06:40 -0700 Subject: [PATCH 11/48] PCI: hv: Fix a race condition when removing the device On removing the device, any work item (hv_pci_devices_present() or hv_pci_eject_device()) scheduled on workqueue hbus->wq may still be running and race with hv_pci_remove(). This can happen because the host may send PCI_EJECT or PCI_BUS_RELATIONS(2) and decide to rescind the channel immediately after that. Fix this by flushing/destroying the workqueue of hbus before doing hbus remove. Link: https://lore.kernel.org/r/1620806800-30983-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Long Li Signed-off-by: Lorenzo Pieralisi Reviewed-by: Michael Kelley --- drivers/pci/controller/pci-hyperv.c | 30 ++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 6511648271b2..272c63ac49f9 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -444,7 +444,6 @@ enum hv_pcibus_state { hv_pcibus_probed, hv_pcibus_installed, hv_pcibus_removing, - hv_pcibus_removed, hv_pcibus_maximum }; @@ -3243,8 +3242,9 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs) struct pci_packet teardown_packet; u8 buffer[sizeof(struct pci_message)]; } pkt; - struct hv_dr_state *dr; struct hv_pci_compl comp_pkt; + struct hv_pci_dev *hpdev, *tmp; + unsigned long flags; int ret; /* @@ -3256,9 +3256,16 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs) if (!keep_devs) { /* Delete any children which might still exist. */ - dr = kzalloc(sizeof(*dr), GFP_KERNEL); - if (dr && hv_pci_start_relations_work(hbus, dr)) - kfree(dr); + spin_lock_irqsave(&hbus->device_list_lock, flags); + list_for_each_entry_safe(hpdev, tmp, &hbus->children, list_entry) { + list_del(&hpdev->list_entry); + if (hpdev->pci_slot) + pci_destroy_slot(hpdev->pci_slot); + /* For the two refs got in new_pcichild_device() */ + put_pcichild(hpdev); + put_pcichild(hpdev); + } + spin_unlock_irqrestore(&hbus->device_list_lock, flags); } ret = hv_send_resources_released(hdev); @@ -3301,13 +3308,23 @@ static int hv_pci_remove(struct hv_device *hdev) hbus = hv_get_drvdata(hdev); if (hbus->state == hv_pcibus_installed) { + tasklet_disable(&hdev->channel->callback_event); + hbus->state = hv_pcibus_removing; + tasklet_enable(&hdev->channel->callback_event); + destroy_workqueue(hbus->wq); + hbus->wq = NULL; + /* + * At this point, no work is running or can be scheduled + * on hbus-wq. We can't race with hv_pci_devices_present() + * or hv_pci_eject_device(), it's safe to proceed. + */ + /* Remove the bus from PCI's point of view. */ pci_lock_rescan_remove(); pci_stop_root_bus(hbus->pci_bus); hv_pci_remove_slots(hbus); pci_remove_root_bus(hbus->pci_bus); pci_unlock_rescan_remove(); - hbus->state = hv_pcibus_removed; } ret = hv_pci_bus_exit(hdev, false); @@ -3322,7 +3339,6 @@ static int hv_pci_remove(struct hv_device *hdev) irq_domain_free_fwnode(hbus->sysdata.fwnode); put_hvpcibus(hbus); wait_for_completion(&hbus->remove_event); - destroy_workqueue(hbus->wq); hv_put_dom_num(hbus->sysdata.domain); From 326dc2e1e59a98c61c3c71616496422af522678c Mon Sep 17 00:00:00 2001 From: Long Li Date: Wed, 12 May 2021 01:06:49 -0700 Subject: [PATCH 12/48] PCI: hv: Remove bus device removal unused refcount/functions With the new method of flushing/stopping the workqueue before doing bus removal, the old mechanism of using refcount and wait for completion is no longer needed. Remove those dead code. Link: https://lore.kernel.org/r/1620806809-31055-1-git-send-email-longli@linuxonhyperv.com Signed-off-by: Long Li [lorenzo.pieralisi@arm.com: Reworded subject] Signed-off-by: Lorenzo Pieralisi Reviewed-by: Michael Kelley --- drivers/pci/controller/pci-hyperv.c | 34 +++-------------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 272c63ac49f9..8807b6247e9e 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -452,7 +452,6 @@ struct hv_pcibus_device { /* Protocol version negotiated with the host */ enum pci_protocol_version_t protocol_version; enum hv_pcibus_state state; - refcount_t remove_lock; struct hv_device *hdev; resource_size_t low_mmio_space; resource_size_t high_mmio_space; @@ -460,7 +459,6 @@ struct hv_pcibus_device { struct resource *low_mmio_res; struct resource *high_mmio_res; struct completion *survey_event; - struct completion remove_event; struct pci_bus *pci_bus; spinlock_t config_lock; /* Avoid two threads writing index page */ spinlock_t device_list_lock; /* Protect lists below */ @@ -592,9 +590,6 @@ static void put_pcichild(struct hv_pci_dev *hpdev) kfree(hpdev); } -static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus); -static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus); - /* * There is no good way to get notified from vmbus_onoffer_rescind(), * so let's use polling here, since this is not a hot path. @@ -2063,10 +2058,8 @@ static void pci_devices_present_work(struct work_struct *work) } spin_unlock_irqrestore(&hbus->device_list_lock, flags); - if (!dr) { - put_hvpcibus(hbus); + if (!dr) return; - } /* First, mark all existing children as reported missing. */ spin_lock_irqsave(&hbus->device_list_lock, flags); @@ -2149,7 +2142,6 @@ static void pci_devices_present_work(struct work_struct *work) break; } - put_hvpcibus(hbus); kfree(dr); } @@ -2190,12 +2182,10 @@ static int hv_pci_start_relations_work(struct hv_pcibus_device *hbus, list_add_tail(&dr->list_entry, &hbus->dr_list); spin_unlock_irqrestore(&hbus->device_list_lock, flags); - if (pending_dr) { + if (pending_dr) kfree(dr_wrk); - } else { - get_hvpcibus(hbus); + else queue_work(hbus->wq, &dr_wrk->wrk); - } return 0; } @@ -2338,8 +2328,6 @@ static void hv_eject_device_work(struct work_struct *work) put_pcichild(hpdev); put_pcichild(hpdev); /* hpdev has been freed. Do not use it any more. */ - - put_hvpcibus(hbus); } /** @@ -2363,7 +2351,6 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev) hpdev->state = hv_pcichild_ejecting; get_pcichild(hpdev); INIT_WORK(&hpdev->wrk, hv_eject_device_work); - get_hvpcibus(hbus); queue_work(hbus->wq, &hpdev->wrk); } @@ -2963,17 +2950,6 @@ static int hv_send_resources_released(struct hv_device *hdev) return 0; } -static void get_hvpcibus(struct hv_pcibus_device *hbus) -{ - refcount_inc(&hbus->remove_lock); -} - -static void put_hvpcibus(struct hv_pcibus_device *hbus) -{ - if (refcount_dec_and_test(&hbus->remove_lock)) - complete(&hbus->remove_event); -} - #define HVPCI_DOM_MAP_SIZE (64 * 1024) static DECLARE_BITMAP(hvpci_dom_map, HVPCI_DOM_MAP_SIZE); @@ -3093,14 +3069,12 @@ static int hv_pci_probe(struct hv_device *hdev, hbus->sysdata.domain = dom; hbus->hdev = hdev; - refcount_set(&hbus->remove_lock, 1); INIT_LIST_HEAD(&hbus->children); INIT_LIST_HEAD(&hbus->dr_list); INIT_LIST_HEAD(&hbus->resources_for_children); spin_lock_init(&hbus->config_lock); spin_lock_init(&hbus->device_list_lock); spin_lock_init(&hbus->retarget_msi_interrupt_lock); - init_completion(&hbus->remove_event); hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0, hbus->sysdata.domain); if (!hbus->wq) { @@ -3337,8 +3311,6 @@ static int hv_pci_remove(struct hv_device *hdev) hv_pci_free_bridge_windows(hbus); irq_domain_remove(hbus->irq_domain); irq_domain_free_fwnode(hbus->sysdata.fwnode); - put_hvpcibus(hbus); - wait_for_completion(&hbus->remove_event); hv_put_dom_num(hbus->sysdata.domain); From bdcdaa13ad96f1a530711c29e6d4b8311eff767c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Thu, 3 Jun 2021 00:01:12 +0000 Subject: [PATCH 13/48] PCI/sysfs: Fix dsm_label_utf16s_to_utf8s() buffer overrun MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "utf16s_to_utf8s(..., buf, PAGE_SIZE)" puts up to PAGE_SIZE bytes into "buf" and returns the number of bytes it actually put there. If it wrote PAGE_SIZE bytes, the newline added by dsm_label_utf16s_to_utf8s() would overrun "buf". Reduce the size available for utf16s_to_utf8s() to use so there is always space for the newline. [bhelgaas: reorder patch in series, commit log] Fixes: 6058989bad05 ("PCI: Export ACPI _DSM provided firmware instance number and string name to sysfs") Link: https://lore.kernel.org/r/20210603000112.703037-7-kw@linux.com Reported-by: Joe Perches Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/pci-label.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c index c32f3b7540e8..76b381cf70b2 100644 --- a/drivers/pci/pci-label.c +++ b/drivers/pci/pci-label.c @@ -145,7 +145,7 @@ static void dsm_label_utf16s_to_utf8s(union acpi_object *obj, char *buf) len = utf16s_to_utf8s((const wchar_t *)obj->buffer.pointer, obj->buffer.length, UTF16_LITTLE_ENDIAN, - buf, PAGE_SIZE); + buf, PAGE_SIZE - 1); buf[len] = '\n'; } From 316ae33051215f92c72fe13bc1bfc4e513a26700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Thu, 3 Jun 2021 00:01:08 +0000 Subject: [PATCH 14/48] PCI/sysfs: Rely on lengths from scnprintf(), dsm_label_utf16s_to_utf8s() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit scnprintf() returns the number of bytes written into the buffer. Change dsm_label_utf16s_to_utf8s() to do the same. Rely on those values instead of using strlen() to compute the buffer length. No functional change intended. [bhelgaas: reorder patch in series, len++ to include newline added by dsm_label_utf16s_to_utf8s(), commit log] Link: https://lore.kernel.org/r/20210603000112.703037-3-kw@linux.com Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe --- drivers/pci/pci-label.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c index 76b381cf70b2..94b59e37939c 100644 --- a/drivers/pci/pci-label.c +++ b/drivers/pci/pci-label.c @@ -139,14 +139,17 @@ enum acpi_attr_enum { ACPI_ATTR_INDEX_SHOW, }; -static void dsm_label_utf16s_to_utf8s(union acpi_object *obj, char *buf) +static int dsm_label_utf16s_to_utf8s(union acpi_object *obj, char *buf) { int len; + len = utf16s_to_utf8s((const wchar_t *)obj->buffer.pointer, obj->buffer.length, UTF16_LITTLE_ENDIAN, buf, PAGE_SIZE - 1); - buf[len] = '\n'; + buf[len++] = '\n'; + + return len; } static int dsm_get_label(struct device *dev, char *buf, @@ -154,7 +157,7 @@ static int dsm_get_label(struct device *dev, char *buf, { acpi_handle handle = ACPI_HANDLE(dev); union acpi_object *obj, *tmp; - int len = -1; + int len = 0; if (!handle) return -1; @@ -175,20 +178,19 @@ static int dsm_get_label(struct device *dev, char *buf, * this entry must return a null string. */ if (attr == ACPI_ATTR_INDEX_SHOW) { - scnprintf(buf, PAGE_SIZE, "%llu\n", tmp->integer.value); + len = scnprintf(buf, PAGE_SIZE, "%llu\n", tmp->integer.value); } else if (attr == ACPI_ATTR_LABEL_SHOW) { if (tmp[1].type == ACPI_TYPE_STRING) - scnprintf(buf, PAGE_SIZE, "%s\n", + len = scnprintf(buf, PAGE_SIZE, "%s\n", tmp[1].string.pointer); else if (tmp[1].type == ACPI_TYPE_BUFFER) - dsm_label_utf16s_to_utf8s(tmp + 1, buf); + len = dsm_label_utf16s_to_utf8s(tmp + 1, buf); } - len = strlen(buf) > 0 ? strlen(buf) : -1; } ACPI_FREE(obj); - return len; + return len > 0 ? len : -1; } static ssize_t label_show(struct device *dev, struct device_attribute *attr, From f8cf6e513ec4f0e207f56c27d5030da429ac2cae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Thu, 3 Jun 2021 00:01:07 +0000 Subject: [PATCH 15/48] PCI/sysfs: Use sysfs_emit() and sysfs_emit_at() in "show" functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs_emit() and sysfs_emit_at() functions were introduced to make it less ambiguous which function is preferred when writing to the output buffer in a device attribute's "show" callback [1]. Convert the PCI sysfs object "show" functions from sprintf(), snprintf() and scnprintf() to sysfs_emit() and sysfs_emit_at() accordingly, as the latter is aware of the PAGE_SIZE buffer and correctly returns the number of bytes written into the buffer. No functional change intended. [1] Documentation/filesystems/sysfs.rst Related commit: ad025f8e46f3 ("PCI/sysfs: Use sysfs_emit() and sysfs_emit_at() in "show" functions"). Link: https://lore.kernel.org/r/20210603000112.703037-2-kw@linux.com Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe --- drivers/pci/hotplug/pci_hotplug_core.c | 8 +++--- drivers/pci/hotplug/rpadlpar_sysfs.c | 4 +-- drivers/pci/hotplug/shpchp_sysfs.c | 38 ++++++++++++++------------ drivers/pci/iov.c | 12 ++++---- drivers/pci/msi.c | 8 +++--- drivers/pci/p2pdma.c | 7 ++--- drivers/pci/pci-label.c | 6 ++-- drivers/pci/pci.c | 2 +- drivers/pci/pcie/aer.c | 20 ++++++++------ drivers/pci/pcie/aspm.c | 4 +-- drivers/pci/slot.c | 18 ++++++------ drivers/pci/switch/switchtec.c | 18 ++++++------ 12 files changed, 75 insertions(+), 70 deletions(-) diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index 5ac31f683b85..058d5937d8a9 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -73,7 +73,7 @@ static ssize_t power_read_file(struct pci_slot *pci_slot, char *buf) if (retval) return retval; - return sprintf(buf, "%d\n", value); + return sysfs_emit(buf, "%d\n", value); } static ssize_t power_write_file(struct pci_slot *pci_slot, const char *buf, @@ -130,7 +130,7 @@ static ssize_t attention_read_file(struct pci_slot *pci_slot, char *buf) if (retval) return retval; - return sprintf(buf, "%d\n", value); + return sysfs_emit(buf, "%d\n", value); } static ssize_t attention_write_file(struct pci_slot *pci_slot, const char *buf, @@ -175,7 +175,7 @@ static ssize_t latch_read_file(struct pci_slot *pci_slot, char *buf) if (retval) return retval; - return sprintf(buf, "%d\n", value); + return sysfs_emit(buf, "%d\n", value); } static struct pci_slot_attribute hotplug_slot_attr_latch = { @@ -192,7 +192,7 @@ static ssize_t presence_read_file(struct pci_slot *pci_slot, char *buf) if (retval) return retval; - return sprintf(buf, "%d\n", value); + return sysfs_emit(buf, "%d\n", value); } static struct pci_slot_attribute hotplug_slot_attr_presence = { diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c index dbfa0b55d31a..068b7810a574 100644 --- a/drivers/pci/hotplug/rpadlpar_sysfs.c +++ b/drivers/pci/hotplug/rpadlpar_sysfs.c @@ -50,7 +50,7 @@ static ssize_t add_slot_store(struct kobject *kobj, struct kobj_attribute *attr, static ssize_t add_slot_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "0\n"); + return sysfs_emit(buf, "0\n"); } static ssize_t remove_slot_store(struct kobject *kobj, @@ -80,7 +80,7 @@ static ssize_t remove_slot_store(struct kobject *kobj, static ssize_t remove_slot_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return sprintf(buf, "0\n"); + return sysfs_emit(buf, "0\n"); } static struct kobj_attribute add_slot_attr = diff --git a/drivers/pci/hotplug/shpchp_sysfs.c b/drivers/pci/hotplug/shpchp_sysfs.c index 45658bb5c554..64beed7a26be 100644 --- a/drivers/pci/hotplug/shpchp_sysfs.c +++ b/drivers/pci/hotplug/shpchp_sysfs.c @@ -24,50 +24,54 @@ static ssize_t show_ctrl(struct device *dev, struct device_attribute *attr, char *buf) { struct pci_dev *pdev; - char *out = buf; int index, busnr; struct resource *res; struct pci_bus *bus; + size_t len = 0; pdev = to_pci_dev(dev); bus = pdev->subordinate; - out += sprintf(buf, "Free resources: memory\n"); + len += sysfs_emit_at(buf, len, "Free resources: memory\n"); pci_bus_for_each_resource(bus, res, index) { if (res && (res->flags & IORESOURCE_MEM) && !(res->flags & IORESOURCE_PREFETCH)) { - out += sprintf(out, "start = %8.8llx, length = %8.8llx\n", - (unsigned long long)res->start, - (unsigned long long)resource_size(res)); + len += sysfs_emit_at(buf, len, + "start = %8.8llx, length = %8.8llx\n", + (unsigned long long)res->start, + (unsigned long long)resource_size(res)); } } - out += sprintf(out, "Free resources: prefetchable memory\n"); + len += sysfs_emit_at(buf, len, "Free resources: prefetchable memory\n"); pci_bus_for_each_resource(bus, res, index) { if (res && (res->flags & IORESOURCE_MEM) && (res->flags & IORESOURCE_PREFETCH)) { - out += sprintf(out, "start = %8.8llx, length = %8.8llx\n", - (unsigned long long)res->start, - (unsigned long long)resource_size(res)); + len += sysfs_emit_at(buf, len, + "start = %8.8llx, length = %8.8llx\n", + (unsigned long long)res->start, + (unsigned long long)resource_size(res)); } } - out += sprintf(out, "Free resources: IO\n"); + len += sysfs_emit_at(buf, len, "Free resources: IO\n"); pci_bus_for_each_resource(bus, res, index) { if (res && (res->flags & IORESOURCE_IO)) { - out += sprintf(out, "start = %8.8llx, length = %8.8llx\n", - (unsigned long long)res->start, - (unsigned long long)resource_size(res)); + len += sysfs_emit_at(buf, len, + "start = %8.8llx, length = %8.8llx\n", + (unsigned long long)res->start, + (unsigned long long)resource_size(res)); } } - out += sprintf(out, "Free resources: bus numbers\n"); + len += sysfs_emit_at(buf, len, "Free resources: bus numbers\n"); for (busnr = bus->busn_res.start; busnr <= bus->busn_res.end; busnr++) { if (!pci_find_bus(pci_domain_nr(bus), busnr)) break; } if (busnr < bus->busn_res.end) - out += sprintf(out, "start = %8.8x, length = %8.8x\n", - busnr, (int)(bus->busn_res.end - busnr)); + len += sysfs_emit_at(buf, len, + "start = %8.8x, length = %8.8x\n", + busnr, (int)(bus->busn_res.end - busnr)); - return out - buf; + return len; } static DEVICE_ATTR(ctrl, S_IRUGO, show_ctrl, NULL); diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index afc06e6ce115..a71258347323 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -346,7 +346,7 @@ static ssize_t sriov_totalvfs_show(struct device *dev, { struct pci_dev *pdev = to_pci_dev(dev); - return sprintf(buf, "%u\n", pci_sriov_get_totalvfs(pdev)); + return sysfs_emit(buf, "%u\n", pci_sriov_get_totalvfs(pdev)); } static ssize_t sriov_numvfs_show(struct device *dev, @@ -361,7 +361,7 @@ static ssize_t sriov_numvfs_show(struct device *dev, num_vfs = pdev->sriov->num_VFs; device_unlock(&pdev->dev); - return sprintf(buf, "%u\n", num_vfs); + return sysfs_emit(buf, "%u\n", num_vfs); } /* @@ -435,7 +435,7 @@ static ssize_t sriov_offset_show(struct device *dev, { struct pci_dev *pdev = to_pci_dev(dev); - return sprintf(buf, "%u\n", pdev->sriov->offset); + return sysfs_emit(buf, "%u\n", pdev->sriov->offset); } static ssize_t sriov_stride_show(struct device *dev, @@ -444,7 +444,7 @@ static ssize_t sriov_stride_show(struct device *dev, { struct pci_dev *pdev = to_pci_dev(dev); - return sprintf(buf, "%u\n", pdev->sriov->stride); + return sysfs_emit(buf, "%u\n", pdev->sriov->stride); } static ssize_t sriov_vf_device_show(struct device *dev, @@ -453,7 +453,7 @@ static ssize_t sriov_vf_device_show(struct device *dev, { struct pci_dev *pdev = to_pci_dev(dev); - return sprintf(buf, "%x\n", pdev->sriov->vf_device); + return sysfs_emit(buf, "%x\n", pdev->sriov->vf_device); } static ssize_t sriov_drivers_autoprobe_show(struct device *dev, @@ -462,7 +462,7 @@ static ssize_t sriov_drivers_autoprobe_show(struct device *dev, { struct pci_dev *pdev = to_pci_dev(dev); - return sprintf(buf, "%u\n", pdev->sriov->drivers_autoprobe); + return sysfs_emit(buf, "%u\n", pdev->sriov->drivers_autoprobe); } static ssize_t sriov_drivers_autoprobe_store(struct device *dev, diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 217dc9f0231f..9232255c8515 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -464,11 +464,11 @@ static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr, return retval; entry = irq_get_msi_desc(irq); - if (entry) - return sprintf(buf, "%s\n", - entry->msi_attrib.is_msix ? "msix" : "msi"); + if (!entry) + return -ENODEV; - return -ENODEV; + return sysfs_emit(buf, "%s\n", + entry->msi_attrib.is_msix ? "msix" : "msi"); } static int populate_msi_sysfs(struct pci_dev *pdev) diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 196382630363..a1351b3e2c4c 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -53,7 +53,7 @@ static ssize_t size_show(struct device *dev, struct device_attribute *attr, if (pdev->p2pdma->pool) size = gen_pool_size(pdev->p2pdma->pool); - return scnprintf(buf, PAGE_SIZE, "%zd\n", size); + return sysfs_emit(buf, "%zd\n", size); } static DEVICE_ATTR_RO(size); @@ -66,7 +66,7 @@ static ssize_t available_show(struct device *dev, struct device_attribute *attr, if (pdev->p2pdma->pool) avail = gen_pool_avail(pdev->p2pdma->pool); - return scnprintf(buf, PAGE_SIZE, "%zd\n", avail); + return sysfs_emit(buf, "%zd\n", avail); } static DEVICE_ATTR_RO(available); @@ -75,8 +75,7 @@ static ssize_t published_show(struct device *dev, struct device_attribute *attr, { struct pci_dev *pdev = to_pci_dev(dev); - return scnprintf(buf, PAGE_SIZE, "%d\n", - pdev->p2pdma->p2pmem_published); + return sysfs_emit(buf, "%d\n", pdev->p2pdma->p2pmem_published); } static DEVICE_ATTR_RO(published); diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c index 94b59e37939c..0c6446519640 100644 --- a/drivers/pci/pci-label.c +++ b/drivers/pci/pci-label.c @@ -178,11 +178,11 @@ static int dsm_get_label(struct device *dev, char *buf, * this entry must return a null string. */ if (attr == ACPI_ATTR_INDEX_SHOW) { - len = scnprintf(buf, PAGE_SIZE, "%llu\n", tmp->integer.value); + len = sysfs_emit(buf, "%llu\n", tmp->integer.value); } else if (attr == ACPI_ATTR_LABEL_SHOW) { if (tmp[1].type == ACPI_TYPE_STRING) - len = scnprintf(buf, PAGE_SIZE, "%s\n", - tmp[1].string.pointer); + len = sysfs_emit(buf, "%s\n", + tmp[1].string.pointer); else if (tmp[1].type == ACPI_TYPE_BUFFER) len = dsm_label_utf16s_to_utf8s(tmp + 1, buf); } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index b717680377a9..5ed316ea5831 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6439,7 +6439,7 @@ static ssize_t resource_alignment_show(struct bus_type *bus, char *buf) spin_lock(&resource_alignment_lock); if (resource_alignment_param) - count = scnprintf(buf, PAGE_SIZE, "%s", resource_alignment_param); + count = sysfs_emit(buf, "%s", resource_alignment_param); spin_unlock(&resource_alignment_lock); /* diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index ec943cee5ecc..40ef7bed7a77 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -529,21 +529,23 @@ static const char *aer_agent_string[] = { char *buf) \ { \ unsigned int i; \ - char *str = buf; \ struct pci_dev *pdev = to_pci_dev(dev); \ u64 *stats = pdev->aer_stats->stats_array; \ + size_t len = 0; \ \ for (i = 0; i < ARRAY_SIZE(strings_array); i++) { \ if (strings_array[i]) \ - str += sprintf(str, "%s %llu\n", \ - strings_array[i], stats[i]); \ + len += sysfs_emit_at(buf, len, "%s %llu\n", \ + strings_array[i], \ + stats[i]); \ else if (stats[i]) \ - str += sprintf(str, #stats_array "_bit[%d] %llu\n",\ - i, stats[i]); \ + len += sysfs_emit_at(buf, len, \ + #stats_array "_bit[%d] %llu\n",\ + i, stats[i]); \ } \ - str += sprintf(str, "TOTAL_%s %llu\n", total_string, \ - pdev->aer_stats->total_field); \ - return str-buf; \ + len += sysfs_emit_at(buf, len, "TOTAL_%s %llu\n", total_string, \ + pdev->aer_stats->total_field); \ + return len; \ } \ static DEVICE_ATTR_RO(name) @@ -563,7 +565,7 @@ aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs, char *buf) \ { \ struct pci_dev *pdev = to_pci_dev(dev); \ - return sprintf(buf, "%llu\n", pdev->aer_stats->field); \ + return sysfs_emit(buf, "%llu\n", pdev->aer_stats->field); \ } \ static DEVICE_ATTR_RO(name) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index ac0557a305af..013a47f587ce 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -1208,7 +1208,7 @@ static ssize_t aspm_attr_show_common(struct device *dev, struct pci_dev *pdev = to_pci_dev(dev); struct pcie_link_state *link = pcie_aspm_get_link(pdev); - return sprintf(buf, "%d\n", (link->aspm_enabled & state) ? 1 : 0); + return sysfs_emit(buf, "%d\n", (link->aspm_enabled & state) ? 1 : 0); } static ssize_t aspm_attr_store_common(struct device *dev, @@ -1265,7 +1265,7 @@ static ssize_t clkpm_show(struct device *dev, struct pci_dev *pdev = to_pci_dev(dev); struct pcie_link_state *link = pcie_aspm_get_link(pdev); - return sprintf(buf, "%d\n", link->clkpm_enabled); + return sysfs_emit(buf, "%d\n", link->clkpm_enabled); } static ssize_t clkpm_store(struct device *dev, diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c index d627dd9179b4..751a26668e3a 100644 --- a/drivers/pci/slot.c +++ b/drivers/pci/slot.c @@ -39,19 +39,19 @@ static const struct sysfs_ops pci_slot_sysfs_ops = { static ssize_t address_read_file(struct pci_slot *slot, char *buf) { if (slot->number == 0xff) - return sprintf(buf, "%04x:%02x\n", - pci_domain_nr(slot->bus), - slot->bus->number); - else - return sprintf(buf, "%04x:%02x:%02x\n", - pci_domain_nr(slot->bus), - slot->bus->number, - slot->number); + return sysfs_emit(buf, "%04x:%02x\n", + pci_domain_nr(slot->bus), + slot->bus->number); + + return sysfs_emit(buf, "%04x:%02x:%02x\n", + pci_domain_nr(slot->bus), + slot->bus->number, + slot->number); } static ssize_t bus_speed_read(enum pci_bus_speed speed, char *buf) { - return sprintf(buf, "%s\n", pci_speed_string(speed)); + return sysfs_emit(buf, "%s\n", pci_speed_string(speed)); } static ssize_t max_speed_read_file(struct pci_slot *slot, char *buf) diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index ba52459928f7..0b301f8be9ed 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -280,7 +280,7 @@ static ssize_t device_version_show(struct device *dev, ver = ioread32(&stdev->mmio_sys_info->device_version); - return sprintf(buf, "%x\n", ver); + return sysfs_emit(buf, "%x\n", ver); } static DEVICE_ATTR_RO(device_version); @@ -292,7 +292,7 @@ static ssize_t fw_version_show(struct device *dev, ver = ioread32(&stdev->mmio_sys_info->firmware_version); - return sprintf(buf, "%08x\n", ver); + return sysfs_emit(buf, "%08x\n", ver); } static DEVICE_ATTR_RO(fw_version); @@ -344,7 +344,7 @@ static ssize_t component_vendor_show(struct device *dev, /* component_vendor field not supported after gen3 */ if (stdev->gen != SWITCHTEC_GEN3) - return sprintf(buf, "none\n"); + return sysfs_emit(buf, "none\n"); return io_string_show(buf, &si->gen3.component_vendor, sizeof(si->gen3.component_vendor)); @@ -359,9 +359,9 @@ static ssize_t component_id_show(struct device *dev, /* component_id field not supported after gen3 */ if (stdev->gen != SWITCHTEC_GEN3) - return sprintf(buf, "none\n"); + return sysfs_emit(buf, "none\n"); - return sprintf(buf, "PM%04X\n", id); + return sysfs_emit(buf, "PM%04X\n", id); } static DEVICE_ATTR_RO(component_id); @@ -373,9 +373,9 @@ static ssize_t component_revision_show(struct device *dev, /* component_revision field not supported after gen3 */ if (stdev->gen != SWITCHTEC_GEN3) - return sprintf(buf, "255\n"); + return sysfs_emit(buf, "255\n"); - return sprintf(buf, "%d\n", rev); + return sysfs_emit(buf, "%d\n", rev); } static DEVICE_ATTR_RO(component_revision); @@ -384,7 +384,7 @@ static ssize_t partition_show(struct device *dev, { struct switchtec_dev *stdev = to_stdev(dev); - return sprintf(buf, "%d\n", stdev->partition); + return sysfs_emit(buf, "%d\n", stdev->partition); } static DEVICE_ATTR_RO(partition); @@ -393,7 +393,7 @@ static ssize_t partition_count_show(struct device *dev, { struct switchtec_dev *stdev = to_stdev(dev); - return sprintf(buf, "%d\n", stdev->partition_count); + return sysfs_emit(buf, "%d\n", stdev->partition_count); } static DEVICE_ATTR_RO(partition_count); From 381bd3fa8306a56b4bb8703966e6372f1b83762e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Thu, 3 Jun 2021 00:01:09 +0000 Subject: [PATCH 16/48] PCI/sysfs: Fix 'resource_alignment' newline issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The value of the "resource_alignment" can be specified using a kernel command-line argument ("pci=resource_alignment=") or through the corresponding sysfs attribute under the /sys/bus/pci path. Previously, when the value was set via the kernel command-line argument, and then subsequently accessed through sysfs attribute, the value read back was not correct: # grep -oE 'pci=resource_alignment.+' /proc/cmdline pci=resource_alignment=20@00:1f.2 # cat /sys/bus/pci/resource_alignment 20@00:1f. This was also true when the value was set through the sysfs attribute without including a trailing newline: # echo -n 20@00:1f.2 > /sys/bus/pci/resource_alignment # cat /sys/bus/pci/resource_alignment 20@00:1f. When it was set through the sysfs attribute *including* a newline, reading it back worked as intended: # echo 20@00:1f.2 > /sys/bus/pci/resource_alignment # cat /sys/bus/pci/resource_alignment 20@00:1f.2 To fix this inconsistency, append a trailing newline in the show() function and strip the trailing line in the store() function if one is present. Also, allow for the value previously set using either a command-line argument or through the sysfs object to be cleared at run-time. [bhelgaas: fold in kfree fix from https://lore.kernel.org/linux-pci/20210604133230.983956-4-kw@linux.com] Fixes: e499081da1a2 ("PCI: Force trailing new line to resource_alignment_param in sysfs") Link: https://lore.kernel.org/r/20210603000112.703037-4-kw@linux.com Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe --- drivers/pci/pci.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 5ed316ea5831..68f57d86b243 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6439,34 +6439,40 @@ static ssize_t resource_alignment_show(struct bus_type *bus, char *buf) spin_lock(&resource_alignment_lock); if (resource_alignment_param) - count = sysfs_emit(buf, "%s", resource_alignment_param); + count = sysfs_emit(buf, "%s\n", resource_alignment_param); spin_unlock(&resource_alignment_lock); - /* - * When set by the command line, resource_alignment_param will not - * have a trailing line feed, which is ugly. So conditionally add - * it here. - */ - if (count >= 2 && buf[count - 2] != '\n' && count < PAGE_SIZE - 1) { - buf[count - 1] = '\n'; - buf[count++] = 0; - } - return count; } static ssize_t resource_alignment_store(struct bus_type *bus, const char *buf, size_t count) { - char *param = kstrndup(buf, count, GFP_KERNEL); + char *param, *old, *end; + if (count >= (PAGE_SIZE - 1)) + return -EINVAL; + + param = kstrndup(buf, count, GFP_KERNEL); if (!param) return -ENOMEM; + end = strchr(param, '\n'); + if (end) + *end = '\0'; + spin_lock(&resource_alignment_lock); - kfree(resource_alignment_param); - resource_alignment_param = param; + old = resource_alignment_param; + if (strlen(param)) { + resource_alignment_param = param; + } else { + kfree(param); + resource_alignment_param = NULL; + } spin_unlock(&resource_alignment_lock); + + kfree(old); + return count; } From 14c19b2a40b61b609f68d1d6a5518ebb1c30706f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Thu, 3 Jun 2021 00:01:10 +0000 Subject: [PATCH 17/48] PCI/sysfs: Add 'devspec' newline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, when the value of the "devspec" sysfs attribute was read from the user space there was no newline present, and utilities such as "cat" wouldn't display the result of the read correctly. Append a newline character in the show() function to match other "devspec" attributes. Link: https://lore.kernel.org/r/20210603000112.703037-5-kw@linux.com Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe --- drivers/pci/pci-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index beb8d1f4fafe..5d63df7c1820 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -537,7 +537,7 @@ static ssize_t devspec_show(struct device *dev, if (np == NULL) return 0; - return sysfs_emit(buf, "%pOF", np); + return sysfs_emit(buf, "%pOF\n", np); } static DEVICE_ATTR_RO(devspec); #endif From 65db04053efea3f3e412a7e0cc599962999c96b4 Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Thu, 1 Apr 2021 21:12:52 +0800 Subject: [PATCH 18/48] PCI: Coalesce host bridge contiguous apertures Built-in graphics on HP EliteDesk 805 G6 doesn't work because graphics can't get the BAR it needs: pci_bus 0000:00: root bus resource [mem 0x10020200000-0x100303fffff window] pci_bus 0000:00: root bus resource [mem 0x10030400000-0x100401fffff window] pci 0000:00:08.1: bridge window [mem 0xd2000000-0xd23fffff] pci 0000:00:08.1: bridge window [mem 0x10030000000-0x100401fffff 64bit pref] pci 0000:00:08.1: can't claim BAR 15 [mem 0x10030000000-0x100401fffff 64bit pref]: no compatible bridge window pci 0000:00:08.1: [mem 0x10030000000-0x100401fffff 64bit pref] clipped to [mem 0x10030000000-0x100303fffff 64bit pref] pci 0000:00:08.1: bridge window [mem 0x10030000000-0x100303fffff 64bit pref] pci 0000:07:00.0: can't claim BAR 0 [mem 0x10030000000-0x1003fffffff 64bit pref]: no compatible bridge window pci 0000:07:00.0: can't claim BAR 2 [mem 0x10040000000-0x100401fffff 64bit pref]: no compatible bridge window However, the root bus has two contiguous apertures that can contain the child resource requested. Coalesce contiguous apertures so we can allocate from the entire contiguous region. [bhelgaas: fold in https://lore.kernel.org/r/20210528170242.1564038-1-kai.heng.feng@canonical.com] Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=212013 Suggested-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20210401131252.531935-1-kai.heng.feng@canonical.com Signed-off-by: Kai-Heng Feng Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 52 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 3a62d09b8869..bd862b612633 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "pci.h" #define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */ @@ -874,14 +875,31 @@ static void pci_set_bus_msi_domain(struct pci_bus *bus) dev_set_msi_domain(&bus->dev, d); } +static int res_cmp(void *priv, const struct list_head *a, + const struct list_head *b) +{ + struct resource_entry *entry1, *entry2; + + entry1 = container_of(a, struct resource_entry, node); + entry2 = container_of(b, struct resource_entry, node); + + if (entry1->res->flags != entry2->res->flags) + return entry1->res->flags > entry2->res->flags; + + if (entry1->offset != entry2->offset) + return entry1->offset > entry2->offset; + + return entry1->res->start > entry2->res->start; +} + static int pci_register_host_bridge(struct pci_host_bridge *bridge) { struct device *parent = bridge->dev.parent; - struct resource_entry *window, *n; + struct resource_entry *window, *next, *n; struct pci_bus *bus, *b; - resource_size_t offset; + resource_size_t offset, next_offset; LIST_HEAD(resources); - struct resource *res; + struct resource *res, *next_res; char addr[64], *fmt; const char *name; int err; @@ -960,11 +978,35 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge) if (nr_node_ids > 1 && pcibus_to_node(bus) == NUMA_NO_NODE) dev_warn(&bus->dev, "Unknown NUMA node; performance will be reduced\n"); - /* Add initial resources to the bus */ + /* Sort and coalesce contiguous windows */ + list_sort(NULL, &resources, res_cmp); resource_list_for_each_entry_safe(window, n, &resources) { - list_move_tail(&window->node, &bridge->windows); + if (list_is_last(&window->node, &resources)) + break; + + next = list_next_entry(window, node); offset = window->offset; res = window->res; + next_offset = next->offset; + next_res = next->res; + + if (res->flags != next_res->flags || offset != next_offset) + continue; + + if (res->end + 1 == next_res->start) { + next_res->start = res->start; + res->flags = res->start = res->end = 0; + } + } + + /* Add initial resources to the bus */ + resource_list_for_each_entry_safe(window, n, &resources) { + offset = window->offset; + res = window->res; + if (!res->end) + continue; + + list_move_tail(&window->node, &bridge->windows); if (res->flags & IORESOURCE_BUS) pci_bus_insert_busn_res(bus, bus->number, res->end); From 4694ae373dc2114f9a82f6ae15737e65af0c6dea Mon Sep 17 00:00:00 2001 From: Konstantin Kharlamov Date: Fri, 21 May 2021 02:55:01 +0300 Subject: [PATCH 19/48] PCI: Leave Apple Thunderbolt controllers on for s2idle or standby On Macbook 2013, resuming from suspend-to-idle or standby resulted in the external monitor no longer being detected, a stacktrace, and errors like this in dmesg: pcieport 0000:06:00.0: can't change power state from D3hot to D0 (config space inaccessible) The reason is that we know how to turn power to the Thunderbolt controller *off* via the SXIO/SXFP/SXLF methods, but we don't know how to turn power back on. We have to rely on firmware to turn the power back on. When going to the "suspend-to-idle" or "standby" system sleep states, firmware is not involved either on the suspend side or the resume side, so we can't use SXIO/SXFP/SXLF to turn the power off. Skip SXIO/SXFP/SXLF when firmware isn't involved in suspend, e.g., when we're going to the "suspend-to-idle" or "standby" system sleep states. Fixes: 1df5172c5c25 ("PCI: Suspend/resume quirks for Apple thunderbolt") Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=212767 Link: https://lore.kernel.org/r/20210520235501.917397-1-Hi-Angel@yandex.ru Signed-off-by: Konstantin Kharlamov Signed-off-by: Bjorn Helgaas Reviewed-by: Lukas Wunner Cc: stable@vger.kernel.org --- drivers/pci/quirks.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index dcb229de1acb..0dde9c5259f2 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include /* isa_dma_bridge_buggy */ #include "pci.h" @@ -3634,6 +3635,16 @@ static void quirk_apple_poweroff_thunderbolt(struct pci_dev *dev) return; if (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM) return; + + /* + * SXIO/SXFP/SXLF turns off power to the Thunderbolt controller. + * We don't know how to turn it back on again, but firmware does, + * so we can only use SXIO/SXFP/SXLF if we're suspending via + * firmware. + */ + if (!pm_suspend_via_firmware()) + return; + bridge = ACPI_HANDLE(&dev->dev); if (!bridge) return; From e9c3bbd68ec7dc5dd986f7270d9233d27b092816 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Sat, 27 Mar 2021 10:51:40 -0700 Subject: [PATCH 20/48] PCI/IOV: Clarify error message for unbound devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Be more verbose to disambiguate the error case when trying to configure SR-IOV with no driver bound vs. a driver that does not implement the .sriov_configure() callback. Link: https://lore.kernel.org/r/20210327175140.682708-1-mdf@kernel.org Reported-by: Brian Foley Signed-off-by: Moritz Fischer Signed-off-by: Bjorn Helgaas Reviewed-by: Krzysztof Wilczyński --- drivers/pci/iov.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index afc06e6ce115..5e8278a6f3b2 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -391,9 +391,16 @@ static ssize_t sriov_numvfs_store(struct device *dev, if (num_vfs == pdev->sriov->num_VFs) goto exit; + /* is PF driver loaded */ + if (!pdev->driver) { + pci_info(pdev, "no driver bound to device; cannot configure SR-IOV\n"); + ret = -ENOENT; + goto exit; + } + /* is PF driver loaded w/callback */ - if (!pdev->driver || !pdev->driver->sriov_configure) { - pci_info(pdev, "Driver does not support SRIOV configuration via sysfs\n"); + if (!pdev->driver->sriov_configure) { + pci_info(pdev, "driver does not support SR-IOV configuration via sysfs\n"); ret = -ENOENT; goto exit; } From 6389d43745228de128e7b1a66eb18c0ccf43e6b4 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 10 Jun 2021 10:06:04 -0600 Subject: [PATCH 21/48] PCI/P2PDMA: Rename upstream_bridge_distance() and rework doc The function upstream_bridge_distance() has evolved such that its name is no longer entirely reflective of what the function does. It not only calculates the distance between two peers but also calculates how the DMA addresses for those two peers should be mapped. Rename it to calc_map_type_and_dist() and rework the documentation to better describe the two pieces of information the function returns. [bhelgaas: tweak comment wording] Link: https://lore.kernel.org/r/20210610160609.28447-2-logang@deltatee.com Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas --- drivers/pci/p2pdma.c | 73 +++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 35 deletions(-) diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 196382630363..a860137fac89 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -354,7 +354,7 @@ static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b) } static enum pci_p2pdma_map_type -__upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client, +__calc_map_type_and_dist(struct pci_dev *provider, struct pci_dev *client, int *dist, bool *acs_redirects, struct seq_buf *acs_list) { struct pci_dev *a = provider, *b = client, *bb; @@ -433,52 +433,55 @@ static unsigned long map_types_idx(struct pci_dev *client) } /* - * Find the distance through the nearest common upstream bridge between - * two PCI devices. + * Calculate the P2PDMA mapping type and distance between two PCI devices. * - * If the two devices are the same device then 0 will be returned. + * If the two devices are the same PCI function, return + * PCI_P2PDMA_MAP_BUS_ADDR and a distance of 0. * - * If there are two virtual functions of the same device behind the same - * bridge port then 2 will be returned (one step down to the PCIe switch, - * then one step back to the same device). + * If they are two functions of the same device, return + * PCI_P2PDMA_MAP_BUS_ADDR and a distance of 2 (one hop up to the bridge, + * then one hop back down to another function of the same device). * - * In the case where two devices are connected to the same PCIe switch, the - * value 4 will be returned. This corresponds to the following PCI tree: + * In the case where two devices are connected to the same PCIe switch, + * return a distance of 4. This corresponds to the following PCI tree: * * -+ Root Port * \+ Switch Upstream Port - * +-+ Switch Downstream Port + * +-+ Switch Downstream Port 0 * + \- Device A - * \-+ Switch Downstream Port + * \-+ Switch Downstream Port 1 * \- Device B * - * The distance is 4 because we traverse from Device A through the downstream - * port of the switch, to the common upstream port, back up to the second - * downstream port and then to Device B. + * The distance is 4 because we traverse from Device A to Downstream Port 0 + * to the common Switch Upstream Port, back down to Downstream Port 1 and + * then to Device B. The mapping type returned depends on the ACS + * redirection setting of the ports along the path. * - * Any two devices that cannot communicate using p2pdma will return - * PCI_P2PDMA_MAP_NOT_SUPPORTED. + * If ACS redirect is set on any port in the path, traffic between the + * devices will go through the host bridge, so return + * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; otherwise return + * PCI_P2PDMA_MAP_BUS_ADDR. * * Any two devices that have a data path that goes through the host bridge - * will consult a whitelist. If the host bridges are on the whitelist, - * this function will return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE. - * - * If either bridge is not on the whitelist this function returns + * will consult a whitelist. If the host bridge is in the whitelist, return + * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE with the distance set to the number of + * ports per above. If the device is not in the whitelist, return * PCI_P2PDMA_MAP_NOT_SUPPORTED. * - * If a bridge which has any ACS redirection bits set is in the path, - * acs_redirects will be set to true. In this case, a list of all infringing - * bridge addresses will be populated in acs_list (assuming it's non-null) - * for printk purposes. + * If any ACS redirect bits are set, then acs_redirects boolean will be set + * to true and their PCI device names will be appended to the acs_list + * seq_buf. This seq_buf is used to print a warning informing the user how + * to disable ACS using a command line parameter. (See + * calc_map_type_and_dist_warn() below) */ static enum pci_p2pdma_map_type -upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client, +calc_map_type_and_dist(struct pci_dev *provider, struct pci_dev *client, int *dist, bool *acs_redirects, struct seq_buf *acs_list) { enum pci_p2pdma_map_type map_type; - map_type = __upstream_bridge_distance(provider, client, dist, - acs_redirects, acs_list); + map_type = __calc_map_type_and_dist(provider, client, dist, + acs_redirects, acs_list); if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) { if (!cpu_supports_p2pdma() && @@ -494,8 +497,8 @@ upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client, } static enum pci_p2pdma_map_type -upstream_bridge_distance_warn(struct pci_dev *provider, struct pci_dev *client, - int *dist) +calc_map_type_and_dist_warn(struct pci_dev *provider, struct pci_dev *client, + int *dist) { struct seq_buf acs_list; bool acs_redirects; @@ -505,8 +508,8 @@ upstream_bridge_distance_warn(struct pci_dev *provider, struct pci_dev *client, if (!acs_list.buffer) return -ENOMEM; - ret = upstream_bridge_distance(provider, client, dist, &acs_redirects, - &acs_list); + ret = calc_map_type_and_dist(provider, client, dist, &acs_redirects, + &acs_list); if (acs_redirects) { pci_warn(client, "ACS redirect is set between the client and provider (%s)\n", pci_name(provider)); @@ -565,11 +568,11 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, } if (verbose) - ret = upstream_bridge_distance_warn(provider, - pci_client, &distance); + ret = calc_map_type_and_dist_warn(provider, pci_client, + &distance); else - ret = upstream_bridge_distance(provider, pci_client, - &distance, NULL, NULL); + ret = calc_map_type_and_dist(provider, pci_client, + &distance, NULL, NULL); pci_dev_put(pci_client); From e4ece59abd70d8f54e2163274dc996bb442832a6 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 10 Jun 2021 10:06:05 -0600 Subject: [PATCH 22/48] PCI/P2PDMA: Collect acs list in stack buffer to avoid sleeping In order to call the calc_map_type_and_dist_warn() function from a dma_map operation, the function must not sleep. The only reason it sleeps is to allocate memory for the seq_buf to print a verbose warning telling the user how to disable ACS for that path. Instead of allocating the memory with kmalloc(), allocate a smaller buffer on the stack. A 128 byte buffer is enough to print 10 PCI device names. A system with 10 bridge ports between two devices that have ACS enabled would be unusually large, so this should still be a reasonable limit. This also cleans up the awkward (and broken) return with -ENOMEM which contradicts the return type and the caller was not prepared for. Link: https://lore.kernel.org/r/20210610160609.28447-3-logang@deltatee.com Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas --- drivers/pci/p2pdma.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index a860137fac89..109bd7321962 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -502,11 +502,10 @@ calc_map_type_and_dist_warn(struct pci_dev *provider, struct pci_dev *client, { struct seq_buf acs_list; bool acs_redirects; + char buf[128]; int ret; - seq_buf_init(&acs_list, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE); - if (!acs_list.buffer) - return -ENOMEM; + seq_buf_init(&acs_list, buf, sizeof(buf)); ret = calc_map_type_and_dist(provider, client, dist, &acs_redirects, &acs_list); @@ -524,8 +523,6 @@ calc_map_type_and_dist_warn(struct pci_dev *provider, struct pci_dev *client, pci_name(provider)); } - kfree(acs_list.buffer); - return ret; } From f9c125b9eb30650356cf582003365b1ecbd7003b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 10 Jun 2021 10:06:06 -0600 Subject: [PATCH 23/48] PCI/P2PDMA: Use correct calc_map_type_and_dist() return type Instead of using an int for the return value of this function, use the correct enum pci_p2pdma_map_type. Link: https://lore.kernel.org/r/20210610160609.28447-4-logang@deltatee.com Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas --- drivers/pci/p2pdma.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 109bd7321962..abcdb2a6b1c0 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -546,11 +546,11 @@ calc_map_type_and_dist_warn(struct pci_dev *provider, struct pci_dev *client, int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, int num_clients, bool verbose) { + enum pci_p2pdma_map_type map; bool not_supported = false; struct pci_dev *pci_client; int total_dist = 0; - int distance; - int i, ret; + int i, distance; if (num_clients == 0) return -1; @@ -565,15 +565,15 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, } if (verbose) - ret = calc_map_type_and_dist_warn(provider, pci_client, + map = calc_map_type_and_dist_warn(provider, pci_client, &distance); else - ret = calc_map_type_and_dist(provider, pci_client, + map = calc_map_type_and_dist(provider, pci_client, &distance, NULL, NULL); pci_dev_put(pci_client); - if (ret == PCI_P2PDMA_MAP_NOT_SUPPORTED) + if (map == PCI_P2PDMA_MAP_NOT_SUPPORTED) not_supported = true; if (not_supported && !verbose) From cf201bfe8cdc9ba11c4f312945b908ed24c7b7b5 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 10 Jun 2021 10:06:07 -0600 Subject: [PATCH 24/48] PCI/P2PDMA: Warn if host bridge not in whitelist If the host bridge is not in the whitelist print a warning in the calc_map_type_and_dist_warn() path detailing the vendor and device IDs that would need to be added to the whitelist. Suggested-by: Don Dutile Link: https://lore.kernel.org/r/20210610160609.28447-5-logang@deltatee.com Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas --- drivers/pci/p2pdma.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index abcdb2a6b1c0..c07c928e3952 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -309,7 +309,7 @@ static const struct pci_p2pdma_whitelist_entry { }; static bool __host_bridge_whitelist(struct pci_host_bridge *host, - bool same_host_bridge) + bool same_host_bridge, bool warn) { struct pci_dev *root = pci_get_slot(host->bus, PCI_DEVFN(0, 0)); const struct pci_p2pdma_whitelist_entry *entry; @@ -331,6 +331,10 @@ static bool __host_bridge_whitelist(struct pci_host_bridge *host, return true; } + if (warn) + pci_warn(root, "Host bridge not in P2PDMA whitelist: %04x:%04x\n", + vendor, device); + return false; } @@ -338,16 +342,17 @@ static bool __host_bridge_whitelist(struct pci_host_bridge *host, * If we can't find a common upstream bridge take a look at the root * complex and compare it to a whitelist of known good hardware. */ -static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b) +static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b, + bool warn) { struct pci_host_bridge *host_a = pci_find_host_bridge(a->bus); struct pci_host_bridge *host_b = pci_find_host_bridge(b->bus); if (host_a == host_b) - return __host_bridge_whitelist(host_a, true); + return __host_bridge_whitelist(host_a, true, warn); - if (__host_bridge_whitelist(host_a, false) && - __host_bridge_whitelist(host_b, false)) + if (__host_bridge_whitelist(host_a, false, warn) && + __host_bridge_whitelist(host_b, false, warn)) return true; return false; @@ -485,7 +490,7 @@ calc_map_type_and_dist(struct pci_dev *provider, struct pci_dev *client, if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) { if (!cpu_supports_p2pdma() && - !host_bridge_whitelist(provider, client)) + !host_bridge_whitelist(provider, client, acs_redirects)) map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED; } From 7e2faa1710c408712185bb6463eaa0ee4776350f Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 10 Jun 2021 10:06:08 -0600 Subject: [PATCH 25/48] PCI/P2PDMA: Refactor pci_p2pdma_map_type() All callers of pci_p2pdma_map_type() have a struct dev_pgmap and a struct device (of the client doing the DMA transfer). Thus move the conversion to struct pci_devs for the provider and client into this function. Link: https://lore.kernel.org/r/20210610160609.28447-6-logang@deltatee.com Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas --- drivers/pci/p2pdma.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index c07c928e3952..0bb6792b828c 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -816,12 +816,20 @@ void pci_p2pmem_publish(struct pci_dev *pdev, bool publish) } EXPORT_SYMBOL_GPL(pci_p2pmem_publish); -static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct pci_dev *provider, - struct pci_dev *client) +static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap, + struct device *dev) { + struct pci_dev *provider = to_p2p_pgmap(pgmap)->provider; + struct pci_dev *client; + if (!provider->p2pdma) return PCI_P2PDMA_MAP_NOT_SUPPORTED; + if (!dev_is_pci(dev)) + return PCI_P2PDMA_MAP_NOT_SUPPORTED; + + client = to_pci_dev(dev); + return xa_to_value(xa_load(&provider->p2pdma->map_types, map_types_idx(client))); } @@ -858,14 +866,8 @@ int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg, { struct pci_p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(sg_page(sg)->pgmap); - struct pci_dev *client; - if (WARN_ON_ONCE(!dev_is_pci(dev))) - return 0; - - client = to_pci_dev(dev); - - switch (pci_p2pdma_map_type(p2p_pgmap->provider, client)) { + switch (pci_p2pdma_map_type(sg_page(sg)->pgmap, dev)) { case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: return dma_map_sg_attrs(dev, sg, nents, dir, attrs); case PCI_P2PDMA_MAP_BUS_ADDR: @@ -889,17 +891,9 @@ EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg_attrs); void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { - struct pci_p2pdma_pagemap *p2p_pgmap = - to_p2p_pgmap(sg_page(sg)->pgmap); enum pci_p2pdma_map_type map_type; - struct pci_dev *client; - if (WARN_ON_ONCE(!dev_is_pci(dev))) - return; - - client = to_pci_dev(dev); - - map_type = pci_p2pdma_map_type(p2p_pgmap->provider, client); + map_type = pci_p2pdma_map_type(sg_page(sg)->pgmap, dev); if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) dma_unmap_sg_attrs(dev, sg, nents, dir, attrs); From 3ec0c3ec2d92c09465534a1ff9c6f9d9506ffef6 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 10 Jun 2021 10:06:09 -0600 Subject: [PATCH 26/48] PCI/P2PDMA: Avoid pci_get_slot(), which may sleep In order to use upstream_bridge_distance_warn() from a dma_map function, it must not sleep. However, pci_get_slot() takes the pci_bus_sem so it might sleep. In order to avoid this, try to get the host bridge's device from the first element in the device list. It should be impossible for the host bridge's device to go away while references are held on child devices, so the first element should not be able to change and, thus, this should be safe. Introduce a static function called pci_host_bridge_dev() to obtain the host bridge's root device. Link: https://lore.kernel.org/r/20210610160609.28447-7-logang@deltatee.com Signed-off-by: Logan Gunthorpe Signed-off-by: Bjorn Helgaas --- drivers/pci/p2pdma.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 0bb6792b828c..deb097ceaf41 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -308,10 +308,41 @@ static const struct pci_p2pdma_whitelist_entry { {} }; +/* + * This lookup function tries to find the PCI device corresponding to a given + * host bridge. + * + * It assumes the host bridge device is the first PCI device in the + * bus->devices list and that the devfn is 00.0. These assumptions should hold + * for all the devices in the whitelist above. + * + * This function is equivalent to pci_get_slot(host->bus, 0), however it does + * not take the pci_bus_sem lock seeing __host_bridge_whitelist() must not + * sleep. + * + * For this to be safe, the caller should hold a reference to a device on the + * bridge, which should ensure the host_bridge device will not be freed + * or removed from the head of the devices list. + */ +static struct pci_dev *pci_host_bridge_dev(struct pci_host_bridge *host) +{ + struct pci_dev *root; + + root = list_first_entry_or_null(&host->bus->devices, + struct pci_dev, bus_list); + + if (!root) + return NULL; + if (root->devfn != PCI_DEVFN(0, 0)) + return NULL; + + return root; +} + static bool __host_bridge_whitelist(struct pci_host_bridge *host, bool same_host_bridge, bool warn) { - struct pci_dev *root = pci_get_slot(host->bus, PCI_DEVFN(0, 0)); + struct pci_dev *root = pci_host_bridge_dev(host); const struct pci_p2pdma_whitelist_entry *entry; unsigned short vendor, device; @@ -320,7 +351,6 @@ static bool __host_bridge_whitelist(struct pci_host_bridge *host, vendor = root->vendor; device = root->device; - pci_dev_put(root); for (entry = pci_p2pdma_whitelist; entry->vendor; entry++) { if (vendor != entry->vendor || device != entry->device) From d1b8dc09dd71248f5098792af98caa497ec66d19 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Jun 2021 07:53:10 +0200 Subject: [PATCH 27/48] PCI/P2PDMA: Simplify distance calculation Merge __calc_map_type_and_dist() and calc_map_type_and_dist_warn() into calc_map_type_and_dist() to simplify the code a bit. This now means we add the devfn strings to the acs_buf unconditionally even if the buffer is not printed, but that is not a lot of overhead and keeps the code much simpler. Link: https://lore.kernel.org/r/20210614055310.3960791-1-hch@lst.de Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas Reviewed-by: Logan Gunthorpe --- drivers/pci/p2pdma.c | 196 +++++++++++++++++-------------------------- 1 file changed, 76 insertions(+), 120 deletions(-) diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index deb097ceaf41..ca2574debb2d 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -388,79 +388,6 @@ static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b, return false; } -static enum pci_p2pdma_map_type -__calc_map_type_and_dist(struct pci_dev *provider, struct pci_dev *client, - int *dist, bool *acs_redirects, struct seq_buf *acs_list) -{ - struct pci_dev *a = provider, *b = client, *bb; - int dist_a = 0; - int dist_b = 0; - int acs_cnt = 0; - - if (acs_redirects) - *acs_redirects = false; - - /* - * Note, we don't need to take references to devices returned by - * pci_upstream_bridge() seeing we hold a reference to a child - * device which will already hold a reference to the upstream bridge. - */ - - while (a) { - dist_b = 0; - - if (pci_bridge_has_acs_redir(a)) { - seq_buf_print_bus_devfn(acs_list, a); - acs_cnt++; - } - - bb = b; - - while (bb) { - if (a == bb) - goto check_b_path_acs; - - bb = pci_upstream_bridge(bb); - dist_b++; - } - - a = pci_upstream_bridge(a); - dist_a++; - } - - if (dist) - *dist = dist_a + dist_b; - - return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; - -check_b_path_acs: - bb = b; - - while (bb) { - if (a == bb) - break; - - if (pci_bridge_has_acs_redir(bb)) { - seq_buf_print_bus_devfn(acs_list, bb); - acs_cnt++; - } - - bb = pci_upstream_bridge(bb); - } - - if (dist) - *dist = dist_a + dist_b; - - if (acs_cnt) { - if (acs_redirects) - *acs_redirects = true; - - return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; - } - - return PCI_P2PDMA_MAP_BUS_ADDR; -} - static unsigned long map_types_idx(struct pci_dev *client) { return (pci_domain_nr(client->bus) << 16) | @@ -502,63 +429,96 @@ static unsigned long map_types_idx(struct pci_dev *client) * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE with the distance set to the number of * ports per above. If the device is not in the whitelist, return * PCI_P2PDMA_MAP_NOT_SUPPORTED. - * - * If any ACS redirect bits are set, then acs_redirects boolean will be set - * to true and their PCI device names will be appended to the acs_list - * seq_buf. This seq_buf is used to print a warning informing the user how - * to disable ACS using a command line parameter. (See - * calc_map_type_and_dist_warn() below) */ static enum pci_p2pdma_map_type calc_map_type_and_dist(struct pci_dev *provider, struct pci_dev *client, - int *dist, bool *acs_redirects, struct seq_buf *acs_list) -{ - enum pci_p2pdma_map_type map_type; - - map_type = __calc_map_type_and_dist(provider, client, dist, - acs_redirects, acs_list); - - if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) { - if (!cpu_supports_p2pdma() && - !host_bridge_whitelist(provider, client, acs_redirects)) - map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED; - } - - if (provider->p2pdma) - xa_store(&provider->p2pdma->map_types, map_types_idx(client), - xa_mk_value(map_type), GFP_KERNEL); - - return map_type; -} - -static enum pci_p2pdma_map_type -calc_map_type_and_dist_warn(struct pci_dev *provider, struct pci_dev *client, - int *dist) + int *dist, bool verbose) { + enum pci_p2pdma_map_type map_type = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; + struct pci_dev *a = provider, *b = client, *bb; + bool acs_redirects = false; struct seq_buf acs_list; - bool acs_redirects; + int acs_cnt = 0; + int dist_a = 0; + int dist_b = 0; char buf[128]; - int ret; seq_buf_init(&acs_list, buf, sizeof(buf)); - ret = calc_map_type_and_dist(provider, client, dist, &acs_redirects, - &acs_list); - if (acs_redirects) { + /* + * Note, we don't need to take references to devices returned by + * pci_upstream_bridge() seeing we hold a reference to a child + * device which will already hold a reference to the upstream bridge. + */ + while (a) { + dist_b = 0; + + if (pci_bridge_has_acs_redir(a)) { + seq_buf_print_bus_devfn(&acs_list, a); + acs_cnt++; + } + + bb = b; + + while (bb) { + if (a == bb) + goto check_b_path_acs; + + bb = pci_upstream_bridge(bb); + dist_b++; + } + + a = pci_upstream_bridge(a); + dist_a++; + } + + *dist = dist_a + dist_b; + goto map_through_host_bridge; + +check_b_path_acs: + bb = b; + + while (bb) { + if (a == bb) + break; + + if (pci_bridge_has_acs_redir(bb)) { + seq_buf_print_bus_devfn(&acs_list, bb); + acs_cnt++; + } + + bb = pci_upstream_bridge(bb); + } + + *dist = dist_a + dist_b; + + if (!acs_cnt) { + map_type = PCI_P2PDMA_MAP_BUS_ADDR; + goto done; + } + + if (verbose) { + acs_list.buffer[acs_list.len-1] = 0; /* drop final semicolon */ pci_warn(client, "ACS redirect is set between the client and provider (%s)\n", pci_name(provider)); - /* Drop final semicolon */ - acs_list.buffer[acs_list.len-1] = 0; pci_warn(client, "to disable ACS redirect for this path, add the kernel parameter: pci=disable_acs_redir=%s\n", acs_list.buffer); } + acs_redirects = true; - if (ret == PCI_P2PDMA_MAP_NOT_SUPPORTED) { - pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge or whitelisted host bridge\n", - pci_name(provider)); +map_through_host_bridge: + if (!cpu_supports_p2pdma() && + !host_bridge_whitelist(provider, client, acs_redirects)) { + if (verbose) + pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge or whitelisted host bridge\n", + pci_name(provider)); + map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED; } - - return ret; +done: + if (provider->p2pdma) + xa_store(&provider->p2pdma->map_types, map_types_idx(client), + xa_mk_value(map_type), GFP_KERNEL); + return map_type; } /** @@ -599,12 +559,8 @@ int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients, return -1; } - if (verbose) - map = calc_map_type_and_dist_warn(provider, pci_client, - &distance); - else - map = calc_map_type_and_dist(provider, pci_client, - &distance, NULL, NULL); + map = calc_map_type_and_dist(provider, pci_client, &distance, + verbose); pci_dev_put(pci_client); From a97396c6eb13f65bea894dbe7739b2e883d40a3e Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 1 May 2021 10:29:00 +0200 Subject: [PATCH 28/48] PCI: pciehp: Ignore Link Down/Up caused by DPC Downstream Port Containment (PCIe r5.0, sec. 6.2.10) disables the link upon an error and attempts to re-enable it when instructed by the DPC driver. A slot which is both DPC- and hotplug-capable is currently powered off by pciehp once DPC is triggered (due to the link change) and powered back up on successful recovery. That's undesirable, the slot should remain powered so the hotplugged device remains bound to its driver. DPC notifies the driver of the error and of successful recovery in pcie_do_recovery() and the driver may then restore the device to working state. Moreover, Sinan points out that turning off slot power by pciehp may foil recovery by DPC: Power off/on is a cold reset concurrently to DPC's warm reset. Sathyanarayanan reports extended delays or failure in link retraining by DPC if pciehp brings down the slot. Fix by detecting whether a Link Down event is caused by DPC and awaiting recovery if so. On successful recovery, ignore both the Link Down and the subsequent Link Up event. Afterwards, check whether the link is down to detect surprise-removal or another DPC event immediately after DPC recovery. Ensure that the corresponding DLLSC event is not ignored by synthesizing it and invoking irq_wake_thread() to trigger a re-run of pciehp_ist(). The IRQ threads of the hotplug and DPC drivers, pciehp_ist() and dpc_handler(), race against each other. If pciehp is faster than DPC, it will wait until DPC recovery completes. Recovery consists of two steps: The first step (waiting for link disablement) is recognizable by pciehp through a set DPC Trigger Status bit. The second step (waiting for link retraining) is recognizable through a newly introduced PCI_DPC_RECOVERING flag. If DPC is faster than pciehp, neither of the two flags will be set and pciehp may glean the recovery status from the new PCI_DPC_RECOVERED flag. The flag is zero if DPC didn't occur at all, hence DLLSC events are not ignored by default. pciehp waits up to 4 seconds before assuming that DPC recovery failed and bringing down the slot. This timeout is not taken from the spec (it doesn't mandate one) but based on a report from Yicong Yang that DPC may take a bit more than 3 seconds on HiSilicon's Kunpeng platform. The timeout is necessary because the DPC Trigger Status bit may never clear: On Root Ports which support RP Extensions for DPC, the DPC driver polls the DPC RP Busy bit for up to 1 second before giving up on DPC recovery. Without the timeout, pciehp would then wait indefinitely for DPC to complete. This commit draws inspiration from previous attempts to synchronize DPC with pciehp: By Sinan Kaya, August 2018: https://lore.kernel.org/linux-pci/20180818065126.77912-1-okaya@kernel.org/ By Ethan Zhao, October 2020: https://lore.kernel.org/linux-pci/20201007113158.48933-1-haifeng.zhao@intel.com/ By Kuppuswamy Sathyanarayanan, March 2021: https://lore.kernel.org/linux-pci/59cb30f5e5ac6d65427ceaadf1012b2ba8dbf66c.1615606143.git.sathyanarayanan.kuppuswamy@linux.intel.com/ Link: https://lore.kernel.org/r/0be565d97438fe2a6d57354b3aa4e8626952a00b.1619857124.git.lukas@wunner.de Reported-by: Sinan Kaya Reported-by: Ethan Zhao Reported-by: Kuppuswamy Sathyanarayanan Tested-by: Kuppuswamy Sathyanarayanan Tested-by: Yicong Yang Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Kuppuswamy Sathyanarayanan Cc: Dan Williams Cc: Ashok Raj Cc: Keith Busch --- drivers/pci/hotplug/pciehp_hpc.c | 36 ++++++++++++++++ drivers/pci/pci.h | 4 ++ drivers/pci/pcie/dpc.c | 74 +++++++++++++++++++++++++++++--- 3 files changed, 109 insertions(+), 5 deletions(-) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index fb3840e222ad..9d06939736c0 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -563,6 +563,32 @@ void pciehp_power_off_slot(struct controller *ctrl) PCI_EXP_SLTCTL_PWR_OFF); } +static void pciehp_ignore_dpc_link_change(struct controller *ctrl, + struct pci_dev *pdev, int irq) +{ + /* + * Ignore link changes which occurred while waiting for DPC recovery. + * Could be several if DPC triggered multiple times consecutively. + */ + synchronize_hardirq(irq); + atomic_and(~PCI_EXP_SLTSTA_DLLSC, &ctrl->pending_events); + if (pciehp_poll_mode) + pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_DLLSC); + ctrl_info(ctrl, "Slot(%s): Link Down/Up ignored (recovered by DPC)\n", + slot_name(ctrl)); + + /* + * If the link is unexpectedly down after successful recovery, + * the corresponding link change may have been ignored above. + * Synthesize it to ensure that it is acted on. + */ + down_read(&ctrl->reset_lock); + if (!pciehp_check_link_active(ctrl)) + pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC); + up_read(&ctrl->reset_lock); +} + static irqreturn_t pciehp_isr(int irq, void *dev_id) { struct controller *ctrl = (struct controller *)dev_id; @@ -706,6 +732,16 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) PCI_EXP_SLTCTL_ATTN_IND_ON); } + /* + * Ignore Link Down/Up events caused by Downstream Port Containment + * if recovery from the error succeeded. + */ + if ((events & PCI_EXP_SLTSTA_DLLSC) && pci_dpc_recovered(pdev) && + ctrl->state == ON_STATE) { + events &= ~PCI_EXP_SLTSTA_DLLSC; + pciehp_ignore_dpc_link_change(ctrl, pdev, irq); + } + /* * Disable requests have higher priority than Presence Detect Changed * or Data Link Layer State Changed events. diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 37c913bbc6e1..dac6922553b4 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -385,6 +385,8 @@ static inline bool pci_dev_is_disconnected(const struct pci_dev *dev) /* pci_dev priv_flags */ #define PCI_DEV_ADDED 0 +#define PCI_DPC_RECOVERED 1 +#define PCI_DPC_RECOVERING 2 static inline void pci_dev_assign_added(struct pci_dev *dev, bool added) { @@ -439,10 +441,12 @@ void pci_restore_dpc_state(struct pci_dev *dev); void pci_dpc_init(struct pci_dev *pdev); void dpc_process_error(struct pci_dev *pdev); pci_ers_result_t dpc_reset_link(struct pci_dev *pdev); +bool pci_dpc_recovered(struct pci_dev *pdev); #else static inline void pci_save_dpc_state(struct pci_dev *dev) {} static inline void pci_restore_dpc_state(struct pci_dev *dev) {} static inline void pci_dpc_init(struct pci_dev *pdev) {} +static inline bool pci_dpc_recovered(struct pci_dev *pdev) { return false; } #endif #ifdef CONFIG_PCIEPORTBUS diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index e05aba86a317..c556e7beafe3 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -71,6 +71,58 @@ void pci_restore_dpc_state(struct pci_dev *dev) pci_write_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, *cap); } +static DECLARE_WAIT_QUEUE_HEAD(dpc_completed_waitqueue); + +#ifdef CONFIG_HOTPLUG_PCI_PCIE +static bool dpc_completed(struct pci_dev *pdev) +{ + u16 status; + + pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_STATUS, &status); + if ((status != 0xffff) && (status & PCI_EXP_DPC_STATUS_TRIGGER)) + return false; + + if (test_bit(PCI_DPC_RECOVERING, &pdev->priv_flags)) + return false; + + return true; +} + +/** + * pci_dpc_recovered - whether DPC triggered and has recovered successfully + * @pdev: PCI device + * + * Return true if DPC was triggered for @pdev and has recovered successfully. + * Wait for recovery if it hasn't completed yet. Called from the PCIe hotplug + * driver to recognize and ignore Link Down/Up events caused by DPC. + */ +bool pci_dpc_recovered(struct pci_dev *pdev) +{ + struct pci_host_bridge *host; + + if (!pdev->dpc_cap) + return false; + + /* + * Synchronization between hotplug and DPC is not supported + * if DPC is owned by firmware and EDR is not enabled. + */ + host = pci_find_host_bridge(pdev->bus); + if (!host->native_dpc && !IS_ENABLED(CONFIG_PCIE_EDR)) + return false; + + /* + * Need a timeout in case DPC never completes due to failure of + * dpc_wait_rp_inactive(). The spec doesn't mandate a time limit, + * but reports indicate that DPC completes within 4 seconds. + */ + wait_event_timeout(dpc_completed_waitqueue, dpc_completed(pdev), + msecs_to_jiffies(4000)); + + return test_and_clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); +} +#endif /* CONFIG_HOTPLUG_PCI_PCIE */ + static int dpc_wait_rp_inactive(struct pci_dev *pdev) { unsigned long timeout = jiffies + HZ; @@ -91,8 +143,11 @@ static int dpc_wait_rp_inactive(struct pci_dev *pdev) pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) { + pci_ers_result_t ret; u16 cap; + set_bit(PCI_DPC_RECOVERING, &pdev->priv_flags); + /* * DPC disables the Link automatically in hardware, so it has * already been reset by the time we get here. @@ -106,18 +161,27 @@ pci_ers_result_t dpc_reset_link(struct pci_dev *pdev) if (!pcie_wait_for_link(pdev, false)) pci_info(pdev, "Data Link Layer Link Active not cleared in 1000 msec\n"); - if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev)) - return PCI_ERS_RESULT_DISCONNECT; + if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev)) { + clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); + ret = PCI_ERS_RESULT_DISCONNECT; + goto out; + } pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS, PCI_EXP_DPC_STATUS_TRIGGER); if (!pcie_wait_for_link(pdev, true)) { pci_info(pdev, "Data Link Layer Link Active not set in 1000 msec\n"); - return PCI_ERS_RESULT_DISCONNECT; + clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); + ret = PCI_ERS_RESULT_DISCONNECT; + } else { + set_bit(PCI_DPC_RECOVERED, &pdev->priv_flags); + ret = PCI_ERS_RESULT_RECOVERED; } - - return PCI_ERS_RESULT_RECOVERED; +out: + clear_bit(PCI_DPC_RECOVERING, &pdev->priv_flags); + wake_up_all(&dpc_completed_waitqueue); + return ret; } static void dpc_process_rp_pio_error(struct pci_dev *pdev) From 8fe55ef23387ce3c7488375b1fd539420d7654bb Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 13 May 2021 15:18:27 +0100 Subject: [PATCH 29/48] PCI: Dynamically map ECAM regions Attempting to boot 32-bit ARM kernels under QEMU's 3.x virt models fails when we have more than 512M of RAM in the model as we run out of vmalloc space for the PCI ECAM regions. This failure will be silent when running libvirt, as the console in that situation is a PCI device. In this configuration, the kernel maps the whole ECAM, which QEMU sets up for 256 buses, even when maybe only seven buses are in use. Each bus uses 1M of ECAM space, and ioremap() adds an additional guard page between allocations. The kernel vmap allocator will align these regions to 512K, resulting in each mapping eating 1.5M of vmalloc space. This means we need 384M of vmalloc space just to map all of these, which is very wasteful of resources. Fix this by only mapping the ECAM for buses we are going to be using. In my setups, this is around seven buses in most guests, which is 10.5M of vmalloc space - way smaller than the 384M that would otherwise be required. This also means that the kernel can boot without forcing extra RAM into highmem with the vmalloc= argument, or decreasing the virtual RAM available to the guest. Suggested-by: Arnd Bergmann Link: https://lore.kernel.org/r/E1lhCAV-0002yb-50@rmk-PC.armlinux.org.uk Signed-off-by: Russell King Signed-off-by: Bjorn Helgaas Reviewed-by: Arnd Bergmann --- drivers/pci/ecam.c | 54 ++++++++++++++++++++++++++++++++++------ include/linux/pci-ecam.h | 1 + 2 files changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/pci/ecam.c b/drivers/pci/ecam.c index d2a1920bb055..1c40d2506aef 100644 --- a/drivers/pci/ecam.c +++ b/drivers/pci/ecam.c @@ -32,7 +32,7 @@ struct pci_config_window *pci_ecam_create(struct device *dev, struct pci_config_window *cfg; unsigned int bus_range, bus_range_max, bsz; struct resource *conflict; - int i, err; + int err; if (busr->start > busr->end) return ERR_PTR(-EINVAL); @@ -50,6 +50,7 @@ struct pci_config_window *pci_ecam_create(struct device *dev, cfg->busr.start = busr->start; cfg->busr.end = busr->end; cfg->busr.flags = IORESOURCE_BUS; + cfg->bus_shift = bus_shift; bus_range = resource_size(&cfg->busr); bus_range_max = resource_size(cfgres) >> bus_shift; if (bus_range > bus_range_max) { @@ -77,13 +78,6 @@ struct pci_config_window *pci_ecam_create(struct device *dev, cfg->winp = kcalloc(bus_range, sizeof(*cfg->winp), GFP_KERNEL); if (!cfg->winp) goto err_exit_malloc; - for (i = 0; i < bus_range; i++) { - cfg->winp[i] = - pci_remap_cfgspace(cfgres->start + i * bsz, - bsz); - if (!cfg->winp[i]) - goto err_exit_iomap; - } } else { cfg->win = pci_remap_cfgspace(cfgres->start, bus_range * bsz); if (!cfg->win) @@ -129,6 +123,44 @@ void pci_ecam_free(struct pci_config_window *cfg) } EXPORT_SYMBOL_GPL(pci_ecam_free); +static int pci_ecam_add_bus(struct pci_bus *bus) +{ + struct pci_config_window *cfg = bus->sysdata; + unsigned int bsz = 1 << cfg->bus_shift; + unsigned int busn = bus->number; + phys_addr_t start; + + if (!per_bus_mapping) + return 0; + + if (busn < cfg->busr.start || busn > cfg->busr.end) + return -EINVAL; + + busn -= cfg->busr.start; + start = cfg->res.start + busn * bsz; + + cfg->winp[busn] = pci_remap_cfgspace(start, bsz); + if (!cfg->winp[busn]) + return -ENOMEM; + + return 0; +} + +static void pci_ecam_remove_bus(struct pci_bus *bus) +{ + struct pci_config_window *cfg = bus->sysdata; + unsigned int busn = bus->number; + + if (!per_bus_mapping || busn < cfg->busr.start || busn > cfg->busr.end) + return; + + busn -= cfg->busr.start; + if (cfg->winp[busn]) { + iounmap(cfg->winp[busn]); + cfg->winp[busn] = NULL; + } +} + /* * Function to implement the pci_ops ->map_bus method */ @@ -167,6 +199,8 @@ EXPORT_SYMBOL_GPL(pci_ecam_map_bus); /* ECAM ops */ const struct pci_ecam_ops pci_generic_ecam_ops = { .pci_ops = { + .add_bus = pci_ecam_add_bus, + .remove_bus = pci_ecam_remove_bus, .map_bus = pci_ecam_map_bus, .read = pci_generic_config_read, .write = pci_generic_config_write, @@ -178,6 +212,8 @@ EXPORT_SYMBOL_GPL(pci_generic_ecam_ops); /* ECAM ops for 32-bit access only (non-compliant) */ const struct pci_ecam_ops pci_32b_ops = { .pci_ops = { + .add_bus = pci_ecam_add_bus, + .remove_bus = pci_ecam_remove_bus, .map_bus = pci_ecam_map_bus, .read = pci_generic_config_read32, .write = pci_generic_config_write32, @@ -187,6 +223,8 @@ const struct pci_ecam_ops pci_32b_ops = { /* ECAM ops for 32-bit read only (non-compliant) */ const struct pci_ecam_ops pci_32b_read_ops = { .pci_ops = { + .add_bus = pci_ecam_add_bus, + .remove_bus = pci_ecam_remove_bus, .map_bus = pci_ecam_map_bus, .read = pci_generic_config_read32, .write = pci_generic_config_write, diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index fbdadd4d8377..adea5a4771cf 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -55,6 +55,7 @@ struct pci_ecam_ops { struct pci_config_window { struct resource res; struct resource busr; + unsigned int bus_shift; void *priv; const struct pci_ecam_ops *ops; union { From 3a2e476dc5d02af3422143b07d8db1eced475314 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Tue, 11 May 2021 15:11:31 +0800 Subject: [PATCH 30/48] PCI: mediatek-gen3: Add missing MODULE_DEVICE_TABLE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this driver when it is built as an external module. Link: https://lore.kernel.org/r/1620717091-108691-1-git-send-email-zou_wei@huawei.com Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Lorenzo Pieralisi Reviewed-by: Krzysztof Wilczyński --- drivers/pci/controller/pcie-mediatek-gen3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c index 3c5b97716d40..f3aeb8d4eaca 100644 --- a/drivers/pci/controller/pcie-mediatek-gen3.c +++ b/drivers/pci/controller/pcie-mediatek-gen3.c @@ -1012,6 +1012,7 @@ static const struct of_device_id mtk_pcie_of_match[] = { { .compatible = "mediatek,mt8192-pcie" }, {}, }; +MODULE_DEVICE_TABLE(of, mtk_pcie_of_match); static struct platform_driver mtk_pcie_driver = { .probe = mtk_pcie_probe, From e673d697b9a234fc3544ac240e173cef8c82b349 Mon Sep 17 00:00:00 2001 From: Sandor Bodo-Merle Date: Tue, 22 Jun 2021 17:26:29 +0200 Subject: [PATCH 31/48] PCI: iproc: Fix multi-MSI base vector number allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit fc54bae28818 ("PCI: iproc: Allow allocation of multiple MSIs") introduced multi-MSI support with a broken allocation mechanism (it failed to reserve the proper number of bits from the inner domain). Natural alignment of the base vector number was also not guaranteed. Link: https://lore.kernel.org/r/20210622152630.40842-1-sbodomerle@gmail.com Fixes: fc54bae28818 ("PCI: iproc: Allow allocation of multiple MSIs") Reported-by: Pali Rohár Signed-off-by: Sandor Bodo-Merle Signed-off-by: Lorenzo Pieralisi Acked-by: Marc Zyngier Acked-by: Pali Rohár Acked-by: Ray Jui --- drivers/pci/controller/pcie-iproc-msi.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/pci/controller/pcie-iproc-msi.c b/drivers/pci/controller/pcie-iproc-msi.c index eede4e8f3f75..557d93dcb3bc 100644 --- a/drivers/pci/controller/pcie-iproc-msi.c +++ b/drivers/pci/controller/pcie-iproc-msi.c @@ -252,18 +252,18 @@ static int iproc_msi_irq_domain_alloc(struct irq_domain *domain, mutex_lock(&msi->bitmap_lock); - /* Allocate 'nr_cpus' number of MSI vectors each time */ - hwirq = bitmap_find_next_zero_area(msi->bitmap, msi->nr_msi_vecs, 0, - msi->nr_cpus, 0); - if (hwirq < msi->nr_msi_vecs) { - bitmap_set(msi->bitmap, hwirq, msi->nr_cpus); - } else { - mutex_unlock(&msi->bitmap_lock); - return -ENOSPC; - } + /* + * Allocate 'nr_irqs' multiplied by 'nr_cpus' number of MSI vectors + * each time + */ + hwirq = bitmap_find_free_region(msi->bitmap, msi->nr_msi_vecs, + order_base_2(msi->nr_cpus * nr_irqs)); mutex_unlock(&msi->bitmap_lock); + if (hwirq < 0) + return -ENOSPC; + for (i = 0; i < nr_irqs; i++) { irq_domain_set_info(domain, virq + i, hwirq + i, &iproc_msi_bottom_irq_chip, @@ -284,7 +284,8 @@ static void iproc_msi_irq_domain_free(struct irq_domain *domain, mutex_lock(&msi->bitmap_lock); hwirq = hwirq_to_canonical_hwirq(msi, data->hwirq); - bitmap_clear(msi->bitmap, hwirq, msi->nr_cpus); + bitmap_release_region(msi->bitmap, hwirq, + order_base_2(msi->nr_cpus * nr_irqs)); mutex_unlock(&msi->bitmap_lock); From 2dc0a201d0f59e6818ef443609f0850a32910844 Mon Sep 17 00:00:00 2001 From: Sandor Bodo-Merle Date: Tue, 22 Jun 2021 17:26:30 +0200 Subject: [PATCH 32/48] PCI: iproc: Support multi-MSI only on uniprocessor kernel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The interrupt affinity scheme used by this driver is incompatible with multi-MSI as it implies moving the doorbell address to that of another MSI group. This isn't possible for multi-MSI, as all the MSIs must have the same doorbell address. As such it is restricted to systems with a single CPU. Link: https://lore.kernel.org/r/20210622152630.40842-2-sbodomerle@gmail.com Fixes: fc54bae28818 ("PCI: iproc: Allow allocation of multiple MSIs") Reported-by: Marc Zyngier Signed-off-by: Sandor Bodo-Merle Signed-off-by: Lorenzo Pieralisi Acked-by: Marc Zyngier Acked-by: Pali Rohár Acked-by: Ray Jui --- drivers/pci/controller/pcie-iproc-msi.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/pci/controller/pcie-iproc-msi.c b/drivers/pci/controller/pcie-iproc-msi.c index 557d93dcb3bc..81b4effeb130 100644 --- a/drivers/pci/controller/pcie-iproc-msi.c +++ b/drivers/pci/controller/pcie-iproc-msi.c @@ -171,7 +171,7 @@ static struct irq_chip iproc_msi_irq_chip = { static struct msi_domain_info iproc_msi_domain_info = { .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX, + MSI_FLAG_PCI_MSIX, .chip = &iproc_msi_irq_chip, }; @@ -250,6 +250,9 @@ static int iproc_msi_irq_domain_alloc(struct irq_domain *domain, struct iproc_msi *msi = domain->host_data; int hwirq, i; + if (msi->nr_cpus > 1 && nr_irqs > 1) + return -EINVAL; + mutex_lock(&msi->bitmap_lock); /* @@ -540,6 +543,9 @@ int iproc_msi_init(struct iproc_pcie *pcie, struct device_node *node) mutex_init(&msi->bitmap_lock); msi->nr_cpus = num_possible_cpus(); + if (msi->nr_cpus == 1) + iproc_msi_domain_info.flags |= MSI_FLAG_MULTI_PCI_MSI; + msi->nr_irqs = of_irq_count(node); if (!msi->nr_irqs) { dev_err(pcie->dev, "found no MSI GIC interrupt\n"); From 655832d12f2251e04031294f547c86935a0a126d Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Wed, 6 Jan 2021 14:55:40 +0100 Subject: [PATCH 33/48] PCI: intel-gw: Fix INTx enable The legacy PCI interrupt lines need to be enabled using PCIE_APP_IRNEN bits 13 (INTA), 14 (INTB), 15 (INTC) and 16 (INTD). The old code however was taking (for example) "13" as raw value instead of taking BIT(13). Define the legacy PCI interrupt bits using the BIT() macro and then use these in PCIE_APP_IRN_INT. Link: https://lore.kernel.org/r/20210106135540.48420-1-martin.blumenstingl@googlemail.com Fixes: ed22aaaede44 ("PCI: dwc: intel: PCIe RC controller driver") Signed-off-by: Martin Blumenstingl Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Acked-by: Rahul Tanwar --- drivers/pci/controller/dwc/pcie-intel-gw.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/pci/controller/dwc/pcie-intel-gw.c b/drivers/pci/controller/dwc/pcie-intel-gw.c index f89a7d24ba28..d15cf35fa7f2 100644 --- a/drivers/pci/controller/dwc/pcie-intel-gw.c +++ b/drivers/pci/controller/dwc/pcie-intel-gw.c @@ -39,6 +39,10 @@ #define PCIE_APP_IRN_PM_TO_ACK BIT(9) #define PCIE_APP_IRN_LINK_AUTO_BW_STAT BIT(11) #define PCIE_APP_IRN_BW_MGT BIT(12) +#define PCIE_APP_IRN_INTA BIT(13) +#define PCIE_APP_IRN_INTB BIT(14) +#define PCIE_APP_IRN_INTC BIT(15) +#define PCIE_APP_IRN_INTD BIT(16) #define PCIE_APP_IRN_MSG_LTR BIT(18) #define PCIE_APP_IRN_SYS_ERR_RC BIT(29) #define PCIE_APP_INTX_OFST 12 @@ -48,10 +52,8 @@ PCIE_APP_IRN_RX_VDM_MSG | PCIE_APP_IRN_SYS_ERR_RC | \ PCIE_APP_IRN_PM_TO_ACK | PCIE_APP_IRN_MSG_LTR | \ PCIE_APP_IRN_BW_MGT | PCIE_APP_IRN_LINK_AUTO_BW_STAT | \ - (PCIE_APP_INTX_OFST + PCI_INTERRUPT_INTA) | \ - (PCIE_APP_INTX_OFST + PCI_INTERRUPT_INTB) | \ - (PCIE_APP_INTX_OFST + PCI_INTERRUPT_INTC) | \ - (PCIE_APP_INTX_OFST + PCI_INTERRUPT_INTD)) + PCIE_APP_IRN_INTA | PCIE_APP_IRN_INTB | \ + PCIE_APP_IRN_INTC | PCIE_APP_IRN_INTD) #define BUS_IATU_OFFSET SZ_256M #define RESET_INTERVAL_MS 100 From fd6403756f4c142ed788d27cde8d7cae3fba3956 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Tue, 11 May 2021 19:45:47 +0800 Subject: [PATCH 34/48] PCI: imx6: Remove imx6_pcie_probe() redundant error message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When devm_ioremap_resource() fails, __devm_ioremap_resource() prints an error message including the device name, failure cause, and possibly resource information. Remove the error message from imx6_pcie_probe() since it's redundant. Link: https://lore.kernel.org/r/20210511114547.5601-1-thunder.leizhen@huawei.com Reported-by: Hulk Robot Signed-off-by: Zhen Lei Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Krzysztof Wilczyński Acked-by: Richard Zhu --- drivers/pci/controller/dwc/pci-imx6.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 0cf1333c0440..035fb622cafa 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -1002,10 +1002,8 @@ static int imx6_pcie_probe(struct platform_device *pdev) return ret; } imx6_pcie->phy_base = devm_ioremap_resource(dev, &res); - if (IS_ERR(imx6_pcie->phy_base)) { - dev_err(dev, "Unable to map PCIe PHY\n"); + if (IS_ERR(imx6_pcie->phy_base)) return PTR_ERR(imx6_pcie->phy_base); - } } dbi_base = platform_get_resource(pdev, IORESOURCE_MEM, 0); From 7a289a164c734f53178607c24a063551cabd76d9 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Sat, 20 Feb 2021 10:49:48 +0800 Subject: [PATCH 35/48] PCI: imx6: Limit DBI register length for imx6qp PCIe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define the length of the DBI registers and limit config space to its length. This makes sure that the kernel does not access registers beyond that point that otherwise would lead to an abort on the i.MX 6QuadPlus. See commit 075af61c19cd ("PCI: imx6: Limit DBI register length") that resolves a similar issue on the i.MX 6Quad PCIe. Link: https://lore.kernel.org/r/1613789388-2495-2-git-send-email-hongxing.zhu@nxp.com Signed-off-by: Richard Zhu Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Lucas Stach Reviewed-by: Krzysztof Wilczyński --- drivers/pci/controller/dwc/pci-imx6.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 035fb622cafa..dc48dd7cd7c5 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -1173,6 +1173,7 @@ static const struct imx6_pcie_drvdata drvdata[] = { .variant = IMX6QP, .flags = IMX6_PCIE_FLAG_IMX6_PHY | IMX6_PCIE_FLAG_IMX6_SPEED_CHANGE, + .dbi_length = 0x200, }, [IMX7D] = { .variant = IMX7D, From c9d511dc84610498f370bbfff16e1c194b93c8d8 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Fri, 4 Jun 2021 09:47:48 +0800 Subject: [PATCH 36/48] dt-bindings: imx6q-pcie: Add "vph-supply" for PHY supply voltage The i.MX8MQ PCIe PHY can use either a 1.8V or a 3.3V power supply. Add a "vph-supply" property to indicate which regulator supplies power for the PHY. Link: https://lore.kernel.org/r/1622771269-13844-2-git-send-email-hongxing.zhu@nxp.com Signed-off-by: Richard Zhu Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Lucas Stach Acked-by: Rob Herring --- Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt index de4b2baf91e8..d8971ab99274 100644 --- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt +++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt @@ -38,6 +38,9 @@ Optional properties: The regulator will be enabled when initializing the PCIe host and disabled either as part of the init process or when shutting down the host. +- vph-supply: Should specify the regulator in charge of VPH one of the three + PCIe PHY powers. This regulator can be supplied by both 1.8v and 3.3v voltage + supplies. Additional required properties for imx6sx-pcie: - clock names: Must include the following additional entries: From d2ce69ca2516906f08b0b239df9c4e9493a4c193 Mon Sep 17 00:00:00 2001 From: Richard Zhu Date: Fri, 4 Jun 2021 09:47:49 +0800 Subject: [PATCH 37/48] PCI: imx6: Enable PHY internal regulator when supplied >3V The i.MX8MQ PCIe PHY needs 1.8V in default but can be supplied by either a 1.8V or a 3.3V regulator. The "vph-supply" DT property tells us which external regulator supplies the PHY. If that regulator supplies anything over 3V, enable the PHY's internal 3.3V-to-1.8V regulator. Link: https://lore.kernel.org/r/1622771269-13844-3-git-send-email-hongxing.zhu@nxp.com Signed-off-by: Richard Zhu Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Lucas Stach --- drivers/pci/controller/dwc/pci-imx6.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index dc48dd7cd7c5..80fc98acf097 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -37,6 +37,7 @@ #define IMX8MQ_GPR_PCIE_REF_USE_PAD BIT(9) #define IMX8MQ_GPR_PCIE_CLK_REQ_OVERRIDE_EN BIT(10) #define IMX8MQ_GPR_PCIE_CLK_REQ_OVERRIDE BIT(11) +#define IMX8MQ_GPR_PCIE_VREG_BYPASS BIT(12) #define IMX8MQ_GPR12_PCIE2_CTRL_DEVICE_TYPE GENMASK(11, 8) #define IMX8MQ_PCIE2_BASE_ADDR 0x33c00000 @@ -80,6 +81,7 @@ struct imx6_pcie { u32 tx_swing_full; u32 tx_swing_low; struct regulator *vpcie; + struct regulator *vph; void __iomem *phy_base; /* power domain for pcie */ @@ -621,6 +623,17 @@ static void imx6_pcie_init_phy(struct imx6_pcie *imx6_pcie) imx6_pcie_grp_offset(imx6_pcie), IMX8MQ_GPR_PCIE_REF_USE_PAD, IMX8MQ_GPR_PCIE_REF_USE_PAD); + /* + * Regarding the datasheet, the PCIE_VPH is suggested + * to be 1.8V. If the PCIE_VPH is supplied by 3.3V, the + * VREG_BYPASS should be cleared to zero. + */ + if (imx6_pcie->vph && + regulator_get_voltage(imx6_pcie->vph) > 3000000) + regmap_update_bits(imx6_pcie->iomuxc_gpr, + imx6_pcie_grp_offset(imx6_pcie), + IMX8MQ_GPR_PCIE_VREG_BYPASS, + 0); break; case IMX7D: regmap_update_bits(imx6_pcie->iomuxc_gpr, IOMUXC_GPR12, @@ -1128,6 +1141,13 @@ static int imx6_pcie_probe(struct platform_device *pdev) imx6_pcie->vpcie = NULL; } + imx6_pcie->vph = devm_regulator_get_optional(&pdev->dev, "vph"); + if (IS_ERR(imx6_pcie->vph)) { + if (PTR_ERR(imx6_pcie->vph) != -ENODEV) + return PTR_ERR(imx6_pcie->vph); + imx6_pcie->vph = NULL; + } + platform_set_drvdata(pdev, imx6_pcie); ret = imx6_pcie_attach_pd(dev); From 7bf475a4614a9722b9b989e53184a02596cf16d1 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Wed, 12 May 2021 12:07:02 +0800 Subject: [PATCH 38/48] PCI: tegra: Add missing MODULE_DEVICE_TABLE Add missing MODULE_DEVICE_TABLE definition so we generate correct modalias for automatic loading of this driver when it is built as a module. Link: https://lore.kernel.org/r/1620792422-16535-1-git-send-email-zou_wei@huawei.com Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Reviewed-by: Vidya Sagar Acked-by: Thierry Reding --- drivers/pci/controller/pci-tegra.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c index 8069bd9232d4..c979229a6d0d 100644 --- a/drivers/pci/controller/pci-tegra.c +++ b/drivers/pci/controller/pci-tegra.c @@ -2539,6 +2539,7 @@ static const struct of_device_id tegra_pcie_of_match[] = { { .compatible = "nvidia,tegra20-pcie", .data = &tegra20_pcie }, { }, }; +MODULE_DEVICE_TABLE(of, tegra_pcie_of_match); static void *tegra_pcie_ports_seq_start(struct seq_file *s, loff_t *pos) { From c4bf1f25c6c187864681d5ad4dd1fa92f62d5d32 Mon Sep 17 00:00:00 2001 From: Vidya Sagar Date: Tue, 4 May 2021 22:51:57 +0530 Subject: [PATCH 39/48] PCI: tegra194: Fix host initialization during resume Commit 275e88b06a27 ("PCI: tegra: Fix host link initialization") broke host initialization during resume as it misses out calling the API dw_pcie_setup_rc() which is required for host and MSI initialization. Link: https://lore.kernel.org/r/20210504172157.29712-1-vidyas@nvidia.com Fixes: 275e88b06a27 ("PCI: tegra: Fix host link initialization") Tested-by: Jon Hunter Signed-off-by: Vidya Sagar Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/dwc/pcie-tegra194.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c index bafd2c6ab3c2..b19775ab134e 100644 --- a/drivers/pci/controller/dwc/pcie-tegra194.c +++ b/drivers/pci/controller/dwc/pcie-tegra194.c @@ -2314,6 +2314,8 @@ static int tegra_pcie_dw_resume_noirq(struct device *dev) goto fail_host_init; } + dw_pcie_setup_rc(&pcie->pci.pp); + ret = tegra_pcie_dw_start_link(&pcie->pci); if (ret < 0) goto fail_host_init; From 8ceeac307a79f68c0d0c72d6e48b82fa424204ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Thu, 24 Jun 2021 23:33:43 +0200 Subject: [PATCH 40/48] PCI: aardvark: Fix checking for PIO Non-posted Request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PIO_NON_POSTED_REQ for PIO_STAT register is incorrectly defined. Bit 10 in register PIO_STAT indicates the response is to a non-posted request. Link: https://lore.kernel.org/r/20210624213345.3617-2-pali@kernel.org Signed-off-by: Pali Rohár Signed-off-by: Lorenzo Pieralisi Reviewed-by: Marek Behún Cc: stable@vger.kernel.org --- drivers/pci/controller/pci-aardvark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index 051b48bd7985..a81397a9886f 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -57,7 +57,7 @@ #define PIO_COMPLETION_STATUS_UR 1 #define PIO_COMPLETION_STATUS_CRS 2 #define PIO_COMPLETION_STATUS_CA 4 -#define PIO_NON_POSTED_REQ BIT(0) +#define PIO_NON_POSTED_REQ BIT(10) #define PIO_ADDR_LS (PIO_BASE_ADDR + 0x8) #define PIO_ADDR_MS (PIO_BASE_ADDR + 0xc) #define PIO_WR_DATA (PIO_BASE_ADDR + 0x10) From 7f71a409fe3d9358da07c77f15bb5b7960f12253 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pali=20Roh=C3=A1r?= Date: Fri, 25 Jun 2021 00:26:20 +0200 Subject: [PATCH 41/48] PCI: aardvark: Implement workaround for the readback value of VEND_ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marvell Armada 3700 Functional Errata, Guidelines, and Restrictions document describes in erratum 4.1 PCIe value of vendor ID (Ref #: 243): The readback value of VEND_ID (RD0070000h [15:0]) is 1B4Bh, while it should read 11ABh. The firmware can write the correct value, 11ABh, through VEND_ID (RD0076044h [15:0]). Implement this workaround in aardvark driver for both PCI vendor id and PCI subsystem vendor id. This change affects and fixes PCI vendor id of emulated PCIe root bridge. After this change emulated PCIe root bridge has correct vendor id. Link: https://lore.kernel.org/r/20210624222621.4776-5-pali@kernel.org Fixes: 8a3ebd8de328 ("PCI: aardvark: Implement emulated root PCI bridge config space") Signed-off-by: Pali Rohár Signed-off-by: Lorenzo Pieralisi Reviewed-by: Marek Behún Cc: stable@vger.kernel.org --- drivers/pci/controller/pci-aardvark.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index a81397a9886f..4dbd6208890a 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -125,6 +125,7 @@ #define LTSSM_MASK 0x3f #define LTSSM_L0 0x10 #define RC_BAR_CONFIG 0x300 +#define VENDOR_ID_REG (LMI_BASE_ADDR + 0x44) /* PCIe core controller registers */ #define CTRL_CORE_BASE_ADDR 0x18000 @@ -385,6 +386,16 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie) reg |= (IS_RC_MSK << IS_RC_SHIFT); advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG); + /* + * Replace incorrect PCI vendor id value 0x1b4b by correct value 0x11ab. + * VENDOR_ID_REG contains vendor id in low 16 bits and subsystem vendor + * id in high 16 bits. Updating this register changes readback value of + * read-only vendor id bits in PCIE_CORE_DEV_ID_REG register. Workaround + * for erratum 4.1: "The value of device and vendor ID is incorrect". + */ + reg = (PCI_VENDOR_ID_MARVELL << 16) | PCI_VENDOR_ID_MARVELL; + advk_writel(pcie, reg, VENDOR_ID_REG); + /* Set Advanced Error Capabilities and Control PF0 register */ reg = PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX | PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX_EN | From f67092eff2bd40650aad54a1a1910160f41d864a Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 18 Jun 2021 17:02:19 +0100 Subject: [PATCH 42/48] PCI: tegra194: Fix tegra_pcie_ep_raise_msi_irq() ill-defined shift tegra_pcie_ep_raise_msi_irq() shifted a signed 32-bit value left by 31 bits. The behavior of this is implementation-defined. Replace the shift by BIT(), which is well-defined. Found by cppcheck: $ cppcheck --enable=all drivers/pci/controller/dwc/pcie-tegra194.c Checking drivers/pci/controller/dwc/pcie-tegra194.c ... drivers/pci/controller/dwc/pcie-tegra194.c:1829:23: portability: Shifting signed 32-bit value by 31 bits is implementation-defined behaviour. See condition at line 1826. [shiftTooManyBitsSigned] appl_writel(pcie, (1 << irq), APPL_MSI_CTRL_1); ^ [bhelgaas: commit log] Link: https://lore.kernel.org/r/20210618160219.303092-1-jonathanh@nvidia.com Fixes: c57247f940e8 ("PCI: tegra: Add support for PCIe endpoint mode in Tegra194") Signed-off-by: Jon Hunter Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/dwc/pcie-tegra194.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c index b19775ab134e..5a64c6139e6c 100644 --- a/drivers/pci/controller/dwc/pcie-tegra194.c +++ b/drivers/pci/controller/dwc/pcie-tegra194.c @@ -1926,7 +1926,7 @@ static int tegra_pcie_ep_raise_msi_irq(struct tegra_pcie_dw *pcie, u16 irq) if (unlikely(irq > 31)) return -EINVAL; - appl_writel(pcie, (1 << irq), APPL_MSI_CTRL_1); + appl_writel(pcie, BIT(irq), APPL_MSI_CTRL_1); return 0; } From 3cf5f7ab230e2b886e493c7a8449ed50e29d2b98 Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Tue, 8 Jun 2021 10:04:09 +0200 Subject: [PATCH 43/48] PCI: rockchip: Register IRQ handlers after device and data are ready An IRQ handler may be called at any time after it is registered, so anything it relies on must be ready before registration. rockchip_pcie_subsys_irq_handler() and rockchip_pcie_client_irq_handler() read registers in the PCIe controller, but we registered them before turning on clocks to the controller. If either is called before the clocks are turned on, the register reads fail and the machine hangs. Similarly, rockchip_pcie_legacy_int_handler() uses rockchip->irq_domain, but we installed it before initializing irq_domain. Register IRQ handlers after their data structures are initialized and clocks are enabled. Found by enabling CONFIG_DEBUG_SHIRQ, which calls the IRQ handler when it is being unregistered. An error during the probe path might cause this unregistration and IRQ handler execution before the device or data structure init has finished. [bhelgaas: commit log] Link: https://lore.kernel.org/r/20210608080409.1729276-1-javierm@redhat.com Reported-by: Peter Robinson Tested-by: Peter Robinson Signed-off-by: Javier Martinez Canillas Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Acked-by: Shawn Lin --- drivers/pci/controller/pcie-rockchip-host.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c index f1d08a1b1591..78d04ac29cd5 100644 --- a/drivers/pci/controller/pcie-rockchip-host.c +++ b/drivers/pci/controller/pcie-rockchip-host.c @@ -592,10 +592,6 @@ static int rockchip_pcie_parse_host_dt(struct rockchip_pcie *rockchip) if (err) return err; - err = rockchip_pcie_setup_irq(rockchip); - if (err) - return err; - rockchip->vpcie12v = devm_regulator_get_optional(dev, "vpcie12v"); if (IS_ERR(rockchip->vpcie12v)) { if (PTR_ERR(rockchip->vpcie12v) != -ENODEV) @@ -973,8 +969,6 @@ static int rockchip_pcie_probe(struct platform_device *pdev) if (err) goto err_vpcie; - rockchip_pcie_enable_interrupts(rockchip); - err = rockchip_pcie_init_irq_domain(rockchip); if (err < 0) goto err_deinit_port; @@ -992,6 +986,12 @@ static int rockchip_pcie_probe(struct platform_device *pdev) bridge->sysdata = rockchip; bridge->ops = &rockchip_pcie_ops; + err = rockchip_pcie_setup_irq(rockchip); + if (err) + goto err_remove_irq_domain; + + rockchip_pcie_enable_interrupts(rockchip); + err = pci_host_probe(bridge); if (err < 0) goto err_remove_irq_domain; From 4db221f6983aadd6d1975bab170f089afd6fd8d6 Mon Sep 17 00:00:00 2001 From: Joyce Ooi Date: Thu, 1 Jul 2021 14:52:47 +0800 Subject: [PATCH 44/48] MAINTAINERS: Add Joyce Ooi as Altera PCIe maintainer Ley Foon Tan has moved to a different role, so add Joyce Ooi as Altera PCIe maintainer. The rfi@lists.rocketboards.org mailing list seems to be dead, so drop it. [bhelgaas: drop rfi@lists.rocketboards.org] Link: https://lore.kernel.org/r/20210701065247.152292-1-joyce.ooi@intel.com Signed-off-by: Joyce Ooi Signed-off-by: Bjorn Helgaas --- MAINTAINERS | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index bd7aff0c120f..2f2fb63246c3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13942,8 +13942,7 @@ F: Documentation/devicetree/bindings/pci/aardvark-pci.txt F: drivers/pci/controller/pci-aardvark.c PCI DRIVER FOR ALTERA PCIE IP -M: Ley Foon Tan -L: rfi@lists.rocketboards.org (moderated for non-subscribers) +M: Joyce Ooi L: linux-pci@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/pci/altera-pcie.txt @@ -14140,8 +14139,7 @@ S: Supported F: Documentation/PCI/pci-error-recovery.rst PCI MSI DRIVER FOR ALTERA MSI IP -M: Ley Foon Tan -L: rfi@lists.rocketboards.org (moderated for non-subscribers) +M: Joyce Ooi L: linux-pci@vger.kernel.org S: Supported F: Documentation/devicetree/bindings/pci/altera-pcie-msi.txt From 6d71cc4c91d856f05d9f175fba866616dd1a7d1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Thu, 1 Jul 2021 18:43:06 +0000 Subject: [PATCH 45/48] PCI: cpcihp: Declare cpci_debug in header file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cpci_debug is declared as a global variable in cpci_hotplug_core.c and used in cpci_hotplug_pci.c via an "extern". Add an extern declaration in the header file. Resolves the following sparse warning: drivers/pci/hotplug/cpci_hotplug_core.c:47:5: warning: symbol 'cpci_debug' was not declared. Should it be static? Link: https://lore.kernel.org/r/20210701184306.1492003-1-kw@linux.com Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/hotplug/cpci_hotplug.h | 3 +++ drivers/pci/hotplug/cpci_hotplug_pci.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pci/hotplug/cpci_hotplug.h b/drivers/pci/hotplug/cpci_hotplug.h index f33ff2bca414..3fdd1b9bd8c3 100644 --- a/drivers/pci/hotplug/cpci_hotplug.h +++ b/drivers/pci/hotplug/cpci_hotplug.h @@ -75,6 +75,9 @@ int cpci_hp_unregister_bus(struct pci_bus *bus); int cpci_hp_start(void); int cpci_hp_stop(void); +/* Global variables */ +extern int cpci_debug; + /* * Internal function prototypes, these functions should not be used by * board/chassis drivers. diff --git a/drivers/pci/hotplug/cpci_hotplug_pci.c b/drivers/pci/hotplug/cpci_hotplug_pci.c index 2c16adb7f4ec..6c48066acb44 100644 --- a/drivers/pci/hotplug/cpci_hotplug_pci.c +++ b/drivers/pci/hotplug/cpci_hotplug_pci.c @@ -19,8 +19,6 @@ #define MY_NAME "cpci_hotplug" -extern int cpci_debug; - #define dbg(format, arg...) \ do { \ if (cpci_debug) \ From 347269c113f10fbe893f11dd3ae5f44aa15d3111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wilczy=C5=84ski?= Date: Sat, 3 Jul 2021 15:13:02 +0000 Subject: [PATCH 46/48] PCI: Fix kernel-doc formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix kernel-doc formatting throughout drivers/pci and related include files. No change to functionality intended. Check for warnings: $ find include drivers/pci -type f -path "*pci*.[ch]" | xargs scripts/kernel-doc -none [bhelgaas: squashed to one commit] Link: https://lore.kernel.org/r/20210509030237.368540-1-kw@linux.com Link: https://lore.kernel.org/r/20210703151306.1922450-1-kw@linux.com Link: https://lore.kernel.org/r/20210703151306.1922450-2-kw@linux.com Link: https://lore.kernel.org/r/20210703151306.1922450-3-kw@linux.com Link: https://lore.kernel.org/r/20210703151306.1922450-4-kw@linux.com Link: https://lore.kernel.org/r/20210703151306.1922450-5-kw@linux.com Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/cadence/pcie-cadence.h | 7 ++++-- drivers/pci/controller/pci-xgene.c | 2 +- drivers/pci/controller/pcie-iproc-msi.c | 4 ++-- drivers/pci/controller/pcie-iproc.c | 24 +++++++++---------- drivers/pci/controller/pcie-iproc.h | 16 +++++++++---- drivers/pci/hotplug/cpqphp_core.c | 7 +++--- drivers/pci/hotplug/cpqphp_ctrl.c | 2 +- drivers/pci/hotplug/pciehp.h | 3 +++ drivers/pci/pci.h | 4 ++-- include/linux/pci-ep-cfs.h | 2 +- include/linux/pci-epc.h | 5 +++- include/linux/pci-epf.h | 5 +++- include/linux/pci_hotplug.h | 2 ++ include/uapi/linux/pcitest.h | 2 +- 14 files changed, 52 insertions(+), 33 deletions(-) diff --git a/drivers/pci/controller/cadence/pcie-cadence.h b/drivers/pci/controller/cadence/pcie-cadence.h index 254d2570f8c9..30db2d68c17a 100644 --- a/drivers/pci/controller/cadence/pcie-cadence.h +++ b/drivers/pci/controller/cadence/pcie-cadence.h @@ -263,9 +263,12 @@ struct cdns_pcie_ops { * struct cdns_pcie - private data for Cadence PCIe controller drivers * @reg_base: IO mapped register base * @mem_res: start/end offsets in the physical system memory to map PCI accesses + * @dev: PCIe controller * @is_rc: tell whether the PCIe controller mode is Root Complex or Endpoint. - * @bus: In Root Complex mode, the bus number - * @ops: Platform specific ops to control various inputs from Cadence PCIe + * @phy_count: number of supported PHY devices + * @phy: list of pointers to specific PHY control blocks + * @link: list of pointers to corresponding device link representations + * @ops: Platform-specific ops to control various inputs from Cadence PCIe * wrapper */ struct cdns_pcie { diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c index 7f503dd4ff81..8dc5140dd80d 100644 --- a/drivers/pci/controller/pci-xgene.c +++ b/drivers/pci/controller/pci-xgene.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0+ -/** +/* * APM X-Gene PCIe Driver * * Copyright (c) 2014 Applied Micro Circuits Corporation. diff --git a/drivers/pci/controller/pcie-iproc-msi.c b/drivers/pci/controller/pcie-iproc-msi.c index eede4e8f3f75..21d28dc0b901 100644 --- a/drivers/pci/controller/pcie-iproc-msi.c +++ b/drivers/pci/controller/pcie-iproc-msi.c @@ -49,7 +49,7 @@ enum iproc_msi_reg { struct iproc_msi; /** - * iProc MSI group + * struct iproc_msi_grp - iProc MSI group * * One MSI group is allocated per GIC interrupt, serviced by one iProc MSI * event queue. @@ -65,7 +65,7 @@ struct iproc_msi_grp { }; /** - * iProc event queue based MSI + * struct iproc_msi - iProc event queue based MSI * * Only meant to be used on platforms without MSI support integrated into the * GIC. diff --git a/drivers/pci/controller/pcie-iproc.c b/drivers/pci/controller/pcie-iproc.c index 02e52f698eeb..30ac5fbefbbf 100644 --- a/drivers/pci/controller/pcie-iproc.c +++ b/drivers/pci/controller/pcie-iproc.c @@ -89,8 +89,8 @@ #define IPROC_PCIE_REG_INVALID 0xffff /** - * iProc PCIe outbound mapping controller specific parameters - * + * struct iproc_pcie_ob_map - iProc PCIe outbound mapping controller-specific + * parameters * @window_sizes: list of supported outbound mapping window sizes in MB * @nr_sizes: number of supported outbound mapping window sizes */ @@ -136,22 +136,20 @@ static const struct iproc_pcie_ob_map paxb_v2_ob_map[] = { }; /** - * iProc PCIe inbound mapping type + * enum iproc_pcie_ib_map_type - iProc PCIe inbound mapping type + * @IPROC_PCIE_IB_MAP_MEM: DDR memory + * @IPROC_PCIE_IB_MAP_IO: device I/O memory + * @IPROC_PCIE_IB_MAP_INVALID: invalid or unused */ enum iproc_pcie_ib_map_type { - /* for DDR memory */ IPROC_PCIE_IB_MAP_MEM = 0, - - /* for device I/O memory */ IPROC_PCIE_IB_MAP_IO, - - /* invalid or unused */ IPROC_PCIE_IB_MAP_INVALID }; /** - * iProc PCIe inbound mapping controller specific parameters - * + * struct iproc_pcie_ib_map - iProc PCIe inbound mapping controller-specific + * parameters * @type: inbound mapping region type * @size_unit: inbound mapping region size unit, could be SZ_1K, SZ_1M, or * SZ_1G @@ -437,7 +435,7 @@ static inline void iproc_pcie_write_reg(struct iproc_pcie *pcie, writel(val, pcie->base + offset); } -/** +/* * APB error forwarding can be disabled during access of configuration * registers of the endpoint device, to prevent unsupported requests * (typically seen during enumeration with multi-function devices) from @@ -619,7 +617,7 @@ static int iproc_pcie_config_read(struct pci_bus *bus, unsigned int devfn, return PCIBIOS_SUCCESSFUL; } -/** +/* * Note access to the configuration registers are protected at the higher layer * by 'pci_lock' in drivers/pci/access.c */ @@ -897,7 +895,7 @@ static inline int iproc_pcie_ob_write(struct iproc_pcie *pcie, int window_idx, return 0; } -/** +/* * Some iProc SoCs require the SW to configure the outbound address mapping * * Outbound address translation: diff --git a/drivers/pci/controller/pcie-iproc.h b/drivers/pci/controller/pcie-iproc.h index c2676e442f55..dcca315897c8 100644 --- a/drivers/pci/controller/pcie-iproc.h +++ b/drivers/pci/controller/pcie-iproc.h @@ -7,7 +7,13 @@ #define _PCIE_IPROC_H /** - * iProc PCIe interface type + * enum iproc_pcie_type - iProc PCIe interface type + * @IPROC_PCIE_PAXB_BCMA: BCMA-based host controllers + * @IPROC_PCIE_PAXB: PAXB-based host controllers for + * NS, NSP, Cygnus, NS2, and Pegasus SOCs + * @IPROC_PCIE_PAXB_V2: PAXB-based host controllers for Stingray SoCs + * @IPROC_PCIE_PAXC: PAXC-based host controllers + * @IPROC_PCIE_PAXC_V2: PAXC-based host controllers (second generation) * * PAXB is the wrapper used in root complex that can be connected to an * external endpoint device. @@ -24,7 +30,7 @@ enum iproc_pcie_type { }; /** - * iProc PCIe outbound mapping + * struct iproc_pcie_ob - iProc PCIe outbound mapping * @axi_offset: offset from the AXI address to the internal address used by * the iProc PCIe core * @nr_windows: total number of supported outbound mapping windows @@ -35,7 +41,7 @@ struct iproc_pcie_ob { }; /** - * iProc PCIe inbound mapping + * struct iproc_pcie_ib - iProc PCIe inbound mapping * @nr_regions: total number of supported inbound mapping regions */ struct iproc_pcie_ib { @@ -47,13 +53,13 @@ struct iproc_pcie_ib_map; struct iproc_msi; /** - * iProc PCIe device - * + * struct iproc_pcie - iProc PCIe device * @dev: pointer to device data structure * @type: iProc PCIe interface type * @reg_offsets: register offsets * @base: PCIe host controller I/O register base * @base_addr: PCIe host controller register base physical address + * @mem: host bridge memory window resource * @phy: optional PHY device that controls the Serdes * @map_irq: function callback to map interrupts * @ep_is_internal: indicates an internal emulated endpoint device is connected diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c index b8aacb41a83c..f99a7927e5a8 100644 --- a/drivers/pci/hotplug/cpqphp_core.c +++ b/drivers/pci/hotplug/cpqphp_core.c @@ -296,9 +296,10 @@ static int ctrl_slot_cleanup(struct controller *ctrl) * * Won't work for more than one PCI-PCI bridge in a slot. * - * @bus_num - bus number of PCI device - * @dev_num - device number of PCI device - * @slot - Pointer to u8 where slot number will be returned + * @bus: pointer to the PCI bus structure + * @bus_num: bus number of PCI device + * @dev_num: device number of PCI device + * @slot: Pointer to u8 where slot number will be returned * * Output: SUCCESS or FAILURE */ diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c index 68de958a9be8..1b26ca0b3701 100644 --- a/drivers/pci/hotplug/cpqphp_ctrl.c +++ b/drivers/pci/hotplug/cpqphp_ctrl.c @@ -1877,7 +1877,7 @@ static void interrupt_event_handler(struct controller *ctrl) /** * cpqhp_pushbutton_thread - handle pushbutton events - * @slot: target slot (struct) + * @t: pointer to struct timer_list which holds all timer-related callbacks * * Scheduled procedure to handle blocking stuff for the pushbuttons. * Handles all pending events and exits. diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 4fd200d8b0a9..d4a930881054 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -47,6 +47,9 @@ extern int pciehp_poll_time; * struct controller - PCIe hotplug controller * @pcie: pointer to the controller's PCIe port service device * @slot_cap: cached copy of the Slot Capabilities register + * @inband_presence_disabled: In-Band Presence Detect Disable supported by + * controller and disabled per spec recommendation (PCIe r5.0, appendix I + * implementation note) * @slot_ctrl: cached copy of the Slot Control register * @ctrl_lock: serializes writes to the Slot Control register * @cmd_started: jiffies when the Slot Control register was last written; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 37c913bbc6e1..8b385eaf2043 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -324,8 +324,8 @@ struct pci_sriov { /** * pci_dev_set_io_state - Set the new error state if possible. * - * @dev - pci device to set new error_state - * @new - the state we want dev to be in + * @dev: PCI device to set new error_state + * @new: the state we want dev to be in * * Must be called with device_lock held. * diff --git a/include/linux/pci-ep-cfs.h b/include/linux/pci-ep-cfs.h index 662881335c7e..3e2140d7e31d 100644 --- a/include/linux/pci-ep-cfs.h +++ b/include/linux/pci-ep-cfs.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0+ */ -/** +/* * PCI Endpoint ConfigFS header file * * Copyright (C) 2017 Texas Instruments diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index b82c9b100e97..50a649d33e68 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/** +/* * PCI Endpoint *Controller* (EPC) header file * * Copyright (C) 2017 Texas Instruments @@ -58,6 +58,7 @@ pci_epc_interface_string(enum pci_epc_interface_type type) * @map_msi_irq: ops to map physical address to MSI address and return MSI data * @start: ops to start the PCI link * @stop: ops to stop the PCI link + * @get_features: ops to get the features supported by the EPC * @owner: the module owner containing the ops */ struct pci_epc_ops { @@ -150,6 +151,8 @@ struct pci_epc { /** * struct pci_epc_features - features supported by a EPC device per function * @linkup_notifier: indicate if the EPC device can notify EPF driver on link up + * @core_init_notifier: indicate cores that can notify about their availability + * for initialization * @msi_capable: indicate if the endpoint function has MSI capability * @msix_capable: indicate if the endpoint function has MSI-X capability * @reserved_bar: bitmap to indicate reserved BAR unavailable to function driver diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 6833e2160ef1..2debc27ba95e 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/** +/* * PCI Endpoint *Function* (EPF) header file * * Copyright (C) 2017 Texas Instruments @@ -102,6 +102,8 @@ struct pci_epf_driver { * @phys_addr: physical address that should be mapped to the BAR * @addr: virtual address corresponding to the @phys_addr * @size: the size of the address space present in BAR + * @barno: BAR number + * @flags: flags that are set for the BAR */ struct pci_epf_bar { dma_addr_t phys_addr; @@ -118,6 +120,7 @@ struct pci_epf_bar { * @header: represents standard configuration header * @bar: represents the BAR of EPF device * @msi_interrupts: number of MSI interrupts required by this function + * @msix_interrupts: number of MSI-X interrupts required by this function * @func_no: unique function number within this endpoint device * @epc: the EPC device to which this EPF device is bound * @driver: the EPF driver to which this EPF device is bound diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index b482e42d7153..2dac431d94ac 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -50,6 +50,8 @@ struct hotplug_slot_ops { /** * struct hotplug_slot - used to register a physical slot with the hotplug pci core * @ops: pointer to the &struct hotplug_slot_ops to be used for this slot + * @slot_list: internal list used to track hotplug PCI slots + * @pci_slot: represents a physical slot * @owner: The module owner of this structure * @mod_name: The module name (KBUILD_MODNAME) of this structure */ diff --git a/include/uapi/linux/pcitest.h b/include/uapi/linux/pcitest.h index c3ab4c826297..f9c1af8d141b 100644 --- a/include/uapi/linux/pcitest.h +++ b/include/uapi/linux/pcitest.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/** +/* * pcitest.h - PCI test uapi defines * * Copyright (C) 2017 Texas Instruments From 662e4b03431f5304603f1e42c4d4c2c1d64cba40 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 17 May 2021 12:18:39 -0500 Subject: [PATCH 47/48] PCI: xgene: Annotate __iomem pointer "bar_addr" is passed as the argument to writel(), which expects a "void __iomem *". Annotate "bar_addr" correctly. Resolves an sparse "incorrect type in argument 2 (different address spaces)" warning. Link: https://lore.kernel.org/r/202105171809.Tay9fImZ-lkp@intel.com Link: https://lore.kernel.org/r/20210517171839.25777-1-helgaas@kernel.org Reported-by: kernel test robot Signed-off-by: Bjorn Helgaas Signed-off-by: Lorenzo Pieralisi --- drivers/pci/controller/pci-xgene.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c index 7f503dd4ff81..1a412f5377fb 100644 --- a/drivers/pci/controller/pci-xgene.c +++ b/drivers/pci/controller/pci-xgene.c @@ -485,7 +485,7 @@ static void xgene_pcie_setup_ib_reg(struct xgene_pcie_port *port, { void __iomem *cfg_base = port->cfg_base; struct device *dev = port->dev; - void *bar_addr; + void __iomem *bar_addr; u32 pim_reg; u64 cpu_addr = entry->res->start; u64 pci_addr = cpu_addr - entry->offset; From ae21f835a5bda0ef1d00940373445693a764d89e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 1 Jul 2021 16:48:23 -0500 Subject: [PATCH 48/48] PCI/P2PDMA: Finish RCU conversion of pdev->p2pdma MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While looking at pci_alloc_p2pmem() I found RCU protection was not properly applied there, as pdev->p2pdma was potentially read multiple times. Fix pci_alloc_p2pmem(), add __rcu qualifier to p2pdma field of struct pci_dev, and fix all other accesses to this field with proper RCU verbs. Link: https://lore.kernel.org/r/20210701095621.3129283-1-eric.dumazet@gmail.com Fixes: 1570175abd16 ("PCI/P2PDMA: track pgmap references per resource, not globally") Signed-off-by: Eric Dumazet Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Reviewed-by: Logan Gunthorpe Cc: Dan Williams Cc: Ira Weiny Cc: Greg Kroah-Hartman Cc: "Jérôme Glisse" Cc: "Rafael J. Wysocki" --- drivers/pci/p2pdma.c | 97 ++++++++++++++++++++++++++++++++------------ include/linux/pci.h | 2 +- 2 files changed, 73 insertions(+), 26 deletions(-) diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index ca2574debb2d..69c25e71590a 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -48,10 +48,14 @@ static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { struct pci_dev *pdev = to_pci_dev(dev); + struct pci_p2pdma *p2pdma; size_t size = 0; - if (pdev->p2pdma->pool) - size = gen_pool_size(pdev->p2pdma->pool); + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + if (p2pdma && p2pdma->pool) + size = gen_pool_size(p2pdma->pool); + rcu_read_unlock(); return scnprintf(buf, PAGE_SIZE, "%zd\n", size); } @@ -61,10 +65,14 @@ static ssize_t available_show(struct device *dev, struct device_attribute *attr, char *buf) { struct pci_dev *pdev = to_pci_dev(dev); + struct pci_p2pdma *p2pdma; size_t avail = 0; - if (pdev->p2pdma->pool) - avail = gen_pool_avail(pdev->p2pdma->pool); + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + if (p2pdma && p2pdma->pool) + avail = gen_pool_avail(p2pdma->pool); + rcu_read_unlock(); return scnprintf(buf, PAGE_SIZE, "%zd\n", avail); } @@ -74,9 +82,16 @@ static ssize_t published_show(struct device *dev, struct device_attribute *attr, char *buf) { struct pci_dev *pdev = to_pci_dev(dev); + struct pci_p2pdma *p2pdma; + bool published = false; - return scnprintf(buf, PAGE_SIZE, "%d\n", - pdev->p2pdma->p2pmem_published); + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + if (p2pdma) + published = p2pdma->p2pmem_published; + rcu_read_unlock(); + + return scnprintf(buf, PAGE_SIZE, "%d\n", published); } static DEVICE_ATTR_RO(published); @@ -95,8 +110,9 @@ static const struct attribute_group p2pmem_group = { static void pci_p2pdma_release(void *data) { struct pci_dev *pdev = data; - struct pci_p2pdma *p2pdma = pdev->p2pdma; + struct pci_p2pdma *p2pdma; + p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); if (!p2pdma) return; @@ -128,16 +144,14 @@ static int pci_p2pdma_setup(struct pci_dev *pdev) if (error) goto out_pool_destroy; - pdev->p2pdma = p2p; - error = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group); if (error) goto out_pool_destroy; + rcu_assign_pointer(pdev->p2pdma, p2p); return 0; out_pool_destroy: - pdev->p2pdma = NULL; gen_pool_destroy(p2p->pool); out: devm_kfree(&pdev->dev, p2p); @@ -159,6 +173,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, { struct pci_p2pdma_pagemap *p2p_pgmap; struct dev_pagemap *pgmap; + struct pci_p2pdma *p2pdma; void *addr; int error; @@ -200,7 +215,8 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, goto pgmap_free; } - error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr, + p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); + error = gen_pool_add_owner(p2pdma->pool, (unsigned long)addr, pci_bus_address(pdev, bar) + offset, range_len(&pgmap->range), dev_to_node(&pdev->dev), pgmap->ref); @@ -437,6 +453,7 @@ calc_map_type_and_dist(struct pci_dev *provider, struct pci_dev *client, enum pci_p2pdma_map_type map_type = PCI_P2PDMA_MAP_THRU_HOST_BRIDGE; struct pci_dev *a = provider, *b = client, *bb; bool acs_redirects = false; + struct pci_p2pdma *p2pdma; struct seq_buf acs_list; int acs_cnt = 0; int dist_a = 0; @@ -515,9 +532,12 @@ map_through_host_bridge: map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED; } done: - if (provider->p2pdma) - xa_store(&provider->p2pdma->map_types, map_types_idx(client), + rcu_read_lock(); + p2pdma = rcu_dereference(provider->p2pdma); + if (p2pdma) + xa_store(&p2pdma->map_types, map_types_idx(client), xa_mk_value(map_type), GFP_KERNEL); + rcu_read_unlock(); return map_type; } @@ -586,7 +606,15 @@ EXPORT_SYMBOL_GPL(pci_p2pdma_distance_many); */ bool pci_has_p2pmem(struct pci_dev *pdev) { - return pdev->p2pdma && pdev->p2pdma->p2pmem_published; + struct pci_p2pdma *p2pdma; + bool res; + + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + res = p2pdma && p2pdma->p2pmem_published; + rcu_read_unlock(); + + return res; } EXPORT_SYMBOL_GPL(pci_has_p2pmem); @@ -666,6 +694,7 @@ void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size) { void *ret = NULL; struct percpu_ref *ref; + struct pci_p2pdma *p2pdma; /* * Pairs with synchronize_rcu() in pci_p2pdma_release() to @@ -673,16 +702,16 @@ void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size) * read-lock. */ rcu_read_lock(); - if (unlikely(!pdev->p2pdma)) + p2pdma = rcu_dereference(pdev->p2pdma); + if (unlikely(!p2pdma)) goto out; - ret = (void *)gen_pool_alloc_owner(pdev->p2pdma->pool, size, - (void **) &ref); + ret = (void *)gen_pool_alloc_owner(p2pdma->pool, size, (void **) &ref); if (!ret) goto out; if (unlikely(!percpu_ref_tryget_live(ref))) { - gen_pool_free(pdev->p2pdma->pool, (unsigned long) ret, size); + gen_pool_free(p2pdma->pool, (unsigned long) ret, size); ret = NULL; goto out; } @@ -701,8 +730,9 @@ EXPORT_SYMBOL_GPL(pci_alloc_p2pmem); void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size) { struct percpu_ref *ref; + struct pci_p2pdma *p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); - gen_pool_free_owner(pdev->p2pdma->pool, (uintptr_t)addr, size, + gen_pool_free_owner(p2pdma->pool, (uintptr_t)addr, size, (void **) &ref); percpu_ref_put(ref); } @@ -716,9 +746,13 @@ EXPORT_SYMBOL_GPL(pci_free_p2pmem); */ pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev, void *addr) { + struct pci_p2pdma *p2pdma; + if (!addr) return 0; - if (!pdev->p2pdma) + + p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); + if (!p2pdma) return 0; /* @@ -726,7 +760,7 @@ pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev, void *addr) * bus address as the physical address. So gen_pool_virt_to_phys() * actually returns the bus address despite the misleading name. */ - return gen_pool_virt_to_phys(pdev->p2pdma->pool, (unsigned long)addr); + return gen_pool_virt_to_phys(p2pdma->pool, (unsigned long)addr); } EXPORT_SYMBOL_GPL(pci_p2pmem_virt_to_bus); @@ -797,16 +831,23 @@ EXPORT_SYMBOL_GPL(pci_p2pmem_free_sgl); */ void pci_p2pmem_publish(struct pci_dev *pdev, bool publish) { - if (pdev->p2pdma) - pdev->p2pdma->p2pmem_published = publish; + struct pci_p2pdma *p2pdma; + + rcu_read_lock(); + p2pdma = rcu_dereference(pdev->p2pdma); + if (p2pdma) + p2pdma->p2pmem_published = publish; + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(pci_p2pmem_publish); static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap, struct device *dev) { + enum pci_p2pdma_map_type type = PCI_P2PDMA_MAP_NOT_SUPPORTED; struct pci_dev *provider = to_p2p_pgmap(pgmap)->provider; struct pci_dev *client; + struct pci_p2pdma *p2pdma; if (!provider->p2pdma) return PCI_P2PDMA_MAP_NOT_SUPPORTED; @@ -816,8 +857,14 @@ static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap, client = to_pci_dev(dev); - return xa_to_value(xa_load(&provider->p2pdma->map_types, - map_types_idx(client))); + rcu_read_lock(); + p2pdma = rcu_dereference(provider->p2pdma); + + if (p2pdma) + type = xa_to_value(xa_load(&p2pdma->map_types, + map_types_idx(client))); + rcu_read_unlock(); + return type; } static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap, diff --git a/include/linux/pci.h b/include/linux/pci.h index c20211e59a57..58a39c7239f3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -497,7 +497,7 @@ struct pci_dev { u16 pasid_features; #endif #ifdef CONFIG_PCI_P2PDMA - struct pci_p2pdma *p2pdma; + struct pci_p2pdma __rcu *p2pdma; #endif u16 acs_cap; /* ACS Capability offset */ phys_addr_t rom; /* Physical address if not from BAR */