linux/drivers/pci/hotplug/pciehp_hpc.c

893 lines
24 KiB
C
Raw Normal View History

/*
* PCI Express PCI Hot Plug Driver
*
* Copyright (C) 1995,2001 Compaq Computer Corporation
* Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com)
* Copyright (C) 2001 IBM Corp.
* Copyright (C) 2003-2004 Intel Corporation
*
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* Send feedback to <greg@kroah.com>,<kristen.c.accardi@intel.com>
*
*/
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/signal.h>
#include <linux/jiffies.h>
#include <linux/timer.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/time.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. 
This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
#include "../pci.h"
#include "pciehp.h"
/*
 * Shorthand for ctrl->pcie->port, the pci_dev that the accessors in this
 * file pass to the PCIe capability read/write helpers.
 */
static inline struct pci_dev *ctrl_dev(struct controller *ctrl)
{
	struct pci_dev *port = ctrl->pcie->port;

	return port;
}
/*
 * Forward declarations: int_poll_timeout() below calls both functions
 * before any definition of them has been seen by the compiler.
 */
static irqreturn_t pcie_isr(int irq, void *dev_id);
static void start_int_poll_timer(struct controller *ctrl, int sec);
/*
 * Poll timer callback: run the interrupt handler by hand, then re-arm the
 * poll timer for the next interval.
 *
 * @data is the struct controller pointer that start_int_poll_timer()
 * stored in poll_timer.data.
 */
static void int_poll_timeout(unsigned long data)
{
	struct controller *hpc = (struct controller *)data;

	/* No interrupt fired; invoke the handler directly with irq 0. */
	pcie_isr(0, hpc);

	init_timer(&hpc->poll_timer);

	/* A zero pciehp_poll_time falls back to the 2 second default. */
	if (pciehp_poll_time == 0)
		pciehp_poll_time = 2;

	start_int_poll_timer(hpc, pciehp_poll_time);
}
/* This function starts the interrupt polling timer. */
static void start_int_poll_timer(struct controller *ctrl, int sec)
{
/* Clamp to sane value */
if ((sec <= 0) || (sec > 60))
sec = 2;
ctrl->poll_timer.function = &int_poll_timeout;
ctrl->poll_timer.data = (unsigned long)ctrl;
ctrl->poll_timer.expires = jiffies + sec * HZ;
add_timer(&ctrl->poll_timer);
}
/*
 * Set up event delivery for @ctrl.
 *
 * In poll mode no interrupt is requested: the poll timer is initialised
 * and started with a 10 second first delay, and 0 is returned. Otherwise a
 * shared interrupt handler (pcie_isr) is requested on the port's irq and
 * the request_irq() result is returned, with an error logged on failure.
 */
static inline int pciehp_request_irq(struct controller *ctrl)
{
	int irq = ctrl->pcie->irq;
	int rc;

	if (pciehp_poll_mode) {
		init_timer(&ctrl->poll_timer);
		start_int_poll_timer(ctrl, 10);
		return 0;
	}

	rc = request_irq(irq, pcie_isr, IRQF_SHARED, MY_NAME, ctrl);
	if (rc != 0)
		ctrl_err(ctrl, "Cannot get irq %d for the hotplug controller\n",
			 irq);

	return rc;
}
/*
 * Undo pciehp_request_irq(): stop the poll timer in poll mode, otherwise
 * release the interrupt that was requested for @ctrl.
 */
static inline void pciehp_free_irq(struct controller *ctrl)
{
	if (pciehp_poll_mode) {
		del_timer_sync(&ctrl->poll_timer);
		return;
	}

	free_irq(ctrl->pcie->irq, ctrl);
}
static int pcie_poll_cmd(struct controller *ctrl, int timeout)
{
struct pci_dev *pdev = ctrl_dev(ctrl);
u16 slot_status;
while (true) {
pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
PCI: pciehp: Handle invalid data when reading from non-existent devices It's platform-dependent, but an MMIO read to a non-existent PCI device generally returns data with all bits set. This happens when the host bridge or Root Complex times out waiting for a response from the device and fabricates return data to complete the CPU's read. One example, reported in the bugzilla below, involved this hierarchy: pci 0000:00:1c.0: PCI bridge to [bus 02-3a] Root Port pci 0000:02:00.0: PCI bridge to [bus 03-0a] Upstream Port pci 0000:03:03.0: PCI bridge to [bus 05-07] Downstream Port pci 0000:05:00.0: PCI bridge to [bus 06-07] Thunderbolt Upstream Port pci 0000:06:00.0: PCI bridge to [bus 07] Thunderbolt Downstream Port pci 0000:07:00.0: BCM57762 NIC Unplugging the Thunderbolt switch and the NIC below it resulted in this: pciehp 0000:03:03.0: Surprise Removal tg3 0000:07:00.0: tg3_abort_hw timed out, TX_MODE_ENABLE will not clear MAC_TX_MODE=ffffffff pciehp 0000:06:00.0: unloading service driver pciehp pciehp 0000:06:00.0: pcie_isr: intr_loc 11f pciehp 0000:06:00.0: Switch interrupt received pciehp 0000:06:00.0: Latch open on Slot pciehp 0000:06:00.0: Attention button interrupt received pciehp 0000:06:00.0: Button pressed on Slot pciehp 0000:06:00.0: Presence/Notify input change pciehp 0000:06:00.0: Card present on Slot pciehp 0000:06:00.0: Power fault interrupt received pciehp 0000:06:00.0: Data Link Layer State change pciehp 0000:06:00.0: Link Up event The pciehp driver correctly noticed that the Thunderbolt switch (05:00.0 and 06:00.0) and NIC (07:00.0) had been removed, and it called their driver remove methods. Since the NIC was already gone, tg3 received 0xffffffff when it tried to read from the device. The resulting timeout is a tg3 issue and not of interest here. 
Similarly, since the 06:00.0 Thunderbolt switch was already gone, pcie_isr() received 0xffff when it tried to read PCI_EXP_SLTSTA, and pciehp thought that was valid status showing that many events had happened: the latch had been opened, the attention button had been pressed, a card was now present, and the link was now up. These are all wrong, of course, but pciehp went on to try to power up and enumerate devices below the non-existent bridge: pciehp 0000:06:00.0: PCI slot - powering on due to button press pciehp 0000:06:00.0: Surprise Insertion pci 0000:07:00.0 id reading try 50 times with interval 20 ms to get ffffffff [bhelgaas: changelog, also check in pcie_poll_cmd() & pcie_do_write_cmd()] Link: https://bugzilla.kernel.org/show_bug.cgi?id=99841 Suggested-by: Bjorn Helgaas <bhelgaas@google.com> Signed-off-by: Jarod Wilson <jarod@redhat.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
2015-07-22 00:25:30 +08:00
if (slot_status == (u16) ~0) {
ctrl_info(ctrl, "%s: no response from device\n",
__func__);
return 0;
}
if (slot_status & PCI_EXP_SLTSTA_CC) {
pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
PCI_EXP_SLTSTA_CC);
return 1;
}
if (timeout < 0)
break;
msleep(10);
timeout -= 10;
}
return 0; /* timeout */
}
/*
 * Wait for the most recently issued hotplug command to complete.
 *
 * The deadline is ctrl->cmd_started plus 2500 ms in poll mode or 1000 ms
 * otherwise. Returns immediately when the controller does not report
 * command completion or when no command is marked busy. A timeout is only
 * logged; nothing is returned to the caller.
 */
static void pcie_wait_cmd(struct controller *ctrl)
{
unsigned int msecs = pciehp_poll_mode ? 2500 : 1000;
unsigned long duration = msecs_to_jiffies(msecs);
unsigned long cmd_timeout = ctrl->cmd_started + duration;
unsigned long now, timeout;
int rc;
/*
* If the controller does not generate notifications for command
* completions, we never need to wait between writes.
*/
if (NO_CMD_CMPL(ctrl))
return;
if (!ctrl->cmd_busy)
return;
/*
* Even if the command has already timed out, we want to call
* pcie_poll_cmd() so it can clear PCI_EXP_SLTSTA_CC.
*/
now = jiffies;
/* Deadline already passed: still wait the minimum of one jiffy. */
if (time_before_eq(cmd_timeout, now))
timeout = 1;
else
timeout = cmd_timeout - now;
/*
* With both hotplug and command-completed interrupts enabled in the
* cached Slot Control value, sleep on ctrl->queue until cmd_busy is
* cleared (presumably by the interrupt handler, which is not visible
* here -- confirm); otherwise poll Slot Status directly.
*/
if (ctrl->slot_ctrl & PCI_EXP_SLTCTL_HPIE &&
ctrl->slot_ctrl & PCI_EXP_SLTCTL_CCIE)
rc = wait_event_timeout(ctrl->queue, !ctrl->cmd_busy, timeout);
else
rc = pcie_poll_cmd(ctrl, jiffies_to_msecs(timeout));
/*
* Controllers with errata like Intel CF118 don't generate
* completion notifications unless the power/indicator/interlock
* control bits are changed. On such controllers, we'll emit this
* timeout message when we wait for completion of commands that
* don't change those bits, e.g., commands that merely enable
* interrupts.
*/
if (!rc)
ctrl_info(ctrl, "Timeout on hotplug command %#06x (issued %u msec ago)\n",
ctrl->slot_ctrl,
jiffies_to_msecs(jiffies - ctrl->cmd_started));
}
PCI: pciehp: Wait for hotplug command completion where necessary The commit referenced below deferred waiting for command completion until the start of the next command, allowing hardware to do the latching asynchronously. Unfortunately, being ready to accept a new command is the only indication we have that the previous command is completed. In cases where we need that state change to be enabled, we must still wait for completion. For instance, pciehp_reset_slot() attempts to disable anything that might generate a surprise hotplug on slots that support presence detection. If we don't wait for those settings to latch before the secondary bus reset, we negate any value in attempting to prevent the spurious hotplug. Create a base function with optional wait and helper functions so that pcie_write_cmd() turns back into the "safe" interface which waits before and after issuing a command and add pcie_write_cmd_nowait(), which eliminates the trailing wait for asynchronous completion. The following functions are returned to their previous behavior: pciehp_power_on_slot pciehp_power_off_slot pcie_disable_notification pciehp_reset_slot The rationale is that pciehp_power_on_slot() enables the link and therefore relies on completion of power-on. pciehp_power_off_slot() and pcie_disable_notification() need a wait because data structures may be freed after these calls and continued signaling from the device would be unexpected. And, of course, pciehp_reset_slot() needs to wait for the scenario outlined above. Fixes: 3461a068661c ("PCI: pciehp: Wait for hotplug command completion lazily") Signed-off-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> CC: stable@vger.kernel.org # v3.17+
2015-06-09 07:10:50 +08:00
/**
 * pcie_do_write_cmd - read-modify-write Slot Control under ctrl_lock
 * @ctrl: controller to which the command is issued
 * @cmd: command value written to slot control register
 * @mask: bitmask of slot control register to be modified
 * @wait: if true, also wait for this command to complete before returning
 *
 * Any previous command is always waited for first. Only the bits selected
 * by @mask are changed. If Slot Control reads back as all ones the device
 * is treated as gone: a message is logged and nothing is written.
 */
static void pcie_do_write_cmd(struct controller *ctrl, u16 cmd,
			      u16 mask, bool wait)
{
	struct pci_dev *pdev = ctrl_dev(ctrl);
	u16 slot_ctrl;

	mutex_lock(&ctrl->ctrl_lock);

	/*
	 * Always wait for any previous command that might still be in progress
	 */
	pcie_wait_cmd(ctrl);

	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_ctrl);

	/*
	 * A read from a device that is no longer there generally returns
	 * all bits set; writing a value derived from that would be bogus.
	 */
	if (slot_ctrl == (u16) ~0) {
		ctrl_info(ctrl, "%s: no response from device\n", __func__);
		goto out;
	}

	slot_ctrl &= ~mask;
	slot_ctrl |= (cmd & mask);

	/*
	 * Mark the command busy before it is issued.
	 * NOTE(review): the barrier presumably orders the cmd_busy store
	 * ahead of the register write for the completion path -- confirm.
	 */
	ctrl->cmd_busy = 1;
	smp_mb();
	pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, slot_ctrl);

	/* Recorded for pcie_wait_cmd(): deadline base and cached value. */
	ctrl->cmd_started = jiffies;
	ctrl->slot_ctrl = slot_ctrl;

	/*
	 * Optionally wait for the hardware to be ready for a new command,
	 * indicating completion of the above issued command.
	 */
	if (wait)
		pcie_wait_cmd(ctrl);

out:
	mutex_unlock(&ctrl->ctrl_lock);
}
PCI: pciehp: Wait for hotplug command completion where necessary The commit referenced below deferred waiting for command completion until the start of the next command, allowing hardware to do the latching asynchronously. Unfortunately, being ready to accept a new command is the only indication we have that the previous command is completed. In cases where we need that state change to be enabled, we must still wait for completion. For instance, pciehp_reset_slot() attempts to disable anything that might generate a surprise hotplug on slots that support presence detection. If we don't wait for those settings to latch before the secondary bus reset, we negate any value in attempting to prevent the spurious hotplug. Create a base function with optional wait and helper functions so that pcie_write_cmd() turns back into the "safe" interface which waits before and after issuing a command and add pcie_write_cmd_nowait(), which eliminates the trailing wait for asynchronous completion. The following functions are returned to their previous behavior: pciehp_power_on_slot pciehp_power_off_slot pcie_disable_notification pciehp_reset_slot The rationale is that pciehp_power_on_slot() enables the link and therefore relies on completion of power-on. pciehp_power_off_slot() and pcie_disable_notification() need a wait because data structures may be freed after these calls and continued signaling from the device would be unexpected. And, of course, pciehp_reset_slot() needs to wait for the scenario outlined above. Fixes: 3461a068661c ("PCI: pciehp: Wait for hotplug command completion lazily") Signed-off-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> CC: stable@vger.kernel.org # v3.17+
2015-06-09 07:10:50 +08:00
/**
 * pcie_write_cmd - Issue controller command and wait for it to complete
 * @ctrl: controller to which the command is issued
 * @cmd: command value written to slot control register
 * @mask: bitmask of slot control register to be modified
 *
 * Thin wrapper around pcie_do_write_cmd() with the trailing wait enabled.
 */
static void pcie_write_cmd(struct controller *ctrl, u16 cmd, u16 mask)
{
	const bool wait_after = true;

	pcie_do_write_cmd(ctrl, cmd, mask, wait_after);
}
/*
 * Issue a controller command through pcie_do_write_cmd() without the
 * trailing wait for the hardware to latch it.
 */
static void pcie_write_cmd_nowait(struct controller *ctrl, u16 cmd, u16 mask)
{
	const bool wait_after = false;

	pcie_do_write_cmd(ctrl, cmd, mask, wait_after);
}
bool pciehp_check_link_active(struct controller *ctrl)
{
struct pci_dev *pdev = ctrl_dev(ctrl);
u16 lnk_status;
bool ret;
pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
if (ret)
ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status);
return ret;
}
/*
 * Wait until pciehp_check_link_active() equals @active.
 *
 * The state is checked once immediately and then after each 10 ms sleep,
 * for a budget of 1000 ms. Giving up is only logged at debug level.
 */
static void __pcie_wait_link_active(struct controller *ctrl, bool active)
{
	int remaining;

	if (pciehp_check_link_active(ctrl) == active)
		return;

	for (remaining = 1000; remaining > 0; remaining -= 10) {
		msleep(10);
		if (pciehp_check_link_active(ctrl) == active)
			return;
	}

	ctrl_dbg(ctrl, "Data Link Layer Link Active not %s in 1000 msec\n",
		 active ? "set" : "cleared");
}
static void pcie_wait_link_active(struct controller *ctrl)
{
__pcie_wait_link_active(ctrl, true);
}
/*
 * Try to read the vendor ID of @devfn on @bus, retrying every 20 ms for a
 * budget of 1000 ms.
 *
 * Returns true as soon as pci_bus_read_dev_vendor_id() succeeds, false if
 * every attempt failed. When more than one attempt was needed and
 * pciehp_debug is set, the attempt count and last value read are logged.
 */
static bool pci_bus_check_dev(struct pci_bus *bus, int devfn)
{
	u32 l;
	int attempts = 0;
	int remaining = 1000, interval = 20;
	bool found = false;

	for (;;) {
		found = pci_bus_read_dev_vendor_id(bus, devfn, &l, 0);
		attempts++;
		if (found)
			break;

		msleep(interval);
		remaining -= interval;
		if (remaining <= 0)
			break;
	}

	if (pciehp_debug && attempts > 1)
		printk(KERN_DEBUG "pci %04x:%02x:%02x.%d id reading try %d times with interval %d ms to get %08x\n",
		       pci_domain_nr(bus), bus->number, PCI_SLOT(devfn),
		       PCI_FUNC(devfn), attempts, interval, l);

	return found;
}
/*
 * Wait for the link below @ctrl to come up and check that it is usable.
 *
 * Returns 0 when Link Status shows no training in progress, a non-zero
 * negotiated width, and device 0 function 0 on the subordinate bus answers
 * a vendor ID read. Returns -1 otherwise. The bus link speed is updated
 * from Link Status whenever the training/width check passes.
 */
int pciehp_check_link_status(struct controller *ctrl)
{
	struct pci_dev *port = ctrl_dev(ctrl);
	u16 lnk_status;
	bool training, no_width;
	bool found;

	/*
	 * Data Link Layer Link Active Reporting must be capable for
	 * hot-plug capable downstream port. But old controller might
	 * not implement it. In this case, we wait for 1000 ms.
	 */
	if (!ctrl->link_active_reporting)
		msleep(1000);
	else
		pcie_wait_link_active(ctrl);

	/* wait 100ms before read pci conf, and try in 1s */
	msleep(100);
	found = pci_bus_check_dev(ctrl->pcie->port->subordinate,
				  PCI_DEVFN(0, 0));

	pcie_capability_read_word(port, PCI_EXP_LNKSTA, &lnk_status);
	ctrl_dbg(ctrl, "%s: lnk_status = %x\n", __func__, lnk_status);

	training = (lnk_status & PCI_EXP_LNKSTA_LT) != 0;
	no_width = (lnk_status & PCI_EXP_LNKSTA_NLW) == 0;
	if (training || no_width) {
		ctrl_err(ctrl, "link training error: status %#06x\n",
			 lnk_status);
		return -1;
	}

	pcie_update_link_speed(ctrl->pcie->port->subordinate, lnk_status);

	return found ? 0 : -1;
}
/*
 * Enable or disable the link by clearing or setting PCI_EXP_LNKCTL_LD in
 * the port's Link Control register. Always returns 0.
 */
static int __pciehp_link_set(struct controller *ctrl, bool enable)
{
	struct pci_dev *port = ctrl_dev(ctrl);
	u16 lnk_ctrl;

	pcie_capability_read_word(port, PCI_EXP_LNKCTL, &lnk_ctrl);

	if (!enable)
		lnk_ctrl |= PCI_EXP_LNKCTL_LD;
	else
		lnk_ctrl &= ~PCI_EXP_LNKCTL_LD;

	pcie_capability_write_word(port, PCI_EXP_LNKCTL, lnk_ctrl);
	ctrl_dbg(ctrl, "%s: lnk_ctrl = %x\n", __func__, lnk_ctrl);

	return 0;
}
static int pciehp_link_enable(struct controller *ctrl)
{
return __pciehp_link_set(ctrl, true);
}
/*
 * Store the raw attention and power indicator control fields of Slot
 * Control in *@status, shifted right by 6 bits. Always returns 0.
 */
int pciehp_get_raw_indicator_status(struct hotplug_slot *hotplug_slot,
				    u8 *status)
{
	struct slot *slot = hotplug_slot->private;
	struct pci_dev *port = ctrl_dev(slot->ctrl);
	u16 slot_ctrl;
	u16 indicators;

	pcie_capability_read_word(port, PCI_EXP_SLTCTL, &slot_ctrl);

	indicators = slot_ctrl & (PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC);
	*status = indicators >> 6;

	return 0;
}
/*
 * Decode the attention indicator field of Slot Control into *@status:
 * 0 = off, 1 = on, 2 = blink, 0xFF = any other encoding.
 */
void pciehp_get_attention_status(struct slot *slot, u8 *status)
{
	struct controller *ctrl = slot->ctrl;
	struct pci_dev *port = ctrl_dev(ctrl);
	u16 slot_ctrl;
	u8 state;

	pcie_capability_read_word(port, PCI_EXP_SLTCTL, &slot_ctrl);
	ctrl_dbg(ctrl, "%s: SLOTCTRL %x, value read %x\n", __func__,
		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_ctrl);

	switch (slot_ctrl & PCI_EXP_SLTCTL_AIC) {
	case PCI_EXP_SLTCTL_ATTN_IND_OFF:
		state = 0;
		break;
	case PCI_EXP_SLTCTL_ATTN_IND_ON:
		state = 1;
		break;
	case PCI_EXP_SLTCTL_ATTN_IND_BLINK:
		state = 2;
		break;
	default:
		state = 0xFF;
		break;
	}

	*status = state;
}
/*
 * Decode the power controller control field of Slot Control into
 * *@status: 1 = power on, 0 = power off, 0xFF = any other value.
 */
void pciehp_get_power_status(struct slot *slot, u8 *status)
{
	struct controller *ctrl = slot->ctrl;
	struct pci_dev *port = ctrl_dev(ctrl);
	u16 slot_ctrl;
	u8 state;

	pcie_capability_read_word(port, PCI_EXP_SLTCTL, &slot_ctrl);
	ctrl_dbg(ctrl, "%s: SLOTCTRL %x value read %x\n", __func__,
		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_ctrl);

	switch (slot_ctrl & PCI_EXP_SLTCTL_PCC) {
	case PCI_EXP_SLTCTL_PWR_OFF:
		state = 0;
		break;
	case PCI_EXP_SLTCTL_PWR_ON:
		state = 1;
		break;
	default:
		state = 0xFF;
		break;
	}

	*status = state;
}
/*
 * Set *@status to 1 if PCI_EXP_SLTSTA_MRLSS is set in Slot Status,
 * otherwise to 0.
 */
void pciehp_get_latch_status(struct slot *slot, u8 *status)
{
	struct pci_dev *port = ctrl_dev(slot->ctrl);
	u16 slot_status;

	pcie_capability_read_word(port, PCI_EXP_SLTSTA, &slot_status);

	*status = (slot_status & PCI_EXP_SLTSTA_MRLSS) ? 1 : 0;
}
/*
 * Set *@status to 1 if PCI_EXP_SLTSTA_PDS is set in Slot Status,
 * otherwise to 0.
 */
void pciehp_get_adapter_status(struct slot *slot, u8 *status)
{
	struct pci_dev *port = ctrl_dev(slot->ctrl);
	u16 slot_status;

	pcie_capability_read_word(port, PCI_EXP_SLTSTA, &slot_status);

	*status = (slot_status & PCI_EXP_SLTSTA_PDS) ? 1 : 0;
}
/*
 * Return 1 if PCI_EXP_SLTSTA_PFD is set in the port's Slot Status
 * register, otherwise 0.
 */
int pciehp_query_power_fault(struct slot *slot)
{
	struct pci_dev *port = ctrl_dev(slot->ctrl);
	u16 slot_status;

	pcie_capability_read_word(port, PCI_EXP_SLTSTA, &slot_status);

	return (slot_status & PCI_EXP_SLTSTA_PFD) ? 1 : 0;
}
/*
 * pciehp_set_raw_indicator_status - write raw indicator control bits
 * @hotplug_slot: hotplug core slot; its ->private points at our struct slot
 * @status: raw value, shifted left by 6 before being written under the
 *	    PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC mask
 *
 * The command is issued without waiting for its completion.
 * Always returns 0.
 */
int pciehp_set_raw_indicator_status(struct hotplug_slot *hotplug_slot,
				    u8 status)
{
	struct slot *slot = hotplug_slot->private;
	u16 indicators = status << 6;

	pcie_write_cmd_nowait(slot->ctrl, indicators,
			      PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC);

	return 0;
}
/*
 * pciehp_set_attention_status - set the attention indicator
 * @slot: slot whose controller is programmed
 * @value: 0 = off, 1 = on, 2 = blink; any other value is ignored
 *
 * Does nothing when ATTN_LED() reports that the controller has no
 * attention indicator.  The command is issued without waiting for
 * its completion.
 */
void pciehp_set_attention_status(struct slot *slot, u8 value)
{
	struct controller *ctrl = slot->ctrl;
	u16 slot_cmd;

	if (!ATTN_LED(ctrl))
		return;

	switch (value) {
	case 0:		/* turn off */
		slot_cmd = PCI_EXP_SLTCTL_ATTN_IND_OFF;
		break;
	case 1:		/* turn on */
		slot_cmd = PCI_EXP_SLTCTL_ATTN_IND_ON;
		break;
	case 2:		/* turn blink */
		slot_cmd = PCI_EXP_SLTCTL_ATTN_IND_BLINK;
		break;
	default:
		/* Unknown request: leave the indicator untouched */
		return;
	}

	pcie_write_cmd_nowait(ctrl, slot_cmd, PCI_EXP_SLTCTL_AIC);
	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, slot_cmd);
}
void pciehp_green_led_on(struct slot *slot)
{
struct controller *ctrl = slot->ctrl;
if (!PWR_LED(ctrl))
return;
PCI: pciehp: Wait for hotplug command completion where necessary The commit referenced below deferred waiting for command completion until the start of the next command, allowing hardware to do the latching asynchronously. Unfortunately, being ready to accept a new command is the only indication we have that the previous command is completed. In cases where we need that state change to be enabled, we must still wait for completion. For instance, pciehp_reset_slot() attempts to disable anything that might generate a surprise hotplug on slots that support presence detection. If we don't wait for those settings to latch before the secondary bus reset, we negate any value in attempting to prevent the spurious hotplug. Create a base function with optional wait and helper functions so that pcie_write_cmd() turns back into the "safe" interface which waits before and after issuing a command and add pcie_write_cmd_nowait(), which eliminates the trailing wait for asynchronous completion. The following functions are returned to their previous behavior: pciehp_power_on_slot pciehp_power_off_slot pcie_disable_notification pciehp_reset_slot The rationale is that pciehp_power_on_slot() enables the link and therefore relies on completion of power-on. pciehp_power_off_slot() and pcie_disable_notification() need a wait because data structures may be freed after these calls and continued signaling from the device would be unexpected. And, of course, pciehp_reset_slot() needs to wait for the scenario outlined above. Fixes: 3461a068661c ("PCI: pciehp: Wait for hotplug command completion lazily") Signed-off-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> CC: stable@vger.kernel.org # v3.17+
2015-06-09 07:10:50 +08:00
pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_ON,
PCI_EXP_SLTCTL_PIC);
ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
PCI_EXP_SLTCTL_PWR_IND_ON);
}
void pciehp_green_led_off(struct slot *slot)
{
struct controller *ctrl = slot->ctrl;
if (!PWR_LED(ctrl))
return;
PCI: pciehp: Wait for hotplug command completion where necessary The commit referenced below deferred waiting for command completion until the start of the next command, allowing hardware to do the latching asynchronously. Unfortunately, being ready to accept a new command is the only indication we have that the previous command is completed. In cases where we need that state change to be enabled, we must still wait for completion. For instance, pciehp_reset_slot() attempts to disable anything that might generate a surprise hotplug on slots that support presence detection. If we don't wait for those settings to latch before the secondary bus reset, we negate any value in attempting to prevent the spurious hotplug. Create a base function with optional wait and helper functions so that pcie_write_cmd() turns back into the "safe" interface which waits before and after issuing a command and add pcie_write_cmd_nowait(), which eliminates the trailing wait for asynchronous completion. The following functions are returned to their previous behavior: pciehp_power_on_slot pciehp_power_off_slot pcie_disable_notification pciehp_reset_slot The rationale is that pciehp_power_on_slot() enables the link and therefore relies on completion of power-on. pciehp_power_off_slot() and pcie_disable_notification() need a wait because data structures may be freed after these calls and continued signaling from the device would be unexpected. And, of course, pciehp_reset_slot() needs to wait for the scenario outlined above. Fixes: 3461a068661c ("PCI: pciehp: Wait for hotplug command completion lazily") Signed-off-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> CC: stable@vger.kernel.org # v3.17+
2015-06-09 07:10:50 +08:00
pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF,
PCI_EXP_SLTCTL_PIC);
ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
PCI_EXP_SLTCTL_PWR_IND_OFF);
}
void pciehp_green_led_blink(struct slot *slot)
{
struct controller *ctrl = slot->ctrl;
if (!PWR_LED(ctrl))
return;
PCI: pciehp: Wait for hotplug command completion where necessary The commit referenced below deferred waiting for command completion until the start of the next command, allowing hardware to do the latching asynchronously. Unfortunately, being ready to accept a new command is the only indication we have that the previous command is completed. In cases where we need that state change to be enabled, we must still wait for completion. For instance, pciehp_reset_slot() attempts to disable anything that might generate a surprise hotplug on slots that support presence detection. If we don't wait for those settings to latch before the secondary bus reset, we negate any value in attempting to prevent the spurious hotplug. Create a base function with optional wait and helper functions so that pcie_write_cmd() turns back into the "safe" interface which waits before and after issuing a command and add pcie_write_cmd_nowait(), which eliminates the trailing wait for asynchronous completion. The following functions are returned to their previous behavior: pciehp_power_on_slot pciehp_power_off_slot pcie_disable_notification pciehp_reset_slot The rationale is that pciehp_power_on_slot() enables the link and therefore relies on completion of power-on. pciehp_power_off_slot() and pcie_disable_notification() need a wait because data structures may be freed after these calls and continued signaling from the device would be unexpected. And, of course, pciehp_reset_slot() needs to wait for the scenario outlined above. Fixes: 3461a068661c ("PCI: pciehp: Wait for hotplug command completion lazily") Signed-off-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> CC: stable@vger.kernel.org # v3.17+
2015-06-09 07:10:50 +08:00
pcie_write_cmd_nowait(ctrl, PCI_EXP_SLTCTL_PWR_IND_BLINK,
PCI_EXP_SLTCTL_PIC);
ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
PCI_EXP_SLTCTL_PWR_IND_BLINK);
}
/*
 * pciehp_power_on_slot - power the slot on and enable its link
 * @slot: slot to power on
 *
 * Clears a pending power-fault status bit, resets the driver's
 * power_fault_detected flag, issues the power-on command through
 * pcie_write_cmd() and then calls pciehp_link_enable().
 *
 * Returns 0 on success, or the error returned by pciehp_link_enable().
 */
int pciehp_power_on_slot(struct slot *slot)
{
	struct controller *ctrl = slot->ctrl;
	struct pci_dev *port = ctrl_dev(ctrl);
	u16 sltsta;
	int err;

	/* Clear sticky power-fault bit from previous power failures */
	pcie_capability_read_word(port, PCI_EXP_SLTSTA, &sltsta);
	if (sltsta & PCI_EXP_SLTSTA_PFD)
		pcie_capability_write_word(port, PCI_EXP_SLTSTA,
					   PCI_EXP_SLTSTA_PFD);
	ctrl->power_fault_detected = 0;

	pcie_write_cmd(ctrl, PCI_EXP_SLTCTL_PWR_ON, PCI_EXP_SLTCTL_PCC);
	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL,
		 PCI_EXP_SLTCTL_PWR_ON);

	err = pciehp_link_enable(ctrl);
	if (!err)
		return 0;

	ctrl_err(ctrl, "%s: Can not enable the link!\n", __func__);
	return err;
}
/*
 * pciehp_power_off_slot - power the slot off
 * @slot: slot to power off
 *
 * Issues the power-off command through pcie_write_cmd() and logs it.
 */
void pciehp_power_off_slot(struct slot *slot)
{
	struct controller *hpc = slot->ctrl;

	pcie_write_cmd(hpc, PCI_EXP_SLTCTL_PWR_OFF, PCI_EXP_SLTCTL_PCC);

	ctrl_dbg(hpc, "%s: SLOTCTRL %x write cmd %x\n", __func__,
		 pci_pcie_cap(hpc->pcie->port) + PCI_EXP_SLTCTL,
		 PCI_EXP_SLTCTL_PWR_OFF);
}
static irqreturn_t pciehp_isr(int irq, void *dev_id)
{
struct controller *ctrl = (struct controller *)dev_id;
struct pci_dev *pdev = ctrl_dev(ctrl);
PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device Powering off a hot-pluggable device, e.g., with pci_set_power_state(D3cold), normally generates a hot-remove event that unbinds the driver. Some drivers expect to remain bound to a device even while they power it off and back on again. This can be dangerous, because if the device is removed or replaced while it is powered off, the driver doesn't know that anything changed. But some drivers accept that risk. Add pci_ignore_hotplug() for use by drivers that know their device cannot be removed. Using pci_ignore_hotplug() tells the PCI core that hot-plug events for the device should be ignored. The radeon and nouveau drivers use this to switch between a low-power, integrated GPU and a higher-power, higher-performance discrete GPU. They power off the unused GPU, but they want to remain bound to it. This is a reimplementation of f244d8b623da ("ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug") but extends it to work with both acpiphp and pciehp. This fixes a problem where systems with dual GPUs using the radeon drivers become unusable, freezing every few seconds (see bugzillas below). The resume of the radeon device may also fail, e.g., This fixes problems on dual GPU systems where the radeon driver becomes unusable because of problems while suspending the device, as in bug 79701: [drm] radeon: finishing device. radeon 0000:01:00.0: Userspace still has active objects ! radeon 0000:01:00.0: ffff8800cb4ec288 ffff8800cb4ec000 16384 4294967297 force free ... WARNING: CPU: 0 PID: 67 at /home/apw/COD/linux/drivers/gpu/drm/radeon/radeon_gart.c:234 radeon_gart_unbind+0xd2/0xe0 [radeon]() trying to unbind memory from uninitialized GART ! or while resuming it, as in bug 77261: radeon 0000:01:00.0: ring 0 stalled for more than 10158msec radeon 0000:01:00.0: GPU lockup ... 
radeon 0000:01:00.0: GPU pci config reset pciehp 0000:00:01.0:pcie04: Card not present on Slot(1-1) radeon 0000:01:00.0: GPU reset succeeded, trying to resume *ERROR* radeon: dpm resume failed radeon 0000:01:00.0: Wait for MC idle timedout ! Link: https://bugzilla.kernel.org/show_bug.cgi?id=77261 Link: https://bugzilla.kernel.org/show_bug.cgi?id=79701 Reported-by: Shawn Starr <shawn.starr@rogers.com> Reported-by: Jose P. <lbdkmjdf@sharklasers.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Acked-by: Rajat Jain <rajatxjain@gmail.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> CC: stable@vger.kernel.org # v3.15+
2014-09-11 03:45:01 +08:00
struct pci_bus *subordinate = pdev->subordinate;
struct pci_dev *dev;
struct slot *slot = ctrl->slot;
u16 status, events;
u8 present;
bool link;
/* Interrupts cannot originate from a controller that's asleep */
if (pdev->current_state == PCI_D3cold)
return IRQ_NONE;
pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &status);
if (status == (u16) ~0) {
ctrl_info(ctrl, "%s: no response from device\n", __func__);
return IRQ_NONE;
}
pciehp: Fix interrupt event handlig Current pciehp implementation disables and re-enables hotplug interrupts in its interrupt handler. This operation might be intend to guarantee that interrupts for the events newly occured during previous events are being handled will be successfully generated. But current implementaion has the following prolems. - Current interrupt service routin clears status changes without waiting command completion. Because of this, events might not be cleared properly. - Current interrupt service routine clears status changes caused by disabling or enabling hotplug interrupts itself. This will lose new events that occurs during previous interrupts are being handled. - Current implementation doesn't have any serialization mechanism between the code to wait for command completion and the interrupt handler that clears the command completion events caused by itself. There is clearly race conditions between them, and it may cause the problem that waiting for command completion doesn't work for example. To fix those problems, this patch stops disabling/re-enabling hotplug interrupts in interrupt service routine. Instead of this, this patch re-inspects Slot Status register after clearing what is presumed to be the last bending interrupt in order to guarantee that all interrupt events are serviced. Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> Signed-off-by: Kristen Carlson Accardi <kristen.c.accardi@intel.com> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2008-04-26 05:38:57 +08:00
/*
* Slot Status contains plain status bits as well as event
* notification bits; right now we only want the event bits.
pciehp: Fix interrupt event handlig Current pciehp implementation disables and re-enables hotplug interrupts in its interrupt handler. This operation might be intend to guarantee that interrupts for the events newly occured during previous events are being handled will be successfully generated. But current implementaion has the following prolems. - Current interrupt service routin clears status changes without waiting command completion. Because of this, events might not be cleared properly. - Current interrupt service routine clears status changes caused by disabling or enabling hotplug interrupts itself. This will lose new events that occurs during previous interrupts are being handled. - Current implementation doesn't have any serialization mechanism between the code to wait for command completion and the interrupt handler that clears the command completion events caused by itself. There is clearly race conditions between them, and it may cause the problem that waiting for command completion doesn't work for example. To fix those problems, this patch stops disabling/re-enabling hotplug interrupts in interrupt service routine. Instead of this, this patch re-inspects Slot Status register after clearing what is presumed to be the last bending interrupt in order to guarantee that all interrupt events are serviced. Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> Signed-off-by: Kristen Carlson Accardi <kristen.c.accardi@intel.com> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2008-04-26 05:38:57 +08:00
*/
events = status & (PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_CC |
PCI_EXP_SLTSTA_DLLSC);
if (!events)
return IRQ_NONE;
/* Capture link status before clearing interrupts */
if (events & PCI_EXP_SLTSTA_DLLSC)
link = pciehp_check_link_active(ctrl);
pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, events);
ctrl_dbg(ctrl, "pending interrupts %#06x from Slot Status\n", events);
pciehp: Fix interrupt event handlig Current pciehp implementation disables and re-enables hotplug interrupts in its interrupt handler. This operation might be intend to guarantee that interrupts for the events newly occured during previous events are being handled will be successfully generated. But current implementaion has the following prolems. - Current interrupt service routin clears status changes without waiting command completion. Because of this, events might not be cleared properly. - Current interrupt service routine clears status changes caused by disabling or enabling hotplug interrupts itself. This will lose new events that occurs during previous interrupts are being handled. - Current implementation doesn't have any serialization mechanism between the code to wait for command completion and the interrupt handler that clears the command completion events caused by itself. There is clearly race conditions between them, and it may cause the problem that waiting for command completion doesn't work for example. To fix those problems, this patch stops disabling/re-enabling hotplug interrupts in interrupt service routine. Instead of this, this patch re-inspects Slot Status register after clearing what is presumed to be the last bending interrupt in order to guarantee that all interrupt events are serviced. Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> Signed-off-by: Kristen Carlson Accardi <kristen.c.accardi@intel.com> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2008-04-26 05:38:57 +08:00
/* Check Command Complete Interrupt Pending */
if (events & PCI_EXP_SLTSTA_CC) {
ctrl->cmd_busy = 0;
smp_mb();
wake_up(&ctrl->queue);
}
PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device Powering off a hot-pluggable device, e.g., with pci_set_power_state(D3cold), normally generates a hot-remove event that unbinds the driver. Some drivers expect to remain bound to a device even while they power it off and back on again. This can be dangerous, because if the device is removed or replaced while it is powered off, the driver doesn't know that anything changed. But some drivers accept that risk. Add pci_ignore_hotplug() for use by drivers that know their device cannot be removed. Using pci_ignore_hotplug() tells the PCI core that hot-plug events for the device should be ignored. The radeon and nouveau drivers use this to switch between a low-power, integrated GPU and a higher-power, higher-performance discrete GPU. They power off the unused GPU, but they want to remain bound to it. This is a reimplementation of f244d8b623da ("ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug") but extends it to work with both acpiphp and pciehp. This fixes a problem where systems with dual GPUs using the radeon drivers become unusable, freezing every few seconds (see bugzillas below). The resume of the radeon device may also fail, e.g., This fixes problems on dual GPU systems where the radeon driver becomes unusable because of problems while suspending the device, as in bug 79701: [drm] radeon: finishing device. radeon 0000:01:00.0: Userspace still has active objects ! radeon 0000:01:00.0: ffff8800cb4ec288 ffff8800cb4ec000 16384 4294967297 force free ... WARNING: CPU: 0 PID: 67 at /home/apw/COD/linux/drivers/gpu/drm/radeon/radeon_gart.c:234 radeon_gart_unbind+0xd2/0xe0 [radeon]() trying to unbind memory from uninitialized GART ! or while resuming it, as in bug 77261: radeon 0000:01:00.0: ring 0 stalled for more than 10158msec radeon 0000:01:00.0: GPU lockup ... 
radeon 0000:01:00.0: GPU pci config reset pciehp 0000:00:01.0:pcie04: Card not present on Slot(1-1) radeon 0000:01:00.0: GPU reset succeeded, trying to resume *ERROR* radeon: dpm resume failed radeon 0000:01:00.0: Wait for MC idle timedout ! Link: https://bugzilla.kernel.org/show_bug.cgi?id=77261 Link: https://bugzilla.kernel.org/show_bug.cgi?id=79701 Reported-by: Shawn Starr <shawn.starr@rogers.com> Reported-by: Jose P. <lbdkmjdf@sharklasers.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Acked-by: Rajat Jain <rajatxjain@gmail.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> CC: stable@vger.kernel.org # v3.15+
2014-09-11 03:45:01 +08:00
if (subordinate) {
list_for_each_entry(dev, &subordinate->devices, bus_list) {
if (dev->ignore_hotplug) {
ctrl_dbg(ctrl, "ignoring hotplug event %#06x (%s requested no hotplug)\n",
events, pci_name(dev));
PCI: Add pci_ignore_hotplug() to ignore hotplug events for a device Powering off a hot-pluggable device, e.g., with pci_set_power_state(D3cold), normally generates a hot-remove event that unbinds the driver. Some drivers expect to remain bound to a device even while they power it off and back on again. This can be dangerous, because if the device is removed or replaced while it is powered off, the driver doesn't know that anything changed. But some drivers accept that risk. Add pci_ignore_hotplug() for use by drivers that know their device cannot be removed. Using pci_ignore_hotplug() tells the PCI core that hot-plug events for the device should be ignored. The radeon and nouveau drivers use this to switch between a low-power, integrated GPU and a higher-power, higher-performance discrete GPU. They power off the unused GPU, but they want to remain bound to it. This is a reimplementation of f244d8b623da ("ACPIPHP / radeon / nouveau: Fix VGA switcheroo problem related to hotplug") but extends it to work with both acpiphp and pciehp. This fixes a problem where systems with dual GPUs using the radeon drivers become unusable, freezing every few seconds (see bugzillas below). The resume of the radeon device may also fail, e.g., This fixes problems on dual GPU systems where the radeon driver becomes unusable because of problems while suspending the device, as in bug 79701: [drm] radeon: finishing device. radeon 0000:01:00.0: Userspace still has active objects ! radeon 0000:01:00.0: ffff8800cb4ec288 ffff8800cb4ec000 16384 4294967297 force free ... WARNING: CPU: 0 PID: 67 at /home/apw/COD/linux/drivers/gpu/drm/radeon/radeon_gart.c:234 radeon_gart_unbind+0xd2/0xe0 [radeon]() trying to unbind memory from uninitialized GART ! or while resuming it, as in bug 77261: radeon 0000:01:00.0: ring 0 stalled for more than 10158msec radeon 0000:01:00.0: GPU lockup ... 
radeon 0000:01:00.0: GPU pci config reset pciehp 0000:00:01.0:pcie04: Card not present on Slot(1-1) radeon 0000:01:00.0: GPU reset succeeded, trying to resume *ERROR* radeon: dpm resume failed radeon 0000:01:00.0: Wait for MC idle timedout ! Link: https://bugzilla.kernel.org/show_bug.cgi?id=77261 Link: https://bugzilla.kernel.org/show_bug.cgi?id=79701 Reported-by: Shawn Starr <shawn.starr@rogers.com> Reported-by: Jose P. <lbdkmjdf@sharklasers.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Acked-by: Rajat Jain <rajatxjain@gmail.com> Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Dave Airlie <airlied@redhat.com> CC: stable@vger.kernel.org # v3.15+
2014-09-11 03:45:01 +08:00
return IRQ_HANDLED;
}
}
}
pciehp: Fix interrupt event handlig Current pciehp implementation disables and re-enables hotplug interrupts in its interrupt handler. This operation might be intend to guarantee that interrupts for the events newly occured during previous events are being handled will be successfully generated. But current implementaion has the following prolems. - Current interrupt service routin clears status changes without waiting command completion. Because of this, events might not be cleared properly. - Current interrupt service routine clears status changes caused by disabling or enabling hotplug interrupts itself. This will lose new events that occurs during previous interrupts are being handled. - Current implementation doesn't have any serialization mechanism between the code to wait for command completion and the interrupt handler that clears the command completion events caused by itself. There is clearly race conditions between them, and it may cause the problem that waiting for command completion doesn't work for example. To fix those problems, this patch stops disabling/re-enabling hotplug interrupts in interrupt service routine. Instead of this, this patch re-inspects Slot Status register after clearing what is presumed to be the last bending interrupt in order to guarantee that all interrupt events are serviced. Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> Signed-off-by: Kristen Carlson Accardi <kristen.c.accardi@intel.com> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2008-04-26 05:38:57 +08:00
/* Check Attention Button Pressed */
if (events & PCI_EXP_SLTSTA_ABP) {
ctrl_info(ctrl, "Slot(%s): Attention button pressed\n",
slot_name(slot));
pciehp_queue_interrupt_event(slot, INT_BUTTON_PRESS);
}
/*
* Check Link Status Changed at higher precedence than Presence
* Detect Changed. The PDS value may be set to "card present" from
* out-of-band detection, which may be in conflict with a Link Down
* and cause the wrong event to queue.
*/
if (events & PCI_EXP_SLTSTA_DLLSC) {
ctrl_info(ctrl, "Slot(%s): Link %s\n", slot_name(slot),
link ? "Up" : "Down");
pciehp_queue_interrupt_event(slot, link ? INT_LINK_UP :
INT_LINK_DOWN);
} else if (events & PCI_EXP_SLTSTA_PDC) {
present = !!(status & PCI_EXP_SLTSTA_PDS);
ctrl_info(ctrl, "Slot(%s): Card %spresent\n", slot_name(slot),
present ? "" : "not ");
pciehp_queue_interrupt_event(slot, present ? INT_PRESENCE_ON :
INT_PRESENCE_OFF);
}
pciehp: Fix interrupt event handlig Current pciehp implementation disables and re-enables hotplug interrupts in its interrupt handler. This operation might be intend to guarantee that interrupts for the events newly occured during previous events are being handled will be successfully generated. But current implementaion has the following prolems. - Current interrupt service routin clears status changes without waiting command completion. Because of this, events might not be cleared properly. - Current interrupt service routine clears status changes caused by disabling or enabling hotplug interrupts itself. This will lose new events that occurs during previous interrupts are being handled. - Current implementation doesn't have any serialization mechanism between the code to wait for command completion and the interrupt handler that clears the command completion events caused by itself. There is clearly race conditions between them, and it may cause the problem that waiting for command completion doesn't work for example. To fix those problems, this patch stops disabling/re-enabling hotplug interrupts in interrupt service routine. Instead of this, this patch re-inspects Slot Status register after clearing what is presumed to be the last bending interrupt in order to guarantee that all interrupt events are serviced. Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> Signed-off-by: Kristen Carlson Accardi <kristen.c.accardi@intel.com> Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2008-04-26 05:38:57 +08:00
/* Check Power Fault Detected */
if ((events & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) {
ctrl->power_fault_detected = 1;
ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(slot));
pciehp_queue_interrupt_event(slot, INT_POWER_FAULT);
}
return IRQ_HANDLED;
}
/*
 * pcie_isr - top-level hotplug interrupt handler
 * @irq: interrupt number, passed through to pciehp_isr()
 * @dev_id: cookie passed through to pciehp_isr()
 *
 * Keeps calling pciehp_isr() until it stops reporting IRQ_HANDLED, so
 * that Slot Status is inspected again after what was presumed to be
 * the last pending event has been cleared.
 *
 * Returns IRQ_HANDLED if at least one pass handled something,
 * IRQ_NONE otherwise.
 */
static irqreturn_t pcie_isr(int irq, void *dev_id)
{
	irqreturn_t result = IRQ_NONE;

	while (pciehp_isr(irq, dev_id) == IRQ_HANDLED)
		result = IRQ_HANDLED;

	return result;
}
void pcie_enable_notification(struct controller *ctrl)
{
u16 cmd, mask;
/*
* TBD: Power fault detected software notification support.
*
* Power fault detected software notification is not enabled
* now, because it caused power fault detected interrupt storm
* on some machines. On those machines, power fault detected
* bit in the slot status register was set again immediately
* when it is cleared in the interrupt service routine, and
* next power fault detected interrupt was notified again.
*/
/*
* Always enable link events: thus link-up and link-down shall
* always be treated as hotplug and unplug respectively. Enable
* presence detect only if Attention Button is not present.
*/
cmd = PCI_EXP_SLTCTL_DLLSCE;
if (ATTN_BUTTN(ctrl))
cmd |= PCI_EXP_SLTCTL_ABPE;
else
cmd |= PCI_EXP_SLTCTL_PDCE;
if (!pciehp_poll_mode)
cmd |= PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE;
mask = (PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE |
PCI_EXP_SLTCTL_PFDE |
PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE |
PCI_EXP_SLTCTL_DLLSCE);
PCI: pciehp: Wait for hotplug command completion where necessary The commit referenced below deferred waiting for command completion until the start of the next command, allowing hardware to do the latching asynchronously. Unfortunately, being ready to accept a new command is the only indication we have that the previous command is completed. In cases where we need that state change to be enabled, we must still wait for completion. For instance, pciehp_reset_slot() attempts to disable anything that might generate a surprise hotplug on slots that support presence detection. If we don't wait for those settings to latch before the secondary bus reset, we negate any value in attempting to prevent the spurious hotplug. Create a base function with optional wait and helper functions so that pcie_write_cmd() turns back into the "safe" interface which waits before and after issuing a command and add pcie_write_cmd_nowait(), which eliminates the trailing wait for asynchronous completion. The following functions are returned to their previous behavior: pciehp_power_on_slot pciehp_power_off_slot pcie_disable_notification pciehp_reset_slot The rationale is that pciehp_power_on_slot() enables the link and therefore relies on completion of power-on. pciehp_power_off_slot() and pcie_disable_notification() need a wait because data structures may be freed after these calls and continued signaling from the device would be unexpected. And, of course, pciehp_reset_slot() needs to wait for the scenario outlined above. Fixes: 3461a068661c ("PCI: pciehp: Wait for hotplug command completion lazily") Signed-off-by: Alex Williamson <alex.williamson@redhat.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> CC: stable@vger.kernel.org # v3.17+
2015-06-09 07:10:50 +08:00
pcie_write_cmd_nowait(ctrl, cmd, mask);
ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, cmd);
}
/*
 * pcie_disable_notification - turn off all hotplug event enables
 * @ctrl: controller to program
 *
 * Issues a zero command through pcie_write_cmd() under a mask covering
 * every event/interrupt enable bit listed below, then logs it.
 */
static void pcie_disable_notification(struct controller *ctrl)
{
	const u16 all_enables = PCI_EXP_SLTCTL_PDCE |
				PCI_EXP_SLTCTL_ABPE |
				PCI_EXP_SLTCTL_MRLSCE |
				PCI_EXP_SLTCTL_PFDE |
				PCI_EXP_SLTCTL_HPIE |
				PCI_EXP_SLTCTL_CCIE |
				PCI_EXP_SLTCTL_DLLSCE;

	pcie_write_cmd(ctrl, 0, all_enables);

	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, 0);
}
/*
 * pciehp_reset_slot - reset the slot via a secondary bus reset of the bridge
 * @slot:  slot to reset
 * @probe: when non-zero, only report that reset is possible; do nothing
 *
 * pciehp has a 1:1 bus:slot relationship, so resetting the slot means a
 * secondary bus reset of the bridge.  That reset must not be mistaken for a
 * hot-unplug followed by a hot-plug of the device, so the notifications that
 * could interfere are switched off around it: link state change notification
 * always, and presence detect change notification when the controller has no
 * attention button.  Any events latched in Slot Status meanwhile are cleared
 * before the notifications are switched back on.
 *
 * Always returns 0.
 */
int pciehp_reset_slot(struct slot *slot, int probe)
{
	struct controller *ctrl = slot->ctrl;
	struct pci_dev *pdev = ctrl_dev(ctrl);
	/* Link state change is always masked; seed both masks with it. */
	u16 events = PCI_EXP_SLTSTA_DLLSC;
	u16 enables = PCI_EXP_SLTCTL_DLLSCE;

	if (probe)
		return 0;

	/* Without an attention button, presence changes must be masked too. */
	if (!ATTN_BUTTN(ctrl)) {
		enables |= PCI_EXP_SLTCTL_PDCE;
		events |= PCI_EXP_SLTSTA_PDC;
	}

	/* Disable the selected notifications before touching the bus. */
	pcie_write_cmd(ctrl, 0, enables);
	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, 0);

	/* In poll mode, stop the poll timer for the duration of the reset. */
	if (pciehp_poll_mode)
		del_timer_sync(&ctrl->poll_timer);

	pci_reset_bridge_secondary_bus(ctrl->pcie->port);

	/* Clear any spurious events recorded while the reset was in progress. */
	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, events);

	/* Re-enable exactly the notifications that were disabled above. */
	pcie_write_cmd_nowait(ctrl, enables, enables);
	ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__,
		 pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, enables);

	/* NOTE(review): presumably this restarts polling - confirm in int_poll_timeout(). */
	if (pciehp_poll_mode)
		int_poll_timeout(ctrl->poll_timer.data);

	return 0;
}
/*
 * pcie_init_notification - request the IRQ and enable event notification
 * @ctrl: controller to set up
 *
 * Returns 0 on success, or -1 if pciehp_request_irq() fails, in which case
 * notification is left disabled and the enabled flag is not set.
 */
int pcie_init_notification(struct controller *ctrl)
{
	if (pciehp_request_irq(ctrl) != 0)
		return -1;

	pcie_enable_notification(ctrl);
	/* Remembered so pcie_shutdown_notification() knows there is work to undo. */
	ctrl->notification_enabled = 1;

	return 0;
}
/*
 * Undo pcie_init_notification(): disable notification, release the IRQ and
 * clear the enabled flag.  Does nothing if notification is not enabled.
 */
static void pcie_shutdown_notification(struct controller *ctrl)
{
	if (!ctrl->notification_enabled)
		return;

	pcie_disable_notification(ctrl);
	pciehp_free_irq(ctrl);
	ctrl->notification_enabled = 0;
}
static int pcie_init_slot(struct controller *ctrl)
{
struct slot *slot;
slot = kzalloc(sizeof(*slot), GFP_KERNEL);
if (!slot)
return -ENOMEM;
slot->wq = alloc_workqueue("pciehp-%u", 0, 0, PSN(ctrl));
PCI: pciehp: Use per-slot workqueues to avoid deadlock When we have a hotplug-capable PCIe port with a second hotplug-capable PCIe port below it, removing the device below the upstream port causes a deadlock. The deadlock happens because we use the pciehp_wq workqueue to run pciehp_power_thread(), which uses pciehp_disable_slot() to remove devices below the upstream port. When we remove the downstream PCIe port, we call pciehp_remove(), the pciehp driver's .remove() method. That calls flush_workqueue(pciehp_wq), which deadlocks because the pciehp_power_thread() work item is still running. This patch avoids the deadlock by creating a workqueue for every PCIe port and removing the single shared workqueue. Here's the call path that leads to the deadlock: pciehp_queue_pushbutton_work queue_work(pciehp_wq) # queue pciehp_power_thread ... pciehp_power_thread pciehp_disable_slot remove_board pciehp_unconfigure_device pci_stop_and_remove_bus_device ... pciehp_remove # pciehp driver .remove method pciehp_release_ctrl pcie_cleanup_slot flush_workqueue(pciehp_wq) This is fairly urgent because it can be caused by simply unplugging a Thunderbolt adapter, as reported by Daniel below. [bhelgaas: changelog] Reference: http://lkml.kernel.org/r/CAMVG2ssiRgcTD1bej2tkUUfsWmpL5eNtPcNif9va2-Gzb2u8nQ@mail.gmail.com Reported-and-tested-by: Daniel J Blueman <daniel@quora.org> Reviewed-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> Signed-off-by: Yijing Wang <wangyijing@huawei.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> CC: stable@vger.kernel.org
2013-01-11 10:15:54 +08:00
if (!slot->wq)
goto abort;
slot->ctrl = ctrl;
mutex_init(&slot->lock);
mutex_init(&slot->hotplug_lock);
INIT_DELAYED_WORK(&slot->work, pciehp_queue_pushbutton_work);
ctrl->slot = slot;
return 0;
PCI: pciehp: Use per-slot workqueues to avoid deadlock When we have a hotplug-capable PCIe port with a second hotplug-capable PCIe port below it, removing the device below the upstream port causes a deadlock. The deadlock happens because we use the pciehp_wq workqueue to run pciehp_power_thread(), which uses pciehp_disable_slot() to remove devices below the upstream port. When we remove the downstream PCIe port, we call pciehp_remove(), the pciehp driver's .remove() method. That calls flush_workqueue(pciehp_wq), which deadlocks because the pciehp_power_thread() work item is still running. This patch avoids the deadlock by creating a workqueue for every PCIe port and removing the single shared workqueue. Here's the call path that leads to the deadlock: pciehp_queue_pushbutton_work queue_work(pciehp_wq) # queue pciehp_power_thread ... pciehp_power_thread pciehp_disable_slot remove_board pciehp_unconfigure_device pci_stop_and_remove_bus_device ... pciehp_remove # pciehp driver .remove method pciehp_release_ctrl pcie_cleanup_slot flush_workqueue(pciehp_wq) This is fairly urgent because it can be caused by simply unplugging a Thunderbolt adapter, as reported by Daniel below. [bhelgaas: changelog] Reference: http://lkml.kernel.org/r/CAMVG2ssiRgcTD1bej2tkUUfsWmpL5eNtPcNif9va2-Gzb2u8nQ@mail.gmail.com Reported-and-tested-by: Daniel J Blueman <daniel@quora.org> Reviewed-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com> Signed-off-by: Yijing Wang <wangyijing@huawei.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> CC: stable@vger.kernel.org
2013-01-11 10:15:54 +08:00
abort:
kfree(slot);
return -ENOMEM;
}
/*
 * Tear down the slot created by pcie_init_slot(): cancel its delayed work,
 * destroy its private workqueue and free the slot.
 *
 * ctrl->slot is not reset here; the caller must not use it afterwards.
 */
static void pcie_cleanup_slot(struct controller *ctrl)
{
	struct slot *p_slot = ctrl->slot;

	/* Stop the pending delayed work before its workqueue goes away. */
	cancel_delayed_work(&p_slot->work);
	destroy_workqueue(p_slot->wq);

	kfree(p_slot);
}
/*
 * Dump the cached Slot Capabilities plus the current Slot Status and Slot
 * Control register values.  Emits nothing unless pciehp_debug is set.
 */
static inline void dbg_ctrl(struct controller *ctrl)
{
	struct pci_dev *pdev = ctrl->pcie->port;
	u16 slot_status;
	u16 slot_control;

	if (!pciehp_debug)
		return;

	/* Capabilities come from the cached copy, not a fresh register read. */
	ctrl_info(ctrl, "Slot Capabilities : 0x%08x\n", ctrl->slot_cap);

	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &slot_status);
	ctrl_info(ctrl, "Slot Status : 0x%04x\n", slot_status);

	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &slot_control);
	ctrl_info(ctrl, "Slot Control : 0x%04x\n", slot_control);
}
/* Yields '+' when any bit of (y) is set in (x), '-' when none is. */
#define FLAG(x, y)	((((x) & (y)) == 0) ? '-' : '+')
struct controller *pcie_init(struct pcie_device *dev)
{
struct controller *ctrl;
u32 slot_cap, link_cap;
struct pci_dev *pdev = dev->port;
ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
if (!ctrl) {
dev_err(&dev->device, "%s: Out of memory\n", __func__);
goto abort;
}
ctrl->pcie = dev;
pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &slot_cap);
if (pdev->hotplug_user_indicators)
slot_cap &= ~(PCI_EXP_SLTCAP_AIP | PCI_EXP_SLTCAP_PIP);
ctrl->slot_cap = slot_cap;
mutex_init(&ctrl->ctrl_lock);
init_waitqueue_head(&ctrl->queue);
dbg_ctrl(ctrl);
2014-06-15 00:56:31 +08:00
/* Check if Data Link Layer Link Active Reporting is implemented */
pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &link_cap);
if (link_cap & PCI_EXP_LNKCAP_DLLLARC)
ctrl->link_active_reporting = 1;
/* Clear all remaining event bits in Slot Status register */
pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
PCI_EXP_SLTSTA_ABP | PCI_EXP_SLTSTA_PFD |
PCI_EXP_SLTSTA_MRLSC | PCI_EXP_SLTSTA_PDC |
PCI_EXP_SLTSTA_CC | PCI_EXP_SLTSTA_DLLSC);
ctrl_info(ctrl, "Slot #%d AttnBtn%c PwrCtrl%c MRL%c AttnInd%c PwrInd%c HotPlug%c Surprise%c Interlock%c NoCompl%c LLActRep%c\n",
(slot_cap & PCI_EXP_SLTCAP_PSN) >> 19,
FLAG(slot_cap, PCI_EXP_SLTCAP_ABP),
FLAG(slot_cap, PCI_EXP_SLTCAP_PCP),
FLAG(slot_cap, PCI_EXP_SLTCAP_MRLSP),
FLAG(slot_cap, PCI_EXP_SLTCAP_AIP),
FLAG(slot_cap, PCI_EXP_SLTCAP_PIP),
FLAG(slot_cap, PCI_EXP_SLTCAP_HPC),
FLAG(slot_cap, PCI_EXP_SLTCAP_HPS),
FLAG(slot_cap, PCI_EXP_SLTCAP_EIP),
FLAG(slot_cap, PCI_EXP_SLTCAP_NCCS),
FLAG(link_cap, PCI_EXP_LNKCAP_DLLLARC));
if (pcie_init_slot(ctrl))
goto abort_ctrl;
return ctrl;
abort_ctrl:
kfree(ctrl);
abort:
return NULL;
}
/*
 * pciehp_release_ctrl - tear down and free a controller
 * @ctrl: controller to release
 *
 * Shuts down event notification first, then cleans up the slot, and finally
 * frees the controller itself.  @ctrl must not be used after this returns.
 */
void pciehp_release_ctrl(struct controller *ctrl)
{
	pcie_shutdown_notification(ctrl);
	pcie_cleanup_slot(ctrl);

	kfree(ctrl);
}