mirror of
https://github.com/qemu/qemu.git
synced 2024-11-25 03:43:37 +08:00
vfio-pci: Reset workaround for AMD Bonaire and Hawaii GPUs
Somehow these GPUs manage not to respond to a PCI bus reset, removing our primary mechanism for resetting graphics cards. The result is that these devices typically work well for a single VM boot. If the VM is rebooted or restarted, the guest driver is not able to init the card from the dirty state, resulting in a blue screen for Windows guests. The workaround is to use a device specific reset. This is not 100% reliable though since it depends on the incoming state of the device, but it substantially improves the usability of these devices in a VM. Credit to Alex Deucher <alexander.deucher@amd.com> for his guidance. Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
This commit is contained in:
parent
c6d231e2fd
commit
5655f931ab
162
hw/vfio/pci.c
162
hw/vfio/pci.c
@ -154,6 +154,7 @@ typedef struct VFIOPCIDevice {
|
||||
PCIHostDeviceAddress host;
|
||||
EventNotifier err_notifier;
|
||||
EventNotifier req_notifier;
|
||||
int (*resetfn)(struct VFIOPCIDevice *);
|
||||
uint32_t features;
|
||||
#define VFIO_FEATURE_ENABLE_VGA_BIT 0
|
||||
#define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
|
||||
@ -3325,6 +3326,162 @@ static void vfio_unregister_req_notifier(VFIOPCIDevice *vdev)
|
||||
vdev->req_enabled = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* AMD Radeon PCI config reset, based on Linux:
|
||||
* drivers/gpu/drm/radeon/ci_smc.c:ci_is_smc_running()
|
||||
* drivers/gpu/drm/radeon/radeon_device.c:radeon_pci_config_reset
|
||||
* drivers/gpu/drm/radeon/ci_smc.c:ci_reset_smc()
|
||||
* drivers/gpu/drm/radeon/ci_smc.c:ci_stop_smc_clock()
|
||||
* IDs: include/drm/drm_pciids.h
|
||||
* Registers: http://cgit.freedesktop.org/~agd5f/linux/commit/?id=4e2aa447f6f0
|
||||
*
|
||||
* Bonaire and Hawaii GPUs do not respond to a bus reset. This is a bug in the
|
||||
* hardware that should be fixed on future ASICs. The symptom of this is that
|
||||
* once the accelerated driver loads, Windows guests will bsod on subsequent
|
||||
* attempts to load the driver, such as after VM reset or shutdown/restart. To
|
||||
* work around this, we do an AMD specific PCI config reset, followed by an SMC
|
||||
* reset. The PCI config reset only works if SMC firmware is running, so we
|
||||
* have a dependency on the state of the device as to whether this reset will
|
||||
* be effective. There are still cases where we won't be able to kick the
|
||||
* device into working, but this greatly improves the usability overall. The
|
||||
* config reset magic is relatively common on AMD GPUs, but the setup and SMC
|
||||
* poking is largely ASIC specific.
|
||||
*/
|
||||
/*
 * Report whether the SMC appears to be running.
 *
 * Two indirect registers are sampled through BAR5: offset 200h selects the
 * indirect register and offset 204h returns its contents.  The result is
 * true only when bit 0 of indirect register 0x80000004 is clear and the
 * value of indirect register 0x80000370 is at least 0x20100.
 *
 * NOTE(review): bit 0 of 0x80000004 is the same bit vfio_radeon_reset() sets
 * to "disable SMC clock"; 0x80000370 is presumably the SMC program counter
 * as in Linux ci_is_smc_running() - confirm against the radeon driver.
 */
static bool vfio_radeon_smc_is_running(VFIOPCIDevice *vdev)
{
    uint32_t clock_ctl, prog_counter;

    /* Select, then fetch, the clock control word */
    vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
    clock_ctl = vfio_region_read(&vdev->bars[5].region, 0x204, 4);

    /* Select, then fetch, the second status word */
    vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000370, 4);
    prog_counter = vfio_region_read(&vdev->bars[5].region, 0x204, 4);

    /* Bit 0 set in the clock word means "not running" */
    if (clock_ctl & 1) {
        return false;
    }

    return prog_counter >= 0x20100;
}
|
||||
|
||||
/*
|
||||
* The scope of a config reset is controlled by a mode bit in the misc register
|
||||
* and a fuse, exposed as a bit in another register. The fuse is the default
|
||||
* (0 = GFX, 1 = whole GPU), the misc bit is a toggle, with the formula
|
||||
* scope = !(misc ^ fuse), where the resulting scope is defined the same as
|
||||
* the fuse. A truth table therefore tells us that if misc == fuse, we need
|
||||
* to flip the value of the bit in the misc register.
|
||||
*/
|
||||
static void vfio_radeon_set_gfx_only_reset(VFIOPCIDevice *vdev)
{
    uint32_t fuse_word, misc_word;
    bool fuse_bit, misc_bit;

    /* Fuse: bit 6 (0x40) of indirect register 0xc00c0000 */
    vfio_region_write(&vdev->bars[5].region, 0x200, 0xc00c0000, 4);
    fuse_word = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
    fuse_bit = (fuse_word & 0x40) != 0;

    /* Misc: bit 1 (0x2) of indirect register 0xc0000010 */
    vfio_region_write(&vdev->bars[5].region, 0x200, 0xc0000010, 4);
    misc_word = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
    misc_bit = (misc_word & 0x2) != 0;

    /* Nothing to do unless the two bits agree */
    if (misc_bit != fuse_bit) {
        return;
    }

    /*
     * The index register still selects the misc register, so writing the
     * data register toggles the misc bit in place; the read that follows
     * flushes the write.
     */
    vfio_region_write(&vdev->bars[5].region, 0x204, misc_word ^ 0x2, 4);
    vfio_region_read(&vdev->bars[5].region, 0x204, 4);
}
|
||||
|
||||
/*
 * Device specific reset for the Radeon parts selected by
 * vfio_setup_resetfn().
 *
 * Returns -ENODEV when the kernel already advertises a working reset
 * (vbasedev.reset_works), -EINVAL when the SMC is not running, and 0 once
 * the config reset and SMC shutdown sequence has been issued.  Note that 0
 * is also returned if the post-reset readback poll runs out of attempts.
 * On every path past the first check the PCI command register is written
 * to 0 before returning.
 */
static int vfio_radeon_reset(VFIOPCIDevice *vdev)
{
    PCIDevice *pdev = &vdev->pdev;
    uint32_t val;
    int attempts;
    int rc;

    /* A kernel implemented reset takes precedence over this one */
    if (vdev->vbasedev.reset_works) {
        return -ENODEV;
    }

    /* Memory decode only: BAR5 must be reachable for everything below */
    vfio_pci_write_config(pdev, PCI_COMMAND, PCI_COMMAND_MEMORY, 2);

    /* The config reset is only attempted while SMC firmware is running */
    if (!vfio_radeon_smc_is_running(vdev)) {
        rc = -EINVAL;
        goto restore_command;
    }

    /* Limit the scope of the reset to the GFX function */
    vfio_radeon_set_gfx_only_reset(vdev);

    /* AMD PCI config reset: magic value written to config offset 0x7c */
    vfio_pci_write_config(pdev, 0x7c, 0x39d5e86b, 4);
    usleep(100);

    /*
     * Poll the memory size register until it stops reading back as all
     * ones, giving up after 100000 attempts spaced 1us apart.
     */
    attempts = 100000;
    while (attempts-- > 0) {
        if (vfio_region_read(&vdev->bars[5].region, 0x5428, 4) != 0xffffffff) {
            break;
        }
        usleep(1);
    }

    /* Reset SMC: set bit 0 of indirect register 0x80000000 */
    vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000000, 4);
    val = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
    vfio_region_write(&vdev->bars[5].region, 0x204, val | 1, 4);

    /* Disable SMC clock: set bit 0 of indirect register 0x80000004 */
    vfio_region_write(&vdev->bars[5].region, 0x200, 0x80000004, 4);
    val = vfio_region_read(&vdev->bars[5].region, 0x204, 4);
    vfio_region_write(&vdev->bars[5].region, 0x204, val | 1, 4);

    rc = 0;

restore_command:
    /* Leave the PCI command register cleared */
    vfio_pci_write_config(pdev, PCI_COMMAND, 0, 2);

    return rc;
}
|
||||
|
||||
/*
 * Install a device specific reset handler when one is known for this
 * device.  The vendor and device IDs are read from pdev->config; for the
 * AMD (0x1002) Bonaire and Hawaii IDs listed below vdev->resetfn is pointed
 * at vfio_radeon_reset().  For every other device vdev->resetfn is left
 * untouched.
 */
static void vfio_setup_resetfn(VFIOPCIDevice *vdev)
{
    PCIDevice *pdev = &vdev->pdev;
    uint16_t vendor_id = pci_get_word(pdev->config + PCI_VENDOR_ID);
    uint16_t device_id = pci_get_word(pdev->config + PCI_DEVICE_ID);

    /* Only AMD/ATI devices have a quirk registered here */
    if (vendor_id != 0x1002) {
        return;
    }

    switch (device_id) {
    /* Bonaire */
    case 0x6649: /* Bonaire [FirePro W5100] */
    case 0x6650:
    case 0x6651:
    case 0x6658: /* Bonaire XTX [Radeon R7 260X] */
    case 0x665c: /* Bonaire XT [Radeon HD 7790/8770 / R9 260 OEM] */
    case 0x665d: /* Bonaire [Radeon R7 200 Series] */
    /* Hawaii */
    case 0x67a0: /* Hawaii XT GL [FirePro W9100] */
    case 0x67a1: /* Hawaii PRO GL [FirePro W8100] */
    case 0x67a2:
    case 0x67a8:
    case 0x67a9:
    case 0x67aa:
    case 0x67b0: /* Hawaii XT [Radeon R9 290X] */
    case 0x67b1: /* Hawaii PRO [Radeon R9 290] */
    case 0x67b8:
    case 0x67b9:
    case 0x67ba:
    case 0x67be:
        vdev->resetfn = vfio_radeon_reset;
        break;
    default:
        /* No device specific reset for this ID */
        break;
    }
}
|
||||
|
||||
static int vfio_initfn(PCIDevice *pdev)
|
||||
{
|
||||
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
|
||||
@ -3473,6 +3630,7 @@ static int vfio_initfn(PCIDevice *pdev)
|
||||
|
||||
vfio_register_err_notifier(vdev);
|
||||
vfio_register_req_notifier(vdev);
|
||||
vfio_setup_resetfn(vdev);
|
||||
|
||||
return 0;
|
||||
|
||||
@ -3520,6 +3678,10 @@ static void vfio_pci_reset(DeviceState *dev)
|
||||
|
||||
vfio_pci_pre_reset(vdev);
|
||||
|
||||
if (vdev->resetfn && !vdev->resetfn(vdev)) {
|
||||
goto post_reset;
|
||||
}
|
||||
|
||||
if (vdev->vbasedev.reset_works &&
|
||||
(vdev->has_flr || !vdev->has_pm_reset) &&
|
||||
!ioctl(vdev->vbasedev.fd, VFIO_DEVICE_RESET)) {
|
||||
|
Loading…
Reference in New Issue
Block a user