Merge branch 'stable/xen-pciback-0.6.3' into stable/drivers

* stable/xen-pciback-0.6.3:
  xen/pciback: Have 'passthrough' option instead of XEN_PCIDEV_BACKEND_PASS and XEN_PCIDEV_BACKEND_VPCI
  xen/pciback: Remove the DEBUG option.
  xen/pciback: Drop two backends, squash and cleanup some code.
  xen/pciback: Print out the MSI/MSI-X (PIRQ) values
  xen/pciback: Don't setup an fake IRQ handler for SR-IOV devices.
  xen: rename pciback module to xen-pciback.
  xen/pciback: Fine-grain the spinlocks and fix BUG: scheduling while atomic cases.
  xen/pciback: Allocate IRQ handler for device that is shared with guest.
  xen/pciback: Disable MSI/MSI-X when reseting a device
  xen/pciback: guest SR-IOV support for PV guest
  xen/pciback: Register the owner (domain) of the PCI device.
  xen/pciback: Cleanup the driver based on checkpatch warnings and errors.
  xen/pciback: xen pci backend driver.

Conflicts:
	drivers/xen/Kconfig
This commit is contained in:
Konrad Rzeszutek Wilk 2011-07-20 15:33:51 -04:00
commit 3a6d28b11a
15 changed files with 4505 additions and 0 deletions

View File

@ -129,4 +129,26 @@ config XEN_TMEM
Shim to interface in-kernel Transcendent Memory hooks
(e.g. cleancache and frontswap) to Xen tmem hypercalls.
config XEN_PCIDEV_BACKEND
tristate "Xen PCI-device backend driver"
depends on PCI && X86 && XEN
depends on XEN_BACKEND
default m
help
The PCI device backend driver allows the kernel to export arbitrary
PCI devices to other guests. If you select this to be a module, you
will need to make sure no other driver has bound to the device(s)
you want to make visible to other guests.
The parameter "passthrough" allows you specify how you want the PCI
devices to appear in the guest. You can choose the default (0) where
PCI topology starts at 00.00.0, or (1) for passthrough if you want
the PCI devices topology appear the same as in the host.
The "hide" parameter (only applicable if backend driver is compiled
into the kernel) allows you to bind the PCI devices to this module
from the default device drivers. The argument is the list of PCI BDFs:
xen-pciback.hide=(03:00.0)(04:00.0)
If in doubt, say m.
endmenu

View File

@ -18,6 +18,7 @@ obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o
obj-$(CONFIG_XEN_TMEM) += tmem.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
obj-$(CONFIG_XEN_DOM0) += pci.o
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
xen-evtchn-y := evtchn.o
xen-gntdev-y := gntdev.o

View File

@ -0,0 +1,7 @@
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o
xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o
xen-pciback-y += conf_space.o conf_space_header.o \
conf_space_capability.o \
conf_space_quirks.o vpci.o \
passthrough.o

View File

@ -0,0 +1,438 @@
/*
* PCI Backend - Functions for creating a virtual configuration space for
* exported PCI Devices.
* It's dangerous to allow PCI Driver Domains to change their
* device's resources (memory, i/o ports, interrupts). We need to
* restrict changes to certain PCI Configuration registers:
* BARs, INTERRUPT_PIN, most registers in the header...
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#include <linux/kernel.h>
#include <linux/pci.h>
#include "pciback.h"
#include "conf_space.h"
#include "conf_space_quirks.h"
#define DRV_NAME "xen-pciback"
static int permissive;
module_param(permissive, bool, 0644);
/* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word,
* xen_pcibk_write_config_word, and xen_pcibk_write_config_byte are created. */
#define DEFINE_PCI_CONFIG(op, size, type) \
int xen_pcibk_##op##_config_##size \
(struct pci_dev *dev, int offset, type value, void *data) \
{ \
return pci_##op##_config_##size(dev, offset, value); \
}
DEFINE_PCI_CONFIG(read, byte, u8 *)
DEFINE_PCI_CONFIG(read, word, u16 *)
DEFINE_PCI_CONFIG(read, dword, u32 *)
DEFINE_PCI_CONFIG(write, byte, u8)
DEFINE_PCI_CONFIG(write, word, u16)
DEFINE_PCI_CONFIG(write, dword, u32)
static int conf_space_read(struct pci_dev *dev,
const struct config_field_entry *entry,
int offset, u32 *value)
{
int ret = 0;
const struct config_field *field = entry->field;
*value = 0;
switch (field->size) {
case 1:
if (field->u.b.read)
ret = field->u.b.read(dev, offset, (u8 *) value,
entry->data);
break;
case 2:
if (field->u.w.read)
ret = field->u.w.read(dev, offset, (u16 *) value,
entry->data);
break;
case 4:
if (field->u.dw.read)
ret = field->u.dw.read(dev, offset, value, entry->data);
break;
}
return ret;
}
static int conf_space_write(struct pci_dev *dev,
const struct config_field_entry *entry,
int offset, u32 value)
{
int ret = 0;
const struct config_field *field = entry->field;
switch (field->size) {
case 1:
if (field->u.b.write)
ret = field->u.b.write(dev, offset, (u8) value,
entry->data);
break;
case 2:
if (field->u.w.write)
ret = field->u.w.write(dev, offset, (u16) value,
entry->data);
break;
case 4:
if (field->u.dw.write)
ret = field->u.dw.write(dev, offset, value,
entry->data);
break;
}
return ret;
}
static inline u32 get_mask(int size)
{
if (size == 1)
return 0xff;
else if (size == 2)
return 0xffff;
else
return 0xffffffff;
}
static inline int valid_request(int offset, int size)
{
/* Validate request (no un-aligned requests) */
if ((size == 1 || size == 2 || size == 4) && (offset % size) == 0)
return 1;
return 0;
}
static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
int offset)
{
if (offset >= 0) {
new_val_mask <<= (offset * 8);
new_val <<= (offset * 8);
} else {
new_val_mask >>= (offset * -8);
new_val >>= (offset * -8);
}
val = (val & ~new_val_mask) | (new_val & new_val_mask);
return val;
}
static int pcibios_err_to_errno(int err)
{
switch (err) {
case PCIBIOS_SUCCESSFUL:
return XEN_PCI_ERR_success;
case PCIBIOS_DEVICE_NOT_FOUND:
return XEN_PCI_ERR_dev_not_found;
case PCIBIOS_BAD_REGISTER_NUMBER:
return XEN_PCI_ERR_invalid_offset;
case PCIBIOS_FUNC_NOT_SUPPORTED:
return XEN_PCI_ERR_not_implemented;
case PCIBIOS_SET_FAILED:
return XEN_PCI_ERR_access_denied;
}
return err;
}
int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
u32 *ret_val)
{
int err = 0;
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
const struct config_field_entry *cfg_entry;
const struct config_field *field;
int req_start, req_end, field_start, field_end;
/* if read fails for any reason, return 0
* (as if device didn't respond) */
u32 value = 0, tmp_val;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x\n",
pci_name(dev), size, offset);
if (!valid_request(offset, size)) {
err = XEN_PCI_ERR_invalid_offset;
goto out;
}
/* Get the real value first, then modify as appropriate */
switch (size) {
case 1:
err = pci_read_config_byte(dev, offset, (u8 *) &value);
break;
case 2:
err = pci_read_config_word(dev, offset, (u16 *) &value);
break;
case 4:
err = pci_read_config_dword(dev, offset, &value);
break;
}
list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
field = cfg_entry->field;
req_start = offset;
req_end = offset + size;
field_start = OFFSET(cfg_entry);
field_end = OFFSET(cfg_entry) + field->size;
if ((req_start >= field_start && req_start < field_end)
|| (req_end > field_start && req_end <= field_end)) {
err = conf_space_read(dev, cfg_entry, field_start,
&tmp_val);
if (err)
goto out;
value = merge_value(value, tmp_val,
get_mask(field->size),
field_start - req_start);
}
}
out:
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x = %x\n",
pci_name(dev), size, offset, value);
*ret_val = value;
return pcibios_err_to_errno(err);
}
int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
{
int err = 0, handled = 0;
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
const struct config_field_entry *cfg_entry;
const struct config_field *field;
u32 tmp_val;
int req_start, req_end, field_start, field_end;
if (unlikely(verbose_request))
printk(KERN_DEBUG
DRV_NAME ": %s: write request %d bytes at 0x%x = %x\n",
pci_name(dev), size, offset, value);
if (!valid_request(offset, size))
return XEN_PCI_ERR_invalid_offset;
list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
field = cfg_entry->field;
req_start = offset;
req_end = offset + size;
field_start = OFFSET(cfg_entry);
field_end = OFFSET(cfg_entry) + field->size;
if ((req_start >= field_start && req_start < field_end)
|| (req_end > field_start && req_end <= field_end)) {
tmp_val = 0;
err = xen_pcibk_config_read(dev, field_start,
field->size, &tmp_val);
if (err)
break;
tmp_val = merge_value(tmp_val, value, get_mask(size),
req_start - field_start);
err = conf_space_write(dev, cfg_entry, field_start,
tmp_val);
/* handled is set true here, but not every byte
* may have been written! Properly detecting if
* every byte is handled is unnecessary as the
* flag is used to detect devices that need
* special helpers to work correctly.
*/
handled = 1;
}
}
if (!handled && !err) {
/* By default, anything not specificially handled above is
* read-only. The permissive flag changes this behavior so
* that anything not specifically handled above is writable.
* This means that some fields may still be read-only because
* they have entries in the config_field list that intercept
* the write and do nothing. */
if (dev_data->permissive || permissive) {
switch (size) {
case 1:
err = pci_write_config_byte(dev, offset,
(u8) value);
break;
case 2:
err = pci_write_config_word(dev, offset,
(u16) value);
break;
case 4:
err = pci_write_config_dword(dev, offset,
(u32) value);
break;
}
} else if (!dev_data->warned_on_write) {
dev_data->warned_on_write = 1;
dev_warn(&dev->dev, "Driver tried to write to a "
"read-only configuration space field at offset"
" 0x%x, size %d. This may be harmless, but if "
"you have problems with your device:\n"
"1) see permissive attribute in sysfs\n"
"2) report problems to the xen-devel "
"mailing list along with details of your "
"device obtained from lspci.\n", offset, size);
}
}
return pcibios_err_to_errno(err);
}
void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev)
{
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
struct config_field_entry *cfg_entry, *t;
const struct config_field *field;
dev_dbg(&dev->dev, "free-ing dynamically allocated virtual "
"configuration space fields\n");
if (!dev_data)
return;
list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
field = cfg_entry->field;
if (field->clean) {
field->clean((struct config_field *)field);
kfree(cfg_entry->data);
list_del(&cfg_entry->list);
kfree(cfg_entry);
}
}
}
void xen_pcibk_config_reset_dev(struct pci_dev *dev)
{
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
const struct config_field_entry *cfg_entry;
const struct config_field *field;
dev_dbg(&dev->dev, "resetting virtual configuration space\n");
if (!dev_data)
return;
list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
field = cfg_entry->field;
if (field->reset)
field->reset(dev, OFFSET(cfg_entry), cfg_entry->data);
}
}
void xen_pcibk_config_free_dev(struct pci_dev *dev)
{
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
struct config_field_entry *cfg_entry, *t;
const struct config_field *field;
dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
if (!dev_data)
return;
list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
list_del(&cfg_entry->list);
field = cfg_entry->field;
if (field->release)
field->release(dev, OFFSET(cfg_entry), cfg_entry->data);
kfree(cfg_entry);
}
}
int xen_pcibk_config_add_field_offset(struct pci_dev *dev,
const struct config_field *field,
unsigned int base_offset)
{
int err = 0;
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
struct config_field_entry *cfg_entry;
void *tmp;
cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL);
if (!cfg_entry) {
err = -ENOMEM;
goto out;
}
cfg_entry->data = NULL;
cfg_entry->field = field;
cfg_entry->base_offset = base_offset;
/* silently ignore duplicate fields */
err = xen_pcibk_field_is_dup(dev, OFFSET(cfg_entry));
if (err)
goto out;
if (field->init) {
tmp = field->init(dev, OFFSET(cfg_entry));
if (IS_ERR(tmp)) {
err = PTR_ERR(tmp);
goto out;
}
cfg_entry->data = tmp;
}
dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
OFFSET(cfg_entry));
list_add_tail(&cfg_entry->list, &dev_data->config_fields);
out:
if (err)
kfree(cfg_entry);
return err;
}
/* This sets up the device's virtual configuration space to keep track of
* certain registers (like the base address registers (BARs) so that we can
* keep the client from manipulating them directly.
*/
int xen_pcibk_config_init_dev(struct pci_dev *dev)
{
int err = 0;
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
dev_dbg(&dev->dev, "initializing virtual configuration space\n");
INIT_LIST_HEAD(&dev_data->config_fields);
err = xen_pcibk_config_header_add_fields(dev);
if (err)
goto out;
err = xen_pcibk_config_capability_add_fields(dev);
if (err)
goto out;
err = xen_pcibk_config_quirks_init(dev);
out:
return err;
}
int xen_pcibk_config_init(void)
{
return xen_pcibk_config_capability_init();
}

View File

@ -0,0 +1,126 @@
/*
* PCI Backend - Common data structures for overriding the configuration space
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#ifndef __XEN_PCIBACK_CONF_SPACE_H__
#define __XEN_PCIBACK_CONF_SPACE_H__
#include <linux/list.h>
#include <linux/err.h>
/* conf_field_init can return an errno in a ptr with ERR_PTR() */
typedef void *(*conf_field_init) (struct pci_dev *dev, int offset);
typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data);
typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data);
typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value,
void *data);
typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value,
void *data);
typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value,
void *data);
typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value,
void *data);
typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value,
void *data);
typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value,
void *data);
/* These are the fields within the configuration space which we
* are interested in intercepting reads/writes to and changing their
* values.
*/
struct config_field {
unsigned int offset;
unsigned int size;
unsigned int mask;
conf_field_init init;
conf_field_reset reset;
conf_field_free release;
void (*clean) (struct config_field *field);
union {
struct {
conf_dword_write write;
conf_dword_read read;
} dw;
struct {
conf_word_write write;
conf_word_read read;
} w;
struct {
conf_byte_write write;
conf_byte_read read;
} b;
} u;
struct list_head list;
};
struct config_field_entry {
struct list_head list;
const struct config_field *field;
unsigned int base_offset;
void *data;
};
#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
/* Add fields to a device - the add_fields macro expects to get a pointer to
* the first entry in an array (of which the ending is marked by size==0)
*/
int xen_pcibk_config_add_field_offset(struct pci_dev *dev,
const struct config_field *field,
unsigned int offset);
static inline int xen_pcibk_config_add_field(struct pci_dev *dev,
const struct config_field *field)
{
return xen_pcibk_config_add_field_offset(dev, field, 0);
}
static inline int xen_pcibk_config_add_fields(struct pci_dev *dev,
const struct config_field *field)
{
int i, err = 0;
for (i = 0; field[i].size != 0; i++) {
err = xen_pcibk_config_add_field(dev, &field[i]);
if (err)
break;
}
return err;
}
static inline int xen_pcibk_config_add_fields_offset(struct pci_dev *dev,
const struct config_field *field,
unsigned int offset)
{
int i, err = 0;
for (i = 0; field[i].size != 0; i++) {
err = xen_pcibk_config_add_field_offset(dev, &field[i], offset);
if (err)
break;
}
return err;
}
/* Read/Write the real configuration space */
int xen_pcibk_read_config_byte(struct pci_dev *dev, int offset, u8 *value,
void *data);
int xen_pcibk_read_config_word(struct pci_dev *dev, int offset, u16 *value,
void *data);
int xen_pcibk_read_config_dword(struct pci_dev *dev, int offset, u32 *value,
void *data);
int xen_pcibk_write_config_byte(struct pci_dev *dev, int offset, u8 value,
void *data);
int xen_pcibk_write_config_word(struct pci_dev *dev, int offset, u16 value,
void *data);
int xen_pcibk_write_config_dword(struct pci_dev *dev, int offset, u32 value,
void *data);
int xen_pcibk_config_capability_init(void);
int xen_pcibk_config_header_add_fields(struct pci_dev *dev);
int xen_pcibk_config_capability_add_fields(struct pci_dev *dev);
#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */

View File

@ -0,0 +1,207 @@
/*
* PCI Backend - Handles the virtual fields found on the capability lists
* in the configuration space.
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#include <linux/kernel.h>
#include <linux/pci.h>
#include "pciback.h"
#include "conf_space.h"
static LIST_HEAD(capabilities);
struct xen_pcibk_config_capability {
struct list_head cap_list;
int capability;
/* If the device has the capability found above, add these fields */
const struct config_field *fields;
};
static const struct config_field caplist_header[] = {
{
.offset = PCI_CAP_LIST_ID,
.size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
.u.w.read = xen_pcibk_read_config_word,
.u.w.write = NULL,
},
{}
};
static inline void register_capability(struct xen_pcibk_config_capability *cap)
{
list_add_tail(&cap->cap_list, &capabilities);
}
int xen_pcibk_config_capability_add_fields(struct pci_dev *dev)
{
int err = 0;
struct xen_pcibk_config_capability *cap;
int cap_offset;
list_for_each_entry(cap, &capabilities, cap_list) {
cap_offset = pci_find_capability(dev, cap->capability);
if (cap_offset) {
dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
cap->capability, cap_offset);
err = xen_pcibk_config_add_fields_offset(dev,
caplist_header,
cap_offset);
if (err)
goto out;
err = xen_pcibk_config_add_fields_offset(dev,
cap->fields,
cap_offset);
if (err)
goto out;
}
}
out:
return err;
}
static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
void *data)
{
/* Disallow writes to the vital product data */
if (value & PCI_VPD_ADDR_F)
return PCIBIOS_SET_FAILED;
else
return pci_write_config_word(dev, offset, value);
}
static const struct config_field caplist_vpd[] = {
{
.offset = PCI_VPD_ADDR,
.size = 2,
.u.w.read = xen_pcibk_read_config_word,
.u.w.write = vpd_address_write,
},
{
.offset = PCI_VPD_DATA,
.size = 4,
.u.dw.read = xen_pcibk_read_config_dword,
.u.dw.write = NULL,
},
{}
};
static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
void *data)
{
int err;
u16 real_value;
err = pci_read_config_word(dev, offset, &real_value);
if (err)
goto out;
*value = real_value & ~PCI_PM_CAP_PME_MASK;
out:
return err;
}
/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
* Can't allow driver domain to enable PMEs - they're shared */
#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
void *data)
{
int err;
u16 old_value;
pci_power_t new_state, old_state;
err = pci_read_config_word(dev, offset, &old_value);
if (err)
goto out;
old_state = (pci_power_t)(old_value & PCI_PM_CTRL_STATE_MASK);
new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
new_value &= PM_OK_BITS;
if ((old_value & PM_OK_BITS) != new_value) {
new_value = (old_value & ~PM_OK_BITS) | new_value;
err = pci_write_config_word(dev, offset, new_value);
if (err)
goto out;
}
/* Let pci core handle the power management change */
dev_dbg(&dev->dev, "set power state to %x\n", new_state);
err = pci_set_power_state(dev, new_state);
if (err) {
err = PCIBIOS_SET_FAILED;
goto out;
}
out:
return err;
}
/* Ensure PMEs are disabled */
static void *pm_ctrl_init(struct pci_dev *dev, int offset)
{
int err;
u16 value;
err = pci_read_config_word(dev, offset, &value);
if (err)
goto out;
if (value & PCI_PM_CTRL_PME_ENABLE) {
value &= ~PCI_PM_CTRL_PME_ENABLE;
err = pci_write_config_word(dev, offset, value);
}
out:
return ERR_PTR(err);
}
static const struct config_field caplist_pm[] = {
{
.offset = PCI_PM_PMC,
.size = 2,
.u.w.read = pm_caps_read,
},
{
.offset = PCI_PM_CTRL,
.size = 2,
.init = pm_ctrl_init,
.u.w.read = xen_pcibk_read_config_word,
.u.w.write = pm_ctrl_write,
},
{
.offset = PCI_PM_PPB_EXTENSIONS,
.size = 1,
.u.b.read = xen_pcibk_read_config_byte,
},
{
.offset = PCI_PM_DATA_REGISTER,
.size = 1,
.u.b.read = xen_pcibk_read_config_byte,
},
{}
};
static struct xen_pcibk_config_capability xen_pcibk_config_capability_pm = {
.capability = PCI_CAP_ID_PM,
.fields = caplist_pm,
};
static struct xen_pcibk_config_capability xen_pcibk_config_capability_vpd = {
.capability = PCI_CAP_ID_VPD,
.fields = caplist_vpd,
};
int xen_pcibk_config_capability_init(void)
{
register_capability(&xen_pcibk_config_capability_vpd);
register_capability(&xen_pcibk_config_capability_pm);
return 0;
}

View File

@ -0,0 +1,386 @@
/*
* PCI Backend - Handles the virtual fields in the configuration space headers.
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#include <linux/kernel.h>
#include <linux/pci.h>
#include "pciback.h"
#include "conf_space.h"
struct pci_bar_info {
u32 val;
u32 len_val;
int which;
};
#define DRV_NAME "xen-pciback"
#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
{
int i;
int ret;
ret = xen_pcibk_read_config_word(dev, offset, value, data);
if (!atomic_read(&dev->enable_cnt))
return ret;
for (i = 0; i < PCI_ROM_RESOURCE; i++) {
if (dev->resource[i].flags & IORESOURCE_IO)
*value |= PCI_COMMAND_IO;
if (dev->resource[i].flags & IORESOURCE_MEM)
*value |= PCI_COMMAND_MEMORY;
}
return ret;
}
static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
{
struct xen_pcibk_dev_data *dev_data;
int err;
dev_data = pci_get_drvdata(dev);
if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: enable\n",
pci_name(dev));
err = pci_enable_device(dev);
if (err)
return err;
if (dev_data)
dev_data->enable_intx = 1;
} else if (pci_is_enabled(dev) && !is_enable_cmd(value)) {
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: disable\n",
pci_name(dev));
pci_disable_device(dev);
if (dev_data)
dev_data->enable_intx = 0;
}
if (!dev->is_busmaster && is_master_cmd(value)) {
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: set bus master\n",
pci_name(dev));
pci_set_master(dev);
}
if (value & PCI_COMMAND_INVALIDATE) {
if (unlikely(verbose_request))
printk(KERN_DEBUG
DRV_NAME ": %s: enable memory-write-invalidate\n",
pci_name(dev));
err = pci_set_mwi(dev);
if (err) {
printk(KERN_WARNING
DRV_NAME ": %s: cannot enable "
"memory-write-invalidate (%d)\n",
pci_name(dev), err);
value &= ~PCI_COMMAND_INVALIDATE;
}
}
return pci_write_config_word(dev, offset, value);
}
static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
{
struct pci_bar_info *bar = data;
if (unlikely(!bar)) {
printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
pci_name(dev));
return XEN_PCI_ERR_op_failed;
}
/* A write to obtain the length must happen as a 32-bit write.
* This does not (yet) support writing individual bytes
*/
if (value == ~PCI_ROM_ADDRESS_ENABLE)
bar->which = 1;
else {
u32 tmpval;
pci_read_config_dword(dev, offset, &tmpval);
if (tmpval != bar->val && value == bar->val) {
/* Allow restoration of bar value. */
pci_write_config_dword(dev, offset, bar->val);
}
bar->which = 0;
}
/* Do we need to support enabling/disabling the rom address here? */
return 0;
}
/* For the BARs, only allow writes which write ~0 or
* the correct resource information
* (Needed for when the driver probes the resource usage)
*/
static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
{
struct pci_bar_info *bar = data;
if (unlikely(!bar)) {
printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
pci_name(dev));
return XEN_PCI_ERR_op_failed;
}
/* A write to obtain the length must happen as a 32-bit write.
* This does not (yet) support writing individual bytes
*/
if (value == ~0)
bar->which = 1;
else {
u32 tmpval;
pci_read_config_dword(dev, offset, &tmpval);
if (tmpval != bar->val && value == bar->val) {
/* Allow restoration of bar value. */
pci_write_config_dword(dev, offset, bar->val);
}
bar->which = 0;
}
return 0;
}
static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
{
struct pci_bar_info *bar = data;
if (unlikely(!bar)) {
printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
pci_name(dev));
return XEN_PCI_ERR_op_failed;
}
*value = bar->which ? bar->len_val : bar->val;
return 0;
}
static inline void read_dev_bar(struct pci_dev *dev,
struct pci_bar_info *bar_info, int offset,
u32 len_mask)
{
int pos;
struct resource *res = dev->resource;
if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1)
pos = PCI_ROM_RESOURCE;
else {
pos = (offset - PCI_BASE_ADDRESS_0) / 4;
if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE |
PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
(PCI_BASE_ADDRESS_SPACE_MEMORY |
PCI_BASE_ADDRESS_MEM_TYPE_64))) {
bar_info->val = res[pos - 1].start >> 32;
bar_info->len_val = res[pos - 1].end >> 32;
return;
}
}
bar_info->val = res[pos].start |
(res[pos].flags & PCI_REGION_FLAG_MASK);
bar_info->len_val = res[pos].end - res[pos].start + 1;
}
static void *bar_init(struct pci_dev *dev, int offset)
{
struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
if (!bar)
return ERR_PTR(-ENOMEM);
read_dev_bar(dev, bar, offset, ~0);
bar->which = 0;
return bar;
}
static void *rom_init(struct pci_dev *dev, int offset)
{
struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
if (!bar)
return ERR_PTR(-ENOMEM);
read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
bar->which = 0;
return bar;
}
static void bar_reset(struct pci_dev *dev, int offset, void *data)
{
struct pci_bar_info *bar = data;
bar->which = 0;
}
static void bar_release(struct pci_dev *dev, int offset, void *data)
{
kfree(data);
}
static int xen_pcibk_read_vendor(struct pci_dev *dev, int offset,
u16 *value, void *data)
{
*value = dev->vendor;
return 0;
}
static int xen_pcibk_read_device(struct pci_dev *dev, int offset,
u16 *value, void *data)
{
*value = dev->device;
return 0;
}
static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
void *data)
{
*value = (u8) dev->irq;
return 0;
}
static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
{
u8 cur_value;
int err;
err = pci_read_config_byte(dev, offset, &cur_value);
if (err)
goto out;
if ((cur_value & ~PCI_BIST_START) == (value & ~PCI_BIST_START)
|| value == PCI_BIST_START)
err = pci_write_config_byte(dev, offset, value);
out:
return err;
}
static const struct config_field header_common[] = {
{
.offset = PCI_VENDOR_ID,
.size = 2,
.u.w.read = xen_pcibk_read_vendor,
},
{
.offset = PCI_DEVICE_ID,
.size = 2,
.u.w.read = xen_pcibk_read_device,
},
{
.offset = PCI_COMMAND,
.size = 2,
.u.w.read = command_read,
.u.w.write = command_write,
},
{
.offset = PCI_INTERRUPT_LINE,
.size = 1,
.u.b.read = interrupt_read,
},
{
.offset = PCI_INTERRUPT_PIN,
.size = 1,
.u.b.read = xen_pcibk_read_config_byte,
},
{
/* Any side effects of letting driver domain control cache line? */
.offset = PCI_CACHE_LINE_SIZE,
.size = 1,
.u.b.read = xen_pcibk_read_config_byte,
.u.b.write = xen_pcibk_write_config_byte,
},
{
.offset = PCI_LATENCY_TIMER,
.size = 1,
.u.b.read = xen_pcibk_read_config_byte,
},
{
.offset = PCI_BIST,
.size = 1,
.u.b.read = xen_pcibk_read_config_byte,
.u.b.write = bist_write,
},
{}
};
#define CFG_FIELD_BAR(reg_offset) \
{ \
.offset = reg_offset, \
.size = 4, \
.init = bar_init, \
.reset = bar_reset, \
.release = bar_release, \
.u.dw.read = bar_read, \
.u.dw.write = bar_write, \
}
#define CFG_FIELD_ROM(reg_offset) \
{ \
.offset = reg_offset, \
.size = 4, \
.init = rom_init, \
.reset = bar_reset, \
.release = bar_release, \
.u.dw.read = bar_read, \
.u.dw.write = rom_write, \
}
static const struct config_field header_0[] = {
CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
CFG_FIELD_ROM(PCI_ROM_ADDRESS),
{}
};
static const struct config_field header_1[] = {
CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
{}
};
int xen_pcibk_config_header_add_fields(struct pci_dev *dev)
{
int err;
err = xen_pcibk_config_add_fields(dev, header_common);
if (err)
goto out;
switch (dev->hdr_type) {
case PCI_HEADER_TYPE_NORMAL:
err = xen_pcibk_config_add_fields(dev, header_0);
break;
case PCI_HEADER_TYPE_BRIDGE:
err = xen_pcibk_config_add_fields(dev, header_1);
break;
default:
err = -EINVAL;
printk(KERN_ERR DRV_NAME ": %s: Unsupported header type %d!\n",
pci_name(dev), dev->hdr_type);
break;
}
out:
return err;
}

View File

@ -0,0 +1,140 @@
/*
* PCI Backend - Handle special overlays for broken devices.
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
* Author: Chris Bookholt <hap10@epoch.ncsc.mil>
*/
#include <linux/kernel.h>
#include <linux/pci.h>
#include "pciback.h"
#include "conf_space.h"
#include "conf_space_quirks.h"
LIST_HEAD(xen_pcibk_quirks);
#define DRV_NAME "xen-pciback"
static inline const struct pci_device_id *
match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
{
if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
(id->device == PCI_ANY_ID || id->device == dev->device) &&
(id->subvendor == PCI_ANY_ID ||
id->subvendor == dev->subsystem_vendor) &&
(id->subdevice == PCI_ANY_ID ||
id->subdevice == dev->subsystem_device) &&
!((id->class ^ dev->class) & id->class_mask))
return id;
return NULL;
}
static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev)
{
struct xen_pcibk_config_quirk *tmp_quirk;
list_for_each_entry(tmp_quirk, &xen_pcibk_quirks, quirks_list)
if (match_one_device(&tmp_quirk->devid, dev) != NULL)
goto out;
tmp_quirk = NULL;
printk(KERN_DEBUG DRV_NAME
":quirk didn't match any device xen_pciback knows about\n");
out:
return tmp_quirk;
}
static inline void register_quirk(struct xen_pcibk_config_quirk *quirk)
{
list_add_tail(&quirk->quirks_list, &xen_pcibk_quirks);
}
int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg)
{
int ret = 0;
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
struct config_field_entry *cfg_entry;
list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
if (OFFSET(cfg_entry) == reg) {
ret = 1;
break;
}
}
return ret;
}
int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field
*field)
{
int err = 0;
switch (field->size) {
case 1:
field->u.b.read = xen_pcibk_read_config_byte;
field->u.b.write = xen_pcibk_write_config_byte;
break;
case 2:
field->u.w.read = xen_pcibk_read_config_word;
field->u.w.write = xen_pcibk_write_config_word;
break;
case 4:
field->u.dw.read = xen_pcibk_read_config_dword;
field->u.dw.write = xen_pcibk_write_config_dword;
break;
default:
err = -EINVAL;
goto out;
}
xen_pcibk_config_add_field(dev, field);
out:
return err;
}
int xen_pcibk_config_quirks_init(struct pci_dev *dev)
{
struct xen_pcibk_config_quirk *quirk;
int ret = 0;
quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC);
if (!quirk) {
ret = -ENOMEM;
goto out;
}
quirk->devid.vendor = dev->vendor;
quirk->devid.device = dev->device;
quirk->devid.subvendor = dev->subsystem_vendor;
quirk->devid.subdevice = dev->subsystem_device;
quirk->devid.class = 0;
quirk->devid.class_mask = 0;
quirk->devid.driver_data = 0UL;
quirk->pdev = dev;
register_quirk(quirk);
out:
return ret;
}
void xen_pcibk_config_field_free(struct config_field *field)
{
kfree(field);
}
int xen_pcibk_config_quirk_release(struct pci_dev *dev)
{
struct xen_pcibk_config_quirk *quirk;
int ret = 0;
quirk = xen_pcibk_find_quirk(dev);
if (!quirk) {
ret = -ENXIO;
goto out;
}
list_del(&quirk->quirks_list);
kfree(quirk);
out:
return ret;
}

View File

@ -0,0 +1,33 @@
/*
* PCI Backend - Data structures for special overlays for broken devices.
*
* Ryan Wilson <hap9@epoch.ncsc.mil>
* Chris Bookholt <hap10@epoch.ncsc.mil>
*/
#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
#include <linux/pci.h>
#include <linux/list.h>
struct xen_pcibk_config_quirk {
struct list_head quirks_list;
struct pci_device_id devid;
struct pci_dev *pdev;
};
int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field
*field);
int xen_pcibk_config_quirks_remove_field(struct pci_dev *dev, int reg);
int xen_pcibk_config_quirks_init(struct pci_dev *dev);
void xen_pcibk_config_field_free(struct config_field *field);
int xen_pcibk_config_quirk_release(struct pci_dev *dev);
int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg);
#endif

View File

@ -0,0 +1,194 @@
/*
* PCI Backend - Provides restricted access to the real PCI bus topology
* to the frontend
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/spinlock.h>
#include "pciback.h"
struct passthrough_dev_data {
/* Access to dev_list must be protected by lock */
struct list_head dev_list;
spinlock_t lock;
};
static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
unsigned int domain,
unsigned int bus,
unsigned int devfn)
{
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry;
struct pci_dev *dev = NULL;
unsigned long flags;
spin_lock_irqsave(&dev_data->lock, flags);
list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
&& bus == (unsigned int)dev_entry->dev->bus->number
&& devfn == dev_entry->dev->devfn) {
dev = dev_entry->dev;
break;
}
}
spin_unlock_irqrestore(&dev_data->lock, flags);
return dev;
}
static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev,
int devid, publish_pci_dev_cb publish_cb)
{
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry;
unsigned long flags;
unsigned int domain, bus, devfn;
int err;
dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
if (!dev_entry)
return -ENOMEM;
dev_entry->dev = dev;
spin_lock_irqsave(&dev_data->lock, flags);
list_add_tail(&dev_entry->list, &dev_data->dev_list);
spin_unlock_irqrestore(&dev_data->lock, flags);
/* Publish this device. */
domain = (unsigned int)pci_domain_nr(dev->bus);
bus = (unsigned int)dev->bus->number;
devfn = dev->devfn;
err = publish_cb(pdev, domain, bus, devfn, devid);
return err;
}
static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev)
{
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry, *t;
struct pci_dev *found_dev = NULL;
unsigned long flags;
spin_lock_irqsave(&dev_data->lock, flags);
list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
if (dev_entry->dev == dev) {
list_del(&dev_entry->list);
found_dev = dev_entry->dev;
kfree(dev_entry);
}
}
spin_unlock_irqrestore(&dev_data->lock, flags);
if (found_dev)
pcistub_put_pci_dev(found_dev);
}
static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
{
struct passthrough_dev_data *dev_data;
dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
if (!dev_data)
return -ENOMEM;
spin_lock_init(&dev_data->lock);
INIT_LIST_HEAD(&dev_data->dev_list);
pdev->pci_dev_data = dev_data;
return 0;
}
static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
publish_pci_root_cb publish_root_cb)
{
int err = 0;
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry, *e, *tmp;
struct pci_dev *dev;
int found;
unsigned int domain, bus;
spin_lock(&dev_data->lock);
list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) {
/* Only publish this device as a root if none of its
* parent bridges are exported
*/
found = 0;
dev = dev_entry->dev->bus->self;
for (; !found && dev != NULL; dev = dev->bus->self) {
list_for_each_entry(e, &dev_data->dev_list, list) {
if (dev == e->dev) {
found = 1;
break;
}
}
}
domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
bus = (unsigned int)dev_entry->dev->bus->number;
if (!found) {
spin_unlock(&dev_data->lock);
err = publish_root_cb(pdev, domain, bus);
if (err)
break;
spin_lock(&dev_data->lock);
}
}
if (!err)
spin_unlock(&dev_data->lock);
return err;
}
static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
{
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
struct pci_dev_entry *dev_entry, *t;
list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
list_del(&dev_entry->list);
pcistub_put_pci_dev(dev_entry->dev);
kfree(dev_entry);
}
kfree(dev_data);
pdev->pci_dev_data = NULL;
}
static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
struct xen_pcibk_device *pdev,
unsigned int *domain, unsigned int *bus,
unsigned int *devfn)
{
*domain = pci_domain_nr(pcidev->bus);
*bus = pcidev->bus->number;
*devfn = pcidev->devfn;
return 1;
}
struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
.name = "passthrough",
.init = __xen_pcibk_init_devices,
.free = __xen_pcibk_release_devices,
.find = __xen_pcibk_get_pcifront_dev,
.publish = __xen_pcibk_publish_pci_roots,
.release = __xen_pcibk_release_pci_dev,
.add = __xen_pcibk_add_pci_dev,
.get = __xen_pcibk_get_pci_dev,
};

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,183 @@
/*
* PCI Backend Common Data Structures & Function Declarations
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#ifndef __XEN_PCIBACK_H__
#define __XEN_PCIBACK_H__
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <xen/xenbus.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/atomic.h>
#include <xen/interface/io/pciif.h>
struct pci_dev_entry {
struct list_head list;
struct pci_dev *dev;
};
#define _PDEVF_op_active (0)
#define PDEVF_op_active (1<<(_PDEVF_op_active))
#define _PCIB_op_pending (1)
#define PCIB_op_pending (1<<(_PCIB_op_pending))
struct xen_pcibk_device {
void *pci_dev_data;
spinlock_t dev_lock;
struct xenbus_device *xdev;
struct xenbus_watch be_watch;
u8 be_watching;
int evtchn_irq;
struct xen_pci_sharedinfo *sh_info;
unsigned long flags;
struct work_struct op_work;
};
struct xen_pcibk_dev_data {
struct list_head config_fields;
unsigned int permissive:1;
unsigned int warned_on_write:1;
unsigned int enable_intx:1;
unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
unsigned int ack_intr:1; /* .. and ACK-ing */
unsigned long handled;
unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
char irq_name[0]; /* xen-pcibk[000:04:00.0] */
};
/* Used by XenBus and xen_pcibk_ops.c */
extern wait_queue_head_t xen_pcibk_aer_wait_queue;
extern struct workqueue_struct *xen_pcibk_wq;
/* Used by pcistub.c and conf_space_quirks.c */
extern struct list_head xen_pcibk_quirks;
/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
int domain, int bus,
int slot, int func);
struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev);
void pcistub_put_pci_dev(struct pci_dev *dev);
/* Ensure a device is turned off or reset */
void xen_pcibk_reset_device(struct pci_dev *pdev);
/* Access a virtual configuration space for a PCI device */
int xen_pcibk_config_init(void);
int xen_pcibk_config_init_dev(struct pci_dev *dev);
void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev);
void xen_pcibk_config_reset_dev(struct pci_dev *dev);
void xen_pcibk_config_free_dev(struct pci_dev *dev);
int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
u32 *ret_val);
int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size,
u32 value);
/* Handle requests for specific devices from the frontend */
typedef int (*publish_pci_dev_cb) (struct xen_pcibk_device *pdev,
unsigned int domain, unsigned int bus,
unsigned int devfn, unsigned int devid);
typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev,
unsigned int domain, unsigned int bus);
/* Backend registration for the two types of BDF representation:
* vpci - BDFs start at 00
* passthrough - BDFs are exactly like in the host.
*/
struct xen_pcibk_backend {
char *name;
int (*init)(struct xen_pcibk_device *pdev);
void (*free)(struct xen_pcibk_device *pdev);
int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev,
unsigned int *domain, unsigned int *bus,
unsigned int *devfn);
int (*publish)(struct xen_pcibk_device *pdev, publish_pci_root_cb cb);
void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev);
int (*add)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
int devid, publish_pci_dev_cb publish_cb);
struct pci_dev *(*get)(struct xen_pcibk_device *pdev,
unsigned int domain, unsigned int bus,
unsigned int devfn);
};
extern struct xen_pcibk_backend xen_pcibk_vpci_backend;
extern struct xen_pcibk_backend xen_pcibk_passthrough_backend;
extern struct xen_pcibk_backend *xen_pcibk_backend;
static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev,
int devid,
publish_pci_dev_cb publish_cb)
{
if (xen_pcibk_backend && xen_pcibk_backend->add)
return xen_pcibk_backend->add(pdev, dev, devid, publish_cb);
return -1;
};
static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev)
{
if (xen_pcibk_backend && xen_pcibk_backend->free)
return xen_pcibk_backend->release(pdev, dev);
};
static inline struct pci_dev *
xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
unsigned int bus, unsigned int devfn)
{
if (xen_pcibk_backend && xen_pcibk_backend->get)
return xen_pcibk_backend->get(pdev, domain, bus, devfn);
return NULL;
};
/**
* Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in xen_pcibk
* before sending aer request to pcifront, so that guest could identify
* device, coopearte with xen_pcibk to finish aer recovery job if device driver
* has the capability
*/
static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
struct xen_pcibk_device *pdev,
unsigned int *domain,
unsigned int *bus,
unsigned int *devfn)
{
if (xen_pcibk_backend && xen_pcibk_backend->find)
return xen_pcibk_backend->find(pcidev, pdev, domain, bus,
devfn);
return -1;
};
static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
{
if (xen_pcibk_backend && xen_pcibk_backend->init)
return xen_pcibk_backend->init(pdev);
return -1;
};
static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
publish_pci_root_cb cb)
{
if (xen_pcibk_backend && xen_pcibk_backend->publish)
return xen_pcibk_backend->publish(pdev, cb);
return -1;
};
static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
{
if (xen_pcibk_backend && xen_pcibk_backend->free)
return xen_pcibk_backend->free(pdev);
};
/* Handles events from front-end */
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
void xen_pcibk_do_op(struct work_struct *data);
int xen_pcibk_xenbus_register(void);
void xen_pcibk_xenbus_unregister(void);
extern int verbose_request;
void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev);
#endif
/* Handles shared IRQs that can to device domain and control domain. */
void xen_pcibk_irq_handler(struct pci_dev *dev, int reset);

View File

@ -0,0 +1,384 @@
/*
* PCI Backend Operations - respond to PCI requests from Frontend
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#include <linux/module.h>
#include <linux/wait.h>
#include <linux/bitops.h>
#include <xen/events.h>
#include <linux/sched.h>
#include "pciback.h"
#define DRV_NAME "xen-pciback"
int verbose_request;
module_param(verbose_request, int, 0644);
static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id);
/* Ensure a device is has the fake IRQ handler "turned on/off" and is
* ready to be exported. This MUST be run after xen_pcibk_reset_device
* which does the actual PCI device enable/disable.
*/
static void xen_pcibk_control_isr(struct pci_dev *dev, int reset)
{
struct xen_pcibk_dev_data *dev_data;
int rc;
int enable = 0;
dev_data = pci_get_drvdata(dev);
if (!dev_data)
return;
/* We don't deal with bridges */
if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
return;
if (reset) {
dev_data->enable_intx = 0;
dev_data->ack_intr = 0;
}
enable = dev_data->enable_intx;
/* Asked to disable, but ISR isn't runnig */
if (!enable && !dev_data->isr_on)
return;
/* Squirrel away the IRQs in the dev_data. We need this
* b/c when device transitions to MSI, the dev->irq is
* overwritten with the MSI vector.
*/
if (enable)
dev_data->irq = dev->irq;
/*
* SR-IOV devices in all use MSI-X and have no legacy
* interrupts, so inhibit creating a fake IRQ handler for them.
*/
if (dev_data->irq == 0)
goto out;
dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
dev_data->irq_name,
dev_data->irq,
pci_is_enabled(dev) ? "on" : "off",
dev->msi_enabled ? "MSI" : "",
dev->msix_enabled ? "MSI/X" : "",
dev_data->isr_on ? "enable" : "disable",
enable ? "enable" : "disable");
if (enable) {
rc = request_irq(dev_data->irq,
xen_pcibk_guest_interrupt, IRQF_SHARED,
dev_data->irq_name, dev);
if (rc) {
dev_err(&dev->dev, "%s: failed to install fake IRQ " \
"handler for IRQ %d! (rc:%d)\n",
dev_data->irq_name, dev_data->irq, rc);
goto out;
}
} else {
free_irq(dev_data->irq, dev);
dev_data->irq = 0;
}
dev_data->isr_on = enable;
dev_data->ack_intr = enable;
out:
dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
dev_data->irq_name,
dev_data->irq,
pci_is_enabled(dev) ? "on" : "off",
dev->msi_enabled ? "MSI" : "",
dev->msix_enabled ? "MSI/X" : "",
enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
(dev_data->isr_on ? "failed to disable" : "disabled"));
}
/* Ensure a device is "turned off" and ready to be exported.
* (Also see xen_pcibk_config_reset to ensure virtual configuration space is
* ready to be re-exported)
*/
void xen_pcibk_reset_device(struct pci_dev *dev)
{
u16 cmd;
xen_pcibk_control_isr(dev, 1 /* reset device */);
/* Disable devices (but not bridges) */
if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
#ifdef CONFIG_PCI_MSI
/* The guest could have been abruptly killed without
* disabling MSI/MSI-X interrupts.*/
if (dev->msix_enabled)
pci_disable_msix(dev);
if (dev->msi_enabled)
pci_disable_msi(dev);
#endif
pci_disable_device(dev);
pci_write_config_word(dev, PCI_COMMAND, 0);
dev->is_busmaster = 0;
} else {
pci_read_config_word(dev, PCI_COMMAND, &cmd);
if (cmd & (PCI_COMMAND_INVALIDATE)) {
cmd &= ~(PCI_COMMAND_INVALIDATE);
pci_write_config_word(dev, PCI_COMMAND, cmd);
dev->is_busmaster = 0;
}
}
}
#ifdef CONFIG_PCI_MSI
static
int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
struct xen_pcibk_dev_data *dev_data;
int otherend = pdev->xdev->otherend_id;
int status;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev));
status = pci_enable_msi(dev);
if (status) {
printk(KERN_ERR "error enable msi for guest %x status %x\n",
otherend, status);
op->value = 0;
return XEN_PCI_ERR_op_failed;
}
/* The value the guest needs is actually the IDT vector, not the
* the local domain's IRQ number. */
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
op->value);
dev_data = pci_get_drvdata(dev);
if (dev_data)
dev_data->ack_intr = 0;
return 0;
}
static
int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
struct xen_pcibk_dev_data *dev_data;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n",
pci_name(dev));
pci_disable_msi(dev);
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
op->value);
dev_data = pci_get_drvdata(dev);
if (dev_data)
dev_data->ack_intr = 1;
return 0;
}
static
int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
struct xen_pcibk_dev_data *dev_data;
int i, result;
struct msix_entry *entries;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n",
pci_name(dev));
if (op->value > SH_INFO_MAX_VEC)
return -EINVAL;
entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
if (entries == NULL)
return -ENOMEM;
for (i = 0; i < op->value; i++) {
entries[i].entry = op->msix_entries[i].entry;
entries[i].vector = op->msix_entries[i].vector;
}
result = pci_enable_msix(dev, entries, op->value);
if (result == 0) {
for (i = 0; i < op->value; i++) {
op->msix_entries[i].entry = entries[i].entry;
if (entries[i].vector)
op->msix_entries[i].vector =
xen_pirq_from_irq(entries[i].vector);
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: " \
"MSI-X[%d]: %d\n",
pci_name(dev), i,
op->msix_entries[i].vector);
}
} else {
printk(KERN_WARNING DRV_NAME ": %s: failed to enable MSI-X: err %d!\n",
pci_name(dev), result);
}
kfree(entries);
op->value = result;
dev_data = pci_get_drvdata(dev);
if (dev_data)
dev_data->ack_intr = 0;
return result;
}
static
int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
{
struct xen_pcibk_dev_data *dev_data;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n",
pci_name(dev));
pci_disable_msix(dev);
/*
* SR-IOV devices (which don't have any legacy IRQ) have
* an undefined IRQ value of zero.
*/
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
if (unlikely(verbose_request))
printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev),
op->value);
dev_data = pci_get_drvdata(dev);
if (dev_data)
dev_data->ack_intr = 1;
return 0;
}
#endif
/*
* Now the same evtchn is used for both pcifront conf_read_write request
* as well as pcie aer front end ack. We use a new work_queue to schedule
* xen_pcibk conf_read_write service for avoiding confict with aer_core
* do_recovery job which also use the system default work_queue
*/
void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
{
/* Check that frontend is requesting an operation and that we are not
* already processing a request */
if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
&& !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
queue_work(xen_pcibk_wq, &pdev->op_work);
}
/*_XEN_PCIB_active should have been cleared by pcifront. And also make
sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/
if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
&& test_bit(_PCIB_op_pending, &pdev->flags)) {
wake_up(&xen_pcibk_aer_wait_queue);
}
}
/* Performing the configuration space reads/writes must not be done in atomic
* context because some of the pci_* functions can sleep (mostly due to ACPI
* use of semaphores). This function is intended to be called from a work
* queue in process context taking a struct xen_pcibk_device as a parameter */
void xen_pcibk_do_op(struct work_struct *data)
{
struct xen_pcibk_device *pdev =
container_of(data, struct xen_pcibk_device, op_work);
struct pci_dev *dev;
struct xen_pcibk_dev_data *dev_data = NULL;
struct xen_pci_op *op = &pdev->sh_info->op;
int test_intx = 0;
dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
if (dev == NULL)
op->err = XEN_PCI_ERR_dev_not_found;
else {
dev_data = pci_get_drvdata(dev);
if (dev_data)
test_intx = dev_data->enable_intx;
switch (op->cmd) {
case XEN_PCI_OP_conf_read:
op->err = xen_pcibk_config_read(dev,
op->offset, op->size, &op->value);
break;
case XEN_PCI_OP_conf_write:
op->err = xen_pcibk_config_write(dev,
op->offset, op->size, op->value);
break;
#ifdef CONFIG_PCI_MSI
case XEN_PCI_OP_enable_msi:
op->err = xen_pcibk_enable_msi(pdev, dev, op);
break;
case XEN_PCI_OP_disable_msi:
op->err = xen_pcibk_disable_msi(pdev, dev, op);
break;
case XEN_PCI_OP_enable_msix:
op->err = xen_pcibk_enable_msix(pdev, dev, op);
break;
case XEN_PCI_OP_disable_msix:
op->err = xen_pcibk_disable_msix(pdev, dev, op);
break;
#endif
default:
op->err = XEN_PCI_ERR_not_implemented;
break;
}
}
if (!op->err && dev && dev_data) {
/* Transition detected */
if ((dev_data->enable_intx != test_intx))
xen_pcibk_control_isr(dev, 0 /* no reset */);
}
/* Tell the driver domain that we're done. */
wmb();
clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
notify_remote_via_irq(pdev->evtchn_irq);
/* Mark that we're done. */
smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
clear_bit(_PDEVF_op_active, &pdev->flags);
smp_mb__after_clear_bit(); /* /before/ final check for work */
/* Check to see if the driver domain tried to start another request in
* between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
*/
xen_pcibk_test_and_schedule_op(pdev);
}
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id)
{
struct xen_pcibk_device *pdev = dev_id;
xen_pcibk_test_and_schedule_op(pdev);
return IRQ_HANDLED;
}
static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id)
{
struct pci_dev *dev = (struct pci_dev *)dev_id;
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
if (dev_data->isr_on && dev_data->ack_intr) {
dev_data->handled++;
if ((dev_data->handled % 1000) == 0) {
if (xen_test_irq_shared(irq)) {
printk(KERN_INFO "%s IRQ line is not shared "
"with other domains. Turning ISR off\n",
dev_data->irq_name);
dev_data->ack_intr = 0;
}
}
return IRQ_HANDLED;
}
return IRQ_NONE;
}

View File

@ -0,0 +1,259 @@
/*
* PCI Backend - Provides a Virtual PCI bus (with real devices)
* to the frontend
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/spinlock.h>
#include "pciback.h"
#define PCI_SLOT_MAX 32
#define DRV_NAME "xen-pciback"
struct vpci_dev_data {
/* Access to dev_list must be protected by lock */
struct list_head dev_list[PCI_SLOT_MAX];
spinlock_t lock;
};
static inline struct list_head *list_first(struct list_head *head)
{
return head->next;
}
static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
unsigned int domain,
unsigned int bus,
unsigned int devfn)
{
struct pci_dev_entry *entry;
struct pci_dev *dev = NULL;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
unsigned long flags;
if (domain != 0 || bus != 0)
return NULL;
if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
spin_lock_irqsave(&vpci_dev->lock, flags);
list_for_each_entry(entry,
&vpci_dev->dev_list[PCI_SLOT(devfn)],
list) {
if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) {
dev = entry->dev;
break;
}
}
spin_unlock_irqrestore(&vpci_dev->lock, flags);
}
return dev;
}
static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
{
if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus)
&& l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn))
return 1;
return 0;
}
static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev, int devid,
publish_pci_dev_cb publish_cb)
{
int err = 0, slot, func = -1;
struct pci_dev_entry *t, *dev_entry;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
unsigned long flags;
if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
err = -EFAULT;
xenbus_dev_fatal(pdev->xdev, err,
"Can't export bridges on the virtual PCI bus");
goto out;
}
dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
if (!dev_entry) {
err = -ENOMEM;
xenbus_dev_fatal(pdev->xdev, err,
"Error adding entry to virtual PCI bus");
goto out;
}
dev_entry->dev = dev;
spin_lock_irqsave(&vpci_dev->lock, flags);
/* Keep multi-function devices together on the virtual PCI bus */
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
if (!list_empty(&vpci_dev->dev_list[slot])) {
t = list_entry(list_first(&vpci_dev->dev_list[slot]),
struct pci_dev_entry, list);
if (match_slot(dev, t->dev)) {
pr_info(DRV_NAME ": vpci: %s: "
"assign to virtual slot %d func %d\n",
pci_name(dev), slot,
PCI_FUNC(dev->devfn));
list_add_tail(&dev_entry->list,
&vpci_dev->dev_list[slot]);
func = PCI_FUNC(dev->devfn);
goto unlock;
}
}
}
/* Assign to a new slot on the virtual PCI bus */
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
if (list_empty(&vpci_dev->dev_list[slot])) {
printk(KERN_INFO DRV_NAME
": vpci: %s: assign to virtual slot %d\n",
pci_name(dev), slot);
list_add_tail(&dev_entry->list,
&vpci_dev->dev_list[slot]);
func = PCI_FUNC(dev->devfn);
goto unlock;
}
}
err = -ENOMEM;
xenbus_dev_fatal(pdev->xdev, err,
"No more space on root virtual PCI bus");
unlock:
spin_unlock_irqrestore(&vpci_dev->lock, flags);
/* Publish this device. */
if (!err)
err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
out:
return err;
}
static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
struct pci_dev *dev)
{
int slot;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
struct pci_dev *found_dev = NULL;
unsigned long flags;
spin_lock_irqsave(&vpci_dev->lock, flags);
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
struct pci_dev_entry *e, *tmp;
list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
list) {
if (e->dev == dev) {
list_del(&e->list);
found_dev = e->dev;
kfree(e);
goto out;
}
}
}
out:
spin_unlock_irqrestore(&vpci_dev->lock, flags);
if (found_dev)
pcistub_put_pci_dev(found_dev);
}
static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
{
int slot;
struct vpci_dev_data *vpci_dev;
vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL);
if (!vpci_dev)
return -ENOMEM;
spin_lock_init(&vpci_dev->lock);
for (slot = 0; slot < PCI_SLOT_MAX; slot++)
INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
pdev->pci_dev_data = vpci_dev;
return 0;
}
static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
publish_pci_root_cb publish_cb)
{
/* The Virtual PCI bus has only one root */
return publish_cb(pdev, 0, 0);
}
static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
{
int slot;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
struct pci_dev_entry *e, *tmp;
list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
list) {
list_del(&e->list);
pcistub_put_pci_dev(e->dev);
kfree(e);
}
}
kfree(vpci_dev);
pdev->pci_dev_data = NULL;
}
static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
struct xen_pcibk_device *pdev,
unsigned int *domain, unsigned int *bus,
unsigned int *devfn)
{
struct pci_dev_entry *entry;
struct pci_dev *dev = NULL;
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
unsigned long flags;
int found = 0, slot;
spin_lock_irqsave(&vpci_dev->lock, flags);
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
list_for_each_entry(entry,
&vpci_dev->dev_list[slot],
list) {
dev = entry->dev;
if (dev && dev->bus->number == pcidev->bus->number
&& pci_domain_nr(dev->bus) ==
pci_domain_nr(pcidev->bus)
&& dev->devfn == pcidev->devfn) {
found = 1;
*domain = 0;
*bus = 0;
*devfn = PCI_DEVFN(slot,
PCI_FUNC(pcidev->devfn));
}
}
}
spin_unlock_irqrestore(&vpci_dev->lock, flags);
return found;
}
struct xen_pcibk_backend xen_pcibk_vpci_backend = {
.name = "vpci",
.init = __xen_pcibk_init_devices,
.free = __xen_pcibk_release_devices,
.find = __xen_pcibk_get_pcifront_dev,
.publish = __xen_pcibk_publish_pci_roots,
.release = __xen_pcibk_release_pci_dev,
.add = __xen_pcibk_add_pci_dev,
.get = __xen_pcibk_get_pci_dev,
};

View File

@ -0,0 +1,749 @@
/*
* PCI Backend Xenbus Setup - handles setup with frontend and xend
*
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <xen/xenbus.h>
#include <xen/events.h>
#include <asm/xen/pci.h>
#include <linux/workqueue.h>
#include "pciback.h"
#define DRV_NAME "xen-pciback"
#define INVALID_EVTCHN_IRQ (-1)
struct workqueue_struct *xen_pcibk_wq;
static int __read_mostly passthrough;
module_param(passthrough, bool, S_IRUGO);
MODULE_PARM_DESC(passthrough,
"Option to specify how to export PCI topology to guest:\n"\
" 0 - (default) Hide the true PCI topology and makes the frontend\n"\
" there is a single PCI bus with only the exported devices on it.\n"\
" For example, a device at 03:05.0 will be re-assigned to 00:00.0\n"\
" while second device at 02:1a.1 will be re-assigned to 00:01.1.\n"\
" 1 - Passthrough provides a real view of the PCI topology to the\n"\
" frontend (for example, a device at 06:01.b will still appear at\n"\
" 06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\
" exposed PCI devices to its driver domains. This may be required\n"\
" for drivers which depend on finding their hardward in certain\n"\
" bus/slot locations.");
static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
{
struct xen_pcibk_device *pdev;
pdev = kzalloc(sizeof(struct xen_pcibk_device), GFP_KERNEL);
if (pdev == NULL)
goto out;
dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
pdev->xdev = xdev;
dev_set_drvdata(&xdev->dev, pdev);
spin_lock_init(&pdev->dev_lock);
pdev->sh_info = NULL;
pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
pdev->be_watching = 0;
INIT_WORK(&pdev->op_work, xen_pcibk_do_op);
if (xen_pcibk_init_devices(pdev)) {
kfree(pdev);
pdev = NULL;
}
out:
return pdev;
}
static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
{
spin_lock(&pdev->dev_lock);
/* Ensure the guest can't trigger our handler before removing devices */
if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
unbind_from_irqhandler(pdev->evtchn_irq, pdev);
pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
}
spin_unlock(&pdev->dev_lock);
/* If the driver domain started an op, make sure we complete it
* before releasing the shared memory */
/* Note, the workqueue does not use spinlocks at all.*/
flush_workqueue(xen_pcibk_wq);
spin_lock(&pdev->dev_lock);
if (pdev->sh_info != NULL) {
xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
pdev->sh_info = NULL;
}
spin_unlock(&pdev->dev_lock);
}
static void free_pdev(struct xen_pcibk_device *pdev)
{
if (pdev->be_watching) {
unregister_xenbus_watch(&pdev->be_watch);
pdev->be_watching = 0;
}
xen_pcibk_disconnect(pdev);
xen_pcibk_release_devices(pdev);
dev_set_drvdata(&pdev->xdev->dev, NULL);
pdev->xdev = NULL;
kfree(pdev);
}
static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
int remote_evtchn)
{
int err = 0;
void *vaddr;
dev_dbg(&pdev->xdev->dev,
"Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
gnt_ref, remote_evtchn);
err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
if (err < 0) {
xenbus_dev_fatal(pdev->xdev, err,
"Error mapping other domain page in ours.");
goto out;
}
spin_lock(&pdev->dev_lock);
pdev->sh_info = vaddr;
spin_unlock(&pdev->dev_lock);
err = bind_interdomain_evtchn_to_irqhandler(
pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
0, DRV_NAME, pdev);
if (err < 0) {
xenbus_dev_fatal(pdev->xdev, err,
"Error binding event channel to IRQ");
goto out;
}
spin_lock(&pdev->dev_lock);
pdev->evtchn_irq = err;
spin_unlock(&pdev->dev_lock);
err = 0;
dev_dbg(&pdev->xdev->dev, "Attached!\n");
out:
return err;
}
static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
{
int err = 0;
int gnt_ref, remote_evtchn;
char *magic = NULL;
/* Make sure we only do this setup once */
if (xenbus_read_driver_state(pdev->xdev->nodename) !=
XenbusStateInitialised)
goto out;
/* Wait for frontend to state that it has published the configuration */
if (xenbus_read_driver_state(pdev->xdev->otherend) !=
XenbusStateInitialised)
goto out;
dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
"pci-op-ref", "%u", &gnt_ref,
"event-channel", "%u", &remote_evtchn,
"magic", NULL, &magic, NULL);
if (err) {
/* If configuration didn't get read correctly, wait longer */
xenbus_dev_fatal(pdev->xdev, err,
"Error reading configuration from frontend");
goto out;
}
if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
xenbus_dev_fatal(pdev->xdev, -EFAULT,
"version mismatch (%s/%s) with pcifront - "
"halting xen_pcibk",
magic, XEN_PCI_MAGIC);
goto out;
}
err = xen_pcibk_do_attach(pdev, gnt_ref, remote_evtchn);
if (err)
goto out;
dev_dbg(&pdev->xdev->dev, "Connecting...\n");
err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
if (err)
xenbus_dev_fatal(pdev->xdev, err,
"Error switching to connected state!");
dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
out:
kfree(magic);
return err;
}
static int xen_pcibk_publish_pci_dev(struct xen_pcibk_device *pdev,
unsigned int domain, unsigned int bus,
unsigned int devfn, unsigned int devid)
{
int err;
int len;
char str[64];
len = snprintf(str, sizeof(str), "vdev-%d", devid);
if (unlikely(len >= (sizeof(str) - 1))) {
err = -ENOMEM;
goto out;
}
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
"%04x:%02x:%02x.%02x", domain, bus,
PCI_SLOT(devfn), PCI_FUNC(devfn));
out:
return err;
}
static int xen_pcibk_export_device(struct xen_pcibk_device *pdev,
int domain, int bus, int slot, int func,
int devid)
{
struct pci_dev *dev;
int err = 0;
dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
domain, bus, slot, func);
dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
if (!dev) {
err = -EINVAL;
xenbus_dev_fatal(pdev->xdev, err,
"Couldn't locate PCI device "
"(%04x:%02x:%02x.%01x)! "
"perhaps already in-use?",
domain, bus, slot, func);
goto out;
}
err = xen_pcibk_add_pci_dev(pdev, dev, devid,
xen_pcibk_publish_pci_dev);
if (err)
goto out;
dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
if (xen_register_device_domain_owner(dev,
pdev->xdev->otherend_id) != 0) {
dev_err(&dev->dev, "device has been assigned to another " \
"domain! Over-writting the ownership, but beware.\n");
xen_unregister_device_domain_owner(dev);
xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
}
/* TODO: It'd be nice to export a bridge and have all of its children
* get exported with it. This may be best done in xend (which will
* have to calculate resource usage anyway) but we probably want to
* put something in here to ensure that if a bridge gets given to a
* driver domain, that all devices under that bridge are not given
* to other driver domains (as he who controls the bridge can disable
* it and stop the other devices from working).
*/
out:
return err;
}
static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,
int domain, int bus, int slot, int func)
{
int err = 0;
struct pci_dev *dev;
dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
domain, bus, slot, func);
dev = xen_pcibk_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
if (!dev) {
err = -EINVAL;
dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
"(%04x:%02x:%02x.%01x)! not owned by this domain\n",
domain, bus, slot, func);
goto out;
}
dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
xen_unregister_device_domain_owner(dev);
xen_pcibk_release_pci_dev(pdev, dev);
out:
return err;
}
static int xen_pcibk_publish_pci_root(struct xen_pcibk_device *pdev,
unsigned int domain, unsigned int bus)
{
unsigned int d, b;
int i, root_num, len, err;
char str[64];
dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
"root_num", "%d", &root_num);
if (err == 0 || err == -ENOENT)
root_num = 0;
else if (err < 0)
goto out;
/* Verify that we haven't already published this pci root */
for (i = 0; i < root_num; i++) {
len = snprintf(str, sizeof(str), "root-%d", i);
if (unlikely(len >= (sizeof(str) - 1))) {
err = -ENOMEM;
goto out;
}
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
str, "%x:%x", &d, &b);
if (err < 0)
goto out;
if (err != 2) {
err = -EINVAL;
goto out;
}
if (d == domain && b == bus) {
err = 0;
goto out;
}
}
len = snprintf(str, sizeof(str), "root-%d", root_num);
if (unlikely(len >= (sizeof(str) - 1))) {
err = -ENOMEM;
goto out;
}
dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
root_num, domain, bus);
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
"%04x:%02x", domain, bus);
if (err)
goto out;
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
"root_num", "%d", (root_num + 1));
out:
return err;
}
static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
{
int err = 0;
int num_devs;
int domain, bus, slot, func;
int substate;
int i, len;
char state_str[64];
char dev_str[64];
dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
/* Make sure we only reconfigure once */
if (xenbus_read_driver_state(pdev->xdev->nodename) !=
XenbusStateReconfiguring)
goto out;
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
&num_devs);
if (err != 1) {
if (err >= 0)
err = -EINVAL;
xenbus_dev_fatal(pdev->xdev, err,
"Error reading number of devices");
goto out;
}
for (i = 0; i < num_devs; i++) {
len = snprintf(state_str, sizeof(state_str), "state-%d", i);
if (unlikely(len >= (sizeof(state_str) - 1))) {
err = -ENOMEM;
xenbus_dev_fatal(pdev->xdev, err,
"String overflow while reading "
"configuration");
goto out;
}
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
"%d", &substate);
if (err != 1)
substate = XenbusStateUnknown;
switch (substate) {
case XenbusStateInitialising:
dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
if (unlikely(len >= (sizeof(dev_str) - 1))) {
err = -ENOMEM;
xenbus_dev_fatal(pdev->xdev, err,
"String overflow while "
"reading configuration");
goto out;
}
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
dev_str, "%x:%x:%x.%x",
&domain, &bus, &slot, &func);
if (err < 0) {
xenbus_dev_fatal(pdev->xdev, err,
"Error reading device "
"configuration");
goto out;
}
if (err != 4) {
err = -EINVAL;
xenbus_dev_fatal(pdev->xdev, err,
"Error parsing pci device "
"configuration");
goto out;
}
err = xen_pcibk_export_device(pdev, domain, bus, slot,
func, i);
if (err)
goto out;
/* Publish pci roots. */
err = xen_pcibk_publish_pci_roots(pdev,
xen_pcibk_publish_pci_root);
if (err) {
xenbus_dev_fatal(pdev->xdev, err,
"Error while publish PCI root"
"buses for frontend");
goto out;
}
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
state_str, "%d",
XenbusStateInitialised);
if (err) {
xenbus_dev_fatal(pdev->xdev, err,
"Error switching substate of "
"dev-%d\n", i);
goto out;
}
break;
case XenbusStateClosing:
dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
if (unlikely(len >= (sizeof(dev_str) - 1))) {
err = -ENOMEM;
xenbus_dev_fatal(pdev->xdev, err,
"String overflow while "
"reading configuration");
goto out;
}
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
dev_str, "%x:%x:%x.%x",
&domain, &bus, &slot, &func);
if (err < 0) {
xenbus_dev_fatal(pdev->xdev, err,
"Error reading device "
"configuration");
goto out;
}
if (err != 4) {
err = -EINVAL;
xenbus_dev_fatal(pdev->xdev, err,
"Error parsing pci device "
"configuration");
goto out;
}
err = xen_pcibk_remove_device(pdev, domain, bus, slot,
func);
if (err)
goto out;
/* TODO: If at some point we implement support for pci
* root hot-remove on pcifront side, we'll need to
* remove unnecessary xenstore nodes of pci roots here.
*/
break;
default:
break;
}
}
err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
if (err) {
xenbus_dev_fatal(pdev->xdev, err,
"Error switching to reconfigured state!");
goto out;
}
out:
return 0;
}
static void xen_pcibk_frontend_changed(struct xenbus_device *xdev,
enum xenbus_state fe_state)
{
struct xen_pcibk_device *pdev = dev_get_drvdata(&xdev->dev);
dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
switch (fe_state) {
case XenbusStateInitialised:
xen_pcibk_attach(pdev);
break;
case XenbusStateReconfiguring:
xen_pcibk_reconfigure(pdev);
break;
case XenbusStateConnected:
/* pcifront switched its state from reconfiguring to connected.
* Then switch to connected state.
*/
xenbus_switch_state(xdev, XenbusStateConnected);
break;
case XenbusStateClosing:
xen_pcibk_disconnect(pdev);
xenbus_switch_state(xdev, XenbusStateClosing);
break;
case XenbusStateClosed:
xen_pcibk_disconnect(pdev);
xenbus_switch_state(xdev, XenbusStateClosed);
if (xenbus_dev_is_online(xdev))
break;
/* fall through if not online */
case XenbusStateUnknown:
dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
device_unregister(&xdev->dev);
break;
default:
break;
}
}
static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
{
/* Get configuration from xend (if available now) */
int domain, bus, slot, func;
int err = 0;
int i, num_devs;
char dev_str[64];
char state_str[64];
/* It's possible we could get the call to setup twice, so make sure
* we're not already connected.
*/
if (xenbus_read_driver_state(pdev->xdev->nodename) !=
XenbusStateInitWait)
goto out;
dev_dbg(&pdev->xdev->dev, "getting be setup\n");
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
&num_devs);
if (err != 1) {
if (err >= 0)
err = -EINVAL;
xenbus_dev_fatal(pdev->xdev, err,
"Error reading number of devices");
goto out;
}
for (i = 0; i < num_devs; i++) {
int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
if (unlikely(l >= (sizeof(dev_str) - 1))) {
err = -ENOMEM;
xenbus_dev_fatal(pdev->xdev, err,
"String overflow while reading "
"configuration");
goto out;
}
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
"%x:%x:%x.%x", &domain, &bus, &slot, &func);
if (err < 0) {
xenbus_dev_fatal(pdev->xdev, err,
"Error reading device configuration");
goto out;
}
if (err != 4) {
err = -EINVAL;
xenbus_dev_fatal(pdev->xdev, err,
"Error parsing pci device "
"configuration");
goto out;
}
err = xen_pcibk_export_device(pdev, domain, bus, slot, func, i);
if (err)
goto out;
/* Switch substate of this device. */
l = snprintf(state_str, sizeof(state_str), "state-%d", i);
if (unlikely(l >= (sizeof(state_str) - 1))) {
err = -ENOMEM;
xenbus_dev_fatal(pdev->xdev, err,
"String overflow while reading "
"configuration");
goto out;
}
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
"%d", XenbusStateInitialised);
if (err) {
xenbus_dev_fatal(pdev->xdev, err, "Error switching "
"substate of dev-%d\n", i);
goto out;
}
}
err = xen_pcibk_publish_pci_roots(pdev, xen_pcibk_publish_pci_root);
if (err) {
xenbus_dev_fatal(pdev->xdev, err,
"Error while publish PCI root buses "
"for frontend");
goto out;
}
err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
if (err)
xenbus_dev_fatal(pdev->xdev, err,
"Error switching to initialised state!");
out:
if (!err)
/* see if pcifront is already configured (if not, we'll wait) */
xen_pcibk_attach(pdev);
return err;
}
static void xen_pcibk_be_watch(struct xenbus_watch *watch,
const char **vec, unsigned int len)
{
struct xen_pcibk_device *pdev =
container_of(watch, struct xen_pcibk_device, be_watch);
switch (xenbus_read_driver_state(pdev->xdev->nodename)) {
case XenbusStateInitWait:
xen_pcibk_setup_backend(pdev);
break;
default:
break;
}
}
static int xen_pcibk_xenbus_probe(struct xenbus_device *dev,
const struct xenbus_device_id *id)
{
int err = 0;
struct xen_pcibk_device *pdev = alloc_pdev(dev);
if (pdev == NULL) {
err = -ENOMEM;
xenbus_dev_fatal(dev, err,
"Error allocating xen_pcibk_device struct");
goto out;
}
/* wait for xend to configure us */
err = xenbus_switch_state(dev, XenbusStateInitWait);
if (err)
goto out;
/* watch the backend node for backend configuration information */
err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
xen_pcibk_be_watch);
if (err)
goto out;
pdev->be_watching = 1;
/* We need to force a call to our callback here in case
* xend already configured us!
*/
xen_pcibk_be_watch(&pdev->be_watch, NULL, 0);
out:
return err;
}
static int xen_pcibk_xenbus_remove(struct xenbus_device *dev)
{
struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev);
if (pdev != NULL)
free_pdev(pdev);
return 0;
}
static const struct xenbus_device_id xenpci_ids[] = {
{"pci"},
{""},
};
static struct xenbus_driver xenbus_xen_pcibk_driver = {
.name = DRV_NAME,
.owner = THIS_MODULE,
.ids = xenpci_ids,
.probe = xen_pcibk_xenbus_probe,
.remove = xen_pcibk_xenbus_remove,
.otherend_changed = xen_pcibk_frontend_changed,
};
struct xen_pcibk_backend *xen_pcibk_backend;
int __init xen_pcibk_xenbus_register(void)
{
xen_pcibk_wq = create_workqueue("xen_pciback_workqueue");
if (!xen_pcibk_wq) {
printk(KERN_ERR "%s: create"
"xen_pciback_workqueue failed\n", __func__);
return -EFAULT;
}
xen_pcibk_backend = &xen_pcibk_vpci_backend;
if (passthrough)
xen_pcibk_backend = &xen_pcibk_passthrough_backend;
pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name);
return xenbus_register_backend(&xenbus_xen_pcibk_driver);
}
void __exit xen_pcibk_xenbus_unregister(void)
{
destroy_workqueue(xen_pcibk_wq);
xenbus_unregister_driver(&xenbus_xen_pcibk_driver);
}