mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-11-25 05:04:09 +08:00
Merge branch 'stable/drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen
* 'stable/drivers' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen: xen/pciback: Have 'passthrough' option instead of XEN_PCIDEV_BACKEND_PASS and XEN_PCIDEV_BACKEND_VPCI xen/pciback: Remove the DEBUG option. xen/pciback: Drop two backends, squash and cleanup some code. xen/pciback: Print out the MSI/MSI-X (PIRQ) values xen/pciback: Don't setup an fake IRQ handler for SR-IOV devices. xen: rename pciback module to xen-pciback. xen/pciback: Fine-grain the spinlocks and fix BUG: scheduling while atomic cases. xen/pciback: Allocate IRQ handler for device that is shared with guest. xen/pciback: Disable MSI/MSI-X when reseting a device xen/pciback: guest SR-IOV support for PV guest xen/pciback: Register the owner (domain) of the PCI device. xen/pciback: Cleanup the driver based on checkpatch warnings and errors. xen/pciback: xen pci backend driver. xen: tmem: self-ballooning and frontswap-selfshrinking xen: Add module alias to autoload backend drivers xen: Populate xenbus device attributes xen: Add __attribute__((format(printf... where appropriate xen: prepare tmem shim to handle frontswap xen: allow enable use of VGA console on dom0
This commit is contained in:
commit
111ad119d1
@ -18,5 +18,5 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
|
||||
obj-$(CONFIG_SMP) += smp.o
|
||||
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
|
||||
obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o
|
||||
|
||||
obj-$(CONFIG_XEN_DOM0) += vga.o
|
||||
obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o
|
||||
|
@ -1248,6 +1248,14 @@ asmlinkage void __init xen_start_kernel(void)
|
||||
if (pci_xen)
|
||||
x86_init.pci.arch_init = pci_xen_init;
|
||||
} else {
|
||||
const struct dom0_vga_console_info *info =
|
||||
(void *)((char *)xen_start_info +
|
||||
xen_start_info->console.dom0.info_off);
|
||||
|
||||
xen_init_vga(info, xen_start_info->console.dom0.info_size);
|
||||
xen_start_info->console.domU.mfn = 0;
|
||||
xen_start_info->console.domU.evtchn = 0;
|
||||
|
||||
/* Make sure ACS will be enabled */
|
||||
pci_request_acs();
|
||||
}
|
||||
|
67
arch/x86/xen/vga.c
Normal file
67
arch/x86/xen/vga.c
Normal file
@ -0,0 +1,67 @@
|
||||
#include <linux/screen_info.h>
|
||||
#include <linux/init.h>
|
||||
|
||||
#include <asm/bootparam.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
#include <xen/interface/xen.h>
|
||||
|
||||
#include "xen-ops.h"
|
||||
|
||||
/*
 * Fill boot_params.screen_info from the VGA console description in @info.
 *
 * Text-mode defaults are written first.  The type-specific members of
 * @info are only read after @size has been compared against the offset
 * (and, where checked, the size) of the member in question; if @size is
 * too small the defaults are left in place.
 */
void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
{
	struct screen_info *si = &boot_params.screen_info;
	size_t needed;

	/* This is drawn from a dump from vgacon:startup in
	 * standard Linux. */
	si->orig_video_mode = 3;
	si->orig_video_isVGA = 1;
	si->orig_video_lines = 25;
	si->orig_video_cols = 80;
	si->orig_video_ega_bx = 3;
	si->orig_video_points = 16;
	si->orig_y = si->orig_video_lines - 1;

	if (info->video_type == XEN_VGATYPE_TEXT_MODE_3) {
		needed = offsetof(struct dom0_vga_console_info, u.text_mode_3)
			 + sizeof(info->u.text_mode_3);
		if (size < needed)
			return;

		si->orig_video_lines = info->u.text_mode_3.rows;
		si->orig_video_cols = info->u.text_mode_3.columns;
		si->orig_x = info->u.text_mode_3.cursor_x;
		si->orig_y = info->u.text_mode_3.cursor_y;
		si->orig_video_points = info->u.text_mode_3.font_height;
		return;
	}

	if (info->video_type != XEN_VGATYPE_VESA_LFB)
		return;

	/* Everything up to (not including) gbl_caps must be present. */
	if (size < offsetof(struct dom0_vga_console_info, u.vesa_lfb.gbl_caps))
		return;

	si->orig_video_isVGA = VIDEO_TYPE_VLFB;
	si->lfb_width = info->u.vesa_lfb.width;
	si->lfb_height = info->u.vesa_lfb.height;
	si->lfb_depth = info->u.vesa_lfb.bits_per_pixel;
	si->lfb_base = info->u.vesa_lfb.lfb_base;
	si->lfb_size = info->u.vesa_lfb.lfb_size;
	si->lfb_linelength = info->u.vesa_lfb.bytes_per_line;
	si->red_size = info->u.vesa_lfb.red_size;
	si->red_pos = info->u.vesa_lfb.red_pos;
	si->green_size = info->u.vesa_lfb.green_size;
	si->green_pos = info->u.vesa_lfb.green_pos;
	si->blue_size = info->u.vesa_lfb.blue_size;
	si->blue_pos = info->u.vesa_lfb.blue_pos;
	si->rsvd_size = info->u.vesa_lfb.rsvd_size;
	si->rsvd_pos = info->u.vesa_lfb.rsvd_pos;

	/* The two trailing members are copied only if @size covers them. */
	needed = offsetof(struct dom0_vga_console_info, u.vesa_lfb.gbl_caps)
		 + sizeof(info->u.vesa_lfb.gbl_caps);
	if (size >= needed)
		si->capabilities = info->u.vesa_lfb.gbl_caps;

	needed = offsetof(struct dom0_vga_console_info, u.vesa_lfb.mode_attrs)
		 + sizeof(info->u.vesa_lfb.mode_attrs);
	if (size >= needed)
		si->vesa_attributes = info->u.vesa_lfb.mode_attrs;
}
|
@ -88,6 +88,17 @@ static inline void xen_uninit_lock_cpu(int cpu)
|
||||
}
|
||||
#endif
|
||||
|
||||
struct dom0_vga_console_info;
|
||||
|
||||
#ifdef CONFIG_XEN_DOM0
|
||||
void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
|
||||
#else
|
||||
static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
|
||||
size_t size)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Declare an asm function, along with symbols needed to make it
|
||||
inlineable */
|
||||
#define DECL_ASM(ret, name, ...) \
|
||||
|
@ -684,7 +684,7 @@ again:
|
||||
|
||||
err = xenbus_switch_state(dev, XenbusStateConnected);
|
||||
if (err)
|
||||
xenbus_dev_fatal(dev, err, "switching to Connected state",
|
||||
xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
|
||||
dev->nodename);
|
||||
|
||||
return;
|
||||
|
@ -9,6 +9,23 @@ config XEN_BALLOON
|
||||
the system to expand the domain's memory allocation, or alternatively
|
||||
return unneeded memory to the system.
|
||||
|
||||
config XEN_SELFBALLOONING
|
||||
bool "Dynamically self-balloon kernel memory to target"
|
||||
depends on XEN && XEN_BALLOON && CLEANCACHE && SWAP
|
||||
default n
|
||||
help
|
||||
Self-ballooning dynamically balloons available kernel memory driven
|
||||
by the current usage of anonymous memory ("committed AS") and
|
||||
controlled by various sysfs-settable parameters. Configuring
|
||||
FRONTSWAP is highly recommended; if it is not configured, self-
|
||||
ballooning is disabled by default but can be enabled with the
|
||||
'selfballooning' kernel boot parameter. If FRONTSWAP is configured,
|
||||
frontswap-selfshrinking is enabled by default but can be disabled
|
||||
with the 'noselfshrink' kernel boot parameter; and self-ballooning
|
||||
is enabled by default but can be disabled with the 'noselfballooning'
|
||||
kernel boot parameter. Note that systems without a sufficiently
|
||||
large swap device should not enable self-ballooning.
|
||||
|
||||
config XEN_SCRUB_PAGES
|
||||
bool "Scrub pages before returning them to system"
|
||||
depends on XEN_BALLOON
|
||||
@ -105,4 +122,33 @@ config SWIOTLB_XEN
|
||||
depends on PCI
|
||||
select SWIOTLB
|
||||
|
||||
config XEN_TMEM
|
||||
bool
|
||||
default y if (CLEANCACHE || FRONTSWAP)
|
||||
help
|
||||
Shim to interface in-kernel Transcendent Memory hooks
|
||||
(e.g. cleancache and frontswap) to Xen tmem hypercalls.
|
||||
|
||||
config XEN_PCIDEV_BACKEND
|
||||
tristate "Xen PCI-device backend driver"
|
||||
depends on PCI && X86 && XEN
|
||||
depends on XEN_BACKEND
|
||||
default m
|
||||
help
|
||||
The PCI device backend driver allows the kernel to export arbitrary
|
||||
PCI devices to other guests. If you select this to be a module, you
|
||||
will need to make sure no other driver has bound to the device(s)
|
||||
you want to make visible to other guests.
|
||||
|
||||
The parameter "passthrough" allows you to specify how you want the PCI
|
||||
devices to appear in the guest. You can choose the default (0) where
|
||||
PCI topology starts at 00.00.0, or (1) for passthrough if you want
|
||||
the PCI device topology to appear the same as in the host.
|
||||
|
||||
The "hide" parameter (only applicable if backend driver is compiled
|
||||
into the kernel) allows you to bind the PCI devices to this module
|
||||
from the default device drivers. The argument is the list of PCI BDFs:
|
||||
xen-pciback.hide=(03:00.0)(04:00.0)
|
||||
|
||||
If in doubt, say m.
|
||||
endmenu
|
||||
|
@ -1,6 +1,5 @@
|
||||
obj-y += grant-table.o features.o events.o manage.o balloon.o
|
||||
obj-y += xenbus/
|
||||
obj-y += tmem.o
|
||||
|
||||
nostackp := $(call cc-option, -fno-stack-protector)
|
||||
CFLAGS_features.o := $(nostackp)
|
||||
@ -9,14 +8,17 @@ obj-$(CONFIG_BLOCK) += biomerge.o
|
||||
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
|
||||
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
|
||||
obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o
|
||||
obj-$(CONFIG_XEN_SELFBALLOONING) += xen-selfballoon.o
|
||||
obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
|
||||
obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
|
||||
obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o
|
||||
obj-$(CONFIG_XENFS) += xenfs/
|
||||
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
|
||||
obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o
|
||||
obj-$(CONFIG_XEN_TMEM) += tmem.o
|
||||
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
|
||||
obj-$(CONFIG_XEN_DOM0) += pci.o
|
||||
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
|
||||
|
||||
xen-evtchn-y := evtchn.o
|
||||
xen-gntdev-y := gntdev.o
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Xen implementation for transcendent memory (tmem)
|
||||
*
|
||||
* Copyright (C) 2009-2010 Oracle Corp. All rights reserved.
|
||||
* Copyright (C) 2009-2011 Oracle Corp. All rights reserved.
|
||||
* Author: Dan Magenheimer
|
||||
*/
|
||||
|
||||
@ -9,8 +9,14 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/cleancache.h>
|
||||
|
||||
/* temporary ifdef until include/linux/frontswap.h is upstream */
|
||||
#ifdef CONFIG_FRONTSWAP
|
||||
#include <linux/frontswap.h>
|
||||
#endif
|
||||
|
||||
#include <xen/xen.h>
|
||||
#include <xen/interface/xen.h>
|
||||
#include <asm/xen/hypercall.h>
|
||||
@ -122,14 +128,8 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid)
|
||||
return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0);
|
||||
}
|
||||
|
||||
static int xen_tmem_destroy_pool(u32 pool_id)
|
||||
{
|
||||
struct tmem_oid oid = { { 0 } };
|
||||
|
||||
return xen_tmem_op(TMEM_DESTROY_POOL, pool_id, oid, 0, 0, 0, 0, 0);
|
||||
}
|
||||
|
||||
int tmem_enabled;
|
||||
int tmem_enabled __read_mostly;
|
||||
EXPORT_SYMBOL(tmem_enabled);
|
||||
|
||||
static int __init enable_tmem(char *s)
|
||||
{
|
||||
@ -139,6 +139,14 @@ static int __init enable_tmem(char *s)
|
||||
|
||||
__setup("tmem", enable_tmem);
|
||||
|
||||
#ifdef CONFIG_CLEANCACHE
|
||||
/*
 * Issue TMEM_DESTROY_POOL for @pool_id.  The object id and the remaining
 * xen_tmem_op() arguments are passed as zero.  Returns xen_tmem_op()'s result.
 */
static int xen_tmem_destroy_pool(u32 pool_id)
{
	struct tmem_oid zero_oid = { { 0 } };
	int rc;

	rc = xen_tmem_op(TMEM_DESTROY_POOL, pool_id, zero_oid, 0, 0, 0, 0, 0);
	return rc;
}
|
||||
|
||||
/* cleancache ops */
|
||||
|
||||
static void tmem_cleancache_put_page(int pool, struct cleancache_filekey key,
|
||||
@ -240,18 +248,156 @@ static struct cleancache_ops tmem_cleancache_ops = {
|
||||
.init_shared_fs = tmem_cleancache_init_shared_fs,
|
||||
.init_fs = tmem_cleancache_init_fs
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_FRONTSWAP
|
||||
/* frontswap tmem operations */
|
||||
|
||||
/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
|
||||
static int tmem_frontswap_poolid;
|
||||
|
||||
/*
|
||||
* Swizzling increases objects per swaptype, increasing tmem concurrency
|
||||
* for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS
|
||||
*/
|
||||
/* Number of low index bits that are folded into the object id. */
#define SWIZ_BITS		4
/* Mask selecting the low SWIZ_BITS bits of an index. */
#define SWIZ_MASK		((1 << SWIZ_BITS) - 1)
/*
 * First object-id word: @_type shifted up by SWIZ_BITS, with the low
 * SWIZ_BITS bits of @_ind below it.  Arguments are parenthesized so that
 * expression arguments (e.g. "a | b") keep their intended value.
 */
#define _oswiz(_type, _ind)	(((_type) << SWIZ_BITS) | ((_ind) & SWIZ_MASK))
/* Index with the low SWIZ_BITS bits (those carried by the oid) removed. */
#define iswiz(_ind)		((_ind) >> SWIZ_BITS)
|
||||
|
||||
/*
 * Build a tmem object id whose first word is _oswiz(@type, @ind); all
 * remaining words are zero.
 */
static inline struct tmem_oid oswiz(unsigned type, u32 ind)
{
	struct tmem_oid swizzled = { .oid = { _oswiz(type, ind) } };

	return swizzled;
}
|
||||
|
||||
/* returns 0 if the page was successfully put into frontswap, -1 if not */
|
||||
/*
 * Put @page into the frontswap tmem pool under (@type, @offset).
 *
 * Returns 0 if xen_tmem_put_page() returned 1, and -1 otherwise -- which
 * includes the cases where no pool id has been assigned (negative pool id)
 * or @offset does not survive truncation to 32 bits.
 */
static int tmem_frontswap_put_page(unsigned type, pgoff_t offset,
				   struct page *page)
{
	const u64 wide_index = (u64)offset;
	const u32 index = (u32)offset;
	const unsigned long pfn = page_to_pfn(page);
	const int pool_id = tmem_frontswap_poolid;

	if (pool_id < 0 || wide_index != index)
		return -1;

	mb(); /* ensure page is quiescent; tmem may address it with an alias */

	/* translate Xen tmem return values to linux semantics */
	return xen_tmem_put_page(pool_id, oswiz(type, index), iswiz(index),
				 pfn) == 1 ? 0 : -1;
}
|
||||
|
||||
/*
|
||||
* returns 0 if the page was successfully gotten from frontswap, -1 if
|
||||
* was not present (should never happen!)
|
||||
*/
|
||||
/*
 * Fetch the page stored under (@type, @offset) from the frontswap tmem
 * pool into @page.
 *
 * Returns 0 if xen_tmem_get_page() returned 1, and -1 otherwise -- which
 * includes a negative pool id and an @offset that does not fit in 32 bits.
 */
static int tmem_frontswap_get_page(unsigned type, pgoff_t offset,
				   struct page *page)
{
	const u64 wide_index = (u64)offset;
	const u32 index = (u32)offset;
	const unsigned long pfn = page_to_pfn(page);
	const int pool_id = tmem_frontswap_poolid;

	if (pool_id < 0 || wide_index != index)
		return -1;

	/* translate Xen tmem return values to linux semantics */
	return xen_tmem_get_page(pool_id, oswiz(type, index), iswiz(index),
				 pfn) == 1 ? 0 : -1;
}
|
||||
|
||||
/* flush a single page from frontswap */
|
||||
/*
 * Flush the single page stored under (@type, @offset).  Does nothing when
 * the pool id is negative or @offset does not fit in 32 bits; the result
 * of xen_tmem_flush_page() is deliberately discarded.
 */
static void tmem_frontswap_flush_page(unsigned type, pgoff_t offset)
{
	const u64 wide_index = (u64)offset;
	const u32 index = (u32)offset;
	const int pool_id = tmem_frontswap_poolid;

	if (pool_id < 0 || wide_index != index)
		return;

	(void) xen_tmem_flush_page(pool_id, oswiz(type, index), iswiz(index));
}
|
||||
|
||||
/* flush all pages from the passed swaptype */
|
||||
static void tmem_frontswap_flush_area(unsigned type)
|
||||
{
|
||||
int pool = tmem_frontswap_poolid;
|
||||
int ind;
|
||||
|
||||
if (pool < 0)
|
||||
return;
|
||||
for (ind = SWIZ_MASK; ind >= 0; ind--)
|
||||
(void)xen_tmem_flush_object(pool, oswiz(type, ind));
|
||||
}
|
||||
|
||||
static void tmem_frontswap_init(unsigned ignored)
|
||||
{
|
||||
struct tmem_pool_uuid private = TMEM_POOL_PRIVATE_UUID;
|
||||
|
||||
/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
|
||||
if (tmem_frontswap_poolid < 0)
|
||||
tmem_frontswap_poolid =
|
||||
xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE);
|
||||
}
|
||||
|
||||
static int __initdata use_frontswap = 1;
|
||||
|
||||
static int __init no_frontswap(char *s)
|
||||
{
|
||||
use_frontswap = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("nofrontswap", no_frontswap);
|
||||
|
||||
static struct frontswap_ops tmem_frontswap_ops = {
|
||||
.put_page = tmem_frontswap_put_page,
|
||||
.get_page = tmem_frontswap_get_page,
|
||||
.flush_page = tmem_frontswap_flush_page,
|
||||
.flush_area = tmem_frontswap_flush_area,
|
||||
.init = tmem_frontswap_init
|
||||
};
|
||||
#endif
|
||||
|
||||
static int __init xen_tmem_init(void)
|
||||
{
|
||||
struct cleancache_ops old_ops;
|
||||
|
||||
if (!xen_domain())
|
||||
return 0;
|
||||
#ifdef CONFIG_FRONTSWAP
|
||||
if (tmem_enabled && use_frontswap) {
|
||||
char *s = "";
|
||||
struct frontswap_ops old_ops =
|
||||
frontswap_register_ops(&tmem_frontswap_ops);
|
||||
|
||||
tmem_frontswap_poolid = -1;
|
||||
if (old_ops.init != NULL)
|
||||
s = " (WARNING: frontswap_ops overridden)";
|
||||
printk(KERN_INFO "frontswap enabled, RAM provided by "
|
||||
"Xen Transcendent Memory\n");
|
||||
}
|
||||
#endif
|
||||
#ifdef CONFIG_CLEANCACHE
|
||||
BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
|
||||
if (tmem_enabled && use_cleancache) {
|
||||
char *s = "";
|
||||
old_ops = cleancache_register_ops(&tmem_cleancache_ops);
|
||||
struct cleancache_ops old_ops =
|
||||
cleancache_register_ops(&tmem_cleancache_ops);
|
||||
if (old_ops.init_fs != NULL)
|
||||
s = " (WARNING: cleancache_ops overridden)";
|
||||
printk(KERN_INFO "cleancache enabled, RAM provided by "
|
||||
|
@ -98,6 +98,8 @@ static int __init balloon_init(void)
|
||||
|
||||
register_balloon(&balloon_sysdev);
|
||||
|
||||
register_xen_selfballooning(&balloon_sysdev);
|
||||
|
||||
target_watch.callback = watch_target;
|
||||
xenstore_notifier.notifier_call = balloon_init_watcher;
|
||||
|
||||
|
7
drivers/xen/xen-pciback/Makefile
Normal file
7
drivers/xen/xen-pciback/Makefile
Normal file
@ -0,0 +1,7 @@
|
||||
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback.o
|
||||
|
||||
xen-pciback-y := pci_stub.o pciback_ops.o xenbus.o
|
||||
xen-pciback-y += conf_space.o conf_space_header.o \
|
||||
conf_space_capability.o \
|
||||
conf_space_quirks.o vpci.o \
|
||||
passthrough.o
|
438
drivers/xen/xen-pciback/conf_space.c
Normal file
438
drivers/xen/xen-pciback/conf_space.c
Normal file
@ -0,0 +1,438 @@
|
||||
/*
|
||||
* PCI Backend - Functions for creating a virtual configuration space for
|
||||
* exported PCI Devices.
|
||||
* It's dangerous to allow PCI Driver Domains to change their
|
||||
* device's resources (memory, i/o ports, interrupts). We need to
|
||||
* restrict changes to certain PCI Configuration registers:
|
||||
* BARs, INTERRUPT_PIN, most registers in the header...
|
||||
*
|
||||
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/pci.h>
|
||||
#include "pciback.h"
|
||||
#include "conf_space.h"
|
||||
#include "conf_space_quirks.h"
|
||||
|
||||
#define DRV_NAME "xen-pciback"
|
||||
static int permissive;
|
||||
module_param(permissive, bool, 0644);
|
||||
|
||||
/* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word,
|
||||
* xen_pcibk_write_config_word, and xen_pcibk_write_config_byte are created. */
|
||||
#define DEFINE_PCI_CONFIG(op, size, type) \
|
||||
int xen_pcibk_##op##_config_##size \
|
||||
(struct pci_dev *dev, int offset, type value, void *data) \
|
||||
{ \
|
||||
return pci_##op##_config_##size(dev, offset, value); \
|
||||
}
|
||||
|
||||
DEFINE_PCI_CONFIG(read, byte, u8 *)
|
||||
DEFINE_PCI_CONFIG(read, word, u16 *)
|
||||
DEFINE_PCI_CONFIG(read, dword, u32 *)
|
||||
|
||||
DEFINE_PCI_CONFIG(write, byte, u8)
|
||||
DEFINE_PCI_CONFIG(write, word, u16)
|
||||
DEFINE_PCI_CONFIG(write, dword, u32)
|
||||
|
||||
/*
 * Call the read handler of @entry's field that matches the field's size
 * (1, 2 or 4 bytes).  *@value is zeroed first, so it is 0 when the field
 * has no read handler or an unexpected size.  Returns the handler's result,
 * or 0 when no handler was called.
 */
static int conf_space_read(struct pci_dev *dev,
			   const struct config_field_entry *entry,
			   int offset, u32 *value)
{
	const struct config_field *fld = entry->field;

	*value = 0;

	if (fld->size == 1) {
		if (!fld->u.b.read)
			return 0;
		return fld->u.b.read(dev, offset, (u8 *) value, entry->data);
	}

	if (fld->size == 2) {
		if (!fld->u.w.read)
			return 0;
		return fld->u.w.read(dev, offset, (u16 *) value, entry->data);
	}

	if (fld->size == 4 && fld->u.dw.read)
		return fld->u.dw.read(dev, offset, value, entry->data);

	return 0;
}
|
||||
|
||||
/*
 * Call the write handler of @entry's field that matches the field's size
 * (1, 2 or 4 bytes), truncating @value to that width.  Returns the
 * handler's result, or 0 when the field has no write handler or an
 * unexpected size.
 */
static int conf_space_write(struct pci_dev *dev,
			    const struct config_field_entry *entry,
			    int offset, u32 value)
{
	const struct config_field *fld = entry->field;

	if (fld->size == 1) {
		if (!fld->u.b.write)
			return 0;
		return fld->u.b.write(dev, offset, (u8) value, entry->data);
	}

	if (fld->size == 2) {
		if (!fld->u.w.write)
			return 0;
		return fld->u.w.write(dev, offset, (u16) value, entry->data);
	}

	if (fld->size == 4 && fld->u.dw.write)
		return fld->u.dw.write(dev, offset, value, entry->data);

	return 0;
}
|
||||
|
||||
/*
 * Bit mask covering an access of @size bytes: 0xff for 1, 0xffff for 2,
 * and 0xffffffff for every other value.
 */
static inline u32 get_mask(int size)
{
	switch (size) {
	case 1:
		return 0xff;
	case 2:
		return 0xffff;
	default:
		return 0xffffffff;
	}
}
|
||||
|
||||
/*
 * Return 1 when @size is 1, 2 or 4 and @offset is a multiple of @size
 * (no un-aligned requests); return 0 otherwise.
 */
static inline int valid_request(int offset, int size)
{
	switch (size) {
	case 1:
	case 2:
	case 4:
		return (offset % size) == 0 ? 1 : 0;
	default:
		/* unsupported width; also keeps size == 0 away from '%' */
		return 0;
	}
}
|
||||
|
||||
/*
 * Overlay @new_val onto @val.
 *
 * @new_val and @new_val_mask are first shifted by @offset bytes: left for
 * a non-negative @offset, right for a negative one.  Bits selected by the
 * shifted mask are then taken from the shifted @new_val, all other bits
 * from @val.
 */
static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
			      int offset)
{
	u32 bits = new_val;
	u32 mask = new_val_mask;

	if (offset < 0) {
		mask >>= (offset * -8);
		bits >>= (offset * -8);
	} else {
		mask <<= (offset * 8);
		bits <<= (offset * 8);
	}

	return (val & ~mask) | (bits & mask);
}
|
||||
|
||||
static int pcibios_err_to_errno(int err)
|
||||
{
|
||||
switch (err) {
|
||||
case PCIBIOS_SUCCESSFUL:
|
||||
return XEN_PCI_ERR_success;
|
||||
case PCIBIOS_DEVICE_NOT_FOUND:
|
||||
return XEN_PCI_ERR_dev_not_found;
|
||||
case PCIBIOS_BAD_REGISTER_NUMBER:
|
||||
return XEN_PCI_ERR_invalid_offset;
|
||||
case PCIBIOS_FUNC_NOT_SUPPORTED:
|
||||
return XEN_PCI_ERR_not_implemented;
|
||||
case PCIBIOS_SET_FAILED:
|
||||
return XEN_PCI_ERR_access_denied;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
|
||||
u32 *ret_val)
|
||||
{
|
||||
int err = 0;
|
||||
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
|
||||
const struct config_field_entry *cfg_entry;
|
||||
const struct config_field *field;
|
||||
int req_start, req_end, field_start, field_end;
|
||||
/* if read fails for any reason, return 0
|
||||
* (as if device didn't respond) */
|
||||
u32 value = 0, tmp_val;
|
||||
|
||||
if (unlikely(verbose_request))
|
||||
printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x\n",
|
||||
pci_name(dev), size, offset);
|
||||
|
||||
if (!valid_request(offset, size)) {
|
||||
err = XEN_PCI_ERR_invalid_offset;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Get the real value first, then modify as appropriate */
|
||||
switch (size) {
|
||||
case 1:
|
||||
err = pci_read_config_byte(dev, offset, (u8 *) &value);
|
||||
break;
|
||||
case 2:
|
||||
err = pci_read_config_word(dev, offset, (u16 *) &value);
|
||||
break;
|
||||
case 4:
|
||||
err = pci_read_config_dword(dev, offset, &value);
|
||||
break;
|
||||
}
|
||||
|
||||
list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
|
||||
field = cfg_entry->field;
|
||||
|
||||
req_start = offset;
|
||||
req_end = offset + size;
|
||||
field_start = OFFSET(cfg_entry);
|
||||
field_end = OFFSET(cfg_entry) + field->size;
|
||||
|
||||
if ((req_start >= field_start && req_start < field_end)
|
||||
|| (req_end > field_start && req_end <= field_end)) {
|
||||
err = conf_space_read(dev, cfg_entry, field_start,
|
||||
&tmp_val);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
value = merge_value(value, tmp_val,
|
||||
get_mask(field->size),
|
||||
field_start - req_start);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
if (unlikely(verbose_request))
|
||||
printk(KERN_DEBUG DRV_NAME ": %s: read %d bytes at 0x%x = %x\n",
|
||||
pci_name(dev), size, offset, value);
|
||||
|
||||
*ret_val = value;
|
||||
return pcibios_err_to_errno(err);
|
||||
}
|
||||
|
||||
/*
 * Write @size bytes of @value to @dev's configuration space at @offset,
 * routing the write through the virtual configuration fields registered
 * for the device.
 *
 * For every entry on the device's config_fields list that shares at least
 * one byte with the request, the field's current value is read, the
 * relevant bytes of @value are merged into it, and the result is handed
 * to the field's write handler via conf_space_write().
 *
 * If no field overlapped the request, the write goes to the hardware only
 * when the per-device or module-wide permissive flag is set; otherwise it
 * is dropped and a warning is logged once per device.
 *
 * Returns XEN_PCI_ERR_invalid_offset for a request rejected by
 * valid_request(), otherwise pcibios_err_to_errno() of the last status.
 */
int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
{
	int err = 0, handled = 0;
	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
	const struct config_field_entry *cfg_entry;
	const struct config_field *field;
	u32 tmp_val;
	int req_start, req_end, field_start, field_end;

	if (unlikely(verbose_request))
		printk(KERN_DEBUG
		       DRV_NAME ": %s: write request %d bytes at 0x%x = %x\n",
		       pci_name(dev), size, offset, value);

	if (!valid_request(offset, size))
		return XEN_PCI_ERR_invalid_offset;

	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
		field = cfg_entry->field;

		req_start = offset;
		req_end = offset + size;
		field_start = OFFSET(cfg_entry);
		field_end = OFFSET(cfg_entry) + field->size;

		/*
		 * Two half-open byte ranges intersect exactly when each one
		 * starts before the other ends.  This also catches a field
		 * lying strictly inside the request, which a test of only
		 * the request's end points against the field would miss --
		 * such a write would otherwise skip the field's handler and
		 * be treated as unhandled below.
		 */
		if (req_end > field_start && field_end > req_start) {
			tmp_val = 0;

			err = xen_pcibk_config_read(dev, field_start,
						  field->size, &tmp_val);
			if (err)
				break;

			tmp_val = merge_value(tmp_val, value, get_mask(size),
					      req_start - field_start);

			err = conf_space_write(dev, cfg_entry, field_start,
					       tmp_val);

			/* handled is set true here, but not every byte
			 * may have been written! Properly detecting if
			 * every byte is handled is unnecessary as the
			 * flag is used to detect devices that need
			 * special helpers to work correctly.
			 */
			handled = 1;
		}
	}

	if (!handled && !err) {
		/* By default, anything not specifically handled above is
		 * read-only. The permissive flag changes this behavior so
		 * that anything not specifically handled above is writable.
		 * This means that some fields may still be read-only because
		 * they have entries in the config_field list that intercept
		 * the write and do nothing. */
		if (dev_data->permissive || permissive) {
			switch (size) {
			case 1:
				err = pci_write_config_byte(dev, offset,
							    (u8) value);
				break;
			case 2:
				err = pci_write_config_word(dev, offset,
							    (u16) value);
				break;
			case 4:
				err = pci_write_config_dword(dev, offset,
							     (u32) value);
				break;
			}
		} else if (!dev_data->warned_on_write) {
			/* warn only once per device */
			dev_data->warned_on_write = 1;
			dev_warn(&dev->dev, "Driver tried to write to a "
				 "read-only configuration space field at offset"
				 " 0x%x, size %d. This may be harmless, but if "
				 "you have problems with your device:\n"
				 "1) see permissive attribute in sysfs\n"
				 "2) report problems to the xen-devel "
				 "mailing list along with details of your "
				 "device obtained from lspci.\n", offset, size);
		}
	}

	return pcibios_err_to_errno(err);
}
|
||||
|
||||
void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev)
|
||||
{
|
||||
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
|
||||
struct config_field_entry *cfg_entry, *t;
|
||||
const struct config_field *field;
|
||||
|
||||
dev_dbg(&dev->dev, "free-ing dynamically allocated virtual "
|
||||
"configuration space fields\n");
|
||||
if (!dev_data)
|
||||
return;
|
||||
|
||||
list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
|
||||
field = cfg_entry->field;
|
||||
|
||||
if (field->clean) {
|
||||
field->clean((struct config_field *)field);
|
||||
|
||||
kfree(cfg_entry->data);
|
||||
|
||||
list_del(&cfg_entry->list);
|
||||
kfree(cfg_entry);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void xen_pcibk_config_reset_dev(struct pci_dev *dev)
|
||||
{
|
||||
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
|
||||
const struct config_field_entry *cfg_entry;
|
||||
const struct config_field *field;
|
||||
|
||||
dev_dbg(&dev->dev, "resetting virtual configuration space\n");
|
||||
if (!dev_data)
|
||||
return;
|
||||
|
||||
list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
|
||||
field = cfg_entry->field;
|
||||
|
||||
if (field->reset)
|
||||
field->reset(dev, OFFSET(cfg_entry), cfg_entry->data);
|
||||
}
|
||||
}
|
||||
|
||||
void xen_pcibk_config_free_dev(struct pci_dev *dev)
|
||||
{
|
||||
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
|
||||
struct config_field_entry *cfg_entry, *t;
|
||||
const struct config_field *field;
|
||||
|
||||
dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
|
||||
if (!dev_data)
|
||||
return;
|
||||
|
||||
list_for_each_entry_safe(cfg_entry, t, &dev_data->config_fields, list) {
|
||||
list_del(&cfg_entry->list);
|
||||
|
||||
field = cfg_entry->field;
|
||||
|
||||
if (field->release)
|
||||
field->release(dev, OFFSET(cfg_entry), cfg_entry->data);
|
||||
|
||||
kfree(cfg_entry);
|
||||
}
|
||||
}
|
||||
|
||||
int xen_pcibk_config_add_field_offset(struct pci_dev *dev,
|
||||
const struct config_field *field,
|
||||
unsigned int base_offset)
|
||||
{
|
||||
int err = 0;
|
||||
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
|
||||
struct config_field_entry *cfg_entry;
|
||||
void *tmp;
|
||||
|
||||
cfg_entry = kmalloc(sizeof(*cfg_entry), GFP_KERNEL);
|
||||
if (!cfg_entry) {
|
||||
err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
cfg_entry->data = NULL;
|
||||
cfg_entry->field = field;
|
||||
cfg_entry->base_offset = base_offset;
|
||||
|
||||
/* silently ignore duplicate fields */
|
||||
err = xen_pcibk_field_is_dup(dev, OFFSET(cfg_entry));
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (field->init) {
|
||||
tmp = field->init(dev, OFFSET(cfg_entry));
|
||||
|
||||
if (IS_ERR(tmp)) {
|
||||
err = PTR_ERR(tmp);
|
||||
goto out;
|
||||
}
|
||||
|
||||
cfg_entry->data = tmp;
|
||||
}
|
||||
|
||||
dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
|
||||
OFFSET(cfg_entry));
|
||||
list_add_tail(&cfg_entry->list, &dev_data->config_fields);
|
||||
|
||||
out:
|
||||
if (err)
|
||||
kfree(cfg_entry);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/* This sets up the device's virtual configuration space to keep track of
|
||||
* certain registers (like the base address registers (BARs) so that we can
|
||||
* keep the client from manipulating them directly.
|
||||
*/
|
||||
int xen_pcibk_config_init_dev(struct pci_dev *dev)
|
||||
{
|
||||
int err = 0;
|
||||
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
|
||||
|
||||
dev_dbg(&dev->dev, "initializing virtual configuration space\n");
|
||||
|
||||
INIT_LIST_HEAD(&dev_data->config_fields);
|
||||
|
||||
err = xen_pcibk_config_header_add_fields(dev);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = xen_pcibk_config_capability_add_fields(dev);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = xen_pcibk_config_quirks_init(dev);
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Config-space initialisation entry point; forwards to
 * xen_pcibk_config_capability_init() and returns its result.
 */
int xen_pcibk_config_init(void)
{
	int rc = xen_pcibk_config_capability_init();

	return rc;
}
|
126
drivers/xen/xen-pciback/conf_space.h
Normal file
126
drivers/xen/xen-pciback/conf_space.h
Normal file
@ -0,0 +1,126 @@
|
||||
/*
|
||||
* PCI Backend - Common data structures for overriding the configuration space
|
||||
*
|
||||
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
||||
*/
|
||||
|
||||
#ifndef __XEN_PCIBACK_CONF_SPACE_H__
|
||||
#define __XEN_PCIBACK_CONF_SPACE_H__
|
||||
|
||||
#include <linux/list.h>
|
||||
#include <linux/err.h>
|
||||
|
||||
/* conf_field_init can return an errno in a ptr with ERR_PTR() */
|
||||
typedef void *(*conf_field_init) (struct pci_dev *dev, int offset);
|
||||
typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data);
|
||||
typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data);
|
||||
|
||||
typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value,
|
||||
void *data);
|
||||
typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value,
|
||||
void *data);
|
||||
typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value,
|
||||
void *data);
|
||||
typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value,
|
||||
void *data);
|
||||
typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value,
|
||||
void *data);
|
||||
typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value,
|
||||
void *data);
|
||||
|
||||
/* These are the fields within the configuration space which we
|
||||
* are interested in intercepting reads/writes to and changing their
|
||||
* values.
|
||||
*/
|
||||
struct config_field {
|
||||
unsigned int offset;
|
||||
unsigned int size;
|
||||
unsigned int mask;
|
||||
conf_field_init init;
|
||||
conf_field_reset reset;
|
||||
conf_field_free release;
|
||||
void (*clean) (struct config_field *field);
|
||||
union {
|
||||
struct {
|
||||
conf_dword_write write;
|
||||
conf_dword_read read;
|
||||
} dw;
|
||||
struct {
|
||||
conf_word_write write;
|
||||
conf_word_read read;
|
||||
} w;
|
||||
struct {
|
||||
conf_byte_write write;
|
||||
conf_byte_read read;
|
||||
} b;
|
||||
} u;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
struct config_field_entry {
|
||||
struct list_head list;
|
||||
const struct config_field *field;
|
||||
unsigned int base_offset;
|
||||
void *data;
|
||||
};
|
||||
|
||||
#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
|
||||
|
||||
/* Add fields to a device - the add_fields macro expects to get a pointer to
|
||||
* the first entry in an array (of which the ending is marked by size==0)
|
||||
*/
|
||||
int xen_pcibk_config_add_field_offset(struct pci_dev *dev,
|
||||
const struct config_field *field,
|
||||
unsigned int offset);
|
||||
|
||||
/* Install a single field at the offset given in the field itself, i.e.
 * with a base offset of zero.
 */
static inline int xen_pcibk_config_add_field(struct pci_dev *dev,
					   const struct config_field *field)
{
	const unsigned int base = 0;

	return xen_pcibk_config_add_field_offset(dev, field, base);
}
|
||||
|
||||
static inline int xen_pcibk_config_add_fields(struct pci_dev *dev,
|
||||
const struct config_field *field)
|
||||
{
|
||||
int i, err = 0;
|
||||
for (i = 0; field[i].size != 0; i++) {
|
||||
err = xen_pcibk_config_add_field(dev, &field[i]);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int xen_pcibk_config_add_fields_offset(struct pci_dev *dev,
|
||||
const struct config_field *field,
|
||||
unsigned int offset)
|
||||
{
|
||||
int i, err = 0;
|
||||
for (i = 0; field[i].size != 0; i++) {
|
||||
err = xen_pcibk_config_add_field_offset(dev, &field[i], offset);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Read/Write the real configuration space */
|
||||
int xen_pcibk_read_config_byte(struct pci_dev *dev, int offset, u8 *value,
|
||||
void *data);
|
||||
int xen_pcibk_read_config_word(struct pci_dev *dev, int offset, u16 *value,
|
||||
void *data);
|
||||
int xen_pcibk_read_config_dword(struct pci_dev *dev, int offset, u32 *value,
|
||||
void *data);
|
||||
int xen_pcibk_write_config_byte(struct pci_dev *dev, int offset, u8 value,
|
||||
void *data);
|
||||
int xen_pcibk_write_config_word(struct pci_dev *dev, int offset, u16 value,
|
||||
void *data);
|
||||
int xen_pcibk_write_config_dword(struct pci_dev *dev, int offset, u32 value,
|
||||
void *data);
|
||||
|
||||
int xen_pcibk_config_capability_init(void);
|
||||
|
||||
int xen_pcibk_config_header_add_fields(struct pci_dev *dev);
|
||||
int xen_pcibk_config_capability_add_fields(struct pci_dev *dev);
|
||||
|
||||
#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */
|
207
drivers/xen/xen-pciback/conf_space_capability.c
Normal file
207
drivers/xen/xen-pciback/conf_space_capability.c
Normal file
@ -0,0 +1,207 @@
|
||||
/*
|
||||
* PCI Backend - Handles the virtual fields found on the capability lists
|
||||
* in the configuration space.
|
||||
*
|
||||
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/pci.h>
|
||||
#include "pciback.h"
|
||||
#include "conf_space.h"
|
||||
|
||||
static LIST_HEAD(capabilities);
|
||||
struct xen_pcibk_config_capability {
|
||||
struct list_head cap_list;
|
||||
|
||||
int capability;
|
||||
|
||||
/* If the device has the capability found above, add these fields */
|
||||
const struct config_field *fields;
|
||||
};
|
||||
|
||||
static const struct config_field caplist_header[] = {
|
||||
{
|
||||
.offset = PCI_CAP_LIST_ID,
|
||||
.size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
|
||||
.u.w.read = xen_pcibk_read_config_word,
|
||||
.u.w.write = NULL,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
static inline void register_capability(struct xen_pcibk_config_capability *cap)
|
||||
{
|
||||
list_add_tail(&cap->cap_list, &capabilities);
|
||||
}
|
||||
|
||||
int xen_pcibk_config_capability_add_fields(struct pci_dev *dev)
|
||||
{
|
||||
int err = 0;
|
||||
struct xen_pcibk_config_capability *cap;
|
||||
int cap_offset;
|
||||
|
||||
list_for_each_entry(cap, &capabilities, cap_list) {
|
||||
cap_offset = pci_find_capability(dev, cap->capability);
|
||||
if (cap_offset) {
|
||||
dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
|
||||
cap->capability, cap_offset);
|
||||
|
||||
err = xen_pcibk_config_add_fields_offset(dev,
|
||||
caplist_header,
|
||||
cap_offset);
|
||||
if (err)
|
||||
goto out;
|
||||
err = xen_pcibk_config_add_fields_offset(dev,
|
||||
cap->fields,
|
||||
cap_offset);
|
||||
if (err)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Write handler for the VPD address register.  Values without the
 * PCI_VPD_ADDR_F flag are passed through to the device; values with the
 * flag set are refused with PCIBIOS_SET_FAILED.
 */
static int vpd_address_write(struct pci_dev *dev, int offset, u16 value,
			     void *data)
{
	if (!(value & PCI_VPD_ADDR_F))
		return pci_write_config_word(dev, offset, value);

	/* Disallow writes to the vital product data */
	return PCIBIOS_SET_FAILED;
}
|
||||
|
||||
static const struct config_field caplist_vpd[] = {
|
||||
{
|
||||
.offset = PCI_VPD_ADDR,
|
||||
.size = 2,
|
||||
.u.w.read = xen_pcibk_read_config_word,
|
||||
.u.w.write = vpd_address_write,
|
||||
},
|
||||
{
|
||||
.offset = PCI_VPD_DATA,
|
||||
.size = 4,
|
||||
.u.dw.read = xen_pcibk_read_config_dword,
|
||||
.u.dw.write = NULL,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
/* Read the power management capabilities word with the PME support bits
 * (PCI_PM_CAP_PME_MASK) cleared.  On a failed read *value is left
 * untouched and the error is returned.
 */
static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
			void *data)
{
	u16 raw;
	int rc = pci_read_config_word(dev, offset, &raw);

	if (!rc)
		*value = raw & ~PCI_PM_CAP_PME_MASK;

	return rc;
}
|
||||
|
||||
/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
|
||||
* Can't allow driver domain to enable PMEs - they're shared */
|
||||
#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
|
||||
|
||||
/* Write handler for the power management control/status register.
 *
 * Only the bits in PM_OK_BITS are taken from the requested value; all
 * other bits keep what the hardware currently holds.  The requested power
 * state is not written directly but handed to pci_set_power_state().
 *
 * Returns 0 on success, the error of a failed config access, or
 * PCIBIOS_SET_FAILED when the PCI core rejects the power state change.
 */
static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
			 void *data)
{
	int err;
	u16 old_value;
	pci_power_t new_state;

	err = pci_read_config_word(dev, offset, &old_value);
	if (err)
		goto out;

	/* Remember the requested state before the value gets masked below */
	new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);

	new_value &= PM_OK_BITS;
	if ((old_value & PM_OK_BITS) != new_value) {
		/* Merge the permitted bits into the current register value */
		new_value = (old_value & ~PM_OK_BITS) | new_value;
		err = pci_write_config_word(dev, offset, new_value);
		if (err)
			goto out;
	}

	/* Let pci core handle the power management change */
	dev_dbg(&dev->dev, "set power state to %x\n", new_state);
	err = pci_set_power_state(dev, new_state);
	if (err)
		err = PCIBIOS_SET_FAILED;

 out:
	return err;
}
|
||||
|
||||
/* Ensure PMEs are disabled */
|
||||
static void *pm_ctrl_init(struct pci_dev *dev, int offset)
|
||||
{
|
||||
int err;
|
||||
u16 value;
|
||||
|
||||
err = pci_read_config_word(dev, offset, &value);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (value & PCI_PM_CTRL_PME_ENABLE) {
|
||||
value &= ~PCI_PM_CTRL_PME_ENABLE;
|
||||
err = pci_write_config_word(dev, offset, value);
|
||||
}
|
||||
|
||||
out:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static const struct config_field caplist_pm[] = {
|
||||
{
|
||||
.offset = PCI_PM_PMC,
|
||||
.size = 2,
|
||||
.u.w.read = pm_caps_read,
|
||||
},
|
||||
{
|
||||
.offset = PCI_PM_CTRL,
|
||||
.size = 2,
|
||||
.init = pm_ctrl_init,
|
||||
.u.w.read = xen_pcibk_read_config_word,
|
||||
.u.w.write = pm_ctrl_write,
|
||||
},
|
||||
{
|
||||
.offset = PCI_PM_PPB_EXTENSIONS,
|
||||
.size = 1,
|
||||
.u.b.read = xen_pcibk_read_config_byte,
|
||||
},
|
||||
{
|
||||
.offset = PCI_PM_DATA_REGISTER,
|
||||
.size = 1,
|
||||
.u.b.read = xen_pcibk_read_config_byte,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
static struct xen_pcibk_config_capability xen_pcibk_config_capability_pm = {
|
||||
.capability = PCI_CAP_ID_PM,
|
||||
.fields = caplist_pm,
|
||||
};
|
||||
static struct xen_pcibk_config_capability xen_pcibk_config_capability_vpd = {
|
||||
.capability = PCI_CAP_ID_VPD,
|
||||
.fields = caplist_vpd,
|
||||
};
|
||||
|
||||
int xen_pcibk_config_capability_init(void)
|
||||
{
|
||||
register_capability(&xen_pcibk_config_capability_vpd);
|
||||
register_capability(&xen_pcibk_config_capability_pm);
|
||||
|
||||
return 0;
|
||||
}
|
386
drivers/xen/xen-pciback/conf_space_header.c
Normal file
386
drivers/xen/xen-pciback/conf_space_header.c
Normal file
@ -0,0 +1,386 @@
|
||||
/*
|
||||
* PCI Backend - Handles the virtual fields in the configuration space headers.
|
||||
*
|
||||
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/pci.h>
|
||||
#include "pciback.h"
|
||||
#include "conf_space.h"
|
||||
|
||||
struct pci_bar_info {
|
||||
u32 val;
|
||||
u32 len_val;
|
||||
int which;
|
||||
};
|
||||
|
||||
#define DRV_NAME "xen-pciback"
|
||||
#define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
|
||||
#define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
|
||||
|
||||
/* Read handler for PCI_COMMAND.  The hardware value is read first; if the
 * device is enabled (enable_cnt non-zero) the I/O and memory decode bits
 * are additionally forced on for every kind of resource found among the
 * resources below PCI_ROM_RESOURCE.  The result of the underlying read is
 * returned.
 */
static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
{
	int ret = xen_pcibk_read_config_word(dev, offset, value, data);
	u16 decode = 0;
	int bar;

	if (!atomic_read(&dev->enable_cnt))
		return ret;

	for (bar = 0; bar < PCI_ROM_RESOURCE; bar++) {
		unsigned long flags = dev->resource[bar].flags;

		if (flags & IORESOURCE_IO)
			decode |= PCI_COMMAND_IO;
		if (flags & IORESOURCE_MEM)
			decode |= PCI_COMMAND_MEMORY;
	}

	*value |= decode;

	return ret;
}
|
||||
|
||||
/* Write handler for PCI_COMMAND.
 *
 * Enable/disable, bus mastering and memory-write-invalidate requests are
 * routed through the PCI core (pci_enable_device(), pci_disable_device(),
 * pci_set_master(), pci_set_mwi()) before the value is written to the
 * register.  If MWI cannot be enabled, the bit is stripped from the value
 * that gets written.  Returns the pci_enable_device() error if enabling
 * fails, otherwise the result of the final config write.
 */
static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
{
	struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
	const char *name = pci_name(dev);
	int want_decode = is_enable_cmd(value) != 0;
	int err;

	if (!pci_is_enabled(dev) && want_decode) {
		if (unlikely(verbose_request))
			printk(KERN_DEBUG DRV_NAME ": %s: enable\n", name);

		err = pci_enable_device(dev);
		if (err)
			return err;

		if (dev_data)
			dev_data->enable_intx = 1;
	} else if (pci_is_enabled(dev) && !want_decode) {
		if (unlikely(verbose_request))
			printk(KERN_DEBUG DRV_NAME ": %s: disable\n", name);

		pci_disable_device(dev);

		if (dev_data)
			dev_data->enable_intx = 0;
	}

	if (!dev->is_busmaster && is_master_cmd(value)) {
		if (unlikely(verbose_request))
			printk(KERN_DEBUG DRV_NAME ": %s: set bus master\n",
			       name);

		pci_set_master(dev);
	}

	if (value & PCI_COMMAND_INVALIDATE) {
		if (unlikely(verbose_request))
			printk(KERN_DEBUG
			       DRV_NAME ": %s: enable memory-write-invalidate\n",
			       name);

		err = pci_set_mwi(dev);
		if (err) {
			printk(KERN_WARNING
			       DRV_NAME ": %s: cannot enable "
			       "memory-write-invalidate (%d)\n",
			       name, err);
			/* Do not let the guest believe MWI is on */
			value &= ~PCI_COMMAND_INVALIDATE;
		}
	}

	return pci_write_config_word(dev, offset, value);
}
|
||||
|
||||
/* Write handler for the expansion ROM address register.
 *
 * A write of ~PCI_ROM_ADDRESS_ENABLE is treated as the sizing probe and
 * makes later reads return the length value.  Any other write switches
 * reads back to the address value; the hardware register is only touched
 * when the guest writes the saved value while the hardware currently holds
 * something else.  Returns 0, or XEN_PCI_ERR_op_failed without field data.
 */
static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
{
	struct pci_bar_info *info = data;
	u32 current_val;

	if (unlikely(!info)) {
		printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
		       pci_name(dev));
		return XEN_PCI_ERR_op_failed;
	}

	/* A write to obtain the length must happen as a 32-bit write.
	 * This does not (yet) support writing individual bytes
	 */
	if (value == ~PCI_ROM_ADDRESS_ENABLE) {
		info->which = 1;
		return 0;
	}

	pci_read_config_dword(dev, offset, &current_val);
	/* Allow restoration of bar value. */
	if (current_val != info->val && value == info->val)
		pci_write_config_dword(dev, offset, info->val);
	info->which = 0;

	/* Do we need to support enabling/disabling the rom address here? */

	return 0;
}
|
||||
|
||||
/* For the BARs, only allow writes which write ~0 or
|
||||
* the correct resource information
|
||||
* (Needed for when the driver probes the resource usage)
|
||||
*/
|
||||
static int bar_write(struct pci_dev *dev, int offset, u32 value, void *data)
|
||||
{
|
||||
struct pci_bar_info *bar = data;
|
||||
|
||||
if (unlikely(!bar)) {
|
||||
printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
|
||||
pci_name(dev));
|
||||
return XEN_PCI_ERR_op_failed;
|
||||
}
|
||||
|
||||
/* A write to obtain the length must happen as a 32-bit write.
|
||||
* This does not (yet) support writing individual bytes
|
||||
*/
|
||||
if (value == ~0)
|
||||
bar->which = 1;
|
||||
else {
|
||||
u32 tmpval;
|
||||
pci_read_config_dword(dev, offset, &tmpval);
|
||||
if (tmpval != bar->val && value == bar->val) {
|
||||
/* Allow restoration of bar value. */
|
||||
pci_write_config_dword(dev, offset, bar->val);
|
||||
}
|
||||
bar->which = 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Read handler shared by the BAR and ROM fields.  Returns the saved length
 * value after a sizing write and the saved address value otherwise; the
 * hardware is not accessed.  Returns 0, or XEN_PCI_ERR_op_failed when no
 * field data is available.
 */
static int bar_read(struct pci_dev *dev, int offset, u32 * value, void *data)
{
	struct pci_bar_info *info = data;

	if (unlikely(!info)) {
		printk(KERN_WARNING DRV_NAME ": driver data not found for %s\n",
		       pci_name(dev));
		return XEN_PCI_ERR_op_failed;
	}

	if (info->which)
		*value = info->len_val;
	else
		*value = info->val;

	return 0;
}
|
||||
|
||||
static inline void read_dev_bar(struct pci_dev *dev,
|
||||
struct pci_bar_info *bar_info, int offset,
|
||||
u32 len_mask)
|
||||
{
|
||||
int pos;
|
||||
struct resource *res = dev->resource;
|
||||
|
||||
if (offset == PCI_ROM_ADDRESS || offset == PCI_ROM_ADDRESS1)
|
||||
pos = PCI_ROM_RESOURCE;
|
||||
else {
|
||||
pos = (offset - PCI_BASE_ADDRESS_0) / 4;
|
||||
if (pos && ((res[pos - 1].flags & (PCI_BASE_ADDRESS_SPACE |
|
||||
PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
|
||||
(PCI_BASE_ADDRESS_SPACE_MEMORY |
|
||||
PCI_BASE_ADDRESS_MEM_TYPE_64))) {
|
||||
bar_info->val = res[pos - 1].start >> 32;
|
||||
bar_info->len_val = res[pos - 1].end >> 32;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
bar_info->val = res[pos].start |
|
||||
(res[pos].flags & PCI_REGION_FLAG_MASK);
|
||||
bar_info->len_val = res[pos].end - res[pos].start + 1;
|
||||
}
|
||||
|
||||
static void *bar_init(struct pci_dev *dev, int offset)
|
||||
{
|
||||
struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
|
||||
|
||||
if (!bar)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
read_dev_bar(dev, bar, offset, ~0);
|
||||
bar->which = 0;
|
||||
|
||||
return bar;
|
||||
}
|
||||
|
||||
static void *rom_init(struct pci_dev *dev, int offset)
|
||||
{
|
||||
struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
|
||||
|
||||
if (!bar)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
|
||||
bar->which = 0;
|
||||
|
||||
return bar;
|
||||
}
|
||||
|
||||
static void bar_reset(struct pci_dev *dev, int offset, void *data)
|
||||
{
|
||||
struct pci_bar_info *bar = data;
|
||||
|
||||
bar->which = 0;
|
||||
}
|
||||
|
||||
/* Release hook: free the state allocated by bar_init()/rom_init(). */
static void bar_release(struct pci_dev *dev, int offset, void *data)
{
	struct pci_bar_info *info = data;

	kfree(info);
}
|
||||
|
||||
static int xen_pcibk_read_vendor(struct pci_dev *dev, int offset,
|
||||
u16 *value, void *data)
|
||||
{
|
||||
*value = dev->vendor;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int xen_pcibk_read_device(struct pci_dev *dev, int offset,
|
||||
u16 *value, void *data)
|
||||
{
|
||||
*value = dev->device;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Report dev->irq, truncated to 8 bits, as the interrupt line; the
 * hardware register is not read.  Always returns 0.
 */
static int interrupt_read(struct pci_dev *dev, int offset, u8 * value,
			  void *data)
{
	u8 line = (u8) dev->irq;

	*value = line;
	return 0;
}
|
||||
|
||||
/* Write handler for PCI_BIST.  The write is passed on only if it is
 * exactly PCI_BIST_START, or if it leaves every bit other than
 * PCI_BIST_START as the hardware currently has it.  Any other write is
 * dropped and reported as success (0).  A failed read of the current value
 * is returned as the error.
 */
static int bist_write(struct pci_dev *dev, int offset, u8 value, void *data)
{
	u8 current_val;
	int rc;

	rc = pci_read_config_byte(dev, offset, &current_val);
	if (rc)
		return rc;

	if (value != PCI_BIST_START &&
	    (current_val & ~PCI_BIST_START) != (value & ~PCI_BIST_START))
		return 0;

	return pci_write_config_byte(dev, offset, value);
}
|
||||
|
||||
static const struct config_field header_common[] = {
|
||||
{
|
||||
.offset = PCI_VENDOR_ID,
|
||||
.size = 2,
|
||||
.u.w.read = xen_pcibk_read_vendor,
|
||||
},
|
||||
{
|
||||
.offset = PCI_DEVICE_ID,
|
||||
.size = 2,
|
||||
.u.w.read = xen_pcibk_read_device,
|
||||
},
|
||||
{
|
||||
.offset = PCI_COMMAND,
|
||||
.size = 2,
|
||||
.u.w.read = command_read,
|
||||
.u.w.write = command_write,
|
||||
},
|
||||
{
|
||||
.offset = PCI_INTERRUPT_LINE,
|
||||
.size = 1,
|
||||
.u.b.read = interrupt_read,
|
||||
},
|
||||
{
|
||||
.offset = PCI_INTERRUPT_PIN,
|
||||
.size = 1,
|
||||
.u.b.read = xen_pcibk_read_config_byte,
|
||||
},
|
||||
{
|
||||
/* Any side effects of letting driver domain control cache line? */
|
||||
.offset = PCI_CACHE_LINE_SIZE,
|
||||
.size = 1,
|
||||
.u.b.read = xen_pcibk_read_config_byte,
|
||||
.u.b.write = xen_pcibk_write_config_byte,
|
||||
},
|
||||
{
|
||||
.offset = PCI_LATENCY_TIMER,
|
||||
.size = 1,
|
||||
.u.b.read = xen_pcibk_read_config_byte,
|
||||
},
|
||||
{
|
||||
.offset = PCI_BIST,
|
||||
.size = 1,
|
||||
.u.b.read = xen_pcibk_read_config_byte,
|
||||
.u.b.write = bist_write,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
#define CFG_FIELD_BAR(reg_offset) \
|
||||
{ \
|
||||
.offset = reg_offset, \
|
||||
.size = 4, \
|
||||
.init = bar_init, \
|
||||
.reset = bar_reset, \
|
||||
.release = bar_release, \
|
||||
.u.dw.read = bar_read, \
|
||||
.u.dw.write = bar_write, \
|
||||
}
|
||||
|
||||
#define CFG_FIELD_ROM(reg_offset) \
|
||||
{ \
|
||||
.offset = reg_offset, \
|
||||
.size = 4, \
|
||||
.init = rom_init, \
|
||||
.reset = bar_reset, \
|
||||
.release = bar_release, \
|
||||
.u.dw.read = bar_read, \
|
||||
.u.dw.write = rom_write, \
|
||||
}
|
||||
|
||||
static const struct config_field header_0[] = {
|
||||
CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
|
||||
CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
|
||||
CFG_FIELD_BAR(PCI_BASE_ADDRESS_2),
|
||||
CFG_FIELD_BAR(PCI_BASE_ADDRESS_3),
|
||||
CFG_FIELD_BAR(PCI_BASE_ADDRESS_4),
|
||||
CFG_FIELD_BAR(PCI_BASE_ADDRESS_5),
|
||||
CFG_FIELD_ROM(PCI_ROM_ADDRESS),
|
||||
{}
|
||||
};
|
||||
|
||||
static const struct config_field header_1[] = {
|
||||
CFG_FIELD_BAR(PCI_BASE_ADDRESS_0),
|
||||
CFG_FIELD_BAR(PCI_BASE_ADDRESS_1),
|
||||
CFG_FIELD_ROM(PCI_ROM_ADDRESS1),
|
||||
{}
|
||||
};
|
||||
|
||||
int xen_pcibk_config_header_add_fields(struct pci_dev *dev)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = xen_pcibk_config_add_fields(dev, header_common);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
switch (dev->hdr_type) {
|
||||
case PCI_HEADER_TYPE_NORMAL:
|
||||
err = xen_pcibk_config_add_fields(dev, header_0);
|
||||
break;
|
||||
|
||||
case PCI_HEADER_TYPE_BRIDGE:
|
||||
err = xen_pcibk_config_add_fields(dev, header_1);
|
||||
break;
|
||||
|
||||
default:
|
||||
err = -EINVAL;
|
||||
printk(KERN_ERR DRV_NAME ": %s: Unsupported header type %d!\n",
|
||||
pci_name(dev), dev->hdr_type);
|
||||
break;
|
||||
}
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
140
drivers/xen/xen-pciback/conf_space_quirks.c
Normal file
140
drivers/xen/xen-pciback/conf_space_quirks.c
Normal file
@ -0,0 +1,140 @@
|
||||
/*
|
||||
* PCI Backend - Handle special overlays for broken devices.
|
||||
*
|
||||
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
||||
* Author: Chris Bookholt <hap10@epoch.ncsc.mil>
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/pci.h>
|
||||
#include "pciback.h"
|
||||
#include "conf_space.h"
|
||||
#include "conf_space_quirks.h"
|
||||
|
||||
LIST_HEAD(xen_pcibk_quirks);
|
||||
#define DRV_NAME "xen-pciback"
|
||||
static inline const struct pci_device_id *
|
||||
match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
|
||||
{
|
||||
if ((id->vendor == PCI_ANY_ID || id->vendor == dev->vendor) &&
|
||||
(id->device == PCI_ANY_ID || id->device == dev->device) &&
|
||||
(id->subvendor == PCI_ANY_ID ||
|
||||
id->subvendor == dev->subsystem_vendor) &&
|
||||
(id->subdevice == PCI_ANY_ID ||
|
||||
id->subdevice == dev->subsystem_device) &&
|
||||
!((id->class ^ dev->class) & id->class_mask))
|
||||
return id;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev)
|
||||
{
|
||||
struct xen_pcibk_config_quirk *tmp_quirk;
|
||||
|
||||
list_for_each_entry(tmp_quirk, &xen_pcibk_quirks, quirks_list)
|
||||
if (match_one_device(&tmp_quirk->devid, dev) != NULL)
|
||||
goto out;
|
||||
tmp_quirk = NULL;
|
||||
printk(KERN_DEBUG DRV_NAME
|
||||
":quirk didn't match any device xen_pciback knows about\n");
|
||||
out:
|
||||
return tmp_quirk;
|
||||
}
|
||||
|
||||
static inline void register_quirk(struct xen_pcibk_config_quirk *quirk)
|
||||
{
|
||||
list_add_tail(&quirk->quirks_list, &xen_pcibk_quirks);
|
||||
}
|
||||
|
||||
int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg)
|
||||
{
|
||||
int ret = 0;
|
||||
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
|
||||
struct config_field_entry *cfg_entry;
|
||||
|
||||
list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
|
||||
if (OFFSET(cfg_entry) == reg) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field
|
||||
*field)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
switch (field->size) {
|
||||
case 1:
|
||||
field->u.b.read = xen_pcibk_read_config_byte;
|
||||
field->u.b.write = xen_pcibk_write_config_byte;
|
||||
break;
|
||||
case 2:
|
||||
field->u.w.read = xen_pcibk_read_config_word;
|
||||
field->u.w.write = xen_pcibk_write_config_word;
|
||||
break;
|
||||
case 4:
|
||||
field->u.dw.read = xen_pcibk_read_config_dword;
|
||||
field->u.dw.write = xen_pcibk_write_config_dword;
|
||||
break;
|
||||
default:
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
xen_pcibk_config_add_field(dev, field);
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
int xen_pcibk_config_quirks_init(struct pci_dev *dev)
|
||||
{
|
||||
struct xen_pcibk_config_quirk *quirk;
|
||||
int ret = 0;
|
||||
|
||||
quirk = kzalloc(sizeof(*quirk), GFP_ATOMIC);
|
||||
if (!quirk) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
quirk->devid.vendor = dev->vendor;
|
||||
quirk->devid.device = dev->device;
|
||||
quirk->devid.subvendor = dev->subsystem_vendor;
|
||||
quirk->devid.subdevice = dev->subsystem_device;
|
||||
quirk->devid.class = 0;
|
||||
quirk->devid.class_mask = 0;
|
||||
quirk->devid.driver_data = 0UL;
|
||||
|
||||
quirk->pdev = dev;
|
||||
|
||||
register_quirk(quirk);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Free a config_field with kfree(); a NULL field is accepted. */
void xen_pcibk_config_field_free(struct config_field *field)
{
	void *mem = field;

	kfree(mem);
}
|
||||
|
||||
int xen_pcibk_config_quirk_release(struct pci_dev *dev)
|
||||
{
|
||||
struct xen_pcibk_config_quirk *quirk;
|
||||
int ret = 0;
|
||||
|
||||
quirk = xen_pcibk_find_quirk(dev);
|
||||
if (!quirk) {
|
||||
ret = -ENXIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
list_del(&quirk->quirks_list);
|
||||
kfree(quirk);
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
33
drivers/xen/xen-pciback/conf_space_quirks.h
Normal file
33
drivers/xen/xen-pciback/conf_space_quirks.h
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* PCI Backend - Data structures for special overlays for broken devices.
|
||||
*
|
||||
* Ryan Wilson <hap9@epoch.ncsc.mil>
|
||||
* Chris Bookholt <hap10@epoch.ncsc.mil>
|
||||
*/
|
||||
|
||||
#ifndef __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
|
||||
#define __XEN_PCIBACK_CONF_SPACE_QUIRKS_H__
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/list.h>
|
||||
|
||||
struct xen_pcibk_config_quirk {
|
||||
struct list_head quirks_list;
|
||||
struct pci_device_id devid;
|
||||
struct pci_dev *pdev;
|
||||
};
|
||||
|
||||
int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field
|
||||
*field);
|
||||
|
||||
int xen_pcibk_config_quirks_remove_field(struct pci_dev *dev, int reg);
|
||||
|
||||
int xen_pcibk_config_quirks_init(struct pci_dev *dev);
|
||||
|
||||
void xen_pcibk_config_field_free(struct config_field *field);
|
||||
|
||||
int xen_pcibk_config_quirk_release(struct pci_dev *dev);
|
||||
|
||||
int xen_pcibk_field_is_dup(struct pci_dev *dev, unsigned int reg);
|
||||
|
||||
#endif
|
194
drivers/xen/xen-pciback/passthrough.c
Normal file
194
drivers/xen/xen-pciback/passthrough.c
Normal file
@ -0,0 +1,194 @@
|
||||
/*
|
||||
* PCI Backend - Provides restricted access to the real PCI bus topology
|
||||
* to the frontend
|
||||
*
|
||||
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
||||
*/
|
||||
|
||||
#include <linux/list.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include "pciback.h"
|
||||
|
||||
struct passthrough_dev_data {
|
||||
/* Access to dev_list must be protected by lock */
|
||||
struct list_head dev_list;
|
||||
spinlock_t lock;
|
||||
};
|
||||
|
||||
static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
|
||||
unsigned int domain,
|
||||
unsigned int bus,
|
||||
unsigned int devfn)
|
||||
{
|
||||
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
|
||||
struct pci_dev_entry *dev_entry;
|
||||
struct pci_dev *dev = NULL;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&dev_data->lock, flags);
|
||||
|
||||
list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
|
||||
if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
|
||||
&& bus == (unsigned int)dev_entry->dev->bus->number
|
||||
&& devfn == dev_entry->dev->devfn) {
|
||||
dev = dev_entry->dev;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&dev_data->lock, flags);
|
||||
|
||||
return dev;
|
||||
}
|
||||
|
||||
/* Add a device to the backend's list and publish it under its real
 * domain/bus/devfn.  Returns -ENOMEM if the list entry cannot be
 * allocated, otherwise whatever publish_cb returns.  The entry stays on
 * the list even if publishing fails.
 */
static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
				   struct pci_dev *dev,
				   int devid, publish_pci_dev_cb publish_cb)
{
	struct passthrough_dev_data *priv = pdev->pci_dev_data;
	struct pci_dev_entry *entry;
	unsigned long irqflags;
	unsigned int seg, busnr, fn;

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (entry == NULL)
		return -ENOMEM;
	entry->dev = dev;

	spin_lock_irqsave(&priv->lock, irqflags);
	list_add_tail(&entry->list, &priv->dev_list);
	spin_unlock_irqrestore(&priv->lock, irqflags);

	/* Publish this device. */
	seg = (unsigned int)pci_domain_nr(dev->bus);
	busnr = (unsigned int)dev->bus->number;
	fn = dev->devfn;

	return publish_cb(pdev, seg, busnr, fn, devid);
}
|
||||
|
||||
static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
|
||||
struct pci_dev *dev)
|
||||
{
|
||||
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
|
||||
struct pci_dev_entry *dev_entry, *t;
|
||||
struct pci_dev *found_dev = NULL;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&dev_data->lock, flags);
|
||||
|
||||
list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
|
||||
if (dev_entry->dev == dev) {
|
||||
list_del(&dev_entry->list);
|
||||
found_dev = dev_entry->dev;
|
||||
kfree(dev_entry);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&dev_data->lock, flags);
|
||||
|
||||
if (found_dev)
|
||||
pcistub_put_pci_dev(found_dev);
|
||||
}
|
||||
|
||||
static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
|
||||
{
|
||||
struct passthrough_dev_data *dev_data;
|
||||
|
||||
dev_data = kmalloc(sizeof(*dev_data), GFP_KERNEL);
|
||||
if (!dev_data)
|
||||
return -ENOMEM;
|
||||
|
||||
spin_lock_init(&dev_data->lock);
|
||||
|
||||
INIT_LIST_HEAD(&dev_data->dev_list);
|
||||
|
||||
pdev->pci_dev_data = dev_data;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
|
||||
publish_pci_root_cb publish_root_cb)
|
||||
{
|
||||
int err = 0;
|
||||
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
|
||||
struct pci_dev_entry *dev_entry, *e, *tmp;
|
||||
struct pci_dev *dev;
|
||||
int found;
|
||||
unsigned int domain, bus;
|
||||
|
||||
spin_lock(&dev_data->lock);
|
||||
|
||||
list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) {
|
||||
/* Only publish this device as a root if none of its
|
||||
* parent bridges are exported
|
||||
*/
|
||||
found = 0;
|
||||
dev = dev_entry->dev->bus->self;
|
||||
for (; !found && dev != NULL; dev = dev->bus->self) {
|
||||
list_for_each_entry(e, &dev_data->dev_list, list) {
|
||||
if (dev == e->dev) {
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
domain = (unsigned int)pci_domain_nr(dev_entry->dev->bus);
|
||||
bus = (unsigned int)dev_entry->dev->bus->number;
|
||||
|
||||
if (!found) {
|
||||
spin_unlock(&dev_data->lock);
|
||||
err = publish_root_cb(pdev, domain, bus);
|
||||
if (err)
|
||||
break;
|
||||
spin_lock(&dev_data->lock);
|
||||
}
|
||||
}
|
||||
|
||||
if (!err)
|
||||
spin_unlock(&dev_data->lock);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
|
||||
{
|
||||
struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
|
||||
struct pci_dev_entry *dev_entry, *t;
|
||||
|
||||
list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
|
||||
list_del(&dev_entry->list);
|
||||
pcistub_put_pci_dev(dev_entry->dev);
|
||||
kfree(dev_entry);
|
||||
}
|
||||
|
||||
kfree(dev_data);
|
||||
pdev->pci_dev_data = NULL;
|
||||
}
|
||||
|
||||
static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
|
||||
struct xen_pcibk_device *pdev,
|
||||
unsigned int *domain, unsigned int *bus,
|
||||
unsigned int *devfn)
|
||||
{
|
||||
*domain = pci_domain_nr(pcidev->bus);
|
||||
*bus = pcidev->bus->number;
|
||||
*devfn = pcidev->devfn;
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
|
||||
.name = "passthrough",
|
||||
.init = __xen_pcibk_init_devices,
|
||||
.free = __xen_pcibk_release_devices,
|
||||
.find = __xen_pcibk_get_pcifront_dev,
|
||||
.publish = __xen_pcibk_publish_pci_roots,
|
||||
.release = __xen_pcibk_release_pci_dev,
|
||||
.add = __xen_pcibk_add_pci_dev,
|
||||
.get = __xen_pcibk_get_pci_dev,
|
||||
};
|
1376
drivers/xen/xen-pciback/pci_stub.c
Normal file
1376
drivers/xen/xen-pciback/pci_stub.c
Normal file
File diff suppressed because it is too large
Load Diff
183
drivers/xen/xen-pciback/pciback.h
Normal file
183
drivers/xen/xen-pciback/pciback.h
Normal file
@ -0,0 +1,183 @@
|
||||
/*
|
||||
* PCI Backend Common Data Structures & Function Declarations
|
||||
*
|
||||
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
||||
*/
|
||||
#ifndef __XEN_PCIBACK_H__
|
||||
#define __XEN_PCIBACK_H__
|
||||
|
||||
#include <linux/pci.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <xen/xenbus.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <xen/interface/io/pciif.h>
|
||||
|
||||
struct pci_dev_entry {
|
||||
struct list_head list;
|
||||
struct pci_dev *dev;
|
||||
};
|
||||
|
||||
/*
 * Bits kept in xen_pcibk_device.flags.  The underscore-prefixed names are
 * bit numbers (for test_bit() and friends); the others are the masks.
 */
#define _PDEVF_op_active	(0)
#define PDEVF_op_active		(1<<(_PDEVF_op_active))
#define _PCIB_op_pending	(1)
#define PCIB_op_pending		(1<<(_PCIB_op_pending))
|
||||
|
||||
struct xen_pcibk_device {
|
||||
void *pci_dev_data;
|
||||
spinlock_t dev_lock;
|
||||
struct xenbus_device *xdev;
|
||||
struct xenbus_watch be_watch;
|
||||
u8 be_watching;
|
||||
int evtchn_irq;
|
||||
struct xen_pci_sharedinfo *sh_info;
|
||||
unsigned long flags;
|
||||
struct work_struct op_work;
|
||||
};
|
||||
|
||||
struct xen_pcibk_dev_data {
|
||||
struct list_head config_fields;
|
||||
unsigned int permissive:1;
|
||||
unsigned int warned_on_write:1;
|
||||
unsigned int enable_intx:1;
|
||||
unsigned int isr_on:1; /* Whether the IRQ handler is installed. */
|
||||
unsigned int ack_intr:1; /* .. and ACK-ing */
|
||||
unsigned long handled;
|
||||
unsigned int irq; /* Saved in case device transitions to MSI/MSI-X */
|
||||
char irq_name[0]; /* xen-pcibk[000:04:00.0] */
|
||||
};
|
||||
|
||||
/* Used by XenBus and xen_pcibk_ops.c */
|
||||
extern wait_queue_head_t xen_pcibk_aer_wait_queue;
|
||||
extern struct workqueue_struct *xen_pcibk_wq;
|
||||
/* Used by pcistub.c and conf_space_quirks.c */
|
||||
extern struct list_head xen_pcibk_quirks;
|
||||
|
||||
/* Get/Put PCI Devices that are hidden from the PCI Backend Domain */
|
||||
struct pci_dev *pcistub_get_pci_dev_by_slot(struct xen_pcibk_device *pdev,
|
||||
int domain, int bus,
|
||||
int slot, int func);
|
||||
struct pci_dev *pcistub_get_pci_dev(struct xen_pcibk_device *pdev,
|
||||
struct pci_dev *dev);
|
||||
void pcistub_put_pci_dev(struct pci_dev *dev);
|
||||
|
||||
/* Ensure a device is turned off or reset */
|
||||
void xen_pcibk_reset_device(struct pci_dev *pdev);
|
||||
|
||||
/* Access a virtual configuration space for a PCI device */
|
||||
int xen_pcibk_config_init(void);
|
||||
int xen_pcibk_config_init_dev(struct pci_dev *dev);
|
||||
void xen_pcibk_config_free_dyn_fields(struct pci_dev *dev);
|
||||
void xen_pcibk_config_reset_dev(struct pci_dev *dev);
|
||||
void xen_pcibk_config_free_dev(struct pci_dev *dev);
|
||||
int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
|
||||
u32 *ret_val);
|
||||
int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size,
|
||||
u32 value);
|
||||
|
||||
/*
 * Callbacks the backends invoke to publish what they export:
 * one device (domain/bus/devfn plus its devid), or one root bus.
 */
typedef int (*publish_pci_dev_cb)(struct xen_pcibk_device *pdev,
				  unsigned int domain, unsigned int bus,
				  unsigned int devfn, unsigned int devid);

typedef int (*publish_pci_root_cb)(struct xen_pcibk_device *pdev,
				   unsigned int domain, unsigned int bus);
|
||||
|
||||
/* Backend registration for the two types of BDF representation:
|
||||
* vpci - BDFs start at 00
|
||||
* passthrough - BDFs are exactly like in the host.
|
||||
*/
|
||||
struct xen_pcibk_backend {
|
||||
char *name;
|
||||
int (*init)(struct xen_pcibk_device *pdev);
|
||||
void (*free)(struct xen_pcibk_device *pdev);
|
||||
int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev,
|
||||
unsigned int *domain, unsigned int *bus,
|
||||
unsigned int *devfn);
|
||||
int (*publish)(struct xen_pcibk_device *pdev, publish_pci_root_cb cb);
|
||||
void (*release)(struct xen_pcibk_device *pdev, struct pci_dev *dev);
|
||||
int (*add)(struct xen_pcibk_device *pdev, struct pci_dev *dev,
|
||||
int devid, publish_pci_dev_cb publish_cb);
|
||||
struct pci_dev *(*get)(struct xen_pcibk_device *pdev,
|
||||
unsigned int domain, unsigned int bus,
|
||||
unsigned int devfn);
|
||||
};
|
||||
|
||||
extern struct xen_pcibk_backend xen_pcibk_vpci_backend;
|
||||
extern struct xen_pcibk_backend xen_pcibk_passthrough_backend;
|
||||
extern struct xen_pcibk_backend *xen_pcibk_backend;
|
||||
|
||||
static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
|
||||
struct pci_dev *dev,
|
||||
int devid,
|
||||
publish_pci_dev_cb publish_cb)
|
||||
{
|
||||
if (xen_pcibk_backend && xen_pcibk_backend->add)
|
||||
return xen_pcibk_backend->add(pdev, dev, devid, publish_cb);
|
||||
return -1;
|
||||
};
|
||||
static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
|
||||
struct pci_dev *dev)
|
||||
{
|
||||
if (xen_pcibk_backend && xen_pcibk_backend->free)
|
||||
return xen_pcibk_backend->release(pdev, dev);
|
||||
};
|
||||
|
||||
static inline struct pci_dev *
|
||||
xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
|
||||
unsigned int bus, unsigned int devfn)
|
||||
{
|
||||
if (xen_pcibk_backend && xen_pcibk_backend->get)
|
||||
return xen_pcibk_backend->get(pdev, domain, bus, devfn);
|
||||
return NULL;
|
||||
};
|
||||
/**
|
||||
* Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in xen_pcibk
|
||||
* before sending aer request to pcifront, so that guest could identify
|
||||
* device, coopearte with xen_pcibk to finish aer recovery job if device driver
|
||||
* has the capability
|
||||
*/
|
||||
static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
|
||||
struct xen_pcibk_device *pdev,
|
||||
unsigned int *domain,
|
||||
unsigned int *bus,
|
||||
unsigned int *devfn)
|
||||
{
|
||||
if (xen_pcibk_backend && xen_pcibk_backend->find)
|
||||
return xen_pcibk_backend->find(pcidev, pdev, domain, bus,
|
||||
devfn);
|
||||
return -1;
|
||||
};
|
||||
static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
|
||||
{
|
||||
if (xen_pcibk_backend && xen_pcibk_backend->init)
|
||||
return xen_pcibk_backend->init(pdev);
|
||||
return -1;
|
||||
};
|
||||
static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
|
||||
publish_pci_root_cb cb)
|
||||
{
|
||||
if (xen_pcibk_backend && xen_pcibk_backend->publish)
|
||||
return xen_pcibk_backend->publish(pdev, cb);
|
||||
return -1;
|
||||
};
|
||||
static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
|
||||
{
|
||||
if (xen_pcibk_backend && xen_pcibk_backend->free)
|
||||
return xen_pcibk_backend->free(pdev);
|
||||
};
|
||||
/* Handles events from front-end */
|
||||
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
|
||||
void xen_pcibk_do_op(struct work_struct *data);
|
||||
|
||||
int xen_pcibk_xenbus_register(void);
|
||||
void xen_pcibk_xenbus_unregister(void);
|
||||
|
||||
extern int verbose_request;
|
||||
|
||||
void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev);
|
||||
/*
 * Handles shared IRQs that can go to both the device domain and the
 * control domain.
 */
void xen_pcibk_irq_handler(struct pci_dev *dev, int reset);

#endif /* __XEN_PCIBACK_H__ */
|
384
drivers/xen/xen-pciback/pciback_ops.c
Normal file
384
drivers/xen/xen-pciback/pciback_ops.c
Normal file
@ -0,0 +1,384 @@
|
||||
/*
|
||||
* PCI Backend Operations - respond to PCI requests from Frontend
|
||||
*
|
||||
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <xen/events.h>
|
||||
#include <linux/sched.h>
|
||||
#include "pciback.h"
|
||||
|
||||
#define DRV_NAME "xen-pciback"
|
||||
int verbose_request;
|
||||
module_param(verbose_request, int, 0644);
|
||||
|
||||
static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id);
|
||||
|
||||
/* Ensure a device is has the fake IRQ handler "turned on/off" and is
|
||||
* ready to be exported. This MUST be run after xen_pcibk_reset_device
|
||||
* which does the actual PCI device enable/disable.
|
||||
*/
|
||||
static void xen_pcibk_control_isr(struct pci_dev *dev, int reset)
|
||||
{
|
||||
struct xen_pcibk_dev_data *dev_data;
|
||||
int rc;
|
||||
int enable = 0;
|
||||
|
||||
dev_data = pci_get_drvdata(dev);
|
||||
if (!dev_data)
|
||||
return;
|
||||
|
||||
/* We don't deal with bridges */
|
||||
if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL)
|
||||
return;
|
||||
|
||||
if (reset) {
|
||||
dev_data->enable_intx = 0;
|
||||
dev_data->ack_intr = 0;
|
||||
}
|
||||
enable = dev_data->enable_intx;
|
||||
|
||||
/* Asked to disable, but ISR isn't runnig */
|
||||
if (!enable && !dev_data->isr_on)
|
||||
return;
|
||||
|
||||
/* Squirrel away the IRQs in the dev_data. We need this
|
||||
* b/c when device transitions to MSI, the dev->irq is
|
||||
* overwritten with the MSI vector.
|
||||
*/
|
||||
if (enable)
|
||||
dev_data->irq = dev->irq;
|
||||
|
||||
/*
|
||||
* SR-IOV devices in all use MSI-X and have no legacy
|
||||
* interrupts, so inhibit creating a fake IRQ handler for them.
|
||||
*/
|
||||
if (dev_data->irq == 0)
|
||||
goto out;
|
||||
|
||||
dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n",
|
||||
dev_data->irq_name,
|
||||
dev_data->irq,
|
||||
pci_is_enabled(dev) ? "on" : "off",
|
||||
dev->msi_enabled ? "MSI" : "",
|
||||
dev->msix_enabled ? "MSI/X" : "",
|
||||
dev_data->isr_on ? "enable" : "disable",
|
||||
enable ? "enable" : "disable");
|
||||
|
||||
if (enable) {
|
||||
rc = request_irq(dev_data->irq,
|
||||
xen_pcibk_guest_interrupt, IRQF_SHARED,
|
||||
dev_data->irq_name, dev);
|
||||
if (rc) {
|
||||
dev_err(&dev->dev, "%s: failed to install fake IRQ " \
|
||||
"handler for IRQ %d! (rc:%d)\n",
|
||||
dev_data->irq_name, dev_data->irq, rc);
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
free_irq(dev_data->irq, dev);
|
||||
dev_data->irq = 0;
|
||||
}
|
||||
dev_data->isr_on = enable;
|
||||
dev_data->ack_intr = enable;
|
||||
out:
|
||||
dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n",
|
||||
dev_data->irq_name,
|
||||
dev_data->irq,
|
||||
pci_is_enabled(dev) ? "on" : "off",
|
||||
dev->msi_enabled ? "MSI" : "",
|
||||
dev->msix_enabled ? "MSI/X" : "",
|
||||
enable ? (dev_data->isr_on ? "enabled" : "failed to enable") :
|
||||
(dev_data->isr_on ? "failed to disable" : "disabled"));
|
||||
}
|
||||
|
||||
/* Ensure a device is "turned off" and ready to be exported.
|
||||
* (Also see xen_pcibk_config_reset to ensure virtual configuration space is
|
||||
* ready to be re-exported)
|
||||
*/
|
||||
void xen_pcibk_reset_device(struct pci_dev *dev)
|
||||
{
|
||||
u16 cmd;
|
||||
|
||||
xen_pcibk_control_isr(dev, 1 /* reset device */);
|
||||
|
||||
/* Disable devices (but not bridges) */
|
||||
if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) {
|
||||
#ifdef CONFIG_PCI_MSI
|
||||
/* The guest could have been abruptly killed without
|
||||
* disabling MSI/MSI-X interrupts.*/
|
||||
if (dev->msix_enabled)
|
||||
pci_disable_msix(dev);
|
||||
if (dev->msi_enabled)
|
||||
pci_disable_msi(dev);
|
||||
#endif
|
||||
pci_disable_device(dev);
|
||||
|
||||
pci_write_config_word(dev, PCI_COMMAND, 0);
|
||||
|
||||
dev->is_busmaster = 0;
|
||||
} else {
|
||||
pci_read_config_word(dev, PCI_COMMAND, &cmd);
|
||||
if (cmd & (PCI_COMMAND_INVALIDATE)) {
|
||||
cmd &= ~(PCI_COMMAND_INVALIDATE);
|
||||
pci_write_config_word(dev, PCI_COMMAND, cmd);
|
||||
|
||||
dev->is_busmaster = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PCI_MSI
|
||||
static
|
||||
int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev,
|
||||
struct pci_dev *dev, struct xen_pci_op *op)
|
||||
{
|
||||
struct xen_pcibk_dev_data *dev_data;
|
||||
int otherend = pdev->xdev->otherend_id;
|
||||
int status;
|
||||
|
||||
if (unlikely(verbose_request))
|
||||
printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev));
|
||||
|
||||
status = pci_enable_msi(dev);
|
||||
|
||||
if (status) {
|
||||
printk(KERN_ERR "error enable msi for guest %x status %x\n",
|
||||
otherend, status);
|
||||
op->value = 0;
|
||||
return XEN_PCI_ERR_op_failed;
|
||||
}
|
||||
|
||||
/* The value the guest needs is actually the IDT vector, not the
|
||||
* the local domain's IRQ number. */
|
||||
|
||||
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
|
||||
if (unlikely(verbose_request))
|
||||
printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
|
||||
op->value);
|
||||
|
||||
dev_data = pci_get_drvdata(dev);
|
||||
if (dev_data)
|
||||
dev_data->ack_intr = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static
|
||||
int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev,
|
||||
struct pci_dev *dev, struct xen_pci_op *op)
|
||||
{
|
||||
struct xen_pcibk_dev_data *dev_data;
|
||||
|
||||
if (unlikely(verbose_request))
|
||||
printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n",
|
||||
pci_name(dev));
|
||||
pci_disable_msi(dev);
|
||||
|
||||
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
|
||||
if (unlikely(verbose_request))
|
||||
printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev),
|
||||
op->value);
|
||||
dev_data = pci_get_drvdata(dev);
|
||||
if (dev_data)
|
||||
dev_data->ack_intr = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static
|
||||
int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev,
|
||||
struct pci_dev *dev, struct xen_pci_op *op)
|
||||
{
|
||||
struct xen_pcibk_dev_data *dev_data;
|
||||
int i, result;
|
||||
struct msix_entry *entries;
|
||||
|
||||
if (unlikely(verbose_request))
|
||||
printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n",
|
||||
pci_name(dev));
|
||||
if (op->value > SH_INFO_MAX_VEC)
|
||||
return -EINVAL;
|
||||
|
||||
entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL);
|
||||
if (entries == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < op->value; i++) {
|
||||
entries[i].entry = op->msix_entries[i].entry;
|
||||
entries[i].vector = op->msix_entries[i].vector;
|
||||
}
|
||||
|
||||
result = pci_enable_msix(dev, entries, op->value);
|
||||
|
||||
if (result == 0) {
|
||||
for (i = 0; i < op->value; i++) {
|
||||
op->msix_entries[i].entry = entries[i].entry;
|
||||
if (entries[i].vector)
|
||||
op->msix_entries[i].vector =
|
||||
xen_pirq_from_irq(entries[i].vector);
|
||||
if (unlikely(verbose_request))
|
||||
printk(KERN_DEBUG DRV_NAME ": %s: " \
|
||||
"MSI-X[%d]: %d\n",
|
||||
pci_name(dev), i,
|
||||
op->msix_entries[i].vector);
|
||||
}
|
||||
} else {
|
||||
printk(KERN_WARNING DRV_NAME ": %s: failed to enable MSI-X: err %d!\n",
|
||||
pci_name(dev), result);
|
||||
}
|
||||
kfree(entries);
|
||||
|
||||
op->value = result;
|
||||
dev_data = pci_get_drvdata(dev);
|
||||
if (dev_data)
|
||||
dev_data->ack_intr = 0;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static
|
||||
int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev,
|
||||
struct pci_dev *dev, struct xen_pci_op *op)
|
||||
{
|
||||
struct xen_pcibk_dev_data *dev_data;
|
||||
if (unlikely(verbose_request))
|
||||
printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n",
|
||||
pci_name(dev));
|
||||
pci_disable_msix(dev);
|
||||
|
||||
/*
|
||||
* SR-IOV devices (which don't have any legacy IRQ) have
|
||||
* an undefined IRQ value of zero.
|
||||
*/
|
||||
op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0;
|
||||
if (unlikely(verbose_request))
|
||||
printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev),
|
||||
op->value);
|
||||
dev_data = pci_get_drvdata(dev);
|
||||
if (dev_data)
|
||||
dev_data->ack_intr = 1;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* Now the same evtchn is used for both pcifront conf_read_write request
|
||||
* as well as pcie aer front end ack. We use a new work_queue to schedule
|
||||
* xen_pcibk conf_read_write service for avoiding confict with aer_core
|
||||
* do_recovery job which also use the system default work_queue
|
||||
*/
|
||||
void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev)
|
||||
{
|
||||
/* Check that frontend is requesting an operation and that we are not
|
||||
* already processing a request */
|
||||
if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)
|
||||
&& !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) {
|
||||
queue_work(xen_pcibk_wq, &pdev->op_work);
|
||||
}
|
||||
/*_XEN_PCIB_active should have been cleared by pcifront. And also make
|
||||
sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/
|
||||
if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags)
|
||||
&& test_bit(_PCIB_op_pending, &pdev->flags)) {
|
||||
wake_up(&xen_pcibk_aer_wait_queue);
|
||||
}
|
||||
}
|
||||
|
||||
/* Performing the configuration space reads/writes must not be done in atomic
|
||||
* context because some of the pci_* functions can sleep (mostly due to ACPI
|
||||
* use of semaphores). This function is intended to be called from a work
|
||||
* queue in process context taking a struct xen_pcibk_device as a parameter */
|
||||
|
||||
void xen_pcibk_do_op(struct work_struct *data)
|
||||
{
|
||||
struct xen_pcibk_device *pdev =
|
||||
container_of(data, struct xen_pcibk_device, op_work);
|
||||
struct pci_dev *dev;
|
||||
struct xen_pcibk_dev_data *dev_data = NULL;
|
||||
struct xen_pci_op *op = &pdev->sh_info->op;
|
||||
int test_intx = 0;
|
||||
|
||||
dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn);
|
||||
|
||||
if (dev == NULL)
|
||||
op->err = XEN_PCI_ERR_dev_not_found;
|
||||
else {
|
||||
dev_data = pci_get_drvdata(dev);
|
||||
if (dev_data)
|
||||
test_intx = dev_data->enable_intx;
|
||||
switch (op->cmd) {
|
||||
case XEN_PCI_OP_conf_read:
|
||||
op->err = xen_pcibk_config_read(dev,
|
||||
op->offset, op->size, &op->value);
|
||||
break;
|
||||
case XEN_PCI_OP_conf_write:
|
||||
op->err = xen_pcibk_config_write(dev,
|
||||
op->offset, op->size, op->value);
|
||||
break;
|
||||
#ifdef CONFIG_PCI_MSI
|
||||
case XEN_PCI_OP_enable_msi:
|
||||
op->err = xen_pcibk_enable_msi(pdev, dev, op);
|
||||
break;
|
||||
case XEN_PCI_OP_disable_msi:
|
||||
op->err = xen_pcibk_disable_msi(pdev, dev, op);
|
||||
break;
|
||||
case XEN_PCI_OP_enable_msix:
|
||||
op->err = xen_pcibk_enable_msix(pdev, dev, op);
|
||||
break;
|
||||
case XEN_PCI_OP_disable_msix:
|
||||
op->err = xen_pcibk_disable_msix(pdev, dev, op);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
op->err = XEN_PCI_ERR_not_implemented;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!op->err && dev && dev_data) {
|
||||
/* Transition detected */
|
||||
if ((dev_data->enable_intx != test_intx))
|
||||
xen_pcibk_control_isr(dev, 0 /* no reset */);
|
||||
}
|
||||
/* Tell the driver domain that we're done. */
|
||||
wmb();
|
||||
clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
|
||||
notify_remote_via_irq(pdev->evtchn_irq);
|
||||
|
||||
/* Mark that we're done. */
|
||||
smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */
|
||||
clear_bit(_PDEVF_op_active, &pdev->flags);
|
||||
smp_mb__after_clear_bit(); /* /before/ final check for work */
|
||||
|
||||
/* Check to see if the driver domain tried to start another request in
|
||||
* between clearing _XEN_PCIF_active and clearing _PDEVF_op_active.
|
||||
*/
|
||||
xen_pcibk_test_and_schedule_op(pdev);
|
||||
}
|
||||
|
||||
irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id)
|
||||
{
|
||||
struct xen_pcibk_device *pdev = dev_id;
|
||||
|
||||
xen_pcibk_test_and_schedule_op(pdev);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id)
|
||||
{
|
||||
struct pci_dev *dev = (struct pci_dev *)dev_id;
|
||||
struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev);
|
||||
|
||||
if (dev_data->isr_on && dev_data->ack_intr) {
|
||||
dev_data->handled++;
|
||||
if ((dev_data->handled % 1000) == 0) {
|
||||
if (xen_test_irq_shared(irq)) {
|
||||
printk(KERN_INFO "%s IRQ line is not shared "
|
||||
"with other domains. Turning ISR off\n",
|
||||
dev_data->irq_name);
|
||||
dev_data->ack_intr = 0;
|
||||
}
|
||||
}
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
return IRQ_NONE;
|
||||
}
|
259
drivers/xen/xen-pciback/vpci.c
Normal file
259
drivers/xen/xen-pciback/vpci.c
Normal file
@ -0,0 +1,259 @@
|
||||
/*
|
||||
* PCI Backend - Provides a Virtual PCI bus (with real devices)
|
||||
* to the frontend
|
||||
*
|
||||
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
||||
*/
|
||||
|
||||
#include <linux/list.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include "pciback.h"
|
||||
|
||||
#define PCI_SLOT_MAX 32
|
||||
#define DRV_NAME "xen-pciback"
|
||||
|
||||
struct vpci_dev_data {
|
||||
/* Access to dev_list must be protected by lock */
|
||||
struct list_head dev_list[PCI_SLOT_MAX];
|
||||
spinlock_t lock;
|
||||
};
|
||||
|
||||
static inline struct list_head *list_first(struct list_head *head)
|
||||
{
|
||||
return head->next;
|
||||
}
|
||||
|
||||
static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
|
||||
unsigned int domain,
|
||||
unsigned int bus,
|
||||
unsigned int devfn)
|
||||
{
|
||||
struct pci_dev_entry *entry;
|
||||
struct pci_dev *dev = NULL;
|
||||
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
|
||||
unsigned long flags;
|
||||
|
||||
if (domain != 0 || bus != 0)
|
||||
return NULL;
|
||||
|
||||
if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
|
||||
spin_lock_irqsave(&vpci_dev->lock, flags);
|
||||
|
||||
list_for_each_entry(entry,
|
||||
&vpci_dev->dev_list[PCI_SLOT(devfn)],
|
||||
list) {
|
||||
if (PCI_FUNC(entry->dev->devfn) == PCI_FUNC(devfn)) {
|
||||
dev = entry->dev;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&vpci_dev->lock, flags);
|
||||
}
|
||||
return dev;
|
||||
}
|
||||
|
||||
static inline int match_slot(struct pci_dev *l, struct pci_dev *r)
|
||||
{
|
||||
if (pci_domain_nr(l->bus) == pci_domain_nr(r->bus)
|
||||
&& l->bus == r->bus && PCI_SLOT(l->devfn) == PCI_SLOT(r->devfn))
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
|
||||
struct pci_dev *dev, int devid,
|
||||
publish_pci_dev_cb publish_cb)
|
||||
{
|
||||
int err = 0, slot, func = -1;
|
||||
struct pci_dev_entry *t, *dev_entry;
|
||||
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
|
||||
unsigned long flags;
|
||||
|
||||
if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
|
||||
err = -EFAULT;
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Can't export bridges on the virtual PCI bus");
|
||||
goto out;
|
||||
}
|
||||
|
||||
dev_entry = kmalloc(sizeof(*dev_entry), GFP_KERNEL);
|
||||
if (!dev_entry) {
|
||||
err = -ENOMEM;
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error adding entry to virtual PCI bus");
|
||||
goto out;
|
||||
}
|
||||
|
||||
dev_entry->dev = dev;
|
||||
|
||||
spin_lock_irqsave(&vpci_dev->lock, flags);
|
||||
|
||||
/* Keep multi-function devices together on the virtual PCI bus */
|
||||
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
|
||||
if (!list_empty(&vpci_dev->dev_list[slot])) {
|
||||
t = list_entry(list_first(&vpci_dev->dev_list[slot]),
|
||||
struct pci_dev_entry, list);
|
||||
|
||||
if (match_slot(dev, t->dev)) {
|
||||
pr_info(DRV_NAME ": vpci: %s: "
|
||||
"assign to virtual slot %d func %d\n",
|
||||
pci_name(dev), slot,
|
||||
PCI_FUNC(dev->devfn));
|
||||
list_add_tail(&dev_entry->list,
|
||||
&vpci_dev->dev_list[slot]);
|
||||
func = PCI_FUNC(dev->devfn);
|
||||
goto unlock;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Assign to a new slot on the virtual PCI bus */
|
||||
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
|
||||
if (list_empty(&vpci_dev->dev_list[slot])) {
|
||||
printk(KERN_INFO DRV_NAME
|
||||
": vpci: %s: assign to virtual slot %d\n",
|
||||
pci_name(dev), slot);
|
||||
list_add_tail(&dev_entry->list,
|
||||
&vpci_dev->dev_list[slot]);
|
||||
func = PCI_FUNC(dev->devfn);
|
||||
goto unlock;
|
||||
}
|
||||
}
|
||||
|
||||
err = -ENOMEM;
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"No more space on root virtual PCI bus");
|
||||
|
||||
unlock:
|
||||
spin_unlock_irqrestore(&vpci_dev->lock, flags);
|
||||
|
||||
/* Publish this device. */
|
||||
if (!err)
|
||||
err = publish_cb(pdev, 0, 0, PCI_DEVFN(slot, func), devid);
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
|
||||
struct pci_dev *dev)
|
||||
{
|
||||
int slot;
|
||||
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
|
||||
struct pci_dev *found_dev = NULL;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&vpci_dev->lock, flags);
|
||||
|
||||
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
|
||||
struct pci_dev_entry *e, *tmp;
|
||||
list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
|
||||
list) {
|
||||
if (e->dev == dev) {
|
||||
list_del(&e->list);
|
||||
found_dev = e->dev;
|
||||
kfree(e);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
spin_unlock_irqrestore(&vpci_dev->lock, flags);
|
||||
|
||||
if (found_dev)
|
||||
pcistub_put_pci_dev(found_dev);
|
||||
}
|
||||
|
||||
static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
|
||||
{
|
||||
int slot;
|
||||
struct vpci_dev_data *vpci_dev;
|
||||
|
||||
vpci_dev = kmalloc(sizeof(*vpci_dev), GFP_KERNEL);
|
||||
if (!vpci_dev)
|
||||
return -ENOMEM;
|
||||
|
||||
spin_lock_init(&vpci_dev->lock);
|
||||
|
||||
for (slot = 0; slot < PCI_SLOT_MAX; slot++)
|
||||
INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
|
||||
|
||||
pdev->pci_dev_data = vpci_dev;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
|
||||
publish_pci_root_cb publish_cb)
|
||||
{
|
||||
/* The Virtual PCI bus has only one root */
|
||||
return publish_cb(pdev, 0, 0);
|
||||
}
|
||||
|
||||
static void __xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
|
||||
{
|
||||
int slot;
|
||||
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
|
||||
|
||||
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
|
||||
struct pci_dev_entry *e, *tmp;
|
||||
list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
|
||||
list) {
|
||||
list_del(&e->list);
|
||||
pcistub_put_pci_dev(e->dev);
|
||||
kfree(e);
|
||||
}
|
||||
}
|
||||
|
||||
kfree(vpci_dev);
|
||||
pdev->pci_dev_data = NULL;
|
||||
}
|
||||
|
||||
static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev,
|
||||
struct xen_pcibk_device *pdev,
|
||||
unsigned int *domain, unsigned int *bus,
|
||||
unsigned int *devfn)
|
||||
{
|
||||
struct pci_dev_entry *entry;
|
||||
struct pci_dev *dev = NULL;
|
||||
struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
|
||||
unsigned long flags;
|
||||
int found = 0, slot;
|
||||
|
||||
spin_lock_irqsave(&vpci_dev->lock, flags);
|
||||
for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
|
||||
list_for_each_entry(entry,
|
||||
&vpci_dev->dev_list[slot],
|
||||
list) {
|
||||
dev = entry->dev;
|
||||
if (dev && dev->bus->number == pcidev->bus->number
|
||||
&& pci_domain_nr(dev->bus) ==
|
||||
pci_domain_nr(pcidev->bus)
|
||||
&& dev->devfn == pcidev->devfn) {
|
||||
found = 1;
|
||||
*domain = 0;
|
||||
*bus = 0;
|
||||
*devfn = PCI_DEVFN(slot,
|
||||
PCI_FUNC(pcidev->devfn));
|
||||
}
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&vpci_dev->lock, flags);
|
||||
return found;
|
||||
}
|
||||
|
||||
struct xen_pcibk_backend xen_pcibk_vpci_backend = {
|
||||
.name = "vpci",
|
||||
.init = __xen_pcibk_init_devices,
|
||||
.free = __xen_pcibk_release_devices,
|
||||
.find = __xen_pcibk_get_pcifront_dev,
|
||||
.publish = __xen_pcibk_publish_pci_roots,
|
||||
.release = __xen_pcibk_release_pci_dev,
|
||||
.add = __xen_pcibk_add_pci_dev,
|
||||
.get = __xen_pcibk_get_pci_dev,
|
||||
};
|
749
drivers/xen/xen-pciback/xenbus.c
Normal file
749
drivers/xen/xen-pciback/xenbus.c
Normal file
@ -0,0 +1,749 @@
|
||||
/*
|
||||
* PCI Backend Xenbus Setup - handles setup with frontend and xend
|
||||
*
|
||||
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <xen/xenbus.h>
|
||||
#include <xen/events.h>
|
||||
#include <asm/xen/pci.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include "pciback.h"
|
||||
|
||||
#define DRV_NAME		"xen-pciback"
/* Value of xen_pcibk_device.evtchn_irq while no IRQ is bound. */
#define INVALID_EVTCHN_IRQ	(-1)

/* Workqueue on which xen_pcibk_do_op() work items are queued. */
struct workqueue_struct *xen_pcibk_wq;
|
||||
|
||||
static int __read_mostly passthrough;
|
||||
module_param(passthrough, bool, S_IRUGO);
|
||||
MODULE_PARM_DESC(passthrough,
|
||||
"Option to specify how to export PCI topology to guest:\n"\
|
||||
" 0 - (default) Hide the true PCI topology and makes the frontend\n"\
|
||||
" there is a single PCI bus with only the exported devices on it.\n"\
|
||||
" For example, a device at 03:05.0 will be re-assigned to 00:00.0\n"\
|
||||
" while second device at 02:1a.1 will be re-assigned to 00:01.1.\n"\
|
||||
" 1 - Passthrough provides a real view of the PCI topology to the\n"\
|
||||
" frontend (for example, a device at 06:01.b will still appear at\n"\
|
||||
" 06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\
|
||||
" exposed PCI devices to its driver domains. This may be required\n"\
|
||||
" for drivers which depend on finding their hardward in certain\n"\
|
||||
" bus/slot locations.");
|
||||
|
||||
static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)
|
||||
{
|
||||
struct xen_pcibk_device *pdev;
|
||||
|
||||
pdev = kzalloc(sizeof(struct xen_pcibk_device), GFP_KERNEL);
|
||||
if (pdev == NULL)
|
||||
goto out;
|
||||
dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
|
||||
|
||||
pdev->xdev = xdev;
|
||||
dev_set_drvdata(&xdev->dev, pdev);
|
||||
|
||||
spin_lock_init(&pdev->dev_lock);
|
||||
|
||||
pdev->sh_info = NULL;
|
||||
pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
|
||||
pdev->be_watching = 0;
|
||||
|
||||
INIT_WORK(&pdev->op_work, xen_pcibk_do_op);
|
||||
|
||||
if (xen_pcibk_init_devices(pdev)) {
|
||||
kfree(pdev);
|
||||
pdev = NULL;
|
||||
}
|
||||
out:
|
||||
return pdev;
|
||||
}
|
||||
|
||||
static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
|
||||
{
|
||||
spin_lock(&pdev->dev_lock);
|
||||
|
||||
/* Ensure the guest can't trigger our handler before removing devices */
|
||||
if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
|
||||
unbind_from_irqhandler(pdev->evtchn_irq, pdev);
|
||||
pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
|
||||
}
|
||||
spin_unlock(&pdev->dev_lock);
|
||||
|
||||
/* If the driver domain started an op, make sure we complete it
|
||||
* before releasing the shared memory */
|
||||
|
||||
/* Note, the workqueue does not use spinlocks at all.*/
|
||||
flush_workqueue(xen_pcibk_wq);
|
||||
|
||||
spin_lock(&pdev->dev_lock);
|
||||
if (pdev->sh_info != NULL) {
|
||||
xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
|
||||
pdev->sh_info = NULL;
|
||||
}
|
||||
spin_unlock(&pdev->dev_lock);
|
||||
|
||||
}
|
||||
|
||||
static void free_pdev(struct xen_pcibk_device *pdev)
|
||||
{
|
||||
if (pdev->be_watching) {
|
||||
unregister_xenbus_watch(&pdev->be_watch);
|
||||
pdev->be_watching = 0;
|
||||
}
|
||||
|
||||
xen_pcibk_disconnect(pdev);
|
||||
|
||||
xen_pcibk_release_devices(pdev);
|
||||
|
||||
dev_set_drvdata(&pdev->xdev->dev, NULL);
|
||||
pdev->xdev = NULL;
|
||||
|
||||
kfree(pdev);
|
||||
}
|
||||
|
||||
static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref,
|
||||
int remote_evtchn)
|
||||
{
|
||||
int err = 0;
|
||||
void *vaddr;
|
||||
|
||||
dev_dbg(&pdev->xdev->dev,
|
||||
"Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
|
||||
gnt_ref, remote_evtchn);
|
||||
|
||||
err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
|
||||
if (err < 0) {
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error mapping other domain page in ours.");
|
||||
goto out;
|
||||
}
|
||||
|
||||
spin_lock(&pdev->dev_lock);
|
||||
pdev->sh_info = vaddr;
|
||||
spin_unlock(&pdev->dev_lock);
|
||||
|
||||
err = bind_interdomain_evtchn_to_irqhandler(
|
||||
pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
|
||||
0, DRV_NAME, pdev);
|
||||
if (err < 0) {
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error binding event channel to IRQ");
|
||||
goto out;
|
||||
}
|
||||
|
||||
spin_lock(&pdev->dev_lock);
|
||||
pdev->evtchn_irq = err;
|
||||
spin_unlock(&pdev->dev_lock);
|
||||
err = 0;
|
||||
|
||||
dev_dbg(&pdev->xdev->dev, "Attached!\n");
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xen_pcibk_attach(struct xen_pcibk_device *pdev)
|
||||
{
|
||||
int err = 0;
|
||||
int gnt_ref, remote_evtchn;
|
||||
char *magic = NULL;
|
||||
|
||||
|
||||
/* Make sure we only do this setup once */
|
||||
if (xenbus_read_driver_state(pdev->xdev->nodename) !=
|
||||
XenbusStateInitialised)
|
||||
goto out;
|
||||
|
||||
/* Wait for frontend to state that it has published the configuration */
|
||||
if (xenbus_read_driver_state(pdev->xdev->otherend) !=
|
||||
XenbusStateInitialised)
|
||||
goto out;
|
||||
|
||||
dev_dbg(&pdev->xdev->dev, "Reading frontend config\n");
|
||||
|
||||
err = xenbus_gather(XBT_NIL, pdev->xdev->otherend,
|
||||
"pci-op-ref", "%u", &gnt_ref,
|
||||
"event-channel", "%u", &remote_evtchn,
|
||||
"magic", NULL, &magic, NULL);
|
||||
if (err) {
|
||||
/* If configuration didn't get read correctly, wait longer */
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error reading configuration from frontend");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
|
||||
xenbus_dev_fatal(pdev->xdev, -EFAULT,
|
||||
"version mismatch (%s/%s) with pcifront - "
|
||||
"halting xen_pcibk",
|
||||
magic, XEN_PCI_MAGIC);
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = xen_pcibk_do_attach(pdev, gnt_ref, remote_evtchn);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
dev_dbg(&pdev->xdev->dev, "Connecting...\n");
|
||||
|
||||
err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
|
||||
if (err)
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error switching to connected state!");
|
||||
|
||||
dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
|
||||
out:
|
||||
|
||||
kfree(magic);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xen_pcibk_publish_pci_dev(struct xen_pcibk_device *pdev,
|
||||
unsigned int domain, unsigned int bus,
|
||||
unsigned int devfn, unsigned int devid)
|
||||
{
|
||||
int err;
|
||||
int len;
|
||||
char str[64];
|
||||
|
||||
len = snprintf(str, sizeof(str), "vdev-%d", devid);
|
||||
if (unlikely(len >= (sizeof(str) - 1))) {
|
||||
err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
|
||||
"%04x:%02x:%02x.%02x", domain, bus,
|
||||
PCI_SLOT(devfn), PCI_FUNC(devfn));
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xen_pcibk_export_device(struct xen_pcibk_device *pdev,
|
||||
int domain, int bus, int slot, int func,
|
||||
int devid)
|
||||
{
|
||||
struct pci_dev *dev;
|
||||
int err = 0;
|
||||
|
||||
dev_dbg(&pdev->xdev->dev, "exporting dom %x bus %x slot %x func %x\n",
|
||||
domain, bus, slot, func);
|
||||
|
||||
dev = pcistub_get_pci_dev_by_slot(pdev, domain, bus, slot, func);
|
||||
if (!dev) {
|
||||
err = -EINVAL;
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Couldn't locate PCI device "
|
||||
"(%04x:%02x:%02x.%01x)! "
|
||||
"perhaps already in-use?",
|
||||
domain, bus, slot, func);
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = xen_pcibk_add_pci_dev(pdev, dev, devid,
|
||||
xen_pcibk_publish_pci_dev);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id);
|
||||
if (xen_register_device_domain_owner(dev,
|
||||
pdev->xdev->otherend_id) != 0) {
|
||||
dev_err(&dev->dev, "device has been assigned to another " \
|
||||
"domain! Over-writting the ownership, but beware.\n");
|
||||
xen_unregister_device_domain_owner(dev);
|
||||
xen_register_device_domain_owner(dev, pdev->xdev->otherend_id);
|
||||
}
|
||||
|
||||
/* TODO: It'd be nice to export a bridge and have all of its children
|
||||
* get exported with it. This may be best done in xend (which will
|
||||
* have to calculate resource usage anyway) but we probably want to
|
||||
* put something in here to ensure that if a bridge gets given to a
|
||||
* driver domain, that all devices under that bridge are not given
|
||||
* to other driver domains (as he who controls the bridge can disable
|
||||
* it and stop the other devices from working).
|
||||
*/
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev,
|
||||
int domain, int bus, int slot, int func)
|
||||
{
|
||||
int err = 0;
|
||||
struct pci_dev *dev;
|
||||
|
||||
dev_dbg(&pdev->xdev->dev, "removing dom %x bus %x slot %x func %x\n",
|
||||
domain, bus, slot, func);
|
||||
|
||||
dev = xen_pcibk_get_pci_dev(pdev, domain, bus, PCI_DEVFN(slot, func));
|
||||
if (!dev) {
|
||||
err = -EINVAL;
|
||||
dev_dbg(&pdev->xdev->dev, "Couldn't locate PCI device "
|
||||
"(%04x:%02x:%02x.%01x)! not owned by this domain\n",
|
||||
domain, bus, slot, func);
|
||||
goto out;
|
||||
}
|
||||
|
||||
dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id);
|
||||
xen_unregister_device_domain_owner(dev);
|
||||
|
||||
xen_pcibk_release_pci_dev(pdev, dev);
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xen_pcibk_publish_pci_root(struct xen_pcibk_device *pdev,
|
||||
unsigned int domain, unsigned int bus)
|
||||
{
|
||||
unsigned int d, b;
|
||||
int i, root_num, len, err;
|
||||
char str[64];
|
||||
|
||||
dev_dbg(&pdev->xdev->dev, "Publishing pci roots\n");
|
||||
|
||||
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
|
||||
"root_num", "%d", &root_num);
|
||||
if (err == 0 || err == -ENOENT)
|
||||
root_num = 0;
|
||||
else if (err < 0)
|
||||
goto out;
|
||||
|
||||
/* Verify that we haven't already published this pci root */
|
||||
for (i = 0; i < root_num; i++) {
|
||||
len = snprintf(str, sizeof(str), "root-%d", i);
|
||||
if (unlikely(len >= (sizeof(str) - 1))) {
|
||||
err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
|
||||
str, "%x:%x", &d, &b);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
if (err != 2) {
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (d == domain && b == bus) {
|
||||
err = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
len = snprintf(str, sizeof(str), "root-%d", root_num);
|
||||
if (unlikely(len >= (sizeof(str) - 1))) {
|
||||
err = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
dev_dbg(&pdev->xdev->dev, "writing root %d at %04x:%02x\n",
|
||||
root_num, domain, bus);
|
||||
|
||||
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, str,
|
||||
"%04x:%02x", domain, bus);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
|
||||
"root_num", "%d", (root_num + 1));
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev)
|
||||
{
|
||||
int err = 0;
|
||||
int num_devs;
|
||||
int domain, bus, slot, func;
|
||||
int substate;
|
||||
int i, len;
|
||||
char state_str[64];
|
||||
char dev_str[64];
|
||||
|
||||
|
||||
dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
|
||||
|
||||
/* Make sure we only reconfigure once */
|
||||
if (xenbus_read_driver_state(pdev->xdev->nodename) !=
|
||||
XenbusStateReconfiguring)
|
||||
goto out;
|
||||
|
||||
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
|
||||
&num_devs);
|
||||
if (err != 1) {
|
||||
if (err >= 0)
|
||||
err = -EINVAL;
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error reading number of devices");
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < num_devs; i++) {
|
||||
len = snprintf(state_str, sizeof(state_str), "state-%d", i);
|
||||
if (unlikely(len >= (sizeof(state_str) - 1))) {
|
||||
err = -ENOMEM;
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"String overflow while reading "
|
||||
"configuration");
|
||||
goto out;
|
||||
}
|
||||
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, state_str,
|
||||
"%d", &substate);
|
||||
if (err != 1)
|
||||
substate = XenbusStateUnknown;
|
||||
|
||||
switch (substate) {
|
||||
case XenbusStateInitialising:
|
||||
dev_dbg(&pdev->xdev->dev, "Attaching dev-%d ...\n", i);
|
||||
|
||||
len = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
|
||||
if (unlikely(len >= (sizeof(dev_str) - 1))) {
|
||||
err = -ENOMEM;
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"String overflow while "
|
||||
"reading configuration");
|
||||
goto out;
|
||||
}
|
||||
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
|
||||
dev_str, "%x:%x:%x.%x",
|
||||
&domain, &bus, &slot, &func);
|
||||
if (err < 0) {
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error reading device "
|
||||
"configuration");
|
||||
goto out;
|
||||
}
|
||||
if (err != 4) {
|
||||
err = -EINVAL;
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error parsing pci device "
|
||||
"configuration");
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = xen_pcibk_export_device(pdev, domain, bus, slot,
|
||||
func, i);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
/* Publish pci roots. */
|
||||
err = xen_pcibk_publish_pci_roots(pdev,
|
||||
xen_pcibk_publish_pci_root);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error while publish PCI root"
|
||||
"buses for frontend");
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename,
|
||||
state_str, "%d",
|
||||
XenbusStateInitialised);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error switching substate of "
|
||||
"dev-%d\n", i);
|
||||
goto out;
|
||||
}
|
||||
break;
|
||||
|
||||
case XenbusStateClosing:
|
||||
dev_dbg(&pdev->xdev->dev, "Detaching dev-%d ...\n", i);
|
||||
|
||||
len = snprintf(dev_str, sizeof(dev_str), "vdev-%d", i);
|
||||
if (unlikely(len >= (sizeof(dev_str) - 1))) {
|
||||
err = -ENOMEM;
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"String overflow while "
|
||||
"reading configuration");
|
||||
goto out;
|
||||
}
|
||||
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename,
|
||||
dev_str, "%x:%x:%x.%x",
|
||||
&domain, &bus, &slot, &func);
|
||||
if (err < 0) {
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error reading device "
|
||||
"configuration");
|
||||
goto out;
|
||||
}
|
||||
if (err != 4) {
|
||||
err = -EINVAL;
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error parsing pci device "
|
||||
"configuration");
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = xen_pcibk_remove_device(pdev, domain, bus, slot,
|
||||
func);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
/* TODO: If at some point we implement support for pci
|
||||
* root hot-remove on pcifront side, we'll need to
|
||||
* remove unnecessary xenstore nodes of pci roots here.
|
||||
*/
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
err = xenbus_switch_state(pdev->xdev, XenbusStateReconfigured);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error switching to reconfigured state!");
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void xen_pcibk_frontend_changed(struct xenbus_device *xdev,
|
||||
enum xenbus_state fe_state)
|
||||
{
|
||||
struct xen_pcibk_device *pdev = dev_get_drvdata(&xdev->dev);
|
||||
|
||||
dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
|
||||
|
||||
switch (fe_state) {
|
||||
case XenbusStateInitialised:
|
||||
xen_pcibk_attach(pdev);
|
||||
break;
|
||||
|
||||
case XenbusStateReconfiguring:
|
||||
xen_pcibk_reconfigure(pdev);
|
||||
break;
|
||||
|
||||
case XenbusStateConnected:
|
||||
/* pcifront switched its state from reconfiguring to connected.
|
||||
* Then switch to connected state.
|
||||
*/
|
||||
xenbus_switch_state(xdev, XenbusStateConnected);
|
||||
break;
|
||||
|
||||
case XenbusStateClosing:
|
||||
xen_pcibk_disconnect(pdev);
|
||||
xenbus_switch_state(xdev, XenbusStateClosing);
|
||||
break;
|
||||
|
||||
case XenbusStateClosed:
|
||||
xen_pcibk_disconnect(pdev);
|
||||
xenbus_switch_state(xdev, XenbusStateClosed);
|
||||
if (xenbus_dev_is_online(xdev))
|
||||
break;
|
||||
/* fall through if not online */
|
||||
case XenbusStateUnknown:
|
||||
dev_dbg(&xdev->dev, "frontend is gone! unregister device\n");
|
||||
device_unregister(&xdev->dev);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev)
|
||||
{
|
||||
/* Get configuration from xend (if available now) */
|
||||
int domain, bus, slot, func;
|
||||
int err = 0;
|
||||
int i, num_devs;
|
||||
char dev_str[64];
|
||||
char state_str[64];
|
||||
|
||||
/* It's possible we could get the call to setup twice, so make sure
|
||||
* we're not already connected.
|
||||
*/
|
||||
if (xenbus_read_driver_state(pdev->xdev->nodename) !=
|
||||
XenbusStateInitWait)
|
||||
goto out;
|
||||
|
||||
dev_dbg(&pdev->xdev->dev, "getting be setup\n");
|
||||
|
||||
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, "num_devs", "%d",
|
||||
&num_devs);
|
||||
if (err != 1) {
|
||||
if (err >= 0)
|
||||
err = -EINVAL;
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error reading number of devices");
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < num_devs; i++) {
|
||||
int l = snprintf(dev_str, sizeof(dev_str), "dev-%d", i);
|
||||
if (unlikely(l >= (sizeof(dev_str) - 1))) {
|
||||
err = -ENOMEM;
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"String overflow while reading "
|
||||
"configuration");
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = xenbus_scanf(XBT_NIL, pdev->xdev->nodename, dev_str,
|
||||
"%x:%x:%x.%x", &domain, &bus, &slot, &func);
|
||||
if (err < 0) {
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error reading device configuration");
|
||||
goto out;
|
||||
}
|
||||
if (err != 4) {
|
||||
err = -EINVAL;
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error parsing pci device "
|
||||
"configuration");
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = xen_pcibk_export_device(pdev, domain, bus, slot, func, i);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
/* Switch substate of this device. */
|
||||
l = snprintf(state_str, sizeof(state_str), "state-%d", i);
|
||||
if (unlikely(l >= (sizeof(state_str) - 1))) {
|
||||
err = -ENOMEM;
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"String overflow while reading "
|
||||
"configuration");
|
||||
goto out;
|
||||
}
|
||||
err = xenbus_printf(XBT_NIL, pdev->xdev->nodename, state_str,
|
||||
"%d", XenbusStateInitialised);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(pdev->xdev, err, "Error switching "
|
||||
"substate of dev-%d\n", i);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
err = xen_pcibk_publish_pci_roots(pdev, xen_pcibk_publish_pci_root);
|
||||
if (err) {
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error while publish PCI root buses "
|
||||
"for frontend");
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = xenbus_switch_state(pdev->xdev, XenbusStateInitialised);
|
||||
if (err)
|
||||
xenbus_dev_fatal(pdev->xdev, err,
|
||||
"Error switching to initialised state!");
|
||||
|
||||
out:
|
||||
if (!err)
|
||||
/* see if pcifront is already configured (if not, we'll wait) */
|
||||
xen_pcibk_attach(pdev);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void xen_pcibk_be_watch(struct xenbus_watch *watch,
|
||||
const char **vec, unsigned int len)
|
||||
{
|
||||
struct xen_pcibk_device *pdev =
|
||||
container_of(watch, struct xen_pcibk_device, be_watch);
|
||||
|
||||
switch (xenbus_read_driver_state(pdev->xdev->nodename)) {
|
||||
case XenbusStateInitWait:
|
||||
xen_pcibk_setup_backend(pdev);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int xen_pcibk_xenbus_probe(struct xenbus_device *dev,
|
||||
const struct xenbus_device_id *id)
|
||||
{
|
||||
int err = 0;
|
||||
struct xen_pcibk_device *pdev = alloc_pdev(dev);
|
||||
|
||||
if (pdev == NULL) {
|
||||
err = -ENOMEM;
|
||||
xenbus_dev_fatal(dev, err,
|
||||
"Error allocating xen_pcibk_device struct");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* wait for xend to configure us */
|
||||
err = xenbus_switch_state(dev, XenbusStateInitWait);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
/* watch the backend node for backend configuration information */
|
||||
err = xenbus_watch_path(dev, dev->nodename, &pdev->be_watch,
|
||||
xen_pcibk_be_watch);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
pdev->be_watching = 1;
|
||||
|
||||
/* We need to force a call to our callback here in case
|
||||
* xend already configured us!
|
||||
*/
|
||||
xen_pcibk_be_watch(&pdev->be_watch, NULL, 0);
|
||||
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xen_pcibk_xenbus_remove(struct xenbus_device *dev)
|
||||
{
|
||||
struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev);
|
||||
|
||||
if (pdev != NULL)
|
||||
free_pdev(pdev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* xenbus device types handled by this driver; the empty entry ends the list. */
static const struct xenbus_device_id xenpci_ids[] = {
	{ "pci" },
	{ "" },
};
|
||||
|
||||
static struct xenbus_driver xenbus_xen_pcibk_driver = {
|
||||
.name = DRV_NAME,
|
||||
.owner = THIS_MODULE,
|
||||
.ids = xenpci_ids,
|
||||
.probe = xen_pcibk_xenbus_probe,
|
||||
.remove = xen_pcibk_xenbus_remove,
|
||||
.otherend_changed = xen_pcibk_frontend_changed,
|
||||
};
|
||||
|
||||
struct xen_pcibk_backend *xen_pcibk_backend;
|
||||
|
||||
/*
 * Create the driver's workqueue, select the backend implementation and
 * register the xenbus backend driver.
 *
 * The vpci backend is used unless the "passthrough" option is set.
 *
 * Returns 0 on success, -EFAULT if the workqueue cannot be created, or the
 * error from xenbus_register_backend().  On a registration failure the
 * workqueue is destroyed again so that it does not leak.
 */
int __init xen_pcibk_xenbus_register(void)
{
	int err;

	xen_pcibk_wq = create_workqueue("xen_pciback_workqueue");
	if (!xen_pcibk_wq) {
		printk(KERN_ERR "%s: create "
			"xen_pciback_workqueue failed\n", __func__);
		return -EFAULT;
	}

	xen_pcibk_backend = &xen_pcibk_vpci_backend;
	if (passthrough)
		xen_pcibk_backend = &xen_pcibk_passthrough_backend;
	pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name);

	err = xenbus_register_backend(&xenbus_xen_pcibk_driver);
	if (err) {
		/* Nobody will call the unregister path after a failed init. */
		destroy_workqueue(xen_pcibk_wq);
		xen_pcibk_wq = NULL;
	}

	return err;
}
|
||||
|
||||
/*
 * Unregister the xenbus backend driver and destroy the workqueue.
 *
 * The driver must be unregistered first: removing the remaining devices
 * goes through free_pdev() -> xen_pcibk_disconnect(), which flushes
 * xen_pcibk_wq, so the workqueue has to stay alive until that is done.
 */
void __exit xen_pcibk_xenbus_unregister(void)
{
	xenbus_unregister_driver(&xenbus_xen_pcibk_driver);
	destroy_workqueue(xen_pcibk_wq);
}
|
485
drivers/xen/xen-selfballoon.c
Normal file
485
drivers/xen/xen-selfballoon.c
Normal file
@ -0,0 +1,485 @@
|
||||
/******************************************************************************
|
||||
* Xen selfballoon driver (and optional frontswap self-shrinking driver)
|
||||
*
|
||||
* Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
|
||||
*
|
||||
* This code complements the cleancache and frontswap patchsets to optimize
|
||||
* support for Xen Transcendent Memory ("tmem"). The policy it implements
|
||||
* is rudimentary and will likely improve over time, but it does work well
|
||||
* enough today.
|
||||
*
|
||||
* Two functionalities are implemented here which both use "control theory"
|
||||
* (feedback) to optimize memory utilization. In a virtualized environment
|
||||
* such as Xen, RAM is often a scarce resource and we would like to ensure
|
||||
* that each of a possibly large number of virtual machines is using RAM
|
||||
* efficiently, i.e. using as little as possible when under light load
|
||||
* and obtaining as much as possible when memory demands are high.
|
||||
* Since RAM needs vary highly dynamically and sometimes dramatically,
|
||||
* "hysteresis" is used, that is, memory target is determined not just
|
||||
* on current data but also on past data stored in the system.
|
||||
*
|
||||
* "Selfballooning" creates memory pressure by managing the Xen balloon
|
||||
* driver to decrease and increase available kernel memory, driven
|
||||
* largely by the target value of "Committed_AS" (see /proc/meminfo).
|
||||
* Since Committed_AS does not account for clean mapped pages (i.e. pages
|
||||
* in RAM that are identical to pages on disk), selfballooning has the
|
||||
* effect of pushing less frequently used clean pagecache pages out of
|
||||
* kernel RAM and, presumably using cleancache, into Xen tmem where
|
||||
* Xen can more efficiently optimize RAM utilization for such pages.
|
||||
*
|
||||
* When kernel memory demand unexpectedly increases faster than Xen, via
|
||||
* the selfballoon driver, is able to (or chooses to) provide usable RAM,
|
||||
* the kernel may invoke swapping. In most cases, frontswap is able
|
||||
* to absorb this swapping into Xen tmem. However, due to the fact
|
||||
* that the kernel swap subsystem assumes swapping occurs to a disk,
|
||||
* swapped pages may sit on the disk for a very long time; even if
|
||||
* the kernel knows the page will never be used again. This is because
|
||||
* the disk space costs very little and can be overwritten when
|
||||
* necessary. When such stale pages are in frontswap, however, they
|
||||
* are taking up valuable real estate. "Frontswap selfshrinking" works
|
||||
* to resolve this: When frontswap activity is otherwise stable
|
||||
* and the guest kernel is not under memory pressure, the "frontswap
|
||||
* selfshrinking" accounts for this by providing pressure to remove some
|
||||
* pages from frontswap and return them to kernel memory.
|
||||
*
|
||||
* For both "selfballooning" and "frontswap-selfshrinking", a worker
|
||||
* thread is used and sysfs tunables are provided to adjust the frequency
|
||||
* and rate of adjustments to achieve the goal, as well as to disable one
|
||||
* or both functions independently.
|
||||
*
|
||||
* While some argue that this functionality can and should be implemented
|
||||
* in userspace, it has been observed that bad things happen (e.g. OOMs).
|
||||
*
|
||||
* System configuration note: Selfballooning should not be enabled on
|
||||
* systems without a sufficiently large swap device configured; for best
|
||||
* results, it is recommended that total swap be increased by the size
|
||||
* of the guest memory. Also, while technically not required to be
|
||||
* configured, it is highly recommended that frontswap also be configured
|
||||
* and enabled when selfballooning is running. So, selfballooning
|
||||
* is disabled by default if frontswap is not configured and can only
|
||||
* be enabled with the "selfballooning" kernel boot option; similarly
|
||||
* selfballooning is enabled by default if frontswap is configured and
|
||||
* can be disabled with the "noselfballooning" kernel boot option. Finally,
|
||||
* when frontswap is configured, frontswap-selfshrinking can be disabled
|
||||
* with the "noselfshrink" kernel boot option.
|
||||
*
|
||||
* Selfballooning is disallowed in domain0 and force-disabled.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mman.h>
|
||||
|
||||
#include <xen/balloon.h>
|
||||
|
||||
#include <xen/tmem.h>
|
||||
|
||||
/* Enable/disable with sysfs. */
|
||||
static int xen_selfballooning_enabled __read_mostly;
|
||||
|
||||
/*
|
||||
* Controls rate at which memory target (this iteration) approaches
|
||||
* ultimate goal when memory need is increasing (up-hysteresis) or
|
||||
* decreasing (down-hysteresis). Higher values of hysteresis cause
|
||||
* slower increases/decreases. The default values for the various
|
||||
* parameters were deemed reasonable by experimentation, may be
|
||||
* workload-dependent, and can all be adjusted via sysfs.
|
||||
*/
|
||||
static unsigned int selfballoon_downhysteresis __read_mostly = 8;
|
||||
static unsigned int selfballoon_uphysteresis __read_mostly = 1;
|
||||
|
||||
/* In seconds (multiplied by HZ when scheduled), interval between worker invocations. */
|
||||
static unsigned int selfballoon_interval __read_mostly = 5;
|
||||
|
||||
static void selfballoon_process(struct work_struct *work);
|
||||
static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
|
||||
|
||||
#ifdef CONFIG_FRONTSWAP
|
||||
#include <linux/frontswap.h>
|
||||
|
||||
/* Enable/disable with sysfs. */
|
||||
static bool frontswap_selfshrinking __read_mostly;
|
||||
|
||||
/* Enable/disable with kernel boot option. */
|
||||
static bool use_frontswap_selfshrink __initdata = true;
|
||||
|
||||
/*
|
||||
* The default values for the following parameters were deemed reasonable
|
||||
* by experimentation, may be workload-dependent, and can all be
|
||||
* adjusted via sysfs.
|
||||
*/
|
||||
|
||||
/* Control rate for frontswap shrinking. Higher hysteresis is slower. */
|
||||
static unsigned int frontswap_hysteresis __read_mostly = 20;
|
||||
|
||||
/*
|
||||
* Number of selfballoon worker invocations to wait before observing that
|
||||
* frontswap selfshrinking should commence. Note that selfshrinking does
|
||||
* not use a separate worker thread.
|
||||
*/
|
||||
static unsigned int frontswap_inertia __read_mostly = 3;
|
||||
|
||||
/* Countdown to next invocation of frontswap_shrink() */
|
||||
static unsigned long frontswap_inertia_counter;
|
||||
|
||||
/*
|
||||
* Invoked by the selfballoon worker thread, uses current number of pages
|
||||
* in frontswap (frontswap_curr_pages()), previous status, and control
|
||||
* values (hysteresis and inertia) to determine if frontswap should be
|
||||
* shrunk and what the new frontswap size should be. Note that
|
||||
* frontswap_shrink is essentially a partial swapoff that immediately
|
||||
* transfers pages from the "swap device" (frontswap) back into kernel
|
||||
* RAM; despite the name, frontswap "shrinking" is very different from
|
||||
* the "shrinker" interface used by the kernel MM subsystem to reclaim
|
||||
* memory.
|
||||
*/
|
||||
static void frontswap_selfshrink(void)
|
||||
{
|
||||
static unsigned long cur_frontswap_pages;
|
||||
static unsigned long last_frontswap_pages;
|
||||
static unsigned long tgt_frontswap_pages;
|
||||
|
||||
last_frontswap_pages = cur_frontswap_pages;
|
||||
cur_frontswap_pages = frontswap_curr_pages();
|
||||
if (!cur_frontswap_pages ||
|
||||
(cur_frontswap_pages > last_frontswap_pages)) {
|
||||
frontswap_inertia_counter = frontswap_inertia;
|
||||
return;
|
||||
}
|
||||
if (frontswap_inertia_counter && --frontswap_inertia_counter)
|
||||
return;
|
||||
if (cur_frontswap_pages <= frontswap_hysteresis)
|
||||
tgt_frontswap_pages = 0;
|
||||
else
|
||||
tgt_frontswap_pages = cur_frontswap_pages -
|
||||
(cur_frontswap_pages / frontswap_hysteresis);
|
||||
frontswap_shrink(tgt_frontswap_pages);
|
||||
}
|
||||
|
||||
/*
 * Handler for the "noselfshrink" boot option: clears
 * use_frontswap_selfshrink.  @s is ignored; returns 1.
 */
static int __init xen_nofrontswap_selfshrink_setup(char *s)
{
	use_frontswap_selfshrink = false;

	return 1;
}
|
||||
|
||||
__setup("noselfshrink", xen_nofrontswap_selfshrink_setup);
|
||||
|
||||
/* Disable with kernel boot option. */
|
||||
static bool use_selfballooning __initdata = true;
|
||||
|
||||
/*
 * Handler for the "noselfballooning" boot option: clears
 * use_selfballooning.  @s is ignored; returns 1.
 */
static int __init xen_noselfballooning_setup(char *s)
{
	use_selfballooning = false;

	return 1;
}
|
||||
|
||||
__setup("noselfballooning", xen_noselfballooning_setup);
|
||||
#else /* !CONFIG_FRONTSWAP */
|
||||
/* Enable with kernel boot option. */
|
||||
static bool use_selfballooning __initdata = false;
|
||||
|
||||
/*
 * Handler for the "selfballooning" boot option: sets use_selfballooning.
 * @s is ignored; returns 1.
 */
static int __init xen_selfballooning_setup(char *s)
{
	use_selfballooning = true;

	return 1;
}
|
||||
|
||||
__setup("selfballooning", xen_selfballooning_setup);
|
||||
#endif /* CONFIG_FRONTSWAP */
|
||||
|
||||
/*
|
||||
* Use current balloon size, the goal (vm_committed_as), and hysteresis
|
||||
* parameters to set a new target balloon size
|
||||
*/
|
||||
static void selfballoon_process(struct work_struct *work)
|
||||
{
|
||||
unsigned long cur_pages, goal_pages, tgt_pages;
|
||||
bool reset_timer = false;
|
||||
|
||||
if (xen_selfballooning_enabled) {
|
||||
cur_pages = balloon_stats.current_pages;
|
||||
tgt_pages = cur_pages; /* default is no change */
|
||||
goal_pages = percpu_counter_read_positive(&vm_committed_as) +
|
||||
balloon_stats.current_pages - totalram_pages;
|
||||
#ifdef CONFIG_FRONTSWAP
|
||||
/* allow space for frontswap pages to be repatriated */
|
||||
if (frontswap_selfshrinking && frontswap_enabled)
|
||||
goal_pages += frontswap_curr_pages();
|
||||
#endif
|
||||
if (cur_pages > goal_pages)
|
||||
tgt_pages = cur_pages -
|
||||
((cur_pages - goal_pages) /
|
||||
selfballoon_downhysteresis);
|
||||
else if (cur_pages < goal_pages)
|
||||
tgt_pages = cur_pages +
|
||||
((goal_pages - cur_pages) /
|
||||
selfballoon_uphysteresis);
|
||||
/* else if cur_pages == goal_pages, no change */
|
||||
balloon_set_new_target(tgt_pages);
|
||||
reset_timer = true;
|
||||
}
|
||||
#ifdef CONFIG_FRONTSWAP
|
||||
if (frontswap_selfshrinking && frontswap_enabled) {
|
||||
frontswap_selfshrink();
|
||||
reset_timer = true;
|
||||
}
|
||||
#endif
|
||||
if (reset_timer)
|
||||
schedule_delayed_work(&selfballoon_worker,
|
||||
selfballoon_interval * HZ);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SYSFS
|
||||
|
||||
#include <linux/sysdev.h>
|
||||
#include <linux/capability.h>
|
||||
|
||||
/*
 * Generate a sysdev "show" handler named show_<name> that formats the
 * given arguments into the sysfs buffer with sprintf().
 */
#define SELFBALLOON_SHOW(name, format, ...)				\
	static ssize_t show_##name(struct sys_device *dev,		\
				   struct sysdev_attribute *attr,	\
				   char *buf)				\
	{								\
		return sprintf(buf, format, ##__VA_ARGS__);		\
	}
|
||||
|
||||
SELFBALLOON_SHOW(selfballooning, "%d\n", xen_selfballooning_enabled);
|
||||
|
||||
static ssize_t store_selfballooning(struct sys_device *dev,
|
||||
struct sysdev_attribute *attr,
|
||||
const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
bool was_enabled = xen_selfballooning_enabled;
|
||||
unsigned long tmp;
|
||||
int err;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
err = strict_strtoul(buf, 10, &tmp);
|
||||
if (err || ((tmp != 0) && (tmp != 1)))
|
||||
return -EINVAL;
|
||||
|
||||
xen_selfballooning_enabled = !!tmp;
|
||||
if (!was_enabled && xen_selfballooning_enabled)
|
||||
schedule_delayed_work(&selfballoon_worker,
|
||||
selfballoon_interval * HZ);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static SYSDEV_ATTR(selfballooning, S_IRUGO | S_IWUSR,
|
||||
show_selfballooning, store_selfballooning);
|
||||
|
||||
SELFBALLOON_SHOW(selfballoon_interval, "%d\n", selfballoon_interval);
|
||||
|
||||
static ssize_t store_selfballoon_interval(struct sys_device *dev,
|
||||
struct sysdev_attribute *attr,
|
||||
const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
unsigned long val;
|
||||
int err;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
err = strict_strtoul(buf, 10, &val);
|
||||
if (err || val == 0)
|
||||
return -EINVAL;
|
||||
selfballoon_interval = val;
|
||||
return count;
|
||||
}
|
||||
|
||||
static SYSDEV_ATTR(selfballoon_interval, S_IRUGO | S_IWUSR,
|
||||
show_selfballoon_interval, store_selfballoon_interval);
|
||||
|
||||
SELFBALLOON_SHOW(selfballoon_downhys, "%d\n", selfballoon_downhysteresis);
|
||||
|
||||
static ssize_t store_selfballoon_downhys(struct sys_device *dev,
|
||||
struct sysdev_attribute *attr,
|
||||
const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
unsigned long val;
|
||||
int err;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
err = strict_strtoul(buf, 10, &val);
|
||||
if (err || val == 0)
|
||||
return -EINVAL;
|
||||
selfballoon_downhysteresis = val;
|
||||
return count;
|
||||
}
|
||||
|
||||
static SYSDEV_ATTR(selfballoon_downhysteresis, S_IRUGO | S_IWUSR,
|
||||
show_selfballoon_downhys, store_selfballoon_downhys);
|
||||
|
||||
|
||||
SELFBALLOON_SHOW(selfballoon_uphys, "%d\n", selfballoon_uphysteresis);
|
||||
|
||||
static ssize_t store_selfballoon_uphys(struct sys_device *dev,
|
||||
struct sysdev_attribute *attr,
|
||||
const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
unsigned long val;
|
||||
int err;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
err = strict_strtoul(buf, 10, &val);
|
||||
if (err || val == 0)
|
||||
return -EINVAL;
|
||||
selfballoon_uphysteresis = val;
|
||||
return count;
|
||||
}
|
||||
|
||||
static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR,
|
||||
show_selfballoon_uphys, store_selfballoon_uphys);
|
||||
|
||||
#ifdef CONFIG_FRONTSWAP
|
||||
SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking);
|
||||
|
||||
static ssize_t store_frontswap_selfshrinking(struct sys_device *dev,
|
||||
struct sysdev_attribute *attr,
|
||||
const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
bool was_enabled = frontswap_selfshrinking;
|
||||
unsigned long tmp;
|
||||
int err;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
err = strict_strtoul(buf, 10, &tmp);
|
||||
if (err || ((tmp != 0) && (tmp != 1)))
|
||||
return -EINVAL;
|
||||
frontswap_selfshrinking = !!tmp;
|
||||
if (!was_enabled && !xen_selfballooning_enabled &&
|
||||
frontswap_selfshrinking)
|
||||
schedule_delayed_work(&selfballoon_worker,
|
||||
selfballoon_interval * HZ);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static SYSDEV_ATTR(frontswap_selfshrinking, S_IRUGO | S_IWUSR,
|
||||
show_frontswap_selfshrinking, store_frontswap_selfshrinking);
|
||||
|
||||
SELFBALLOON_SHOW(frontswap_inertia, "%d\n", frontswap_inertia);
|
||||
|
||||
static ssize_t store_frontswap_inertia(struct sys_device *dev,
|
||||
struct sysdev_attribute *attr,
|
||||
const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
unsigned long val;
|
||||
int err;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
err = strict_strtoul(buf, 10, &val);
|
||||
if (err || val == 0)
|
||||
return -EINVAL;
|
||||
frontswap_inertia = val;
|
||||
frontswap_inertia_counter = val;
|
||||
return count;
|
||||
}
|
||||
|
||||
static SYSDEV_ATTR(frontswap_inertia, S_IRUGO | S_IWUSR,
|
||||
show_frontswap_inertia, store_frontswap_inertia);
|
||||
|
||||
SELFBALLOON_SHOW(frontswap_hysteresis, "%d\n", frontswap_hysteresis);
|
||||
|
||||
static ssize_t store_frontswap_hysteresis(struct sys_device *dev,
|
||||
struct sysdev_attribute *attr,
|
||||
const char *buf,
|
||||
size_t count)
|
||||
{
|
||||
unsigned long val;
|
||||
int err;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
err = strict_strtoul(buf, 10, &val);
|
||||
if (err || val == 0)
|
||||
return -EINVAL;
|
||||
frontswap_hysteresis = val;
|
||||
return count;
|
||||
}
|
||||
|
||||
static SYSDEV_ATTR(frontswap_hysteresis, S_IRUGO | S_IWUSR,
|
||||
show_frontswap_hysteresis, store_frontswap_hysteresis);
|
||||
|
||||
#endif /* CONFIG_FRONTSWAP */
|
||||
|
||||
static struct attribute *selfballoon_attrs[] = {
|
||||
&attr_selfballooning.attr,
|
||||
&attr_selfballoon_interval.attr,
|
||||
&attr_selfballoon_downhysteresis.attr,
|
||||
&attr_selfballoon_uphysteresis.attr,
|
||||
#ifdef CONFIG_FRONTSWAP
|
||||
&attr_frontswap_selfshrinking.attr,
|
||||
&attr_frontswap_hysteresis.attr,
|
||||
&attr_frontswap_inertia.attr,
|
||||
#endif
|
||||
NULL
|
||||
};
|
||||
|
||||
static struct attribute_group selfballoon_group = {
|
||||
.name = "selfballoon",
|
||||
.attrs = selfballoon_attrs
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
 * register_xen_selfballooning - attach the "selfballoon" attribute group
 * @sysdev: sysdev whose kobject receives the group
 *
 * Returns the result of sysfs_create_group() when CONFIG_SYSFS is set.
 * Without sysfs there is nothing to register, so -ENOSYS is returned; this
 * is the same error the inline stub reports when CONFIG_XEN_SELFBALLOONING
 * is off, rather than a bare -1 (which reads as -EPERM).
 */
int register_xen_selfballooning(struct sys_device *sysdev)
{
	int error = -ENOSYS;

#ifdef CONFIG_SYSFS
	error = sysfs_create_group(&sysdev->kobj, &selfballoon_group);
#endif
	return error;
}
|
||||
EXPORT_SYMBOL(register_xen_selfballooning);
|
||||
|
||||
static int __init xen_selfballoon_init(void)
|
||||
{
|
||||
bool enable = false;
|
||||
|
||||
if (!xen_domain())
|
||||
return -ENODEV;
|
||||
|
||||
if (xen_initial_domain()) {
|
||||
pr_info("xen/balloon: Xen selfballooning driver "
|
||||
"disabled for domain0.\n");
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
xen_selfballooning_enabled = tmem_enabled && use_selfballooning;
|
||||
if (xen_selfballooning_enabled) {
|
||||
pr_info("xen/balloon: Initializing Xen "
|
||||
"selfballooning driver.\n");
|
||||
enable = true;
|
||||
}
|
||||
#ifdef CONFIG_FRONTSWAP
|
||||
frontswap_selfshrinking = tmem_enabled && use_frontswap_selfshrink;
|
||||
if (frontswap_selfshrinking) {
|
||||
pr_info("xen/balloon: Initializing frontswap "
|
||||
"selfshrinking driver.\n");
|
||||
enable = true;
|
||||
}
|
||||
#endif
|
||||
if (!enable)
|
||||
return -ENODEV;
|
||||
|
||||
schedule_delayed_work(&selfballoon_worker, selfballoon_interval * HZ);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
subsys_initcall(xen_selfballoon_init);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
@ -378,26 +378,32 @@ static void xenbus_dev_release(struct device *dev)
|
||||
kfree(to_xenbus_device(dev));
|
||||
}
|
||||
|
||||
static ssize_t xendev_show_nodename(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
static ssize_t nodename_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
|
||||
}
|
||||
static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
|
||||
|
||||
static ssize_t xendev_show_devtype(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
static ssize_t devtype_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
|
||||
}
|
||||
static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
|
||||
|
||||
static ssize_t xendev_show_modalias(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
static ssize_t modalias_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
|
||||
return sprintf(buf, "%s:%s\n", dev->bus->name,
|
||||
to_xenbus_device(dev)->devicetype);
|
||||
}
|
||||
static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
|
||||
|
||||
struct device_attribute xenbus_dev_attrs[] = {
|
||||
__ATTR_RO(nodename),
|
||||
__ATTR_RO(devtype),
|
||||
__ATTR_RO(modalias),
|
||||
__ATTR_NULL
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(xenbus_dev_attrs);
|
||||
|
||||
int xenbus_probe_node(struct xen_bus_type *bus,
|
||||
const char *type,
|
||||
@ -449,25 +455,7 @@ int xenbus_probe_node(struct xen_bus_type *bus,
|
||||
if (err)
|
||||
goto fail;
|
||||
|
||||
err = device_create_file(&xendev->dev, &dev_attr_nodename);
|
||||
if (err)
|
||||
goto fail_unregister;
|
||||
|
||||
err = device_create_file(&xendev->dev, &dev_attr_devtype);
|
||||
if (err)
|
||||
goto fail_remove_nodename;
|
||||
|
||||
err = device_create_file(&xendev->dev, &dev_attr_modalias);
|
||||
if (err)
|
||||
goto fail_remove_devtype;
|
||||
|
||||
return 0;
|
||||
fail_remove_devtype:
|
||||
device_remove_file(&xendev->dev, &dev_attr_devtype);
|
||||
fail_remove_nodename:
|
||||
device_remove_file(&xendev->dev, &dev_attr_nodename);
|
||||
fail_unregister:
|
||||
device_unregister(&xendev->dev);
|
||||
fail:
|
||||
kfree(xendev);
|
||||
return err;
|
||||
|
@ -48,6 +48,8 @@ struct xen_bus_type
|
||||
struct bus_type bus;
|
||||
};
|
||||
|
||||
extern struct device_attribute xenbus_dev_attrs[];
|
||||
|
||||
extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
|
||||
extern int xenbus_dev_probe(struct device *_dev);
|
||||
extern int xenbus_dev_remove(struct device *_dev);
|
||||
|
@ -107,6 +107,9 @@ static int xenbus_uevent_backend(struct device *dev,
|
||||
if (xdev == NULL)
|
||||
return -ENODEV;
|
||||
|
||||
if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype))
|
||||
return -ENOMEM;
|
||||
|
||||
/* stuff we want to pass to /sbin/hotplug */
|
||||
if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype))
|
||||
return -ENOMEM;
|
||||
@ -183,10 +186,6 @@ static void frontend_changed(struct xenbus_watch *watch,
|
||||
xenbus_otherend_changed(watch, vec, len, 0);
|
||||
}
|
||||
|
||||
static struct device_attribute xenbus_backend_dev_attrs[] = {
|
||||
__ATTR_NULL
|
||||
};
|
||||
|
||||
static struct xen_bus_type xenbus_backend = {
|
||||
.root = "backend",
|
||||
.levels = 3, /* backend/type/<frontend>/<id> */
|
||||
@ -200,7 +199,7 @@ static struct xen_bus_type xenbus_backend = {
|
||||
.probe = xenbus_dev_probe,
|
||||
.remove = xenbus_dev_remove,
|
||||
.shutdown = xenbus_dev_shutdown,
|
||||
.dev_attrs = xenbus_backend_dev_attrs,
|
||||
.dev_attrs = xenbus_dev_attrs,
|
||||
},
|
||||
};
|
||||
|
||||
|
@ -81,10 +81,6 @@ static void backend_changed(struct xenbus_watch *watch,
|
||||
xenbus_otherend_changed(watch, vec, len, 1);
|
||||
}
|
||||
|
||||
static struct device_attribute xenbus_frontend_dev_attrs[] = {
|
||||
__ATTR_NULL
|
||||
};
|
||||
|
||||
static const struct dev_pm_ops xenbus_pm_ops = {
|
||||
.suspend = xenbus_dev_suspend,
|
||||
.resume = xenbus_dev_resume,
|
||||
@ -106,7 +102,7 @@ static struct xen_bus_type xenbus_frontend = {
|
||||
.probe = xenbus_dev_probe,
|
||||
.remove = xenbus_dev_remove,
|
||||
.shutdown = xenbus_dev_shutdown,
|
||||
.dev_attrs = xenbus_frontend_dev_attrs,
|
||||
.dev_attrs = xenbus_dev_attrs,
|
||||
|
||||
.pm = &xenbus_pm_ops,
|
||||
},
|
||||
|
@ -23,3 +23,13 @@ void balloon_set_new_target(unsigned long target);
|
||||
|
||||
int alloc_xenballooned_pages(int nr_pages, struct page** pages);
|
||||
void free_xenballooned_pages(int nr_pages, struct page** pages);
|
||||
|
||||
/* Only pointers to struct sys_device are used here. */
struct sys_device;

#ifdef CONFIG_XEN_SELFBALLOONING
extern int register_xen_selfballooning(struct sys_device *sysdev);
#else
/*
 * Stub used when CONFIG_XEN_SELFBALLOONING is not set: nothing is
 * registered and -ENOSYS is returned.
 */
static inline int register_xen_selfballooning(struct sys_device *sysdev)
{
	return -ENOSYS;
}
#endif
|
||||
|
@ -6,11 +6,13 @@ extern struct console xenboot_console;
|
||||
#ifdef CONFIG_HVC_XEN
|
||||
void xen_console_resume(void);
|
||||
void xen_raw_console_write(const char *str);
|
||||
__attribute__((format(printf, 1, 2)))
|
||||
void xen_raw_printk(const char *fmt, ...);
|
||||
#else
|
||||
static inline void xen_console_resume(void) { }
|
||||
static inline void xen_raw_console_write(const char *str) { }
|
||||
static inline void xen_raw_printk(const char *fmt, ...) { }
|
||||
static inline __attribute__((format(printf, 1, 2)))
|
||||
void xen_raw_printk(const char *fmt, ...) { }
|
||||
#endif
|
||||
|
||||
#endif /* XEN_HVC_CONSOLE_H */
|
||||
|
@ -450,6 +450,45 @@ struct start_info {
|
||||
int8_t cmd_line[MAX_GUEST_CMDLINE];
|
||||
};
|
||||
|
||||
/*
 * VGA console description for dom0.
 *
 * video_type holds one of the XEN_VGATYPE_* values; presumably it selects
 * which member of the union 'u' is meaningful (text_mode_3 or vesa_lfb) --
 * confirm against the Xen public headers.  Field order and widths must not
 * change.
 */
struct dom0_vga_console_info {
	uint8_t video_type;
#define XEN_VGATYPE_TEXT_MODE_3 0x03
#define XEN_VGATYPE_VESA_LFB    0x23

	union {
		struct {
			/* Font height, in pixels. */
			uint16_t font_height;
			/* Cursor location (column, row). */
			uint16_t cursor_x;
			uint16_t cursor_y;
			/* Number of rows and columns (dimensions in characters). */
			uint16_t rows;
			uint16_t columns;
		} text_mode_3;

		struct {
			/* Width and height, in pixels. */
			uint16_t width;
			uint16_t height;
			/* Bytes per scan line. */
			uint16_t bytes_per_line;
			/* Bits per pixel. */
			uint16_t bits_per_pixel;
			/* LFB physical address, and size (in units of 64kB). */
			uint32_t lfb_base;
			uint32_t lfb_size;
			/* RGB mask offsets and sizes, as defined by VBE 1.2+ */
			uint8_t red_pos;
			uint8_t red_size;
			uint8_t green_pos;
			uint8_t green_size;
			uint8_t blue_pos;
			uint8_t blue_size;
			uint8_t rsvd_pos;
			uint8_t rsvd_size;

			/* VESA capabilities (offset 0xa, VESA command 0x4f00). */
			uint32_t gbl_caps;
			/* Mode attributes (offset 0x0, VESA command 0x4f01). */
			uint16_t mode_attrs;
		} vesa_lfb;
	} u;
};
|
||||
|
||||
/* These flags are passed in the 'flags' field of start_info_t. */
|
||||
#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */
|
||||
#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */
|
||||
|
5
include/xen/tmem.h
Normal file
5
include/xen/tmem.h
Normal file
@ -0,0 +1,5 @@
|
||||
#ifndef _XEN_TMEM_H
|
||||
#define _XEN_TMEM_H
|
||||
/* defined in drivers/xen/tmem.c */
|
||||
extern int tmem_enabled;
|
||||
#endif /* _XEN_TMEM_H */
|
@ -223,7 +223,9 @@ int xenbus_free_evtchn(struct xenbus_device *dev, int port);
|
||||
|
||||
enum xenbus_state xenbus_read_driver_state(const char *path);
|
||||
|
||||
__attribute__((format(printf, 3, 4)))
|
||||
void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...);
|
||||
__attribute__((format(printf, 3, 4)))
|
||||
void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...);
|
||||
|
||||
const char *xenbus_strstate(enum xenbus_state state);
|
||||
|
Loading…
Reference in New Issue
Block a user