qemu/hw/ppc/spapr_vio.c
Alexey Kardashevskiy df7625d422 spapr_iommu: Introduce "enabled" state for TCE table
Currently TCE tables are created once at start and their sizes never
change. We are going to change that by introducing a Dynamic DMA windows
support where DMA configuration may change during the guest execution.

This changes spapr_tce_new_table() to create an empty zero-size IOMMU
memory region (IOMMU MR). Only LIOBN is assigned by the time of creation.
It still will be called once at the owner object (VIO or PHB) creation.

This introduces an "enabled" state for TCE table objects, some
helper functions are added:
- spapr_tce_table_enable() receives TCE table parameters, stores in
sPAPRTCETable and allocates a guest view of the TCE table
(in the user space or KVM) and sets the correct size on the IOMMU MR;
- spapr_tce_table_disable() disposes the table and resets the IOMMU MR
size; it is made public as the following DDW code will be using it.

This changes the PHB reset handler to do the default DMA initialization
instead of spapr_phb_realize(). This does not make differenct now but
later with more than just one DMA window, we will have to remove them all
and create the default one on a system reset.

No visible change in behaviour is expected except the actual table
will be reallocated every reset. We might optimize this later.

The other way to implement this would be dynamically create/remove
the TCE table QOM objects but this would make migration impossible
as the migration code expects all QOM objects to exist at the receiver
so we have to have TCE table objects created when migration begins.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2016-06-07 10:17:45 +10:00

706 lines
18 KiB
C

/*
* QEMU sPAPR VIO code
*
* Copyright (c) 2010 David Gibson, IBM Corporation <dwg@au1.ibm.com>
* Based on the s390 virtio bus code:
* Copyright (c) 2009 Alexander Graf <agraf@suse.de>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "hw/hw.h"
#include "qemu/log.h"
#include "sysemu/sysemu.h"
#include "hw/boards.h"
#include "hw/loader.h"
#include "elf.h"
#include "hw/sysbus.h"
#include "sysemu/kvm.h"
#include "sysemu/device_tree.h"
#include "kvm_ppc.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/xics.h"
#include <libfdt.h>
/* #define DEBUG_SPAPR */
#ifdef DEBUG_SPAPR
#define DPRINTF(fmt, ...) \
do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
do { } while (0)
#endif
static Property spapr_vio_props[] = {
DEFINE_PROP_UINT32("irq", VIOsPAPRDevice, irq, 0), \
DEFINE_PROP_END_OF_LIST(),
};
static char *spapr_vio_get_dev_name(DeviceState *qdev)
{
VIOsPAPRDevice *dev = VIO_SPAPR_DEVICE(qdev);
VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
char *name;
/* Device tree style name device@reg */
name = g_strdup_printf("%s@%x", pc->dt_name, dev->reg);
return name;
}
static void spapr_vio_bus_class_init(ObjectClass *klass, void *data)
{
BusClass *k = BUS_CLASS(klass);
k->get_dev_path = spapr_vio_get_dev_name;
k->get_fw_dev_path = spapr_vio_get_dev_name;
}
static const TypeInfo spapr_vio_bus_info = {
.name = TYPE_SPAPR_VIO_BUS,
.parent = TYPE_BUS,
.class_init = spapr_vio_bus_class_init,
.instance_size = sizeof(VIOsPAPRBus),
};
VIOsPAPRDevice *spapr_vio_find_by_reg(VIOsPAPRBus *bus, uint32_t reg)
{
BusChild *kid;
VIOsPAPRDevice *dev = NULL;
QTAILQ_FOREACH(kid, &bus->bus.children, sibling) {
dev = (VIOsPAPRDevice *)kid->child;
if (dev->reg == reg) {
return dev;
}
}
return NULL;
}
static int vio_make_devnode(VIOsPAPRDevice *dev,
void *fdt)
{
VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
int vdevice_off, node_off, ret;
char *dt_name;
vdevice_off = fdt_path_offset(fdt, "/vdevice");
if (vdevice_off < 0) {
return vdevice_off;
}
dt_name = spapr_vio_get_dev_name(DEVICE(dev));
node_off = fdt_add_subnode(fdt, vdevice_off, dt_name);
g_free(dt_name);
if (node_off < 0) {
return node_off;
}
ret = fdt_setprop_cell(fdt, node_off, "reg", dev->reg);
if (ret < 0) {
return ret;
}
if (pc->dt_type) {
ret = fdt_setprop_string(fdt, node_off, "device_type",
pc->dt_type);
if (ret < 0) {
return ret;
}
}
if (pc->dt_compatible) {
ret = fdt_setprop_string(fdt, node_off, "compatible",
pc->dt_compatible);
if (ret < 0) {
return ret;
}
}
if (dev->irq) {
uint32_t ints_prop[] = {cpu_to_be32(dev->irq), 0};
ret = fdt_setprop(fdt, node_off, "interrupts", ints_prop,
sizeof(ints_prop));
if (ret < 0) {
return ret;
}
}
ret = spapr_tcet_dma_dt(fdt, node_off, "ibm,my-dma-window", dev->tcet);
if (ret < 0) {
return ret;
}
if (pc->devnode) {
ret = (pc->devnode)(dev, fdt, node_off);
if (ret < 0) {
return ret;
}
}
return node_off;
}
/*
* CRQ handling
*/
static target_ulong h_reg_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong reg = args[0];
target_ulong queue_addr = args[1];
target_ulong queue_len = args[2];
VIOsPAPRDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
if (!dev) {
hcall_dprintf("Unit 0x" TARGET_FMT_lx " does not exist\n", reg);
return H_PARAMETER;
}
/* We can't grok a queue size bigger than 256M for now */
if (queue_len < 0x1000 || queue_len > 0x10000000) {
hcall_dprintf("Queue size too small or too big (0x" TARGET_FMT_lx
")\n", queue_len);
return H_PARAMETER;
}
/* Check queue alignment */
if (queue_addr & 0xfff) {
hcall_dprintf("Queue not aligned (0x" TARGET_FMT_lx ")\n", queue_addr);
return H_PARAMETER;
}
/* Check if device supports CRQs */
if (!dev->crq.SendFunc) {
hcall_dprintf("Device does not support CRQ\n");
return H_NOT_FOUND;
}
/* Already a queue ? */
if (dev->crq.qsize) {
hcall_dprintf("CRQ already registered\n");
return H_RESOURCE;
}
dev->crq.qladdr = queue_addr;
dev->crq.qsize = queue_len;
dev->crq.qnext = 0;
DPRINTF("CRQ for dev 0x" TARGET_FMT_lx " registered at 0x"
TARGET_FMT_lx "/0x" TARGET_FMT_lx "\n",
reg, queue_addr, queue_len);
return H_SUCCESS;
}
static target_ulong free_crq(VIOsPAPRDevice *dev)
{
dev->crq.qladdr = 0;
dev->crq.qsize = 0;
dev->crq.qnext = 0;
DPRINTF("CRQ for dev 0x%" PRIx32 " freed\n", dev->reg);
return H_SUCCESS;
}
static target_ulong h_free_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong reg = args[0];
VIOsPAPRDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
if (!dev) {
hcall_dprintf("Unit 0x" TARGET_FMT_lx " does not exist\n", reg);
return H_PARAMETER;
}
return free_crq(dev);
}
static target_ulong h_send_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong reg = args[0];
target_ulong msg_hi = args[1];
target_ulong msg_lo = args[2];
VIOsPAPRDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
uint64_t crq_mangle[2];
if (!dev) {
hcall_dprintf("Unit 0x" TARGET_FMT_lx " does not exist\n", reg);
return H_PARAMETER;
}
crq_mangle[0] = cpu_to_be64(msg_hi);
crq_mangle[1] = cpu_to_be64(msg_lo);
if (dev->crq.SendFunc) {
return dev->crq.SendFunc(dev, (uint8_t *)crq_mangle);
}
return H_HARDWARE;
}
static target_ulong h_enable_crq(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode, target_ulong *args)
{
target_ulong reg = args[0];
VIOsPAPRDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
if (!dev) {
hcall_dprintf("Unit 0x" TARGET_FMT_lx " does not exist\n", reg);
return H_PARAMETER;
}
return 0;
}
/* Returns negative error, 0 success, or positive: queue full */
int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq)
{
int rc;
uint8_t byte;
if (!dev->crq.qsize) {
fprintf(stderr, "spapr_vio_send_creq on uninitialized queue\n");
return -1;
}
/* Maybe do a fast path for KVM just writing to the pages */
rc = spapr_vio_dma_read(dev, dev->crq.qladdr + dev->crq.qnext, &byte, 1);
if (rc) {
return rc;
}
if (byte != 0) {
return 1;
}
rc = spapr_vio_dma_write(dev, dev->crq.qladdr + dev->crq.qnext + 8,
&crq[8], 8);
if (rc) {
return rc;
}
kvmppc_eieio();
rc = spapr_vio_dma_write(dev, dev->crq.qladdr + dev->crq.qnext, crq, 8);
if (rc) {
return rc;
}
dev->crq.qnext = (dev->crq.qnext + 16) % dev->crq.qsize;
if (dev->signal_state & 1) {
qemu_irq_pulse(spapr_vio_qirq(dev));
}
return 0;
}
/* "quiesce" handling */
static void spapr_vio_quiesce_one(VIOsPAPRDevice *dev)
{
if (dev->tcet) {
device_reset(DEVICE(dev->tcet));
}
free_crq(dev);
}
void spapr_vio_set_bypass(VIOsPAPRDevice *dev, bool bypass)
{
if (!dev->tcet) {
return;
}
memory_region_set_enabled(&dev->mrbypass, bypass);
memory_region_set_enabled(spapr_tce_get_iommu(dev->tcet), !bypass);
dev->tcet->bypass = bypass;
}
static void rtas_set_tce_bypass(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
VIOsPAPRBus *bus = spapr->vio_bus;
VIOsPAPRDevice *dev;
uint32_t unit, enable;
if (nargs != 2) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
}
unit = rtas_ld(args, 0);
enable = rtas_ld(args, 1);
dev = spapr_vio_find_by_reg(bus, unit);
if (!dev) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
}
if (!dev->tcet) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
}
spapr_vio_set_bypass(dev, !!enable);
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
}
static void rtas_quiesce(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token,
uint32_t nargs, target_ulong args,
uint32_t nret, target_ulong rets)
{
VIOsPAPRBus *bus = spapr->vio_bus;
BusChild *kid;
VIOsPAPRDevice *dev = NULL;
if (nargs != 0) {
rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
return;
}
QTAILQ_FOREACH(kid, &bus->bus.children, sibling) {
dev = (VIOsPAPRDevice *)kid->child;
spapr_vio_quiesce_one(dev);
}
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
}
static VIOsPAPRDevice *reg_conflict(VIOsPAPRDevice *dev)
{
VIOsPAPRBus *bus = SPAPR_VIO_BUS(dev->qdev.parent_bus);
BusChild *kid;
VIOsPAPRDevice *other;
/*
* Check for a device other than the given one which is already
* using the requested address. We have to open code this because
* the given dev might already be in the list.
*/
QTAILQ_FOREACH(kid, &bus->bus.children, sibling) {
other = VIO_SPAPR_DEVICE(kid->child);
if (other != dev && other->reg == dev->reg) {
return other;
}
}
return 0;
}
static void spapr_vio_busdev_reset(DeviceState *qdev)
{
VIOsPAPRDevice *dev = VIO_SPAPR_DEVICE(qdev);
VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
/* Shut down the request queue and TCEs if necessary */
spapr_vio_quiesce_one(dev);
dev->signal_state = 0;
spapr_vio_set_bypass(dev, false);
if (pc->reset) {
pc->reset(dev);
}
}
static void spapr_vio_busdev_realize(DeviceState *qdev, Error **errp)
{
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev;
VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
char *id;
Error *local_err = NULL;
if (dev->reg != -1) {
/*
* Explicitly assigned address, just verify that no-one else
* is using it. other mechanism). We have to open code this
* rather than using spapr_vio_find_by_reg() because sdev
* itself is already in the list.
*/
VIOsPAPRDevice *other = reg_conflict(dev);
if (other) {
error_setg(errp, "%s and %s devices conflict at address %#x",
object_get_typename(OBJECT(qdev)),
object_get_typename(OBJECT(&other->qdev)),
dev->reg);
return;
}
} else {
/* Need to assign an address */
VIOsPAPRBus *bus = SPAPR_VIO_BUS(dev->qdev.parent_bus);
do {
dev->reg = bus->next_reg++;
} while (reg_conflict(dev));
}
/* Don't overwrite ids assigned on the command line */
if (!dev->qdev.id) {
id = spapr_vio_get_dev_name(DEVICE(dev));
dev->qdev.id = id;
}
dev->irq = xics_alloc(spapr->icp, 0, dev->irq, false, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
if (pc->rtce_window_size) {
uint32_t liobn = SPAPR_VIO_LIOBN(dev->reg);
memory_region_init(&dev->mrroot, OBJECT(dev), "iommu-spapr-root",
ram_size);
memory_region_init_alias(&dev->mrbypass, OBJECT(dev),
"iommu-spapr-bypass", get_system_memory(),
0, ram_size);
memory_region_add_subregion_overlap(&dev->mrroot, 0, &dev->mrbypass, 1);
address_space_init(&dev->as, &dev->mrroot, qdev->id);
dev->tcet = spapr_tce_new_table(qdev, liobn);
spapr_tce_table_enable(dev->tcet, SPAPR_TCE_PAGE_SHIFT, 0,
pc->rtce_window_size >> SPAPR_TCE_PAGE_SHIFT);
dev->tcet->vdev = dev;
memory_region_add_subregion_overlap(&dev->mrroot, 0,
spapr_tce_get_iommu(dev->tcet), 2);
}
pc->realize(dev, errp);
}
static target_ulong h_vio_signal(PowerPCCPU *cpu, sPAPRMachineState *spapr,
target_ulong opcode,
target_ulong *args)
{
target_ulong reg = args[0];
target_ulong mode = args[1];
VIOsPAPRDevice *dev = spapr_vio_find_by_reg(spapr->vio_bus, reg);
VIOsPAPRDeviceClass *pc;
if (!dev) {
return H_PARAMETER;
}
pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
if (mode & ~pc->signal_mask) {
return H_PARAMETER;
}
dev->signal_state = mode;
return H_SUCCESS;
}
VIOsPAPRBus *spapr_vio_bus_init(void)
{
VIOsPAPRBus *bus;
BusState *qbus;
DeviceState *dev;
/* Create bridge device */
dev = qdev_create(NULL, TYPE_SPAPR_VIO_BRIDGE);
qdev_init_nofail(dev);
/* Create bus on bridge device */
qbus = qbus_create(TYPE_SPAPR_VIO_BUS, dev, "spapr-vio");
bus = SPAPR_VIO_BUS(qbus);
bus->next_reg = 0x71000000;
/* hcall-vio */
spapr_register_hypercall(H_VIO_SIGNAL, h_vio_signal);
/* hcall-crq */
spapr_register_hypercall(H_REG_CRQ, h_reg_crq);
spapr_register_hypercall(H_FREE_CRQ, h_free_crq);
spapr_register_hypercall(H_SEND_CRQ, h_send_crq);
spapr_register_hypercall(H_ENABLE_CRQ, h_enable_crq);
/* RTAS calls */
spapr_rtas_register(RTAS_IBM_SET_TCE_BYPASS, "ibm,set-tce-bypass",
rtas_set_tce_bypass);
spapr_rtas_register(RTAS_QUIESCE, "quiesce", rtas_quiesce);
return bus;
}
/* Represents sPAPR hcall VIO devices */
static int spapr_vio_bridge_init(SysBusDevice *dev)
{
/* nothing */
return 0;
}
static void spapr_vio_bridge_class_init(ObjectClass *klass, void *data)
{
SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
DeviceClass *dc = DEVICE_CLASS(klass);
dc->fw_name = "vdevice";
k->init = spapr_vio_bridge_init;
}
static const TypeInfo spapr_vio_bridge_info = {
.name = TYPE_SPAPR_VIO_BRIDGE,
.parent = TYPE_SYS_BUS_DEVICE,
.class_init = spapr_vio_bridge_class_init,
};
const VMStateDescription vmstate_spapr_vio = {
.name = "spapr_vio",
.version_id = 1,
.minimum_version_id = 1,
.fields = (VMStateField[]) {
/* Sanity check */
VMSTATE_UINT32_EQUAL(reg, VIOsPAPRDevice),
VMSTATE_UINT32_EQUAL(irq, VIOsPAPRDevice),
/* General VIO device state */
VMSTATE_UINT64(signal_state, VIOsPAPRDevice),
VMSTATE_UINT64(crq.qladdr, VIOsPAPRDevice),
VMSTATE_UINT32(crq.qsize, VIOsPAPRDevice),
VMSTATE_UINT32(crq.qnext, VIOsPAPRDevice),
VMSTATE_END_OF_LIST()
},
};
static void vio_spapr_device_class_init(ObjectClass *klass, void *data)
{
DeviceClass *k = DEVICE_CLASS(klass);
k->realize = spapr_vio_busdev_realize;
k->reset = spapr_vio_busdev_reset;
k->bus_type = TYPE_SPAPR_VIO_BUS;
k->props = spapr_vio_props;
}
static const TypeInfo spapr_vio_type_info = {
.name = TYPE_VIO_SPAPR_DEVICE,
.parent = TYPE_DEVICE,
.instance_size = sizeof(VIOsPAPRDevice),
.abstract = true,
.class_size = sizeof(VIOsPAPRDeviceClass),
.class_init = vio_spapr_device_class_init,
};
static void spapr_vio_register_types(void)
{
type_register_static(&spapr_vio_bus_info);
type_register_static(&spapr_vio_bridge_info);
type_register_static(&spapr_vio_type_info);
}
type_init(spapr_vio_register_types)
static int compare_reg(const void *p1, const void *p2)
{
VIOsPAPRDevice const *dev1, *dev2;
dev1 = (VIOsPAPRDevice *)*(DeviceState **)p1;
dev2 = (VIOsPAPRDevice *)*(DeviceState **)p2;
if (dev1->reg < dev2->reg) {
return -1;
}
if (dev1->reg == dev2->reg) {
return 0;
}
/* dev1->reg > dev2->reg */
return 1;
}
int spapr_populate_vdevice(VIOsPAPRBus *bus, void *fdt)
{
DeviceState *qdev, **qdevs;
BusChild *kid;
int i, num, ret = 0;
/* Count qdevs on the bus list */
num = 0;
QTAILQ_FOREACH(kid, &bus->bus.children, sibling) {
num++;
}
/* Copy out into an array of pointers */
qdevs = g_malloc(sizeof(qdev) * num);
num = 0;
QTAILQ_FOREACH(kid, &bus->bus.children, sibling) {
qdevs[num++] = kid->child;
}
/* Sort the array */
qsort(qdevs, num, sizeof(qdev), compare_reg);
/* Hack alert. Give the devices to libfdt in reverse order, we happen
* to know that will mean they are in forward order in the tree. */
for (i = num - 1; i >= 0; i--) {
VIOsPAPRDevice *dev = (VIOsPAPRDevice *)(qdevs[i]);
ret = vio_make_devnode(dev, fdt);
if (ret < 0) {
goto out;
}
}
ret = 0;
out:
g_free(qdevs);
return ret;
}
int spapr_populate_chosen_stdout(void *fdt, VIOsPAPRBus *bus)
{
VIOsPAPRDevice *dev;
char *name, *path;
int ret, offset;
dev = spapr_vty_get_default(bus);
if (!dev)
return 0;
offset = fdt_path_offset(fdt, "/chosen");
if (offset < 0) {
return offset;
}
name = spapr_vio_get_dev_name(DEVICE(dev));
path = g_strdup_printf("/vdevice/%s", name);
ret = fdt_setprop_string(fdt, offset, "linux,stdout-path", path);
g_free(name);
g_free(path);
return ret;
}