virtio: features, cleanups

virtio net failover
 rcu cleanup
 
 Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
 -----BEGIN PGP SIGNATURE-----
 
 iQEcBAABAgAGBQJduMQGAAoJECgfDbjSjVRpVnUH/1XoBNn5ZcWr5CHyaXDijrWa
 UsgOps4MkbN9EVLSmGlvixY3zPSey/NQsU5xL0lgnhpL4DlukB4V8sqUTArAGVVV
 1DMUkVCdmns1LEV3D36xRiV/Fm2Ah6HtquIv1v8ms8kIVphncJkfEzxFBz1u1euz
 K0ZfYE404wCODr2gst7kFXF6a0M9fhjUUkllbjeFVm0xNj5yTkBZAv4f5HgBPk02
 Ia6fPXQpFO+0y2oCMBtUzDgj+AH2WAQQgxrBSaQlEoaafVZdtu+3lx/kevjpTO0O
 laDdPnBBXLk1NlSum+SmlssHXj/EZJHBEKHXr5zPa+oUYFquaFmVfh6lGNST0Qs=
 =2tpZ
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

virtio: features, cleanups

virtio net failover
rcu cleanup

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Tue 29 Oct 2019 22:58:14 GMT
# gpg:                using RSA key 281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>" [full]
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>" [full]
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  virtio: Use auto rcu_read macros
  virtio_net: use RCU_READ_LOCK_GUARD
  virtio/vhost: Use auto_rcu_read macros
  vfio: unplug failover primary device before migration
  net/virtio: add failover support
  libqos: tolerate wait-unplug migration state
  migration: add new migration state wait-unplug
  migration: allow unplug during migration for failover devices
  qapi: add failover negotiated event
  qapi: add unplug primary event
  pci: mark device having guest unplug request pending
  pci: mark devices partially unplugged
  pci: add option for net failover
  qdev/qbus: add hidden device support

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2019-10-30 06:15:44 +00:00
commit 63df86b264
24 changed files with 673 additions and 65 deletions

View File

@ -1435,6 +1435,7 @@ S: Odd Fixes
F: hw/net/
F: include/hw/net/
F: tests/virtio-net-test.c
F: docs/virtio-net-failover.rst
T: git https://github.com/jasowang/qemu.git net
Parallel NOR Flash devices

View File

@ -0,0 +1,68 @@
========================
QEMU virtio-net standby (net_failover)
========================
This document explains the setup and usage of virtio-net standby feature which
is used to create a net_failover pair of devices.
The general idea is that we have a pair of devices, a (vfio-)pci and a
virtio-net device. Before migration the vfio device is unplugged and data flows
through the virtio-net device, on the target side another vfio-pci device is
plugged in to take over the data-path. In the guest the net_failover kernel
module will pair net devices with the same MAC address.
The two devices are called primary and standby device. The fast hardware based
networking device is called the primary device and the virtio-net device is the
standby device.
Restrictions
------------
Currently only PCIe devices are allowed as primary devices, this restriction
can be lifted in the future with enhanced QEMU support. Also, only networking
devices are allowed as primary device. The user needs to ensure that primary
and standby devices are not plugged into the same PCIe slot.
Usecase
-------
Virtio-net standby allows easy migration while using a passed-through fast
networking device by falling back to a virtio-net device for the duration of
the migration. It is like a simple version of a bond, the difference is that it
requires no configuration in the guest. When a guest is live-migrated to
another host QEMU will unplug the primary device via the PCIe based hotplug
handler and traffic will go through the virtio-net device. On the target
system the primary device will be automatically plugged back and the
net_failover module registers it again as the primary device.
Usage
-----
The primary device can be hotplugged or be part of the startup configuration
-device virtio-net-pci,netdev=hostnet1,id=net1,mac=52:54:00:6f:55:cc, \
bus=root2,failover=on
With the parameter failover=on the VIRTIO_NET_F_STANDBY feature will be enabled.
-device vfio-pci,host=5e:00.2,id=hostdev0,bus=root1,failover_pair_id=net1
failover_pair_id references the id of the virtio-net standby device. This
is only for pairing the devices within QEMU. The guest kernel module
net_failover will match devices with identical MAC addresses.
Hotplug
-------
Both primary and standby device can be hotplugged via the QEMU monitor. Note
that if the virtio-net device is plugged first a warning will be issued that it
couldn't find the primary device.
Migration
---------
A new migration state wait-unplug was added for this feature. If failover primary
devices are present in the configuration, migration will go into this state.
It will wait until the device unplug is completed in the guest and then move into
active state. On the target system the primary devices will be automatically hotplugged
when the feature bit was negotiated for the virtio-net standby device.

View File

@ -212,6 +212,30 @@ void device_listener_unregister(DeviceListener *listener)
QTAILQ_REMOVE(&device_listeners, listener, link);
}
bool qdev_should_hide_device(QemuOpts *opts)
{
int rc = -1;
DeviceListener *listener;
QTAILQ_FOREACH(listener, &device_listeners, link) {
if (listener->should_be_hidden) {
/*
* should_be_hidden_will return
* 1 if device matches opts and it should be hidden
* 0 if device matches opts and should not be hidden
* -1 if device doesn't match ops
*/
rc = listener->should_be_hidden(listener, opts);
}
if (rc > 0) {
break;
}
}
return rc > 0;
}
void qdev_set_legacy_instance_id(DeviceState *dev, int alias_id,
int required_for_version)
{
@ -972,6 +996,7 @@ static void device_initfn(Object *obj)
dev->instance_id_alias = -1;
dev->realized = false;
dev->allow_unplug_during_migration = false;
object_property_add_bool(obj, "realized",
device_get_realized, device_set_realized, NULL);

View File

@ -12,6 +12,7 @@
*/
#include "qemu/osdep.h"
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "qemu/main-loop.h"
#include "qemu/module.h"
@ -21,6 +22,10 @@
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/option.h"
#include "qemu/option_int.h"
#include "qemu/config-file.h"
#include "qapi/qmp/qdict.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "net/announce.h"
@ -28,11 +33,15 @@
#include "qapi/error.h"
#include "qapi/qapi-events-net.h"
#include "hw/qdev-properties.h"
#include "qapi/qapi-types-migration.h"
#include "qapi/qapi-events-migration.h"
#include "hw/virtio/virtio-access.h"
#include "migration/misc.h"
#include "standard-headers/linux/ethtool.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "monitor/qdev.h"
#include "hw/pci/pci.h"
#define VIRTIO_NET_VM_VERSION 11
@ -746,9 +755,99 @@ static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
return virtio_net_guest_offloads_by_features(vdev->guest_features);
}
static void failover_add_primary(VirtIONet *n, Error **errp)
{
Error *err = NULL;
n->primary_device_opts = qemu_opts_find(qemu_find_opts("device"),
n->primary_device_id);
if (n->primary_device_opts) {
n->primary_dev = qdev_device_add(n->primary_device_opts, &err);
if (err) {
qemu_opts_del(n->primary_device_opts);
}
if (n->primary_dev) {
n->primary_bus = n->primary_dev->parent_bus;
if (err) {
qdev_unplug(n->primary_dev, &err);
qdev_set_id(n->primary_dev, "");
}
}
} else {
error_setg(errp, "Primary device not found");
error_append_hint(errp, "Virtio-net failover will not work. Make "
"sure primary device has parameter"
" failover_pair_id=<virtio-net-id>\n");
}
if (err) {
error_propagate(errp, err);
}
}
static int is_my_primary(void *opaque, QemuOpts *opts, Error **errp)
{
VirtIONet *n = opaque;
int ret = 0;
const char *standby_id = qemu_opt_get(opts, "failover_pair_id");
if (standby_id != NULL && (g_strcmp0(standby_id, n->netclient_name) == 0)) {
n->primary_device_id = g_strdup(opts->id);
ret = 1;
}
return ret;
}
static DeviceState *virtio_net_find_primary(VirtIONet *n, Error **errp)
{
DeviceState *dev = NULL;
Error *err = NULL;
if (qemu_opts_foreach(qemu_find_opts("device"),
is_my_primary, n, &err)) {
if (err) {
error_propagate(errp, err);
return NULL;
}
if (n->primary_device_id) {
dev = qdev_find_recursive(sysbus_get_default(),
n->primary_device_id);
} else {
error_setg(errp, "Primary device id not found");
return NULL;
}
}
return dev;
}
static DeviceState *virtio_connect_failover_devices(VirtIONet *n,
DeviceState *dev,
Error **errp)
{
DeviceState *prim_dev = NULL;
Error *err = NULL;
prim_dev = virtio_net_find_primary(n, &err);
if (prim_dev) {
n->primary_device_id = g_strdup(prim_dev->id);
n->primary_device_opts = prim_dev->opts;
} else {
if (err) {
error_propagate(errp, err);
}
}
return prim_dev;
}
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
VirtIONet *n = VIRTIO_NET(vdev);
Error *err = NULL;
int i;
if (n->mtu_bypass_backend &&
@ -790,6 +889,28 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
} else {
memset(n->vlans, 0xff, MAX_VLAN >> 3);
}
if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) {
qapi_event_send_failover_negotiated(n->netclient_name);
atomic_set(&n->primary_should_be_hidden, false);
failover_add_primary(n, &err);
if (err) {
n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
if (err) {
goto out_err;
}
failover_add_primary(n, &err);
if (err) {
goto out_err;
}
}
}
return;
out_err:
if (err) {
warn_report_err(err);
}
}
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
@ -1369,12 +1490,9 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf,
size_t size)
{
ssize_t r;
RCU_READ_LOCK_GUARD();
rcu_read_lock();
r = virtio_net_receive_rcu(nc, buf, size);
rcu_read_unlock();
return r;
return virtio_net_receive_rcu(nc, buf, size);
}
static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain,
@ -2650,6 +2768,150 @@ void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
n->netclient_type = g_strdup(type);
}
static bool failover_unplug_primary(VirtIONet *n)
{
HotplugHandler *hotplug_ctrl;
PCIDevice *pci_dev;
Error *err = NULL;
hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
if (hotplug_ctrl) {
pci_dev = PCI_DEVICE(n->primary_dev);
pci_dev->partially_hotplugged = true;
hotplug_handler_unplug_request(hotplug_ctrl, n->primary_dev, &err);
if (err) {
error_report_err(err);
return false;
}
} else {
return false;
}
return true;
}
static bool failover_replug_primary(VirtIONet *n, Error **errp)
{
HotplugHandler *hotplug_ctrl;
PCIDevice *pdev = PCI_DEVICE(n->primary_dev);
if (!pdev->partially_hotplugged) {
return true;
}
if (!n->primary_device_opts) {
n->primary_device_opts = qemu_opts_from_qdict(
qemu_find_opts("device"),
n->primary_device_dict, errp);
}
if (n->primary_device_opts) {
if (n->primary_dev) {
n->primary_bus = n->primary_dev->parent_bus;
}
qdev_set_parent_bus(n->primary_dev, n->primary_bus);
n->primary_should_be_hidden = false;
qemu_opt_set_bool(n->primary_device_opts,
"partially_hotplugged", true, errp);
hotplug_ctrl = qdev_get_hotplug_handler(n->primary_dev);
if (hotplug_ctrl) {
hotplug_handler_pre_plug(hotplug_ctrl, n->primary_dev, errp);
hotplug_handler_plug(hotplug_ctrl, n->primary_dev, errp);
}
if (!n->primary_dev) {
error_setg(errp, "virtio_net: couldn't find primary device");
}
}
return *errp != NULL;
}
static void virtio_net_handle_migration_primary(VirtIONet *n,
MigrationState *s)
{
bool should_be_hidden;
Error *err = NULL;
should_be_hidden = atomic_read(&n->primary_should_be_hidden);
if (!n->primary_dev) {
n->primary_dev = virtio_connect_failover_devices(n, n->qdev, &err);
if (!n->primary_dev) {
return;
}
}
if (migration_in_setup(s) && !should_be_hidden &&
n->primary_dev) {
if (failover_unplug_primary(n)) {
vmstate_unregister(n->primary_dev, qdev_get_vmsd(n->primary_dev),
n->primary_dev);
qapi_event_send_unplug_primary(n->primary_device_id);
atomic_set(&n->primary_should_be_hidden, true);
} else {
warn_report("couldn't unplug primary device");
}
} else if (migration_has_failed(s)) {
/* We already unplugged the device let's plugged it back */
if (!failover_replug_primary(n, &err)) {
if (err) {
error_report_err(err);
}
}
}
}
static void virtio_net_migration_state_notifier(Notifier *notifier, void *data)
{
MigrationState *s = data;
VirtIONet *n = container_of(notifier, VirtIONet, migration_state);
virtio_net_handle_migration_primary(n, s);
}
static int virtio_net_primary_should_be_hidden(DeviceListener *listener,
QemuOpts *device_opts)
{
VirtIONet *n = container_of(listener, VirtIONet, primary_listener);
bool match_found;
bool hide;
n->primary_device_dict = qemu_opts_to_qdict(device_opts,
n->primary_device_dict);
if (n->primary_device_dict) {
g_free(n->standby_id);
n->standby_id = g_strdup(qdict_get_try_str(n->primary_device_dict,
"failover_pair_id"));
}
if (device_opts && g_strcmp0(n->standby_id, n->netclient_name) == 0) {
match_found = true;
} else {
match_found = false;
hide = false;
g_free(n->standby_id);
n->primary_device_dict = NULL;
goto out;
}
n->primary_device_opts = device_opts;
/* primary_should_be_hidden is set during feature negotiation */
hide = atomic_read(&n->primary_should_be_hidden);
if (n->primary_device_dict) {
g_free(n->primary_device_id);
n->primary_device_id = g_strdup(qdict_get_try_str(
n->primary_device_dict, "id"));
if (!n->primary_device_id) {
warn_report("primary_device_id not set");
}
}
out:
if (match_found && hide) {
return 1;
} else if (match_found && !hide) {
return 0;
} else {
return -1;
}
}
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
@ -2680,6 +2942,16 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
n->host_features |= (1ULL << VIRTIO_NET_F_SPEED_DUPLEX);
}
if (n->failover) {
n->primary_listener.should_be_hidden =
virtio_net_primary_should_be_hidden;
atomic_set(&n->primary_should_be_hidden, true);
device_listener_register(&n->primary_listener);
n->migration_state.notify = virtio_net_migration_state_notifier;
add_migration_state_change_notifier(&n->migration_state);
n->host_features |= (1ULL << VIRTIO_NET_F_STANDBY);
}
virtio_net_set_config_size(n, n->host_features);
virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
@ -2802,6 +3074,13 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
g_free(n->mac_table.macs);
g_free(n->vlans);
if (n->failover) {
g_free(n->primary_device_id);
g_free(n->standby_id);
qobject_unref(n->primary_device_dict);
n->primary_device_dict = NULL;
}
max_queues = n->multiqueue ? n->max_queues : 1;
for (i = 0; i < max_queues; i++) {
virtio_net_del_queue(n, i);
@ -2839,6 +3118,23 @@ static int virtio_net_pre_save(void *opaque)
return 0;
}
static bool primary_unplug_pending(void *opaque)
{
DeviceState *dev = opaque;
VirtIODevice *vdev = VIRTIO_DEVICE(dev);
VirtIONet *n = VIRTIO_NET(vdev);
return n->primary_dev ? n->primary_dev->pending_deleted_event : false;
}
static bool dev_unplug_pending(void *opaque)
{
DeviceState *dev = opaque;
VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
return vdc->primary_unplug_pending(dev);
}
static const VMStateDescription vmstate_virtio_net = {
.name = "virtio-net",
.minimum_version_id = VIRTIO_NET_VM_VERSION,
@ -2848,6 +3144,7 @@ static const VMStateDescription vmstate_virtio_net = {
VMSTATE_END_OF_LIST()
},
.pre_save = virtio_net_pre_save,
.dev_unplug_pending = dev_unplug_pending,
};
static Property virtio_net_properties[] = {
@ -2909,6 +3206,7 @@ static Property virtio_net_properties[] = {
true),
DEFINE_PROP_INT32("speed", VirtIONet, net_conf.speed, SPEED_UNKNOWN),
DEFINE_PROP_STRING("duplex", VirtIONet, net_conf.duplex_str),
DEFINE_PROP_BOOL("failover", VirtIONet, failover, false),
DEFINE_PROP_END_OF_LIST(),
};
@ -2934,6 +3232,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data)
vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
vdc->post_load = virtio_net_post_load_virtio;
vdc->vmsd = &vmstate_virtio_net_device;
vdc->primary_unplug_pending = primary_unplug_pending;
}
static const TypeInfo virtio_net_info = {

View File

@ -75,6 +75,8 @@ static Property pci_props[] = {
QEMU_PCIE_LNKSTA_DLLLA_BITNR, true),
DEFINE_PROP_BIT("x-pcie-extcap-init", PCIDevice, cap_present,
QEMU_PCIE_EXTCAP_INIT_BITNR, true),
DEFINE_PROP_STRING("failover_pair_id", PCIDevice,
failover_pair_id),
DEFINE_PROP_END_OF_LIST()
};
@ -2077,6 +2079,7 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
ObjectClass *klass = OBJECT_CLASS(pc);
Error *local_err = NULL;
bool is_default_rom;
uint16_t class_id;
/* initialize cap_present for pci_is_express() and pci_config_size(),
* Note that hybrid PCIs are not set automatically and need to manage
@ -2101,6 +2104,35 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
}
}
if (pci_dev->failover_pair_id) {
if (!pci_bus_is_express(pci_get_bus(pci_dev))) {
error_setg(errp, "failover primary device must be on "
"PCIExpress bus");
error_propagate(errp, local_err);
pci_qdev_unrealize(DEVICE(pci_dev), NULL);
return;
}
class_id = pci_get_word(pci_dev->config + PCI_CLASS_DEVICE);
if (class_id != PCI_CLASS_NETWORK_ETHERNET) {
error_setg(errp, "failover primary device is not an "
"Ethernet device");
error_propagate(errp, local_err);
pci_qdev_unrealize(DEVICE(pci_dev), NULL);
return;
}
if (!(pci_dev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION)
&& (PCI_FUNC(pci_dev->devfn) == 0)) {
qdev->allow_unplug_during_migration = true;
} else {
error_setg(errp, "failover: primary device must be in its own "
"PCI slot");
error_propagate(errp, local_err);
pci_qdev_unrealize(DEVICE(pci_dev), NULL);
return;
}
qdev->allow_unplug_during_migration = true;
}
/* rom loading */
is_default_rom = false;
if (pci_dev->romfile == NULL && pc->romfile != NULL) {

View File

@ -456,6 +456,10 @@ static void pcie_unplug_device(PCIBus *bus, PCIDevice *dev, void *opaque)
{
HotplugHandler *hotplug_ctrl = qdev_get_hotplug_handler(DEVICE(dev));
if (dev->partially_hotplugged) {
dev->qdev.pending_deleted_event = false;
return;
}
hotplug_handler_unplug(hotplug_ctrl, DEVICE(dev), &error_abort);
object_unparent(OBJECT(dev));
}
@ -473,6 +477,8 @@ void pcie_cap_slot_unplug_request_cb(HotplugHandler *hotplug_dev,
return;
}
dev->pending_deleted_event = true;
/* In case user cancel the operation of multi-function hot-add,
* remove the function that is unexposed to guest individually,
* without interaction with guest.

View File

@ -40,6 +40,7 @@
#include "pci.h"
#include "trace.h"
#include "qapi/error.h"
#include "migration/blocker.h"
#define TYPE_VFIO_PCI "vfio-pci"
#define PCI_VFIO(obj) OBJECT_CHECK(VFIOPCIDevice, obj, TYPE_VFIO_PCI)
@ -2732,6 +2733,17 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
return;
}
if (!pdev->failover_pair_id) {
error_setg(&vdev->migration_blocker,
"VFIO device doesn't support migration");
ret = migrate_add_blocker(vdev->migration_blocker, &err);
if (err) {
error_propagate(errp, err);
error_free(vdev->migration_blocker);
return;
}
}
vdev->vbasedev.name = g_path_get_basename(vdev->vbasedev.sysfsdev);
vdev->vbasedev.ops = &vfio_pci_ops;
vdev->vbasedev.type = VFIO_DEVICE_TYPE_PCI;
@ -3008,6 +3020,10 @@ out_teardown:
vfio_bars_exit(vdev);
error:
error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
if (vdev->migration_blocker) {
migrate_del_blocker(vdev->migration_blocker);
error_free(vdev->migration_blocker);
}
}
static void vfio_instance_finalize(Object *obj)
@ -3019,6 +3035,10 @@ static void vfio_instance_finalize(Object *obj)
vfio_bars_finalize(vdev);
g_free(vdev->emulated_config_bits);
g_free(vdev->rom);
if (vdev->migration_blocker) {
migrate_del_blocker(vdev->migration_blocker);
error_free(vdev->migration_blocker);
}
/*
* XXX Leaking igd_opregion is not an oversight, we can't remove the
* fw_cfg entry therefore leaking this allocation seems like the safest
@ -3151,11 +3171,6 @@ static Property vfio_pci_dev_properties[] = {
DEFINE_PROP_END_OF_LIST(),
};
static const VMStateDescription vfio_pci_vmstate = {
.name = "vfio-pci",
.unmigratable = 1,
};
static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
@ -3163,7 +3178,6 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
dc->reset = vfio_pci_reset;
dc->props = vfio_pci_dev_properties;
dc->vmsd = &vfio_pci_vmstate;
dc->desc = "VFIO-based PCI device assignment";
set_bit(DEVICE_CATEGORY_MISC, dc->categories);
pdc->realize = vfio_realize;

View File

@ -168,6 +168,7 @@ typedef struct VFIOPCIDevice {
bool no_vfio_ioeventfd;
bool enable_ramfb;
VFIODisplay *dpy;
Error *migration_blocker;
} VFIOPCIDevice;
uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);

View File

@ -924,7 +924,7 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
uint64_t uaddr, len;
int ret = -EFAULT;
rcu_read_lock();
RCU_READ_LOCK_GUARD();
trace_vhost_iotlb_miss(dev, 1);
@ -956,8 +956,6 @@ int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
trace_vhost_iotlb_miss(dev, 2);
out:
rcu_read_unlock();
return ret;
}

View File

@ -387,7 +387,8 @@ static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
{
rcu_read_lock();
RCU_READ_LOCK_GUARD();
if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
vring_set_avail_event(vq, vring_avail_idx(vq));
} else if (enable) {
@ -399,7 +400,6 @@ static void virtio_queue_split_set_notification(VirtQueue *vq, int enable)
/* Expose avail event/used flags before caller checks the avail idx. */
smp_mb();
}
rcu_read_unlock();
}
static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
@ -408,7 +408,7 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
VRingPackedDescEvent e;
VRingMemoryRegionCaches *caches;
rcu_read_lock();
RCU_READ_LOCK_GUARD();
caches = vring_get_region_caches(vq);
vring_packed_event_read(vq->vdev, &caches->used, &e);
@ -429,7 +429,6 @@ static void virtio_queue_packed_set_notification(VirtQueue *vq, int enable)
/* Expose avail event/used flags before caller checks the avail idx. */
smp_mb();
}
rcu_read_unlock();
}
void virtio_queue_set_notification(VirtQueue *vq, int enable)
@ -577,9 +576,8 @@ static int virtio_queue_split_empty(VirtQueue *vq)
return 0;
}
rcu_read_lock();
RCU_READ_LOCK_GUARD();
empty = vring_avail_idx(vq) == vq->last_avail_idx;
rcu_read_unlock();
return empty;
}
@ -601,12 +599,8 @@ static int virtio_queue_packed_empty_rcu(VirtQueue *vq)
static int virtio_queue_packed_empty(VirtQueue *vq)
{
bool empty;
rcu_read_lock();
empty = virtio_queue_packed_empty_rcu(vq);
rcu_read_unlock();
return empty;
RCU_READ_LOCK_GUARD();
return virtio_queue_packed_empty_rcu(vq);
}
int virtio_queue_empty(VirtQueue *vq)
@ -859,10 +853,9 @@ void virtqueue_flush(VirtQueue *vq, unsigned int count)
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
unsigned int len)
{
rcu_read_lock();
RCU_READ_LOCK_GUARD();
virtqueue_fill(vq, elem, len, 0);
virtqueue_flush(vq, 1);
rcu_read_unlock();
}
/* Called within rcu_read_lock(). */
@ -943,7 +936,8 @@ static void virtqueue_split_get_avail_bytes(VirtQueue *vq,
int64_t len = 0;
int rc;
rcu_read_lock();
RCU_READ_LOCK_GUARD();
idx = vq->last_avail_idx;
total_bufs = in_total = out_total = 0;
@ -1033,7 +1027,6 @@ done:
if (out_bytes) {
*out_bytes = out_total;
}
rcu_read_unlock();
return;
err:
@ -1083,7 +1076,7 @@ static void virtqueue_packed_get_avail_bytes(VirtQueue *vq,
VRingPackedDesc desc;
bool wrap_counter;
rcu_read_lock();
RCU_READ_LOCK_GUARD();
idx = vq->last_avail_idx;
wrap_counter = vq->last_avail_wrap_counter;
total_bufs = in_total = out_total = 0;
@ -1176,7 +1169,6 @@ done:
if (out_bytes) {
*out_bytes = out_total;
}
rcu_read_unlock();
return;
err:
@ -1360,7 +1352,7 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
VRingDesc desc;
int rc;
rcu_read_lock();
RCU_READ_LOCK_GUARD();
if (virtio_queue_empty_rcu(vq)) {
goto done;
}
@ -1469,7 +1461,6 @@ static void *virtqueue_split_pop(VirtQueue *vq, size_t sz)
trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
address_space_cache_destroy(&indirect_desc_cache);
rcu_read_unlock();
return elem;
@ -1494,7 +1485,7 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
uint16_t id;
int rc;
rcu_read_lock();
RCU_READ_LOCK_GUARD();
if (virtio_queue_packed_empty_rcu(vq)) {
goto done;
}
@ -1600,7 +1591,6 @@ static void *virtqueue_packed_pop(VirtQueue *vq, size_t sz)
trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
address_space_cache_destroy(&indirect_desc_cache);
rcu_read_unlock();
return elem;
@ -2437,14 +2427,11 @@ static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
{
bool should_notify;
rcu_read_lock();
should_notify = virtio_should_notify(vdev, vq);
rcu_read_unlock();
if (!should_notify) {
WITH_RCU_READ_LOCK_GUARD() {
if (!virtio_should_notify(vdev, vq)) {
return;
}
}
trace_virtio_notify_irqfd(vdev, vq);
@ -2475,14 +2462,11 @@ static void virtio_irq(VirtQueue *vq)
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
bool should_notify;
rcu_read_lock();
should_notify = virtio_should_notify(vdev, vq);
rcu_read_unlock();
if (!should_notify) {
WITH_RCU_READ_LOCK_GUARD() {
if (!virtio_should_notify(vdev, vq)) {
return;
}
}
trace_virtio_notify(vdev, vq);
virtio_irq(vq);
@ -3032,7 +3016,7 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
vdev->start_on_kick = true;
}
rcu_read_lock();
RCU_READ_LOCK_GUARD();
for (i = 0; i < num; i++) {
if (vdev->vq[i].vring.desc) {
uint16_t nheads;
@ -3087,7 +3071,6 @@ int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
}
}
}
rcu_read_unlock();
if (vdc->post_load) {
ret = vdc->post_load(vdev);
@ -3297,12 +3280,11 @@ static void virtio_queue_packed_restore_last_avail_idx(VirtIODevice *vdev,
static void virtio_queue_split_restore_last_avail_idx(VirtIODevice *vdev,
int n)
{
rcu_read_lock();
RCU_READ_LOCK_GUARD();
if (vdev->vq[n].vring.desc) {
vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
}
rcu_read_unlock();
}
void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
@ -3322,11 +3304,10 @@ static void virtio_queue_packed_update_used_idx(VirtIODevice *vdev, int n)
static void virtio_split_packed_update_used_idx(VirtIODevice *vdev, int n)
{
rcu_read_lock();
RCU_READ_LOCK_GUARD();
if (vdev->vq[n].vring.desc) {
vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
}
rcu_read_unlock();
}
void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)

View File

@ -265,6 +265,7 @@ typedef struct PCIReqIDCache PCIReqIDCache;
struct PCIDevice {
DeviceState qdev;
bool partially_hotplugged;
/* PCI config space */
uint8_t *config;
@ -352,6 +353,9 @@ struct PCIDevice {
MSIVectorUseNotifier msix_vector_use_notifier;
MSIVectorReleaseNotifier msix_vector_release_notifier;
MSIVectorPollNotifier msix_vector_poll_notifier;
/* ID of standby device in net_failover pair */
char *failover_pair_id;
};
void pci_register_bar(PCIDevice *pci_dev, int region_num,

View File

@ -78,6 +78,19 @@ typedef void (*BusUnrealize)(BusState *bus, Error **errp);
* respective parent types.
* </para>
* </note>
*
* # Hiding a device #
* To hide a device, a DeviceListener function should_be_hidden() needs to
* be registered.
* It can be used to defer adding a device and therefore hide it from the
* guest. The handler registering to this DeviceListener can save the QOpts
* passed to it for re-using it later and must return that it wants the device
* to be/remain hidden or not. When the handler function decides the device
* shall not be hidden it will be added in qdev_device_add() and
* realized as any other device. Otherwise qdev_device_add() will return early
* without adding the device. The guest will not see a "hidden" device
* until it was marked don't hide and qdev_device_add called again.
*
*/
typedef struct DeviceClass {
/*< private >*/
@ -143,6 +156,7 @@ struct DeviceState {
bool pending_deleted_event;
QemuOpts *opts;
int hotplugged;
bool allow_unplug_during_migration;
BusState *parent_bus;
QLIST_HEAD(, NamedGPIOList) gpios;
QLIST_HEAD(, BusState) child_bus;
@ -154,6 +168,12 @@ struct DeviceState {
struct DeviceListener {
void (*realize)(DeviceListener *listener, DeviceState *dev);
void (*unrealize)(DeviceListener *listener, DeviceState *dev);
/*
* This callback is called upon init of the DeviceState and allows to
* inform qdev that a device should be hidden, depending on the device
* opts, for example, to hide a standby device.
*/
int (*should_be_hidden)(DeviceListener *listener, QemuOpts *device_opts);
QTAILQ_ENTRY(DeviceListener) link;
};
@ -451,4 +471,14 @@ static inline bool qbus_is_hotpluggable(BusState *bus)
void device_listener_register(DeviceListener *listener);
void device_listener_unregister(DeviceListener *listener);
/**
* @qdev_should_hide_device:
* @opts: QemuOpts as passed on cmdline.
*
* Check if a device should be added.
* When a device is added via qdev_device_add() this will be called,
* and return if the device should be added now or not.
*/
bool qdev_should_hide_device(QemuOpts *opts);
#endif

View File

@ -18,6 +18,7 @@
#include "standard-headers/linux/virtio_net.h"
#include "hw/virtio/virtio.h"
#include "net/announce.h"
#include "qemu/option_int.h"
#define TYPE_VIRTIO_NET "virtio-net-device"
#define VIRTIO_NET(obj) \
@ -43,6 +44,7 @@ typedef struct virtio_net_conf
int32_t speed;
char *duplex_str;
uint8_t duplex;
char *primary_id_str;
} virtio_net_conf;
/* Coalesced packets type & status */
@ -187,6 +189,16 @@ struct VirtIONet {
AnnounceTimer announce_timer;
bool needs_vnet_hdr_swap;
bool mtu_bypass_backend;
QemuOpts *primary_device_opts;
QDict *primary_device_dict;
DeviceState *primary_dev;
BusState *primary_bus;
char *primary_device_id;
char *standby_id;
bool primary_should_be_hidden;
bool failover;
DeviceListener primary_listener;
Notifier migration_state;
};
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,

View File

@ -160,6 +160,7 @@ typedef struct VirtioDeviceClass {
*/
int (*post_load)(VirtIODevice *vdev);
const VMStateDescription *vmsd;
bool (*primary_unplug_pending)(void *opaque);
} VirtioDeviceClass;
void virtio_instance_init_common(Object *proxy_obj, void *data,

View File

@ -186,6 +186,8 @@ struct VMStateDescription {
int (*pre_save)(void *opaque);
int (*post_save)(void *opaque);
bool (*needed)(void *opaque);
bool (*dev_unplug_pending)(void *opaque);
const VMStateField *fields;
const VMStateDescription **subsections;
};

View File

@ -52,6 +52,7 @@
#include "hw/qdev-properties.h"
#include "monitor/monitor.h"
#include "net/announce.h"
#include "qemu/queue.h"
#define MAX_THROTTLE (32 << 20) /* Migration transfer speed throttling */
@ -819,6 +820,7 @@ bool migration_is_setup_or_active(int state)
case MIGRATION_STATUS_SETUP:
case MIGRATION_STATUS_PRE_SWITCHOVER:
case MIGRATION_STATUS_DEVICE:
case MIGRATION_STATUS_WAIT_UNPLUG:
return true;
default:
@ -954,6 +956,9 @@ static void fill_source_migration_info(MigrationInfo *info)
case MIGRATION_STATUS_CANCELLED:
info->has_status = true;
break;
case MIGRATION_STATUS_WAIT_UNPLUG:
info->has_status = true;
break;
}
info->status = s->state;
}
@ -1694,6 +1699,7 @@ bool migration_is_idle(void)
case MIGRATION_STATUS_COLO:
case MIGRATION_STATUS_PRE_SWITCHOVER:
case MIGRATION_STATUS_DEVICE:
case MIGRATION_STATUS_WAIT_UNPLUG:
return false;
case MIGRATION_STATUS__MAX:
g_assert_not_reached();
@ -3264,6 +3270,19 @@ static void *migration_thread(void *opaque)
qemu_savevm_state_setup(s->to_dst_file);
if (qemu_savevm_nr_failover_devices()) {
migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_WAIT_UNPLUG);
while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
qemu_savevm_state_guest_unplug_pending()) {
qemu_sem_timedwait(&s->wait_unplug_sem, 250);
}
migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG,
MIGRATION_STATUS_ACTIVE);
}
s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_ACTIVE);
@ -3511,6 +3530,7 @@ static void migration_instance_finalize(Object *obj)
qemu_mutex_destroy(&ms->qemu_file_lock);
g_free(params->tls_hostname);
g_free(params->tls_creds);
qemu_sem_destroy(&ms->wait_unplug_sem);
qemu_sem_destroy(&ms->rate_limit_sem);
qemu_sem_destroy(&ms->pause_sem);
qemu_sem_destroy(&ms->postcopy_pause_sem);
@ -3556,6 +3576,7 @@ static void migration_instance_init(Object *obj)
qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
qemu_sem_init(&ms->rp_state.rp_sem, 0);
qemu_sem_init(&ms->rate_limit_sem, 0);
qemu_sem_init(&ms->wait_unplug_sem, 0);
qemu_mutex_init(&ms->qemu_file_lock);
}

View File

@ -206,6 +206,9 @@ struct MigrationState
/* Flag set once the migration thread called bdrv_inactivate_all */
bool block_inactive;
/* Migration is waiting for guest to unplug device */
QemuSemaphore wait_unplug_sem;
/* Migration is paused due to pause-before-switchover */
QemuSemaphore pause_sem;

View File

@ -1113,6 +1113,37 @@ void qemu_savevm_state_header(QEMUFile *f)
}
}
int qemu_savevm_nr_failover_devices(void)
{
SaveStateEntry *se;
int n = 0;
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (se->vmsd && se->vmsd->dev_unplug_pending) {
n++;
}
}
return n;
}
bool qemu_savevm_state_guest_unplug_pending(void)
{
SaveStateEntry *se;
int n = 0;
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->vmsd || !se->vmsd->dev_unplug_pending) {
continue;
}
if (se->vmsd->dev_unplug_pending(se->opaque)) {
n++;
}
}
return n > 0;
}
void qemu_savevm_state_setup(QEMUFile *f)
{
SaveStateEntry *se;

View File

@ -31,6 +31,8 @@
bool qemu_savevm_state_blocked(Error **errp);
void qemu_savevm_state_setup(QEMUFile *f);
int qemu_savevm_nr_failover_devices(void);
bool qemu_savevm_state_guest_unplug_pending(void);
int qemu_savevm_state_resume_prepare(MigrationState *s);
void qemu_savevm_state_header(QEMUFile *f);
int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy);

View File

@ -133,6 +133,9 @@
# @device: During device serialisation when pause-before-switchover is enabled
# (since 2.11)
#
# @wait-unplug: wait for device unplug request by guest OS to be completed.
# (since 4.2)
#
# Since: 2.3
#
##
@ -140,7 +143,7 @@
'data': [ 'none', 'setup', 'cancelling', 'cancelled',
'active', 'postcopy-active', 'postcopy-paused',
'postcopy-recover', 'completed', 'failed', 'colo',
'pre-switchover', 'device' ] }
'pre-switchover', 'device', 'wait-unplug' ] }
##
# @MigrationInfo:
@ -1448,3 +1451,22 @@
# Since: 3.0
##
{ 'command': 'migrate-pause', 'allow-oob': true }
##
# @UNPLUG_PRIMARY:
#
# Emitted from source side of a migration when migration state is
# WAIT_UNPLUG. Device was unplugged by guest operating system.
# Device resources in QEMU are kept on standby to be able to re-plug it in case
# of migration failure.
#
# @device-id: QEMU device id of the unplugged device
#
# Since: 4.2
#
# Example:
# {"event": "UNPLUG_PRIMARY", "data": {"device-id": "hostdev0"} }
#
##
{ 'event': 'UNPLUG_PRIMARY',
'data': { 'device-id': 'str' } }

View File

@ -735,3 +735,22 @@
##
{ 'command': 'announce-self', 'boxed': true,
'data' : 'AnnounceParameters'}
##
# @FAILOVER_NEGOTIATED:
#
# Emitted when VIRTIO_NET_F_STANDBY was enabled during feature negotiation.
# Failover primary devices which were hidden (not hotplugged when requested)
# before will now be hotplugged by the virtio-net standby device.
#
# device-id: QEMU device id of the unplugged device
# Since: 4.2
#
# Example:
#
# <- { "event": "FAILOVER_NEGOTIATED",
# "data": "net1" }
#
##
{ 'event': 'FAILOVER_NEGOTIATED',
'data': {'device-id': 'str'} }

View File

@ -32,9 +32,11 @@
#include "qemu/help_option.h"
#include "qemu/option.h"
#include "qemu/qemu-print.h"
#include "qemu/option_int.h"
#include "sysemu/block-backend.h"
#include "sysemu/sysemu.h"
#include "migration/misc.h"
#include "migration/migration.h"
/*
* Aliases were a bad idea from the start. Let's keep them
@ -562,13 +564,36 @@ void qdev_set_id(DeviceState *dev, const char *id)
}
}
static int is_failover_device(void *opaque, const char *name, const char *value,
Error **errp)
{
if (strcmp(name, "failover_pair_id") == 0) {
QemuOpts *opts = (QemuOpts *)opaque;
if (qdev_should_hide_device(opts)) {
return 1;
}
}
return 0;
}
static bool should_hide_device(QemuOpts *opts)
{
if (qemu_opt_foreach(opts, is_failover_device, opts, NULL) == 0) {
return false;
}
return true;
}
DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
{
DeviceClass *dc;
const char *driver, *path;
DeviceState *dev;
DeviceState *dev = NULL;
BusState *bus = NULL;
Error *err = NULL;
bool hide;
driver = qemu_opt_get(opts, "driver");
if (!driver) {
@ -602,11 +627,17 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
return NULL;
}
}
if (qdev_hotplug && bus && !qbus_is_hotpluggable(bus)) {
hide = should_hide_device(opts);
if ((hide || qdev_hotplug) && bus && !qbus_is_hotpluggable(bus)) {
error_setg(errp, QERR_BUS_NO_HOTPLUG, bus->name);
return NULL;
}
if (hide) {
return NULL;
}
if (!migration_is_idle()) {
error_setg(errp, "device_add not allowed while migrating");
return NULL;
@ -648,8 +679,10 @@ DeviceState *qdev_device_add(QemuOpts *opts, Error **errp)
err_del_dev:
error_propagate(errp, err);
if (dev) {
object_unparent(OBJECT(dev));
object_unref(OBJECT(dev));
}
return NULL;
}
@ -818,7 +851,7 @@ void qdev_unplug(DeviceState *dev, Error **errp)
return;
}
if (!migration_is_idle()) {
if (!migration_is_idle() && !dev->allow_unplug_during_migration) {
error_setg(errp, "device_del not allowed while migrating");
return;
}

View File

@ -125,7 +125,8 @@ void migrate(QOSState *from, QOSState *to, const char *uri)
break;
}
if ((strcmp(st, "setup") == 0) || (strcmp(st, "active") == 0)) {
if ((strcmp(st, "setup") == 0) || (strcmp(st, "active") == 0)
|| (strcmp(st, "wait-unplug") == 0)) {
qobject_unref(rsp);
g_usleep(5000);
continue;

6
vl.c
View File

@ -2207,10 +2207,12 @@ static int device_init_func(void *opaque, QemuOpts *opts, Error **errp)
DeviceState *dev;
dev = qdev_device_add(opts, errp);
if (!dev) {
if (!dev && *errp) {
error_report_err(*errp);
return -1;
}
} else if (dev) {
object_unref(OBJECT(dev));
}
return 0;
}