linux/drivers/net/xen-netback/xenbus.c
Wei Liu f7b50c4e7c xen-netback: bookkeep number of active queues in our own module
The original code uses netdev->real_num_tx_queues to bookkeep number of
queues and invokes netif_set_real_num_tx_queues to set the number of
queues. However, netif_set_real_num_tx_queues doesn't allow
real_num_tx_queues to be smaller than 1, which means setting the number
to 0 will not work and real_num_tx_queues is untouched.

This is bogus when xenvif_free is invoked before any number of queues is
allocated. That function needs to iterate through all queues to free
resources. Using the wrong number of queues results in NULL pointer
dereference.

So we bookkeep the number of queues in xen-netback to solve this
problem. This fixes a regression introduced by multiqueue patchset in
3.16-rc1.

There's another bug in original code that the real number of RX queues
is never set. In current Xen multiqueue design, the number of TX queues
and RX queues are in fact the same. We need to set the numbers of TX and
RX queues to the same value.

Also remove xenvif_select_queue and leave queue selection to core
driver, as suggested by David Miller.

Reported-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Signed-off-by: Wei Liu <wei.liu2@citrix.com>
CC: Ian Campbell <ian.campbell@citrix.com>
CC: Paul Durrant <paul.durrant@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-06-25 15:59:47 -07:00

787 lines
19 KiB
C

/*
* Xenbus code for netif backend
*
* Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
* Copyright (C) 2005 XenSource Ltd
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "common.h"
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
struct backend_info {
struct xenbus_device *dev;
struct xenvif *vif;
/* This is the state that will be reflected in xenstore when any
* active hotplug script completes.
*/
enum xenbus_state state;
enum xenbus_state frontend_state;
struct xenbus_watch hotplug_status_watch;
u8 have_hotplug_status_watch:1;
};
static int connect_rings(struct backend_info *be, struct xenvif_queue *queue);
static void connect(struct backend_info *be);
static int read_xenbus_vif_flags(struct backend_info *be);
static void backend_create_xenvif(struct backend_info *be);
static void unregister_hotplug_status_watch(struct backend_info *be);
static void set_backend_state(struct backend_info *be,
enum xenbus_state state);
static int netback_remove(struct xenbus_device *dev)
{
struct backend_info *be = dev_get_drvdata(&dev->dev);
set_backend_state(be, XenbusStateClosed);
unregister_hotplug_status_watch(be);
if (be->vif) {
kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
xenvif_free(be->vif);
be->vif = NULL;
}
kfree(be);
dev_set_drvdata(&dev->dev, NULL);
return 0;
}
/**
* Entry point to this code when a new device is created. Allocate the basic
* structures and switch to InitWait.
*/
static int netback_probe(struct xenbus_device *dev,
const struct xenbus_device_id *id)
{
const char *message;
struct xenbus_transaction xbt;
int err;
int sg;
struct backend_info *be = kzalloc(sizeof(struct backend_info),
GFP_KERNEL);
if (!be) {
xenbus_dev_fatal(dev, -ENOMEM,
"allocating backend structure");
return -ENOMEM;
}
be->dev = dev;
dev_set_drvdata(&dev->dev, be);
sg = 1;
do {
err = xenbus_transaction_start(&xbt);
if (err) {
xenbus_dev_fatal(dev, err, "starting transaction");
goto fail;
}
err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
if (err) {
message = "writing feature-sg";
goto abort_transaction;
}
err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
"%d", sg);
if (err) {
message = "writing feature-gso-tcpv4";
goto abort_transaction;
}
err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6",
"%d", sg);
if (err) {
message = "writing feature-gso-tcpv6";
goto abort_transaction;
}
/* We support partial checksum setup for IPv6 packets */
err = xenbus_printf(xbt, dev->nodename,
"feature-ipv6-csum-offload",
"%d", 1);
if (err) {
message = "writing feature-ipv6-csum-offload";
goto abort_transaction;
}
/* We support rx-copy path. */
err = xenbus_printf(xbt, dev->nodename,
"feature-rx-copy", "%d", 1);
if (err) {
message = "writing feature-rx-copy";
goto abort_transaction;
}
/*
* We don't support rx-flip path (except old guests who don't
* grok this feature flag).
*/
err = xenbus_printf(xbt, dev->nodename,
"feature-rx-flip", "%d", 0);
if (err) {
message = "writing feature-rx-flip";
goto abort_transaction;
}
err = xenbus_transaction_end(xbt, 0);
} while (err == -EAGAIN);
if (err) {
xenbus_dev_fatal(dev, err, "completing transaction");
goto fail;
}
/*
* Split event channels support, this is optional so it is not
* put inside the above loop.
*/
err = xenbus_printf(XBT_NIL, dev->nodename,
"feature-split-event-channels",
"%u", separate_tx_rx_irq);
if (err)
pr_debug("Error writing feature-split-event-channels\n");
/* Multi-queue support: This is an optional feature. */
err = xenbus_printf(XBT_NIL, dev->nodename,
"multi-queue-max-queues", "%u", xenvif_max_queues);
if (err)
pr_debug("Error writing multi-queue-max-queues\n");
err = xenbus_switch_state(dev, XenbusStateInitWait);
if (err)
goto fail;
be->state = XenbusStateInitWait;
/* This kicks hotplug scripts, so do it immediately. */
backend_create_xenvif(be);
return 0;
abort_transaction:
xenbus_transaction_end(xbt, 1);
xenbus_dev_fatal(dev, err, "%s", message);
fail:
pr_debug("failed\n");
netback_remove(dev);
return err;
}
/*
* Handle the creation of the hotplug script environment. We add the script
* and vif variables to the environment, for the benefit of the vif-* hotplug
* scripts.
*/
static int netback_uevent(struct xenbus_device *xdev,
struct kobj_uevent_env *env)
{
struct backend_info *be = dev_get_drvdata(&xdev->dev);
char *val;
val = xenbus_read(XBT_NIL, xdev->nodename, "script", NULL);
if (IS_ERR(val)) {
int err = PTR_ERR(val);
xenbus_dev_fatal(xdev, err, "reading script");
return err;
} else {
if (add_uevent_var(env, "script=%s", val)) {
kfree(val);
return -ENOMEM;
}
kfree(val);
}
if (!be || !be->vif)
return 0;
return add_uevent_var(env, "vif=%s", be->vif->dev->name);
}
static void backend_create_xenvif(struct backend_info *be)
{
int err;
long handle;
struct xenbus_device *dev = be->dev;
if (be->vif != NULL)
return;
err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%li", &handle);
if (err != 1) {
xenbus_dev_fatal(dev, err, "reading handle");
return;
}
be->vif = xenvif_alloc(&dev->dev, dev->otherend_id, handle);
if (IS_ERR(be->vif)) {
err = PTR_ERR(be->vif);
be->vif = NULL;
xenbus_dev_fatal(dev, err, "creating interface");
return;
}
kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
}
static void backend_disconnect(struct backend_info *be)
{
if (be->vif)
xenvif_disconnect(be->vif);
}
static void backend_connect(struct backend_info *be)
{
if (be->vif)
connect(be);
}
static inline void backend_switch_state(struct backend_info *be,
enum xenbus_state state)
{
struct xenbus_device *dev = be->dev;
pr_debug("%s -> %s\n", dev->nodename, xenbus_strstate(state));
be->state = state;
/* If we are waiting for a hotplug script then defer the
* actual xenbus state change.
*/
if (!be->have_hotplug_status_watch)
xenbus_switch_state(dev, state);
}
/* Handle backend state transitions:
*
* The backend state starts in InitWait and the following transitions are
* allowed.
*
* InitWait -> Connected
*
* ^ \ |
* | \ |
* | \ |
* | \ |
* | \ |
* | \ |
* | V V
*
* Closed <-> Closing
*
* The state argument specifies the eventual state of the backend and the
* function transitions to that state via the shortest path.
*/
static void set_backend_state(struct backend_info *be,
enum xenbus_state state)
{
while (be->state != state) {
switch (be->state) {
case XenbusStateClosed:
switch (state) {
case XenbusStateInitWait:
case XenbusStateConnected:
pr_info("%s: prepare for reconnect\n",
be->dev->nodename);
backend_switch_state(be, XenbusStateInitWait);
break;
case XenbusStateClosing:
backend_switch_state(be, XenbusStateClosing);
break;
default:
BUG();
}
break;
case XenbusStateInitWait:
switch (state) {
case XenbusStateConnected:
backend_connect(be);
backend_switch_state(be, XenbusStateConnected);
break;
case XenbusStateClosing:
case XenbusStateClosed:
backend_switch_state(be, XenbusStateClosing);
break;
default:
BUG();
}
break;
case XenbusStateConnected:
switch (state) {
case XenbusStateInitWait:
case XenbusStateClosing:
case XenbusStateClosed:
backend_disconnect(be);
backend_switch_state(be, XenbusStateClosing);
break;
default:
BUG();
}
break;
case XenbusStateClosing:
switch (state) {
case XenbusStateInitWait:
case XenbusStateConnected:
case XenbusStateClosed:
backend_switch_state(be, XenbusStateClosed);
break;
default:
BUG();
}
break;
default:
BUG();
}
}
}
/**
* Callback received when the frontend's state changes.
*/
static void frontend_changed(struct xenbus_device *dev,
enum xenbus_state frontend_state)
{
struct backend_info *be = dev_get_drvdata(&dev->dev);
pr_debug("%s -> %s\n", dev->otherend, xenbus_strstate(frontend_state));
be->frontend_state = frontend_state;
switch (frontend_state) {
case XenbusStateInitialising:
set_backend_state(be, XenbusStateInitWait);
break;
case XenbusStateInitialised:
break;
case XenbusStateConnected:
set_backend_state(be, XenbusStateConnected);
break;
case XenbusStateClosing:
set_backend_state(be, XenbusStateClosing);
break;
case XenbusStateClosed:
set_backend_state(be, XenbusStateClosed);
if (xenbus_dev_is_online(dev))
break;
/* fall through if not online */
case XenbusStateUnknown:
set_backend_state(be, XenbusStateClosed);
device_unregister(&dev->dev);
break;
default:
xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
frontend_state);
break;
}
}
static void xen_net_read_rate(struct xenbus_device *dev,
unsigned long *bytes, unsigned long *usec)
{
char *s, *e;
unsigned long b, u;
char *ratestr;
/* Default to unlimited bandwidth. */
*bytes = ~0UL;
*usec = 0;
ratestr = xenbus_read(XBT_NIL, dev->nodename, "rate", NULL);
if (IS_ERR(ratestr))
return;
s = ratestr;
b = simple_strtoul(s, &e, 10);
if ((s == e) || (*e != ','))
goto fail;
s = e + 1;
u = simple_strtoul(s, &e, 10);
if ((s == e) || (*e != '\0'))
goto fail;
*bytes = b;
*usec = u;
kfree(ratestr);
return;
fail:
pr_warn("Failed to parse network rate limit. Traffic unlimited.\n");
kfree(ratestr);
}
static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
{
char *s, *e, *macstr;
int i;
macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
if (IS_ERR(macstr))
return PTR_ERR(macstr);
for (i = 0; i < ETH_ALEN; i++) {
mac[i] = simple_strtoul(s, &e, 16);
if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
kfree(macstr);
return -ENOENT;
}
s = e+1;
}
kfree(macstr);
return 0;
}
static void unregister_hotplug_status_watch(struct backend_info *be)
{
if (be->have_hotplug_status_watch) {
unregister_xenbus_watch(&be->hotplug_status_watch);
kfree(be->hotplug_status_watch.node);
}
be->have_hotplug_status_watch = 0;
}
static void hotplug_status_changed(struct xenbus_watch *watch,
const char **vec,
unsigned int vec_size)
{
struct backend_info *be = container_of(watch,
struct backend_info,
hotplug_status_watch);
char *str;
unsigned int len;
str = xenbus_read(XBT_NIL, be->dev->nodename, "hotplug-status", &len);
if (IS_ERR(str))
return;
if (len == sizeof("connected")-1 && !memcmp(str, "connected", len)) {
/* Complete any pending state change */
xenbus_switch_state(be->dev, be->state);
/* Not interested in this watch anymore. */
unregister_hotplug_status_watch(be);
}
kfree(str);
}
static void connect(struct backend_info *be)
{
int err;
struct xenbus_device *dev = be->dev;
unsigned long credit_bytes, credit_usec;
unsigned int queue_index;
unsigned int requested_num_queues;
struct xenvif_queue *queue;
/* Check whether the frontend requested multiple queues
* and read the number requested.
*/
err = xenbus_scanf(XBT_NIL, dev->otherend,
"multi-queue-num-queues",
"%u", &requested_num_queues);
if (err < 0) {
requested_num_queues = 1; /* Fall back to single queue */
} else if (requested_num_queues > xenvif_max_queues) {
/* buggy or malicious guest */
xenbus_dev_fatal(dev, err,
"guest requested %u queues, exceeding the maximum of %u.",
requested_num_queues, xenvif_max_queues);
return;
}
err = xen_net_read_mac(dev, be->vif->fe_dev_addr);
if (err) {
xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
return;
}
xen_net_read_rate(dev, &credit_bytes, &credit_usec);
read_xenbus_vif_flags(be);
/* Use the number of queues requested by the frontend */
be->vif->queues = vzalloc(requested_num_queues *
sizeof(struct xenvif_queue));
be->vif->num_queues = requested_num_queues;
for (queue_index = 0; queue_index < requested_num_queues; ++queue_index) {
queue = &be->vif->queues[queue_index];
queue->vif = be->vif;
queue->id = queue_index;
snprintf(queue->name, sizeof(queue->name), "%s-q%u",
be->vif->dev->name, queue->id);
err = xenvif_init_queue(queue);
if (err) {
/* xenvif_init_queue() cleans up after itself on
* failure, but we need to clean up any previously
* initialised queues. Set num_queues to i so that
* earlier queues can be destroyed using the regular
* disconnect logic.
*/
be->vif->num_queues = queue_index;
goto err;
}
queue->remaining_credit = credit_bytes;
err = connect_rings(be, queue);
if (err) {
/* connect_rings() cleans up after itself on failure,
* but we need to clean up after xenvif_init_queue() here,
* and also clean up any previously initialised queues.
*/
xenvif_deinit_queue(queue);
be->vif->num_queues = queue_index;
goto err;
}
}
/* Initialisation completed, tell core driver the number of
* active queues.
*/
rtnl_lock();
netif_set_real_num_tx_queues(be->vif->dev, requested_num_queues);
netif_set_real_num_rx_queues(be->vif->dev, requested_num_queues);
rtnl_unlock();
xenvif_carrier_on(be->vif);
unregister_hotplug_status_watch(be);
err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch,
hotplug_status_changed,
"%s/%s", dev->nodename, "hotplug-status");
if (!err)
be->have_hotplug_status_watch = 1;
netif_tx_wake_all_queues(be->vif->dev);
return;
err:
if (be->vif->num_queues > 0)
xenvif_disconnect(be->vif); /* Clean up existing queues */
vfree(be->vif->queues);
be->vif->queues = NULL;
be->vif->num_queues = 0;
return;
}
static int connect_rings(struct backend_info *be, struct xenvif_queue *queue)
{
struct xenbus_device *dev = be->dev;
unsigned int num_queues = queue->vif->num_queues;
unsigned long tx_ring_ref, rx_ring_ref;
unsigned int tx_evtchn, rx_evtchn;
int err;
char *xspath;
size_t xspathsize;
const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
/* If the frontend requested 1 queue, or we have fallen back
* to single queue due to lack of frontend support for multi-
* queue, expect the remaining XenStore keys in the toplevel
* directory. Otherwise, expect them in a subdirectory called
* queue-N.
*/
if (num_queues == 1) {
xspath = kzalloc(strlen(dev->otherend) + 1, GFP_KERNEL);
if (!xspath) {
xenbus_dev_fatal(dev, -ENOMEM,
"reading ring references");
return -ENOMEM;
}
strcpy(xspath, dev->otherend);
} else {
xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
xspath = kzalloc(xspathsize, GFP_KERNEL);
if (!xspath) {
xenbus_dev_fatal(dev, -ENOMEM,
"reading ring references");
return -ENOMEM;
}
snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend,
queue->id);
}
err = xenbus_gather(XBT_NIL, xspath,
"tx-ring-ref", "%lu", &tx_ring_ref,
"rx-ring-ref", "%lu", &rx_ring_ref, NULL);
if (err) {
xenbus_dev_fatal(dev, err,
"reading %s/ring-ref",
xspath);
goto err;
}
/* Try split event channels first, then single event channel. */
err = xenbus_gather(XBT_NIL, xspath,
"event-channel-tx", "%u", &tx_evtchn,
"event-channel-rx", "%u", &rx_evtchn, NULL);
if (err < 0) {
err = xenbus_scanf(XBT_NIL, xspath,
"event-channel", "%u", &tx_evtchn);
if (err < 0) {
xenbus_dev_fatal(dev, err,
"reading %s/event-channel(-tx/rx)",
xspath);
goto err;
}
rx_evtchn = tx_evtchn;
}
/* Map the shared frame, irq etc. */
err = xenvif_connect(queue, tx_ring_ref, rx_ring_ref,
tx_evtchn, rx_evtchn);
if (err) {
xenbus_dev_fatal(dev, err,
"mapping shared-frames %lu/%lu port tx %u rx %u",
tx_ring_ref, rx_ring_ref,
tx_evtchn, rx_evtchn);
goto err;
}
err = 0;
err: /* Regular return falls through with err == 0 */
kfree(xspath);
return err;
}
static int read_xenbus_vif_flags(struct backend_info *be)
{
struct xenvif *vif = be->vif;
struct xenbus_device *dev = be->dev;
unsigned int rx_copy;
int err, val;
err = xenbus_scanf(XBT_NIL, dev->otherend, "request-rx-copy", "%u",
&rx_copy);
if (err == -ENOENT) {
err = 0;
rx_copy = 0;
}
if (err < 0) {
xenbus_dev_fatal(dev, err, "reading %s/request-rx-copy",
dev->otherend);
return err;
}
if (!rx_copy)
return -EOPNOTSUPP;
if (vif->dev->tx_queue_len != 0) {
if (xenbus_scanf(XBT_NIL, dev->otherend,
"feature-rx-notify", "%d", &val) < 0)
val = 0;
if (val)
vif->can_queue = 1;
else
/* Must be non-zero for pfifo_fast to work. */
vif->dev->tx_queue_len = 1;
}
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",
"%d", &val) < 0)
val = 0;
vif->can_sg = !!val;
vif->gso_mask = 0;
vif->gso_prefix_mask = 0;
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
"%d", &val) < 0)
val = 0;
if (val)
vif->gso_mask |= GSO_BIT(TCPV4);
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
"%d", &val) < 0)
val = 0;
if (val)
vif->gso_prefix_mask |= GSO_BIT(TCPV4);
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6",
"%d", &val) < 0)
val = 0;
if (val)
vif->gso_mask |= GSO_BIT(TCPV6);
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6-prefix",
"%d", &val) < 0)
val = 0;
if (val)
vif->gso_prefix_mask |= GSO_BIT(TCPV6);
if (vif->gso_mask & vif->gso_prefix_mask) {
xenbus_dev_fatal(dev, err,
"%s: gso and gso prefix flags are not "
"mutually exclusive",
dev->otherend);
return -EOPNOTSUPP;
}
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
"%d", &val) < 0)
val = 0;
vif->ip_csum = !val;
if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-ipv6-csum-offload",
"%d", &val) < 0)
val = 0;
vif->ipv6_csum = !!val;
return 0;
}
/* ** Driver Registration ** */
static const struct xenbus_device_id netback_ids[] = {
{ "vif" },
{ "" }
};
static DEFINE_XENBUS_DRIVER(netback, ,
.probe = netback_probe,
.remove = netback_remove,
.uevent = netback_uevent,
.otherend_changed = frontend_changed,
);
int xenvif_xenbus_init(void)
{
return xenbus_register_backend(&netback_driver);
}
void xenvif_xenbus_fini(void)
{
return xenbus_unregister_driver(&netback_driver);
}