linux/net/sched/sch_mq.c
Patrick McHardy 23bcf634c8 net_sched: fix estimator lock selection for mq child qdiscs
When new child qdiscs are attached to the mq qdisc, they are actually
attached as root qdiscs to the device queues. The lock selection for
new estimators incorrectly picks the root lock of the existing and
to be replaced qdisc, which results in a use-after-free once the old
qdisc has been destroyed.

Mark mq qdisc instances with a new flag and treat qdiscs attached to
mq as children similar to regular root qdiscs.

Additionally prevent estimators from being attached to the mq qdisc
itself since it only updates its byte and packet counters during dumps.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
2009-09-09 18:11:23 -07:00

236 lines
5.5 KiB
C

/*
* net/sched/sch_mq.c Classful multiqueue dummy scheduler
*
* Copyright (c) 2009 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* version 2 as published by the Free Software Foundation.
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
struct mq_sched {
struct Qdisc **qdiscs;
};
static void mq_destroy(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct mq_sched *priv = qdisc_priv(sch);
unsigned int ntx;
if (!priv->qdiscs)
return;
for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
qdisc_destroy(priv->qdiscs[ntx]);
kfree(priv->qdiscs);
}
static int mq_init(struct Qdisc *sch, struct nlattr *opt)
{
struct net_device *dev = qdisc_dev(sch);
struct mq_sched *priv = qdisc_priv(sch);
struct netdev_queue *dev_queue;
struct Qdisc *qdisc;
unsigned int ntx;
if (sch->parent != TC_H_ROOT)
return -EOPNOTSUPP;
if (!netif_is_multiqueue(dev))
return -EOPNOTSUPP;
/* pre-allocate qdiscs, attachment can't fail */
priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
GFP_KERNEL);
if (priv->qdiscs == NULL)
return -ENOMEM;
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
dev_queue = netdev_get_tx_queue(dev, ntx);
qdisc = qdisc_create_dflt(dev, dev_queue, &pfifo_fast_ops,
TC_H_MAKE(TC_H_MAJ(sch->handle),
TC_H_MIN(ntx + 1)));
if (qdisc == NULL)
goto err;
qdisc->flags |= TCQ_F_CAN_BYPASS;
priv->qdiscs[ntx] = qdisc;
}
sch->flags |= TCQ_F_MQROOT;
return 0;
err:
mq_destroy(sch);
return -ENOMEM;
}
static void mq_attach(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct mq_sched *priv = qdisc_priv(sch);
struct Qdisc *qdisc;
unsigned int ntx;
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
qdisc = priv->qdiscs[ntx];
qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
if (qdisc)
qdisc_destroy(qdisc);
}
kfree(priv->qdiscs);
priv->qdiscs = NULL;
}
static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct net_device *dev = qdisc_dev(sch);
struct Qdisc *qdisc;
unsigned int ntx;
sch->q.qlen = 0;
memset(&sch->bstats, 0, sizeof(sch->bstats));
memset(&sch->qstats, 0, sizeof(sch->qstats));
for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
spin_lock_bh(qdisc_lock(qdisc));
sch->q.qlen += qdisc->q.qlen;
sch->bstats.bytes += qdisc->bstats.bytes;
sch->bstats.packets += qdisc->bstats.packets;
sch->qstats.qlen += qdisc->qstats.qlen;
sch->qstats.backlog += qdisc->qstats.backlog;
sch->qstats.drops += qdisc->qstats.drops;
sch->qstats.requeues += qdisc->qstats.requeues;
sch->qstats.overlimits += qdisc->qstats.overlimits;
spin_unlock_bh(qdisc_lock(qdisc));
}
return 0;
}
static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl)
{
struct net_device *dev = qdisc_dev(sch);
unsigned long ntx = cl - 1;
if (ntx >= dev->num_tx_queues)
return NULL;
return netdev_get_tx_queue(dev, ntx);
}
static unsigned int mq_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
{
unsigned int ntx = TC_H_MIN(tcm->tcm_parent);
if (!mq_queue_get(sch, ntx))
return 0;
return ntx - 1;
}
static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
struct Qdisc **old)
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
struct net_device *dev = qdisc_dev(sch);
if (dev->flags & IFF_UP)
dev_deactivate(dev);
*old = dev_graft_qdisc(dev_queue, new);
if (dev->flags & IFF_UP)
dev_activate(dev);
return 0;
}
static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl)
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
return dev_queue->qdisc_sleeping;
}
static unsigned long mq_get(struct Qdisc *sch, u32 classid)
{
unsigned int ntx = TC_H_MIN(classid);
if (!mq_queue_get(sch, ntx))
return 0;
return ntx;
}
static void mq_put(struct Qdisc *sch, unsigned long cl)
{
return;
}
static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
struct sk_buff *skb, struct tcmsg *tcm)
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_handle |= TC_H_MIN(cl);
tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
return 0;
}
static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
struct gnet_dump *d)
{
struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
sch = dev_queue->qdisc_sleeping;
if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
gnet_stats_copy_queue(d, &sch->qstats) < 0)
return -1;
return 0;
}
static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
struct net_device *dev = qdisc_dev(sch);
unsigned int ntx;
if (arg->stop)
return;
arg->count = arg->skip;
for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
if (arg->fn(sch, ntx + 1, arg) < 0) {
arg->stop = 1;
break;
}
arg->count++;
}
}
static const struct Qdisc_class_ops mq_class_ops = {
.select_queue = mq_select_queue,
.graft = mq_graft,
.leaf = mq_leaf,
.get = mq_get,
.put = mq_put,
.walk = mq_walk,
.dump = mq_dump_class,
.dump_stats = mq_dump_class_stats,
};
struct Qdisc_ops mq_qdisc_ops __read_mostly = {
.cl_ops = &mq_class_ops,
.id = "mq",
.priv_size = sizeof(struct mq_sched),
.init = mq_init,
.destroy = mq_destroy,
.attach = mq_attach,
.dump = mq_dump,
.owner = THIS_MODULE,
};