linux/net/sched/sch_fifo.c

272 lines
6.0 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* net/sched/sch_fifo.c The simplest FIFO queue.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*/
#include <linux/module.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
/* 1 band FIFO pseudo-"scheduler" */
static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
return qdisc_enqueue_tail(skb, sch);
return qdisc_drop(skb, sch, to_free);
}
static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
if (likely(sch->q.qlen < sch->limit))
return qdisc_enqueue_tail(skb, sch);
return qdisc_drop(skb, sch, to_free);
}
static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
unsigned int prev_backlog;
if (likely(sch->q.qlen < sch->limit))
return qdisc_enqueue_tail(skb, sch);
prev_backlog = sch->qstats.backlog;
/* queue full, remove one skb to fulfill the limit */
__qdisc_queue_drop_head(sch, &sch->q, to_free);
qdisc_qstats_drop(sch);
qdisc_enqueue_tail(skb, sch);
qdisc_tree_reduce_backlog(sch, 0, prev_backlog - sch->qstats.backlog);
return NET_XMIT_CN;
}
static void fifo_offload_init(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct tc_fifo_qopt_offload qopt;
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return;
qopt.command = TC_FIFO_REPLACE;
qopt.handle = sch->handle;
qopt.parent = sch->parent;
dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_FIFO, &qopt);
}
static void fifo_offload_destroy(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct tc_fifo_qopt_offload qopt;
if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
return;
qopt.command = TC_FIFO_DESTROY;
qopt.handle = sch->handle;
qopt.parent = sch->parent;
dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_FIFO, &qopt);
}
static int fifo_offload_dump(struct Qdisc *sch)
{
struct tc_fifo_qopt_offload qopt;
qopt.command = TC_FIFO_STATS;
qopt.handle = sch->handle;
qopt.parent = sch->parent;
qopt.stats.bstats = &sch->bstats;
qopt.stats.qstats = &sch->qstats;
return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_FIFO, &qopt);
}
static int __fifo_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
bool bypass;
bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
if (opt == NULL) {
u32 limit = qdisc_dev(sch)->tx_queue_len;
if (is_bfifo)
limit *= psched_mtu(qdisc_dev(sch));
sch->limit = limit;
} else {
struct tc_fifo_qopt *ctl = nla_data(opt);
if (nla_len(opt) < sizeof(*ctl))
return -EINVAL;
sch->limit = ctl->limit;
}
if (is_bfifo)
bypass = sch->limit >= psched_mtu(qdisc_dev(sch));
else
bypass = sch->limit >= 1;
if (bypass)
sch->flags |= TCQ_F_CAN_BYPASS;
else
sch->flags &= ~TCQ_F_CAN_BYPASS;
return 0;
}
static int fifo_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
int err;
err = __fifo_init(sch, opt, extack);
if (err)
return err;
fifo_offload_init(sch);
return 0;
}
static int fifo_hd_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
return __fifo_init(sch, opt, extack);
}
static void fifo_destroy(struct Qdisc *sch)
{
fifo_offload_destroy(sch);
}
static int __fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct tc_fifo_qopt opt = { .limit = sch->limit };
if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
goto nla_put_failure;
return skb->len;
nla_put_failure:
return -1;
}
static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
{
int err;
err = fifo_offload_dump(sch);
if (err)
return err;
return __fifo_dump(sch, skb);
}
static int fifo_hd_dump(struct Qdisc *sch, struct sk_buff *skb)
{
return __fifo_dump(sch, skb);
}
struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
.id = "pfifo",
.priv_size = 0,
.enqueue = pfifo_enqueue,
.dequeue = qdisc_dequeue_head,
.peek = qdisc_peek_head,
.init = fifo_init,
.destroy = fifo_destroy,
.reset = qdisc_reset_queue,
.change = fifo_init,
.dump = fifo_dump,
.owner = THIS_MODULE,
};
EXPORT_SYMBOL(pfifo_qdisc_ops);
struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
.id = "bfifo",
.priv_size = 0,
.enqueue = bfifo_enqueue,
.dequeue = qdisc_dequeue_head,
.peek = qdisc_peek_head,
.init = fifo_init,
.destroy = fifo_destroy,
.reset = qdisc_reset_queue,
.change = fifo_init,
.dump = fifo_dump,
.owner = THIS_MODULE,
};
EXPORT_SYMBOL(bfifo_qdisc_ops);
struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = {
.id = "pfifo_head_drop",
.priv_size = 0,
.enqueue = pfifo_tail_enqueue,
.dequeue = qdisc_dequeue_head,
.peek = qdisc_peek_head,
.init = fifo_hd_init,
.reset = qdisc_reset_queue,
.change = fifo_hd_init,
.dump = fifo_hd_dump,
.owner = THIS_MODULE,
};
/* Pass size change message down to embedded FIFO */
int fifo_set_limit(struct Qdisc *q, unsigned int limit)
{
struct nlattr *nla;
int ret = -ENOMEM;
/* Hack to avoid sending change message to non-FIFO */
if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
return 0;
net_sched: fix NULL deref in fifo_set_limit() syzbot reported another NULL deref in fifo_set_limit() [1] I could repro the issue with : unshare -n tc qd add dev lo root handle 1:0 tbf limit 200000 burst 70000 rate 100Mbit tc qd replace dev lo parent 1:0 pfifo_fast tc qd change dev lo root handle 1:0 tbf limit 300000 burst 70000 rate 100Mbit pfifo_fast does not have a change() operation. Make fifo_set_limit() more robust about this. [1] BUG: kernel NULL pointer dereference, address: 0000000000000000 PGD 1cf99067 P4D 1cf99067 PUD 7ca49067 PMD 0 Oops: 0010 [#1] PREEMPT SMP KASAN CPU: 1 PID: 14443 Comm: syz-executor959 Not tainted 5.15.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:0x0 Code: Unable to access opcode bytes at RIP 0xffffffffffffffd6. RSP: 0018:ffffc9000e2f7310 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffffffff8d6ecc00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff888024c27910 RDI: ffff888071e34000 RBP: ffff888071e34000 R08: 0000000000000001 R09: ffffffff8fcfb947 R10: 0000000000000001 R11: 0000000000000000 R12: ffff888024c27910 R13: ffff888071e34018 R14: 0000000000000000 R15: ffff88801ef74800 FS: 00007f321d897700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 00000000722c3000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: fifo_set_limit net/sched/sch_fifo.c:242 [inline] fifo_set_limit+0x198/0x210 net/sched/sch_fifo.c:227 tbf_change+0x6ec/0x16d0 net/sched/sch_tbf.c:418 qdisc_change net/sched/sch_api.c:1332 [inline] tc_modify_qdisc+0xd9a/0x1a60 net/sched/sch_api.c:1634 rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5572 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2504 netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1340 netlink_sendmsg+0x86d/0xdb0 net/netlink/af_netlink.c:1929 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:724 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2409 ___sys_sendmsg+0xf3/0x170 net/socket.c:2463 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2492 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: fb0305ce1b03 ("net-sched: consolidate default fifo qdisc setup") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot <syzkaller@googlegroups.com> Link: https://lore.kernel.org/r/20210930212239.3430364-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-10-01 05:22:39 +08:00
if (!q->ops->change)
return 0;
nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
if (nla) {
nla->nla_type = RTM_NEWQDISC;
nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
ret = q->ops->change(q, nla, NULL);
kfree(nla);
}
return ret;
}
EXPORT_SYMBOL(fifo_set_limit);
struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
unsigned int limit,
struct netlink_ext_ack *extack)
{
struct Qdisc *q;
int err = -ENOMEM;
q = qdisc_create_dflt(sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1),
extack);
if (q) {
err = fifo_set_limit(q, limit);
if (err < 0) {
qdisc_put(q);
q = NULL;
}
}
return q ? : ERR_PTR(err);
}
EXPORT_SYMBOL(fifo_create_dflt);