linux/net/sched/cls_tcindex.c

736 lines
17 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* net/sched/cls_tcindex.c Packet classifier for skb->tc_index
*
* Written 1998,1999 by Werner Almesberger, EPFL ICA
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/errno.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
#include <linux/refcount.h>
#include <net/act_api.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/sch_generic.h>
/*
* Passing parameters to the root seems to be done more awkwardly than really
* necessary. At least, u32 doesn't seem to use such dirty hacks. To be
* verified. FIXME.
*/
#define PERFECT_HASH_THRESHOLD 64 /* use perfect hash if not bigger */
#define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */
struct tcindex_data;
struct tcindex_filter_result {
struct tcf_exts exts;
struct tcf_result res;
struct tcindex_data *p;
struct rcu_work rwork;
};
struct tcindex_filter {
u16 key;
struct tcindex_filter_result result;
struct tcindex_filter __rcu *next;
struct rcu_work rwork;
};
struct tcindex_data {
struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
struct tcindex_filter __rcu **h; /* imperfect hash; */
struct tcf_proto *tp;
u16 mask; /* AND key with mask */
u32 shift; /* shift ANDed key to the right */
u32 hash; /* hash table size; 0 if undefined */
u32 alloc_hash; /* allocated size */
u32 fall_through; /* 0: only classify if explicit match */
refcount_t refcnt; /* a temporary refcnt for perfect hash */
struct rcu_work rwork;
};
static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
{
return tcf_exts_has_actions(&r->exts) || r->res.classid;
}
static void tcindex_data_get(struct tcindex_data *p)
{
refcount_inc(&p->refcnt);
}
static void tcindex_data_put(struct tcindex_data *p)
{
if (refcount_dec_and_test(&p->refcnt)) {
kfree(p->perfect);
kfree(p->h);
kfree(p);
}
}
static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
u16 key)
{
if (p->perfect) {
struct tcindex_filter_result *f = p->perfect + key;
return tcindex_filter_is_set(f) ? f : NULL;
} else if (p->h) {
struct tcindex_filter __rcu **fp;
struct tcindex_filter *f;
fp = &p->h[key % p->hash];
for (f = rcu_dereference_bh_rtnl(*fp);
f;
fp = &f->next, f = rcu_dereference_bh_rtnl(*fp))
if (f->key == key)
return &f->result;
}
return NULL;
}
static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct tcindex_data *p = rcu_dereference_bh(tp->root);
struct tcindex_filter_result *f;
int key = (skb->tc_index & p->mask) >> p->shift;
pr_debug("tcindex_classify(skb %p,tp %p,res %p),p %p\n",
skb, tp, res, p);
f = tcindex_lookup(p, key);
if (!f) {
struct Qdisc *q = tcf_block_q(tp->chain->block);
if (!p->fall_through)
return -1;
res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key);
res->class = 0;
pr_debug("alg 0x%x\n", res->classid);
return 0;
}
*res = f->res;
pr_debug("map 0x%x\n", res->classid);
return tcf_exts_exec(skb, &f->exts, res);
}
static void *tcindex_get(struct tcf_proto *tp, u32 handle)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r;
pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
if (p->perfect && handle >= p->alloc_hash)
return NULL;
r = tcindex_lookup(p, handle);
return r && tcindex_filter_is_set(r) ? r : NULL;
}
static int tcindex_init(struct tcf_proto *tp)
{
struct tcindex_data *p;
pr_debug("tcindex_init(tp %p)\n", tp);
p = kzalloc(sizeof(struct tcindex_data), GFP_KERNEL);
if (!p)
return -ENOMEM;
p->mask = 0xffff;
p->hash = DEFAULT_HASH_SIZE;
p->fall_through = 1;
refcount_set(&p->refcnt, 1); /* Paired with tcindex_destroy_work() */
rcu_assign_pointer(tp->root, p);
return 0;
}
static void __tcindex_destroy_rexts(struct tcindex_filter_result *r)
{
tcf_exts_destroy(&r->exts);
tcf_exts_put_net(&r->exts);
tcindex_data_put(r->p);
}
static void tcindex_destroy_rexts_work(struct work_struct *work)
{
struct tcindex_filter_result *r;
r = container_of(to_rcu_work(work),
struct tcindex_filter_result,
rwork);
rtnl_lock();
__tcindex_destroy_rexts(r);
rtnl_unlock();
}
static void __tcindex_destroy_fexts(struct tcindex_filter *f)
{
tcf_exts_destroy(&f->result.exts);
tcf_exts_put_net(&f->result.exts);
kfree(f);
}
static void tcindex_destroy_fexts_work(struct work_struct *work)
{
struct tcindex_filter *f = container_of(to_rcu_work(work),
struct tcindex_filter,
rwork);
rtnl_lock();
__tcindex_destroy_fexts(f);
rtnl_unlock();
}
static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
bool rtnl_held, struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = arg;
struct tcindex_filter __rcu **walk;
struct tcindex_filter *f = NULL;
pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p);
if (p->perfect) {
if (!r->res.class)
return -ENOENT;
} else {
int i;
for (i = 0; i < p->hash; i++) {
walk = p->h + i;
for (f = rtnl_dereference(*walk); f;
walk = &f->next, f = rtnl_dereference(*walk)) {
if (&f->result == r)
goto found;
}
}
return -ENOENT;
found:
rcu_assign_pointer(*walk, rtnl_dereference(f->next));
}
tcf_unbind_filter(tp, &r->res);
/* all classifiers are required to call tcf_exts_destroy() after rcu
* grace period, since converted-to-rcu actions are relying on that
* in cleanup() callback
*/
if (f) {
if (tcf_exts_get_net(&f->result.exts))
tcf_queue_work(&f->rwork, tcindex_destroy_fexts_work);
else
__tcindex_destroy_fexts(f);
} else {
tcindex_data_get(p);
if (tcf_exts_get_net(&r->exts))
tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work);
else
__tcindex_destroy_rexts(r);
}
*last = false;
return 0;
}
static void tcindex_destroy_work(struct work_struct *work)
{
struct tcindex_data *p = container_of(to_rcu_work(work),
struct tcindex_data,
rwork);
tcindex_data_put(p);
}
static inline int
valid_perfect_hash(struct tcindex_data *p)
{
return p->hash > (p->mask >> p->shift);
}
static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
[TCA_TCINDEX_HASH] = { .type = NLA_U32 },
[TCA_TCINDEX_MASK] = { .type = NLA_U16 },
[TCA_TCINDEX_SHIFT] = { .type = NLA_U32 },
[TCA_TCINDEX_FALL_THROUGH] = { .type = NLA_U32 },
[TCA_TCINDEX_CLASSID] = { .type = NLA_U32 },
};
static int tcindex_filter_result_init(struct tcindex_filter_result *r,
struct tcindex_data *p,
struct net *net)
net_sched: fix an oops in tcindex filter Kelly reported the following crash: IP: [<ffffffff817a993d>] tcf_action_exec+0x46/0x90 PGD 3009067 PUD 300c067 PMD 11ff30067 PTE 800000011634b060 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC CPU: 1 PID: 639 Comm: dhclient Not tainted 3.15.0-rc4+ #342 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8801169ecd00 ti: ffff8800d21b8000 task.ti: ffff8800d21b8000 RIP: 0010:[<ffffffff817a993d>] [<ffffffff817a993d>] tcf_action_exec+0x46/0x90 RSP: 0018:ffff8800d21b9b90 EFLAGS: 00010283 RAX: 00000000ffffffff RBX: ffff88011634b8e8 RCX: ffff8800cf7133d8 RDX: ffff88011634b900 RSI: ffff8800cf7133e0 RDI: ffff8800d210f840 RBP: ffff8800d21b9bb0 R08: ffffffff8287bf60 R09: 0000000000000001 R10: ffff8800d2b22b24 R11: 0000000000000001 R12: ffff8800d210f840 R13: ffff8800d21b9c50 R14: ffff8800cf7133e0 R15: ffff8800cad433d8 FS: 00007f49723e1840(0000) GS:ffff88011a800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff88011634b8f0 CR3: 00000000ce469000 CR4: 00000000000006e0 Stack: ffff8800d2170188 ffff8800d210f840 ffff8800d2171b90 0000000000000000 ffff8800d21b9be8 ffffffff817c55bb ffff8800d21b9c50 ffff8800d2171b90 ffff8800d210f840 ffff8800d21b0300 ffff8800d21b9c50 ffff8800d21b9c18 Call Trace: [<ffffffff817c55bb>] tcindex_classify+0x88/0x9b [<ffffffff817a7f7d>] tc_classify_compat+0x3e/0x7b [<ffffffff817a7fdf>] tc_classify+0x25/0x9f [<ffffffff817b0e68>] htb_enqueue+0x55/0x27a [<ffffffff817b6c2e>] dsmark_enqueue+0x165/0x1a4 [<ffffffff81775642>] __dev_queue_xmit+0x35e/0x536 [<ffffffff8177582a>] dev_queue_xmit+0x10/0x12 [<ffffffff818f8ecd>] packet_sendmsg+0xb26/0xb9a [<ffffffff810b1507>] ? __lock_acquire+0x3ae/0xdf3 [<ffffffff8175cf08>] __sock_sendmsg_nosec+0x25/0x27 [<ffffffff8175d916>] sock_aio_write+0xd0/0xe7 [<ffffffff8117d6b8>] do_sync_write+0x59/0x78 [<ffffffff8117d84d>] vfs_write+0xb5/0x10a [<ffffffff8117d96a>] SyS_write+0x49/0x7f [<ffffffff8198e212>] system_call_fastpath+0x16/0x1b This is because we memcpy struct tcindex_filter_result which contains struct tcf_exts, obviously struct list_head can not be simply copied. This is a regression introduced by commit 33be627159913b094bb578 (net_sched: act: use standard struct list_head). It's not very easy to fix it as the code is a mess: if (old_r) memcpy(&cr, r, sizeof(cr)); else { memset(&cr, 0, sizeof(cr)); tcf_exts_init(&cr.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); } ... tcf_exts_change(tp, &cr.exts, &e); ... memcpy(r, &cr, sizeof(cr)); the above code should equal to: tcindex_filter_result_init(&cr); if (old_r) cr.res = r->res; ... if (old_r) tcf_exts_change(tp, &r->exts, &e); else tcf_exts_change(tp, &cr.exts, &e); ... r->res = cr.res; after this change, since there is no need to copy struct tcf_exts. And it also fixes other places zero'ing struct's contains struct tcf_exts. Fixes: commit 33be627159913b0 (net_sched: act: use standard struct list_head) Reported-by: Kelly Anderson <kelly@xilka.com> Tested-by: Kelly Anderson <kelly@xilka.com> Cc: David S. Miller <davem@davemloft.net> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-05-20 03:15:49 +08:00
{
memset(r, 0, sizeof(*r));
r->p = p;
return tcf_exts_init(&r->exts, net, TCA_TCINDEX_ACT,
TCA_TCINDEX_POLICE);
net_sched: fix an oops in tcindex filter Kelly reported the following crash: IP: [<ffffffff817a993d>] tcf_action_exec+0x46/0x90 PGD 3009067 PUD 300c067 PMD 11ff30067 PTE 800000011634b060 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC CPU: 1 PID: 639 Comm: dhclient Not tainted 3.15.0-rc4+ #342 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8801169ecd00 ti: ffff8800d21b8000 task.ti: ffff8800d21b8000 RIP: 0010:[<ffffffff817a993d>] [<ffffffff817a993d>] tcf_action_exec+0x46/0x90 RSP: 0018:ffff8800d21b9b90 EFLAGS: 00010283 RAX: 00000000ffffffff RBX: ffff88011634b8e8 RCX: ffff8800cf7133d8 RDX: ffff88011634b900 RSI: ffff8800cf7133e0 RDI: ffff8800d210f840 RBP: ffff8800d21b9bb0 R08: ffffffff8287bf60 R09: 0000000000000001 R10: ffff8800d2b22b24 R11: 0000000000000001 R12: ffff8800d210f840 R13: ffff8800d21b9c50 R14: ffff8800cf7133e0 R15: ffff8800cad433d8 FS: 00007f49723e1840(0000) GS:ffff88011a800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff88011634b8f0 CR3: 00000000ce469000 CR4: 00000000000006e0 Stack: ffff8800d2170188 ffff8800d210f840 ffff8800d2171b90 0000000000000000 ffff8800d21b9be8 ffffffff817c55bb ffff8800d21b9c50 ffff8800d2171b90 ffff8800d210f840 ffff8800d21b0300 ffff8800d21b9c50 ffff8800d21b9c18 Call Trace: [<ffffffff817c55bb>] tcindex_classify+0x88/0x9b [<ffffffff817a7f7d>] tc_classify_compat+0x3e/0x7b [<ffffffff817a7fdf>] tc_classify+0x25/0x9f [<ffffffff817b0e68>] htb_enqueue+0x55/0x27a [<ffffffff817b6c2e>] dsmark_enqueue+0x165/0x1a4 [<ffffffff81775642>] __dev_queue_xmit+0x35e/0x536 [<ffffffff8177582a>] dev_queue_xmit+0x10/0x12 [<ffffffff818f8ecd>] packet_sendmsg+0xb26/0xb9a [<ffffffff810b1507>] ? __lock_acquire+0x3ae/0xdf3 [<ffffffff8175cf08>] __sock_sendmsg_nosec+0x25/0x27 [<ffffffff8175d916>] sock_aio_write+0xd0/0xe7 [<ffffffff8117d6b8>] do_sync_write+0x59/0x78 [<ffffffff8117d84d>] vfs_write+0xb5/0x10a [<ffffffff8117d96a>] SyS_write+0x49/0x7f [<ffffffff8198e212>] system_call_fastpath+0x16/0x1b This is because we memcpy struct tcindex_filter_result which contains struct tcf_exts, obviously struct list_head can not be simply copied. This is a regression introduced by commit 33be627159913b094bb578 (net_sched: act: use standard struct list_head). It's not very easy to fix it as the code is a mess: if (old_r) memcpy(&cr, r, sizeof(cr)); else { memset(&cr, 0, sizeof(cr)); tcf_exts_init(&cr.exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); } ... tcf_exts_change(tp, &cr.exts, &e); ... memcpy(r, &cr, sizeof(cr)); the above code should equal to: tcindex_filter_result_init(&cr); if (old_r) cr.res = r->res; ... if (old_r) tcf_exts_change(tp, &r->exts, &e); else tcf_exts_change(tp, &cr.exts, &e); ... r->res = cr.res; after this change, since there is no need to copy struct tcf_exts. And it also fixes other places zero'ing struct's contains struct tcf_exts. Fixes: commit 33be627159913b0 (net_sched: act: use standard struct list_head) Reported-by: Kelly Anderson <kelly@xilka.com> Tested-by: Kelly Anderson <kelly@xilka.com> Cc: David S. Miller <davem@davemloft.net> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-05-20 03:15:49 +08:00
}
static void tcindex_partial_destroy_work(struct work_struct *work)
{
struct tcindex_data *p = container_of(to_rcu_work(work),
struct tcindex_data,
rwork);
rtnl_lock();
kfree(p->perfect);
kfree(p);
rtnl_unlock();
}
static void tcindex_free_perfect_hash(struct tcindex_data *cp)
{
int i;
for (i = 0; i < cp->hash; i++)
tcf_exts_destroy(&cp->perfect[i].exts);
kfree(cp->perfect);
}
static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
{
int i, err = 0;
cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result),
GFP_KERNEL);
if (!cp->perfect)
return -ENOMEM;
for (i = 0; i < cp->hash; i++) {
err = tcf_exts_init(&cp->perfect[i].exts, net,
TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
goto errout;
cp->perfect[i].p = cp;
}
return 0;
errout:
tcindex_free_perfect_hash(cp);
return err;
}
static int
tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
u32 handle, struct tcindex_data *p,
struct tcindex_filter_result *r, struct nlattr **tb,
struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
{
struct tcindex_filter_result new_filter_result, *old_r = r;
struct tcindex_data *cp = NULL, *oldp;
struct tcindex_filter *f = NULL; /* make gcc behave */
struct tcf_result cr = {};
int err, balloc = 0;
struct tcf_exts e;
err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
return err;
err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack);
if (err < 0)
goto errout;
err = -ENOMEM;
/* tcindex_data attributes must look atomic to classifier/lookup so
* allocate new tcindex data and RCU assign it onto root. Keeping
* perfect hash and hash pointers from old data.
*/
cp = kzalloc(sizeof(*cp), GFP_KERNEL);
if (!cp)
goto errout;
cp->mask = p->mask;
cp->shift = p->shift;
cp->hash = p->hash;
cp->alloc_hash = p->alloc_hash;
cp->fall_through = p->fall_through;
cp->tp = tp;
refcount_set(&cp->refcnt, 1); /* Paired with tcindex_destroy_work() */
if (tb[TCA_TCINDEX_HASH])
cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
if (tb[TCA_TCINDEX_MASK])
cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
net_sched: avoid shift-out-of-bounds in tcindex_set_parms() tc_index being 16bit wide, we need to check that TCA_TCINDEX_SHIFT attribute is not silly. UBSAN: shift-out-of-bounds in net/sched/cls_tcindex.c:260:29 shift exponent 255 is too large for 32-bit type 'int' CPU: 0 PID: 8516 Comm: syz-executor228 Not tainted 5.10.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x107/0x163 lib/dump_stack.c:120 ubsan_epilogue+0xb/0x5a lib/ubsan.c:148 __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:395 valid_perfect_hash net/sched/cls_tcindex.c:260 [inline] tcindex_set_parms.cold+0x1b/0x215 net/sched/cls_tcindex.c:425 tcindex_change+0x232/0x340 net/sched/cls_tcindex.c:546 tc_new_tfilter+0x13fb/0x21b0 net/sched/cls_api.c:2127 rtnetlink_rcv_msg+0x8b6/0xb80 net/core/rtnetlink.c:5555 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1330 netlink_sendmsg+0x907/0xe40 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:672 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2336 ___sys_sendmsg+0xf3/0x170 net/socket.c:2390 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2423 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot <syzkaller@googlegroups.com> Link: https://lore.kernel.org/r/20210114185229.1742255-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-15 02:52:29 +08:00
if (tb[TCA_TCINDEX_SHIFT]) {
cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
net_sched: avoid shift-out-of-bounds in tcindex_set_parms() tc_index being 16bit wide, we need to check that TCA_TCINDEX_SHIFT attribute is not silly. UBSAN: shift-out-of-bounds in net/sched/cls_tcindex.c:260:29 shift exponent 255 is too large for 32-bit type 'int' CPU: 0 PID: 8516 Comm: syz-executor228 Not tainted 5.10.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x107/0x163 lib/dump_stack.c:120 ubsan_epilogue+0xb/0x5a lib/ubsan.c:148 __ubsan_handle_shift_out_of_bounds.cold+0xb1/0x181 lib/ubsan.c:395 valid_perfect_hash net/sched/cls_tcindex.c:260 [inline] tcindex_set_parms.cold+0x1b/0x215 net/sched/cls_tcindex.c:425 tcindex_change+0x232/0x340 net/sched/cls_tcindex.c:546 tc_new_tfilter+0x13fb/0x21b0 net/sched/cls_api.c:2127 rtnetlink_rcv_msg+0x8b6/0xb80 net/core/rtnetlink.c:5555 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494 netlink_unicast_kernel net/netlink/af_netlink.c:1304 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1330 netlink_sendmsg+0x907/0xe40 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:672 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2336 ___sys_sendmsg+0xf3/0x170 net/socket.c:2390 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2423 do_syscall_64+0x2d/0x70 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet <edumazet@google.com> Reported-by: syzbot <syzkaller@googlegroups.com> Link: https://lore.kernel.org/r/20210114185229.1742255-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-01-15 02:52:29 +08:00
if (cp->shift > 16) {
err = -EINVAL;
goto errout;
}
}
if (!cp->hash) {
/* Hash not specified, use perfect hash if the upper limit
* of the hashing index is below the threshold.
*/
if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
cp->hash = (cp->mask >> cp->shift) + 1;
else
cp->hash = DEFAULT_HASH_SIZE;
}
if (p->perfect) {
net_sched: fix another crash in cls_tcindex This patch fixes the following crash: [ 166.670795] BUG: unable to handle kernel NULL pointer dereference at (null) [ 166.674230] IP: [<ffffffff814b739f>] __list_del_entry+0x5c/0x98 [ 166.674230] PGD d0ea5067 PUD ce7fc067 PMD 0 [ 166.674230] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [ 166.674230] CPU: 1 PID: 775 Comm: tc Not tainted 3.17.0-rc6+ #642 [ 166.674230] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 166.674230] task: ffff8800d03c4d20 ti: ffff8800cae7c000 task.ti: ffff8800cae7c000 [ 166.674230] RIP: 0010:[<ffffffff814b739f>] [<ffffffff814b739f>] __list_del_entry+0x5c/0x98 [ 166.674230] RSP: 0018:ffff8800cae7f7d0 EFLAGS: 00010207 [ 166.674230] RAX: 0000000000000000 RBX: ffff8800cba8d700 RCX: ffff8800cba8d700 [ 166.674230] RDX: 0000000000000000 RSI: dead000000200200 RDI: ffff8800cba8d700 [ 166.674230] RBP: ffff8800cae7f7d0 R08: 0000000000000001 R09: 0000000000000001 [ 166.674230] R10: 0000000000000000 R11: 000000000000859a R12: ffffffffffffffe8 [ 166.674230] R13: ffff8800cba8c5b8 R14: 0000000000000001 R15: ffff8800cba8d700 [ 166.674230] FS: 00007fdb5f04a740(0000) GS:ffff88011a800000(0000) knlGS:0000000000000000 [ 166.674230] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 166.674230] CR2: 0000000000000000 CR3: 00000000cf929000 CR4: 00000000000006e0 [ 166.674230] Stack: [ 166.674230] ffff8800cae7f7e8 ffffffff814b73e8 ffff8800cba8d6e8 ffff8800cae7f828 [ 166.674230] ffffffff817caeec 0000000000000046 ffff8800cba8c5b0 ffff8800cba8c5b8 [ 166.674230] 0000000000000000 0000000000000001 ffff8800cf8e33e8 ffff8800cae7f848 [ 166.674230] Call Trace: [ 166.674230] [<ffffffff814b73e8>] list_del+0xd/0x2b [ 166.674230] [<ffffffff817caeec>] tcf_action_destroy+0x4c/0x71 [ 166.674230] [<ffffffff817ca0ce>] tcf_exts_destroy+0x20/0x2d [ 166.674230] [<ffffffff817ec2b5>] tcindex_delete+0x196/0x1b7 struct list_head can not be simply copied and we should always init it. Cc: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Acked-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-01 07:07:23 +08:00
int i;
if (tcindex_alloc_perfect_hash(net, cp) < 0)
goto errout;
cp->alloc_hash = cp->hash;
for (i = 0; i < min(cp->hash, p->hash); i++)
cp->perfect[i].res = p->perfect[i].res;
balloc = 1;
}
cp->h = p->h;
err = tcindex_filter_result_init(&new_filter_result, cp, net);
if (err < 0)
goto errout_alloc;
if (old_r)
cr = r->res;
err = -EBUSY;
/* Hash already allocated, make sure that we still meet the
* requirements for the allocated hash.
*/
if (cp->perfect) {
if (!valid_perfect_hash(cp) ||
cp->hash > cp->alloc_hash)
goto errout_alloc;
} else if (cp->h && cp->hash != cp->alloc_hash) {
goto errout_alloc;
}
err = -EINVAL;
if (tb[TCA_TCINDEX_FALL_THROUGH])
cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
if (!cp->perfect && !cp->h)
cp->alloc_hash = cp->hash;
/* Note: this could be as restrictive as if (handle & ~(mask >> shift))
* but then, we'd fail handles that may become valid after some future
* mask change. While this is extremely unlikely to ever matter,
* the check below is safer (and also more backwards-compatible).
*/
if (cp->perfect || valid_perfect_hash(cp))
if (handle >= cp->alloc_hash)
goto errout_alloc;
err = -ENOMEM;
if (!cp->perfect && !cp->h) {
if (valid_perfect_hash(cp)) {
if (tcindex_alloc_perfect_hash(net, cp) < 0)
goto errout_alloc;
balloc = 1;
} else {
struct tcindex_filter __rcu **hash;
hash = kcalloc(cp->hash,
sizeof(struct tcindex_filter *),
GFP_KERNEL);
if (!hash)
goto errout_alloc;
cp->h = hash;
balloc = 2;
}
}
if (cp->perfect)
r = cp->perfect + handle;
else
r = tcindex_lookup(cp, handle) ? : &new_filter_result;
if (r == &new_filter_result) {
f = kzalloc(sizeof(*f), GFP_KERNEL);
if (!f)
goto errout_alloc;
net_sched: fix another crash in cls_tcindex This patch fixes the following crash: [ 166.670795] BUG: unable to handle kernel NULL pointer dereference at (null) [ 166.674230] IP: [<ffffffff814b739f>] __list_del_entry+0x5c/0x98 [ 166.674230] PGD d0ea5067 PUD ce7fc067 PMD 0 [ 166.674230] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [ 166.674230] CPU: 1 PID: 775 Comm: tc Not tainted 3.17.0-rc6+ #642 [ 166.674230] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 166.674230] task: ffff8800d03c4d20 ti: ffff8800cae7c000 task.ti: ffff8800cae7c000 [ 166.674230] RIP: 0010:[<ffffffff814b739f>] [<ffffffff814b739f>] __list_del_entry+0x5c/0x98 [ 166.674230] RSP: 0018:ffff8800cae7f7d0 EFLAGS: 00010207 [ 166.674230] RAX: 0000000000000000 RBX: ffff8800cba8d700 RCX: ffff8800cba8d700 [ 166.674230] RDX: 0000000000000000 RSI: dead000000200200 RDI: ffff8800cba8d700 [ 166.674230] RBP: ffff8800cae7f7d0 R08: 0000000000000001 R09: 0000000000000001 [ 166.674230] R10: 0000000000000000 R11: 000000000000859a R12: ffffffffffffffe8 [ 166.674230] R13: ffff8800cba8c5b8 R14: 0000000000000001 R15: ffff8800cba8d700 [ 166.674230] FS: 00007fdb5f04a740(0000) GS:ffff88011a800000(0000) knlGS:0000000000000000 [ 166.674230] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 166.674230] CR2: 0000000000000000 CR3: 00000000cf929000 CR4: 00000000000006e0 [ 166.674230] Stack: [ 166.674230] ffff8800cae7f7e8 ffffffff814b73e8 ffff8800cba8d6e8 ffff8800cae7f828 [ 166.674230] ffffffff817caeec 0000000000000046 ffff8800cba8c5b0 ffff8800cba8c5b8 [ 166.674230] 0000000000000000 0000000000000001 ffff8800cf8e33e8 ffff8800cae7f848 [ 166.674230] Call Trace: [ 166.674230] [<ffffffff814b73e8>] list_del+0xd/0x2b [ 166.674230] [<ffffffff817caeec>] tcf_action_destroy+0x4c/0x71 [ 166.674230] [<ffffffff817ca0ce>] tcf_exts_destroy+0x20/0x2d [ 166.674230] [<ffffffff817ec2b5>] tcindex_delete+0x196/0x1b7 struct list_head can not be simply copied and we should always init it. Cc: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Acked-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-10-01 07:07:23 +08:00
f->key = handle;
f->next = NULL;
err = tcindex_filter_result_init(&f->result, cp, net);
if (err < 0) {
kfree(f);
goto errout_alloc;
}
}
if (tb[TCA_TCINDEX_CLASSID]) {
cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
tcf_bind_filter(tp, &cr, base);
}
if (old_r && old_r != r) {
err = tcindex_filter_result_init(old_r, cp, net);
if (err < 0) {
kfree(f);
goto errout_alloc;
}
}
oldp = p;
r->res = cr;
net_sched: fix NULL pointer dereference when delete tcindex filter Li Shuang reported the following crash: [ 71.267724] BUG: unable to handle kernel NULL pointer dereference at 0000000000000004 [ 71.276456] PGD 800000085d9bd067 P4D 800000085d9bd067 PUD 859a0b067 PMD 0 [ 71.284127] Oops: 0000 [#1] SMP PTI [ 71.288015] CPU: 12 PID: 2386 Comm: tc Not tainted 4.18.0-rc8.latest+ #131 [ 71.295686] Hardware name: Dell Inc. PowerEdge R730/0WCJNT, BIOS 2.1.5 04/11/2016 [ 71.304037] RIP: 0010:tcindex_delete+0x72/0x280 [cls_tcindex] [ 71.310446] Code: 00 31 f6 48 87 75 20 48 85 f6 74 11 48 8b 47 18 48 8b 40 08 48 8b 40 50 e8 fb a6 f8 fc 48 85 db 0f 84 dc 00 00 00 48 8b 73 18 <8b> 56 04 48 8d 7e 04 85 d2 0f 84 7b 01 00 [ 71.331517] RSP: 0018:ffffb45207b3f898 EFLAGS: 00010282 [ 71.337345] RAX: ffff8ad3d72d6360 RBX: ffff8acc84393680 RCX: 000000000000002e [ 71.345306] RDX: ffff8ad3d72c8570 RSI: 0000000000000000 RDI: ffff8ad847a45800 [ 71.353277] RBP: ffff8acc84393688 R08: ffff8ad3d72c8400 R09: 0000000000000000 [ 71.361238] R10: ffff8ad3de786e00 R11: 0000000000000000 R12: ffffb45207b3f8c7 [ 71.369199] R13: ffff8ad3d93bd2a0 R14: 000000000000002e R15: ffff8ad3d72c9600 [ 71.377161] FS: 00007f9d3ec3e740(0000) GS:ffff8ad3df980000(0000) knlGS:0000000000000000 [ 71.386188] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 71.392597] CR2: 0000000000000004 CR3: 0000000852f06003 CR4: 00000000001606e0 [ 71.400558] Call Trace: [ 71.403299] tcindex_destroy_element+0x25/0x40 [cls_tcindex] [ 71.409611] tcindex_walk+0xbb/0x110 [cls_tcindex] [ 71.414953] tcindex_destroy+0x44/0x90 [cls_tcindex] [ 71.420492] ? tcindex_delete+0x280/0x280 [cls_tcindex] [ 71.426323] tcf_proto_destroy+0x16/0x40 [ 71.430696] tcf_chain_flush+0x51/0x70 [ 71.434876] tcf_block_put_ext.part.30+0x8f/0x1b0 [ 71.440122] tcf_block_put+0x4d/0x70 [ 71.444108] cbq_destroy+0x4d/0xd0 [sch_cbq] [ 71.448869] qdisc_destroy+0x62/0x130 [ 71.452951] dsmark_destroy+0x2a/0x70 [sch_dsmark] [ 71.458300] qdisc_destroy+0x62/0x130 [ 71.462373] qdisc_graft+0x3ba/0x470 [ 71.466359] tc_get_qdisc+0x2a6/0x2c0 [ 71.470443] ? cred_has_capability+0x7d/0x130 [ 71.475307] rtnetlink_rcv_msg+0x263/0x2d0 [ 71.479875] ? rtnl_calcit.isra.30+0x110/0x110 [ 71.484832] netlink_rcv_skb+0x4d/0x130 [ 71.489109] netlink_unicast+0x1a3/0x250 [ 71.493482] netlink_sendmsg+0x2ae/0x3a0 [ 71.497859] sock_sendmsg+0x36/0x40 [ 71.501748] ___sys_sendmsg+0x26f/0x2d0 [ 71.506029] ? handle_pte_fault+0x586/0xdf0 [ 71.510694] ? __handle_mm_fault+0x389/0x500 [ 71.515457] ? __sys_sendmsg+0x5e/0xa0 [ 71.519636] __sys_sendmsg+0x5e/0xa0 [ 71.523626] do_syscall_64+0x5b/0x180 [ 71.527711] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 71.533345] RIP: 0033:0x7f9d3e257f10 [ 71.537331] Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 [ 71.558401] RSP: 002b:00007fff6f893398 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 71.566848] RAX: ffffffffffffffda RBX: 000000005b71274d RCX: 00007f9d3e257f10 [ 71.574810] RDX: 0000000000000000 RSI: 00007fff6f8933e0 RDI: 0000000000000003 [ 71.582770] RBP: 00007fff6f8933e0 R08: 000000000000ffff R09: 0000000000000003 [ 71.590729] R10: 00007fff6f892e20 R11: 0000000000000246 R12: 0000000000000000 [ 71.598689] R13: 0000000000662ee0 R14: 0000000000000000 R15: 0000000000000000 [ 71.606651] Modules linked in: sch_cbq cls_tcindex sch_dsmark xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_coni [ 71.685425] libahci i2c_algo_bit i2c_core i40e libata dca mdio megaraid_sas dm_mirror dm_region_hash dm_log dm_mod [ 71.697075] CR2: 0000000000000004 [ 71.700792] ---[ end trace f604eb1acacd978b ]--- Reproducer: tc qdisc add dev lo handle 1:0 root dsmark indices 64 set_tc_index tc filter add dev lo parent 1:0 protocol ip prio 1 tcindex mask 0xfc shift 2 tc qdisc add dev lo parent 1:0 handle 2:0 cbq bandwidth 10Mbit cell 8 avpkt 1000 mpu 64 tc class add dev lo parent 2:0 classid 2:1 cbq bandwidth 10Mbit rate 1500Kbit avpkt 1000 prio 1 bounded isolated allot 1514 weight 1 maxburst 10 tc filter add dev lo parent 2:0 protocol ip prio 1 handle 0x2e tcindex classid 2:1 pass_on tc qdisc add dev lo parent 2:1 pfifo limit 5 tc qdisc del dev lo root This is because in tcindex_set_parms, when there is no old_r, we set new exts to cr.exts. And we didn't set it to filter when r == &new_filter_result. Then in tcindex_delete() -> tcf_exts_get_net(), we will get NULL pointer dereference as we didn't init exts. Fix it by moving tcf_exts_change() after "if (old_r && old_r != r)" check. Then we don't need "cr" as there is no errout after that. Fixes: bf63ac73b3e13 ("net_sched: fix an oops in tcindex filter") Reported-by: Li Shuang <shuali@redhat.com> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com> Acked-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-13 18:44:03 +08:00
tcf_exts_change(&r->exts, &e);
rcu_assign_pointer(tp->root, cp);
if (r == &new_filter_result) {
struct tcindex_filter *nfp;
struct tcindex_filter __rcu **fp;
net_sched: Fix missing res info when create new tc_index filter Li Shuang reported the following warn: [ 733.484610] WARNING: CPU: 6 PID: 21123 at net/sched/sch_cbq.c:1418 cbq_destroy_class+0x5d/0x70 [sch_cbq] [ 733.495190] Modules linked in: sch_cbq cls_tcindex sch_dsmark rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat l [ 733.574155] syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm igb ixgbe ahci libahci i2c_algo_bit libata i40e i2c_core dca mdio megaraid_sas dm_mirror dm_region_hash dm_log dm_mod [ 733.592500] CPU: 6 PID: 21123 Comm: tc Not tainted 4.18.0-rc8.latest+ #131 [ 733.600169] Hardware name: Dell Inc. PowerEdge R730/0WCJNT, BIOS 2.1.5 04/11/2016 [ 733.608518] RIP: 0010:cbq_destroy_class+0x5d/0x70 [sch_cbq] [ 733.614734] Code: e7 d9 d2 48 8b 7b 48 e8 61 05 da d2 48 8d bb f8 00 00 00 e8 75 ae d5 d2 48 39 eb 74 0a 48 89 df 5b 5d e9 16 6c 94 d2 5b 5d c3 <0f> 0b eb b6 0f 1f 44 00 00 66 2e 0f 1f 84 [ 733.635798] RSP: 0018:ffffbfbb066bb9d8 EFLAGS: 00010202 [ 733.641627] RAX: 0000000000000001 RBX: ffff9cdd17392800 RCX: 000000008010000f [ 733.649588] RDX: ffff9cdd1df547e0 RSI: ffff9cdd17392800 RDI: ffff9cdd0f84c800 [ 733.657547] RBP: ffff9cdd0f84c800 R08: 0000000000000001 R09: 0000000000000000 [ 733.665508] R10: ffff9cdd0f84d000 R11: 0000000000000001 R12: 0000000000000001 [ 733.673469] R13: 0000000000000000 R14: 0000000000000001 R15: ffff9cdd17392200 [ 733.681430] FS: 00007f911890a740(0000) GS:ffff9cdd1f8c0000(0000) knlGS:0000000000000000 [ 733.690456] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 733.696864] CR2: 0000000000b5544c CR3: 0000000859374002 CR4: 00000000001606e0 [ 733.704826] Call Trace: [ 733.707554] cbq_destroy+0xa1/0xd0 [sch_cbq] [ 733.712318] qdisc_destroy+0x62/0x130 [ 733.716401] dsmark_destroy+0x2a/0x70 [sch_dsmark] [ 733.721745] qdisc_destroy+0x62/0x130 [ 733.725829] qdisc_graft+0x3ba/0x470 [ 733.729817] tc_get_qdisc+0x2a6/0x2c0 [ 733.733901] ? cred_has_capability+0x7d/0x130 [ 733.738761] rtnetlink_rcv_msg+0x263/0x2d0 [ 733.743330] ? rtnl_calcit.isra.30+0x110/0x110 [ 733.748287] netlink_rcv_skb+0x4d/0x130 [ 733.752576] netlink_unicast+0x1a3/0x250 [ 733.756949] netlink_sendmsg+0x2ae/0x3a0 [ 733.761324] sock_sendmsg+0x36/0x40 [ 733.765213] ___sys_sendmsg+0x26f/0x2d0 [ 733.769493] ? handle_pte_fault+0x586/0xdf0 [ 733.774158] ? __handle_mm_fault+0x389/0x500 [ 733.778919] ? __sys_sendmsg+0x5e/0xa0 [ 733.783099] __sys_sendmsg+0x5e/0xa0 [ 733.787087] do_syscall_64+0x5b/0x180 [ 733.791171] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 733.796805] RIP: 0033:0x7f9117f23f10 [ 733.800791] Code: c3 48 8b 05 82 6f 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d 8d d0 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 [ 733.821873] RSP: 002b:00007ffe96818398 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 733.830319] RAX: ffffffffffffffda RBX: 000000005b71244c RCX: 00007f9117f23f10 [ 733.838280] RDX: 0000000000000000 RSI: 00007ffe968183e0 RDI: 0000000000000003 [ 733.846241] RBP: 00007ffe968183e0 R08: 000000000000ffff R09: 0000000000000003 [ 733.854202] R10: 00007ffe96817e20 R11: 0000000000000246 R12: 0000000000000000 [ 733.862161] R13: 0000000000662ee0 R14: 0000000000000000 R15: 0000000000000000 [ 733.870121] ---[ end trace 28edd4aad712ddca ]--- This is because we didn't update f->result.res when create new filter. Then in tcindex_delete() -> tcf_unbind_filter(), we will failed to find out the res and unbind filter, which will trigger the WARN_ON() in cbq_destroy_class(). Fix it by updating f->result.res when create new filter. Fixes: 6e0565697a106 ("net_sched: fix another crash in cls_tcindex") Reported-by: Li Shuang <shuali@redhat.com> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com> Acked-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-13 18:44:04 +08:00
f->result.res = r->res;
tcf_exts_change(&f->result.exts, &r->exts);
net_sched: fix a null pointer dereference in tcindex_set_parms() This patch fixes the following crash: [ 42.199159] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 [ 42.200027] IP: [<ffffffff817e3fc4>] tcindex_set_parms+0x45c/0x526 [ 42.200027] PGD d2319067 PUD d4ffe067 PMD 0 [ 42.200027] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC [ 42.200027] CPU: 0 PID: 541 Comm: tc Not tainted 3.17.0-rc4+ #603 [ 42.200027] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 42.200027] task: ffff8800d22d2670 ti: ffff8800ce790000 task.ti: ffff8800ce790000 [ 42.200027] RIP: 0010:[<ffffffff817e3fc4>] [<ffffffff817e3fc4>] tcindex_set_parms+0x45c/0x526 [ 42.200027] RSP: 0018:ffff8800ce793898 EFLAGS: 00010202 [ 42.200027] RAX: 0000000000000001 RBX: ffff8800d1786498 RCX: 0000000000000000 [ 42.200027] RDX: ffffffff82114ec8 RSI: ffffffff82114ec8 RDI: ffffffff82114ec8 [ 42.200027] RBP: ffff8800ce793958 R08: 00000000000080d0 R09: 0000000000000001 [ 42.200027] R10: ffff8800ce7939a0 R11: 0000000000000246 R12: ffff8800d017d238 [ 42.200027] R13: 0000000000000018 R14: ffff8800d017c6a0 R15: ffff8800d1786620 [ 42.200027] FS: 00007f4e24539740(0000) GS:ffff88011a600000(0000) knlGS:0000000000000000 [ 42.200027] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 42.200027] CR2: 0000000000000018 CR3: 00000000cff38000 CR4: 00000000000006f0 [ 42.200027] Stack: [ 42.200027] ffff8800ce0949f0 0000000000000000 0000000200000003 ffff880000000000 [ 42.200027] ffff8800ce7938b8 ffff8800ce7938b8 0000000600000007 0000000000000000 [ 42.200027] ffff8800ce7938d8 ffff8800ce7938d8 0000000600000007 ffff8800ce0949f0 [ 42.200027] Call Trace: [ 42.200027] [<ffffffff817e4169>] tcindex_change+0xdb/0xee [ 42.200027] [<ffffffff817c16ca>] tc_ctl_tfilter+0x44d/0x63f [ 42.200027] [<ffffffff8179d161>] rtnetlink_rcv_msg+0x181/0x194 [ 42.200027] [<ffffffff8179cf9d>] ? rtnl_lock+0x17/0x19 [ 42.200027] [<ffffffff8179cfe0>] ? __rtnl_unlock+0x17/0x17 [ 42.200027] [<ffffffff817ee296>] netlink_rcv_skb+0x49/0x8b [ 43.462494] [<ffffffff8179cfc2>] rtnetlink_rcv+0x23/0x2a [ 43.462494] [<ffffffff817ec8df>] netlink_unicast+0xc7/0x148 [ 43.462494] [<ffffffff817ed413>] netlink_sendmsg+0x5cb/0x63d [ 43.462494] [<ffffffff810ad781>] ? mark_lock+0x2e/0x224 [ 43.462494] [<ffffffff817757b8>] __sock_sendmsg_nosec+0x25/0x27 [ 43.462494] [<ffffffff81778165>] sock_sendmsg+0x57/0x71 [ 43.462494] [<ffffffff81152bbd>] ? might_fault+0x57/0xa4 [ 43.462494] [<ffffffff81152c06>] ? might_fault+0xa0/0xa4 [ 43.462494] [<ffffffff81152bbd>] ? might_fault+0x57/0xa4 [ 43.462494] [<ffffffff817838fd>] ? verify_iovec+0x69/0xb7 [ 43.462494] [<ffffffff817784f8>] ___sys_sendmsg+0x21d/0x2bb [ 43.462494] [<ffffffff81009db3>] ? native_sched_clock+0x35/0x37 [ 43.462494] [<ffffffff8109ab53>] ? sched_clock_local+0x12/0x72 [ 43.462494] [<ffffffff810ad781>] ? mark_lock+0x2e/0x224 [ 43.462494] [<ffffffff8109ada4>] ? sched_clock_cpu+0xa0/0xb9 [ 43.462494] [<ffffffff810aee37>] ? __lock_acquire+0x5fe/0xde4 [ 43.462494] [<ffffffff8119f570>] ? rcu_read_lock_held+0x36/0x38 [ 43.462494] [<ffffffff8119f75a>] ? __fcheck_files.isra.7+0x4b/0x57 [ 43.462494] [<ffffffff8119fbf2>] ? __fget_light+0x30/0x54 [ 43.462494] [<ffffffff81779012>] __sys_sendmsg+0x42/0x60 [ 43.462494] [<ffffffff81779042>] SyS_sendmsg+0x12/0x1c [ 43.462494] [<ffffffff819d24d2>] system_call_fastpath+0x16/0x1b 'p->h' could be NULL while 'cp->h' is always update to date. Fixes: commit 331b72922c5f58d48fd ("net: sched: RCU cls_tcindex") Cc: John Fastabend <john.fastabend@gmail.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Acked-By: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2014-09-16 07:43:43 +08:00
fp = cp->h + (handle % cp->hash);
for (nfp = rtnl_dereference(*fp);
nfp;
fp = &nfp->next, nfp = rtnl_dereference(*fp))
; /* nothing */
rcu_assign_pointer(*fp, f);
} else {
tcf_exts_destroy(&new_filter_result.exts);
}
if (oldp)
tcf_queue_work(&oldp->rwork, tcindex_partial_destroy_work);
return 0;
errout_alloc:
if (balloc == 1)
tcindex_free_perfect_hash(cp);
else if (balloc == 2)
kfree(cp->h);
tcf_exts_destroy(&new_filter_result.exts);
errout:
kfree(cp);
tcf_exts_destroy(&e);
return err;
}
static int
tcindex_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca, void **arg, bool ovr,
bool rtnl_held, struct netlink_ext_ack *extack)
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = *arg;
int err;
pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
"p %p,r %p,*arg %p\n",
tp, handle, tca, arg, opt, p, r, *arg);
if (!opt)
return 0;
netlink: make validation more configurable for future strictness We currently have two levels of strict validation: 1) liberal (default) - undefined (type >= max) & NLA_UNSPEC attributes accepted - attribute length >= expected accepted - garbage at end of message accepted 2) strict (opt-in) - NLA_UNSPEC attributes accepted - attribute length >= expected accepted Split out parsing strictness into four different options: * TRAILING - check that there's no trailing data after parsing attributes (in message or nested) * MAXTYPE - reject attrs > max known type * UNSPEC - reject attributes with NLA_UNSPEC policy entries * STRICT_ATTRS - strictly validate attribute size The default for future things should be *everything*. The current *_strict() is a combination of TRAILING and MAXTYPE, and is renamed to _deprecated_strict(). The current regular parsing has none of this, and is renamed to *_parse_deprecated(). Additionally it allows us to selectively set one of the new flags even on old policies. Notably, the UNSPEC flag could be useful in this case, since it can be arranged (by filling in the policy) to not be an incompatible userspace ABI change, but would then going forward prevent forgetting attribute entries. Similar can apply to the POLICY flag. We end up with the following renames: * nla_parse -> nla_parse_deprecated * nla_parse_strict -> nla_parse_deprecated_strict * nlmsg_parse -> nlmsg_parse_deprecated * nlmsg_parse_strict -> nlmsg_parse_deprecated_strict * nla_parse_nested -> nla_parse_nested_deprecated * nla_validate_nested -> nla_validate_nested_deprecated Using spatch, of course: @@ expression TB, MAX, HEAD, LEN, POL, EXT; @@ -nla_parse(TB, MAX, HEAD, LEN, POL, EXT) +nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT) @@ expression NLH, HDRLEN, TB, MAX, POL, EXT; @@ -nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT) +nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT) @@ expression NLH, HDRLEN, TB, MAX, POL, EXT; @@ -nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT) +nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT) @@ expression TB, MAX, NLA, POL, EXT; @@ -nla_parse_nested(TB, MAX, NLA, POL, EXT) +nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT) @@ expression START, MAX, POL, EXT; @@ -nla_validate_nested(START, MAX, POL, EXT) +nla_validate_nested_deprecated(START, MAX, POL, EXT) @@ expression NLH, HDRLEN, MAX, POL, EXT; @@ -nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT) +nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT) For this patch, don't actually add the strict, non-renamed versions yet so that it breaks compile if I get it wrong. Also, while at it, make nla_validate and nla_parse go down to a common __nla_validate_parse() function to avoid code duplication. Ultimately, this allows us to have very strict validation for every new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the next patch, while existing things will continue to work as is. In effect then, this adds fully strict validation for any new command. Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-04-26 20:07:28 +08:00
err = nla_parse_nested_deprecated(tb, TCA_TCINDEX_MAX, opt,
tcindex_policy, NULL);
if (err < 0)
return err;
return tcindex_set_parms(net, tp, base, handle, p, r, tb,
tca[TCA_RATE], ovr, extack);
}
static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
bool rtnl_held)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter *f, *next;
int i;
pr_debug("tcindex_walk(tp %p,walker %p),p %p\n", tp, walker, p);
if (p->perfect) {
for (i = 0; i < p->hash; i++) {
if (!p->perfect[i].res.class)
continue;
if (walker->count >= walker->skip) {
if (walker->fn(tp, p->perfect + i, walker) < 0) {
walker->stop = 1;
return;
}
}
walker->count++;
}
}
if (!p->h)
return;
for (i = 0; i < p->hash; i++) {
for (f = rtnl_dereference(p->h[i]); f; f = next) {
next = rtnl_dereference(f->next);
if (walker->count >= walker->skip) {
if (walker->fn(tp, &f->result, walker) < 0) {
walker->stop = 1;
return;
}
}
walker->count++;
}
}
}
static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
int i;
pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
if (p->perfect) {
for (i = 0; i < p->hash; i++) {
struct tcindex_filter_result *r = p->perfect + i;
/* tcf_queue_work() does not guarantee the ordering we
* want, so we have to take this refcnt temporarily to
* ensure 'p' is freed after all tcindex_filter_result
* here. Imperfect hash does not need this, because it
* uses linked lists rather than an array.
*/
tcindex_data_get(p);
tcf_unbind_filter(tp, &r->res);
if (tcf_exts_get_net(&r->exts))
tcf_queue_work(&r->rwork,
tcindex_destroy_rexts_work);
else
__tcindex_destroy_rexts(r);
}
}
for (i = 0; p->h && i < p->hash; i++) {
struct tcindex_filter *f, *next;
bool last;
for (f = rtnl_dereference(p->h[i]); f; f = next) {
next = rtnl_dereference(f->next);
tcindex_delete(tp, &f->result, &last, rtnl_held, NULL);
}
}
tcf_queue_work(&p->rwork, tcindex_destroy_work);
}
static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = fh;
struct nlattr *nest;
pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n",
tp, fh, skb, t, p, r);
pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
if (!fh) {
t->tcm_handle = ~0; /* whatever ... */
if (nla_put_u32(skb, TCA_TCINDEX_HASH, p->hash) ||
nla_put_u16(skb, TCA_TCINDEX_MASK, p->mask) ||
nla_put_u32(skb, TCA_TCINDEX_SHIFT, p->shift) ||
nla_put_u32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through))
goto nla_put_failure;
nla_nest_end(skb, nest);
} else {
if (p->perfect) {
t->tcm_handle = r - p->perfect;
} else {
struct tcindex_filter *f;
struct tcindex_filter __rcu **fp;
int i;
t->tcm_handle = 0;
for (i = 0; !t->tcm_handle && i < p->hash; i++) {
fp = &p->h[i];
for (f = rtnl_dereference(*fp);
!t->tcm_handle && f;
fp = &f->next, f = rtnl_dereference(*fp)) {
if (&f->result == r)
t->tcm_handle = f->key;
}
}
}
pr_debug("handle = %d\n", t->tcm_handle);
if (r->res.class &&
nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid))
goto nla_put_failure;
if (tcf_exts_dump(skb, &r->exts) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
if (tcf_exts_dump_stats(skb, &r->exts) < 0)
goto nla_put_failure;
}
return skb->len;
nla_put_failure:
nla_nest_cancel(skb, nest);
return -1;
}
static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl,
void *q, unsigned long base)
{
struct tcindex_filter_result *r = fh;
if (r && r->res.classid == classid) {
if (cl)
__tcf_bind_filter(q, &r->res, base);
else
__tcf_unbind_filter(q, &r->res);
}
}
static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
.kind = "tcindex",
.classify = tcindex_classify,
.init = tcindex_init,
.destroy = tcindex_destroy,
.get = tcindex_get,
.change = tcindex_change,
.delete = tcindex_delete,
.walk = tcindex_walk,
.dump = tcindex_dump,
.bind_class = tcindex_bind_class,
.owner = THIS_MODULE,
};
static int __init init_tcindex(void)
{
return register_tcf_proto_ops(&cls_tcindex_ops);
}
static void __exit exit_tcindex(void)
{
unregister_tcf_proto_ops(&cls_tcindex_ops);
}
module_init(init_tcindex)
module_exit(exit_tcindex)
MODULE_LICENSE("GPL");