linux/net/sched/ematch.c

533 lines
14 KiB
C
Raw Normal View History

/*
* net/sched/ematch.c Extended Match API
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Thomas Graf <tgraf@suug.ch>
*
* ==========================================================================
*
* An extended match (ematch) is a small classification tool not worth
* writing a full classifier for. Ematches can be interconnected to form
* a logic expression and get attached to classifiers to extend their
* functionatlity.
*
* The userspace part transforms the logic expressions into an array
* consisting of multiple sequences of interconnected ematches separated
* by markers. Precedence is implemented by a special ematch kind
* referencing a sequence beyond the marker of the current sequence
* causing the current position in the sequence to be pushed onto a stack
* to allow the current position to be overwritten by the position referenced
* in the special ematch. Matching continues in the new sequence until a
* marker is reached causing the position to be restored from the stack.
*
* Example:
* A AND (B1 OR B2) AND C AND D
*
* ------->-PUSH-------
* -->-- / -->-- \ -->--
* / \ / / \ \ / \
* +-------+-------+-------+-------+-------+--------+
* | A AND | B AND | C AND | D END | B1 OR | B2 END |
* +-------+-------+-------+-------+-------+--------+
* \ /
* --------<-POP---------
*
* where B is a virtual ematch referencing to sequence starting with B1.
*
* ==========================================================================
*
* How to write an ematch in 60 seconds
* ------------------------------------
*
* 1) Provide a matcher function:
* static int my_match(struct sk_buff *skb, struct tcf_ematch *m,
* struct tcf_pkt_info *info)
* {
* struct mydata *d = (struct mydata *) m->data;
*
* if (...matching goes here...)
* return 1;
* else
* return 0;
* }
*
* 2) Fill out a struct tcf_ematch_ops:
* static struct tcf_ematch_ops my_ops = {
* .kind = unique id,
* .datalen = sizeof(struct mydata),
* .match = my_match,
* .owner = THIS_MODULE,
* };
*
* 3) Register/Unregister your ematch:
* static int __init init_my_ematch(void)
* {
* return tcf_em_register(&my_ops);
* }
*
* static void __exit exit_my_ematch(void)
* {
* tcf_em_unregister(&my_ops);
* }
*
* module_init(init_my_ematch);
* module_exit(exit_my_ematch);
*
* 4) By now you should have two more seconds left, barely enough to
* open up a beer to watch the compilation going.
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <net/pkt_cls.h>
static LIST_HEAD(ematch_ops);
static DEFINE_RWLOCK(ematch_mod_lock);
static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind)
{
struct tcf_ematch_ops *e = NULL;
read_lock(&ematch_mod_lock);
list_for_each_entry(e, &ematch_ops, link) {
if (kind == e->kind) {
if (!try_module_get(e->owner))
e = NULL;
read_unlock(&ematch_mod_lock);
return e;
}
}
read_unlock(&ematch_mod_lock);
return NULL;
}
/**
* tcf_em_register - register an extended match
*
* @ops: ematch operations lookup table
*
* This function must be called by ematches to announce their presence.
* The given @ops must have kind set to a unique identifier and the
* callback match() must be implemented. All other callbacks are optional
* and a fallback implementation is used instead.
*
* Returns -EEXISTS if an ematch of the same kind has already registered.
*/
int tcf_em_register(struct tcf_ematch_ops *ops)
{
int err = -EEXIST;
struct tcf_ematch_ops *e;
if (ops->match == NULL)
return -EINVAL;
write_lock(&ematch_mod_lock);
list_for_each_entry(e, &ematch_ops, link)
if (ops->kind == e->kind)
goto errout;
list_add_tail(&ops->link, &ematch_ops);
err = 0;
errout:
write_unlock(&ematch_mod_lock);
return err;
}
EXPORT_SYMBOL(tcf_em_register);
/**
* tcf_em_unregister - unregster and extended match
*
* @ops: ematch operations lookup table
*
* This function must be called by ematches to announce their disappearance
* for examples when the module gets unloaded. The @ops parameter must be
* the same as the one used for registration.
*
* Returns -ENOENT if no matching ematch was found.
*/
void tcf_em_unregister(struct tcf_ematch_ops *ops)
{
write_lock(&ematch_mod_lock);
list_del(&ops->link);
write_unlock(&ematch_mod_lock);
}
EXPORT_SYMBOL(tcf_em_unregister);
static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree,
int index)
{
return &tree->matches[index];
}
static int tcf_em_validate(struct tcf_proto *tp,
struct tcf_ematch_tree_hdr *tree_hdr,
struct tcf_ematch *em, struct nlattr *nla, int idx)
{
int err = -EINVAL;
struct tcf_ematch_hdr *em_hdr = nla_data(nla);
int data_len = nla_len(nla) - sizeof(*em_hdr);
void *data = (void *) em_hdr + sizeof(*em_hdr);
if (!TCF_EM_REL_VALID(em_hdr->flags))
goto errout;
if (em_hdr->kind == TCF_EM_CONTAINER) {
/* Special ematch called "container", carries an index
* referencing an external ematch sequence. */
u32 ref;
if (data_len < sizeof(ref))
goto errout;
ref = *(u32 *) data;
if (ref >= tree_hdr->nmatches)
goto errout;
/* We do not allow backward jumps to avoid loops and jumps
* to our own position are of course illegal. */
if (ref <= idx)
goto errout;
em->data = ref;
} else {
/* Note: This lookup will increase the module refcnt
* of the ematch module referenced. In case of a failure,
* a destroy function is called by the underlying layer
* which automatically releases the reference again, therefore
* the module MUST not be given back under any circumstances
* here. Be aware, the destroy function assumes that the
* module is held if the ops field is non zero. */
em->ops = tcf_em_lookup(em_hdr->kind);
if (em->ops == NULL) {
err = -ENOENT;
#ifdef CONFIG_MODULES
__rtnl_unlock();
request_module("ematch-kind-%u", em_hdr->kind);
rtnl_lock();
em->ops = tcf_em_lookup(em_hdr->kind);
if (em->ops) {
/* We dropped the RTNL mutex in order to
* perform the module load. Tell the caller
* to replay the request. */
module_put(em->ops->owner);
err = -EAGAIN;
}
#endif
goto errout;
}
/* ematch module provides expected length of data, so we
* can do a basic sanity check. */
if (em->ops->datalen && data_len < em->ops->datalen)
goto errout;
if (em->ops->change) {
err = em->ops->change(tp, data, data_len, em);
if (err < 0)
goto errout;
} else if (data_len > 0) {
/* ematch module doesn't provide an own change
* procedure and expects us to allocate and copy
* the ematch data.
*
* TCF_EM_SIMPLE may be specified stating that the
* data only consists of a u32 integer and the module
* does not expected a memory reference but rather
* the value carried. */
if (em_hdr->flags & TCF_EM_SIMPLE) {
if (data_len < sizeof(u32))
goto errout;
em->data = *(u32 *) data;
} else {
void *v = kmemdup(data, data_len, GFP_KERNEL);
if (v == NULL) {
err = -ENOBUFS;
goto errout;
}
em->data = (unsigned long) v;
}
}
}
em->matchid = em_hdr->matchid;
em->flags = em_hdr->flags;
em->datalen = data_len;
err = 0;
errout:
return err;
}
static const struct nla_policy em_policy[TCA_EMATCH_TREE_MAX + 1] = {
[TCA_EMATCH_TREE_HDR] = { .len = sizeof(struct tcf_ematch_tree_hdr) },
[TCA_EMATCH_TREE_LIST] = { .type = NLA_NESTED },
};
/**
* tcf_em_tree_validate - validate ematch config TLV and build ematch tree
*
* @tp: classifier kind handle
* @nla: ematch tree configuration TLV
* @tree: destination ematch tree variable to store the resulting
* ematch tree.
*
* This function validates the given configuration TLV @nla and builds an
* ematch tree in @tree. The resulting tree must later be copied into
* the private classifier data using tcf_em_tree_change(). You MUST NOT
* provide the ematch tree variable of the private classifier data directly,
* the changes would not be locked properly.
*
* Returns a negative error code if the configuration TLV contains errors.
*/
int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
struct tcf_ematch_tree *tree)
{
int idx, list_len, matches_len, err;
struct nlattr *tb[TCA_EMATCH_TREE_MAX + 1];
struct nlattr *rt_match, *rt_hdr, *rt_list;
struct tcf_ematch_tree_hdr *tree_hdr;
struct tcf_ematch *em;
[PKT_SCHED] ematch: oops from uninitialized variable (resend) Setting up a meta match causes a kernel OOPS because of uninitialized elements in tree. [ 37.322381] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 [ 37.322381] IP: [<ffffffff883fc717>] :em_meta:em_meta_destroy+0x17/0x80 [ 37.322381] Call Trace: [ 37.322381] [<ffffffff803ec83d>] tcf_em_tree_destroy+0x2d/0xa0 [ 37.322381] [<ffffffff803ecc8c>] tcf_em_tree_validate+0x2dc/0x4a0 [ 37.322381] [<ffffffff803f06d2>] nla_parse+0x92/0xe0 [ 37.322381] [<ffffffff883f9672>] :cls_basic:basic_change+0x202/0x3c0 [ 37.322381] [<ffffffff802a3917>] kmem_cache_alloc+0x67/0xa0 [ 37.322381] [<ffffffff803ea221>] tc_ctl_tfilter+0x3b1/0x580 [ 37.322381] [<ffffffff803dffd0>] rtnetlink_rcv_msg+0x0/0x260 [ 37.322381] [<ffffffff803ee944>] netlink_rcv_skb+0x74/0xa0 [ 37.322381] [<ffffffff803dffc8>] rtnetlink_rcv+0x18/0x20 [ 37.322381] [<ffffffff803ee6c3>] netlink_unicast+0x263/0x290 [ 37.322381] [<ffffffff803cf276>] __alloc_skb+0x96/0x160 [ 37.322381] [<ffffffff803ef014>] netlink_sendmsg+0x274/0x340 [ 37.322381] [<ffffffff803c7c3b>] sock_sendmsg+0x12b/0x140 [ 37.322381] [<ffffffff8024de90>] autoremove_wake_function+0x0/0x30 [ 37.322381] [<ffffffff8024de90>] autoremove_wake_function+0x0/0x30 [ 37.322381] [<ffffffff803c7c3b>] sock_sendmsg+0x12b/0x140 [ 37.322381] [<ffffffff80288611>] zone_statistics+0xb1/0xc0 [ 37.322381] [<ffffffff803c7e5e>] sys_sendmsg+0x20e/0x360 [ 37.322381] [<ffffffff803c7411>] sockfd_lookup_light+0x41/0x80 [ 37.322381] [<ffffffff8028d04b>] handle_mm_fault+0x3eb/0x7f0 [ 37.322381] [<ffffffff8020c2fb>] system_call_after_swapgs+0x7b/0x80 Signed-off-by: Stephen Hemminger <shemminger@vyatta.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-02-09 19:47:19 +08:00
memset(tree, 0, sizeof(*tree));
if (!nla)
return 0;
err = nla_parse_nested(tb, TCA_EMATCH_TREE_MAX, nla, em_policy);
if (err < 0)
goto errout;
err = -EINVAL;
rt_hdr = tb[TCA_EMATCH_TREE_HDR];
rt_list = tb[TCA_EMATCH_TREE_LIST];
if (rt_hdr == NULL || rt_list == NULL)
goto errout;
tree_hdr = nla_data(rt_hdr);
memcpy(&tree->hdr, tree_hdr, sizeof(*tree_hdr));
rt_match = nla_data(rt_list);
list_len = nla_len(rt_list);
matches_len = tree_hdr->nmatches * sizeof(*em);
tree->matches = kzalloc(matches_len, GFP_KERNEL);
if (tree->matches == NULL)
goto errout;
/* We do not use nla_parse_nested here because the maximum
* number of attributes is unknown. This saves us the allocation
* for a tb buffer which would serve no purpose at all.
*
* The array of rt attributes is parsed in the order as they are
* provided, their type must be incremental from 1 to n. Even
* if it does not serve any real purpose, a failure of sticking
* to this policy will result in parsing failure. */
for (idx = 0; nla_ok(rt_match, list_len); idx++) {
err = -EINVAL;
if (rt_match->nla_type != (idx + 1))
goto errout_abort;
if (idx >= tree_hdr->nmatches)
goto errout_abort;
if (nla_len(rt_match) < sizeof(struct tcf_ematch_hdr))
goto errout_abort;
em = tcf_em_get_match(tree, idx);
err = tcf_em_validate(tp, tree_hdr, em, rt_match, idx);
if (err < 0)
goto errout_abort;
rt_match = nla_next(rt_match, &list_len);
}
/* Check if the number of matches provided by userspace actually
* complies with the array of matches. The number was used for
* the validation of references and a mismatch could lead to
* undefined references during the matching process. */
if (idx != tree_hdr->nmatches) {
err = -EINVAL;
goto errout_abort;
}
err = 0;
errout:
return err;
errout_abort:
tcf_em_tree_destroy(tp, tree);
return err;
}
EXPORT_SYMBOL(tcf_em_tree_validate);
/**
* tcf_em_tree_destroy - destroy an ematch tree
*
* @tp: classifier kind handle
* @tree: ematch tree to be deleted
*
* This functions destroys an ematch tree previously created by
* tcf_em_tree_validate()/tcf_em_tree_change(). You must ensure that
* the ematch tree is not in use before calling this function.
*/
void tcf_em_tree_destroy(struct tcf_proto *tp, struct tcf_ematch_tree *tree)
{
int i;
if (tree->matches == NULL)
return;
for (i = 0; i < tree->hdr.nmatches; i++) {
struct tcf_ematch *em = tcf_em_get_match(tree, i);
if (em->ops) {
if (em->ops->destroy)
em->ops->destroy(tp, em);
else if (!tcf_em_is_simple(em))
kfree((void *) em->data);
module_put(em->ops->owner);
}
}
tree->hdr.nmatches = 0;
kfree(tree->matches);
tree->matches = NULL;
}
EXPORT_SYMBOL(tcf_em_tree_destroy);
/**
* tcf_em_tree_dump - dump ematch tree into a rtnl message
*
* @skb: skb holding the rtnl message
* @t: ematch tree to be dumped
* @tlv: TLV type to be used to encapsulate the tree
*
* This function dumps a ematch tree into a rtnl message. It is valid to
* call this function while the ematch tree is in use.
*
* Returns -1 if the skb tailroom is insufficient.
*/
int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
{
int i;
u8 *tail;
struct nlattr *top_start;
struct nlattr *list_start;
top_start = nla_nest_start(skb, tlv);
if (top_start == NULL)
goto nla_put_failure;
NLA_PUT(skb, TCA_EMATCH_TREE_HDR, sizeof(tree->hdr), &tree->hdr);
list_start = nla_nest_start(skb, TCA_EMATCH_TREE_LIST);
if (list_start == NULL)
goto nla_put_failure;
tail = skb_tail_pointer(skb);
for (i = 0; i < tree->hdr.nmatches; i++) {
struct nlattr *match_start = (struct nlattr *)tail;
struct tcf_ematch *em = tcf_em_get_match(tree, i);
struct tcf_ematch_hdr em_hdr = {
.kind = em->ops ? em->ops->kind : TCF_EM_CONTAINER,
.matchid = em->matchid,
.flags = em->flags
};
NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr);
if (em->ops && em->ops->dump) {
if (em->ops->dump(skb, em) < 0)
goto nla_put_failure;
} else if (tcf_em_is_container(em) || tcf_em_is_simple(em)) {
u32 u = em->data;
nla_put_nohdr(skb, sizeof(u), &u);
} else if (em->datalen > 0)
nla_put_nohdr(skb, em->datalen, (void *) em->data);
tail = skb_tail_pointer(skb);
match_start->nla_len = tail - (u8 *)match_start;
}
nla_nest_end(skb, list_start);
nla_nest_end(skb, top_start);
return 0;
nla_put_failure:
return -1;
}
EXPORT_SYMBOL(tcf_em_tree_dump);
static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
struct tcf_pkt_info *info)
{
int r = em->ops->match(skb, em, info);
return tcf_em_is_inverted(em) ? !r : r;
}
/* Do not use this function directly, use tcf_em_tree_match instead */
int __tcf_em_tree_match(struct sk_buff *skb, struct tcf_ematch_tree *tree,
struct tcf_pkt_info *info)
{
int stackp = 0, match_idx = 0, res = 0;
struct tcf_ematch *cur_match;
int stack[CONFIG_NET_EMATCH_STACK];
proceed:
while (match_idx < tree->hdr.nmatches) {
cur_match = tcf_em_get_match(tree, match_idx);
if (tcf_em_is_container(cur_match)) {
if (unlikely(stackp >= CONFIG_NET_EMATCH_STACK))
goto stack_overflow;
stack[stackp++] = match_idx;
match_idx = cur_match->data;
goto proceed;
}
res = tcf_em_match(skb, cur_match, info);
if (tcf_em_early_end(cur_match, res))
break;
match_idx++;
}
pop_stack:
if (stackp > 0) {
match_idx = stack[--stackp];
cur_match = tcf_em_get_match(tree, match_idx);
if (tcf_em_early_end(cur_match, res))
goto pop_stack;
else {
match_idx++;
goto proceed;
}
}
return res;
stack_overflow:
if (net_ratelimit())
printk("Local stack overflow, increase NET_EMATCH_STACK\n");
return -1;
}
EXPORT_SYMBOL(__tcf_em_tree_match);