linux/net/sched/act_api.c

1107 lines
24 KiB
C
Raw Normal View History

/*
* net/sched/act_api.c Packet action API.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Author: Jamal Hadi Salim
*
*
*/
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
#include <linux/slab.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/err.h>
#include <linux/module.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/sch_generic.h>
#include <net/act_api.h>
#include <net/netlink.h>
static void free_tcf(struct rcu_head *head)
{
struct tcf_common *p = container_of(head, struct tcf_common, tcfc_rcu);
free_percpu(p->cpu_bstats);
free_percpu(p->cpu_qstats);
kfree(p);
}
static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *a)
{
struct tcf_common *p = (struct tcf_common *)a;
spin_lock_bh(&hinfo->lock);
hlist_del(&p->tcfc_head);
spin_unlock_bh(&hinfo->lock);
gen_kill_estimator(&p->tcfc_bstats,
&p->tcfc_rate_est);
/*
* gen_estimator est_timer() might access p->tcfc_lock
* or bstats, wait a RCU grace period before freeing p
*/
call_rcu(&p->tcfc_rcu, free_tcf);
}
net: sched: fix refcount imbalance in actions Since commit 55334a5db5cd ("net_sched: act: refuse to remove bound action outside"), we end up with a wrong reference count for a tc action. Test case 1: FOO="1,6 0 0 4294967295," BAR="1,6 0 0 4294967294," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 \ action bpf bytecode "$FOO" tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 1 bind 1 tc actions replace action bpf bytecode "$BAR" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe index 1 ref 2 bind 1 tc actions replace action bpf bytecode "$FOO" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 3 bind 1 Test case 2: FOO="1,6 0 0 4294967295," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action ok tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 1 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 2 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 3 bind 1 What happens is that in tcf_hash_check(), we check tcf_common for a given index and increase tcfc_refcnt and conditionally tcfc_bindcnt when we've found an existing action. Now there are the following cases: 1) We do a late binding of an action. In that case, we leave the tcfc_refcnt/tcfc_bindcnt increased and are done with the ->init() handler. This is correctly handeled. 2) We replace the given action, or we try to add one without replacing and find out that the action at a specific index already exists (thus, we go out with error in that case). In case of 2), we have to undo the reference count increase from tcf_hash_check() in the tcf_hash_check() function. Currently, we fail to do so because of the 'tcfc_bindcnt > 0' check which bails out early with an -EPERM error. Now, while commit 55334a5db5cd prevents 'tc actions del action ...' on an already classifier-bound action to drop the reference count (which could then become negative, wrap around etc), this restriction only accounts for invocations outside a specific action's ->init() handler. One possible solution would be to add a flag thus we possibly trigger the -EPERM ony in situations where it is indeed relevant. After the patch, above test cases have correct reference count again. Fixes: 55334a5db5cd ("net_sched: act: refuse to remove bound action outside") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Cong Wang <cwang@twopensource.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 05:35:25 +08:00
int __tcf_hash_release(struct tc_action *a, bool bind, bool strict)
{
struct tcf_common *p = (struct tcf_common *)a;
int ret = 0;
if (p) {
if (bind)
p->tcfc_bindcnt--;
net: sched: fix refcount imbalance in actions Since commit 55334a5db5cd ("net_sched: act: refuse to remove bound action outside"), we end up with a wrong reference count for a tc action. Test case 1: FOO="1,6 0 0 4294967295," BAR="1,6 0 0 4294967294," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 \ action bpf bytecode "$FOO" tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 1 bind 1 tc actions replace action bpf bytecode "$BAR" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe index 1 ref 2 bind 1 tc actions replace action bpf bytecode "$FOO" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 3 bind 1 Test case 2: FOO="1,6 0 0 4294967295," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action ok tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 1 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 2 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 3 bind 1 What happens is that in tcf_hash_check(), we check tcf_common for a given index and increase tcfc_refcnt and conditionally tcfc_bindcnt when we've found an existing action. Now there are the following cases: 1) We do a late binding of an action. In that case, we leave the tcfc_refcnt/tcfc_bindcnt increased and are done with the ->init() handler. This is correctly handeled. 2) We replace the given action, or we try to add one without replacing and find out that the action at a specific index already exists (thus, we go out with error in that case). In case of 2), we have to undo the reference count increase from tcf_hash_check() in the tcf_hash_check() function. Currently, we fail to do so because of the 'tcfc_bindcnt > 0' check which bails out early with an -EPERM error. Now, while commit 55334a5db5cd prevents 'tc actions del action ...' on an already classifier-bound action to drop the reference count (which could then become negative, wrap around etc), this restriction only accounts for invocations outside a specific action's ->init() handler. One possible solution would be to add a flag thus we possibly trigger the -EPERM ony in situations where it is indeed relevant. After the patch, above test cases have correct reference count again. Fixes: 55334a5db5cd ("net_sched: act: refuse to remove bound action outside") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Cong Wang <cwang@twopensource.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 05:35:25 +08:00
else if (strict && p->tcfc_bindcnt > 0)
return -EPERM;
p->tcfc_refcnt--;
if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) {
if (a->ops->cleanup)
a->ops->cleanup(a, bind);
list_del(&a->list);
tcf_hash_destroy(a->hinfo, a);
ret = ACT_P_DELETED;
}
}
net: sched: fix refcount imbalance in actions Since commit 55334a5db5cd ("net_sched: act: refuse to remove bound action outside"), we end up with a wrong reference count for a tc action. Test case 1: FOO="1,6 0 0 4294967295," BAR="1,6 0 0 4294967294," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 \ action bpf bytecode "$FOO" tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 1 bind 1 tc actions replace action bpf bytecode "$BAR" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe index 1 ref 2 bind 1 tc actions replace action bpf bytecode "$FOO" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 3 bind 1 Test case 2: FOO="1,6 0 0 4294967295," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action ok tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 1 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 2 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 3 bind 1 What happens is that in tcf_hash_check(), we check tcf_common for a given index and increase tcfc_refcnt and conditionally tcfc_bindcnt when we've found an existing action. Now there are the following cases: 1) We do a late binding of an action. In that case, we leave the tcfc_refcnt/tcfc_bindcnt increased and are done with the ->init() handler. This is correctly handeled. 2) We replace the given action, or we try to add one without replacing and find out that the action at a specific index already exists (thus, we go out with error in that case). In case of 2), we have to undo the reference count increase from tcf_hash_check() in the tcf_hash_check() function. Currently, we fail to do so because of the 'tcfc_bindcnt > 0' check which bails out early with an -EPERM error. Now, while commit 55334a5db5cd prevents 'tc actions del action ...' on an already classifier-bound action to drop the reference count (which could then become negative, wrap around etc), this restriction only accounts for invocations outside a specific action's ->init() handler. One possible solution would be to add a flag thus we possibly trigger the -EPERM ony in situations where it is indeed relevant. After the patch, above test cases have correct reference count again. Fixes: 55334a5db5cd ("net_sched: act: refuse to remove bound action outside") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Cong Wang <cwang@twopensource.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 05:35:25 +08:00
return ret;
}
net: sched: fix refcount imbalance in actions Since commit 55334a5db5cd ("net_sched: act: refuse to remove bound action outside"), we end up with a wrong reference count for a tc action. Test case 1: FOO="1,6 0 0 4294967295," BAR="1,6 0 0 4294967294," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 \ action bpf bytecode "$FOO" tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 1 bind 1 tc actions replace action bpf bytecode "$BAR" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe index 1 ref 2 bind 1 tc actions replace action bpf bytecode "$FOO" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 3 bind 1 Test case 2: FOO="1,6 0 0 4294967295," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action ok tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 1 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 2 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 3 bind 1 What happens is that in tcf_hash_check(), we check tcf_common for a given index and increase tcfc_refcnt and conditionally tcfc_bindcnt when we've found an existing action. Now there are the following cases: 1) We do a late binding of an action. In that case, we leave the tcfc_refcnt/tcfc_bindcnt increased and are done with the ->init() handler. This is correctly handeled. 2) We replace the given action, or we try to add one without replacing and find out that the action at a specific index already exists (thus, we go out with error in that case). In case of 2), we have to undo the reference count increase from tcf_hash_check() in the tcf_hash_check() function. Currently, we fail to do so because of the 'tcfc_bindcnt > 0' check which bails out early with an -EPERM error. Now, while commit 55334a5db5cd prevents 'tc actions del action ...' on an already classifier-bound action to drop the reference count (which could then become negative, wrap around etc), this restriction only accounts for invocations outside a specific action's ->init() handler. One possible solution would be to add a flag thus we possibly trigger the -EPERM ony in situations where it is indeed relevant. After the patch, above test cases have correct reference count again. Fixes: 55334a5db5cd ("net_sched: act: refuse to remove bound action outside") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Cong Wang <cwang@twopensource.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 05:35:25 +08:00
EXPORT_SYMBOL(__tcf_hash_release);
static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
struct netlink_callback *cb)
{
int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
struct nlattr *nest;
spin_lock_bh(&hinfo->lock);
s_i = cb->args[0];
for (i = 0; i < (hinfo->hmask + 1); i++) {
struct hlist_head *head;
struct tcf_common *p;
head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
hlist_for_each_entry_rcu(p, head, tcfc_head) {
index++;
if (index < s_i)
continue;
nest = nla_nest_start(skb, n_i);
if (nest == NULL)
goto nla_put_failure;
err = tcf_action_dump_1(skb, (struct tc_action *)p, 0, 0);
if (err < 0) {
index--;
nlmsg_trim(skb, nest);
goto done;
}
nla_nest_end(skb, nest);
n_i++;
if (n_i >= TCA_ACT_MAX_PRIO)
goto done;
}
}
done:
spin_unlock_bh(&hinfo->lock);
if (n_i)
cb->args[0] += n_i;
return n_i;
nla_put_failure:
nla_nest_cancel(skb, nest);
goto done;
}
static int tcf_del_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
const struct tc_action_ops *ops)
{
struct nlattr *nest;
int i = 0, n_i = 0;
int ret = -EINVAL;
nest = nla_nest_start(skb, 0);
if (nest == NULL)
goto nla_put_failure;
if (nla_put_string(skb, TCA_KIND, ops->kind))
goto nla_put_failure;
for (i = 0; i < (hinfo->hmask + 1); i++) {
struct hlist_head *head;
struct hlist_node *n;
struct tcf_common *p;
head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
hlist_for_each_entry_safe(p, n, head, tcfc_head) {
ret = __tcf_hash_release((struct tc_action *)p, false, true);
if (ret == ACT_P_DELETED) {
module_put(p->tcfc_act.ops->owner);
n_i++;
} else if (ret < 0)
goto nla_put_failure;
}
}
if (nla_put_u32(skb, TCA_FCNT, n_i))
goto nla_put_failure;
nla_nest_end(skb, nest);
return n_i;
nla_put_failure:
nla_nest_cancel(skb, nest);
return ret;
}
int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb,
struct netlink_callback *cb, int type,
const struct tc_action_ops *ops)
{
struct tcf_hashinfo *hinfo = tn->hinfo;
if (type == RTM_DELACTION) {
return tcf_del_walker(hinfo, skb, ops);
} else if (type == RTM_GETACTION) {
return tcf_dump_walker(hinfo, skb, cb);
} else {
WARN(1, "tcf_generic_walker: unknown action %d\n", type);
return -EINVAL;
}
}
EXPORT_SYMBOL(tcf_generic_walker);
static struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo)
{
struct tcf_common *p = NULL;
struct hlist_head *head;
spin_lock_bh(&hinfo->lock);
head = &hinfo->htab[tcf_hash(index, hinfo->hmask)];
hlist_for_each_entry_rcu(p, head, tcfc_head)
if (p->tcfc_index == index)
break;
spin_unlock_bh(&hinfo->lock);
return p;
}
u32 tcf_hash_new_index(struct tc_action_net *tn)
{
struct tcf_hashinfo *hinfo = tn->hinfo;
u32 val = hinfo->index;
do {
if (++val == 0)
val = 1;
} while (tcf_hash_lookup(val, hinfo));
hinfo->index = val;
return val;
}
EXPORT_SYMBOL(tcf_hash_new_index);
int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index)
{
struct tcf_hashinfo *hinfo = tn->hinfo;
struct tcf_common *p = tcf_hash_lookup(index, hinfo);
if (p) {
*a = &p->tcfc_act;
return 1;
}
return 0;
}
EXPORT_SYMBOL(tcf_hash_search);
bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a,
int bind)
{
struct tcf_hashinfo *hinfo = tn->hinfo;
struct tcf_common *p = NULL;
if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) {
if (bind)
p->tcfc_bindcnt++;
p->tcfc_refcnt++;
*a = &p->tcfc_act;
return true;
}
return false;
}
EXPORT_SYMBOL(tcf_hash_check);
void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
{
struct tcf_common *pc = (struct tcf_common *)a;
if (est)
gen_kill_estimator(&pc->tcfc_bstats,
&pc->tcfc_rate_est);
call_rcu(&pc->tcfc_rcu, free_tcf);
}
EXPORT_SYMBOL(tcf_hash_cleanup);
int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est,
struct tc_action **a, const struct tc_action_ops *ops,
int bind, bool cpustats)
{
struct tcf_common *p = kzalloc(ops->size, GFP_KERNEL);
struct tcf_hashinfo *hinfo = tn->hinfo;
int err = -ENOMEM;
if (unlikely(!p))
return -ENOMEM;
p->tcfc_refcnt = 1;
if (bind)
p->tcfc_bindcnt = 1;
if (cpustats) {
p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
if (!p->cpu_bstats) {
err1:
kfree(p);
return err;
}
p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
if (!p->cpu_qstats) {
err2:
free_percpu(p->cpu_bstats);
goto err1;
}
}
spin_lock_init(&p->tcfc_lock);
INIT_HLIST_NODE(&p->tcfc_head);
p->tcfc_index = index ? index : tcf_hash_new_index(tn);
p->tcfc_tm.install = jiffies;
p->tcfc_tm.lastuse = jiffies;
p->tcfc_tm.firstuse = 0;
if (est) {
err = gen_new_estimator(&p->tcfc_bstats, p->cpu_bstats,
&p->tcfc_rate_est,
&p->tcfc_lock, NULL, est);
if (err) {
free_percpu(p->cpu_qstats);
goto err2;
}
}
p->tcfc_act.hinfo = hinfo;
p->tcfc_act.ops = ops;
INIT_LIST_HEAD(&p->tcfc_act.list);
*a = &p->tcfc_act;
return 0;
}
EXPORT_SYMBOL(tcf_hash_create);
void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a)
{
struct tcf_common *p = (struct tcf_common *)a;
struct tcf_hashinfo *hinfo = tn->hinfo;
unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
spin_lock_bh(&hinfo->lock);
hlist_add_head(&p->tcfc_head, &hinfo->htab[h]);
spin_unlock_bh(&hinfo->lock);
}
EXPORT_SYMBOL(tcf_hash_insert);
void tcf_hashinfo_destroy(const struct tc_action_ops *ops,
struct tcf_hashinfo *hinfo)
{
int i;
for (i = 0; i < hinfo->hmask + 1; i++) {
struct tcf_common *p;
struct hlist_node *n;
hlist_for_each_entry_safe(p, n, &hinfo->htab[i], tcfc_head) {
int ret;
ret = __tcf_hash_release((struct tc_action *)p, false, true);
if (ret == ACT_P_DELETED)
module_put(ops->owner);
else if (ret < 0)
return;
}
}
kfree(hinfo->htab);
}
EXPORT_SYMBOL(tcf_hashinfo_destroy);
static LIST_HEAD(act_base);
static DEFINE_RWLOCK(act_mod_lock);
int tcf_register_action(struct tc_action_ops *act,
struct pernet_operations *ops)
{
struct tc_action_ops *a;
int ret;
if (!act->act || !act->dump || !act->init || !act->walk || !act->lookup)
return -EINVAL;
write_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
write_unlock(&act_mod_lock);
return -EEXIST;
}
}
list_add_tail(&act->head, &act_base);
write_unlock(&act_mod_lock);
ret = register_pernet_subsys(ops);
if (ret) {
tcf_unregister_action(act, ops);
return ret;
}
return 0;
}
EXPORT_SYMBOL(tcf_register_action);
int tcf_unregister_action(struct tc_action_ops *act,
struct pernet_operations *ops)
{
struct tc_action_ops *a;
int err = -ENOENT;
unregister_pernet_subsys(ops);
write_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
if (a == act) {
list_del(&act->head);
err = 0;
break;
}
}
write_unlock(&act_mod_lock);
return err;
}
EXPORT_SYMBOL(tcf_unregister_action);
/* lookup by name */
static struct tc_action_ops *tc_lookup_action_n(char *kind)
{
struct tc_action_ops *a, *res = NULL;
if (kind) {
read_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
if (strcmp(kind, a->kind) == 0) {
if (try_module_get(a->owner))
res = a;
break;
}
}
read_unlock(&act_mod_lock);
}
return res;
}
/* lookup by nlattr */
static struct tc_action_ops *tc_lookup_action(struct nlattr *kind)
{
struct tc_action_ops *a, *res = NULL;
if (kind) {
read_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
if (nla_strcmp(kind, a->kind) == 0) {
if (try_module_get(a->owner))
res = a;
break;
}
}
read_unlock(&act_mod_lock);
}
return res;
}
int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
struct tcf_result *res)
{
const struct tc_action *a;
int ret = -1;
if (skb->tc_verd & TC_NCLS) {
skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
ret = TC_ACT_OK;
goto exec_done;
}
list_for_each_entry(a, actions, list) {
repeat:
ret = a->ops->act(skb, a, res);
if (ret == TC_ACT_REPEAT)
goto repeat; /* we need a ttl - JHS */
if (ret != TC_ACT_PIPE)
goto exec_done;
}
exec_done:
return ret;
}
EXPORT_SYMBOL(tcf_action_exec);
int tcf_action_destroy(struct list_head *actions, int bind)
{
struct tc_action *a, *tmp;
int ret = 0;
list_for_each_entry_safe(a, tmp, actions, list) {
net: sched: fix refcount imbalance in actions Since commit 55334a5db5cd ("net_sched: act: refuse to remove bound action outside"), we end up with a wrong reference count for a tc action. Test case 1: FOO="1,6 0 0 4294967295," BAR="1,6 0 0 4294967294," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 \ action bpf bytecode "$FOO" tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 1 bind 1 tc actions replace action bpf bytecode "$BAR" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967294' default-action pipe index 1 ref 2 bind 1 tc actions replace action bpf bytecode "$FOO" index 1 tc actions show action bpf action order 0: bpf bytecode '1,6 0 0 4294967295' default-action pipe index 1 ref 3 bind 1 Test case 2: FOO="1,6 0 0 4294967295," tc filter add dev foo parent 1: bpf bytecode "$FOO" flowid 1:1 action ok tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 1 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 2 bind 1 tc actions add action drop index 1 RTNETLINK answers: File exists [...] tc actions show action gact action order 0: gact action pass random type none pass val 0 index 1 ref 3 bind 1 What happens is that in tcf_hash_check(), we check tcf_common for a given index and increase tcfc_refcnt and conditionally tcfc_bindcnt when we've found an existing action. Now there are the following cases: 1) We do a late binding of an action. In that case, we leave the tcfc_refcnt/tcfc_bindcnt increased and are done with the ->init() handler. This is correctly handeled. 2) We replace the given action, or we try to add one without replacing and find out that the action at a specific index already exists (thus, we go out with error in that case). In case of 2), we have to undo the reference count increase from tcf_hash_check() in the tcf_hash_check() function. Currently, we fail to do so because of the 'tcfc_bindcnt > 0' check which bails out early with an -EPERM error. Now, while commit 55334a5db5cd prevents 'tc actions del action ...' on an already classifier-bound action to drop the reference count (which could then become negative, wrap around etc), this restriction only accounts for invocations outside a specific action's ->init() handler. One possible solution would be to add a flag thus we possibly trigger the -EPERM ony in situations where it is indeed relevant. After the patch, above test cases have correct reference count again. Fixes: 55334a5db5cd ("net_sched: act: refuse to remove bound action outside") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: Cong Wang <cwang@twopensource.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2015-07-30 05:35:25 +08:00
ret = __tcf_hash_release(a, bind, true);
if (ret == ACT_P_DELETED)
module_put(a->ops->owner);
else if (ret < 0)
return ret;
}
return ret;
}
int
tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
return a->ops->dump(skb, a, bind, ref);
}
int
tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
int err = -EINVAL;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
if (nla_put_string(skb, TCA_KIND, a->ops->kind))
goto nla_put_failure;
if (tcf_action_copy_stats(skb, a, 0))
goto nla_put_failure;
nest = nla_nest_start(skb, TCA_OPTIONS);
if (nest == NULL)
goto nla_put_failure;
err = tcf_action_dump_old(skb, a, bind, ref);
if (err > 0) {
nla_nest_end(skb, nest);
return err;
}
nla_put_failure:
nlmsg_trim(skb, b);
return -1;
}
EXPORT_SYMBOL(tcf_action_dump_1);
int tcf_action_dump(struct sk_buff *skb, struct list_head *actions,
int bind, int ref)
{
struct tc_action *a;
int err = -EINVAL;
struct nlattr *nest;
list_for_each_entry(a, actions, list) {
nest = nla_nest_start(skb, a->order);
if (nest == NULL)
goto nla_put_failure;
err = tcf_action_dump_1(skb, a, bind, ref);
if (err < 0)
goto errout;
nla_nest_end(skb, nest);
}
return 0;
nla_put_failure:
err = -EINVAL;
errout:
nla_nest_cancel(skb, nest);
return err;
}
struct tc_action *tcf_action_init_1(struct net *net, struct nlattr *nla,
struct nlattr *est, char *name, int ovr,
int bind)
{
struct tc_action *a;
struct tc_action_ops *a_o;
char act_name[IFNAMSIZ];
struct nlattr *tb[TCA_ACT_MAX + 1];
struct nlattr *kind;
int err;
if (name == NULL) {
err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
if (err < 0)
goto err_out;
err = -EINVAL;
kind = tb[TCA_ACT_KIND];
if (kind == NULL)
goto err_out;
if (nla_strlcpy(act_name, kind, IFNAMSIZ) >= IFNAMSIZ)
goto err_out;
} else {
err = -EINVAL;
if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ)
goto err_out;
}
a_o = tc_lookup_action_n(act_name);
if (a_o == NULL) {
#ifdef CONFIG_MODULES
rtnl_unlock();
request_module("act_%s", act_name);
rtnl_lock();
a_o = tc_lookup_action_n(act_name);
/* We dropped the RTNL semaphore in order to
* perform the module load. So, even if we
* succeeded in loading the module we have to
* tell the caller to replay the request. We
* indicate this using -EAGAIN.
*/
if (a_o != NULL) {
err = -EAGAIN;
goto err_mod;
}
#endif
err = -ENOENT;
goto err_out;
}
/* backward compatibility for policer */
if (name == NULL)
err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind);
else
err = a_o->init(net, nla, est, &a, ovr, bind);
if (err < 0)
goto err_mod;
/* module count goes up only when brand new policy is created
* if it exists and is only bound to in a_o->init() then
* ACT_P_CREATED is not returned (a zero is).
*/
if (err != ACT_P_CREATED)
module_put(a_o->owner);
return a;
err_mod:
module_put(a_o->owner);
err_out:
return ERR_PTR(err);
}
int tcf_action_init(struct net *net, struct nlattr *nla,
struct nlattr *est, char *name, int ovr,
int bind, struct list_head *actions)
{
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
int err;
int i;
err = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
if (err < 0)
return err;
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_init_1(net, tb[i], est, name, ovr, bind);
if (IS_ERR(act)) {
err = PTR_ERR(act);
goto err;
}
act->order = i;
list_add_tail(&act->list, actions);
}
return 0;
err:
tcf_action_destroy(actions, bind);
return err;
}
int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
int compat_mode)
{
int err = 0;
struct gnet_dump d;
struct tcf_common *p = (struct tcf_common *)a;
if (p == NULL)
goto errout;
/* compat_mode being true specifies a call that is supposed
* to add additional backward compatibility statistic TLVs.
*/
if (compat_mode) {
if (a->type == TCA_OLD_COMPAT)
err = gnet_stats_start_copy_compat(skb, 0,
TCA_STATS,
TCA_XSTATS,
&p->tcfc_lock, &d,
TCA_PAD);
else
return 0;
} else
err = gnet_stats_start_copy(skb, TCA_ACT_STATS,
&p->tcfc_lock, &d, TCA_ACT_PAD);
if (err < 0)
goto errout;
if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfc_bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &p->tcfc_bstats,
&p->tcfc_rate_est) < 0 ||
gnet_stats_copy_queue(&d, p->cpu_qstats,
&p->tcfc_qstats,
p->tcfc_qstats.qlen) < 0)
goto errout;
if (gnet_stats_finish_copy(&d) < 0)
goto errout;
return 0;
errout:
return -1;
}
static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
u32 portid, u32 seq, u16 flags, int event, int bind,
int ref)
{
struct tcamsg *t;
struct nlmsghdr *nlh;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags);
if (!nlh)
goto out_nlmsg_trim;
t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
if (nest == NULL)
goto out_nlmsg_trim;
if (tcf_action_dump(skb, actions, bind, ref) < 0)
goto out_nlmsg_trim;
nla_nest_end(skb, nest);
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
return skb->len;
out_nlmsg_trim:
nlmsg_trim(skb, b);
return -1;
}
static int
act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
struct list_head *actions, int event)
{
struct sk_buff *skb;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
0, 0) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
return rtnl_unicast(skb, net, portid);
}
static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
struct nlmsghdr *n, u32 portid)
{
struct nlattr *tb[TCA_ACT_MAX + 1];
const struct tc_action_ops *ops;
struct tc_action *a;
int index;
int err;
err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
if (err < 0)
goto err_out;
err = -EINVAL;
if (tb[TCA_ACT_INDEX] == NULL ||
nla_len(tb[TCA_ACT_INDEX]) < sizeof(index))
goto err_out;
index = nla_get_u32(tb[TCA_ACT_INDEX]);
err = -EINVAL;
ops = tc_lookup_action(tb[TCA_ACT_KIND]);
if (!ops) /* could happen in batch of actions */
goto err_out;
err = -ENOENT;
if (ops->lookup(net, &a, index) == 0)
goto err_mod;
module_put(ops->owner);
return a;
err_mod:
module_put(ops->owner);
err_out:
return ERR_PTR(err);
}
static void cleanup_a(struct list_head *actions)
{
struct tc_action *a, *tmp;
list_for_each_entry_safe(a, tmp, actions, list) {
list_del(&a->list);
kfree(a);
}
}
static int tca_action_flush(struct net *net, struct nlattr *nla,
struct nlmsghdr *n, u32 portid)
{
struct sk_buff *skb;
unsigned char *b;
struct nlmsghdr *nlh;
struct tcamsg *t;
struct netlink_callback dcb;
struct nlattr *nest;
struct nlattr *tb[TCA_ACT_MAX + 1];
const struct tc_action_ops *ops;
struct nlattr *kind;
int err = -ENOMEM;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb) {
pr_debug("tca_action_flush: failed skb alloc\n");
return err;
}
b = skb_tail_pointer(skb);
err = nla_parse_nested(tb, TCA_ACT_MAX, nla, NULL);
if (err < 0)
goto err_out;
err = -EINVAL;
kind = tb[TCA_ACT_KIND];
ops = tc_lookup_action(kind);
if (!ops) /*some idjot trying to flush unknown action */
goto err_out;
nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION,
sizeof(*t), 0);
if (!nlh)
goto out_module_put;
t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
if (nest == NULL)
goto out_module_put;
err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops);
if (err < 0)
goto out_module_put;
if (err == 0)
goto noflush_out;
nla_nest_end(skb, nest);
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
nlh->nlmsg_flags |= NLM_F_ROOT;
module_put(ops->owner);
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
if (err > 0)
return 0;
return err;
out_module_put:
module_put(ops->owner);
err_out:
noflush_out:
kfree_skb(skb);
return err;
}
static int
tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
u32 portid)
{
int ret;
struct sk_buff *skb;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
0, 1) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
/* now do the delete */
ret = tcf_action_destroy(actions, 0);
if (ret < 0) {
kfree_skb(skb);
return ret;
}
ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
if (ret > 0)
return 0;
return ret;
}
static int
tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
u32 portid, int event)
{
int i, ret;
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
LIST_HEAD(actions);
ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
if (ret < 0)
return ret;
if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
if (tb[1] != NULL)
return tca_action_flush(net, tb[1], n, portid);
else
return -EINVAL;
}
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
act = tcf_action_get_1(net, tb[i], n, portid);
if (IS_ERR(act)) {
ret = PTR_ERR(act);
goto err;
}
act->order = i;
list_add_tail(&act->list, &actions);
}
if (event == RTM_GETACTION)
ret = act_get_notify(net, portid, n, &actions, event);
else { /* delete */
ret = tcf_del_notify(net, n, &actions, portid);
if (ret)
goto err;
return ret;
}
err:
cleanup_a(&actions);
return ret;
}
static int
tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
u32 portid)
{
struct sk_buff *skb;
int err = 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
RTM_NEWACTION, 0, 0) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
n->nlmsg_flags & NLM_F_ECHO);
if (err > 0)
err = 0;
return err;
}
static int
tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
u32 portid, int ovr)
{
int ret = 0;
LIST_HEAD(actions);
ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions);
if (ret)
goto done;
/* dump then free all the actions after update; inserted policy
* stays intact
*/
ret = tcf_add_notify(net, n, &actions, portid);
cleanup_a(&actions);
done:
return ret;
}
static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n)
{
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_ACT_MAX + 1];
u32 portid = skb ? NETLINK_CB(skb).portid : 0;
int ret = 0, ovr = 0;
if ((n->nlmsg_type != RTM_GETACTION) &&
!netlink_capable(skb, CAP_NET_ADMIN))
return -EPERM;
ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
if (ret < 0)
return ret;
if (tca[TCA_ACT_TAB] == NULL) {
pr_notice("tc_ctl_action: received NO action attribs\n");
return -EINVAL;
}
/* n->nlmsg_flags & NLM_F_CREATE */
switch (n->nlmsg_type) {
case RTM_NEWACTION:
/* we are going to assume all other flags
* imply create only if it doesn't exist
* Note that CREATE | EXCL implies that
* but since we want avoid ambiguity (eg when flags
* is zero) then just set this
*/
if (n->nlmsg_flags & NLM_F_REPLACE)
ovr = 1;
replay:
ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr);
if (ret == -EAGAIN)
goto replay;
break;
case RTM_DELACTION:
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
portid, RTM_DELACTION);
break;
case RTM_GETACTION:
ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
portid, RTM_GETACTION);
break;
default:
BUG();
}
return ret;
}
static struct nlattr *
find_dump_kind(const struct nlmsghdr *n)
{
struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct nlattr *nla[TCAA_MAX + 1];
struct nlattr *kind;
if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, NULL) < 0)
return NULL;
tb1 = nla[TCA_ACT_TAB];
if (tb1 == NULL)
return NULL;
if (nla_parse(tb, TCA_ACT_MAX_PRIO, nla_data(tb1),
NLMSG_ALIGN(nla_len(tb1)), NULL) < 0)
return NULL;
if (tb[1] == NULL)
return NULL;
if (nla_parse(tb2, TCA_ACT_MAX, nla_data(tb[1]),
nla_len(tb[1]), NULL) < 0)
return NULL;
kind = tb2[TCA_ACT_KIND];
return kind;
}
static int
tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct nlmsghdr *nlh;
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
struct tc_action_ops *a_o;
int ret = 0;
struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
struct nlattr *kind = find_dump_kind(cb->nlh);
if (kind == NULL) {
pr_info("tc_dump_action: action bad kind\n");
return 0;
}
a_o = tc_lookup_action(kind);
if (a_o == NULL)
return 0;
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
cb->nlh->nlmsg_type, sizeof(*t), 0);
if (!nlh)
goto out_module_put;
t = nlmsg_data(nlh);
t->tca_family = AF_UNSPEC;
t->tca__pad1 = 0;
t->tca__pad2 = 0;
nest = nla_nest_start(skb, TCA_ACT_TAB);
if (nest == NULL)
goto out_module_put;
ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o);
if (ret < 0)
goto out_module_put;
if (ret > 0) {
nla_nest_end(skb, nest);
ret = skb->len;
} else
nlmsg_trim(skb, b);
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
if (NETLINK_CB(cb->skb).portid && ret)
nlh->nlmsg_flags |= NLM_F_MULTI;
module_put(a_o->owner);
return skb->len;
out_module_put:
module_put(a_o->owner);
nlmsg_trim(skb, b);
return skb->len;
}
static int __init tc_action_init(void)
{
rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, NULL);
rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, NULL);
rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
NULL);
return 0;
}
subsys_initcall(tc_action_init);