linux/net/core/fib_notifier.c
Kirill Tkhai f0b07bb151 net: Introduce net_rwsem to protect net_namespace_list
rtnl_lock() is used everywhere, and contention is very high.
When someone wants to iterate over the alive net namespaces,
there is currently no way to do that without taking the
exclusive lock. But the exclusive rtnl_lock() in such places
is overkill, and it only increases the contention. There is
already for_each_net_rcu() in the kernel, but it requires
rcu_read_lock(), so the iteration cannot sleep. Also, it is
sometimes necessary to actually prevent net_namespace_list
from growing, so for_each_net_rcu() does not fit there either.

This patch introduces a new rw_semaphore, which will be used
instead of rtnl_mutex to protect net_namespace_list. It is
sleepable and allows non-exclusive iteration over the net
namespace list. This makes it possible to stop using
rtnl_lock() in several places (done in the next patches) and
shortens the time rtnl_mutex is held. Here we just add the
new lock; the explanation of why rtnl_lock() can be dropped
in those places is in the next patches.
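
For illustration, the non-exclusive read side this enables looks
roughly like the following (a sketch, not part of this patch):

    struct net *net;

    down_read(&net_rwsem);
    for_each_net(net) {
            /* may sleep; net_namespace_list can't change under us */
    }
    up_read(&net_rwsem);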

Fine-grained locks are generally better than one big lock,
so let's do that with net_namespace_list while the situation
allows it.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-03-29 13:47:53 -04:00

#include <linux/rtnetlink.h>
#include <linux/notifier.h>
#include <linux/rcupdate.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <net/net_namespace.h>
#include <net/fib_notifier.h>

/* Single global chain: FIB events from all namespaces and address
 * families are delivered through it.
 */
static ATOMIC_NOTIFIER_HEAD(fib_chain);
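
/* Deliver one FIB event to a single notifier block. */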
int call_fib_notifier(struct notifier_block *nb, struct net *net,
                      enum fib_event_type event_type,
                      struct fib_notifier_info *info)
{
        info->net = net;
        return nb->notifier_call(nb, event_type, info);
}
EXPORT_SYMBOL(call_fib_notifier);
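
/* Deliver one FIB event to every subscriber on the chain. */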
int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
                       struct fib_notifier_info *info)
{
        info->net = net;
        return atomic_notifier_call_chain(&fib_chain, event_type, info);
}
EXPORT_SYMBOL(call_fib_notifiers);
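
/*
 * Sum the FIB sequence counters of all families in all net namespaces.
 * Any FIB modification changes the sum, so callers can detect writes
 * that race with a dump.
 */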
static unsigned int fib_seq_sum(void)
{
        struct fib_notifier_ops *ops;
        unsigned int fib_seq = 0;
        struct net *net;

        rtnl_lock();
        down_read(&net_rwsem);
        for_each_net(net) {
                rcu_read_lock();
                list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
                        if (!try_module_get(ops->owner))
                                continue;
                        fib_seq += ops->fib_seq_read(net);
                        module_put(ops->owner);
                }
                rcu_read_unlock();
        }
        up_read(&net_rwsem);
        rtnl_unlock();

        return fib_seq;
}
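
/* Replay the current FIB state of one namespace to the notifier @nb. */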
static int fib_net_dump(struct net *net, struct notifier_block *nb)
{
        struct fib_notifier_ops *ops;

        list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) {
                int err;

                if (!try_module_get(ops->owner))
                        continue;
                err = ops->fib_dump(net, nb);
                module_put(ops->owner);
                if (err)
                        return err;
        }

        return 0;
}
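
/*
 * Register @nb and verify that no FIB change slipped in while the dump
 * was running: if the sequence sum no longer matches the pre-dump
 * snapshot, unregister @nb again and give the caller a chance to clean
 * up via @cb.
 */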
static bool fib_dump_is_consistent(struct notifier_block *nb,
                                   void (*cb)(struct notifier_block *nb),
                                   unsigned int fib_seq)
{
        atomic_notifier_chain_register(&fib_chain, nb);
        if (fib_seq == fib_seq_sum())
                return true;

        atomic_notifier_chain_unregister(&fib_chain, nb);
        if (cb)
                cb(nb);
        return false;
}

#define FIB_DUMP_MAX_RETRIES 5
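
/*
 * Subscribe @nb to FIB events and replay the existing state to it.
 * The dump is retried up to FIB_DUMP_MAX_RETRIES times if the FIBs
 * keep changing underneath; -EBUSY is returned when no consistent
 * snapshot can be obtained.
 */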
int register_fib_notifier(struct notifier_block *nb,
                          void (*cb)(struct notifier_block *nb))
{
        int retries = 0;
        int err;

        do {
                unsigned int fib_seq = fib_seq_sum();
                struct net *net;

                rcu_read_lock();
                for_each_net_rcu(net) {
                        err = fib_net_dump(net, nb);
                        if (err)
                                goto err_fib_net_dump;
                }
                rcu_read_unlock();

                if (fib_dump_is_consistent(nb, cb, fib_seq))
                        return 0;
        } while (++retries < FIB_DUMP_MAX_RETRIES);

        return -EBUSY;

err_fib_net_dump:
        rcu_read_unlock();
        return err;
}
EXPORT_SYMBOL(register_fib_notifier);
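
/* Remove a subscriber from the FIB notification chain. */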
int unregister_fib_notifier(struct notifier_block *nb)
{
        return atomic_notifier_chain_unregister(&fib_chain, nb);
}
EXPORT_SYMBOL(unregister_fib_notifier);
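
/* Link per-family ops into the namespace; one ops per family only. */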
static int __fib_notifier_ops_register(struct fib_notifier_ops *ops,
                                       struct net *net)
{
        struct fib_notifier_ops *o;

        list_for_each_entry(o, &net->fib_notifier_ops, list)
                if (ops->family == o->family)
                        return -EEXIST;

        list_add_tail_rcu(&ops->list, &net->fib_notifier_ops);
        return 0;
}
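
/*
 * Duplicate the template ops and register the copy in @net.  Returns
 * the new ops on success or an ERR_PTR() on failure.
 */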
struct fib_notifier_ops *
fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net)
{
        struct fib_notifier_ops *ops;
        int err;

        ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
        if (!ops)
                return ERR_PTR(-ENOMEM);

        err = __fib_notifier_ops_register(ops, net);
        if (err)
                goto err_register;

        return ops;

err_register:
        kfree(ops);
        return ERR_PTR(err);
}
EXPORT_SYMBOL(fib_notifier_ops_register);
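
/*
 * Unlink the ops from the namespace list; freeing is deferred by
 * kfree_rcu() until a grace period has elapsed, so concurrent RCU
 * readers walking the list remain safe.
 */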
void fib_notifier_ops_unregister(struct fib_notifier_ops *ops)
{
        list_del_rcu(&ops->list);
        kfree_rcu(ops, rcu);
}
EXPORT_SYMBOL(fib_notifier_ops_unregister);
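
/* Each net namespace keeps its own list of per-family notifier ops. */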
static int __net_init fib_notifier_net_init(struct net *net)
{
        INIT_LIST_HEAD(&net->fib_notifier_ops);
        return 0;
}

static void __net_exit fib_notifier_net_exit(struct net *net)
{
        WARN_ON_ONCE(!list_empty(&net->fib_notifier_ops));
}

static struct pernet_operations fib_notifier_net_ops = {
        .init = fib_notifier_net_init,
        .exit = fib_notifier_net_exit,
};

static int __init fib_notifier_init(void)
{
        return register_pernet_subsys(&fib_notifier_net_ops);
}
subsys_initcall(fib_notifier_init);