neighbour: Create netdev->neighbour association

Create a mapping between a netdev and its neighbours,
allowing for much cheaper flushes.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://patch.msgid.link/20241107160444.2913124-7-gnaaman@drivenets.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Gilad Naaman 2024-11-07 16:04:43 +00:00 committed by Jakub Kicinski
parent a01a67ab2f
commit f7f5273863
5 changed files with 80 additions and 45 deletions

View File

@ -188,4 +188,5 @@ u64 max_pacing_offload_horizon
struct_napi_config* napi_config
unsigned_long gro_flush_timeout
u32 napi_defer_hard_irqs
struct hlist_head neighbours[2]
=================================== =========================== =================== =================== ===================================================================================

View File

@ -52,6 +52,7 @@
#include <net/net_trackers.h>
#include <net/net_debug.h>
#include <net/dropreason-core.h>
#include <net/neighbour_tables.h>
struct netpoll_info;
struct device;
@ -2032,6 +2033,9 @@ enum netdev_reg_state {
* @napi_defer_hard_irqs: If not zero, provides a counter that would
* allow to avoid NIC hard IRQ, on busy queues.
*
* @neighbours: List heads pointing to this device's neighbours'
* dev_list, one per address-family.
*
* FIXME: cleanup struct net_device such that network protocol info
* moves out.
*/
@ -2440,6 +2444,9 @@ struct net_device {
*/
struct net_shaper_hierarchy *net_shaper_hierarchy;
#endif
struct hlist_head neighbours[NEIGH_NR_TABLES];
u8 priv[] ____cacheline_aligned
__counted_by(priv_len);
} ____cacheline_aligned;

View File

@ -29,6 +29,7 @@
#include <linux/sysctl.h>
#include <linux/workqueue.h>
#include <net/rtnetlink.h>
#include <net/neighbour_tables.h>
/*
* NUD stands for "neighbor unreachability detection"
@ -136,6 +137,7 @@ struct neigh_statistics {
struct neighbour {
struct hlist_node hash;
struct hlist_node dev_list;
struct neigh_table *tbl;
struct neigh_parms *parms;
unsigned long confirmed;
@ -236,13 +238,6 @@ struct neigh_table {
struct pneigh_entry **phash_buckets;
};
enum {
NEIGH_ARP_TABLE = 0,
NEIGH_ND_TABLE = 1,
NEIGH_NR_TABLES,
NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
};
static inline int neigh_parms_family(struct neigh_parms *p)
{
return p->tbl->family;

View File

@ -0,0 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _NET_NEIGHBOUR_TABLES_H
#define _NET_NEIGHBOUR_TABLES_H
/*
 * Indexes of the neighbour tables.
 *
 * NEIGH_NR_TABLES is the number of real tables (ARP and ND); it also sizes
 * per-table arrays such as the neighbours[] member of struct net_device.
 * NEIGH_LINK_TABLE deliberately shares that value: it is not a real table
 * and must not be used to index an array of NEIGH_NR_TABLES elements.
 */
enum {
NEIGH_ARP_TABLE = 0, /* slot selected for AF_INET */
NEIGH_ND_TABLE = 1, /* slot selected for AF_INET6 */
NEIGH_NR_TABLES,
NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
};
#endif

View File

@ -60,6 +60,25 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
static const struct seq_operations neigh_stat_seq_ops;
#endif
/*
 * Pick the per-device neighbour list head that matches an address family.
 *
 * AF_INET6 selects the NEIGH_ND_TABLE slot of dev->neighbours[]; AF_INET
 * selects the NEIGH_ARP_TABLE slot.  Any other family fires
 * DEBUG_NET_WARN_ON_ONCE() and is then handled like AF_INET, so the caller
 * always gets a usable list head instead of a NULL pointer.
 *
 * @dev is dereferenced unconditionally and therefore must not be NULL.
 */
static struct hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
{
	int slot = NEIGH_ARP_TABLE;

	if (family == AF_INET6)
		slot = NEIGH_ND_TABLE;
	else
		DEBUG_NET_WARN_ON_ONCE(family != AF_INET); /* unknown family: warn, keep ARP slot */

	return &dev->neighbours[slot];
}
/*
Neighbour hash table buckets are protected with rwlock tbl->lock.
@ -211,6 +230,7 @@ bool neigh_remove_one(struct neighbour *n)
write_lock(&n->lock);
if (refcount_read(&n->refcnt) == 1) {
hlist_del_rcu(&n->hash);
hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n);
retval = true;
}
@ -351,48 +371,42 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
bool skip_perm)
{
int i;
struct neigh_hash_table *nht;
struct hlist_head *dev_head;
struct hlist_node *tmp;
struct neighbour *n;
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
dev_head = neigh_get_dev_table(dev, tbl->family);
for (i = 0; i < (1 << nht->hash_shift); i++) {
struct hlist_node *tmp;
struct neighbour *n;
hlist_for_each_entry_safe(n, tmp, dev_head, dev_list) {
if (skip_perm && n->nud_state & NUD_PERMANENT)
continue;
neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i]) {
if (dev && n->dev != dev)
continue;
if (skip_perm && n->nud_state & NUD_PERMANENT)
continue;
hlist_del_rcu(&n->hash);
write_lock(&n->lock);
neigh_del_timer(n);
neigh_mark_dead(n);
if (refcount_read(&n->refcnt) != 1) {
/* The most unpleasant situation.
We must destroy neighbour entry,
but someone still uses it.
The destroy will be delayed until
the last user releases us, but
we must kill timers etc. and move
it to safe state.
*/
__skb_queue_purge(&n->arp_queue);
n->arp_queue_len_bytes = 0;
WRITE_ONCE(n->output, neigh_blackhole);
if (n->nud_state & NUD_VALID)
n->nud_state = NUD_NOARP;
else
n->nud_state = NUD_NONE;
neigh_dbg(2, "neigh %p is stray\n", n);
}
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
hlist_del_rcu(&n->hash);
hlist_del_rcu(&n->dev_list);
write_lock(&n->lock);
neigh_del_timer(n);
neigh_mark_dead(n);
if (refcount_read(&n->refcnt) != 1) {
/* The most unpleasant situation.
* We must destroy neighbour entry,
* but someone still uses it.
*
* The destroy will be delayed until
* the last user releases us, but
* we must kill timers etc. and move
* it to safe state.
*/
__skb_queue_purge(&n->arp_queue);
n->arp_queue_len_bytes = 0;
WRITE_ONCE(n->output, neigh_blackhole);
if (n->nud_state & NUD_VALID)
n->nud_state = NUD_NOARP;
else
n->nud_state = NUD_NONE;
neigh_dbg(2, "neigh %p is stray\n", n);
}
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
}
}
@ -655,6 +669,10 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
if (want_ref)
neigh_hold(n);
hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
hlist_add_head_rcu(&n->dev_list,
neigh_get_dev_table(dev, tbl->family));
write_unlock_bh(&tbl->lock);
neigh_dbg(2, "neigh %p is created\n", n);
rc = n;
@ -935,6 +953,7 @@ static void neigh_periodic_work(struct work_struct *work)
!time_in_range_open(jiffies, n->used,
n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
hlist_del_rcu(&n->hash);
hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n);
write_unlock(&n->lock);
neigh_cleanup_and_release(n);
@ -3054,6 +3073,7 @@ void __neigh_for_each_release(struct neigh_table *tbl,
release = cb(n);
if (release) {
hlist_del_rcu(&n->hash);
hlist_del_rcu(&n->dev_list);
neigh_mark_dead(n);
}
write_unlock(&n->lock);