2019-05-27 14:55:01 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
2005-04-17 06:20:36 +08:00
|
|
|
/*
|
|
|
|
* Forwarding database
|
|
|
|
* Linux ethernet bridge
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Lennert Buytenhek <buytenh@gnu.org>
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <linux/kernel.h>
|
|
|
|
#include <linux/init.h>
|
2008-05-13 03:21:05 +08:00
|
|
|
#include <linux/rculist.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include <linux/spinlock.h>
|
|
|
|
#include <linux/times.h>
|
|
|
|
#include <linux/netdevice.h>
|
|
|
|
#include <linux/etherdevice.h>
|
|
|
|
#include <linux/jhash.h>
|
2007-03-22 04:42:33 +08:00
|
|
|
#include <linux/random.h>
|
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and try to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
widely available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 16:04:11 +08:00
|
|
|
#include <linux/slab.h>
|
2011-07-27 07:09:06 +08:00
|
|
|
#include <linux/atomic.h>
|
2007-03-22 04:42:33 +08:00
|
|
|
#include <asm/unaligned.h>
|
2013-02-13 20:00:16 +08:00
|
|
|
#include <linux/if_vlan.h>
|
2015-06-15 02:33:11 +08:00
|
|
|
#include <net/switchdev.h>
|
2017-08-30 04:16:57 +08:00
|
|
|
#include <trace/events/bridge.h>
|
2005-04-17 06:20:36 +08:00
|
|
|
#include "br_private.h"
|
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
static const struct rhashtable_params br_fdb_rht_params = {
|
|
|
|
.head_offset = offsetof(struct net_bridge_fdb_entry, rhnode),
|
|
|
|
.key_offset = offsetof(struct net_bridge_fdb_entry, key),
|
|
|
|
.key_len = sizeof(struct net_bridge_fdb_key),
|
|
|
|
.automatic_shrinking = true,
|
|
|
|
};
|
|
|
|
|
2006-12-07 12:33:20 +08:00
|
|
|
/* Slab cache sized for struct net_bridge_fdb_entry; created by br_fdb_init(),
 * destroyed by br_fdb_fini(), and the target of the RCU-deferred free in
 * fdb_rcu_free().
 */
static struct kmem_cache *br_fdb_cache __read_mostly;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2007-04-07 17:57:07 +08:00
|
|
|
/* Create the slab cache used for FDB entries.
 *
 * Returns 0 on success or -ENOMEM if the cache could not be created.
 */
int __init br_fdb_init(void)
{
	struct kmem_cache *cache;

	cache = kmem_cache_create("bridge_fdb_cache",
				  sizeof(struct net_bridge_fdb_entry),
				  0, SLAB_HWCACHE_ALIGN, NULL);
	br_fdb_cache = cache;

	return cache ? 0 : -ENOMEM;
}
|
|
|
|
|
2007-12-06 13:35:23 +08:00
|
|
|
void br_fdb_fini(void)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
kmem_cache_destroy(br_fdb_cache);
|
|
|
|
}
|
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
int br_fdb_hash_init(struct net_bridge *br)
|
|
|
|
{
|
|
|
|
return rhashtable_init(&br->fdb_hash_tbl, &br_fdb_rht_params);
|
|
|
|
}
|
|
|
|
|
|
|
|
void br_fdb_hash_fini(struct net_bridge *br)
|
|
|
|
{
|
|
|
|
rhashtable_destroy(&br->fdb_hash_tbl);
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/* if topology_changing then use forward_delay (default 15 sec)
|
|
|
|
* otherwise keep longer (default 5 minutes)
|
|
|
|
*/
|
2007-03-22 04:42:33 +08:00
|
|
|
static inline unsigned long hold_time(const struct net_bridge *br)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
return br->topology_change ? br->forward_delay : br->ageing_time;
|
|
|
|
}
|
|
|
|
|
2007-03-22 04:42:33 +08:00
|
|
|
static inline int has_expired(const struct net_bridge *br,
|
2005-04-17 06:20:36 +08:00
|
|
|
const struct net_bridge_fdb_entry *fdb)
|
|
|
|
{
|
2019-10-29 19:45:54 +08:00
|
|
|
return !test_bit(BR_FDB_STATIC, &fdb->flags) &&
|
2019-10-29 19:45:57 +08:00
|
|
|
!test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags) &&
|
2019-10-29 19:45:54 +08:00
|
|
|
time_before_eq(fdb->updated + hold_time(br), jiffies);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2009-06-05 13:35:28 +08:00
|
|
|
static void fdb_rcu_free(struct rcu_head *head)
|
|
|
|
{
|
|
|
|
struct net_bridge_fdb_entry *ent
|
|
|
|
= container_of(head, struct net_bridge_fdb_entry, rcu);
|
|
|
|
kmem_cache_free(br_fdb_cache, ent);
|
|
|
|
}
|
|
|
|
|
2021-10-26 22:27:36 +08:00
|
|
|
static int fdb_to_nud(const struct net_bridge *br,
|
|
|
|
const struct net_bridge_fdb_entry *fdb)
|
|
|
|
{
|
|
|
|
if (test_bit(BR_FDB_LOCAL, &fdb->flags))
|
|
|
|
return NUD_PERMANENT;
|
|
|
|
else if (test_bit(BR_FDB_STATIC, &fdb->flags))
|
|
|
|
return NUD_NOARP;
|
|
|
|
else if (has_expired(br, fdb))
|
|
|
|
return NUD_STALE;
|
|
|
|
else
|
|
|
|
return NUD_REACHABLE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Serialise @fdb into @skb as one neighbour netlink message.
 *
 * The message carries a struct ndmsg header followed, in this order, by
 * NDA_LLADDR, NDA_MASTER, NDA_FLAGS_EXT, NDA_CACHEINFO, NDA_VLAN (only when
 * the entry has a non-zero VLAN id) and, for entries with BR_FDB_NOTIFY set,
 * an NDA_FDB_EXT_ATTRS nest holding NFEA_ACTIVITY_NOTIFY.
 *
 * @portid, @seq, @type and @flags are passed straight to nlmsg_put().
 *
 * Returns 0 on success. Returns -EMSGSIZE if the header or any attribute
 * could not be added; once the header has been placed, the partially built
 * message is cancelled before returning.
 */
static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
			 const struct net_bridge_fdb_entry *fdb,
			 u32 portid, u32 seq, int type, unsigned int flags)
{
	/* Snapshot the port once; it is tested and dereferenced below. */
	const struct net_bridge_port *port = READ_ONCE(fdb->dst);
	/* A single time base for both cacheinfo ages. */
	const unsigned long stamp = jiffies;
	struct nda_cacheinfo cinfo;
	struct nlmsghdr *hdr;
	struct ndmsg *msg;
	u32 flags_ext = 0;

	hdr = nlmsg_put(skb, portid, seq, type, sizeof(*msg), flags);
	if (!hdr)
		return -EMSGSIZE;

	msg = nlmsg_data(hdr);
	msg->ndm_family = AF_BRIDGE;
	msg->ndm_pad1 = 0;
	msg->ndm_pad2 = 0;
	msg->ndm_flags = 0;
	msg->ndm_type = 0;
	/* Entries without a port are reported against the bridge device. */
	if (port)
		msg->ndm_ifindex = port->dev->ifindex;
	else
		msg->ndm_ifindex = br->dev->ifindex;
	msg->ndm_state = fdb_to_nud(br, fdb);

	/* Mirror the entry's flag bits into the netlink flag fields. */
	if (test_bit(BR_FDB_OFFLOADED, &fdb->flags))
		msg->ndm_flags |= NTF_OFFLOADED;
	if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
		msg->ndm_flags |= NTF_EXT_LEARNED;
	if (test_bit(BR_FDB_STICKY, &fdb->flags))
		msg->ndm_flags |= NTF_STICKY;
	if (test_bit(BR_FDB_LOCKED, &fdb->flags))
		flags_ext |= NTF_EXT_LOCKED;

	/* Short-circuit evaluation keeps the attributes in emission order. */
	if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->key.addr) ||
	    nla_put_u32(skb, NDA_MASTER, br->dev->ifindex) ||
	    nla_put_u32(skb, NDA_FLAGS_EXT, flags_ext))
		goto cancel;

	cinfo.ndm_used = jiffies_to_clock_t(stamp - fdb->used);
	cinfo.ndm_confirmed = 0;
	cinfo.ndm_updated = jiffies_to_clock_t(stamp - fdb->updated);
	cinfo.ndm_refcnt = 0;
	if (nla_put(skb, NDA_CACHEINFO, sizeof(cinfo), &cinfo))
		goto cancel;

	if (fdb->key.vlan_id) {
		if (nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->key.vlan_id))
			goto cancel;
	}

	if (test_bit(BR_FDB_NOTIFY, &fdb->flags)) {
		u8 activity = FDB_NOTIFY_BIT;
		struct nlattr *ext;

		ext = nla_nest_start(skb, NDA_FDB_EXT_ATTRS);
		if (!ext)
			goto cancel;

		if (test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags))
			activity |= FDB_NOTIFY_INACTIVE_BIT;

		if (nla_put_u8(skb, NFEA_ACTIVITY_NOTIFY, activity)) {
			nla_nest_cancel(skb, ext);
			goto cancel;
		}

		nla_nest_end(skb, ext);
	}

	nlmsg_end(skb, hdr);
	return 0;

cancel:
	nlmsg_cancel(skb, hdr);
	return -EMSGSIZE;
}
|
|
|
|
|
|
|
|
static inline size_t fdb_nlmsg_size(void)
|
|
|
|
{
|
|
|
|
return NLMSG_ALIGN(sizeof(struct ndmsg))
|
|
|
|
+ nla_total_size(ETH_ALEN) /* NDA_LLADDR */
|
|
|
|
+ nla_total_size(sizeof(u32)) /* NDA_MASTER */
|
bridge: Add MAC Authentication Bypass (MAB) support
Hosts that support 802.1X authentication are able to authenticate
themselves by exchanging EAPOL frames with an authenticator (Ethernet
bridge, in this case) and an authentication server. Access to the
network is only granted by the authenticator to successfully
authenticated hosts.
The above is implemented in the bridge using the "locked" bridge port
option. When enabled, link-local frames (e.g., EAPOL) can be locally
received by the bridge, but all other frames are dropped unless the host
is authenticated. That is, unless the user space control plane installed
an FDB entry according to which the source address of the frame is
located behind the locked ingress port. The entry can be dynamic, in
which case learning needs to be enabled so that the entry will be
refreshed by incoming traffic.
There are deployments in which not all the devices connected to the
authenticator (the bridge) support 802.1X. Such devices can include
printers and cameras. One option to support such deployments is to
unlock the bridge ports connecting these devices, but a slightly more
secure option is to use MAB. When MAB is enabled, the MAC address of the
connected device is used as the user name and password for the
authentication.
For MAB to work, the user space control plane needs to be notified about
MAC addresses that are trying to gain access so that they will be
compared against an allow list. This can be implemented via the regular
learning process with the sole difference that learned FDB entries are
installed with a new "locked" flag indicating that the entry cannot be
used to authenticate the device. The flag cannot be set by user space,
but user space can clear the flag by replacing the entry, thereby
authenticating the device.
Locked FDB entries implement the following semantics with regards to
roaming, aging and forwarding:
1. Roaming: Locked FDB entries can roam to unlocked (authorized) ports,
in which case the "locked" flag is cleared. FDB entries cannot roam
to locked ports regardless of MAB being enabled or not. Therefore,
locked FDB entries are only created if an FDB entry with the given {MAC,
VID} does not already exist. This behavior prevents unauthenticated
devices from disrupting traffic destined to already authenticated
devices.
2. Aging: Locked FDB entries age and refresh by incoming traffic like
regular entries.
3. Forwarding: Locked FDB entries forward traffic like regular entries.
If user space detects an unauthorized MAC behind a locked port and
wishes to prevent traffic with this MAC DA from reaching the host, it
can do so using tc or a different mechanism.
Enable the above behavior using a new bridge port option called "mab".
It can only be enabled on a bridge port that is both locked and has
learning enabled. Locked FDB entries are flushed from the port once MAB
is disabled. A new option is added because there are pure 802.1X
deployments that are not interested in notifications about locked FDB
entries.
Signed-off-by: Hans J. Schultz <netdev@kapio-technology.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-11-02 03:39:21 +08:00
|
|
|
+ nla_total_size(sizeof(u32)) /* NDA_FLAGS_EXT */
|
2021-10-26 22:27:36 +08:00
|
|
|
+ nla_total_size(sizeof(u16)) /* NDA_VLAN */
|
|
|
|
+ nla_total_size(sizeof(struct nda_cacheinfo))
|
|
|
|
+ nla_total_size(0) /* NDA_FDB_EXT_ATTRS */
|
|
|
|
+ nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */
|
|
|
|
}
|
|
|
|
|
|
|
|
static void fdb_notify(struct net_bridge *br,
|
|
|
|
const struct net_bridge_fdb_entry *fdb, int type,
|
|
|
|
bool swdev_notify)
|
|
|
|
{
|
|
|
|
struct net *net = dev_net(br->dev);
|
|
|
|
struct sk_buff *skb;
|
|
|
|
int err = -ENOBUFS;
|
|
|
|
|
|
|
|
if (swdev_notify)
|
|
|
|
br_switchdev_fdb_notify(br, fdb, type);
|
|
|
|
|
|
|
|
skb = nlmsg_new(fdb_nlmsg_size(), GFP_ATOMIC);
|
|
|
|
if (skb == NULL)
|
|
|
|
goto errout;
|
|
|
|
|
|
|
|
err = fdb_fill_info(skb, br, fdb, 0, 0, type, 0);
|
|
|
|
if (err < 0) {
|
|
|
|
/* -EMSGSIZE implies BUG in fdb_nlmsg_size() */
|
|
|
|
WARN_ON(err == -EMSGSIZE);
|
|
|
|
kfree_skb(skb);
|
|
|
|
goto errout;
|
|
|
|
}
|
|
|
|
rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
|
|
|
|
return;
|
|
|
|
errout:
|
|
|
|
rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
|
|
|
|
}
|
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
static struct net_bridge_fdb_entry *fdb_find_rcu(struct rhashtable *tbl,
|
2017-02-13 21:59:09 +08:00
|
|
|
const unsigned char *addr,
|
|
|
|
__u16 vid)
|
|
|
|
{
|
2017-12-12 22:02:50 +08:00
|
|
|
struct net_bridge_fdb_key key;
|
2017-02-13 21:59:09 +08:00
|
|
|
|
2017-02-13 21:59:10 +08:00
|
|
|
WARN_ON_ONCE(!rcu_read_lock_held());
|
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
key.vlan_id = vid;
|
|
|
|
memcpy(key.addr.addr, addr, sizeof(key.addr.addr));
|
2017-02-13 21:59:09 +08:00
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
return rhashtable_lookup(tbl, &key, br_fdb_rht_params);
|
2017-02-13 21:59:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* requires bridge hash_lock */
|
|
|
|
static struct net_bridge_fdb_entry *br_fdb_find(struct net_bridge *br,
|
|
|
|
const unsigned char *addr,
|
|
|
|
__u16 vid)
|
|
|
|
{
|
|
|
|
struct net_bridge_fdb_entry *fdb;
|
|
|
|
|
2017-03-17 01:32:42 +08:00
|
|
|
lockdep_assert_held_once(&br->hash_lock);
|
2017-02-13 21:59:10 +08:00
|
|
|
|
2017-02-13 21:59:09 +08:00
|
|
|
rcu_read_lock();
|
2017-12-12 22:02:50 +08:00
|
|
|
fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid);
|
2017-02-13 21:59:09 +08:00
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
return fdb;
|
|
|
|
}
|
|
|
|
|
2018-04-29 15:56:08 +08:00
|
|
|
struct net_device *br_fdb_find_port(const struct net_device *br_dev,
|
|
|
|
const unsigned char *addr,
|
|
|
|
__u16 vid)
|
|
|
|
{
|
|
|
|
struct net_bridge_fdb_entry *f;
|
|
|
|
struct net_device *dev = NULL;
|
|
|
|
struct net_bridge *br;
|
|
|
|
|
|
|
|
ASSERT_RTNL();
|
|
|
|
|
|
|
|
if (!netif_is_bridge_master(br_dev))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
br = netdev_priv(br_dev);
|
2018-06-08 21:11:47 +08:00
|
|
|
rcu_read_lock();
|
|
|
|
f = br_fdb_find_rcu(br, addr, vid);
|
2018-04-29 15:56:08 +08:00
|
|
|
if (f && f->dst)
|
|
|
|
dev = f->dst->dev;
|
2018-06-08 21:11:47 +08:00
|
|
|
rcu_read_unlock();
|
2018-04-29 15:56:08 +08:00
|
|
|
|
|
|
|
return dev;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_GPL(br_fdb_find_port);
|
|
|
|
|
2017-02-13 21:59:09 +08:00
|
|
|
struct net_bridge_fdb_entry *br_fdb_find_rcu(struct net_bridge *br,
|
|
|
|
const unsigned char *addr,
|
|
|
|
__u16 vid)
|
|
|
|
{
|
2017-12-12 22:02:50 +08:00
|
|
|
return fdb_find_rcu(&br->fdb_hash_tbl, addr, vid);
|
2017-02-13 21:59:09 +08:00
|
|
|
}
|
|
|
|
|
2014-05-16 21:59:19 +08:00
|
|
|
/* When a static FDB entry is added, the mac address from the entry is
|
|
|
|
* added to the bridge private HW address list and all required ports
|
|
|
|
* are then updated with the new information.
|
|
|
|
* Called under RTNL.
|
|
|
|
*/
|
2014-11-28 21:34:12 +08:00
|
|
|
static void fdb_add_hw_addr(struct net_bridge *br, const unsigned char *addr)
|
2014-05-16 21:59:19 +08:00
|
|
|
{
|
|
|
|
int err;
|
2014-06-18 16:07:16 +08:00
|
|
|
struct net_bridge_port *p;
|
2014-05-16 21:59:19 +08:00
|
|
|
|
|
|
|
ASSERT_RTNL();
|
|
|
|
|
|
|
|
list_for_each_entry(p, &br->port_list, list) {
|
|
|
|
if (!br_promisc_port(p)) {
|
|
|
|
err = dev_uc_add(p->dev, addr);
|
|
|
|
if (err)
|
|
|
|
goto undo;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
undo:
|
2014-06-18 16:07:16 +08:00
|
|
|
list_for_each_entry_continue_reverse(p, &br->port_list, list) {
|
|
|
|
if (!br_promisc_port(p))
|
|
|
|
dev_uc_del(p->dev, addr);
|
2014-05-16 21:59:19 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* When a static FDB entry is deleted, the HW address from that entry is
|
|
|
|
* also removed from the bridge private HW address list and updates all
|
|
|
|
* the ports with needed information.
|
|
|
|
* Called under RTNL.
|
|
|
|
*/
|
2014-11-28 21:34:12 +08:00
|
|
|
static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr)
|
2014-05-16 21:59:19 +08:00
|
|
|
{
|
|
|
|
struct net_bridge_port *p;
|
|
|
|
|
|
|
|
ASSERT_RTNL();
|
|
|
|
|
|
|
|
list_for_each_entry(p, &br->port_list, list) {
|
|
|
|
if (!br_promisc_port(p))
|
|
|
|
dev_uc_del(p->dev, addr);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-05-03 20:43:53 +08:00
|
|
|
static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f,
|
|
|
|
bool swdev_notify)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2017-08-30 04:16:57 +08:00
|
|
|
trace_fdb_delete(br, f);
|
|
|
|
|
2019-10-29 19:45:54 +08:00
|
|
|
if (test_bit(BR_FDB_STATIC, &f->flags))
|
2017-12-12 22:02:50 +08:00
|
|
|
fdb_del_hw_addr(br, f->key.addr.addr);
|
2014-05-16 21:59:19 +08:00
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
hlist_del_init_rcu(&f->fdb_node);
|
|
|
|
rhashtable_remove_fast(&br->fdb_hash_tbl, &f->rhnode,
|
|
|
|
br_fdb_rht_params);
|
net: bridge: Track and limit dynamically learned FDB entries
A malicious actor behind one bridge port may spam the kernel with packets
with a random source MAC address, each of which will create an FDB entry,
each of which is a dynamic allocation in the kernel.
There are roughly 2^48 different MAC addresses, further limited by the
rhashtable they are stored in to 2^31. Each entry is of the type struct
net_bridge_fdb_entry, which is currently 128 bytes big. This means the
maximum amount of memory allocated for FDB entries is 2^31 * 128B =
256GiB, which is too much for most computers.
Mitigate this by maintaining a per bridge count of those automatically
generated entries in fdb_n_learned, and a limit in fdb_max_learned. If
the limit is hit new entries are not learned anymore.
For backwards compatibility the default setting of 0 disables the limit.
User-added entries by netlink or from bridge or bridge port addresses
are never blocked and do not count towards that limit.
Introduce a new fdb entry flag BR_FDB_DYNAMIC_LEARNED to keep track of
whether an FDB entry is included in the count. The flag is enabled for
dynamically learned entries, and disabled for all other entries. This
should be equivalent to BR_FDB_ADDED_BY_USER and BR_FDB_LOCAL being unset,
but contrary to the two flags it can be toggled atomically.
Atomicity is required here, as there are multiple callers that modify the
flags, but are not under a common lock (br_fdb_update is the exception
for br->hash_lock, br_fdb_external_learn_add for RTNL).
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Johannes Nixdorf <jnixdorf-oss@avm.de>
Link: https://lore.kernel.org/r/20231016-fdb_limit-v5-2-32cddff87758@avm.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-10-16 21:27:21 +08:00
|
|
|
if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &f->flags))
|
|
|
|
atomic_dec(&br->fdb_n_learned);
|
2018-05-03 20:43:53 +08:00
|
|
|
fdb_notify(br, f, RTM_DELNEIGH, swdev_notify);
|
2009-06-05 13:35:28 +08:00
|
|
|
call_rcu(&f->rcu, fdb_rcu_free);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
net: bridge: Track and limit dynamically learned FDB entries
A malicious actor behind one bridge port may spam the kernel with packets
with a random source MAC address, each of which will create an FDB entry,
each of which is a dynamic allocation in the kernel.
There are roughly 2^48 different MAC addresses, further limited by the
rhashtable they are stored in to 2^31. Each entry is of the type struct
net_bridge_fdb_entry, which is currently 128 bytes big. This means the
maximum amount of memory allocated for FDB entries is 2^31 * 128B =
256GiB, which is too much for most computers.
Mitigate this by maintaining a per bridge count of those automatically
generated entries in fdb_n_learned, and a limit in fdb_max_learned. If
the limit is hit new entries are not learned anymore.
For backwards compatibility the default setting of 0 disables the limit.
User-added entries by netlink or from bridge or bridge port addresses
are never blocked and do not count towards that limit.
Introduce a new fdb entry flag BR_FDB_DYNAMIC_LEARNED to keep track of
whether an FDB entry is included in the count. The flag is enabled for
dynamically learned entries, and disabled for all other entries. This
should be equivalent to BR_FDB_ADDED_BY_USER and BR_FDB_LOCAL being unset,
but contrary to the two flags it can be toggled atomically.
Atomicity is required here, as there are multiple callers that modify the
flags, but are not under a common lock (br_fdb_update is the exception
for br->hash_lock, br_fdb_external_learn_add for RTNL).
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Johannes Nixdorf <jnixdorf-oss@avm.de>
Link: https://lore.kernel.org/r/20231016-fdb_limit-v5-2-32cddff87758@avm.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-10-16 21:27:21 +08:00
|
|
|
/* Delete a local entry if no other port had the same address.
|
|
|
|
*
|
|
|
|
* This function should only be called on entries with BR_FDB_LOCAL set,
|
|
|
|
* so even with BR_FDB_ADDED_BY_USER cleared we never need to increase
|
|
|
|
* the accounting for dynamically learned entries again.
|
|
|
|
*/
|
2014-02-07 15:48:23 +08:00
|
|
|
static void fdb_delete_local(struct net_bridge *br,
|
|
|
|
const struct net_bridge_port *p,
|
|
|
|
struct net_bridge_fdb_entry *f)
|
|
|
|
{
|
2017-12-12 22:02:50 +08:00
|
|
|
const unsigned char *addr = f->key.addr.addr;
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
struct net_bridge_vlan_group *vg;
|
|
|
|
const struct net_bridge_vlan *v;
|
2014-02-07 15:48:23 +08:00
|
|
|
struct net_bridge_port *op;
|
2017-12-12 22:02:50 +08:00
|
|
|
u16 vid = f->key.vlan_id;
|
2014-02-07 15:48:23 +08:00
|
|
|
|
|
|
|
/* Maybe another port has same hw addr? */
|
|
|
|
list_for_each_entry(op, &br->port_list, list) {
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
vg = nbp_vlan_group(op);
|
2014-02-07 15:48:23 +08:00
|
|
|
if (op != p && ether_addr_equal(op->dev->dev_addr, addr) &&
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
(!vid || br_vlan_find(vg, vid))) {
|
2014-02-07 15:48:23 +08:00
|
|
|
f->dst = op;
|
2019-10-29 19:45:56 +08:00
|
|
|
clear_bit(BR_FDB_ADDED_BY_USER, &f->flags);
|
2014-02-07 15:48:23 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
vg = br_vlan_group(br);
|
|
|
|
v = br_vlan_find(vg, vid);
|
2014-02-07 15:48:23 +08:00
|
|
|
/* Maybe bridge device has same hw addr? */
|
|
|
|
if (p && ether_addr_equal(br->dev->dev_addr, addr) &&
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
(!vid || (v && br_vlan_should_use(v)))) {
|
2014-02-07 15:48:23 +08:00
|
|
|
f->dst = NULL;
|
2019-10-29 19:45:56 +08:00
|
|
|
clear_bit(BR_FDB_ADDED_BY_USER, &f->flags);
|
2014-02-07 15:48:23 +08:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-05-03 20:43:53 +08:00
|
|
|
fdb_delete(br, f, true);
|
2014-02-07 15:48:23 +08:00
|
|
|
}
|
|
|
|
|
2014-02-07 15:48:25 +08:00
|
|
|
/* Look up the FDB entry for @addr/@vid and release it from port @p.
 *
 * The entry is only handed to fdb_delete_local() when it is flagged
 * BR_FDB_LOCAL, was not added by the user (BR_FDB_ADDED_BY_USER clear)
 * and currently points at @p. The lookup and the release both run with
 * br->hash_lock held (bottom halves disabled).
 */
void br_fdb_find_delete_local(struct net_bridge *br,
			      const struct net_bridge_port *p,
			      const unsigned char *addr, u16 vid)
{
	struct net_bridge_fdb_entry *entry;

	spin_lock_bh(&br->hash_lock);

	entry = br_fdb_find(br, addr, vid);
	if (!entry)
		goto unlock;
	if (!test_bit(BR_FDB_LOCAL, &entry->flags))
		goto unlock;
	if (test_bit(BR_FDB_ADDED_BY_USER, &entry->flags))
		goto unlock;
	if (entry->dst != p)
		goto unlock;

	fdb_delete_local(br, p, entry);

unlock:
	spin_unlock_bh(&br->hash_lock);
}
|
|
|
|
|
2021-10-26 22:27:37 +08:00
|
|
|
static struct net_bridge_fdb_entry *fdb_create(struct net_bridge *br,
|
|
|
|
struct net_bridge_port *source,
|
|
|
|
const unsigned char *addr,
|
|
|
|
__u16 vid,
|
|
|
|
unsigned long flags)
|
|
|
|
{
|
net: bridge: Track and limit dynamically learned FDB entries
A malicious actor behind one bridge port may spam the kernel with packets
with a random source MAC address, each of which will create an FDB entry,
each of which is a dynamic allocation in the kernel.
There are roughly 2^48 different MAC addresses, further limited by the
rhashtable they are stored in to 2^31. Each entry is of the type struct
net_bridge_fdb_entry, which is currently 128 bytes big. This means the
maximum amount of memory allocated for FDB entries is 2^31 * 128B =
256GiB, which is too much for most computers.
Mitigate this by maintaining a per bridge count of those automatically
generated entries in fdb_n_learned, and a limit in fdb_max_learned. If
the limit is hit new entries are not learned anymore.
For backwards compatibility the default setting of 0 disables the limit.
User-added entries by netlink or from bridge or bridge port addresses
are never blocked and do not count towards that limit.
Introduce a new fdb entry flag BR_FDB_DYNAMIC_LEARNED to keep track of
whether an FDB entry is included in the count. The flag is enabled for
dynamically learned entries, and disabled for all other entries. This
should be equivalent to BR_FDB_ADDED_BY_USER and BR_FDB_LOCAL being unset,
but contrary to the two flags it can be toggled atomically.
Atomicity is required here, as there are multiple callers that modify the
flags, but are not under a common lock (br_fdb_update is the exception
for br->hash_lock, br_fdb_external_learn_add for RTNL).
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Johannes Nixdorf <jnixdorf-oss@avm.de>
Link: https://lore.kernel.org/r/20231016-fdb_limit-v5-2-32cddff87758@avm.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-10-16 21:27:21 +08:00
|
|
|
bool learned = !test_bit(BR_FDB_ADDED_BY_USER, &flags) &&
|
|
|
|
!test_bit(BR_FDB_LOCAL, &flags);
|
|
|
|
u32 max_learned = READ_ONCE(br->fdb_max_learned);
|
2021-10-26 22:27:37 +08:00
|
|
|
struct net_bridge_fdb_entry *fdb;
|
2021-10-26 22:27:40 +08:00
|
|
|
int err;
|
2021-10-26 22:27:37 +08:00
|
|
|
|
net: bridge: Track and limit dynamically learned FDB entries
A malicious actor behind one bridge port may spam the kernel with packets
with a random source MAC address, each of which will create an FDB entry,
each of which is a dynamic allocation in the kernel.
There are roughly 2^48 different MAC addresses, further limited by the
rhashtable they are stored in to 2^31. Each entry is of the type struct
net_bridge_fdb_entry, which is currently 128 bytes big. This means the
maximum amount of memory allocated for FDB entries is 2^31 * 128B =
256GiB, which is too much for most computers.
Mitigate this by maintaining a per bridge count of those automatically
generated entries in fdb_n_learned, and a limit in fdb_max_learned. If
the limit is hit new entries are not learned anymore.
For backwards compatibility the default setting of 0 disables the limit.
User-added entries by netlink or from bridge or bridge port addresses
are never blocked and do not count towards that limit.
Introduce a new fdb entry flag BR_FDB_DYNAMIC_LEARNED to keep track of
whether an FDB entry is included in the count. The flag is enabled for
dynamically learned entries, and disabled for all other entries. This
should be equivalent to BR_FDB_ADDED_BY_USER and BR_FDB_LOCAL being unset,
but contrary to the two flags it can be toggled atomically.
Atomicity is required here, as there are multiple callers that modify the
flags, but are not under a common lock (br_fdb_update is the exception
for br->hash_lock, br_fdb_external_learn_add for RTNL).
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Johannes Nixdorf <jnixdorf-oss@avm.de>
Link: https://lore.kernel.org/r/20231016-fdb_limit-v5-2-32cddff87758@avm.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-10-16 21:27:21 +08:00
|
|
|
if (likely(learned)) {
|
|
|
|
int n_learned = atomic_read(&br->fdb_n_learned);
|
|
|
|
|
|
|
|
if (unlikely(max_learned && n_learned >= max_learned))
|
|
|
|
return NULL;
|
|
|
|
__set_bit(BR_FDB_DYNAMIC_LEARNED, &flags);
|
|
|
|
}
|
|
|
|
|
2021-10-26 22:27:37 +08:00
|
|
|
fdb = kmem_cache_alloc(br_fdb_cache, GFP_ATOMIC);
|
2021-10-26 22:27:40 +08:00
|
|
|
if (!fdb)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
memcpy(fdb->key.addr.addr, addr, ETH_ALEN);
|
|
|
|
WRITE_ONCE(fdb->dst, source);
|
|
|
|
fdb->key.vlan_id = vid;
|
|
|
|
fdb->flags = flags;
|
|
|
|
fdb->updated = fdb->used = jiffies;
|
|
|
|
err = rhashtable_lookup_insert_fast(&br->fdb_hash_tbl, &fdb->rhnode,
|
|
|
|
br_fdb_rht_params);
|
|
|
|
if (err) {
|
|
|
|
kmem_cache_free(br_fdb_cache, fdb);
|
|
|
|
return NULL;
|
2021-10-26 22:27:37 +08:00
|
|
|
}
|
2021-10-26 22:27:40 +08:00
|
|
|
|
net: bridge: Track and limit dynamically learned FDB entries
A malicious actor behind one bridge port may spam the kernel with packets
with a random source MAC address, each of which will create an FDB entry,
each of which is a dynamic allocation in the kernel.
There are roughly 2^48 different MAC addresses, further limited by the
rhashtable they are stored in to 2^31. Each entry is of the type struct
net_bridge_fdb_entry, which is currently 128 bytes big. This means the
maximum amount of memory allocated for FDB entries is 2^31 * 128B =
256GiB, which is too much for most computers.
Mitigate this by maintaining a per bridge count of those automatically
generated entries in fdb_n_learned, and a limit in fdb_max_learned. If
the limit is hit new entries are not learned anymore.
For backwards compatibility the default setting of 0 disables the limit.
User-added entries by netlink or from bridge or bridge port addresses
are never blocked and do not count towards that limit.
Introduce a new fdb entry flag BR_FDB_DYNAMIC_LEARNED to keep track of
whether an FDB entry is included in the count. The flag is enabled for
dynamically learned entries, and disabled for all other entries. This
should be equivalent to BR_FDB_ADDED_BY_USER and BR_FDB_LOCAL being unset,
but contrary to the two flags it can be toggled atomically.
Atomicity is required here, as there are multiple callers that modify the
flags, but are not under a common lock (br_fdb_update is the exception
for br->hash_lock, br_fdb_external_learn_add for RTNL).
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Johannes Nixdorf <jnixdorf-oss@avm.de>
Link: https://lore.kernel.org/r/20231016-fdb_limit-v5-2-32cddff87758@avm.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-10-16 21:27:21 +08:00
|
|
|
if (likely(learned))
|
|
|
|
atomic_inc(&br->fdb_n_learned);
|
|
|
|
|
2021-10-26 22:27:40 +08:00
|
|
|
hlist_add_head_rcu(&fdb->fdb_node, &br->fdb_list);
|
|
|
|
|
2021-10-26 22:27:37 +08:00
|
|
|
return fdb;
|
|
|
|
}
|
|
|
|
|
2021-10-26 22:27:38 +08:00
|
|
|
/* Install a local, static FDB entry for @addr on VLAN @vid.
 *
 * @br:     bridge owning the forwarding database
 * @source: port the address belongs to, or NULL for the bridge device
 *          itself (only its name is used here, for the warning)
 * @addr:   MAC address to add
 * @vid:    VLAN id for the entry
 *
 * An existing entry already flagged BR_FDB_LOCAL is left untouched; any
 * other existing entry for the same key is warned about and deleted
 * before the new one is created.
 *
 * Returns 0 on success or when a local entry already exists, -EINVAL if
 * @addr is not a valid Ethernet address, and -ENOMEM if fdb_create()
 * returns NULL.
 */
static int fdb_add_local(struct net_bridge *br, struct net_bridge_port *source,
			 const unsigned char *addr, u16 vid)
{
	struct net_bridge_fdb_entry *entry;

	if (!is_valid_ether_addr(addr))
		return -EINVAL;

	entry = br_fdb_find(br, addr, vid);

	/* It is okay to have multiple ports with the same address,
	 * just use the first one.
	 */
	if (entry && test_bit(BR_FDB_LOCAL, &entry->flags))
		return 0;

	if (entry) {
		const char *ifname = source ? source->dev->name : br->dev->name;

		br_warn(br, "adding interface %s with same address as a received packet (addr:%pM, vlan:%u)\n",
			ifname, addr, vid);
		fdb_delete(br, entry, true);
	}

	entry = fdb_create(br, source, addr, vid,
			   BIT(BR_FDB_LOCAL) | BIT(BR_FDB_STATIC));
	if (!entry)
		return -ENOMEM;

	fdb_add_hw_addr(br, addr);
	fdb_notify(br, entry, RTM_NEWNEIGH, true);

	return 0;
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
|
|
|
|
{
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
struct net_bridge_vlan_group *vg;
|
2017-12-12 22:02:50 +08:00
|
|
|
struct net_bridge_fdb_entry *f;
|
2005-04-17 06:20:36 +08:00
|
|
|
struct net_bridge *br = p->br;
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
struct net_bridge_vlan *v;
|
2007-02-09 22:24:35 +08:00
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
spin_lock_bh(&br->hash_lock);
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
vg = nbp_vlan_group(p);
|
2017-12-12 22:02:50 +08:00
|
|
|
hlist_for_each_entry(f, &br->fdb_list, fdb_node) {
|
2019-10-29 19:45:53 +08:00
|
|
|
if (f->dst == p && test_bit(BR_FDB_LOCAL, &f->flags) &&
|
2019-10-29 19:45:56 +08:00
|
|
|
!test_bit(BR_FDB_ADDED_BY_USER, &f->flags)) {
|
2017-12-12 22:02:50 +08:00
|
|
|
/* delete old one */
|
|
|
|
fdb_delete_local(br, p, f);
|
|
|
|
|
|
|
|
/* if this port has no vlan information
|
|
|
|
* configured, we can safely be done at
|
|
|
|
* this point.
|
|
|
|
*/
|
|
|
|
if (!vg || !vg->num_vlans)
|
|
|
|
goto insert;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-02-07 15:48:19 +08:00
|
|
|
insert:
|
|
|
|
/* insert new address, may fail if invalid address or dup. */
|
2021-10-26 22:27:38 +08:00
|
|
|
fdb_add_local(br, p, newaddr, 0);
|
2014-02-07 15:48:19 +08:00
|
|
|
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
if (!vg || !vg->num_vlans)
|
2014-02-07 15:48:19 +08:00
|
|
|
goto done;
|
|
|
|
|
|
|
|
/* Now add entries for every VLAN configured on the port.
|
|
|
|
* This function runs under RTNL so the bitmap will not change
|
|
|
|
* from under us.
|
|
|
|
*/
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
list_for_each_entry(v, &vg->vlan_list, vlist)
|
2021-10-26 22:27:38 +08:00
|
|
|
fdb_add_local(br, p, newaddr, v->vid);
|
2014-02-07 15:48:19 +08:00
|
|
|
|
2013-02-13 20:00:19 +08:00
|
|
|
done:
|
2005-04-17 06:20:36 +08:00
|
|
|
spin_unlock_bh(&br->hash_lock);
|
|
|
|
}
|
|
|
|
|
2011-12-08 15:17:49 +08:00
|
|
|
void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
|
|
|
|
{
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
struct net_bridge_vlan_group *vg;
|
2011-12-08 15:17:49 +08:00
|
|
|
struct net_bridge_fdb_entry *f;
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
struct net_bridge_vlan *v;
|
2011-12-08 15:17:49 +08:00
|
|
|
|
2014-02-07 15:48:26 +08:00
|
|
|
spin_lock_bh(&br->hash_lock);
|
|
|
|
|
2011-12-08 15:17:49 +08:00
|
|
|
/* If old entry was unassociated with any port, then delete it. */
|
2017-02-13 21:59:09 +08:00
|
|
|
f = br_fdb_find(br, br->dev->dev_addr, 0);
|
2019-10-29 19:45:53 +08:00
|
|
|
if (f && test_bit(BR_FDB_LOCAL, &f->flags) &&
|
2019-10-29 19:45:56 +08:00
|
|
|
!f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags))
|
2014-02-07 15:48:23 +08:00
|
|
|
fdb_delete_local(br, NULL, f);
|
2011-12-08 15:17:49 +08:00
|
|
|
|
2021-10-26 22:27:38 +08:00
|
|
|
fdb_add_local(br, NULL, newaddr, 0);
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
vg = br_vlan_group(br);
|
|
|
|
if (!vg || !vg->num_vlans)
|
|
|
|
goto out;
|
2013-02-13 20:00:19 +08:00
|
|
|
/* Now remove and add entries for every VLAN configured on the
|
|
|
|
* bridge. This function runs under RTNL so the bitmap will not
|
|
|
|
* change from under us.
|
|
|
|
*/
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
list_for_each_entry(v, &vg->vlan_list, vlist) {
|
2016-06-07 18:14:17 +08:00
|
|
|
if (!br_vlan_should_use(v))
|
|
|
|
continue;
|
2017-02-13 21:59:09 +08:00
|
|
|
f = br_fdb_find(br, br->dev->dev_addr, v->vid);
|
2019-10-29 19:45:53 +08:00
|
|
|
if (f && test_bit(BR_FDB_LOCAL, &f->flags) &&
|
2019-10-29 19:45:56 +08:00
|
|
|
!f->dst && !test_bit(BR_FDB_ADDED_BY_USER, &f->flags))
|
2014-02-07 15:48:23 +08:00
|
|
|
fdb_delete_local(br, NULL, f);
|
2021-10-26 22:27:38 +08:00
|
|
|
fdb_add_local(br, NULL, newaddr, v->vid);
|
2013-02-13 20:00:19 +08:00
|
|
|
}
|
2014-02-07 15:48:26 +08:00
|
|
|
out:
|
|
|
|
spin_unlock_bh(&br->hash_lock);
|
2011-12-08 15:17:49 +08:00
|
|
|
}
|
|
|
|
|
2017-02-05 01:05:07 +08:00
|
|
|
void br_fdb_cleanup(struct work_struct *work)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2017-02-05 01:05:07 +08:00
|
|
|
struct net_bridge *br = container_of(work, struct net_bridge,
|
|
|
|
gc_work.work);
|
2017-12-12 22:02:50 +08:00
|
|
|
struct net_bridge_fdb_entry *f = NULL;
|
2005-04-17 06:20:36 +08:00
|
|
|
unsigned long delay = hold_time(br);
|
2017-02-05 01:05:07 +08:00
|
|
|
unsigned long work_delay = delay;
|
|
|
|
unsigned long now = jiffies;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
/* this part is tricky, in order to avoid blocking learning and
|
|
|
|
* consequently forwarding, we rely on rcu to delete objects with
|
|
|
|
* delayed freeing allowing us to continue traversing
|
|
|
|
*/
|
|
|
|
rcu_read_lock();
|
|
|
|
hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
|
2020-06-24 04:47:17 +08:00
|
|
|
unsigned long this_timer = f->updated + delay;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2019-10-29 19:45:54 +08:00
|
|
|
if (test_bit(BR_FDB_STATIC, &f->flags) ||
|
2020-06-24 04:47:17 +08:00
|
|
|
test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags)) {
|
|
|
|
if (test_bit(BR_FDB_NOTIFY, &f->flags)) {
|
|
|
|
if (time_after(this_timer, now))
|
|
|
|
work_delay = min(work_delay,
|
|
|
|
this_timer - now);
|
|
|
|
else if (!test_and_set_bit(BR_FDB_NOTIFY_INACTIVE,
|
|
|
|
&f->flags))
|
|
|
|
fdb_notify(br, f, RTM_NEWNEIGH, false);
|
|
|
|
}
|
2017-02-05 01:05:07 +08:00
|
|
|
continue;
|
2020-06-24 04:47:17 +08:00
|
|
|
}
|
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
if (time_after(this_timer, now)) {
|
|
|
|
work_delay = min(work_delay, this_timer - now);
|
|
|
|
} else {
|
|
|
|
spin_lock_bh(&br->hash_lock);
|
|
|
|
if (!hlist_unhashed(&f->fdb_node))
|
2018-05-03 20:43:53 +08:00
|
|
|
fdb_delete(br, f, true);
|
2017-12-12 22:02:50 +08:00
|
|
|
spin_unlock_bh(&br->hash_lock);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
}
|
2017-12-12 22:02:50 +08:00
|
|
|
rcu_read_unlock();
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2017-02-05 01:05:07 +08:00
|
|
|
/* Cleanup minimum 10 milliseconds apart */
|
|
|
|
work_delay = max_t(unsigned long, work_delay, msecs_to_jiffies(10));
|
|
|
|
mod_delayed_work(system_long_wq, &br->gc_work, work_delay);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
|
2022-04-13 18:51:59 +08:00
|
|
|
static bool __fdb_flush_matches(const struct net_bridge *br,
|
|
|
|
const struct net_bridge_fdb_entry *f,
|
|
|
|
const struct net_bridge_fdb_flush_desc *desc)
|
|
|
|
{
|
|
|
|
const struct net_bridge_port *dst = READ_ONCE(f->dst);
|
|
|
|
int port_ifidx = dst ? dst->dev->ifindex : br->dev->ifindex;
|
|
|
|
|
|
|
|
if (desc->vlan_id && desc->vlan_id != f->key.vlan_id)
|
|
|
|
return false;
|
|
|
|
if (desc->port_ifindex && desc->port_ifindex != port_ifidx)
|
|
|
|
return false;
|
|
|
|
if (desc->flags_mask && (f->flags & desc->flags_mask) != desc->flags)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Flush forwarding database entries matching the description */
|
|
|
|
void br_fdb_flush(struct net_bridge *br,
|
|
|
|
const struct net_bridge_fdb_flush_desc *desc)
|
2007-04-10 03:57:54 +08:00
|
|
|
{
|
2017-12-12 22:02:50 +08:00
|
|
|
struct net_bridge_fdb_entry *f;
|
2007-04-10 03:57:54 +08:00
|
|
|
|
2022-04-13 18:51:59 +08:00
|
|
|
rcu_read_lock();
|
|
|
|
hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
|
|
|
|
if (!__fdb_flush_matches(br, f, desc))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
spin_lock_bh(&br->hash_lock);
|
|
|
|
if (!hlist_unhashed(&f->fdb_node))
|
2018-05-03 20:43:53 +08:00
|
|
|
fdb_delete(br, f, true);
|
2022-04-13 18:51:59 +08:00
|
|
|
spin_unlock_bh(&br->hash_lock);
|
2007-04-10 03:57:54 +08:00
|
|
|
}
|
2022-04-13 18:51:59 +08:00
|
|
|
rcu_read_unlock();
|
2007-04-10 03:57:54 +08:00
|
|
|
}
|
2006-10-13 05:45:38 +08:00
|
|
|
|
2022-04-13 18:52:01 +08:00
|
|
|
/* Translate neighbour state bits into a bitmap of BR_FDB_* flag bits:
 * NUD_PERMANENT selects BR_FDB_LOCAL and NUD_NOARP selects BR_FDB_STATIC.
 * All other state bits are ignored.
 */
static unsigned long __ndm_state_to_fdb_flags(u16 ndm_state)
{
	unsigned long fdb_flags = 0;

	if (ndm_state & NUD_PERMANENT)
		fdb_flags |= BIT(BR_FDB_LOCAL);
	if (ndm_state & NUD_NOARP)
		fdb_flags |= BIT(BR_FDB_STATIC);

	return fdb_flags;
}
|
|
|
|
|
|
|
|
/* Translate neighbour NTF_* flag bits into a bitmap of BR_FDB_* flag bits.
 * NTF_* bits that are not listed in the table below are ignored.
 */
static unsigned long __ndm_flags_to_fdb_flags(u8 ndm_flags)
{
	static const struct {
		unsigned int ntf;
		unsigned int fdb_bit;
	} ntf_map[] = {
		{ NTF_USE,		BR_FDB_ADDED_BY_USER },
		{ NTF_EXT_LEARNED,	BR_FDB_ADDED_BY_EXT_LEARN },
		{ NTF_OFFLOADED,	BR_FDB_OFFLOADED },
		{ NTF_STICKY,		BR_FDB_STICKY },
	};
	unsigned long fdb_flags = 0;
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(ntf_map); i++) {
		if (ndm_flags & ntf_map[i].ntf)
			__set_bit(ntf_map[i].fdb_bit, &fdb_flags);
	}

	return fdb_flags;
}
|
|
|
|
|
2022-04-13 18:52:02 +08:00
|
|
|
static int __fdb_flush_validate_ifindex(const struct net_bridge *br,
|
|
|
|
int ifindex,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
const struct net_device *dev;
|
|
|
|
|
|
|
|
dev = __dev_get_by_index(dev_net(br->dev), ifindex);
|
|
|
|
if (!dev) {
|
|
|
|
NL_SET_ERR_MSG_MOD(extack, "Unknown flush device ifindex");
|
|
|
|
return -ENODEV;
|
|
|
|
}
|
|
|
|
if (!netif_is_bridge_master(dev) && !netif_is_bridge_port(dev)) {
|
|
|
|
NL_SET_ERR_MSG_MOD(extack, "Flush device is not a bridge or bridge port");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (netif_is_bridge_master(dev) && dev != br->dev) {
|
|
|
|
NL_SET_ERR_MSG_MOD(extack,
|
|
|
|
"Flush bridge device does not match target bridge device");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (netif_is_bridge_port(dev)) {
|
|
|
|
struct net_bridge_port *p = br_port_get_rtnl(dev);
|
|
|
|
|
|
|
|
if (p->br != br) {
|
|
|
|
NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-10-09 18:06:08 +08:00
|
|
|
static const struct nla_policy br_fdb_del_bulk_policy[NDA_MAX + 1] = {
|
|
|
|
[NDA_VLAN] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2),
|
|
|
|
[NDA_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
|
|
|
|
[NDA_NDM_STATE_MASK] = { .type = NLA_U16 },
|
|
|
|
[NDA_NDM_FLAGS_MASK] = { .type = NLA_U8 },
|
|
|
|
};
|
|
|
|
|
|
|
|
int br_fdb_delete_bulk(struct nlmsghdr *nlh, struct net_device *dev,
|
2022-04-13 18:51:58 +08:00
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
2023-10-09 18:06:08 +08:00
|
|
|
struct net_bridge_fdb_flush_desc desc = {};
|
|
|
|
struct ndmsg *ndm = nlmsg_data(nlh);
|
2022-04-13 18:51:58 +08:00
|
|
|
struct net_bridge_port *p = NULL;
|
2023-10-09 18:06:08 +08:00
|
|
|
struct nlattr *tb[NDA_MAX + 1];
|
2022-04-13 18:51:58 +08:00
|
|
|
struct net_bridge *br;
|
2023-10-09 18:06:08 +08:00
|
|
|
u8 ndm_flags;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
ndm_flags = ndm->ndm_flags & ~FDB_FLUSH_IGNORED_NDM_FLAGS;
|
|
|
|
|
|
|
|
err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX,
|
|
|
|
br_fdb_del_bulk_policy, extack);
|
|
|
|
if (err)
|
|
|
|
return err;
|
2022-04-13 18:51:58 +08:00
|
|
|
|
|
|
|
if (netif_is_bridge_master(dev)) {
|
|
|
|
br = netdev_priv(dev);
|
|
|
|
} else {
|
|
|
|
p = br_port_get_rtnl(dev);
|
|
|
|
if (!p) {
|
|
|
|
NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge port");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
br = p->br;
|
|
|
|
}
|
|
|
|
|
2023-10-09 18:06:08 +08:00
|
|
|
if (tb[NDA_VLAN])
|
|
|
|
desc.vlan_id = nla_get_u16(tb[NDA_VLAN]);
|
|
|
|
|
2022-04-13 18:52:01 +08:00
|
|
|
if (ndm_flags & ~FDB_FLUSH_ALLOWED_NDM_FLAGS) {
|
|
|
|
NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm flag bits set");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
if (ndm->ndm_state & ~FDB_FLUSH_ALLOWED_NDM_STATES) {
|
|
|
|
NL_SET_ERR_MSG(extack, "Unsupported fdb flush ndm state bits set");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
desc.flags |= __ndm_state_to_fdb_flags(ndm->ndm_state);
|
|
|
|
desc.flags |= __ndm_flags_to_fdb_flags(ndm_flags);
|
|
|
|
if (tb[NDA_NDM_STATE_MASK]) {
|
|
|
|
u16 ndm_state_mask = nla_get_u16(tb[NDA_NDM_STATE_MASK]);
|
|
|
|
|
|
|
|
desc.flags_mask |= __ndm_state_to_fdb_flags(ndm_state_mask);
|
|
|
|
}
|
|
|
|
if (tb[NDA_NDM_FLAGS_MASK]) {
|
|
|
|
u8 ndm_flags_mask = nla_get_u8(tb[NDA_NDM_FLAGS_MASK]);
|
|
|
|
|
|
|
|
desc.flags_mask |= __ndm_flags_to_fdb_flags(ndm_flags_mask);
|
|
|
|
}
|
2022-04-13 18:52:02 +08:00
|
|
|
if (tb[NDA_IFINDEX]) {
|
2023-10-09 18:06:08 +08:00
|
|
|
int ifidx = nla_get_s32(tb[NDA_IFINDEX]);
|
2022-04-13 18:52:02 +08:00
|
|
|
|
|
|
|
err = __fdb_flush_validate_ifindex(br, ifidx, extack);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
desc.port_ifindex = ifidx;
|
|
|
|
} else if (p) {
|
|
|
|
/* flush was invoked with port device and NTF_MASTER */
|
|
|
|
desc.port_ifindex = p->dev->ifindex;
|
|
|
|
}
|
2022-04-13 18:52:01 +08:00
|
|
|
|
|
|
|
br_debug(br, "flushing port ifindex: %d vlan id: %u flags: 0x%lx flags mask: 0x%lx\n",
|
|
|
|
desc.port_ifindex, desc.vlan_id, desc.flags, desc.flags_mask);
|
|
|
|
|
2022-04-13 18:51:59 +08:00
|
|
|
br_fdb_flush(br, &desc);
|
2022-04-13 18:51:58 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2011-03-31 09:57:33 +08:00
|
|
|
/* Flush all entries referring to a specific port.
|
2007-04-10 03:57:54 +08:00
|
|
|
* if do_all is set also flush static entries
|
2015-06-23 20:28:16 +08:00
|
|
|
* if vid is set delete all entries that match the vlan_id
|
2007-04-10 03:57:54 +08:00
|
|
|
*/
|
2006-10-13 05:45:38 +08:00
|
|
|
void br_fdb_delete_by_port(struct net_bridge *br,
|
|
|
|
const struct net_bridge_port *p,
|
2015-06-23 20:28:16 +08:00
|
|
|
u16 vid,
|
2006-10-13 05:45:38 +08:00
|
|
|
int do_all)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
2017-12-12 22:02:50 +08:00
|
|
|
struct net_bridge_fdb_entry *f;
|
|
|
|
struct hlist_node *tmp;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
spin_lock_bh(&br->hash_lock);
|
2017-12-12 22:02:50 +08:00
|
|
|
hlist_for_each_entry_safe(f, tmp, &br->fdb_list, fdb_node) {
|
|
|
|
if (f->dst != p)
|
|
|
|
continue;
|
2007-02-09 22:24:35 +08:00
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
if (!do_all)
|
2019-10-29 19:45:54 +08:00
|
|
|
if (test_bit(BR_FDB_STATIC, &f->flags) ||
|
2020-09-28 23:30:02 +08:00
|
|
|
(test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &f->flags) &&
|
|
|
|
!test_bit(BR_FDB_OFFLOADED, &f->flags)) ||
|
2019-10-29 19:45:54 +08:00
|
|
|
(vid && f->key.vlan_id != vid))
|
2005-04-17 06:20:36 +08:00
|
|
|
continue;
|
|
|
|
|
2019-10-29 19:45:53 +08:00
|
|
|
if (test_bit(BR_FDB_LOCAL, &f->flags))
|
2017-12-12 22:02:50 +08:00
|
|
|
fdb_delete_local(br, p, f);
|
|
|
|
else
|
2018-05-03 20:43:53 +08:00
|
|
|
fdb_delete(br, f, true);
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
spin_unlock_bh(&br->hash_lock);
|
|
|
|
}
|
|
|
|
|
2011-12-12 10:58:25 +08:00
|
|
|
#if IS_ENABLED(CONFIG_ATM_LANE)
|
2009-06-05 13:35:28 +08:00
|
|
|
/* Interface used by ATM LANE hook to test
|
|
|
|
* if an addr is on some other bridge port */
|
|
|
|
int br_fdb_test_addr(struct net_device *dev, unsigned char *addr)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
struct net_bridge_fdb_entry *fdb;
|
2010-11-15 14:38:13 +08:00
|
|
|
struct net_bridge_port *port;
|
2009-06-05 13:35:28 +08:00
|
|
|
int ret;
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
rcu_read_lock();
|
2010-11-15 14:38:13 +08:00
|
|
|
port = br_port_get_rcu(dev);
|
|
|
|
if (!port)
|
|
|
|
ret = 0;
|
|
|
|
else {
|
2021-06-29 22:06:44 +08:00
|
|
|
const struct net_bridge_port *dst = NULL;
|
|
|
|
|
2017-02-13 21:59:09 +08:00
|
|
|
fdb = br_fdb_find_rcu(port->br, addr, 0);
|
2021-06-29 22:06:44 +08:00
|
|
|
if (fdb)
|
|
|
|
dst = READ_ONCE(fdb->dst);
|
|
|
|
|
|
|
|
ret = dst && dst->dev != dev &&
|
|
|
|
dst->state == BR_STATE_FORWARDING;
|
2010-11-15 14:38:13 +08:00
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
rcu_read_unlock();
|
|
|
|
|
2009-06-05 13:35:28 +08:00
|
|
|
return ret;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
2009-06-05 13:35:28 +08:00
|
|
|
#endif /* CONFIG_ATM_LANE */
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
/*
|
2007-02-09 22:24:35 +08:00
|
|
|
* Fill buffer with forwarding table records in
|
2005-04-17 06:20:36 +08:00
|
|
|
* the API format.
|
|
|
|
*/
|
|
|
|
int br_fdb_fillbuf(struct net_bridge *br, void *buf,
|
|
|
|
unsigned long maxnum, unsigned long skip)
|
|
|
|
{
|
|
|
|
struct net_bridge_fdb_entry *f;
|
2017-12-12 22:02:50 +08:00
|
|
|
struct __fdb_entry *fe = buf;
|
|
|
|
int num = 0;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
|
|
|
memset(buf, 0, maxnum*sizeof(struct __fdb_entry));
|
|
|
|
|
|
|
|
rcu_read_lock();
|
2017-12-12 22:02:50 +08:00
|
|
|
hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
|
|
|
|
if (num >= maxnum)
|
|
|
|
break;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
if (has_expired(br, f))
|
|
|
|
continue;
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
/* ignore pseudo entry for local MAC address */
|
|
|
|
if (!f->dst)
|
|
|
|
continue;
|
2011-12-08 15:17:49 +08:00
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
if (skip) {
|
|
|
|
--skip;
|
|
|
|
continue;
|
|
|
|
}
|
2005-04-17 06:20:36 +08:00
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
/* convert from internal format to API */
|
|
|
|
memcpy(fe->mac_addr, f->key.addr.addr, ETH_ALEN);
|
2008-05-03 07:53:33 +08:00
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
/* due to ABI compat need to split into hi/lo */
|
|
|
|
fe->port_no = f->dst->port_no;
|
|
|
|
fe->port_hi = f->dst->port_no >> 8;
|
2008-05-03 07:53:33 +08:00
|
|
|
|
2019-10-29 19:45:53 +08:00
|
|
|
fe->is_local = test_bit(BR_FDB_LOCAL, &f->flags);
|
2019-10-29 19:45:54 +08:00
|
|
|
if (!test_bit(BR_FDB_STATIC, &f->flags))
|
2017-12-12 22:02:50 +08:00
|
|
|
fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated);
|
|
|
|
++fe;
|
|
|
|
++num;
|
2005-04-17 06:20:36 +08:00
|
|
|
}
|
|
|
|
rcu_read_unlock();
|
|
|
|
|
|
|
|
return num;
|
|
|
|
}
|
|
|
|
|
2011-04-04 22:03:27 +08:00
|
|
|
/* Add entry for local address of interface */
|
2021-10-26 22:27:39 +08:00
|
|
|
int br_fdb_add_local(struct net_bridge *br, struct net_bridge_port *source,
|
|
|
|
const unsigned char *addr, u16 vid)
|
2005-04-17 06:20:36 +08:00
|
|
|
{
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
spin_lock_bh(&br->hash_lock);
|
2021-10-26 22:27:38 +08:00
|
|
|
ret = fdb_add_local(br, source, addr, vid);
|
2005-04-17 06:20:36 +08:00
|
|
|
spin_unlock_bh(&br->hash_lock);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2020-06-24 04:47:17 +08:00
|
|
|
/* returns true if the fdb was modified */
|
|
|
|
static bool __fdb_mark_active(struct net_bridge_fdb_entry *fdb)
|
|
|
|
{
|
|
|
|
return !!(test_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags) &&
|
|
|
|
test_and_clear_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags));
|
|
|
|
}
|
|
|
|
|
2005-04-17 06:20:36 +08:00
|
|
|
/* Learn or refresh the FDB entry for (@addr, @vid) seen on port @source.
 *
 * @flags is a bitmap of BR_FDB_* bits: it is passed to fdb_create() when a
 * new entry is made, and BR_FDB_ADDED_BY_USER in it marks an existing entry
 * as user-added.
 *
 * Nothing is done when hold_time(br) is zero. An existing entry is updated
 * without taking br->hash_lock; a missing one is created under it.
 */
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
		   const unsigned char *addr, u16 vid, unsigned long flags)
{
	struct net_bridge_fdb_entry *fdb;
	unsigned long stamp;
	bool changed = false;

	/* some users want to always flood. */
	if (hold_time(br) == 0)
		return;

	fdb = fdb_find_rcu(&br->fdb_hash_tbl, addr, vid);
	if (unlikely(!fdb)) {
		/* slow path: no entry yet, create one under the hash lock */
		spin_lock(&br->hash_lock);
		fdb = fdb_create(br, source, addr, vid, flags);
		if (fdb) {
			trace_br_fdb_update(br, source, addr, vid, flags);
			fdb_notify(br, fdb, RTM_NEWNEIGH, true);
		}
		/* else we lose race and someone else inserts
		 * it first, don't bother updating
		 */
		spin_unlock(&br->hash_lock);
		return;
	}

	/* attempt to update an entry for a local interface */
	if (unlikely(test_bit(BR_FDB_LOCAL, &fdb->flags))) {
		if (net_ratelimit())
			br_warn(br, "received packet on %s with own address as source address (addr:%pM, vlan:%u)\n",
				source->dev->name, addr, vid);
		return;
	}

	stamp = jiffies;
	if (stamp != fdb->updated) {
		fdb->updated = stamp;
		changed = __fdb_mark_active(fdb);
	}

	/* fastpath: update of existing entry */
	if (unlikely(source != READ_ONCE(fdb->dst) &&
		     !test_bit(BR_FDB_STICKY, &fdb->flags))) {
		/* the entry moves to another port */
		br_switchdev_fdb_notify(br, fdb, RTM_DELNEIGH);
		WRITE_ONCE(fdb->dst, source);
		changed = true;

		/* Take over HW learned entry */
		if (unlikely(test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)))
			clear_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags);

		/* Clear locked flag when roaming to an
		 * unlocked port.
		 */
		if (unlikely(test_bit(BR_FDB_LOCKED, &fdb->flags)))
			clear_bit(BR_FDB_LOCKED, &fdb->flags);
	}

	if (unlikely(test_bit(BR_FDB_ADDED_BY_USER, &flags))) {
		set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
		/* the entry stops counting as dynamically learned */
		if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &fdb->flags))
			atomic_dec(&br->fdb_n_learned);
	}

	if (unlikely(changed)) {
		trace_br_fdb_update(br, source, addr, vid, flags);
		fdb_notify(br, fdb, RTM_NEWNEIGH, true);
	}
}
|
2011-04-04 22:03:30 +08:00
|
|
|
|
|
|
|
/* Dump information about entries, in response to GETNEIGH */
|
2012-04-15 14:43:56 +08:00
|
|
|
int br_fdb_dump(struct sk_buff *skb,
|
|
|
|
struct netlink_callback *cb,
|
|
|
|
struct net_device *dev,
|
2014-07-10 19:01:58 +08:00
|
|
|
struct net_device *filter_dev,
|
2016-08-31 12:56:45 +08:00
|
|
|
int *idx)
|
2011-04-04 22:03:30 +08:00
|
|
|
{
|
2012-04-15 14:43:56 +08:00
|
|
|
struct net_bridge *br = netdev_priv(dev);
|
2017-12-12 22:02:50 +08:00
|
|
|
struct net_bridge_fdb_entry *f;
|
2016-08-31 12:56:45 +08:00
|
|
|
int err = 0;
|
2011-04-04 22:03:30 +08:00
|
|
|
|
2021-10-16 19:21:36 +08:00
|
|
|
if (!netif_is_bridge_master(dev))
|
2017-12-12 22:02:50 +08:00
|
|
|
return err;
|
2011-04-04 22:03:30 +08:00
|
|
|
|
2016-08-31 12:56:45 +08:00
|
|
|
if (!filter_dev) {
|
|
|
|
err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx);
|
|
|
|
if (err < 0)
|
2017-12-12 22:02:50 +08:00
|
|
|
return err;
|
2016-08-31 12:56:45 +08:00
|
|
|
}
|
2015-01-06 01:29:21 +08:00
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
rcu_read_lock();
|
|
|
|
hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
|
|
|
|
if (*idx < cb->args[2])
|
|
|
|
goto skip;
|
|
|
|
if (filter_dev && (!f->dst || f->dst->dev != filter_dev)) {
|
|
|
|
if (filter_dev != dev)
|
2012-04-15 14:43:56 +08:00
|
|
|
goto skip;
|
2017-12-12 22:02:50 +08:00
|
|
|
/* !f->dst is a special case for bridge
|
|
|
|
* It means the MAC belongs to the bridge
|
|
|
|
* Therefore need a little more filtering
|
|
|
|
* we only want to dump the !f->dst case
|
|
|
|
*/
|
|
|
|
if (f->dst)
|
2015-01-06 01:29:21 +08:00
|
|
|
goto skip;
|
2011-04-04 22:03:30 +08:00
|
|
|
}
|
2017-12-12 22:02:50 +08:00
|
|
|
if (!filter_dev && f->dst)
|
|
|
|
goto skip;
|
|
|
|
|
|
|
|
err = fdb_fill_info(skb, br, f,
|
|
|
|
NETLINK_CB(cb->skb).portid,
|
|
|
|
cb->nlh->nlmsg_seq,
|
|
|
|
RTM_NEWNEIGH,
|
|
|
|
NLM_F_MULTI);
|
|
|
|
if (err < 0)
|
|
|
|
break;
|
|
|
|
skip:
|
|
|
|
*idx += 1;
|
2011-04-04 22:03:30 +08:00
|
|
|
}
|
2017-12-12 22:02:50 +08:00
|
|
|
rcu_read_unlock();
|
2011-04-04 22:03:30 +08:00
|
|
|
|
2016-08-31 12:56:45 +08:00
|
|
|
return err;
|
2011-04-04 22:03:30 +08:00
|
|
|
}
|
2011-04-04 22:03:31 +08:00
|
|
|
|
2018-12-16 14:35:09 +08:00
|
|
|
int br_fdb_get(struct sk_buff *skb,
|
|
|
|
struct nlattr *tb[],
|
|
|
|
struct net_device *dev,
|
|
|
|
const unsigned char *addr,
|
|
|
|
u16 vid, u32 portid, u32 seq,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct net_bridge *br = netdev_priv(dev);
|
|
|
|
struct net_bridge_fdb_entry *f;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
f = br_fdb_find_rcu(br, addr, vid);
|
|
|
|
if (!f) {
|
|
|
|
NL_SET_ERR_MSG(extack, "Fdb entry not found");
|
|
|
|
err = -ENOENT;
|
|
|
|
goto errout;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = fdb_fill_info(skb, br, f, portid, seq,
|
|
|
|
RTM_NEWNEIGH, 0);
|
|
|
|
errout:
|
|
|
|
rcu_read_unlock();
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2020-06-24 04:47:17 +08:00
|
|
|
/* returns true if the fdb is modified */
|
|
|
|
static bool fdb_handle_notify(struct net_bridge_fdb_entry *fdb, u8 notify)
|
|
|
|
{
|
|
|
|
bool modified = false;
|
|
|
|
|
|
|
|
/* allow to mark an entry as inactive, usually done on creation */
|
|
|
|
if ((notify & FDB_NOTIFY_INACTIVE_BIT) &&
|
|
|
|
!test_and_set_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags))
|
|
|
|
modified = true;
|
|
|
|
|
|
|
|
if ((notify & FDB_NOTIFY_BIT) &&
|
|
|
|
!test_and_set_bit(BR_FDB_NOTIFY, &fdb->flags)) {
|
|
|
|
/* enabled activity tracking */
|
|
|
|
modified = true;
|
|
|
|
} else if (!(notify & FDB_NOTIFY_BIT) &&
|
|
|
|
test_and_clear_bit(BR_FDB_NOTIFY, &fdb->flags)) {
|
|
|
|
/* disabled activity tracking, clear notify state */
|
|
|
|
clear_bit(BR_FDB_NOTIFY_INACTIVE, &fdb->flags);
|
|
|
|
modified = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return modified;
|
|
|
|
}
|
|
|
|
|
2011-11-10 02:30:08 +08:00
|
|
|
/* Update (create or replace) forwarding database entry */
|
2016-08-04 10:11:19 +08:00
|
|
|
static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source,
|
2020-06-24 04:47:17 +08:00
|
|
|
const u8 *addr, struct ndmsg *ndm, u16 flags, u16 vid,
|
|
|
|
struct nlattr *nfea_tb[])
|
2011-04-04 22:03:31 +08:00
|
|
|
{
|
2020-06-24 04:47:15 +08:00
|
|
|
bool is_sticky = !!(ndm->ndm_flags & NTF_STICKY);
|
2020-06-24 04:47:18 +08:00
|
|
|
bool refresh = !nfea_tb[NFEA_DONT_REFRESH];
|
2011-04-04 22:03:31 +08:00
|
|
|
struct net_bridge_fdb_entry *fdb;
|
2020-06-24 04:47:15 +08:00
|
|
|
u16 state = ndm->ndm_state;
|
2013-04-22 20:56:49 +08:00
|
|
|
bool modified = false;
|
2020-06-24 04:47:17 +08:00
|
|
|
u8 notify = 0;
|
2011-04-04 22:03:31 +08:00
|
|
|
|
2015-05-25 21:39:31 +08:00
|
|
|
/* If the port cannot learn allow only local and static entries */
|
2016-08-04 10:11:19 +08:00
|
|
|
if (source && !(state & NUD_PERMANENT) && !(state & NUD_NOARP) &&
|
2015-05-25 21:39:31 +08:00
|
|
|
!(source->state == BR_STATE_LEARNING ||
|
|
|
|
source->state == BR_STATE_FORWARDING))
|
|
|
|
return -EPERM;
|
|
|
|
|
2016-08-04 10:11:19 +08:00
|
|
|
if (!source && !(state & NUD_PERMANENT)) {
|
|
|
|
pr_info("bridge: RTM_NEWNEIGH %s without NUD_PERMANENT\n",
|
|
|
|
br->dev->name);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2018-09-11 14:39:53 +08:00
|
|
|
if (is_sticky && (state & NUD_PERMANENT))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2020-06-24 04:47:17 +08:00
|
|
|
if (nfea_tb[NFEA_ACTIVITY_NOTIFY]) {
|
|
|
|
notify = nla_get_u8(nfea_tb[NFEA_ACTIVITY_NOTIFY]);
|
|
|
|
if ((notify & ~BR_FDB_NOTIFY_SETTABLE_BITS) ||
|
|
|
|
(notify & BR_FDB_NOTIFY_SETTABLE_BITS) == FDB_NOTIFY_INACTIVE_BIT)
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2017-02-13 21:59:09 +08:00
|
|
|
fdb = br_fdb_find(br, addr, vid);
|
2011-09-30 22:37:27 +08:00
|
|
|
if (fdb == NULL) {
|
|
|
|
if (!(flags & NLM_F_CREATE))
|
|
|
|
return -ENOENT;
|
2011-04-04 22:03:31 +08:00
|
|
|
|
2023-10-16 21:27:20 +08:00
|
|
|
fdb = fdb_create(br, source, addr, vid,
|
|
|
|
BIT(BR_FDB_ADDED_BY_USER));
|
2011-09-30 22:37:27 +08:00
|
|
|
if (!fdb)
|
|
|
|
return -ENOMEM;
|
2013-04-22 20:56:49 +08:00
|
|
|
|
|
|
|
modified = true;
|
2011-09-30 22:37:27 +08:00
|
|
|
} else {
|
|
|
|
if (flags & NLM_F_EXCL)
|
|
|
|
return -EEXIST;
|
2013-04-22 20:56:49 +08:00
|
|
|
|
2021-06-29 22:06:44 +08:00
|
|
|
if (READ_ONCE(fdb->dst) != source) {
|
|
|
|
WRITE_ONCE(fdb->dst, source);
|
2013-04-22 20:56:49 +08:00
|
|
|
modified = true;
|
|
|
|
}
|
2023-10-16 21:27:20 +08:00
|
|
|
|
|
|
|
set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
|
net: bridge: Track and limit dynamically learned FDB entries
A malicious actor behind one bridge port may spam the kernel with packets
with a random source MAC address, each of which will create an FDB entry,
each of which is a dynamic allocation in the kernel.
There are roughly 2^48 different MAC addresses, further limited by the
rhashtable they are stored in to 2^31. Each entry is of the type struct
net_bridge_fdb_entry, which is currently 128 bytes big. This means the
maximum amount of memory allocated for FDB entries is 2^31 * 128B =
256GiB, which is too much for most computers.
Mitigate this by maintaining a per bridge count of those automatically
generated entries in fdb_n_learned, and a limit in fdb_max_learned. If
the limit is hit new entries are not learned anymore.
For backwards compatibility the default setting of 0 disables the limit.
User-added entries by netlink or from bridge or bridge port addresses
are never blocked and do not count towards that limit.
Introduce a new fdb entry flag BR_FDB_DYNAMIC_LEARNED to keep track of
whether an FDB entry is included in the count. The flag is enabled for
dynamically learned entries, and disabled for all other entries. This
should be equivalent to BR_FDB_ADDED_BY_USER and BR_FDB_LOCAL being unset,
but contrary to the two flags it can be toggled atomically.
Atomicity is required here, as there are multiple callers that modify the
flags, but are not under a common lock (br_fdb_update is the exception
for br->hash_lock, br_fdb_external_learn_add for RTNL).
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Johannes Nixdorf <jnixdorf-oss@avm.de>
Link: https://lore.kernel.org/r/20231016-fdb_limit-v5-2-32cddff87758@avm.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-10-16 21:27:21 +08:00
|
|
|
if (test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &fdb->flags))
|
|
|
|
atomic_dec(&br->fdb_n_learned);
|
2011-11-10 02:30:08 +08:00
|
|
|
}
|
|
|
|
|
2015-10-09 01:38:52 +08:00
|
|
|
if (fdb_to_nud(br, fdb) != state) {
|
2014-05-16 21:59:19 +08:00
|
|
|
if (state & NUD_PERMANENT) {
|
2019-10-29 19:45:53 +08:00
|
|
|
set_bit(BR_FDB_LOCAL, &fdb->flags);
|
2019-10-29 19:45:54 +08:00
|
|
|
if (!test_and_set_bit(BR_FDB_STATIC, &fdb->flags))
|
2014-11-28 21:34:12 +08:00
|
|
|
fdb_add_hw_addr(br, addr);
|
2014-05-16 21:59:19 +08:00
|
|
|
} else if (state & NUD_NOARP) {
|
2019-10-29 19:45:53 +08:00
|
|
|
clear_bit(BR_FDB_LOCAL, &fdb->flags);
|
2019-10-29 19:45:54 +08:00
|
|
|
if (!test_and_set_bit(BR_FDB_STATIC, &fdb->flags))
|
2014-11-28 21:34:12 +08:00
|
|
|
fdb_add_hw_addr(br, addr);
|
2014-05-16 21:59:19 +08:00
|
|
|
} else {
|
2019-10-29 19:45:53 +08:00
|
|
|
clear_bit(BR_FDB_LOCAL, &fdb->flags);
|
2019-10-29 19:45:54 +08:00
|
|
|
if (test_and_clear_bit(BR_FDB_STATIC, &fdb->flags))
|
2014-11-28 21:34:12 +08:00
|
|
|
fdb_del_hw_addr(br, addr);
|
2014-05-16 21:59:19 +08:00
|
|
|
}
|
2011-09-30 22:37:27 +08:00
|
|
|
|
2013-04-22 20:56:49 +08:00
|
|
|
modified = true;
|
|
|
|
}
|
2018-09-11 14:39:53 +08:00
|
|
|
|
2019-10-29 19:45:55 +08:00
|
|
|
if (is_sticky != test_bit(BR_FDB_STICKY, &fdb->flags)) {
|
|
|
|
change_bit(BR_FDB_STICKY, &fdb->flags);
|
2018-09-11 14:39:53 +08:00
|
|
|
modified = true;
|
|
|
|
}
|
|
|
|
|
bridge: Add MAC Authentication Bypass (MAB) support
Hosts that support 802.1X authentication are able to authenticate
themselves by exchanging EAPOL frames with an authenticator (Ethernet
bridge, in this case) and an authentication server. Access to the
network is only granted by the authenticator to successfully
authenticated hosts.
The above is implemented in the bridge using the "locked" bridge port
option. When enabled, link-local frames (e.g., EAPOL) can be locally
received by the bridge, but all other frames are dropped unless the host
is authenticated. That is, unless the user space control plane installed
an FDB entry according to which the source address of the frame is
located behind the locked ingress port. The entry can be dynamic, in
which case learning needs to be enabled so that the entry will be
refreshed by incoming traffic.
There are deployments in which not all the devices connected to the
authenticator (the bridge) support 802.1X. Such devices can include
printers and cameras. One option to support such deployments is to
unlock the bridge ports connecting these devices, but a slightly more
secure option is to use MAB. When MAB is enabled, the MAC address of the
connected device is used as the user name and password for the
authentication.
For MAB to work, the user space control plane needs to be notified about
MAC addresses that are trying to gain access so that they will be
compared against an allow list. This can be implemented via the regular
learning process with the sole difference that learned FDB entries are
installed with a new "locked" flag indicating that the entry cannot be
used to authenticate the device. The flag cannot be set by user space,
but user space can clear the flag by replacing the entry, thereby
authenticating the device.
Locked FDB entries implement the following semantics with regards to
roaming, aging and forwarding:
1. Roaming: Locked FDB entries can roam to unlocked (authorized) ports,
in which case the "locked" flag is cleared. FDB entries cannot roam
to locked ports regardless of MAB being enabled or not. Therefore,
locked FDB entries are only created if an FDB entry with the given {MAC,
VID} does not already exist. This behavior prevents unauthenticated
devices from disrupting traffic destined to already authenticated
devices.
2. Aging: Locked FDB entries age and refresh by incoming traffic like
regular entries.
3. Forwarding: Locked FDB entries forward traffic like regular entries.
If user space detects an unauthorized MAC behind a locked port and
wishes to prevent traffic with this MAC DA from reaching the host, it
can do so using tc or a different mechanism.
Enable the above behavior using a new bridge port option called "mab".
It can only be enabled on a bridge port that is both locked and has
learning enabled. Locked FDB entries are flushed from the port once MAB
is disabled. A new option is added because there are pure 802.1X
deployments that are not interested in notifications about locked FDB
entries.
Signed-off-by: Hans J. Schultz <netdev@kapio-technology.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-11-02 03:39:21 +08:00
|
|
|
if (test_and_clear_bit(BR_FDB_LOCKED, &fdb->flags))
|
|
|
|
modified = true;
|
|
|
|
|
2020-06-24 04:47:17 +08:00
|
|
|
if (fdb_handle_notify(fdb, notify))
|
|
|
|
modified = true;
|
|
|
|
|
2013-04-22 20:56:49 +08:00
|
|
|
fdb->used = jiffies;
|
|
|
|
if (modified) {
|
2020-06-24 04:47:18 +08:00
|
|
|
if (refresh)
|
|
|
|
fdb->updated = jiffies;
|
2018-05-03 20:43:53 +08:00
|
|
|
fdb_notify(br, fdb, RTM_NEWNEIGH, true);
|
2011-09-30 22:37:27 +08:00
|
|
|
}
|
2011-04-04 22:03:31 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2016-08-04 10:11:19 +08:00
|
|
|
static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br,
|
|
|
|
struct net_bridge_port *p, const unsigned char *addr,
|
net: bridge: validate the NUD_PERMANENT bit when adding an extern_learn FDB entry
Currently it is possible to add broken extern_learn FDB entries to the
bridge in two ways:
1. Entries pointing towards the bridge device that are not local/permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static
2. Entries pointing towards the bridge device or towards a port that
are marked as local/permanent, however the bridge does not process the
'permanent' bit in any way, therefore they are recorded as though they
aren't permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent
Since commit 52e4bec15546 ("net: bridge: switchdev: treat local FDBs the
same as entries towards the bridge"), these incorrect FDB entries can
even trigger NULL pointer dereferences inside the kernel.
This is because that commit made the assumption that all FDB entries
that are not local/permanent have a valid destination port. For context,
local / permanent FDB entries either have fdb->dst == NULL, and these
point towards the bridge device and are therefore local and not to be
used for forwarding, or have fdb->dst == a net_bridge_port structure
(but are to be treated in the same way, i.e. not for forwarding).
That assumption _is_ correct as long as things are working correctly in
the bridge driver, i.e. we cannot logically have fdb->dst == NULL under
any circumstance for FDB entries that are not local. However, the
extern_learn code path where FDB entries are managed by a user space
controller shows that it is possible for the bridge kernel driver to
misinterpret the NUD flags of an entry transmitted by user space, and
end up having fdb->dst == NULL while not being a local entry. This is
invalid and should be rejected.
Before, the two commands listed above both crashed the kernel in this
check from br_switchdev_fdb_notify:
struct net_device *dev = info.is_local ? br->dev : dst->dev;
info.is_local == false, dst == NULL.
After this patch, the invalid entry added by the first command is
rejected:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static; ip link del br0
Error: bridge: FDB entry towards bridge must be permanent.
and the valid entry added by the second command is properly treated as a
local address and does not crash br_switchdev_fdb_notify anymore:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent; ip link del br0
Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space")
Reported-by: syzbot+9ba1174359adba5a5b7c@syzkaller.appspotmail.com
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Link: https://lore.kernel.org/r/20210801231730.7493-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-08-02 07:17:30 +08:00
|
|
|
u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[],
|
|
|
|
struct netlink_ext_ack *extack)
|
2013-02-13 20:00:18 +08:00
|
|
|
{
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
if (ndm->ndm_flags & NTF_USE) {
|
2016-08-04 10:11:19 +08:00
|
|
|
if (!p) {
|
|
|
|
pr_info("bridge: RTM_NEWNEIGH %s with NTF_USE is not supported\n",
|
|
|
|
br->dev->name);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2019-11-04 17:36:51 +08:00
|
|
|
if (!nbp_state_should_learn(p))
|
|
|
|
return 0;
|
|
|
|
|
2015-06-06 21:49:00 +08:00
|
|
|
local_bh_disable();
|
2013-02-13 20:00:18 +08:00
|
|
|
rcu_read_lock();
|
2019-11-01 20:46:37 +08:00
|
|
|
br_fdb_update(br, p, addr, vid, BIT(BR_FDB_ADDED_BY_USER));
|
2013-02-13 20:00:18 +08:00
|
|
|
rcu_read_unlock();
|
2015-06-06 21:49:00 +08:00
|
|
|
local_bh_enable();
|
2017-03-23 18:27:13 +08:00
|
|
|
} else if (ndm->ndm_flags & NTF_EXT_LEARNED) {
|
net: bridge: validate the NUD_PERMANENT bit when adding an extern_learn FDB entry
Currently it is possible to add broken extern_learn FDB entries to the
bridge in two ways:
1. Entries pointing towards the bridge device that are not local/permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static
2. Entries pointing towards the bridge device or towards a port that
are marked as local/permanent, however the bridge does not process the
'permanent' bit in any way, therefore they are recorded as though they
aren't permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent
Since commit 52e4bec15546 ("net: bridge: switchdev: treat local FDBs the
same as entries towards the bridge"), these incorrect FDB entries can
even trigger NULL pointer dereferences inside the kernel.
This is because that commit made the assumption that all FDB entries
that are not local/permanent have a valid destination port. For context,
local / permanent FDB entries either have fdb->dst == NULL, and these
point towards the bridge device and are therefore local and not to be
used for forwarding, or have fdb->dst == a net_bridge_port structure
(but are to be treated in the same way, i.e. not for forwarding).
That assumption _is_ correct as long as things are working correctly in
the bridge driver, i.e. we cannot logically have fdb->dst == NULL under
any circumstance for FDB entries that are not local. However, the
extern_learn code path where FDB entries are managed by a user space
controller show that it is possible for the bridge kernel driver to
misinterpret the NUD flags of an entry transmitted by user space, and
end up having fdb->dst == NULL while not being a local entry. This is
invalid and should be rejected.
Before, the two commands listed above both crashed the kernel in this
check from br_switchdev_fdb_notify:
struct net_device *dev = info.is_local ? br->dev : dst->dev;
info.is_local == false, dst == NULL.
After this patch, the invalid entry added by the first command is
rejected:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static; ip link del br0
Error: bridge: FDB entry towards bridge must be permanent.
and the valid entry added by the second command is properly treated as a
local address and does not crash br_switchdev_fdb_notify anymore:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent; ip link del br0
Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space")
Reported-by: syzbot+9ba1174359adba5a5b7c@syzkaller.appspotmail.com
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Link: https://lore.kernel.org/r/20210801231730.7493-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-08-02 07:17:30 +08:00
|
|
|
if (!p && !(ndm->ndm_state & NUD_PERMANENT)) {
|
|
|
|
NL_SET_ERR_MSG_MOD(extack,
|
|
|
|
"FDB entry towards bridge must be permanent");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2022-11-08 18:47:08 +08:00
|
|
|
err = br_fdb_external_learn_add(br, p, addr, vid, false, true);
|
2013-02-13 20:00:18 +08:00
|
|
|
} else {
|
2016-08-04 10:11:19 +08:00
|
|
|
spin_lock_bh(&br->hash_lock);
|
2020-06-24 04:47:17 +08:00
|
|
|
err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb);
|
2016-08-04 10:11:19 +08:00
|
|
|
spin_unlock_bh(&br->hash_lock);
|
2013-02-13 20:00:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2020-06-24 04:47:17 +08:00
|
|
|
static const struct nla_policy br_nda_fdb_pol[NFEA_MAX + 1] = {
|
|
|
|
[NFEA_ACTIVITY_NOTIFY] = { .type = NLA_U8 },
|
2020-06-24 04:47:18 +08:00
|
|
|
[NFEA_DONT_REFRESH] = { .type = NLA_FLAG },
|
2020-06-24 04:47:17 +08:00
|
|
|
};
|
|
|
|
|
2011-04-04 22:03:31 +08:00
|
|
|
/* Add a new fdb entry (NUD_PERMANENT, NUD_NOARP or NUD_REACHABLE state) with RTM_NEWNEIGH */
|
2012-10-01 20:32:33 +08:00
|
|
|
int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
|
|
|
|
struct net_device *dev,
|
2019-01-17 07:06:50 +08:00
|
|
|
const unsigned char *addr, u16 vid, u16 nlh_flags,
|
|
|
|
struct netlink_ext_ack *extack)
|
2011-04-04 22:03:31 +08:00
|
|
|
{
|
2020-06-24 04:47:17 +08:00
|
|
|
struct nlattr *nfea_tb[NFEA_MAX + 1], *attr;
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
struct net_bridge_vlan_group *vg;
|
2015-10-09 01:38:52 +08:00
|
|
|
struct net_bridge_port *p = NULL;
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
struct net_bridge_vlan *v;
|
2015-10-09 01:38:52 +08:00
|
|
|
struct net_bridge *br = NULL;
|
bridge: Add MAC Authentication Bypass (MAB) support
Hosts that support 802.1X authentication are able to authenticate
themselves by exchanging EAPOL frames with an authenticator (Ethernet
bridge, in this case) and an authentication server. Access to the
network is only granted by the authenticator to successfully
authenticated hosts.
The above is implemented in the bridge using the "locked" bridge port
option. When enabled, link-local frames (e.g., EAPOL) can be locally
received by the bridge, but all other frames are dropped unless the host
is authenticated. That is, unless the user space control plane installed
an FDB entry according to which the source address of the frame is
located behind the locked ingress port. The entry can be dynamic, in
which case learning needs to be enabled so that the entry will be
refreshed by incoming traffic.
There are deployments in which not all the devices connected to the
authenticator (the bridge) support 802.1X. Such devices can include
printers and cameras. One option to support such deployments is to
unlock the bridge ports connecting these devices, but a slightly more
secure option is to use MAB. When MAB is enabled, the MAC address of the
connected device is used as the user name and password for the
authentication.
For MAB to work, the user space control plane needs to be notified about
MAC addresses that are trying to gain access so that they will be
compared against an allow list. This can be implemented via the regular
learning process with the sole difference that learned FDB entries are
installed with a new "locked" flag indicating that the entry cannot be
used to authenticate the device. The flag cannot be set by user space,
but user space can clear the flag by replacing the entry, thereby
authenticating the device.
Locked FDB entries implement the following semantics with regards to
roaming, aging and forwarding:
1. Roaming: Locked FDB entries can roam to unlocked (authorized) ports,
in which case the "locked" flag is cleared. FDB entries cannot roam
to locked ports regardless of MAB being enabled or not. Therefore,
locked FDB entries are only created if an FDB entry with the given {MAC,
VID} does not already exist. This behavior prevents unauthenticated
devices from disrupting traffic destined to already authenticated
devices.
2. Aging: Locked FDB entries age and refresh by incoming traffic like
regular entries.
3. Forwarding: Locked FDB entries forward traffic like regular entries.
If user space detects an unauthorized MAC behind a locked port and
wishes to prevent traffic with this MAC DA from reaching the host, it
can do so using tc or a different mechanism.
Enable the above behavior using a new bridge port option called "mab".
It can only be enabled on a bridge port that is both locked and has
learning enabled. Locked FDB entries are flushed from the port once MAB
is disabled. A new option is added because there are pure 802.1X
deployments that are not interested in notifications about locked FDB
entries.
Signed-off-by: Hans J. Schultz <netdev@kapio-technology.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-11-02 03:39:21 +08:00
|
|
|
u32 ext_flags = 0;
|
2012-04-15 14:43:56 +08:00
|
|
|
int err = 0;
|
2011-04-04 22:03:31 +08:00
|
|
|
|
2017-08-30 04:16:57 +08:00
|
|
|
trace_br_fdb_add(ndm, dev, addr, vid, nlh_flags);
|
|
|
|
|
2011-11-10 02:30:08 +08:00
|
|
|
if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) {
|
|
|
|
pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2013-06-26 00:34:36 +08:00
|
|
|
if (is_zero_ether_addr(addr)) {
|
|
|
|
pr_info("bridge: RTM_NEWNEIGH with invalid ether address\n");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2021-10-16 19:21:36 +08:00
|
|
|
if (netif_is_bridge_master(dev)) {
|
2015-10-09 01:38:52 +08:00
|
|
|
br = netdev_priv(dev);
|
|
|
|
vg = br_vlan_group(br);
|
|
|
|
} else {
|
|
|
|
p = br_port_get_rtnl(dev);
|
|
|
|
if (!p) {
|
|
|
|
pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n",
|
|
|
|
dev->name);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
2016-08-04 10:11:19 +08:00
|
|
|
br = p->br;
|
2015-10-09 01:38:52 +08:00
|
|
|
vg = nbp_vlan_group(p);
|
2011-04-04 22:03:31 +08:00
|
|
|
}
|
|
|
|
|
bridge: Add MAC Authentication Bypass (MAB) support
Hosts that support 802.1X authentication are able to authenticate
themselves by exchanging EAPOL frames with an authenticator (Ethernet
bridge, in this case) and an authentication server. Access to the
network is only granted by the authenticator to successfully
authenticated hosts.
The above is implemented in the bridge using the "locked" bridge port
option. When enabled, link-local frames (e.g., EAPOL) can be locally
received by the bridge, but all other frames are dropped unless the host
is authenticated. That is, unless the user space control plane installed
an FDB entry according to which the source address of the frame is
located behind the locked ingress port. The entry can be dynamic, in
which case learning needs to be enabled so that the entry will be
refreshed by incoming traffic.
There are deployments in which not all the devices connected to the
authenticator (the bridge) support 802.1X. Such devices can include
printers and cameras. One option to support such deployments is to
unlock the bridge ports connecting these devices, but a slightly more
secure option is to use MAB. When MAB is enabled, the MAC address of the
connected device is used as the user name and password for the
authentication.
For MAB to work, the user space control plane needs to be notified about
MAC addresses that are trying to gain access so that they will be
compared against an allow list. This can be implemented via the regular
learning process with the sole difference that learned FDB entries are
installed with a new "locked" flag indicating that the entry cannot be
used to authenticate the device. The flag cannot be set by user space,
but user space can clear the flag by replacing the entry, thereby
authenticating the device.
Locked FDB entries implement the following semantics with regards to
roaming, aging and forwarding:
1. Roaming: Locked FDB entries can roam to unlocked (authorized) ports,
in which case the "locked" flag is cleared. FDB entries cannot roam
to locked ports regardless of MAB being enabled or not. Therefore,
locked FDB entries are only created if an FDB entry with the given {MAC,
VID} does not already exist. This behavior prevents unauthenticated
devices from disrupting traffic destined to already authenticated
devices.
2. Aging: Locked FDB entries age and refresh by incoming traffic like
regular entries.
3. Forwarding: Locked FDB entries forward traffic like regular entries.
If user space detects an unauthorized MAC behind a locked port and
wishes to prevent traffic with this MAC DA from reaching the host, it
can do so using tc or a different mechanism.
Enable the above behavior using a new bridge port option called "mab".
It can only be enabled on a bridge port that is both locked and has
learning enabled. Locked FDB entries are flushed from the port once MAB
is disabled. A new option is added because there are pure 802.1X
deployments that are not interested in notifications about locked FDB
entries.
Signed-off-by: Hans J. Schultz <netdev@kapio-technology.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2022-11-02 03:39:21 +08:00
|
|
|
if (tb[NDA_FLAGS_EXT])
|
|
|
|
ext_flags = nla_get_u32(tb[NDA_FLAGS_EXT]);
|
|
|
|
|
|
|
|
if (ext_flags & NTF_EXT_LOCKED) {
|
|
|
|
NL_SET_ERR_MSG_MOD(extack, "Cannot add FDB entry with \"locked\" flag set");
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
2020-06-24 04:47:17 +08:00
|
|
|
if (tb[NDA_FDB_EXT_ATTRS]) {
|
|
|
|
attr = tb[NDA_FDB_EXT_ATTRS];
|
|
|
|
err = nla_parse_nested(nfea_tb, NFEA_MAX, attr,
|
|
|
|
br_nda_fdb_pol, extack);
|
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
} else {
|
|
|
|
memset(nfea_tb, 0, sizeof(struct nlattr *) * (NFEA_MAX + 1));
|
|
|
|
}
|
|
|
|
|
2014-11-28 21:34:15 +08:00
|
|
|
if (vid) {
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
v = br_vlan_find(vg, vid);
|
2015-10-09 01:38:52 +08:00
|
|
|
if (!v || !br_vlan_should_use(v)) {
|
|
|
|
pr_info("bridge: RTM_NEWNEIGH with unconfigured vlan %d on %s\n", vid, dev->name);
|
2013-02-13 20:00:18 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* VID was specified, so use it. */
|
net: bridge: validate the NUD_PERMANENT bit when adding an extern_learn FDB entry
Currently it is possible to add broken extern_learn FDB entries to the
bridge in two ways:
1. Entries pointing towards the bridge device that are not local/permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static
2. Entries pointing towards the bridge device or towards a port that
are marked as local/permanent, however the bridge does not process the
'permanent' bit in any way, therefore they are recorded as though they
aren't permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent
Since commit 52e4bec15546 ("net: bridge: switchdev: treat local FDBs the
same as entries towards the bridge"), these incorrect FDB entries can
even trigger NULL pointer dereferences inside the kernel.
This is because that commit made the assumption that all FDB entries
that are not local/permanent have a valid destination port. For context,
local / permanent FDB entries either have fdb->dst == NULL, and these
point towards the bridge device and are therefore local and not to be
used for forwarding, or have fdb->dst == a net_bridge_port structure
(but are to be treated in the same way, i.e. not for forwarding).
That assumption _is_ correct as long as things are working correctly in
the bridge driver, i.e. we cannot logically have fdb->dst == NULL under
any circumstance for FDB entries that are not local. However, the
extern_learn code path where FDB entries are managed by a user space
controller shows that it is possible for the bridge kernel driver to
misinterpret the NUD flags of an entry transmitted by user space, and
end up having fdb->dst == NULL while not being a local entry. This is
invalid and should be rejected.
Before, the two commands listed above both crashed the kernel in this
check from br_switchdev_fdb_notify:
struct net_device *dev = info.is_local ? br->dev : dst->dev;
info.is_local == false, dst == NULL.
After this patch, the invalid entry added by the first command is
rejected:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static; ip link del br0
Error: bridge: FDB entry towards bridge must be permanent.
and the valid entry added by the second command is properly treated as a
local address and does not crash br_switchdev_fdb_notify anymore:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent; ip link del br0
Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space")
Reported-by: syzbot+9ba1174359adba5a5b7c@syzkaller.appspotmail.com
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Link: https://lore.kernel.org/r/20210801231730.7493-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-08-02 07:17:30 +08:00
|
|
|
err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb,
|
|
|
|
extack);
|
2011-11-10 02:30:08 +08:00
|
|
|
} else {
|
net: bridge: validate the NUD_PERMANENT bit when adding an extern_learn FDB entry
Currently it is possible to add broken extern_learn FDB entries to the
bridge in two ways:
1. Entries pointing towards the bridge device that are not local/permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static
2. Entries pointing towards the bridge device or towards a port that
are marked as local/permanent, however the bridge does not process the
'permanent' bit in any way, therefore they are recorded as though they
aren't permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent
Since commit 52e4bec15546 ("net: bridge: switchdev: treat local FDBs the
same as entries towards the bridge"), these incorrect FDB entries can
even trigger NULL pointer dereferences inside the kernel.
This is because that commit made the assumption that all FDB entries
that are not local/permanent have a valid destination port. For context,
local / permanent FDB entries either have fdb->dst == NULL, and these
point towards the bridge device and are therefore local and not to be
used for forwarding, or have fdb->dst == a net_bridge_port structure
(but are to be treated in the same way, i.e. not for forwarding).
That assumption _is_ correct as long as things are working correctly in
the bridge driver, i.e. we cannot logically have fdb->dst == NULL under
any circumstance for FDB entries that are not local. However, the
extern_learn code path where FDB entries are managed by a user space
controller shows that it is possible for the bridge kernel driver to
misinterpret the NUD flags of an entry transmitted by user space, and
end up having fdb->dst == NULL while not being a local entry. This is
invalid and should be rejected.
Before, the two commands listed above both crashed the kernel in this
check from br_switchdev_fdb_notify:
struct net_device *dev = info.is_local ? br->dev : dst->dev;
info.is_local == false, dst == NULL.
After this patch, the invalid entry added by the first command is
rejected:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static; ip link del br0
Error: bridge: FDB entry towards bridge must be permanent.
and the valid entry added by the second command is properly treated as a
local address and does not crash br_switchdev_fdb_notify anymore:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent; ip link del br0
Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space")
Reported-by: syzbot+9ba1174359adba5a5b7c@syzkaller.appspotmail.com
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Link: https://lore.kernel.org/r/20210801231730.7493-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-08-02 07:17:30 +08:00
|
|
|
err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb,
|
|
|
|
extack);
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
if (err || !vg || !vg->num_vlans)
|
2013-02-13 20:00:18 +08:00
|
|
|
goto out;
|
|
|
|
|
|
|
|
/* We have vlans configured on this port and user didn't
|
|
|
|
* specify a VLAN. To be nice, add/update entry for every
|
|
|
|
* vlan on this port.
|
|
|
|
*/
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
list_for_each_entry(v, &vg->vlan_list, vlist) {
|
2015-10-09 01:38:52 +08:00
|
|
|
if (!br_vlan_should_use(v))
|
|
|
|
continue;
|
2020-06-24 04:47:17 +08:00
|
|
|
err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid,
|
net: bridge: validate the NUD_PERMANENT bit when adding an extern_learn FDB entry
Currently it is possible to add broken extern_learn FDB entries to the
bridge in two ways:
1. Entries pointing towards the bridge device that are not local/permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static
2. Entries pointing towards the bridge device or towards a port that
are marked as local/permanent, however the bridge does not process the
'permanent' bit in any way, therefore they are recorded as though they
aren't permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent
Since commit 52e4bec15546 ("net: bridge: switchdev: treat local FDBs the
same as entries towards the bridge"), these incorrect FDB entries can
even trigger NULL pointer dereferences inside the kernel.
This is because that commit made the assumption that all FDB entries
that are not local/permanent have a valid destination port. For context,
local / permanent FDB entries either have fdb->dst == NULL, and these
point towards the bridge device and are therefore local and not to be
used for forwarding, or have fdb->dst == a net_bridge_port structure
(but are to be treated in the same way, i.e. not for forwarding).
That assumption _is_ correct as long as things are working correctly in
the bridge driver, i.e. we cannot logically have fdb->dst == NULL under
any circumstance for FDB entries that are not local. However, the
extern_learn code path where FDB entries are managed by a user space
controller shows that it is possible for the bridge kernel driver to
misinterpret the NUD flags of an entry transmitted by user space, and
end up having fdb->dst == NULL while not being a local entry. This is
invalid and should be rejected.
Before, the two commands listed above both crashed the kernel in this
check from br_switchdev_fdb_notify:
struct net_device *dev = info.is_local ? br->dev : dst->dev;
info.is_local == false, dst == NULL.
After this patch, the invalid entry added by the first command is
rejected:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static; ip link del br0
Error: bridge: FDB entry towards bridge must be permanent.
and the valid entry added by the second command is properly treated as a
local address and does not crash br_switchdev_fdb_notify anymore:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent; ip link del br0
Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space")
Reported-by: syzbot+9ba1174359adba5a5b7c@syzkaller.appspotmail.com
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Link: https://lore.kernel.org/r/20210801231730.7493-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-08-02 07:17:30 +08:00
|
|
|
nfea_tb, extack);
|
2013-02-13 20:00:18 +08:00
|
|
|
if (err)
|
|
|
|
goto out;
|
|
|
|
}
|
2011-11-10 02:30:08 +08:00
|
|
|
}
|
2011-04-04 22:03:31 +08:00
|
|
|
|
2013-02-13 20:00:18 +08:00
|
|
|
out:
|
2011-04-04 22:03:31 +08:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2017-02-13 21:59:11 +08:00
|
|
|
static int fdb_delete_by_addr_and_port(struct net_bridge *br,
|
|
|
|
const struct net_bridge_port *p,
|
2015-06-09 18:34:13 +08:00
|
|
|
const u8 *addr, u16 vlan)
|
2011-04-04 22:03:31 +08:00
|
|
|
{
|
|
|
|
struct net_bridge_fdb_entry *fdb;
|
|
|
|
|
2017-02-13 21:59:09 +08:00
|
|
|
fdb = br_fdb_find(br, addr, vlan);
|
2021-06-29 22:06:44 +08:00
|
|
|
if (!fdb || READ_ONCE(fdb->dst) != p)
|
2011-04-04 22:03:31 +08:00
|
|
|
return -ENOENT;
|
|
|
|
|
2018-05-03 20:43:53 +08:00
|
|
|
fdb_delete(br, fdb, true);
|
2017-02-13 21:59:11 +08:00
|
|
|
|
2011-04-04 22:03:31 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-02-13 21:59:11 +08:00
|
|
|
/* Locked wrapper around fdb_delete_by_addr_and_port(): takes
 * br->hash_lock with bottom halves disabled for the duration of the
 * lookup-and-delete and passes the helper's result (0 or -ENOENT)
 * straight back to the caller.
 */
static int __br_fdb_delete(struct net_bridge *br,
			   const struct net_bridge_port *p,
			   const unsigned char *addr, u16 vid)
{
	int ret;

	spin_lock_bh(&br->hash_lock);
	ret = fdb_delete_by_addr_and_port(br, p, addr, vid);
	spin_unlock_bh(&br->hash_lock);

	return ret;
}
|
|
|
|
|
2011-04-04 22:03:31 +08:00
|
|
|
/* Remove neighbor entry with RTM_DELNEIGH */
|
2013-02-13 20:00:18 +08:00
|
|
|
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
|
|
|
|
struct net_device *dev,
|
2022-05-05 23:09:57 +08:00
|
|
|
const unsigned char *addr, u16 vid,
|
|
|
|
struct netlink_ext_ack *extack)
|
2011-04-04 22:03:31 +08:00
|
|
|
{
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
struct net_bridge_vlan_group *vg;
|
2015-10-09 01:38:52 +08:00
|
|
|
struct net_bridge_port *p = NULL;
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
struct net_bridge_vlan *v;
|
2017-02-13 21:59:11 +08:00
|
|
|
struct net_bridge *br;
|
2011-04-04 22:03:31 +08:00
|
|
|
int err;
|
2013-02-13 20:00:18 +08:00
|
|
|
|
2021-10-16 19:21:36 +08:00
|
|
|
if (netif_is_bridge_master(dev)) {
|
2015-10-09 01:38:52 +08:00
|
|
|
br = netdev_priv(dev);
|
|
|
|
vg = br_vlan_group(br);
|
|
|
|
} else {
|
|
|
|
p = br_port_get_rtnl(dev);
|
|
|
|
if (!p) {
|
|
|
|
pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n",
|
|
|
|
dev->name);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
vg = nbp_vlan_group(p);
|
2017-02-13 21:59:11 +08:00
|
|
|
br = p->br;
|
2011-04-04 22:03:31 +08:00
|
|
|
}
|
|
|
|
|
2014-11-28 21:34:15 +08:00
|
|
|
if (vid) {
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
v = br_vlan_find(vg, vid);
|
|
|
|
if (!v) {
|
2015-10-09 01:38:52 +08:00
|
|
|
pr_info("bridge: RTM_DELNEIGH with unconfigured vlan %d on %s\n", vid, dev->name);
|
2013-02-13 20:00:18 +08:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
2011-04-04 22:03:31 +08:00
|
|
|
|
2017-02-13 21:59:11 +08:00
|
|
|
err = __br_fdb_delete(br, p, addr, vid);
|
2013-02-13 20:00:18 +08:00
|
|
|
} else {
|
2015-02-09 19:16:17 +08:00
|
|
|
err = -ENOENT;
|
2017-02-13 21:59:11 +08:00
|
|
|
err &= __br_fdb_delete(br, p, addr, 0);
|
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-26 01:00:11 +08:00
|
|
|
if (!vg || !vg->num_vlans)
|
2017-02-13 21:59:11 +08:00
|
|
|
return err;
|
2013-02-13 20:00:18 +08:00
|
|
|
|
2015-10-09 01:38:52 +08:00
|
|
|
list_for_each_entry(v, &vg->vlan_list, vlist) {
|
|
|
|
if (!br_vlan_should_use(v))
|
|
|
|
continue;
|
2017-02-13 21:59:11 +08:00
|
|
|
err &= __br_fdb_delete(br, p, addr, v->vid);
|
2015-10-09 01:38:52 +08:00
|
|
|
}
|
2013-02-13 20:00:18 +08:00
|
|
|
}
|
2017-02-13 21:59:11 +08:00
|
|
|
|
2011-04-04 22:03:31 +08:00
|
|
|
return err;
|
|
|
|
}
|
2014-05-16 21:59:17 +08:00
|
|
|
|
|
|
|
int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p)
|
|
|
|
{
|
2017-12-12 22:02:50 +08:00
|
|
|
struct net_bridge_fdb_entry *f, *tmp;
|
2018-02-01 18:25:27 +08:00
|
|
|
int err = 0;
|
2014-05-16 21:59:17 +08:00
|
|
|
|
|
|
|
ASSERT_RTNL();
|
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
/* the key here is that static entries change only under rtnl */
|
|
|
|
rcu_read_lock();
|
|
|
|
hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
|
|
|
|
/* We only care for static entries */
|
2019-10-29 19:45:54 +08:00
|
|
|
if (!test_bit(BR_FDB_STATIC, &f->flags))
|
2017-12-12 22:02:50 +08:00
|
|
|
continue;
|
|
|
|
err = dev_uc_add(p->dev, f->key.addr.addr);
|
|
|
|
if (err)
|
|
|
|
goto rollback;
|
2014-05-16 21:59:17 +08:00
|
|
|
}
|
2017-12-12 22:02:50 +08:00
|
|
|
done:
|
|
|
|
rcu_read_unlock();
|
2014-05-16 21:59:17 +08:00
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
return err;
|
2014-05-16 21:59:17 +08:00
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
rollback:
|
|
|
|
hlist_for_each_entry_rcu(tmp, &br->fdb_list, fdb_node) {
|
|
|
|
/* We only care for static entries */
|
2019-10-29 19:45:54 +08:00
|
|
|
if (!test_bit(BR_FDB_STATIC, &tmp->flags))
|
2017-12-12 22:02:50 +08:00
|
|
|
continue;
|
|
|
|
if (tmp == f)
|
|
|
|
break;
|
|
|
|
dev_uc_del(p->dev, tmp->key.addr.addr);
|
2014-05-16 21:59:17 +08:00
|
|
|
}
|
2017-12-12 22:02:50 +08:00
|
|
|
|
|
|
|
goto done;
|
2014-05-16 21:59:17 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p)
|
|
|
|
{
|
2017-12-12 22:02:50 +08:00
|
|
|
struct net_bridge_fdb_entry *f;
|
2014-05-16 21:59:17 +08:00
|
|
|
|
|
|
|
ASSERT_RTNL();
|
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
rcu_read_lock();
|
|
|
|
hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) {
|
|
|
|
/* We only care for static entries */
|
2019-10-29 19:45:54 +08:00
|
|
|
if (!test_bit(BR_FDB_STATIC, &f->flags))
|
2017-12-12 22:02:50 +08:00
|
|
|
continue;
|
2014-05-16 21:59:17 +08:00
|
|
|
|
2017-12-12 22:02:50 +08:00
|
|
|
dev_uc_del(p->dev, f->key.addr.addr);
|
2014-05-16 21:59:17 +08:00
|
|
|
}
|
2017-12-12 22:02:50 +08:00
|
|
|
rcu_read_unlock();
|
2014-05-16 21:59:17 +08:00
|
|
|
}
|
2014-11-28 21:34:21 +08:00
|
|
|
|
2015-01-16 06:49:37 +08:00
|
|
|
int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p,
|
2022-11-08 18:47:08 +08:00
|
|
|
const unsigned char *addr, u16 vid, bool locked,
|
2018-05-03 20:43:53 +08:00
|
|
|
bool swdev_notify)
|
2014-11-28 21:34:21 +08:00
|
|
|
{
|
|
|
|
struct net_bridge_fdb_entry *fdb;
|
2017-07-04 06:14:59 +08:00
|
|
|
bool modified = false;
|
2014-11-28 21:34:21 +08:00
|
|
|
int err = 0;
|
|
|
|
|
2017-08-30 04:16:57 +08:00
|
|
|
trace_br_fdb_external_learn_add(br, p, addr, vid);
|
|
|
|
|
2022-11-08 18:47:08 +08:00
|
|
|
if (locked && (!p || !(p->flags & BR_PORT_MAB)))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2014-11-28 21:34:21 +08:00
|
|
|
spin_lock_bh(&br->hash_lock);
|
|
|
|
|
2017-02-13 21:59:09 +08:00
|
|
|
fdb = br_fdb_find(br, addr, vid);
|
2014-11-28 21:34:21 +08:00
|
|
|
if (!fdb) {
|
2019-11-01 20:46:38 +08:00
|
|
|
unsigned long flags = BIT(BR_FDB_ADDED_BY_EXT_LEARN);
|
|
|
|
|
|
|
|
if (swdev_notify)
|
|
|
|
flags |= BIT(BR_FDB_ADDED_BY_USER);
|
net: bridge: validate the NUD_PERMANENT bit when adding an extern_learn FDB entry
Currently it is possible to add broken extern_learn FDB entries to the
bridge in two ways:
1. Entries pointing towards the bridge device that are not local/permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static
2. Entries pointing towards the bridge device or towards a port that
are marked as local/permanent, however the bridge does not process the
'permanent' bit in any way, therefore they are recorded as though they
aren't permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent
Since commit 52e4bec15546 ("net: bridge: switchdev: treat local FDBs the
same as entries towards the bridge"), these incorrect FDB entries can
even trigger NULL pointer dereferences inside the kernel.
This is because that commit made the assumption that all FDB entries
that are not local/permanent have a valid destination port. For context,
local / permanent FDB entries either have fdb->dst == NULL, and these
point towards the bridge device and are therefore local and not to be
used for forwarding, or have fdb->dst == a net_bridge_port structure
(but are to be treated in the same way, i.e. not for forwarding).
That assumption _is_ correct as long as things are working correctly in
the bridge driver, i.e. we cannot logically have fdb->dst == NULL under
any circumstance for FDB entries that are not local. However, the
extern_learn code path where FDB entries are managed by a user space
controller shows that it is possible for the bridge kernel driver to
misinterpret the NUD flags of an entry transmitted by user space, and
end up having fdb->dst == NULL while not being a local entry. This is
invalid and should be rejected.
Before, the two commands listed above both crashed the kernel in this
check from br_switchdev_fdb_notify:
struct net_device *dev = info.is_local ? br->dev : dst->dev;
info.is_local == false, dst == NULL.
After this patch, the invalid entry added by the first command is
rejected:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static; ip link del br0
Error: bridge: FDB entry towards bridge must be permanent.
and the valid entry added by the second command is properly treated as a
local address and does not crash br_switchdev_fdb_notify anymore:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent; ip link del br0
Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space")
Reported-by: syzbot+9ba1174359adba5a5b7c@syzkaller.appspotmail.com
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Link: https://lore.kernel.org/r/20210801231730.7493-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-08-02 07:17:30 +08:00
|
|
|
|
2021-08-10 19:00:10 +08:00
|
|
|
if (!p)
|
net: bridge: validate the NUD_PERMANENT bit when adding an extern_learn FDB entry
Currently it is possible to add broken extern_learn FDB entries to the
bridge in two ways:
1. Entries pointing towards the bridge device that are not local/permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static
2. Entries pointing towards the bridge device or towards a port that
are marked as local/permanent, however the bridge does not process the
'permanent' bit in any way, therefore they are recorded as though they
aren't permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent
Since commit 52e4bec15546 ("net: bridge: switchdev: treat local FDBs the
same as entries towards the bridge"), these incorrect FDB entries can
even trigger NULL pointer dereferences inside the kernel.
This is because that commit made the assumption that all FDB entries
that are not local/permanent have a valid destination port. For context,
local / permanent FDB entries either have fdb->dst == NULL, and these
point towards the bridge device and are therefore local and not to be
used for forwarding, or have fdb->dst == a net_bridge_port structure
(but are to be treated in the same way, i.e. not for forwarding).
That assumption _is_ correct as long as things are working correctly in
the bridge driver, i.e. we cannot logically have fdb->dst == NULL under
any circumstance for FDB entries that are not local. However, the
extern_learn code path where FDB entries are managed by a user space
controller shows that it is possible for the bridge kernel driver to
misinterpret the NUD flags of an entry transmitted by user space, and
end up having fdb->dst == NULL while not being a local entry. This is
invalid and should be rejected.
Before, the two commands listed above both crashed the kernel in this
check from br_switchdev_fdb_notify:
struct net_device *dev = info.is_local ? br->dev : dst->dev;
info.is_local == false, dst == NULL.
After this patch, the invalid entry added by the first command is
rejected:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static; ip link del br0
Error: bridge: FDB entry towards bridge must be permanent.
and the valid entry added by the second command is properly treated as a
local address and does not crash br_switchdev_fdb_notify anymore:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent; ip link del br0
Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space")
Reported-by: syzbot+9ba1174359adba5a5b7c@syzkaller.appspotmail.com
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Link: https://lore.kernel.org/r/20210801231730.7493-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-08-02 07:17:30 +08:00
|
|
|
flags |= BIT(BR_FDB_LOCAL);
|
|
|
|
|
2022-11-08 18:47:08 +08:00
|
|
|
if (locked)
|
|
|
|
flags |= BIT(BR_FDB_LOCKED);
|
|
|
|
|
2019-11-01 20:46:38 +08:00
|
|
|
fdb = fdb_create(br, p, addr, vid, flags);
|
2014-11-28 21:34:21 +08:00
|
|
|
if (!fdb) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto err_unlock;
|
|
|
|
}
|
2018-05-03 20:43:53 +08:00
|
|
|
fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
|
2017-07-04 06:14:59 +08:00
|
|
|
} else {
|
2022-11-08 18:47:08 +08:00
|
|
|
if (locked &&
|
|
|
|
(!test_bit(BR_FDB_LOCKED, &fdb->flags) ||
|
|
|
|
READ_ONCE(fdb->dst) != p)) {
|
|
|
|
err = -EINVAL;
|
|
|
|
goto err_unlock;
|
|
|
|
}
|
|
|
|
|
2014-11-28 21:34:21 +08:00
|
|
|
fdb->updated = jiffies;
|
2017-07-04 06:14:59 +08:00
|
|
|
|
2021-06-29 22:06:44 +08:00
|
|
|
if (READ_ONCE(fdb->dst) != p) {
|
|
|
|
WRITE_ONCE(fdb->dst, p);
|
2017-07-04 06:14:59 +08:00
|
|
|
modified = true;
|
|
|
|
}
|
|
|
|
|
2019-10-29 19:45:57 +08:00
|
|
|
if (test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags)) {
|
2017-07-04 06:14:59 +08:00
|
|
|
/* Refresh entry */
|
|
|
|
fdb->used = jiffies;
|
2019-10-29 19:45:56 +08:00
|
|
|
} else if (!test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags)) {
|
2017-07-04 06:14:59 +08:00
|
|
|
/* Take over SW learned entry */
|
2019-10-29 19:45:57 +08:00
|
|
|
set_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags);
|
2017-07-04 06:14:59 +08:00
|
|
|
modified = true;
|
|
|
|
}
|
|
|
|
|
2022-11-08 18:47:08 +08:00
|
|
|
if (locked != test_bit(BR_FDB_LOCKED, &fdb->flags)) {
|
|
|
|
change_bit(BR_FDB_LOCKED, &fdb->flags);
|
|
|
|
modified = true;
|
|
|
|
}
|
|
|
|
|
2019-01-18 23:58:00 +08:00
|
|
|
if (swdev_notify)
|
2019-10-29 19:45:56 +08:00
|
|
|
set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags);
|
2019-01-18 23:58:00 +08:00
|
|
|
|
2021-08-10 19:00:10 +08:00
|
|
|
if (!p)
|
net: bridge: validate the NUD_PERMANENT bit when adding an extern_learn FDB entry
Currently it is possible to add broken extern_learn FDB entries to the
bridge in two ways:
1. Entries pointing towards the bridge device that are not local/permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static
2. Entries pointing towards the bridge device or towards a port that
are marked as local/permanent, however the bridge does not process the
'permanent' bit in any way, therefore they are recorded as though they
aren't permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent
Since commit 52e4bec15546 ("net: bridge: switchdev: treat local FDBs the
same as entries towards the bridge"), these incorrect FDB entries can
even trigger NULL pointer dereferences inside the kernel.
This is because that commit made the assumption that all FDB entries
that are not local/permanent have a valid destination port. For context,
local / permanent FDB entries either have fdb->dst == NULL, and these
point towards the bridge device and are therefore local and not to be
used for forwarding, or have fdb->dst == a net_bridge_port structure
(but are to be treated in the same way, i.e. not for forwarding).
That assumption _is_ correct as long as things are working correctly in
the bridge driver, i.e. we cannot logically have fdb->dst == NULL under
any circumstance for FDB entries that are not local. However, the
extern_learn code path where FDB entries are managed by a user space
controller shows that it is possible for the bridge kernel driver to
misinterpret the NUD flags of an entry transmitted by user space, and
end up having fdb->dst == NULL while not being a local entry. This is
invalid and should be rejected.
Before, the two commands listed above both crashed the kernel in this
check from br_switchdev_fdb_notify:
struct net_device *dev = info.is_local ? br->dev : dst->dev;
info.is_local == false, dst == NULL.
After this patch, the invalid entry added by the first command is
rejected:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static; ip link del br0
Error: bridge: FDB entry towards bridge must be permanent.
and the valid entry added by the second command is properly treated as a
local address and does not crash br_switchdev_fdb_notify anymore:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent; ip link del br0
Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space")
Reported-by: syzbot+9ba1174359adba5a5b7c@syzkaller.appspotmail.com
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Link: https://lore.kernel.org/r/20210801231730.7493-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-08-02 07:17:30 +08:00
|
|
|
set_bit(BR_FDB_LOCAL, &fdb->flags);
|
|
|
|
|
net: bridge: Track and limit dynamically learned FDB entries
A malicious actor behind one bridge port may spam the kernel with packets
with a random source MAC address, each of which will create an FDB entry,
each of which is a dynamic allocation in the kernel.
There are roughly 2^48 different MAC addresses, further limited by the
rhashtable they are stored in to 2^31. Each entry is of the type struct
net_bridge_fdb_entry, which is currently 128 bytes big. This means the
maximum amount of memory allocated for FDB entries is 2^31 * 128B =
256GiB, which is too much for most computers.
Mitigate this by maintaining a per bridge count of those automatically
generated entries in fdb_n_learned, and a limit in fdb_max_learned. If
the limit is hit new entries are not learned anymore.
For backwards compatibility the default setting of 0 disables the limit.
User-added entries by netlink or from bridge or bridge port addresses
are never blocked and do not count towards that limit.
Introduce a new fdb entry flag BR_FDB_DYNAMIC_LEARNED to keep track of
whether an FDB entry is included in the count. The flag is enabled for
dynamically learned entries, and disabled for all other entries. This
should be equivalent to BR_FDB_ADDED_BY_USER and BR_FDB_LOCAL being unset,
but contrary to the two flags it can be toggled atomically.
Atomicity is required here, as there are multiple callers that modify the
flags, but are not under a common lock (br_fdb_update is the exception
for br->hash_lock, br_fdb_external_learn_add for RTNL).
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Johannes Nixdorf <jnixdorf-oss@avm.de>
Link: https://lore.kernel.org/r/20231016-fdb_limit-v5-2-32cddff87758@avm.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-10-16 21:27:21 +08:00
|
|
|
if ((swdev_notify || !p) &&
|
|
|
|
test_and_clear_bit(BR_FDB_DYNAMIC_LEARNED, &fdb->flags))
|
|
|
|
atomic_dec(&br->fdb_n_learned);
|
|
|
|
|
2017-07-04 06:14:59 +08:00
|
|
|
if (modified)
|
2018-05-03 20:43:53 +08:00
|
|
|
fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify);
|
2014-11-28 21:34:21 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
err_unlock:
|
|
|
|
spin_unlock_bh(&br->hash_lock);
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2015-01-16 06:49:37 +08:00
|
|
|
int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
|
2018-05-03 20:43:53 +08:00
|
|
|
const unsigned char *addr, u16 vid,
|
|
|
|
bool swdev_notify)
|
2014-11-28 21:34:21 +08:00
|
|
|
{
|
|
|
|
struct net_bridge_fdb_entry *fdb;
|
|
|
|
int err = 0;
|
|
|
|
|
|
|
|
spin_lock_bh(&br->hash_lock);
|
|
|
|
|
2017-02-13 21:59:09 +08:00
|
|
|
fdb = br_fdb_find(br, addr, vid);
|
2019-10-29 19:45:57 +08:00
|
|
|
if (fdb && test_bit(BR_FDB_ADDED_BY_EXT_LEARN, &fdb->flags))
|
2018-05-03 20:43:53 +08:00
|
|
|
fdb_delete(br, fdb, swdev_notify);
|
2014-11-28 21:34:21 +08:00
|
|
|
else
|
|
|
|
err = -ENOENT;
|
|
|
|
|
|
|
|
spin_unlock_bh(&br->hash_lock);
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
2017-06-08 14:44:15 +08:00
|
|
|
|
|
|
|
/**
 * br_fdb_offloaded_set - bring an entry's BR_FDB_OFFLOADED flag to a value
 * @br: bridge whose FDB is searched; its hash_lock is taken here
 * @p: not used by this function
 * @addr: MAC address of the entry to look up
 * @vid: VLAN id of the entry to look up
 * @offloaded: desired state of the BR_FDB_OFFLOADED flag
 *
 * If an entry exists for @addr/@vid and its BR_FDB_OFFLOADED bit differs
 * from @offloaded, the bit is toggled. Otherwise nothing is changed.
 */
void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p,
			  const unsigned char *addr, u16 vid, bool offloaded)
{
	struct net_bridge_fdb_entry *entry;

	spin_lock_bh(&br->hash_lock);

	entry = br_fdb_find(br, addr, vid);
	if (!entry)
		goto out_unlock;

	/* Only touch the bit when its current state is not the requested one. */
	if (offloaded != test_bit(BR_FDB_OFFLOADED, &entry->flags))
		change_bit(BR_FDB_OFFLOADED, &entry->flags);

out_unlock:
	spin_unlock_bh(&br->hash_lock);
}
|
2018-12-08 03:55:07 +08:00
|
|
|
|
|
|
|
/**
 * br_fdb_clear_offload - clear the offloaded flag on a port's FDB entries
 * @dev: net device resolved to a bridge port via br_port_get_rtnl()
 * @vid: VLAN id an entry must match
 *
 * Walks the fdb_list of the bridge owning the port and clears
 * BR_FDB_OFFLOADED on every entry whose destination is that port and
 * whose key.vlan_id equals @vid. Returns without doing anything when
 * @dev does not resolve to a bridge port.
 *
 * The caller must hold RTNL; this is checked with ASSERT_RTNL().
 */
void br_fdb_clear_offload(const struct net_device *dev, u16 vid)
{
	struct net_bridge_fdb_entry *f;
	struct net_bridge_port *p;
	struct net_bridge *br;

	ASSERT_RTNL();

	p = br_port_get_rtnl(dev);
	if (!p)
		return;

	br = p->br;

	spin_lock_bh(&br->hash_lock);
	hlist_for_each_entry(f, &br->fdb_list, fdb_node) {
		/*
		 * Marked read, consistent with the READ_ONCE()/WRITE_ONCE()
		 * accesses to ->dst made elsewhere in this file while
		 * holding hash_lock.
		 */
		if (READ_ONCE(f->dst) == p && f->key.vlan_id == vid)
			clear_bit(BR_FDB_OFFLOADED, &f->flags);
	}
	spin_unlock_bh(&br->hash_lock);
}
EXPORT_SYMBOL_GPL(br_fdb_clear_offload);
|