linux/net/bridge/br_vlan_tunnel.c
Nikolay Aleksandrov cfc579f9d8 net: bridge: fix vlan tunnel dst refcnt when egressing
The egress tunnel code uses dst_clone() and directly sets the result,
which is wrong because the entry might have 0 refcnt or be already deleted,
causing a number of problems. It also triggers the WARN_ON() in dst_hold()[1]
when a refcnt couldn't be taken. Fix it by using dst_hold_safe() and
checking if a reference was actually taken before setting the dst.

[1] dmesg WARN_ON log and following refcnt errors
 WARNING: CPU: 5 PID: 38 at include/net/dst.h:230 br_handle_egress_vlan_tunnel+0x10b/0x134 [bridge]
 Modules linked in: 8021q garp mrp bridge stp llc bonding ipv6 virtio_net
 CPU: 5 PID: 38 Comm: ksoftirqd/5 Kdump: loaded Tainted: G        W         5.13.0-rc3+ #360
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014
 RIP: 0010:br_handle_egress_vlan_tunnel+0x10b/0x134 [bridge]
 Code: e8 85 bc 01 e1 45 84 f6 74 90 45 31 f6 85 db 48 c7 c7 a0 02 19 a0 41 0f 94 c6 31 c9 31 d2 44 89 f6 e8 64 bc 01 e1 85 db 75 02 <0f> 0b 31 c9 31 d2 44 89 f6 48 c7 c7 70 02 19 a0 e8 4b bc 01 e1 49
 RSP: 0018:ffff8881003d39e8 EFLAGS: 00010246
 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
 RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffffffffa01902a0
 RBP: ffff8881040c6700 R08: 0000000000000000 R09: 0000000000000001
 R10: 2ce93d0054fe0d00 R11: 54fe0d00000e0000 R12: ffff888109515000
 R13: 0000000000000000 R14: 0000000000000001 R15: 0000000000000401
 FS:  0000000000000000(0000) GS:ffff88822bf40000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f42ba70f030 CR3: 0000000109926000 CR4: 00000000000006e0
 Call Trace:
  br_handle_vlan+0xbc/0xca [bridge]
  __br_forward+0x23/0x164 [bridge]
  deliver_clone+0x41/0x48 [bridge]
  br_handle_frame_finish+0x36f/0x3aa [bridge]
  ? skb_dst+0x2e/0x38 [bridge]
  ? br_handle_ingress_vlan_tunnel+0x3e/0x1c8 [bridge]
  ? br_handle_frame_finish+0x3aa/0x3aa [bridge]
  br_handle_frame+0x2c3/0x377 [bridge]
  ? __skb_pull+0x33/0x51
  ? vlan_do_receive+0x4f/0x36a
  ? br_handle_frame_finish+0x3aa/0x3aa [bridge]
  __netif_receive_skb_core+0x539/0x7c6
  ? __list_del_entry_valid+0x16e/0x1c2
  __netif_receive_skb_list_core+0x6d/0xd6
  netif_receive_skb_list_internal+0x1d9/0x1fa
  gro_normal_list+0x22/0x3e
  dev_gro_receive+0x55b/0x600
  ? detach_buf_split+0x58/0x140
  napi_gro_receive+0x94/0x12e
  virtnet_poll+0x15d/0x315 [virtio_net]
  __napi_poll+0x2c/0x1c9
  net_rx_action+0xe6/0x1fb
  __do_softirq+0x115/0x2d8
  run_ksoftirqd+0x18/0x20
  smpboot_thread_fn+0x183/0x19c
  ? smpboot_unregister_percpu_thread+0x66/0x66
  kthread+0x10a/0x10f
  ? kthread_mod_delayed_work+0xb6/0xb6
  ret_from_fork+0x22/0x30
 ---[ end trace 49f61b07f775fd2b ]---
 dst_release: dst:00000000c02d677a refcnt:-1
 dst_release underflow

Cc: stable@vger.kernel.org
Fixes: 11538d039a ("bridge: vlan dst_metadata hooks in ingress and egress paths")
Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-06-10 14:06:43 -07:00

212 lines
4.8 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Bridge per vlan tunnel port dst_metadata handling code
*
* Authors:
* Roopa Prabhu <roopa@cumulusnetworks.com>
*/
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <net/switchdev.h>
#include <net/dst_metadata.h>
#include "br_private.h"
#include "br_private_tunnel.h"
static inline int br_vlan_tunid_cmp(struct rhashtable_compare_arg *arg,
const void *ptr)
{
const struct net_bridge_vlan *vle = ptr;
__be64 tunid = *(__be64 *)arg->key;
return vle->tinfo.tunnel_id != tunid;
}
/* Parameters of the per-vlan-group tunnel hash table: entries are
 * struct net_bridge_vlan objects linked through their tnode member and
 * keyed by tinfo.tunnel_id (a __be64), compared via br_vlan_tunid_cmp().
 */
static const struct rhashtable_params br_vlan_tunnel_rht_params = {
	/* where the key lives inside the object, and how to compare it */
	.key_offset = offsetof(struct net_bridge_vlan, tinfo.tunnel_id),
	.key_len = sizeof(__be64),
	.obj_cmpfn = br_vlan_tunid_cmp,
	/* where the hash linkage lives inside the object */
	.head_offset = offsetof(struct net_bridge_vlan, tnode),
	/* table sizing */
	.nelem_hint = 3,
	.automatic_shrinking = true,
};
/* Look up the vlan entry stored in @tbl under @tunnel_id.
 * Returns whatever rhashtable_lookup_fast() yields for that key.
 */
static struct net_bridge_vlan *br_vlan_tunnel_lookup(struct rhashtable *tbl,
						     __be64 tunnel_id)
{
	struct net_bridge_vlan *match;

	match = rhashtable_lookup_fast(tbl, &tunnel_id,
				       br_vlan_tunnel_rht_params);
	return match;
}
/* Reset the tunnel info of @vlan and drop the reference on its tunnel dst.
 * The dst pointer is fetched with rtnl_dereference(), so callers are
 * expected to hold RTNL.
 */
static void vlan_tunnel_info_release(struct net_bridge_vlan *vlan)
{
	struct metadata_dst *old_dst;

	old_dst = rtnl_dereference(vlan->tinfo.tunnel_dst);

	/* Clear the id first, then the dst pointer; the reference is
	 * dropped last.
	 */
	WRITE_ONCE(vlan->tinfo.tunnel_id, 0);
	RCU_INIT_POINTER(vlan->tinfo.tunnel_dst, NULL);

	dst_release(&old_dst->dst);
}
/* Remove @vlan's tunnel mapping from @vg's tunnel hash and release its
 * tunnel info. Does nothing when @vlan has no tunnel dst set.
 */
void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg,
			  struct net_bridge_vlan *vlan)
{
	if (rcu_access_pointer(vlan->tinfo.tunnel_dst)) {
		rhashtable_remove_fast(&vg->tunnel_hash, &vlan->tnode,
				       br_vlan_tunnel_rht_params);
		vlan_tunnel_info_release(vlan);
	}
}
/* Attach a tunnel mapping for @tun_id to @vlan and insert it into @vg's
 * tunnel hash.
 *
 * Returns 0 on success, -EEXIST if @vlan already has a tunnel dst,
 * -EINVAL if no metadata dst could be obtained, or the error reported by
 * the hash insertion (in which case the tunnel info is released again).
 */
static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
				  struct net_bridge_vlan *vlan, u32 tun_id)
{
	struct metadata_dst *tdst;
	__be64 tunnel_key;
	int ret;

	if (rtnl_dereference(vlan->tinfo.tunnel_dst))
		return -EEXIST;

	tunnel_key = key32_to_tunnel_id(cpu_to_be32(tun_id));
	tdst = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
				tunnel_key, 0);
	if (!tdst)
		return -EINVAL;

	tdst->u.tun_info.mode |= IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_BRIDGE;

	/* Publish the dst and the id before the entry becomes reachable
	 * through the hash table.
	 */
	rcu_assign_pointer(vlan->tinfo.tunnel_dst, tdst);
	WRITE_ONCE(vlan->tinfo.tunnel_id, tunnel_key);

	ret = rhashtable_lookup_insert_fast(&vg->tunnel_hash, &vlan->tnode,
					    br_vlan_tunnel_rht_params);
	if (ret)
		vlan_tunnel_info_release(vlan);	/* undo the assignment above */

	return ret;
}
/* Map @vid on @port to tunnel id @tun_id.
 *
 * Must be protected by RTNL.
 * Must be called with vid in range from 1 to 4094 inclusive.
 *
 * Returns -EINVAL when the vlan is not found on the port, otherwise the
 * result of __vlan_tunnel_info_add().
 */
int nbp_vlan_tunnel_info_add(const struct net_bridge_port *port, u16 vid,
			     u32 tun_id)
{
	struct net_bridge_vlan_group *vg;
	struct net_bridge_vlan *v;

	ASSERT_RTNL();

	vg = nbp_vlan_group(port);
	v = br_vlan_find(vg, vid);

	return v ? __vlan_tunnel_info_add(vg, v, tun_id) : -EINVAL;
}
/* Remove the tunnel mapping of @vid on @port.
 *
 * Must be protected by RTNL.
 * Must be called with vid in range from 1 to 4094 inclusive.
 *
 * Returns 0 when the vlan exists (whether or not it had a mapping),
 * -ENOENT when it does not.
 */
int nbp_vlan_tunnel_info_delete(const struct net_bridge_port *port, u16 vid)
{
	struct net_bridge_vlan_group *vg;
	struct net_bridge_vlan *vlan;

	ASSERT_RTNL();

	vg = nbp_vlan_group(port);
	vlan = br_vlan_find(vg, vid);
	if (vlan) {
		vlan_tunnel_info_del(vg, vlan);
		return 0;
	}

	return -ENOENT;
}
/* Drop the tunnel mapping of every vlan on @vg's vlan_list. */
static void __vlan_tunnel_info_flush(struct net_bridge_vlan_group *vg)
{
	struct net_bridge_vlan *cur, *next;

	list_for_each_entry_safe(cur, next, &vg->vlan_list, vlist)
		vlan_tunnel_info_del(vg, cur);
}
/* Flush all vlan tunnel mappings of @port's vlan group. Asserts RTNL. */
void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port)
{
	ASSERT_RTNL();

	__vlan_tunnel_info_flush(nbp_vlan_group(port));
}
/* Initialise @vg's tunnel hash table with br_vlan_tunnel_rht_params.
 * Returns the result of rhashtable_init().
 */
int vlan_tunnel_init(struct net_bridge_vlan_group *vg)
{
	int err;

	err = rhashtable_init(&vg->tunnel_hash, &br_vlan_tunnel_rht_params);
	return err;
}
/* Destroy @vg's tunnel hash table. */
void vlan_tunnel_deinit(struct net_bridge_vlan_group *vg)
{
	struct rhashtable *tbl = &vg->tunnel_hash;

	rhashtable_destroy(tbl);
}
/* Ingress hook: if @skb carries tunnel info and is not already vlan
 * tagged, look up the vlan mapped to its tunnel id in @vg; on a match,
 * drop the skb's dst and tag the skb with that vlan's vid using the
 * bridge's vlan protocol. Always returns 0.
 */
int br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
				  struct net_bridge_port *p,
				  struct net_bridge_vlan_group *vg)
{
	struct ip_tunnel_info *tinfo = skb_tunnel_info(skb);
	struct net_bridge_vlan *mapped;

	/* nothing to do without a vlan group or tunnel info, or when the
	 * frame is already tagged
	 */
	if (!vg || !tinfo || skb_vlan_tagged(skb))
		return 0;

	/* lookup vid, given tunnel id */
	mapped = br_vlan_tunnel_lookup(&vg->tunnel_hash, tinfo->key.tun_id);
	if (mapped) {
		skb_dst_drop(skb);
		__vlan_hwaccel_put_tag(skb, p->br->vlan_proto, mapped->vid);
	}

	return 0;
}
/* Egress hook: when @vlan has a tunnel id and @skb carries a vlan tag,
 * drop the skb's current dst, pop the vlan tag and attach @vlan's tunnel
 * dst to the skb.
 *
 * Returns 0 when there is nothing to do or on success, or the error
 * returned by skb_vlan_pop().
 */
int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
				 struct net_bridge_vlan *vlan)
{
	struct metadata_dst *tdst;
	__be64 tun_id;
	int ret;

	if (!vlan)
		return 0;

	tun_id = READ_ONCE(vlan->tinfo.tunnel_id);
	if (!tun_id)
		return 0;
	if (unlikely(!skb_vlan_tag_present(skb)))
		return 0;

	skb_dst_drop(skb);
	ret = skb_vlan_pop(skb);
	if (ret)
		return ret;

	tdst = rcu_dereference(vlan->tinfo.tunnel_dst);
	if (!tdst)
		return 0;

	/* only attach the dst if a reference could actually be taken */
	if (dst_hold_safe(&tdst->dst))
		skb_dst_set(skb, &tdst->dst);

	return 0;
}