linux/net/ipv4/xfrm4_policy.c
Maxime Bizon 418a73074d net: dst: fix missing initialization of rt_uncached
xfrm_alloc_dst() followed by xfrm4_dst_destroy(), without a
xfrm4_fill_dst() call in between, causes the following BUG:

 BUG: spinlock bad magic on CPU#0, fbxhostapd/732
  lock: 0x890b7668, .magic: 890b7668, .owner: <none>/-1, .owner_cpu: 0
 CPU: 0 PID: 732 Comm: fbxhostapd Not tainted 6.3.0-rc6-next-20230414-00613-ge8de66369925-dirty #9
 Hardware name: Marvell Kirkwood (Flattened Device Tree)
  unwind_backtrace from show_stack+0x10/0x14
  show_stack from dump_stack_lvl+0x28/0x30
  dump_stack_lvl from do_raw_spin_lock+0x20/0x80
  do_raw_spin_lock from rt_del_uncached_list+0x30/0x64
  rt_del_uncached_list from xfrm4_dst_destroy+0x3c/0xbc
  xfrm4_dst_destroy from dst_destroy+0x5c/0xb0
  dst_destroy from rcu_process_callbacks+0xc4/0xec
  rcu_process_callbacks from __do_softirq+0xb4/0x22c
  __do_softirq from call_with_stack+0x1c/0x24
  call_with_stack from do_softirq+0x60/0x6c
  do_softirq from __local_bh_enable_ip+0xa0/0xcc

Patch "net: dst: Prevent false sharing vs. dst_entry:: __refcnt" moved
rt_uncached and rt_uncached_list fields from rtable struct to dst
struct, so they are more zeroed by memset_after(xdst, 0, u.dst) in
xfrm_alloc_dst().

Note that rt_uncached (list_head) was never properly initialized at
alloc time, but xfrm[46]_dst_destroy() is written in such a way that
it was not an issue thanks to the memset:

	if (xdst->u.rt.dst.rt_uncached_list)
		rt_del_uncached_list(&xdst->u.rt);

The route code does it the other way around: rt_uncached_list is
assumed to be valid IIF rt_uncached list_head is not empty:

void rt_del_uncached_list(struct rtable *rt)
{
        if (!list_empty(&rt->dst.rt_uncached)) {
                struct uncached_list *ul = rt->dst.rt_uncached_list;

                spin_lock_bh(&ul->lock);
                list_del_init(&rt->dst.rt_uncached);
                spin_unlock_bh(&ul->lock);
        }
}

This patch adds mandatory rt_uncached list_head initialization in
generic dst_init(), and adapt xfrm[46]_dst_destroy logic to match the
rest of the code.

Fixes: d288a162dd ("net: dst: Prevent false sharing vs. dst_entry:: __refcnt")
Reported-by: kernel test robot <oliver.sang@intel.com>
Link: https://lore.kernel.org/oe-lkp/202304162125.18b7bcdd-oliver.sang@intel.com
Reviewed-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
CC: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Maxime Bizon <mbizon@freebox.fr>
Link: https://lore.kernel.org/r/20230420182508.2417582-1-mbizon@freebox.fr
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-04-21 20:26:56 -07:00

258 lines
5.8 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* xfrm4_policy.c
*
* Changes:
* Kazunori MIYAZAWA @USAGI
* YOSHIFUJI Hideaki @USAGI
* Split up af-specific portion
*
*/
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/inetdevice.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/l3mdev.h>
static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
int tos, int oif,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr,
u32 mark)
{
struct rtable *rt;
memset(fl4, 0, sizeof(*fl4));
fl4->daddr = daddr->a4;
fl4->flowi4_tos = tos;
fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif);
fl4->flowi4_mark = mark;
if (saddr)
fl4->saddr = saddr->a4;
rt = __ip_route_output_key(net, fl4);
if (!IS_ERR(rt))
return &rt->dst;
return ERR_CAST(rt);
}
static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr,
u32 mark)
{
struct flowi4 fl4;
return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr, mark);
}
static int xfrm4_get_saddr(struct net *net, int oif,
xfrm_address_t *saddr, xfrm_address_t *daddr,
u32 mark)
{
struct dst_entry *dst;
struct flowi4 fl4;
dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr, mark);
if (IS_ERR(dst))
return -EHOSTUNREACH;
saddr->a4 = fl4.saddr;
dst_release(dst);
return 0;
}
static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
const struct flowi *fl)
{
struct rtable *rt = (struct rtable *)xdst->route;
const struct flowi4 *fl4 = &fl->u.ip4;
xdst->u.rt.rt_iif = fl4->flowi4_iif;
xdst->u.dst.dev = dev;
netdev_hold(dev, &xdst->u.dst.dev_tracker, GFP_ATOMIC);
/* Sheit... I remember I did this right. Apparently,
* it was magically lost, so this code needs audit */
xdst->u.rt.rt_is_input = rt->rt_is_input;
xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |
RTCF_LOCAL);
xdst->u.rt.rt_type = rt->rt_type;
xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
xdst->u.rt.rt_gw_family = rt->rt_gw_family;
if (rt->rt_gw_family == AF_INET)
xdst->u.rt.rt_gw4 = rt->rt_gw4;
else if (rt->rt_gw_family == AF_INET6)
xdst->u.rt.rt_gw6 = rt->rt_gw6;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
rt_add_uncached_list(&xdst->u.rt);
return 0;
}
static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu,
bool confirm_neigh)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct dst_entry *path = xdst->route;
path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh);
}
static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct dst_entry *path = xdst->route;
path->ops->redirect(path, sk, skb);
}
static void xfrm4_dst_destroy(struct dst_entry *dst)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
dst_destroy_metrics_generic(dst);
rt_del_uncached_list(&xdst->u.rt);
xfrm_dst_destroy(xdst);
}
static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
int unregister)
{
if (!unregister)
return;
xfrm_dst_ifdown(dst, dev);
}
static struct dst_ops xfrm4_dst_ops_template = {
.family = AF_INET,
.update_pmtu = xfrm4_update_pmtu,
.redirect = xfrm4_redirect,
.cow_metrics = dst_cow_metrics_generic,
.destroy = xfrm4_dst_destroy,
.ifdown = xfrm4_dst_ifdown,
.local_out = __ip_local_out,
.gc_thresh = 32768,
};
static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
.dst_ops = &xfrm4_dst_ops_template,
.dst_lookup = xfrm4_dst_lookup,
.get_saddr = xfrm4_get_saddr,
.fill_dst = xfrm4_fill_dst,
.blackhole_route = ipv4_blackhole_route,
};
#ifdef CONFIG_SYSCTL
static struct ctl_table xfrm4_policy_table[] = {
{
.procname = "xfrm4_gc_thresh",
.data = &init_net.xfrm.xfrm4_dst_ops.gc_thresh,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{ }
};
static __net_init int xfrm4_net_sysctl_init(struct net *net)
{
struct ctl_table *table;
struct ctl_table_header *hdr;
table = xfrm4_policy_table;
if (!net_eq(net, &init_net)) {
table = kmemdup(table, sizeof(xfrm4_policy_table), GFP_KERNEL);
if (!table)
goto err_alloc;
table[0].data = &net->xfrm.xfrm4_dst_ops.gc_thresh;
}
hdr = register_net_sysctl(net, "net/ipv4", table);
if (!hdr)
goto err_reg;
net->ipv4.xfrm4_hdr = hdr;
return 0;
err_reg:
if (!net_eq(net, &init_net))
kfree(table);
err_alloc:
return -ENOMEM;
}
static __net_exit void xfrm4_net_sysctl_exit(struct net *net)
{
struct ctl_table *table;
if (!net->ipv4.xfrm4_hdr)
return;
table = net->ipv4.xfrm4_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->ipv4.xfrm4_hdr);
if (!net_eq(net, &init_net))
kfree(table);
}
#else /* CONFIG_SYSCTL */
static inline int xfrm4_net_sysctl_init(struct net *net)
{
return 0;
}
static inline void xfrm4_net_sysctl_exit(struct net *net)
{
}
#endif
static int __net_init xfrm4_net_init(struct net *net)
{
int ret;
memcpy(&net->xfrm.xfrm4_dst_ops, &xfrm4_dst_ops_template,
sizeof(xfrm4_dst_ops_template));
ret = dst_entries_init(&net->xfrm.xfrm4_dst_ops);
if (ret)
return ret;
ret = xfrm4_net_sysctl_init(net);
if (ret)
dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
return ret;
}
static void __net_exit xfrm4_net_exit(struct net *net)
{
xfrm4_net_sysctl_exit(net);
dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
}
static struct pernet_operations __net_initdata xfrm4_net_ops = {
.init = xfrm4_net_init,
.exit = xfrm4_net_exit,
};
static void __init xfrm4_policy_init(void)
{
xfrm_policy_register_afinfo(&xfrm4_policy_afinfo, AF_INET);
}
void __init xfrm4_init(void)
{
xfrm4_state_init();
xfrm4_policy_init();
xfrm4_protocol_init();
register_pernet_subsys(&xfrm4_net_ops);
}