linux/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
Kamal Heib a3e74fb924 RDMA/ipoib: Fix warning caused by destroying non-initial netns
After the commit 5ce2dced8e ("RDMA/ipoib: Set rtnl_link_ops for ipoib
interfaces"), if the IPoIB device is moved to non-initial netns,
destroying that netns lets the device vanish instead of moving it back to
the initial netns, This is happening because default_device_exit() skips
the interfaces due to having rtnl_link_ops set.

Steps to reporoduce:
  ip netns add foo
  ip link set mlx5_ib0 netns foo
  ip netns delete foo

WARNING: CPU: 1 PID: 704 at net/core/dev.c:11435 netdev_exit+0x3f/0x50
Modules linked in: xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT
nf_reject_ipv4 nft_compat nft_counter nft_chain_nat nf_nat nf_conntrack
nf_defrag_ipv6 nf_defrag_ipv4 nf_tables nfnetlink tun d
 fuse
CPU: 1 PID: 704 Comm: kworker/u64:3 Tainted: G S      W  5.13.0-rc1+ #1
Hardware name: Dell Inc. PowerEdge R630/02C2CP, BIOS 2.1.5 04/11/2016
Workqueue: netns cleanup_net
RIP: 0010:netdev_exit+0x3f/0x50
Code: 48 8b bb 30 01 00 00 e8 ef 81 b1 ff 48 81 fb c0 3a 54 a1 74 13 48
8b 83 90 00 00 00 48 81 c3 90 00 00 00 48 39 d8 75 02 5b c3 <0f> 0b 5b
c3 66 66 2e 0f 1f 84 00 00 00 00 00 66 90 0f 1f 44 00
RSP: 0018:ffffb297079d7e08 EFLAGS: 00010206
RAX: ffff8eb542c00040 RBX: ffff8eb541333150 RCX: 000000008010000d
RDX: 000000008010000e RSI: 000000008010000d RDI: ffff8eb440042c00
RBP: ffffb297079d7e48 R08: 0000000000000001 R09: ffffffff9fdeac00
R10: ffff8eb5003be000 R11: 0000000000000001 R12: ffffffffa1545620
R13: ffffffffa1545628 R14: 0000000000000000 R15: ffffffffa1543b20
FS:  0000000000000000(0000) GS:ffff8ed37fa00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00005601b5f4c2e8 CR3: 0000001fc8c10002 CR4: 00000000003706e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 ops_exit_list.isra.9+0x36/0x70
 cleanup_net+0x234/0x390
 process_one_work+0x1cb/0x360
 ? process_one_work+0x360/0x360
 worker_thread+0x30/0x370
 ? process_one_work+0x360/0x360
 kthread+0x116/0x130
 ? kthread_park+0x80/0x80
 ret_from_fork+0x22/0x30

To avoid the above warning and later on the kernel panic that could happen
on shutdown due to a NULL pointer dereference, make sure to set the
netns_refund flag that was introduced by commit 3a5ca85707 ("can: dev:
Move device back to init netns on owning netns delete") to properly
restore the IPoIB interfaces to the initial netns.

Fixes: 5ce2dced8e ("RDMA/ipoib: Set rtnl_link_ops for ipoib interfaces")
Link: https://lore.kernel.org/r/20210525150134.139342-1-kamalheib1@gmail.com
Signed-off-by: Kamal Heib <kamalheib1@gmail.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
2021-06-02 15:20:39 -03:00

194 lines
5.1 KiB
C

/*
* Copyright (c) 2012 Mellanox Technologies. - All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/netdevice.h>
#include <linux/if_arp.h> /* For ARPHRD_xxx */
#include <linux/module.h>
#include <net/rtnetlink.h>
#include "ipoib.h"
static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = {
[IFLA_IPOIB_PKEY] = { .type = NLA_U16 },
[IFLA_IPOIB_MODE] = { .type = NLA_U16 },
[IFLA_IPOIB_UMCAST] = { .type = NLA_U16 },
};
static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
u16 val;
if (nla_put_u16(skb, IFLA_IPOIB_PKEY, priv->pkey))
goto nla_put_failure;
val = test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
if (nla_put_u16(skb, IFLA_IPOIB_MODE, val))
goto nla_put_failure;
val = test_bit(IPOIB_FLAG_UMCAST, &priv->flags);
if (nla_put_u16(skb, IFLA_IPOIB_UMCAST, val))
goto nla_put_failure;
return 0;
nla_put_failure:
return -EMSGSIZE;
}
static int ipoib_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[],
struct netlink_ext_ack *extack)
{
u16 mode, umcast;
int ret = 0;
if (data[IFLA_IPOIB_MODE]) {
mode = nla_get_u16(data[IFLA_IPOIB_MODE]);
if (mode == IPOIB_MODE_DATAGRAM)
ret = ipoib_set_mode(dev, "datagram\n");
else if (mode == IPOIB_MODE_CONNECTED)
ret = ipoib_set_mode(dev, "connected\n");
else
ret = -EINVAL;
if (ret < 0)
goto out_err;
}
if (data[IFLA_IPOIB_UMCAST]) {
umcast = nla_get_u16(data[IFLA_IPOIB_UMCAST]);
ipoib_set_umcast(dev, umcast);
}
out_err:
return ret;
}
static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
struct net_device *pdev;
struct ipoib_dev_priv *ppriv;
u16 child_pkey;
int err;
if (!tb[IFLA_LINK])
return -EINVAL;
pdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
if (!pdev || pdev->type != ARPHRD_INFINIBAND)
return -ENODEV;
ppriv = ipoib_priv(pdev);
if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) {
ipoib_warn(ppriv, "child creation disallowed for child devices\n");
return -EINVAL;
}
if (!data || !data[IFLA_IPOIB_PKEY]) {
ipoib_dbg(ppriv, "no pkey specified, using parent pkey\n");
child_pkey = ppriv->pkey;
} else
child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]);
err = ipoib_intf_init(ppriv->ca, ppriv->port, dev->name, dev);
if (err) {
ipoib_warn(ppriv, "failed to initialize pkey device\n");
return err;
}
err = __ipoib_vlan_add(ppriv, ipoib_priv(dev),
child_pkey, IPOIB_RTNL_CHILD);
if (err)
return err;
if (data) {
err = ipoib_changelink(dev, tb, data, extack);
if (err) {
unregister_netdevice(dev);
return err;
}
}
return 0;
}
static void ipoib_del_child_link(struct net_device *dev, struct list_head *head)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
if (!priv->parent)
return;
unregister_netdevice_queue(dev, head);
}
static size_t ipoib_get_size(const struct net_device *dev)
{
return nla_total_size(2) + /* IFLA_IPOIB_PKEY */
nla_total_size(2) + /* IFLA_IPOIB_MODE */
nla_total_size(2); /* IFLA_IPOIB_UMCAST */
}
static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
.kind = "ipoib",
.netns_refund = true,
.maxtype = IFLA_IPOIB_MAX,
.policy = ipoib_policy,
.priv_size = sizeof(struct ipoib_dev_priv),
.setup = ipoib_setup_common,
.newlink = ipoib_new_child_link,
.dellink = ipoib_del_child_link,
.changelink = ipoib_changelink,
.get_size = ipoib_get_size,
.fill_info = ipoib_fill_info,
};
struct rtnl_link_ops *ipoib_get_link_ops(void)
{
return &ipoib_link_ops;
}
int __init ipoib_netlink_init(void)
{
return rtnl_link_register(&ipoib_link_ops);
}
void __exit ipoib_netlink_fini(void)
{
rtnl_link_unregister(&ipoib_link_ops);
}
MODULE_ALIAS_RTNL_LINK("ipoib");