linux/net/sched/act_tunnel_key.c

630 lines
16 KiB
C
Raw Normal View History

/*
* Copyright (c) 2016, Amir Vadai <amir@vadai.me>
* Copyright (c) 2016, Mellanox Technologies. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <net/geneve.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
#include <linux/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_tunnel_key.h>
netns: make struct pernet_operations::id unsigned int Make struct pernet_operations::id unsigned. There are 2 reasons to do so: 1) This field is really an index into an zero based array and thus is unsigned entity. Using negative value is out-of-bound access by definition. 2) On x86_64 unsigned 32-bit data which are mixed with pointers via array indexing or offsets added or subtracted to pointers are preffered to signed 32-bit data. "int" being used as an array index needs to be sign-extended to 64-bit before being used. void f(long *p, int i) { g(p[i]); } roughly translates to movsx rsi, esi mov rdi, [rsi+...] call g MOVSX is 3 byte instruction which isn't necessary if the variable is unsigned because x86_64 is zero extending by default. Now, there is net_generic() function which, you guessed it right, uses "int" as an array index: static inline void *net_generic(const struct net *net, int id) { ... ptr = ng->ptr[id - 1]; ... } And this function is used a lot, so those sign extensions add up. Patch snipes ~1730 bytes on allyesconfig kernel (without all junk messing with code generation): add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730) Unfortunately some functions actually grow bigger. This is a semmingly random artefact of code generation with register allocator being used differently. gcc decides that some variable needs to live in new r8+ registers and every access now requires REX prefix. Or it is shifted into r12, so [r12+0] addressing mode has to be used which is longer than [r8] However, overall balance is in negative direction: add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730) function old new delta nfsd4_lock 3886 3959 +73 tipc_link_build_proto_msg 1096 1140 +44 mac80211_hwsim_new_radio 2776 2808 +32 tipc_mon_rcv 1032 1058 +26 svcauth_gss_legacy_init 1413 1429 +16 tipc_bcbase_select_primary 379 392 +13 nfsd4_exchange_id 1247 1260 +13 nfsd4_setclientid_confirm 782 793 +11 ... put_client_renew_locked 494 480 -14 ip_set_sockfn_get 730 716 -14 geneve_sock_add 829 813 -16 nfsd4_sequence_done 721 703 -18 nlmclnt_lookup_host 708 686 -22 nfsd4_lockt 1085 1063 -22 nfs_get_client 1077 1050 -27 tcf_bpf_init 1106 1076 -30 nfsd4_encode_fattr 5997 5930 -67 Total: Before=154856051, After=154854321, chg -0.00% Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-17 09:58:21 +08:00
static unsigned int tunnel_key_net_id;
static struct tc_action_ops act_tunnel_key_ops;
static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_tunnel_key *t = to_tunnel_key(a);
struct tcf_tunnel_key_params *params;
int action;
params = rcu_dereference_bh(t->params);
tcf_lastuse_update(&t->tcf_tm);
bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
net/sched: act_tunnel_key: fix NULL dereference when 'goto chain' is used the control action in the common member of struct tcf_tunnel_key must be a valid value, as it can contain the chain index when 'goto chain' is used. Ensure that the control action can be read as x->tcfa_action, when x is a pointer to struct tc_action and x->ops->type is TCA_ACT_TUNNEL_KEY, to prevent the following command: # tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \ > $tcflags dst_mac $h2mac action tunnel_key unset goto chain 1 from causing a NULL dereference when a matching packet is received: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 80000001097ac067 P4D 80000001097ac067 PUD 103b0a067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 3491 Comm: mausezahn Tainted: G E 4.18.0-rc2.auguri+ #421 Hardware name: Hewlett-Packard HP Z220 CMT Workstation/1790, BIOS K51 v01.58 02/07/2013 RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff95145ea03c40 EFLAGS: 00010246 RAX: 0000000020000001 RBX: ffff9514499e5800 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000 RBP: ffff95145ea03e60 R08: 0000000000000000 R09: ffff95145ea03c9c R10: ffff95145ea03c78 R11: 0000000000000008 R12: ffff951456a69800 R13: ffff951456a69808 R14: 0000000000000001 R15: ffff95144965ee40 FS: 00007fd67ee11740(0000) GS:ffff95145ea00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000001038a2006 CR4: 00000000001606f0 Call Trace: <IRQ> fl_classify+0x1ad/0x1c0 [cls_flower] ? __update_load_avg_se.isra.47+0x1ca/0x1d0 ? __update_load_avg_se.isra.47+0x1ca/0x1d0 ? update_load_avg+0x665/0x690 ? update_load_avg+0x665/0x690 ? kmem_cache_alloc+0x38/0x1c0 tcf_classify+0x89/0x140 __netif_receive_skb_core+0x5ea/0xb70 ? enqueue_entity+0xd0/0x270 ? process_backlog+0x97/0x150 process_backlog+0x97/0x150 net_rx_action+0x14b/0x3e0 __do_softirq+0xde/0x2b4 do_softirq_own_stack+0x2a/0x40 </IRQ> do_softirq.part.18+0x49/0x50 __local_bh_enable_ip+0x49/0x50 __dev_queue_xmit+0x4ab/0x8a0 ? wait_woken+0x80/0x80 ? packet_sendmsg+0x38f/0x810 ? __dev_queue_xmit+0x8a0/0x8a0 packet_sendmsg+0x38f/0x810 sock_sendmsg+0x36/0x40 __sys_sendto+0x10e/0x140 ? do_vfs_ioctl+0xa4/0x630 ? syscall_trace_enter+0x1df/0x2e0 ? __audit_syscall_exit+0x22a/0x290 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fd67e18dc93 Code: 48 8b 0d 18 83 20 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 59 c7 20 00 00 75 13 49 89 ca b8 2c 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 2b f7 ff ff 48 89 04 24 RSP: 002b:00007ffe0189b748 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 00000000020ca010 RCX: 00007fd67e18dc93 RDX: 0000000000000062 RSI: 00000000020ca322 RDI: 0000000000000003 RBP: 00007ffe0189b780 R08: 00007ffe0189b760 R09: 0000000000000014 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000062 R13: 00000000020ca322 R14: 00007ffe0189b760 R15: 0000000000000003 Modules linked in: act_tunnel_key act_gact cls_flower sch_ingress vrf veth act_csum(E) xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ipt_REJECT nf_reject_ipv4 tun bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter intel_rapl snd_hda_codec_hdmi x86_pkg_temp_thermal intel_powerclamp snd_hda_codec_realtek coretemp snd_hda_codec_generic kvm_intel kvm irqbypass snd_hda_intel crct10dif_pclmul crc32_pclmul hp_wmi ghash_clmulni_intel pcbc snd_hda_codec aesni_intel sparse_keymap rfkill snd_hda_core snd_hwdep snd_seq crypto_simd iTCO_wdt gpio_ich iTCO_vendor_support wmi_bmof cryptd mei_wdt glue_helper snd_seq_device snd_pcm pcspkr snd_timer snd i2c_i801 lpc_ich sg soundcore wmi mei_me mei ie31200_edac nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod sr_mod cdrom i915 video i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ahci crc32c_intel libahci serio_raw sfc libata mtd drm ixgbe mdio i2c_core e1000e dca CR2: 0000000000000000 ---[ end trace 1ab8b5b5d4639dfc ]--- RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff95145ea03c40 EFLAGS: 00010246 RAX: 0000000020000001 RBX: ffff9514499e5800 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000002 RDI: 0000000000000000 RBP: ffff95145ea03e60 R08: 0000000000000000 R09: ffff95145ea03c9c R10: ffff95145ea03c78 R11: 0000000000000008 R12: ffff951456a69800 R13: ffff951456a69808 R14: 0000000000000001 R15: ffff95144965ee40 FS: 00007fd67ee11740(0000) GS:ffff95145ea00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000001038a2006 CR4: 00000000001606f0 Kernel panic - not syncing: Fatal exception in interrupt Kernel Offset: 0x11400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- Fixes: d0f6dd8a914f ("net/sched: Introduce act_tunnel_key") Signed-off-by: Davide Caratti <dcaratti@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-07-07 03:01:06 +08:00
action = READ_ONCE(t->tcf_action);
switch (params->tcft_action) {
case TCA_TUNNEL_KEY_ACT_RELEASE:
skb_dst_drop(skb);
break;
case TCA_TUNNEL_KEY_ACT_SET:
skb_dst_drop(skb);
skb_dst_set(skb, dst_clone(&params->tcft_enc_metadata->dst));
break;
default:
WARN_ONCE(1, "Bad tunnel_key action %d.\n",
params->tcft_action);
break;
}
return action;
}
static const struct nla_policy
enc_opts_policy[TCA_TUNNEL_KEY_ENC_OPTS_MAX + 1] = {
[TCA_TUNNEL_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED },
};
static const struct nla_policy
geneve_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1] = {
[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 },
[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 },
[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY,
.len = 128 },
};
static int
tunnel_key_copy_geneve_opt(const struct nlattr *nla, void *dst, int dst_len,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1];
int err, data_len, opt_len;
u8 *data;
err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX,
nla, geneve_opt_policy, extack);
if (err < 0)
return err;
if (!tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS] ||
!tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE] ||
!tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]) {
NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data");
return -EINVAL;
}
data = nla_data(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]);
data_len = nla_len(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA]);
if (data_len < 4) {
NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long");
return -ERANGE;
}
if (data_len % 4) {
NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long");
return -ERANGE;
}
opt_len = sizeof(struct geneve_opt) + data_len;
if (dst) {
struct geneve_opt *opt = dst;
WARN_ON(dst_len < opt_len);
opt->opt_class =
nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS]);
opt->type = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE]);
opt->length = data_len / 4; /* length is in units of 4 bytes */
opt->r1 = 0;
opt->r2 = 0;
opt->r3 = 0;
memcpy(opt + 1, data, data_len);
}
return opt_len;
}
static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
int dst_len, struct netlink_ext_ack *extack)
{
int err, rem, opt_len, len = nla_len(nla), opts_len = 0;
const struct nlattr *attr, *head = nla_data(nla);
err = nla_validate(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX,
enc_opts_policy, extack);
if (err)
return err;
nla_for_each_attr(attr, head, len, rem) {
switch (nla_type(attr)) {
case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
opt_len = tunnel_key_copy_geneve_opt(attr, dst,
dst_len, extack);
if (opt_len < 0)
return opt_len;
opts_len += opt_len;
if (dst) {
dst_len -= opt_len;
dst += opt_len;
}
break;
}
}
if (!opts_len) {
NL_SET_ERR_MSG(extack, "Empty list of tunnel options");
return -EINVAL;
}
if (rem > 0) {
NL_SET_ERR_MSG(extack, "Trailing data after parsing tunnel key options attributes");
return -EINVAL;
}
return opts_len;
}
static int tunnel_key_get_opts_len(struct nlattr *nla,
struct netlink_ext_ack *extack)
{
return tunnel_key_copy_opts(nla, NULL, 0, extack);
}
static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
int opts_len, struct netlink_ext_ack *extack)
{
info->options_len = opts_len;
switch (nla_type(nla_data(nla))) {
case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
#if IS_ENABLED(CONFIG_INET)
info->key.tun_flags |= TUNNEL_GENEVE_OPT;
return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
opts_len, extack);
#else
return -EAFNOSUPPORT;
#endif
default:
NL_SET_ERR_MSG(extack, "Cannot set tunnel options for unknown tunnel type");
return -EINVAL;
}
}
static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = {
[TCA_TUNNEL_KEY_PARMS] = { .len = sizeof(struct tc_tunnel_key) },
[TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 },
[TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NLA_U32 },
[TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
[TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) },
[TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 },
[TCA_TUNNEL_KEY_ENC_DST_PORT] = {.type = NLA_U16},
[TCA_TUNNEL_KEY_NO_CSUM] = { .type = NLA_U8 },
[TCA_TUNNEL_KEY_ENC_OPTS] = { .type = NLA_NESTED },
[TCA_TUNNEL_KEY_ENC_TOS] = { .type = NLA_U8 },
[TCA_TUNNEL_KEY_ENC_TTL] = { .type = NLA_U8 },
};
net/sched: act_tunnel_key: fix memory leak in case of action replace running the following TDC test cases: 7afc - Replace tunnel_key set action with all parameters 364d - Replace tunnel_key set action with all parameters and cookie it's possible to trigger kmemleak warnings like: unreferenced object 0xffff94797127ab40 (size 192): comm "tc", pid 3248, jiffies 4300565293 (age 1006.862s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 c0 93 f9 8a ff ff ff ff ................ 41 84 ee 89 ff ff ff ff 00 00 00 00 00 00 00 00 A............... backtrace: [<000000001e85b61c>] tunnel_key_init+0x31d/0x820 [act_tunnel_key] [<000000007f3f6ee7>] tcf_action_init_1+0x384/0x4c0 [<00000000e89e3ded>] tcf_action_init+0x12b/0x1a0 [<00000000c1c8c0f8>] tcf_action_add+0x73/0x170 [<0000000095a9fc28>] tc_ctl_action+0x122/0x160 [<000000004bebeac5>] rtnetlink_rcv_msg+0x263/0x2d0 [<000000009fd862dd>] netlink_rcv_skb+0x4a/0x110 [<00000000b55199e7>] netlink_unicast+0x1a0/0x250 [<000000004996cd21>] netlink_sendmsg+0x2c1/0x3c0 [<000000004d6a94b4>] sock_sendmsg+0x36/0x40 [<000000005d9f0208>] ___sys_sendmsg+0x280/0x2f0 [<00000000dec19023>] __sys_sendmsg+0x5e/0xa0 [<000000004b82ac81>] do_syscall_64+0x5b/0x180 [<00000000a0f1209a>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<000000002926b2ab>] 0xffffffffffffffff when the tunnel_key action is replaced, the kernel forgets to release the dst metadata: ensure they are released by tunnel_key_init(), the same way it's done in tunnel_key_release(). Fixes: d0f6dd8a914f4 ("net/sched: Introduce act_tunnel_key") Signed-off-by: Davide Caratti <dcaratti@redhat.com> Acked-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-01-11 03:21:02 +08:00
static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
{
if (!p)
return;
net: sched: act_tunnel_key: fix metadata handling Tunnel key action params->tcft_enc_metadata is only set when action is TCA_TUNNEL_KEY_ACT_SET. However, metadata pointer is incorrectly dereferenced during tunnel key init and release without verifying that action is if correct type, which causes NULL pointer dereference. Metadata tunnel dst_cache is also leaked on action overwrite. Fix metadata handling: - Verify that metadata pointer is not NULL before dereferencing it in tunnel_key_init error handling code. - Move dst_cache destroy code into tunnel_key_release_params() function that is called in both action overwrite and release cases (fixes resource leak) and verifies that actions has correct type before dereferencing metadata pointer (fixes NULL pointer dereference). Oops with KASAN enabled during tdc tests execution: [ 261.080482] ================================================================== [ 261.088049] BUG: KASAN: null-ptr-deref in dst_cache_destroy+0x21/0xa0 [ 261.094613] Read of size 8 at addr 00000000000000b0 by task tc/2976 [ 261.102524] CPU: 14 PID: 2976 Comm: tc Not tainted 5.0.0-rc7+ #157 [ 261.108844] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 261.116726] Call Trace: [ 261.119234] dump_stack+0x9a/0xeb [ 261.122625] ? dst_cache_destroy+0x21/0xa0 [ 261.126818] ? dst_cache_destroy+0x21/0xa0 [ 261.131004] kasan_report+0x176/0x192 [ 261.134752] ? idr_get_next+0xd0/0x120 [ 261.138578] ? dst_cache_destroy+0x21/0xa0 [ 261.142768] dst_cache_destroy+0x21/0xa0 [ 261.146799] tunnel_key_release+0x3a/0x50 [act_tunnel_key] [ 261.152392] tcf_action_cleanup+0x2c/0xc0 [ 261.156490] tcf_generic_walker+0x4c2/0x5c0 [ 261.160794] ? tcf_action_dump_1+0x390/0x390 [ 261.165163] ? tunnel_key_walker+0x5/0x1a0 [act_tunnel_key] [ 261.170865] ? tunnel_key_walker+0xe9/0x1a0 [act_tunnel_key] [ 261.176641] tca_action_gd+0x600/0xa40 [ 261.180482] ? tca_get_fill.constprop.17+0x200/0x200 [ 261.185548] ? __lock_acquire+0x588/0x1d20 [ 261.189741] ? __lock_acquire+0x588/0x1d20 [ 261.193922] ? mark_held_locks+0x90/0x90 [ 261.197944] ? mark_held_locks+0x90/0x90 [ 261.202018] ? __nla_parse+0xfe/0x190 [ 261.205774] tc_ctl_action+0x218/0x230 [ 261.209614] ? tcf_action_add+0x230/0x230 [ 261.213726] rtnetlink_rcv_msg+0x3a5/0x600 [ 261.217910] ? lock_downgrade+0x2d0/0x2d0 [ 261.222006] ? validate_linkmsg+0x400/0x400 [ 261.226278] ? find_held_lock+0x6d/0xd0 [ 261.230200] ? match_held_lock+0x1b/0x210 [ 261.234296] ? validate_linkmsg+0x400/0x400 [ 261.238567] netlink_rcv_skb+0xc7/0x1f0 [ 261.242489] ? netlink_ack+0x470/0x470 [ 261.246319] ? netlink_deliver_tap+0x1f3/0x5a0 [ 261.250874] netlink_unicast+0x2ae/0x350 [ 261.254884] ? netlink_attachskb+0x340/0x340 [ 261.261647] ? _copy_from_iter_full+0xdd/0x380 [ 261.268576] ? __virt_addr_valid+0xb6/0xf0 [ 261.275227] ? __check_object_size+0x159/0x240 [ 261.282184] netlink_sendmsg+0x4d3/0x630 [ 261.288572] ? netlink_unicast+0x350/0x350 [ 261.295132] ? netlink_unicast+0x350/0x350 [ 261.301608] sock_sendmsg+0x6d/0x80 [ 261.307467] ___sys_sendmsg+0x48e/0x540 [ 261.313633] ? copy_msghdr_from_user+0x210/0x210 [ 261.320545] ? save_stack+0x89/0xb0 [ 261.326289] ? __lock_acquire+0x588/0x1d20 [ 261.332605] ? entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 261.340063] ? mark_held_locks+0x90/0x90 [ 261.346162] ? do_filp_open+0x138/0x1d0 [ 261.352108] ? may_open_dev+0x50/0x50 [ 261.357897] ? match_held_lock+0x1b/0x210 [ 261.364016] ? __fget_light+0xa6/0xe0 [ 261.369840] ? __sys_sendmsg+0xd2/0x150 [ 261.375814] __sys_sendmsg+0xd2/0x150 [ 261.381610] ? __ia32_sys_shutdown+0x30/0x30 [ 261.388026] ? lock_downgrade+0x2d0/0x2d0 [ 261.394182] ? mark_held_locks+0x1c/0x90 [ 261.400230] ? do_syscall_64+0x1e/0x280 [ 261.406172] do_syscall_64+0x78/0x280 [ 261.411932] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 261.419103] RIP: 0033:0x7f28e91a8b87 [ 261.424791] Code: 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 80 00 00 00 00 8b 05 6a 2b 2c 00 48 63 d2 48 63 ff 85 c0 75 18 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 59 f3 c3 0f 1f 80 00 00 00 00 53 48 89 f3 48 [ 261.448226] RSP: 002b:00007ffdc5c4e2d8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 261.458183] RAX: ffffffffffffffda RBX: 000000005c73c202 RCX: 00007f28e91a8b87 [ 261.467728] RDX: 0000000000000000 RSI: 00007ffdc5c4e340 RDI: 0000000000000003 [ 261.477342] RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000000000c [ 261.486970] R10: 000000000000000c R11: 0000000000000246 R12: 0000000000000001 [ 261.496599] R13: 000000000067b4e0 R14: 00007ffdc5c5248c R15: 00007ffdc5c52480 [ 261.506281] ================================================================== [ 261.516076] Disabling lock debugging due to kernel taint [ 261.523979] BUG: unable to handle kernel NULL pointer dereference at 00000000000000b0 [ 261.534413] #PF error: [normal kernel read fault] [ 261.541730] PGD 8000000317400067 P4D 8000000317400067 PUD 316878067 PMD 0 [ 261.551294] Oops: 0000 [#1] SMP KASAN PTI [ 261.557985] CPU: 14 PID: 2976 Comm: tc Tainted: G B 5.0.0-rc7+ #157 [ 261.568306] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 261.578874] RIP: 0010:dst_cache_destroy+0x21/0xa0 [ 261.586413] Code: f4 ff ff ff eb f6 0f 1f 00 0f 1f 44 00 00 41 56 41 55 49 c7 c6 60 fe 35 af 41 54 55 49 89 fc 53 bd ff ff ff ff e8 ef 98 73 ff <49> 83 3c 24 00 75 35 eb 6c 4c 63 ed e8 de 98 73 ff 4a 8d 3c ed 40 [ 261.611247] RSP: 0018:ffff888316447160 EFLAGS: 00010282 [ 261.619564] RAX: 0000000000000000 RBX: ffff88835b3e2f00 RCX: ffffffffad1c5071 [ 261.629862] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: 0000000000000297 [ 261.640149] RBP: 00000000ffffffff R08: fffffbfff5dd4e89 R09: fffffbfff5dd4e89 [ 261.650467] R10: 0000000000000001 R11: fffffbfff5dd4e88 R12: 00000000000000b0 [ 261.660785] R13: ffff8883267a10c0 R14: ffffffffaf35fe60 R15: 0000000000000001 [ 261.671110] FS: 00007f28ea3e6400(0000) GS:ffff888364200000(0000) knlGS:0000000000000000 [ 261.682447] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 261.691491] CR2: 00000000000000b0 CR3: 00000003178ae004 CR4: 00000000001606e0 [ 261.701283] Call Trace: [ 261.706374] tunnel_key_release+0x3a/0x50 [act_tunnel_key] [ 261.714522] tcf_action_cleanup+0x2c/0xc0 [ 261.721208] tcf_generic_walker+0x4c2/0x5c0 [ 261.728074] ? tcf_action_dump_1+0x390/0x390 [ 261.734996] ? tunnel_key_walker+0x5/0x1a0 [act_tunnel_key] [ 261.743247] ? tunnel_key_walker+0xe9/0x1a0 [act_tunnel_key] [ 261.751557] tca_action_gd+0x600/0xa40 [ 261.757991] ? tca_get_fill.constprop.17+0x200/0x200 [ 261.765644] ? __lock_acquire+0x588/0x1d20 [ 261.772461] ? __lock_acquire+0x588/0x1d20 [ 261.779266] ? mark_held_locks+0x90/0x90 [ 261.785880] ? mark_held_locks+0x90/0x90 [ 261.792470] ? __nla_parse+0xfe/0x190 [ 261.798738] tc_ctl_action+0x218/0x230 [ 261.805145] ? tcf_action_add+0x230/0x230 [ 261.811760] rtnetlink_rcv_msg+0x3a5/0x600 [ 261.818564] ? lock_downgrade+0x2d0/0x2d0 [ 261.825433] ? validate_linkmsg+0x400/0x400 [ 261.832256] ? find_held_lock+0x6d/0xd0 [ 261.838624] ? match_held_lock+0x1b/0x210 [ 261.845142] ? validate_linkmsg+0x400/0x400 [ 261.851729] netlink_rcv_skb+0xc7/0x1f0 [ 261.857976] ? netlink_ack+0x470/0x470 [ 261.864132] ? netlink_deliver_tap+0x1f3/0x5a0 [ 261.870969] netlink_unicast+0x2ae/0x350 [ 261.877294] ? netlink_attachskb+0x340/0x340 [ 261.883962] ? _copy_from_iter_full+0xdd/0x380 [ 261.890750] ? __virt_addr_valid+0xb6/0xf0 [ 261.897188] ? __check_object_size+0x159/0x240 [ 261.903928] netlink_sendmsg+0x4d3/0x630 [ 261.910112] ? netlink_unicast+0x350/0x350 [ 261.916410] ? netlink_unicast+0x350/0x350 [ 261.922656] sock_sendmsg+0x6d/0x80 [ 261.928257] ___sys_sendmsg+0x48e/0x540 [ 261.934183] ? copy_msghdr_from_user+0x210/0x210 [ 261.940865] ? save_stack+0x89/0xb0 [ 261.946355] ? __lock_acquire+0x588/0x1d20 [ 261.952358] ? entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 261.959468] ? mark_held_locks+0x90/0x90 [ 261.965248] ? do_filp_open+0x138/0x1d0 [ 261.970910] ? may_open_dev+0x50/0x50 [ 261.976386] ? match_held_lock+0x1b/0x210 [ 261.982210] ? __fget_light+0xa6/0xe0 [ 261.987648] ? __sys_sendmsg+0xd2/0x150 [ 261.993263] __sys_sendmsg+0xd2/0x150 [ 261.998613] ? __ia32_sys_shutdown+0x30/0x30 [ 262.004555] ? lock_downgrade+0x2d0/0x2d0 [ 262.010236] ? mark_held_locks+0x1c/0x90 [ 262.015758] ? do_syscall_64+0x1e/0x280 [ 262.021234] do_syscall_64+0x78/0x280 [ 262.026500] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 262.033207] RIP: 0033:0x7f28e91a8b87 [ 262.038421] Code: 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 80 00 00 00 00 8b 05 6a 2b 2c 00 48 63 d2 48 63 ff 85 c0 75 18 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 59 f3 c3 0f 1f 80 00 00 00 00 53 48 89 f3 48 [ 262.060708] RSP: 002b:00007ffdc5c4e2d8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 262.070112] RAX: ffffffffffffffda RBX: 000000005c73c202 RCX: 00007f28e91a8b87 [ 262.079087] RDX: 0000000000000000 RSI: 00007ffdc5c4e340 RDI: 0000000000000003 [ 262.088122] RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000000000c [ 262.097157] R10: 000000000000000c R11: 0000000000000246 R12: 0000000000000001 [ 262.106207] R13: 000000000067b4e0 R14: 00007ffdc5c5248c R15: 00007ffdc5c52480 [ 262.115271] Modules linked in: act_tunnel_key act_skbmod act_simple act_connmark nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 act_csum libcrc32c act_meta_skbtcindex act_meta_skbprio act_meta_mark act_ife ife act_police act_sample psample act_gact veth nfsv3 nfs_acl nfs lockd grace fscache bridge stp llc intel_rapl sb_edac mlx5_ib x86_pkg_temp_thermal sunrpc intel_powerclamp coretemp ib_uverbs kvm_intel ib_core kvm irqbypass mlx5_core crct10dif_pclmul crc32_pclmul crc32c_intel igb ghash_clmulni_intel intel_cstate mlxfw iTCO_wdt devlink intel_uncore iTCO_vendor_support ipmi_ssif ptp mei_me intel_rapl_perf ioatdma joydev pps_core ses mei i2c_i801 pcspkr enclosure lpc_ich dca wmi ipmi_si ipmi_devintf ipmi_msghandler acpi_pad acpi_power_meter pcc_cpufreq ast i2c_algo_bit drm_kms_helper ttm drm mpt3sas raid_class scsi_transport_sas [ 262.204393] CR2: 00000000000000b0 [ 262.210390] ---[ end trace 2e41d786f2c7901a ]--- [ 262.226790] RIP: 0010:dst_cache_destroy+0x21/0xa0 [ 262.234083] Code: f4 ff ff ff eb f6 0f 1f 00 0f 1f 44 00 00 41 56 41 55 49 c7 c6 60 fe 35 af 41 54 55 49 89 fc 53 bd ff ff ff ff e8 ef 98 73 ff <49> 83 3c 24 00 75 35 eb 6c 4c 63 ed e8 de 98 73 ff 4a 8d 3c ed 40 [ 262.258311] RSP: 0018:ffff888316447160 EFLAGS: 00010282 [ 262.266304] RAX: 0000000000000000 RBX: ffff88835b3e2f00 RCX: ffffffffad1c5071 [ 262.276251] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: 0000000000000297 [ 262.286208] RBP: 00000000ffffffff R08: fffffbfff5dd4e89 R09: fffffbfff5dd4e89 [ 262.296183] R10: 0000000000000001 R11: fffffbfff5dd4e88 R12: 00000000000000b0 [ 262.306157] R13: ffff8883267a10c0 R14: ffffffffaf35fe60 R15: 0000000000000001 [ 262.316139] FS: 00007f28ea3e6400(0000) GS:ffff888364200000(0000) knlGS:0000000000000000 [ 262.327146] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 262.335815] CR2: 00000000000000b0 CR3: 00000003178ae004 CR4: 00000000001606e0 Fixes: 41411e2fd6b8 ("net/sched: act_tunnel_key: Add dst_cache support") Signed-off-by: Vlad Buslov <vladbu@mellanox.com> Reviewed-by: Roi Dayan <roid@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-02-25 23:30:14 +08:00
if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
#ifdef CONFIG_DST_CACHE
struct ip_tunnel_info *info = &p->tcft_enc_metadata->u.tun_info;
dst_cache_destroy(&info->dst_cache);
#endif
net/sched: act_tunnel_key: fix memory leak in case of action replace running the following TDC test cases: 7afc - Replace tunnel_key set action with all parameters 364d - Replace tunnel_key set action with all parameters and cookie it's possible to trigger kmemleak warnings like: unreferenced object 0xffff94797127ab40 (size 192): comm "tc", pid 3248, jiffies 4300565293 (age 1006.862s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 c0 93 f9 8a ff ff ff ff ................ 41 84 ee 89 ff ff ff ff 00 00 00 00 00 00 00 00 A............... backtrace: [<000000001e85b61c>] tunnel_key_init+0x31d/0x820 [act_tunnel_key] [<000000007f3f6ee7>] tcf_action_init_1+0x384/0x4c0 [<00000000e89e3ded>] tcf_action_init+0x12b/0x1a0 [<00000000c1c8c0f8>] tcf_action_add+0x73/0x170 [<0000000095a9fc28>] tc_ctl_action+0x122/0x160 [<000000004bebeac5>] rtnetlink_rcv_msg+0x263/0x2d0 [<000000009fd862dd>] netlink_rcv_skb+0x4a/0x110 [<00000000b55199e7>] netlink_unicast+0x1a0/0x250 [<000000004996cd21>] netlink_sendmsg+0x2c1/0x3c0 [<000000004d6a94b4>] sock_sendmsg+0x36/0x40 [<000000005d9f0208>] ___sys_sendmsg+0x280/0x2f0 [<00000000dec19023>] __sys_sendmsg+0x5e/0xa0 [<000000004b82ac81>] do_syscall_64+0x5b/0x180 [<00000000a0f1209a>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<000000002926b2ab>] 0xffffffffffffffff when the tunnel_key action is replaced, the kernel forgets to release the dst metadata: ensure they are released by tunnel_key_init(), the same way it's done in tunnel_key_release(). Fixes: d0f6dd8a914f4 ("net/sched: Introduce act_tunnel_key") Signed-off-by: Davide Caratti <dcaratti@redhat.com> Acked-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-01-11 03:21:02 +08:00
dst_release(&p->tcft_enc_metadata->dst);
net: sched: act_tunnel_key: fix metadata handling Tunnel key action params->tcft_enc_metadata is only set when action is TCA_TUNNEL_KEY_ACT_SET. However, metadata pointer is incorrectly dereferenced during tunnel key init and release without verifying that action is if correct type, which causes NULL pointer dereference. Metadata tunnel dst_cache is also leaked on action overwrite. Fix metadata handling: - Verify that metadata pointer is not NULL before dereferencing it in tunnel_key_init error handling code. - Move dst_cache destroy code into tunnel_key_release_params() function that is called in both action overwrite and release cases (fixes resource leak) and verifies that actions has correct type before dereferencing metadata pointer (fixes NULL pointer dereference). Oops with KASAN enabled during tdc tests execution: [ 261.080482] ================================================================== [ 261.088049] BUG: KASAN: null-ptr-deref in dst_cache_destroy+0x21/0xa0 [ 261.094613] Read of size 8 at addr 00000000000000b0 by task tc/2976 [ 261.102524] CPU: 14 PID: 2976 Comm: tc Not tainted 5.0.0-rc7+ #157 [ 261.108844] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 261.116726] Call Trace: [ 261.119234] dump_stack+0x9a/0xeb [ 261.122625] ? dst_cache_destroy+0x21/0xa0 [ 261.126818] ? dst_cache_destroy+0x21/0xa0 [ 261.131004] kasan_report+0x176/0x192 [ 261.134752] ? idr_get_next+0xd0/0x120 [ 261.138578] ? dst_cache_destroy+0x21/0xa0 [ 261.142768] dst_cache_destroy+0x21/0xa0 [ 261.146799] tunnel_key_release+0x3a/0x50 [act_tunnel_key] [ 261.152392] tcf_action_cleanup+0x2c/0xc0 [ 261.156490] tcf_generic_walker+0x4c2/0x5c0 [ 261.160794] ? tcf_action_dump_1+0x390/0x390 [ 261.165163] ? tunnel_key_walker+0x5/0x1a0 [act_tunnel_key] [ 261.170865] ? tunnel_key_walker+0xe9/0x1a0 [act_tunnel_key] [ 261.176641] tca_action_gd+0x600/0xa40 [ 261.180482] ? tca_get_fill.constprop.17+0x200/0x200 [ 261.185548] ? __lock_acquire+0x588/0x1d20 [ 261.189741] ? __lock_acquire+0x588/0x1d20 [ 261.193922] ? mark_held_locks+0x90/0x90 [ 261.197944] ? mark_held_locks+0x90/0x90 [ 261.202018] ? __nla_parse+0xfe/0x190 [ 261.205774] tc_ctl_action+0x218/0x230 [ 261.209614] ? tcf_action_add+0x230/0x230 [ 261.213726] rtnetlink_rcv_msg+0x3a5/0x600 [ 261.217910] ? lock_downgrade+0x2d0/0x2d0 [ 261.222006] ? validate_linkmsg+0x400/0x400 [ 261.226278] ? find_held_lock+0x6d/0xd0 [ 261.230200] ? match_held_lock+0x1b/0x210 [ 261.234296] ? validate_linkmsg+0x400/0x400 [ 261.238567] netlink_rcv_skb+0xc7/0x1f0 [ 261.242489] ? netlink_ack+0x470/0x470 [ 261.246319] ? netlink_deliver_tap+0x1f3/0x5a0 [ 261.250874] netlink_unicast+0x2ae/0x350 [ 261.254884] ? netlink_attachskb+0x340/0x340 [ 261.261647] ? _copy_from_iter_full+0xdd/0x380 [ 261.268576] ? __virt_addr_valid+0xb6/0xf0 [ 261.275227] ? __check_object_size+0x159/0x240 [ 261.282184] netlink_sendmsg+0x4d3/0x630 [ 261.288572] ? netlink_unicast+0x350/0x350 [ 261.295132] ? netlink_unicast+0x350/0x350 [ 261.301608] sock_sendmsg+0x6d/0x80 [ 261.307467] ___sys_sendmsg+0x48e/0x540 [ 261.313633] ? copy_msghdr_from_user+0x210/0x210 [ 261.320545] ? save_stack+0x89/0xb0 [ 261.326289] ? __lock_acquire+0x588/0x1d20 [ 261.332605] ? entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 261.340063] ? mark_held_locks+0x90/0x90 [ 261.346162] ? do_filp_open+0x138/0x1d0 [ 261.352108] ? may_open_dev+0x50/0x50 [ 261.357897] ? match_held_lock+0x1b/0x210 [ 261.364016] ? __fget_light+0xa6/0xe0 [ 261.369840] ? __sys_sendmsg+0xd2/0x150 [ 261.375814] __sys_sendmsg+0xd2/0x150 [ 261.381610] ? __ia32_sys_shutdown+0x30/0x30 [ 261.388026] ? lock_downgrade+0x2d0/0x2d0 [ 261.394182] ? mark_held_locks+0x1c/0x90 [ 261.400230] ? do_syscall_64+0x1e/0x280 [ 261.406172] do_syscall_64+0x78/0x280 [ 261.411932] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 261.419103] RIP: 0033:0x7f28e91a8b87 [ 261.424791] Code: 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 80 00 00 00 00 8b 05 6a 2b 2c 00 48 63 d2 48 63 ff 85 c0 75 18 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 59 f3 c3 0f 1f 80 00 00 00 00 53 48 89 f3 48 [ 261.448226] RSP: 002b:00007ffdc5c4e2d8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 261.458183] RAX: ffffffffffffffda RBX: 000000005c73c202 RCX: 00007f28e91a8b87 [ 261.467728] RDX: 0000000000000000 RSI: 00007ffdc5c4e340 RDI: 0000000000000003 [ 261.477342] RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000000000c [ 261.486970] R10: 000000000000000c R11: 0000000000000246 R12: 0000000000000001 [ 261.496599] R13: 000000000067b4e0 R14: 00007ffdc5c5248c R15: 00007ffdc5c52480 [ 261.506281] ================================================================== [ 261.516076] Disabling lock debugging due to kernel taint [ 261.523979] BUG: unable to handle kernel NULL pointer dereference at 00000000000000b0 [ 261.534413] #PF error: [normal kernel read fault] [ 261.541730] PGD 8000000317400067 P4D 8000000317400067 PUD 316878067 PMD 0 [ 261.551294] Oops: 0000 [#1] SMP KASAN PTI [ 261.557985] CPU: 14 PID: 2976 Comm: tc Tainted: G B 5.0.0-rc7+ #157 [ 261.568306] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 261.578874] RIP: 0010:dst_cache_destroy+0x21/0xa0 [ 261.586413] Code: f4 ff ff ff eb f6 0f 1f 00 0f 1f 44 00 00 41 56 41 55 49 c7 c6 60 fe 35 af 41 54 55 49 89 fc 53 bd ff ff ff ff e8 ef 98 73 ff <49> 83 3c 24 00 75 35 eb 6c 4c 63 ed e8 de 98 73 ff 4a 8d 3c ed 40 [ 261.611247] RSP: 0018:ffff888316447160 EFLAGS: 00010282 [ 261.619564] RAX: 0000000000000000 RBX: ffff88835b3e2f00 RCX: ffffffffad1c5071 [ 261.629862] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: 0000000000000297 [ 261.640149] RBP: 00000000ffffffff R08: fffffbfff5dd4e89 R09: fffffbfff5dd4e89 [ 261.650467] R10: 0000000000000001 R11: fffffbfff5dd4e88 R12: 00000000000000b0 [ 261.660785] R13: ffff8883267a10c0 R14: ffffffffaf35fe60 R15: 0000000000000001 [ 261.671110] FS: 00007f28ea3e6400(0000) GS:ffff888364200000(0000) knlGS:0000000000000000 [ 261.682447] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 261.691491] CR2: 00000000000000b0 CR3: 00000003178ae004 CR4: 00000000001606e0 [ 261.701283] Call Trace: [ 261.706374] tunnel_key_release+0x3a/0x50 [act_tunnel_key] [ 261.714522] tcf_action_cleanup+0x2c/0xc0 [ 261.721208] tcf_generic_walker+0x4c2/0x5c0 [ 261.728074] ? tcf_action_dump_1+0x390/0x390 [ 261.734996] ? tunnel_key_walker+0x5/0x1a0 [act_tunnel_key] [ 261.743247] ? tunnel_key_walker+0xe9/0x1a0 [act_tunnel_key] [ 261.751557] tca_action_gd+0x600/0xa40 [ 261.757991] ? tca_get_fill.constprop.17+0x200/0x200 [ 261.765644] ? __lock_acquire+0x588/0x1d20 [ 261.772461] ? __lock_acquire+0x588/0x1d20 [ 261.779266] ? mark_held_locks+0x90/0x90 [ 261.785880] ? mark_held_locks+0x90/0x90 [ 261.792470] ? __nla_parse+0xfe/0x190 [ 261.798738] tc_ctl_action+0x218/0x230 [ 261.805145] ? tcf_action_add+0x230/0x230 [ 261.811760] rtnetlink_rcv_msg+0x3a5/0x600 [ 261.818564] ? lock_downgrade+0x2d0/0x2d0 [ 261.825433] ? validate_linkmsg+0x400/0x400 [ 261.832256] ? find_held_lock+0x6d/0xd0 [ 261.838624] ? match_held_lock+0x1b/0x210 [ 261.845142] ? validate_linkmsg+0x400/0x400 [ 261.851729] netlink_rcv_skb+0xc7/0x1f0 [ 261.857976] ? netlink_ack+0x470/0x470 [ 261.864132] ? netlink_deliver_tap+0x1f3/0x5a0 [ 261.870969] netlink_unicast+0x2ae/0x350 [ 261.877294] ? netlink_attachskb+0x340/0x340 [ 261.883962] ? _copy_from_iter_full+0xdd/0x380 [ 261.890750] ? __virt_addr_valid+0xb6/0xf0 [ 261.897188] ? __check_object_size+0x159/0x240 [ 261.903928] netlink_sendmsg+0x4d3/0x630 [ 261.910112] ? netlink_unicast+0x350/0x350 [ 261.916410] ? netlink_unicast+0x350/0x350 [ 261.922656] sock_sendmsg+0x6d/0x80 [ 261.928257] ___sys_sendmsg+0x48e/0x540 [ 261.934183] ? copy_msghdr_from_user+0x210/0x210 [ 261.940865] ? save_stack+0x89/0xb0 [ 261.946355] ? __lock_acquire+0x588/0x1d20 [ 261.952358] ? entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 261.959468] ? mark_held_locks+0x90/0x90 [ 261.965248] ? do_filp_open+0x138/0x1d0 [ 261.970910] ? may_open_dev+0x50/0x50 [ 261.976386] ? match_held_lock+0x1b/0x210 [ 261.982210] ? __fget_light+0xa6/0xe0 [ 261.987648] ? __sys_sendmsg+0xd2/0x150 [ 261.993263] __sys_sendmsg+0xd2/0x150 [ 261.998613] ? __ia32_sys_shutdown+0x30/0x30 [ 262.004555] ? lock_downgrade+0x2d0/0x2d0 [ 262.010236] ? mark_held_locks+0x1c/0x90 [ 262.015758] ? do_syscall_64+0x1e/0x280 [ 262.021234] do_syscall_64+0x78/0x280 [ 262.026500] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 262.033207] RIP: 0033:0x7f28e91a8b87 [ 262.038421] Code: 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 80 00 00 00 00 8b 05 6a 2b 2c 00 48 63 d2 48 63 ff 85 c0 75 18 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 59 f3 c3 0f 1f 80 00 00 00 00 53 48 89 f3 48 [ 262.060708] RSP: 002b:00007ffdc5c4e2d8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 262.070112] RAX: ffffffffffffffda RBX: 000000005c73c202 RCX: 00007f28e91a8b87 [ 262.079087] RDX: 0000000000000000 RSI: 00007ffdc5c4e340 RDI: 0000000000000003 [ 262.088122] RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000000000c [ 262.097157] R10: 000000000000000c R11: 0000000000000246 R12: 0000000000000001 [ 262.106207] R13: 000000000067b4e0 R14: 00007ffdc5c5248c R15: 00007ffdc5c52480 [ 262.115271] Modules linked in: act_tunnel_key act_skbmod act_simple act_connmark nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 act_csum libcrc32c act_meta_skbtcindex act_meta_skbprio act_meta_mark act_ife ife act_police act_sample psample act_gact veth nfsv3 nfs_acl nfs lockd grace fscache bridge stp llc intel_rapl sb_edac mlx5_ib x86_pkg_temp_thermal sunrpc intel_powerclamp coretemp ib_uverbs kvm_intel ib_core kvm irqbypass mlx5_core crct10dif_pclmul crc32_pclmul crc32c_intel igb ghash_clmulni_intel intel_cstate mlxfw iTCO_wdt devlink intel_uncore iTCO_vendor_support ipmi_ssif ptp mei_me intel_rapl_perf ioatdma joydev pps_core ses mei i2c_i801 pcspkr enclosure lpc_ich dca wmi ipmi_si ipmi_devintf ipmi_msghandler acpi_pad acpi_power_meter pcc_cpufreq ast i2c_algo_bit drm_kms_helper ttm drm mpt3sas raid_class scsi_transport_sas [ 262.204393] CR2: 00000000000000b0 [ 262.210390] ---[ end trace 2e41d786f2c7901a ]--- [ 262.226790] RIP: 0010:dst_cache_destroy+0x21/0xa0 [ 262.234083] Code: f4 ff ff ff eb f6 0f 1f 00 0f 1f 44 00 00 41 56 41 55 49 c7 c6 60 fe 35 af 41 54 55 49 89 fc 53 bd ff ff ff ff e8 ef 98 73 ff <49> 83 3c 24 00 75 35 eb 6c 4c 63 ed e8 de 98 73 ff 4a 8d 3c ed 40 [ 262.258311] RSP: 0018:ffff888316447160 EFLAGS: 00010282 [ 262.266304] RAX: 0000000000000000 RBX: ffff88835b3e2f00 RCX: ffffffffad1c5071 [ 262.276251] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: 0000000000000297 [ 262.286208] RBP: 00000000ffffffff R08: fffffbfff5dd4e89 R09: fffffbfff5dd4e89 [ 262.296183] R10: 0000000000000001 R11: fffffbfff5dd4e88 R12: 00000000000000b0 [ 262.306157] R13: ffff8883267a10c0 R14: ffffffffaf35fe60 R15: 0000000000000001 [ 262.316139] FS: 00007f28ea3e6400(0000) GS:ffff888364200000(0000) knlGS:0000000000000000 [ 262.327146] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 262.335815] CR2: 00000000000000b0 CR3: 00000003178ae004 CR4: 00000000001606e0 Fixes: 41411e2fd6b8 ("net/sched: act_tunnel_key: Add dst_cache support") Signed-off-by: Vlad Buslov <vladbu@mellanox.com> Reviewed-by: Roi Dayan <roid@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-02-25 23:30:14 +08:00
}
net/sched: act_tunnel_key: fix memory leak in case of action replace running the following TDC test cases: 7afc - Replace tunnel_key set action with all parameters 364d - Replace tunnel_key set action with all parameters and cookie it's possible to trigger kmemleak warnings like: unreferenced object 0xffff94797127ab40 (size 192): comm "tc", pid 3248, jiffies 4300565293 (age 1006.862s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 c0 93 f9 8a ff ff ff ff ................ 41 84 ee 89 ff ff ff ff 00 00 00 00 00 00 00 00 A............... backtrace: [<000000001e85b61c>] tunnel_key_init+0x31d/0x820 [act_tunnel_key] [<000000007f3f6ee7>] tcf_action_init_1+0x384/0x4c0 [<00000000e89e3ded>] tcf_action_init+0x12b/0x1a0 [<00000000c1c8c0f8>] tcf_action_add+0x73/0x170 [<0000000095a9fc28>] tc_ctl_action+0x122/0x160 [<000000004bebeac5>] rtnetlink_rcv_msg+0x263/0x2d0 [<000000009fd862dd>] netlink_rcv_skb+0x4a/0x110 [<00000000b55199e7>] netlink_unicast+0x1a0/0x250 [<000000004996cd21>] netlink_sendmsg+0x2c1/0x3c0 [<000000004d6a94b4>] sock_sendmsg+0x36/0x40 [<000000005d9f0208>] ___sys_sendmsg+0x280/0x2f0 [<00000000dec19023>] __sys_sendmsg+0x5e/0xa0 [<000000004b82ac81>] do_syscall_64+0x5b/0x180 [<00000000a0f1209a>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<000000002926b2ab>] 0xffffffffffffffff when the tunnel_key action is replaced, the kernel forgets to release the dst metadata: ensure they are released by tunnel_key_init(), the same way it's done in tunnel_key_release(). Fixes: d0f6dd8a914f4 ("net/sched: Introduce act_tunnel_key") Signed-off-by: Davide Caratti <dcaratti@redhat.com> Acked-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-01-11 03:21:02 +08:00
kfree_rcu(p, rcu);
}
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
struct tcf_tunnel_key_params *params_new;
struct metadata_dst *metadata = NULL;
struct tc_tunnel_key *parm;
struct tcf_tunnel_key *t;
bool exists = false;
__be16 dst_port = 0;
__be64 key_id = 0;
int opts_len = 0;
__be16 flags = 0;
u8 tos, ttl;
int ret = 0;
int err;
if (!nla) {
NL_SET_ERR_MSG(extack, "Tunnel requires attributes to be passed");
return -EINVAL;
}
err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy,
extack);
if (err < 0) {
NL_SET_ERR_MSG(extack, "Failed to parse nested tunnel key attributes");
return err;
}
if (!tb[TCA_TUNNEL_KEY_PARMS]) {
NL_SET_ERR_MSG(extack, "Missing tunnel key parameters");
return -EINVAL;
}
parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]);
err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
if (err < 0)
return err;
exists = err;
if (exists && bind)
return 0;
switch (parm->t_action) {
case TCA_TUNNEL_KEY_ACT_RELEASE:
break;
case TCA_TUNNEL_KEY_ACT_SET:
if (tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) {
__be32 key32;
key32 = nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID]);
key_id = key32_to_tunnel_id(key32);
flags = TUNNEL_KEY;
}
flags |= TUNNEL_CSUM;
if (tb[TCA_TUNNEL_KEY_NO_CSUM] &&
nla_get_u8(tb[TCA_TUNNEL_KEY_NO_CSUM]))
flags &= ~TUNNEL_CSUM;
if (tb[TCA_TUNNEL_KEY_ENC_DST_PORT])
dst_port = nla_get_be16(tb[TCA_TUNNEL_KEY_ENC_DST_PORT]);
if (tb[TCA_TUNNEL_KEY_ENC_OPTS]) {
opts_len = tunnel_key_get_opts_len(tb[TCA_TUNNEL_KEY_ENC_OPTS],
extack);
if (opts_len < 0) {
ret = opts_len;
goto err_out;
}
}
tos = 0;
if (tb[TCA_TUNNEL_KEY_ENC_TOS])
tos = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TOS]);
ttl = 0;
if (tb[TCA_TUNNEL_KEY_ENC_TTL])
ttl = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_TTL]);
if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] &&
tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) {
__be32 saddr;
__be32 daddr;
saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]);
daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]);
metadata = __ip_tun_set_dst(saddr, daddr, tos, ttl,
dst_port, flags,
key_id, opts_len);
} else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] &&
tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) {
struct in6_addr saddr;
struct in6_addr daddr;
saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]);
daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]);
metadata = __ipv6_tun_set_dst(&saddr, &daddr, tos, ttl, dst_port,
0, flags,
key_id, 0);
} else {
NL_SET_ERR_MSG(extack, "Missing either ipv4 or ipv6 src and dst");
ret = -EINVAL;
goto err_out;
}
if (!metadata) {
NL_SET_ERR_MSG(extack, "Cannot allocate tunnel metadata dst");
ret = -ENOMEM;
goto err_out;
}
#ifdef CONFIG_DST_CACHE
ret = dst_cache_init(&metadata->u.tun_info.dst_cache, GFP_KERNEL);
if (ret)
goto release_tun_meta;
#endif
if (opts_len) {
ret = tunnel_key_opts_set(tb[TCA_TUNNEL_KEY_ENC_OPTS],
&metadata->u.tun_info,
opts_len, extack);
if (ret < 0)
goto release_dst_cache;
}
metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX;
break;
default:
NL_SET_ERR_MSG(extack, "Unknown tunnel key action");
net sched actions: return explicit error when tunnel_key mode is not specified If set/unset mode of the tunnel_key action is not provided, ->init() still returns 0, and the caller proceeds with bogus 'struct tc_action *' object, this results in crash: % tc actions add action tunnel_key src_ip 1.1.1.1 dst_ip 2.2.2.1 id 7 index 1 [ 35.805515] general protection fault: 0000 [#1] SMP PTI [ 35.806161] Modules linked in: act_tunnel_key kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel aes_x86_64 crypto_simd glue_helper cryptd serio_raw [ 35.808233] CPU: 1 PID: 428 Comm: tc Not tainted 4.16.0-rc4+ #286 [ 35.808929] RIP: 0010:tcf_action_init+0x90/0x190 [ 35.809457] RSP: 0018:ffffb8edc068b9a0 EFLAGS: 00010206 [ 35.810053] RAX: 1320c000000a0003 RBX: 0000000000000001 RCX: 0000000000000000 [ 35.810866] RDX: 0000000000000070 RSI: 0000000000007965 RDI: ffffb8edc068b910 [ 35.811660] RBP: ffffb8edc068b9d0 R08: 0000000000000000 R09: ffffb8edc068b808 [ 35.812463] R10: ffffffffc02bf040 R11: 0000000000000040 R12: ffffb8edc068bb38 [ 35.813235] R13: 0000000000000000 R14: 0000000000000000 R15: ffffb8edc068b910 [ 35.814006] FS: 00007f3d0d8556c0(0000) GS:ffff91d1dbc40000(0000) knlGS:0000000000000000 [ 35.814881] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 35.815540] CR2: 000000000043f720 CR3: 0000000019248001 CR4: 00000000001606a0 [ 35.816457] Call Trace: [ 35.817158] tc_ctl_action+0x11a/0x220 [ 35.817795] rtnetlink_rcv_msg+0x23d/0x2e0 [ 35.818457] ? __slab_alloc+0x1c/0x30 [ 35.819079] ? __kmalloc_node_track_caller+0xb1/0x2b0 [ 35.819544] ? rtnl_calcit.isra.30+0xe0/0xe0 [ 35.820231] netlink_rcv_skb+0xce/0x100 [ 35.820744] netlink_unicast+0x164/0x220 [ 35.821500] netlink_sendmsg+0x293/0x370 [ 35.822040] sock_sendmsg+0x30/0x40 [ 35.822508] ___sys_sendmsg+0x2c5/0x2e0 [ 35.823149] ? pagecache_get_page+0x27/0x220 [ 35.823714] ? filemap_fault+0xa2/0x640 [ 35.824423] ? page_add_file_rmap+0x108/0x200 [ 35.825065] ? alloc_set_pte+0x2aa/0x530 [ 35.825585] ? finish_fault+0x4e/0x70 [ 35.826140] ? __handle_mm_fault+0xbc1/0x10d0 [ 35.826723] ? __sys_sendmsg+0x41/0x70 [ 35.827230] __sys_sendmsg+0x41/0x70 [ 35.827710] do_syscall_64+0x68/0x120 [ 35.828195] entry_SYSCALL_64_after_hwframe+0x3d/0xa2 [ 35.828859] RIP: 0033:0x7f3d0ca4da67 [ 35.829331] RSP: 002b:00007ffc9f284338 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 35.830304] RAX: ffffffffffffffda RBX: 00007ffc9f284460 RCX: 00007f3d0ca4da67 [ 35.831247] RDX: 0000000000000000 RSI: 00007ffc9f2843b0 RDI: 0000000000000003 [ 35.832167] RBP: 000000005aa6a7a9 R08: 0000000000000001 R09: 0000000000000000 [ 35.833075] R10: 00000000000005f1 R11: 0000000000000246 R12: 0000000000000000 [ 35.833997] R13: 00007ffc9f2884c0 R14: 0000000000000001 R15: 0000000000674640 [ 35.834923] Code: 24 30 bb 01 00 00 00 45 31 f6 eb 5e 8b 50 08 83 c2 07 83 e2 fc 83 c2 70 49 8b 07 48 8b 40 70 48 85 c0 74 10 48 89 14 24 4c 89 ff <ff> d0 48 8b 14 24 48 01 c2 49 01 d6 45 85 ed 74 05 41 83 47 2c [ 35.837442] RIP: tcf_action_init+0x90/0x190 RSP: ffffb8edc068b9a0 [ 35.838291] ---[ end trace a095c06ee4b97a26 ]--- Fixes: d0f6dd8a914f ("net/sched: Introduce act_tunnel_key") Signed-off-by: Roman Mashak <mrv@mojatatu.com> Acked-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-03-13 04:20:58 +08:00
ret = -EINVAL;
goto err_out;
}
if (!exists) {
ret = tcf_idr_create(tn, parm->index, est, a,
&act_tunnel_key_ops, bind, true);
if (ret) {
NL_SET_ERR_MSG(extack, "Cannot create TC IDR");
goto release_dst_cache;
}
ret = ACT_P_CREATED;
} else if (!ovr) {
NL_SET_ERR_MSG(extack, "TC IDR already exists");
net/sched: fix memory leak in act_tunnel_key_init() If users try to install act_tunnel_key 'set' rules with duplicate values of 'index', the tunnel metadata are allocated, but never released. Then, kmemleak complains as follows: # tc a a a tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 index 111 # echo clear > /sys/kernel/debug/kmemleak # tc a a a tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 index 111 Error: TC IDR already exists. We have an error talking to the kernel # echo scan > /sys/kernel/debug/kmemleak # cat /sys/kernel/debug/kmemleak unreferenced object 0xffff8800574e6c80 (size 256): comm "tc", pid 5617, jiffies 4298118009 (age 57.990s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 1c e8 b0 ff ff ff ff ................ 81 24 c2 ad ff ff ff ff 00 00 00 00 00 00 00 00 .$.............. backtrace: [<00000000b7afbf4e>] tunnel_key_init+0x8a5/0x1800 [act_tunnel_key] [<000000007d98fccd>] tcf_action_init_1+0x698/0xac0 [<0000000099b8f7cc>] tcf_action_init+0x15c/0x590 [<00000000dc60eebe>] tc_ctl_action+0x336/0x5c2 [<000000002f5a2f7d>] rtnetlink_rcv_msg+0x357/0x8e0 [<000000000bfe7575>] netlink_rcv_skb+0x124/0x350 [<00000000edab656f>] netlink_unicast+0x40f/0x5d0 [<00000000b322cdcb>] netlink_sendmsg+0x6e8/0xba0 [<0000000063d9d490>] sock_sendmsg+0xb3/0xf0 [<00000000f0d3315a>] ___sys_sendmsg+0x654/0x960 [<00000000c06cbd42>] __sys_sendmsg+0xd3/0x170 [<00000000ce72e4b0>] do_syscall_64+0xa5/0x470 [<000000005caa2d97>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<00000000fac1b476>] 0xffffffffffffffff This problem theoretically happens also in case users attempt to setup a geneve rule having wrong configuration data, or when the kernel fails to allocate 'params_new'. Ensure that tunnel_key_init() releases the tunnel metadata also in the above conditions. Addresses-Coverity-ID: 1373974 ("Resource leak") Fixes: d0f6dd8a914f4 ("net/sched: Introduce act_tunnel_key") Fixes: 0ed5269f9e41f ("net/sched: add tunnel option support to act_tunnel_key") Signed-off-by: Davide Caratti <dcaratti@redhat.com> Acked-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-09-05 01:00:19 +08:00
ret = -EEXIST;
goto release_dst_cache;
}
t = to_tunnel_key(*a);
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
if (unlikely(!params_new)) {
NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters");
net/sched: fix memory leak in act_tunnel_key_init() If users try to install act_tunnel_key 'set' rules with duplicate values of 'index', the tunnel metadata are allocated, but never released. Then, kmemleak complains as follows: # tc a a a tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 index 111 # echo clear > /sys/kernel/debug/kmemleak # tc a a a tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 index 111 Error: TC IDR already exists. We have an error talking to the kernel # echo scan > /sys/kernel/debug/kmemleak # cat /sys/kernel/debug/kmemleak unreferenced object 0xffff8800574e6c80 (size 256): comm "tc", pid 5617, jiffies 4298118009 (age 57.990s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 1c e8 b0 ff ff ff ff ................ 81 24 c2 ad ff ff ff ff 00 00 00 00 00 00 00 00 .$.............. backtrace: [<00000000b7afbf4e>] tunnel_key_init+0x8a5/0x1800 [act_tunnel_key] [<000000007d98fccd>] tcf_action_init_1+0x698/0xac0 [<0000000099b8f7cc>] tcf_action_init+0x15c/0x590 [<00000000dc60eebe>] tc_ctl_action+0x336/0x5c2 [<000000002f5a2f7d>] rtnetlink_rcv_msg+0x357/0x8e0 [<000000000bfe7575>] netlink_rcv_skb+0x124/0x350 [<00000000edab656f>] netlink_unicast+0x40f/0x5d0 [<00000000b322cdcb>] netlink_sendmsg+0x6e8/0xba0 [<0000000063d9d490>] sock_sendmsg+0xb3/0xf0 [<00000000f0d3315a>] ___sys_sendmsg+0x654/0x960 [<00000000c06cbd42>] __sys_sendmsg+0xd3/0x170 [<00000000ce72e4b0>] do_syscall_64+0xa5/0x470 [<000000005caa2d97>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<00000000fac1b476>] 0xffffffffffffffff This problem theoretically happens also in case users attempt to setup a geneve rule having wrong configuration data, or when the kernel fails to allocate 'params_new'. Ensure that tunnel_key_init() releases the tunnel metadata also in the above conditions. Addresses-Coverity-ID: 1373974 ("Resource leak") Fixes: d0f6dd8a914f4 ("net/sched: Introduce act_tunnel_key") Fixes: 0ed5269f9e41f ("net/sched: add tunnel option support to act_tunnel_key") Signed-off-by: Davide Caratti <dcaratti@redhat.com> Acked-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-09-05 01:00:19 +08:00
ret = -ENOMEM;
exists = true;
goto release_dst_cache;
}
params_new->tcft_action = parm->t_action;
params_new->tcft_enc_metadata = metadata;
net: sched: always disable bh when taking tcf_lock Recently, ops->init() and ops->dump() of all actions were modified to always obtain tcf_lock when accessing private action state. Actions that don't depend on tcf_lock for synchronization with their data path use non-bh locking API. However, tcf_lock is also used to protect rate estimator stats in softirq context by timer callback. Change ops->init() and ops->dump() of all actions to disable bh when using tcf_lock to prevent deadlock reported by following lockdep warning: [ 105.470398] ================================ [ 105.475014] WARNING: inconsistent lock state [ 105.479628] 4.18.0-rc8+ #664 Not tainted [ 105.483897] -------------------------------- [ 105.488511] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 105.494871] swapper/16/0 [HC0[0]:SC1[1]:HE1:SE0] takes: [ 105.500449] 00000000f86c012e (&(&p->tcfa_lock)->rlock){+.?.}, at: est_fetch_counters+0x3c/0xa0 [ 105.509696] {SOFTIRQ-ON-W} state was registered at: [ 105.514925] _raw_spin_lock+0x2c/0x40 [ 105.519022] tcf_bpf_init+0x579/0x820 [act_bpf] [ 105.523990] tcf_action_init_1+0x4e4/0x660 [ 105.528518] tcf_action_init+0x1ce/0x2d0 [ 105.532880] tcf_exts_validate+0x1d8/0x200 [ 105.537416] fl_change+0x55a/0x268b [cls_flower] [ 105.542469] tc_new_tfilter+0x748/0xa20 [ 105.546738] rtnetlink_rcv_msg+0x56a/0x6d0 [ 105.551268] netlink_rcv_skb+0x18d/0x200 [ 105.555628] netlink_unicast+0x2d0/0x370 [ 105.559990] netlink_sendmsg+0x3b9/0x6a0 [ 105.564349] sock_sendmsg+0x6b/0x80 [ 105.568271] ___sys_sendmsg+0x4a1/0x520 [ 105.572547] __sys_sendmsg+0xd7/0x150 [ 105.576655] do_syscall_64+0x72/0x2c0 [ 105.580757] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 105.586243] irq event stamp: 489296 [ 105.590084] hardirqs last enabled at (489296): [<ffffffffb507e639>] _raw_spin_unlock_irq+0x29/0x40 [ 105.599765] hardirqs last disabled at (489295): [<ffffffffb507e745>] _raw_spin_lock_irq+0x15/0x50 [ 105.609277] softirqs last enabled at (489292): [<ffffffffb413a6a3>] irq_enter+0x83/0xa0 [ 105.618001] softirqs last disabled at (489293): [<ffffffffb413a800>] irq_exit+0x140/0x190 [ 105.626813] other info that might help us debug this: [ 105.633976] Possible unsafe locking scenario: [ 105.640526] CPU0 [ 105.643325] ---- [ 105.646125] lock(&(&p->tcfa_lock)->rlock); [ 105.650747] <Interrupt> [ 105.653717] lock(&(&p->tcfa_lock)->rlock); [ 105.658514] *** DEADLOCK *** [ 105.665349] 1 lock held by swapper/16/0: [ 105.669629] #0: 00000000a640ad99 ((&est->timer)){+.-.}, at: call_timer_fn+0x10b/0x550 [ 105.678200] stack backtrace: [ 105.683194] CPU: 16 PID: 0 Comm: swapper/16 Not tainted 4.18.0-rc8+ #664 [ 105.690249] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 105.698626] Call Trace: [ 105.701421] <IRQ> [ 105.703791] dump_stack+0x92/0xeb [ 105.707461] print_usage_bug+0x336/0x34c [ 105.711744] mark_lock+0x7c9/0x980 [ 105.715500] ? print_shortest_lock_dependencies+0x2e0/0x2e0 [ 105.721424] ? check_usage_forwards+0x230/0x230 [ 105.726315] __lock_acquire+0x923/0x26f0 [ 105.730597] ? debug_show_all_locks+0x240/0x240 [ 105.735478] ? mark_lock+0x493/0x980 [ 105.739412] ? check_chain_key+0x140/0x1f0 [ 105.743861] ? __lock_acquire+0x836/0x26f0 [ 105.748323] ? lock_acquire+0x12e/0x290 [ 105.752516] lock_acquire+0x12e/0x290 [ 105.756539] ? est_fetch_counters+0x3c/0xa0 [ 105.761084] _raw_spin_lock+0x2c/0x40 [ 105.765099] ? est_fetch_counters+0x3c/0xa0 [ 105.769633] est_fetch_counters+0x3c/0xa0 [ 105.773995] est_timer+0x87/0x390 [ 105.777670] ? est_fetch_counters+0xa0/0xa0 [ 105.782210] ? lock_acquire+0x12e/0x290 [ 105.786410] call_timer_fn+0x161/0x550 [ 105.790512] ? est_fetch_counters+0xa0/0xa0 [ 105.795055] ? del_timer_sync+0xd0/0xd0 [ 105.799249] ? __lock_is_held+0x93/0x110 [ 105.803531] ? mark_held_locks+0x20/0xe0 [ 105.807813] ? _raw_spin_unlock_irq+0x29/0x40 [ 105.812525] ? est_fetch_counters+0xa0/0xa0 [ 105.817069] ? est_fetch_counters+0xa0/0xa0 [ 105.821610] run_timer_softirq+0x3c4/0x9f0 [ 105.826064] ? lock_acquire+0x12e/0x290 [ 105.830257] ? __bpf_trace_timer_class+0x10/0x10 [ 105.835237] ? __lock_is_held+0x25/0x110 [ 105.839517] __do_softirq+0x11d/0x7bf [ 105.843542] irq_exit+0x140/0x190 [ 105.847208] smp_apic_timer_interrupt+0xac/0x3b0 [ 105.852182] apic_timer_interrupt+0xf/0x20 [ 105.856628] </IRQ> [ 105.859081] RIP: 0010:cpuidle_enter_state+0xd8/0x4d0 [ 105.864395] Code: 46 ff 48 89 44 24 08 0f 1f 44 00 00 31 ff e8 cf ec 46 ff 80 7c 24 07 00 0f 85 1d 02 00 00 e8 9f 90 4b ff fb 66 0f 1f 44 00 00 <4c> 8b 6c 24 08 4d 29 fd 0f 80 36 03 00 00 4c 89 e8 48 ba cf f7 53 [ 105.884288] RSP: 0018:ffff8803ad94fd20 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 [ 105.892494] RAX: 0000000000000000 RBX: ffffe8fb300829c0 RCX: ffffffffb41e19e1 [ 105.899988] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8803ad9358ac [ 105.907503] RBP: ffffffffb6636300 R08: 0000000000000004 R09: 0000000000000000 [ 105.914997] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000004 [ 105.922487] R13: ffffffffb6636140 R14: ffffffffb66362d8 R15: 000000188d36091b [ 105.929988] ? trace_hardirqs_on_caller+0x141/0x2d0 [ 105.935232] do_idle+0x28e/0x320 [ 105.938817] ? arch_cpu_idle_exit+0x40/0x40 [ 105.943361] ? mark_lock+0x8c1/0x980 [ 105.947295] ? _raw_spin_unlock_irqrestore+0x32/0x60 [ 105.952619] cpu_startup_entry+0xc2/0xd0 [ 105.956900] ? cpu_in_idle+0x20/0x20 [ 105.960830] ? _raw_spin_unlock_irqrestore+0x32/0x60 [ 105.966146] ? trace_hardirqs_on_caller+0x141/0x2d0 [ 105.971391] start_secondary+0x2b5/0x360 [ 105.975669] ? set_cpu_sibling_map+0x1330/0x1330 [ 105.980654] secondary_startup_64+0xa5/0xb0 Taking tcf_lock in sample action with bh disabled causes lockdep to issue a warning regarding possible irq lock inversion dependency between tcf_lock, and psample_groups_lock that is taken when holding tcf_lock in sample init: [ 162.108959] Possible interrupt unsafe locking scenario: [ 162.116386] CPU0 CPU1 [ 162.121277] ---- ---- [ 162.126162] lock(psample_groups_lock); [ 162.130447] local_irq_disable(); [ 162.136772] lock(&(&p->tcfa_lock)->rlock); [ 162.143957] lock(psample_groups_lock); [ 162.150813] <Interrupt> [ 162.153808] lock(&(&p->tcfa_lock)->rlock); [ 162.158608] *** DEADLOCK *** In order to prevent potential lock inversion dependency between tcf_lock and psample_groups_lock, extract call to psample_group_get() from tcf_lock protected section in sample action init function. Fixes: 4e232818bd32 ("net: sched: act_mirred: remove dependency on rtnl lock") Fixes: 764e9a24480f ("net: sched: act_vlan: remove dependency on rtnl lock") Fixes: 729e01260989 ("net: sched: act_tunnel_key: remove dependency on rtnl lock") Fixes: d77284956656 ("net: sched: act_sample: remove dependency on rtnl lock") Fixes: e8917f437006 ("net: sched: act_gact: remove dependency on rtnl lock") Fixes: b6a2b971c0b0 ("net: sched: act_csum: remove dependency on rtnl lock") Fixes: 2142236b4584 ("net: sched: act_bpf: remove dependency on rtnl lock") Signed-off-by: Vlad Buslov <vladbu@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-15 02:46:16 +08:00
spin_lock_bh(&t->tcf_lock);
t->tcf_action = parm->action;
rcu_swap_protected(t->params, params_new,
lockdep_is_held(&t->tcf_lock));
net: sched: always disable bh when taking tcf_lock Recently, ops->init() and ops->dump() of all actions were modified to always obtain tcf_lock when accessing private action state. Actions that don't depend on tcf_lock for synchronization with their data path use non-bh locking API. However, tcf_lock is also used to protect rate estimator stats in softirq context by timer callback. Change ops->init() and ops->dump() of all actions to disable bh when using tcf_lock to prevent deadlock reported by following lockdep warning: [ 105.470398] ================================ [ 105.475014] WARNING: inconsistent lock state [ 105.479628] 4.18.0-rc8+ #664 Not tainted [ 105.483897] -------------------------------- [ 105.488511] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 105.494871] swapper/16/0 [HC0[0]:SC1[1]:HE1:SE0] takes: [ 105.500449] 00000000f86c012e (&(&p->tcfa_lock)->rlock){+.?.}, at: est_fetch_counters+0x3c/0xa0 [ 105.509696] {SOFTIRQ-ON-W} state was registered at: [ 105.514925] _raw_spin_lock+0x2c/0x40 [ 105.519022] tcf_bpf_init+0x579/0x820 [act_bpf] [ 105.523990] tcf_action_init_1+0x4e4/0x660 [ 105.528518] tcf_action_init+0x1ce/0x2d0 [ 105.532880] tcf_exts_validate+0x1d8/0x200 [ 105.537416] fl_change+0x55a/0x268b [cls_flower] [ 105.542469] tc_new_tfilter+0x748/0xa20 [ 105.546738] rtnetlink_rcv_msg+0x56a/0x6d0 [ 105.551268] netlink_rcv_skb+0x18d/0x200 [ 105.555628] netlink_unicast+0x2d0/0x370 [ 105.559990] netlink_sendmsg+0x3b9/0x6a0 [ 105.564349] sock_sendmsg+0x6b/0x80 [ 105.568271] ___sys_sendmsg+0x4a1/0x520 [ 105.572547] __sys_sendmsg+0xd7/0x150 [ 105.576655] do_syscall_64+0x72/0x2c0 [ 105.580757] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 105.586243] irq event stamp: 489296 [ 105.590084] hardirqs last enabled at (489296): [<ffffffffb507e639>] _raw_spin_unlock_irq+0x29/0x40 [ 105.599765] hardirqs last disabled at (489295): [<ffffffffb507e745>] _raw_spin_lock_irq+0x15/0x50 [ 105.609277] softirqs last enabled at (489292): [<ffffffffb413a6a3>] irq_enter+0x83/0xa0 [ 105.618001] softirqs last disabled at (489293): [<ffffffffb413a800>] irq_exit+0x140/0x190 [ 105.626813] other info that might help us debug this: [ 105.633976] Possible unsafe locking scenario: [ 105.640526] CPU0 [ 105.643325] ---- [ 105.646125] lock(&(&p->tcfa_lock)->rlock); [ 105.650747] <Interrupt> [ 105.653717] lock(&(&p->tcfa_lock)->rlock); [ 105.658514] *** DEADLOCK *** [ 105.665349] 1 lock held by swapper/16/0: [ 105.669629] #0: 00000000a640ad99 ((&est->timer)){+.-.}, at: call_timer_fn+0x10b/0x550 [ 105.678200] stack backtrace: [ 105.683194] CPU: 16 PID: 0 Comm: swapper/16 Not tainted 4.18.0-rc8+ #664 [ 105.690249] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 105.698626] Call Trace: [ 105.701421] <IRQ> [ 105.703791] dump_stack+0x92/0xeb [ 105.707461] print_usage_bug+0x336/0x34c [ 105.711744] mark_lock+0x7c9/0x980 [ 105.715500] ? print_shortest_lock_dependencies+0x2e0/0x2e0 [ 105.721424] ? check_usage_forwards+0x230/0x230 [ 105.726315] __lock_acquire+0x923/0x26f0 [ 105.730597] ? debug_show_all_locks+0x240/0x240 [ 105.735478] ? mark_lock+0x493/0x980 [ 105.739412] ? check_chain_key+0x140/0x1f0 [ 105.743861] ? __lock_acquire+0x836/0x26f0 [ 105.748323] ? lock_acquire+0x12e/0x290 [ 105.752516] lock_acquire+0x12e/0x290 [ 105.756539] ? est_fetch_counters+0x3c/0xa0 [ 105.761084] _raw_spin_lock+0x2c/0x40 [ 105.765099] ? est_fetch_counters+0x3c/0xa0 [ 105.769633] est_fetch_counters+0x3c/0xa0 [ 105.773995] est_timer+0x87/0x390 [ 105.777670] ? est_fetch_counters+0xa0/0xa0 [ 105.782210] ? lock_acquire+0x12e/0x290 [ 105.786410] call_timer_fn+0x161/0x550 [ 105.790512] ? est_fetch_counters+0xa0/0xa0 [ 105.795055] ? del_timer_sync+0xd0/0xd0 [ 105.799249] ? __lock_is_held+0x93/0x110 [ 105.803531] ? mark_held_locks+0x20/0xe0 [ 105.807813] ? _raw_spin_unlock_irq+0x29/0x40 [ 105.812525] ? est_fetch_counters+0xa0/0xa0 [ 105.817069] ? est_fetch_counters+0xa0/0xa0 [ 105.821610] run_timer_softirq+0x3c4/0x9f0 [ 105.826064] ? lock_acquire+0x12e/0x290 [ 105.830257] ? __bpf_trace_timer_class+0x10/0x10 [ 105.835237] ? __lock_is_held+0x25/0x110 [ 105.839517] __do_softirq+0x11d/0x7bf [ 105.843542] irq_exit+0x140/0x190 [ 105.847208] smp_apic_timer_interrupt+0xac/0x3b0 [ 105.852182] apic_timer_interrupt+0xf/0x20 [ 105.856628] </IRQ> [ 105.859081] RIP: 0010:cpuidle_enter_state+0xd8/0x4d0 [ 105.864395] Code: 46 ff 48 89 44 24 08 0f 1f 44 00 00 31 ff e8 cf ec 46 ff 80 7c 24 07 00 0f 85 1d 02 00 00 e8 9f 90 4b ff fb 66 0f 1f 44 00 00 <4c> 8b 6c 24 08 4d 29 fd 0f 80 36 03 00 00 4c 89 e8 48 ba cf f7 53 [ 105.884288] RSP: 0018:ffff8803ad94fd20 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 [ 105.892494] RAX: 0000000000000000 RBX: ffffe8fb300829c0 RCX: ffffffffb41e19e1 [ 105.899988] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8803ad9358ac [ 105.907503] RBP: ffffffffb6636300 R08: 0000000000000004 R09: 0000000000000000 [ 105.914997] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000004 [ 105.922487] R13: ffffffffb6636140 R14: ffffffffb66362d8 R15: 000000188d36091b [ 105.929988] ? trace_hardirqs_on_caller+0x141/0x2d0 [ 105.935232] do_idle+0x28e/0x320 [ 105.938817] ? arch_cpu_idle_exit+0x40/0x40 [ 105.943361] ? mark_lock+0x8c1/0x980 [ 105.947295] ? _raw_spin_unlock_irqrestore+0x32/0x60 [ 105.952619] cpu_startup_entry+0xc2/0xd0 [ 105.956900] ? cpu_in_idle+0x20/0x20 [ 105.960830] ? _raw_spin_unlock_irqrestore+0x32/0x60 [ 105.966146] ? trace_hardirqs_on_caller+0x141/0x2d0 [ 105.971391] start_secondary+0x2b5/0x360 [ 105.975669] ? set_cpu_sibling_map+0x1330/0x1330 [ 105.980654] secondary_startup_64+0xa5/0xb0 Taking tcf_lock in sample action with bh disabled causes lockdep to issue a warning regarding possible irq lock inversion dependency between tcf_lock, and psample_groups_lock that is taken when holding tcf_lock in sample init: [ 162.108959] Possible interrupt unsafe locking scenario: [ 162.116386] CPU0 CPU1 [ 162.121277] ---- ---- [ 162.126162] lock(psample_groups_lock); [ 162.130447] local_irq_disable(); [ 162.136772] lock(&(&p->tcfa_lock)->rlock); [ 162.143957] lock(psample_groups_lock); [ 162.150813] <Interrupt> [ 162.153808] lock(&(&p->tcfa_lock)->rlock); [ 162.158608] *** DEADLOCK *** In order to prevent potential lock inversion dependency between tcf_lock and psample_groups_lock, extract call to psample_group_get() from tcf_lock protected section in sample action init function. Fixes: 4e232818bd32 ("net: sched: act_mirred: remove dependency on rtnl lock") Fixes: 764e9a24480f ("net: sched: act_vlan: remove dependency on rtnl lock") Fixes: 729e01260989 ("net: sched: act_tunnel_key: remove dependency on rtnl lock") Fixes: d77284956656 ("net: sched: act_sample: remove dependency on rtnl lock") Fixes: e8917f437006 ("net: sched: act_gact: remove dependency on rtnl lock") Fixes: b6a2b971c0b0 ("net: sched: act_csum: remove dependency on rtnl lock") Fixes: 2142236b4584 ("net: sched: act_bpf: remove dependency on rtnl lock") Signed-off-by: Vlad Buslov <vladbu@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-15 02:46:16 +08:00
spin_unlock_bh(&t->tcf_lock);
net/sched: act_tunnel_key: fix memory leak in case of action replace running the following TDC test cases: 7afc - Replace tunnel_key set action with all parameters 364d - Replace tunnel_key set action with all parameters and cookie it's possible to trigger kmemleak warnings like: unreferenced object 0xffff94797127ab40 (size 192): comm "tc", pid 3248, jiffies 4300565293 (age 1006.862s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 c0 93 f9 8a ff ff ff ff ................ 41 84 ee 89 ff ff ff ff 00 00 00 00 00 00 00 00 A............... backtrace: [<000000001e85b61c>] tunnel_key_init+0x31d/0x820 [act_tunnel_key] [<000000007f3f6ee7>] tcf_action_init_1+0x384/0x4c0 [<00000000e89e3ded>] tcf_action_init+0x12b/0x1a0 [<00000000c1c8c0f8>] tcf_action_add+0x73/0x170 [<0000000095a9fc28>] tc_ctl_action+0x122/0x160 [<000000004bebeac5>] rtnetlink_rcv_msg+0x263/0x2d0 [<000000009fd862dd>] netlink_rcv_skb+0x4a/0x110 [<00000000b55199e7>] netlink_unicast+0x1a0/0x250 [<000000004996cd21>] netlink_sendmsg+0x2c1/0x3c0 [<000000004d6a94b4>] sock_sendmsg+0x36/0x40 [<000000005d9f0208>] ___sys_sendmsg+0x280/0x2f0 [<00000000dec19023>] __sys_sendmsg+0x5e/0xa0 [<000000004b82ac81>] do_syscall_64+0x5b/0x180 [<00000000a0f1209a>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<000000002926b2ab>] 0xffffffffffffffff when the tunnel_key action is replaced, the kernel forgets to release the dst metadata: ensure they are released by tunnel_key_init(), the same way it's done in tunnel_key_release(). Fixes: d0f6dd8a914f4 ("net/sched: Introduce act_tunnel_key") Signed-off-by: Davide Caratti <dcaratti@redhat.com> Acked-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-01-11 03:21:02 +08:00
tunnel_key_release_params(params_new);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
release_dst_cache:
#ifdef CONFIG_DST_CACHE
net: sched: act_tunnel_key: fix metadata handling Tunnel key action params->tcft_enc_metadata is only set when action is TCA_TUNNEL_KEY_ACT_SET. However, metadata pointer is incorrectly dereferenced during tunnel key init and release without verifying that action is if correct type, which causes NULL pointer dereference. Metadata tunnel dst_cache is also leaked on action overwrite. Fix metadata handling: - Verify that metadata pointer is not NULL before dereferencing it in tunnel_key_init error handling code. - Move dst_cache destroy code into tunnel_key_release_params() function that is called in both action overwrite and release cases (fixes resource leak) and verifies that actions has correct type before dereferencing metadata pointer (fixes NULL pointer dereference). Oops with KASAN enabled during tdc tests execution: [ 261.080482] ================================================================== [ 261.088049] BUG: KASAN: null-ptr-deref in dst_cache_destroy+0x21/0xa0 [ 261.094613] Read of size 8 at addr 00000000000000b0 by task tc/2976 [ 261.102524] CPU: 14 PID: 2976 Comm: tc Not tainted 5.0.0-rc7+ #157 [ 261.108844] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 261.116726] Call Trace: [ 261.119234] dump_stack+0x9a/0xeb [ 261.122625] ? dst_cache_destroy+0x21/0xa0 [ 261.126818] ? dst_cache_destroy+0x21/0xa0 [ 261.131004] kasan_report+0x176/0x192 [ 261.134752] ? idr_get_next+0xd0/0x120 [ 261.138578] ? dst_cache_destroy+0x21/0xa0 [ 261.142768] dst_cache_destroy+0x21/0xa0 [ 261.146799] tunnel_key_release+0x3a/0x50 [act_tunnel_key] [ 261.152392] tcf_action_cleanup+0x2c/0xc0 [ 261.156490] tcf_generic_walker+0x4c2/0x5c0 [ 261.160794] ? tcf_action_dump_1+0x390/0x390 [ 261.165163] ? tunnel_key_walker+0x5/0x1a0 [act_tunnel_key] [ 261.170865] ? tunnel_key_walker+0xe9/0x1a0 [act_tunnel_key] [ 261.176641] tca_action_gd+0x600/0xa40 [ 261.180482] ? tca_get_fill.constprop.17+0x200/0x200 [ 261.185548] ? __lock_acquire+0x588/0x1d20 [ 261.189741] ? __lock_acquire+0x588/0x1d20 [ 261.193922] ? mark_held_locks+0x90/0x90 [ 261.197944] ? mark_held_locks+0x90/0x90 [ 261.202018] ? __nla_parse+0xfe/0x190 [ 261.205774] tc_ctl_action+0x218/0x230 [ 261.209614] ? tcf_action_add+0x230/0x230 [ 261.213726] rtnetlink_rcv_msg+0x3a5/0x600 [ 261.217910] ? lock_downgrade+0x2d0/0x2d0 [ 261.222006] ? validate_linkmsg+0x400/0x400 [ 261.226278] ? find_held_lock+0x6d/0xd0 [ 261.230200] ? match_held_lock+0x1b/0x210 [ 261.234296] ? validate_linkmsg+0x400/0x400 [ 261.238567] netlink_rcv_skb+0xc7/0x1f0 [ 261.242489] ? netlink_ack+0x470/0x470 [ 261.246319] ? netlink_deliver_tap+0x1f3/0x5a0 [ 261.250874] netlink_unicast+0x2ae/0x350 [ 261.254884] ? netlink_attachskb+0x340/0x340 [ 261.261647] ? _copy_from_iter_full+0xdd/0x380 [ 261.268576] ? __virt_addr_valid+0xb6/0xf0 [ 261.275227] ? __check_object_size+0x159/0x240 [ 261.282184] netlink_sendmsg+0x4d3/0x630 [ 261.288572] ? netlink_unicast+0x350/0x350 [ 261.295132] ? netlink_unicast+0x350/0x350 [ 261.301608] sock_sendmsg+0x6d/0x80 [ 261.307467] ___sys_sendmsg+0x48e/0x540 [ 261.313633] ? copy_msghdr_from_user+0x210/0x210 [ 261.320545] ? save_stack+0x89/0xb0 [ 261.326289] ? __lock_acquire+0x588/0x1d20 [ 261.332605] ? entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 261.340063] ? mark_held_locks+0x90/0x90 [ 261.346162] ? do_filp_open+0x138/0x1d0 [ 261.352108] ? may_open_dev+0x50/0x50 [ 261.357897] ? match_held_lock+0x1b/0x210 [ 261.364016] ? __fget_light+0xa6/0xe0 [ 261.369840] ? __sys_sendmsg+0xd2/0x150 [ 261.375814] __sys_sendmsg+0xd2/0x150 [ 261.381610] ? __ia32_sys_shutdown+0x30/0x30 [ 261.388026] ? lock_downgrade+0x2d0/0x2d0 [ 261.394182] ? mark_held_locks+0x1c/0x90 [ 261.400230] ? do_syscall_64+0x1e/0x280 [ 261.406172] do_syscall_64+0x78/0x280 [ 261.411932] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 261.419103] RIP: 0033:0x7f28e91a8b87 [ 261.424791] Code: 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 80 00 00 00 00 8b 05 6a 2b 2c 00 48 63 d2 48 63 ff 85 c0 75 18 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 59 f3 c3 0f 1f 80 00 00 00 00 53 48 89 f3 48 [ 261.448226] RSP: 002b:00007ffdc5c4e2d8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 261.458183] RAX: ffffffffffffffda RBX: 000000005c73c202 RCX: 00007f28e91a8b87 [ 261.467728] RDX: 0000000000000000 RSI: 00007ffdc5c4e340 RDI: 0000000000000003 [ 261.477342] RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000000000c [ 261.486970] R10: 000000000000000c R11: 0000000000000246 R12: 0000000000000001 [ 261.496599] R13: 000000000067b4e0 R14: 00007ffdc5c5248c R15: 00007ffdc5c52480 [ 261.506281] ================================================================== [ 261.516076] Disabling lock debugging due to kernel taint [ 261.523979] BUG: unable to handle kernel NULL pointer dereference at 00000000000000b0 [ 261.534413] #PF error: [normal kernel read fault] [ 261.541730] PGD 8000000317400067 P4D 8000000317400067 PUD 316878067 PMD 0 [ 261.551294] Oops: 0000 [#1] SMP KASAN PTI [ 261.557985] CPU: 14 PID: 2976 Comm: tc Tainted: G B 5.0.0-rc7+ #157 [ 261.568306] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 261.578874] RIP: 0010:dst_cache_destroy+0x21/0xa0 [ 261.586413] Code: f4 ff ff ff eb f6 0f 1f 00 0f 1f 44 00 00 41 56 41 55 49 c7 c6 60 fe 35 af 41 54 55 49 89 fc 53 bd ff ff ff ff e8 ef 98 73 ff <49> 83 3c 24 00 75 35 eb 6c 4c 63 ed e8 de 98 73 ff 4a 8d 3c ed 40 [ 261.611247] RSP: 0018:ffff888316447160 EFLAGS: 00010282 [ 261.619564] RAX: 0000000000000000 RBX: ffff88835b3e2f00 RCX: ffffffffad1c5071 [ 261.629862] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: 0000000000000297 [ 261.640149] RBP: 00000000ffffffff R08: fffffbfff5dd4e89 R09: fffffbfff5dd4e89 [ 261.650467] R10: 0000000000000001 R11: fffffbfff5dd4e88 R12: 00000000000000b0 [ 261.660785] R13: ffff8883267a10c0 R14: ffffffffaf35fe60 R15: 0000000000000001 [ 261.671110] FS: 00007f28ea3e6400(0000) GS:ffff888364200000(0000) knlGS:0000000000000000 [ 261.682447] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 261.691491] CR2: 00000000000000b0 CR3: 00000003178ae004 CR4: 00000000001606e0 [ 261.701283] Call Trace: [ 261.706374] tunnel_key_release+0x3a/0x50 [act_tunnel_key] [ 261.714522] tcf_action_cleanup+0x2c/0xc0 [ 261.721208] tcf_generic_walker+0x4c2/0x5c0 [ 261.728074] ? tcf_action_dump_1+0x390/0x390 [ 261.734996] ? tunnel_key_walker+0x5/0x1a0 [act_tunnel_key] [ 261.743247] ? tunnel_key_walker+0xe9/0x1a0 [act_tunnel_key] [ 261.751557] tca_action_gd+0x600/0xa40 [ 261.757991] ? tca_get_fill.constprop.17+0x200/0x200 [ 261.765644] ? __lock_acquire+0x588/0x1d20 [ 261.772461] ? __lock_acquire+0x588/0x1d20 [ 261.779266] ? mark_held_locks+0x90/0x90 [ 261.785880] ? mark_held_locks+0x90/0x90 [ 261.792470] ? __nla_parse+0xfe/0x190 [ 261.798738] tc_ctl_action+0x218/0x230 [ 261.805145] ? tcf_action_add+0x230/0x230 [ 261.811760] rtnetlink_rcv_msg+0x3a5/0x600 [ 261.818564] ? lock_downgrade+0x2d0/0x2d0 [ 261.825433] ? validate_linkmsg+0x400/0x400 [ 261.832256] ? find_held_lock+0x6d/0xd0 [ 261.838624] ? match_held_lock+0x1b/0x210 [ 261.845142] ? validate_linkmsg+0x400/0x400 [ 261.851729] netlink_rcv_skb+0xc7/0x1f0 [ 261.857976] ? netlink_ack+0x470/0x470 [ 261.864132] ? netlink_deliver_tap+0x1f3/0x5a0 [ 261.870969] netlink_unicast+0x2ae/0x350 [ 261.877294] ? netlink_attachskb+0x340/0x340 [ 261.883962] ? _copy_from_iter_full+0xdd/0x380 [ 261.890750] ? __virt_addr_valid+0xb6/0xf0 [ 261.897188] ? __check_object_size+0x159/0x240 [ 261.903928] netlink_sendmsg+0x4d3/0x630 [ 261.910112] ? netlink_unicast+0x350/0x350 [ 261.916410] ? netlink_unicast+0x350/0x350 [ 261.922656] sock_sendmsg+0x6d/0x80 [ 261.928257] ___sys_sendmsg+0x48e/0x540 [ 261.934183] ? copy_msghdr_from_user+0x210/0x210 [ 261.940865] ? save_stack+0x89/0xb0 [ 261.946355] ? __lock_acquire+0x588/0x1d20 [ 261.952358] ? entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 261.959468] ? mark_held_locks+0x90/0x90 [ 261.965248] ? do_filp_open+0x138/0x1d0 [ 261.970910] ? may_open_dev+0x50/0x50 [ 261.976386] ? match_held_lock+0x1b/0x210 [ 261.982210] ? __fget_light+0xa6/0xe0 [ 261.987648] ? __sys_sendmsg+0xd2/0x150 [ 261.993263] __sys_sendmsg+0xd2/0x150 [ 261.998613] ? __ia32_sys_shutdown+0x30/0x30 [ 262.004555] ? lock_downgrade+0x2d0/0x2d0 [ 262.010236] ? mark_held_locks+0x1c/0x90 [ 262.015758] ? do_syscall_64+0x1e/0x280 [ 262.021234] do_syscall_64+0x78/0x280 [ 262.026500] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 262.033207] RIP: 0033:0x7f28e91a8b87 [ 262.038421] Code: 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 80 00 00 00 00 8b 05 6a 2b 2c 00 48 63 d2 48 63 ff 85 c0 75 18 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 59 f3 c3 0f 1f 80 00 00 00 00 53 48 89 f3 48 [ 262.060708] RSP: 002b:00007ffdc5c4e2d8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e [ 262.070112] RAX: ffffffffffffffda RBX: 000000005c73c202 RCX: 00007f28e91a8b87 [ 262.079087] RDX: 0000000000000000 RSI: 00007ffdc5c4e340 RDI: 0000000000000003 [ 262.088122] RBP: 0000000000000000 R08: 0000000000000001 R09: 000000000000000c [ 262.097157] R10: 000000000000000c R11: 0000000000000246 R12: 0000000000000001 [ 262.106207] R13: 000000000067b4e0 R14: 00007ffdc5c5248c R15: 00007ffdc5c52480 [ 262.115271] Modules linked in: act_tunnel_key act_skbmod act_simple act_connmark nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 act_csum libcrc32c act_meta_skbtcindex act_meta_skbprio act_meta_mark act_ife ife act_police act_sample psample act_gact veth nfsv3 nfs_acl nfs lockd grace fscache bridge stp llc intel_rapl sb_edac mlx5_ib x86_pkg_temp_thermal sunrpc intel_powerclamp coretemp ib_uverbs kvm_intel ib_core kvm irqbypass mlx5_core crct10dif_pclmul crc32_pclmul crc32c_intel igb ghash_clmulni_intel intel_cstate mlxfw iTCO_wdt devlink intel_uncore iTCO_vendor_support ipmi_ssif ptp mei_me intel_rapl_perf ioatdma joydev pps_core ses mei i2c_i801 pcspkr enclosure lpc_ich dca wmi ipmi_si ipmi_devintf ipmi_msghandler acpi_pad acpi_power_meter pcc_cpufreq ast i2c_algo_bit drm_kms_helper ttm drm mpt3sas raid_class scsi_transport_sas [ 262.204393] CR2: 00000000000000b0 [ 262.210390] ---[ end trace 2e41d786f2c7901a ]--- [ 262.226790] RIP: 0010:dst_cache_destroy+0x21/0xa0 [ 262.234083] Code: f4 ff ff ff eb f6 0f 1f 00 0f 1f 44 00 00 41 56 41 55 49 c7 c6 60 fe 35 af 41 54 55 49 89 fc 53 bd ff ff ff ff e8 ef 98 73 ff <49> 83 3c 24 00 75 35 eb 6c 4c 63 ed e8 de 98 73 ff 4a 8d 3c ed 40 [ 262.258311] RSP: 0018:ffff888316447160 EFLAGS: 00010282 [ 262.266304] RAX: 0000000000000000 RBX: ffff88835b3e2f00 RCX: ffffffffad1c5071 [ 262.276251] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: 0000000000000297 [ 262.286208] RBP: 00000000ffffffff R08: fffffbfff5dd4e89 R09: fffffbfff5dd4e89 [ 262.296183] R10: 0000000000000001 R11: fffffbfff5dd4e88 R12: 00000000000000b0 [ 262.306157] R13: ffff8883267a10c0 R14: ffffffffaf35fe60 R15: 0000000000000001 [ 262.316139] FS: 00007f28ea3e6400(0000) GS:ffff888364200000(0000) knlGS:0000000000000000 [ 262.327146] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 262.335815] CR2: 00000000000000b0 CR3: 00000003178ae004 CR4: 00000000001606e0 Fixes: 41411e2fd6b8 ("net/sched: act_tunnel_key: Add dst_cache support") Signed-off-by: Vlad Buslov <vladbu@mellanox.com> Reviewed-by: Roi Dayan <roid@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-02-25 23:30:14 +08:00
if (metadata)
dst_cache_destroy(&metadata->u.tun_info.dst_cache);
net/sched: fix memory leak in act_tunnel_key_init() If users try to install act_tunnel_key 'set' rules with duplicate values of 'index', the tunnel metadata are allocated, but never released. Then, kmemleak complains as follows: # tc a a a tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 index 111 # echo clear > /sys/kernel/debug/kmemleak # tc a a a tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 index 111 Error: TC IDR already exists. We have an error talking to the kernel # echo scan > /sys/kernel/debug/kmemleak # cat /sys/kernel/debug/kmemleak unreferenced object 0xffff8800574e6c80 (size 256): comm "tc", pid 5617, jiffies 4298118009 (age 57.990s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 1c e8 b0 ff ff ff ff ................ 81 24 c2 ad ff ff ff ff 00 00 00 00 00 00 00 00 .$.............. backtrace: [<00000000b7afbf4e>] tunnel_key_init+0x8a5/0x1800 [act_tunnel_key] [<000000007d98fccd>] tcf_action_init_1+0x698/0xac0 [<0000000099b8f7cc>] tcf_action_init+0x15c/0x590 [<00000000dc60eebe>] tc_ctl_action+0x336/0x5c2 [<000000002f5a2f7d>] rtnetlink_rcv_msg+0x357/0x8e0 [<000000000bfe7575>] netlink_rcv_skb+0x124/0x350 [<00000000edab656f>] netlink_unicast+0x40f/0x5d0 [<00000000b322cdcb>] netlink_sendmsg+0x6e8/0xba0 [<0000000063d9d490>] sock_sendmsg+0xb3/0xf0 [<00000000f0d3315a>] ___sys_sendmsg+0x654/0x960 [<00000000c06cbd42>] __sys_sendmsg+0xd3/0x170 [<00000000ce72e4b0>] do_syscall_64+0xa5/0x470 [<000000005caa2d97>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<00000000fac1b476>] 0xffffffffffffffff This problem theoretically happens also in case users attempt to setup a geneve rule having wrong configuration data, or when the kernel fails to allocate 'params_new'. Ensure that tunnel_key_init() releases the tunnel metadata also in the above conditions. Addresses-Coverity-ID: 1373974 ("Resource leak") Fixes: d0f6dd8a914f4 ("net/sched: Introduce act_tunnel_key") Fixes: 0ed5269f9e41f ("net/sched: add tunnel option support to act_tunnel_key") Signed-off-by: Davide Caratti <dcaratti@redhat.com> Acked-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-09-05 01:00:19 +08:00
release_tun_meta:
#endif
if (metadata)
dst_release(&metadata->dst);
net/sched: fix memory leak in act_tunnel_key_init() If users try to install act_tunnel_key 'set' rules with duplicate values of 'index', the tunnel metadata are allocated, but never released. Then, kmemleak complains as follows: # tc a a a tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 index 111 # echo clear > /sys/kernel/debug/kmemleak # tc a a a tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 index 111 Error: TC IDR already exists. We have an error talking to the kernel # echo scan > /sys/kernel/debug/kmemleak # cat /sys/kernel/debug/kmemleak unreferenced object 0xffff8800574e6c80 (size 256): comm "tc", pid 5617, jiffies 4298118009 (age 57.990s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 1c e8 b0 ff ff ff ff ................ 81 24 c2 ad ff ff ff ff 00 00 00 00 00 00 00 00 .$.............. backtrace: [<00000000b7afbf4e>] tunnel_key_init+0x8a5/0x1800 [act_tunnel_key] [<000000007d98fccd>] tcf_action_init_1+0x698/0xac0 [<0000000099b8f7cc>] tcf_action_init+0x15c/0x590 [<00000000dc60eebe>] tc_ctl_action+0x336/0x5c2 [<000000002f5a2f7d>] rtnetlink_rcv_msg+0x357/0x8e0 [<000000000bfe7575>] netlink_rcv_skb+0x124/0x350 [<00000000edab656f>] netlink_unicast+0x40f/0x5d0 [<00000000b322cdcb>] netlink_sendmsg+0x6e8/0xba0 [<0000000063d9d490>] sock_sendmsg+0xb3/0xf0 [<00000000f0d3315a>] ___sys_sendmsg+0x654/0x960 [<00000000c06cbd42>] __sys_sendmsg+0xd3/0x170 [<00000000ce72e4b0>] do_syscall_64+0xa5/0x470 [<000000005caa2d97>] entry_SYSCALL_64_after_hwframe+0x49/0xbe [<00000000fac1b476>] 0xffffffffffffffff This problem theoretically happens also in case users attempt to setup a geneve rule having wrong configuration data, or when the kernel fails to allocate 'params_new'. Ensure that tunnel_key_init() releases the tunnel metadata also in the above conditions. Addresses-Coverity-ID: 1373974 ("Resource leak") Fixes: d0f6dd8a914f4 ("net/sched: Introduce act_tunnel_key") Fixes: 0ed5269f9e41f ("net/sched: add tunnel option support to act_tunnel_key") Signed-off-by: Davide Caratti <dcaratti@redhat.com> Acked-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-09-05 01:00:19 +08:00
err_out:
if (exists)
tcf_idr_release(*a, bind);
else
tcf_idr_cleanup(tn, parm->index);
return ret;
}
static void tunnel_key_release(struct tc_action *a)
{
struct tcf_tunnel_key *t = to_tunnel_key(a);
struct tcf_tunnel_key_params *params;
params = rcu_dereference_protected(t->params, 1);
net/sched: act_tunnel_key: fix memory leak in case of action replace running the following TDC test cases: 7afc - Replace tunnel_key set action with all parameters 364d - Replace tunnel_key set action with all parameters and cookie it's possible to trigger kmemleak warnings like: unreferenced object 0xffff94797127ab40 (size 192): comm "tc", pid 3248, jiffies 4300565293 (age 1006.862s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 c0 93 f9 8a ff ff ff ff ................ 41 84 ee 89 ff ff ff ff 00 00 00 00 00 00 00 00 A............... backtrace: [<000000001e85b61c>] tunnel_key_init+0x31d/0x820 [act_tunnel_key] [<000000007f3f6ee7>] tcf_action_init_1+0x384/0x4c0 [<00000000e89e3ded>] tcf_action_init+0x12b/0x1a0 [<00000000c1c8c0f8>] tcf_action_add+0x73/0x170 [<0000000095a9fc28>] tc_ctl_action+0x122/0x160 [<000000004bebeac5>] rtnetlink_rcv_msg+0x263/0x2d0 [<000000009fd862dd>] netlink_rcv_skb+0x4a/0x110 [<00000000b55199e7>] netlink_unicast+0x1a0/0x250 [<000000004996cd21>] netlink_sendmsg+0x2c1/0x3c0 [<000000004d6a94b4>] sock_sendmsg+0x36/0x40 [<000000005d9f0208>] ___sys_sendmsg+0x280/0x2f0 [<00000000dec19023>] __sys_sendmsg+0x5e/0xa0 [<000000004b82ac81>] do_syscall_64+0x5b/0x180 [<00000000a0f1209a>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<000000002926b2ab>] 0xffffffffffffffff when the tunnel_key action is replaced, the kernel forgets to release the dst metadata: ensure they are released by tunnel_key_init(), the same way it's done in tunnel_key_release(). Fixes: d0f6dd8a914f4 ("net/sched: Introduce act_tunnel_key") Signed-off-by: Davide Caratti <dcaratti@redhat.com> Acked-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2019-01-11 03:21:02 +08:00
tunnel_key_release_params(params);
}
static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
int len = info->options_len;
u8 *src = (u8 *)(info + 1);
struct nlattr *start;
start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE);
if (!start)
return -EMSGSIZE;
while (len > 0) {
struct geneve_opt *opt = (struct geneve_opt *)src;
if (nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_CLASS,
opt->opt_class) ||
nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_TYPE,
opt->type) ||
nla_put(skb, TCA_TUNNEL_KEY_ENC_OPT_GENEVE_DATA,
opt->length * 4, opt + 1)) {
nla_nest_cancel(skb, start);
return -EMSGSIZE;
}
len -= sizeof(struct geneve_opt) + opt->length * 4;
src += sizeof(struct geneve_opt) + opt->length * 4;
}
nla_nest_end(skb, start);
return 0;
}
static int tunnel_key_opts_dump(struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
struct nlattr *start;
int err = -EINVAL;
if (!info->options_len)
return 0;
start = nla_nest_start(skb, TCA_TUNNEL_KEY_ENC_OPTS);
if (!start)
return -EMSGSIZE;
if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
err = tunnel_key_geneve_opts_dump(skb, info);
if (err)
goto err_out;
} else {
err_out:
nla_nest_cancel(skb, start);
return err;
}
nla_nest_end(skb, start);
return 0;
}
static int tunnel_key_dump_addresses(struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
unsigned short family = ip_tunnel_info_af(info);
if (family == AF_INET) {
__be32 saddr = info->key.u.ipv4.src;
__be32 daddr = info->key.u.ipv4.dst;
if (!nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_SRC, saddr) &&
!nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_DST, daddr))
return 0;
}
if (family == AF_INET6) {
const struct in6_addr *saddr6 = &info->key.u.ipv6.src;
const struct in6_addr *daddr6 = &info->key.u.ipv6.dst;
if (!nla_put_in6_addr(skb,
TCA_TUNNEL_KEY_ENC_IPV6_SRC, saddr6) &&
!nla_put_in6_addr(skb,
TCA_TUNNEL_KEY_ENC_IPV6_DST, daddr6))
return 0;
}
return -EINVAL;
}
static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_tunnel_key *t = to_tunnel_key(a);
struct tcf_tunnel_key_params *params;
struct tc_tunnel_key opt = {
.index = t->tcf_index,
.refcnt = refcount_read(&t->tcf_refcnt) - ref,
.bindcnt = atomic_read(&t->tcf_bindcnt) - bind,
};
struct tcf_t tm;
net: sched: always disable bh when taking tcf_lock Recently, ops->init() and ops->dump() of all actions were modified to always obtain tcf_lock when accessing private action state. Actions that don't depend on tcf_lock for synchronization with their data path use non-bh locking API. However, tcf_lock is also used to protect rate estimator stats in softirq context by timer callback. Change ops->init() and ops->dump() of all actions to disable bh when using tcf_lock to prevent deadlock reported by following lockdep warning: [ 105.470398] ================================ [ 105.475014] WARNING: inconsistent lock state [ 105.479628] 4.18.0-rc8+ #664 Not tainted [ 105.483897] -------------------------------- [ 105.488511] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 105.494871] swapper/16/0 [HC0[0]:SC1[1]:HE1:SE0] takes: [ 105.500449] 00000000f86c012e (&(&p->tcfa_lock)->rlock){+.?.}, at: est_fetch_counters+0x3c/0xa0 [ 105.509696] {SOFTIRQ-ON-W} state was registered at: [ 105.514925] _raw_spin_lock+0x2c/0x40 [ 105.519022] tcf_bpf_init+0x579/0x820 [act_bpf] [ 105.523990] tcf_action_init_1+0x4e4/0x660 [ 105.528518] tcf_action_init+0x1ce/0x2d0 [ 105.532880] tcf_exts_validate+0x1d8/0x200 [ 105.537416] fl_change+0x55a/0x268b [cls_flower] [ 105.542469] tc_new_tfilter+0x748/0xa20 [ 105.546738] rtnetlink_rcv_msg+0x56a/0x6d0 [ 105.551268] netlink_rcv_skb+0x18d/0x200 [ 105.555628] netlink_unicast+0x2d0/0x370 [ 105.559990] netlink_sendmsg+0x3b9/0x6a0 [ 105.564349] sock_sendmsg+0x6b/0x80 [ 105.568271] ___sys_sendmsg+0x4a1/0x520 [ 105.572547] __sys_sendmsg+0xd7/0x150 [ 105.576655] do_syscall_64+0x72/0x2c0 [ 105.580757] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 105.586243] irq event stamp: 489296 [ 105.590084] hardirqs last enabled at (489296): [<ffffffffb507e639>] _raw_spin_unlock_irq+0x29/0x40 [ 105.599765] hardirqs last disabled at (489295): [<ffffffffb507e745>] _raw_spin_lock_irq+0x15/0x50 [ 105.609277] softirqs last enabled at (489292): [<ffffffffb413a6a3>] irq_enter+0x83/0xa0 [ 105.618001] softirqs last disabled at (489293): [<ffffffffb413a800>] irq_exit+0x140/0x190 [ 105.626813] other info that might help us debug this: [ 105.633976] Possible unsafe locking scenario: [ 105.640526] CPU0 [ 105.643325] ---- [ 105.646125] lock(&(&p->tcfa_lock)->rlock); [ 105.650747] <Interrupt> [ 105.653717] lock(&(&p->tcfa_lock)->rlock); [ 105.658514] *** DEADLOCK *** [ 105.665349] 1 lock held by swapper/16/0: [ 105.669629] #0: 00000000a640ad99 ((&est->timer)){+.-.}, at: call_timer_fn+0x10b/0x550 [ 105.678200] stack backtrace: [ 105.683194] CPU: 16 PID: 0 Comm: swapper/16 Not tainted 4.18.0-rc8+ #664 [ 105.690249] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 105.698626] Call Trace: [ 105.701421] <IRQ> [ 105.703791] dump_stack+0x92/0xeb [ 105.707461] print_usage_bug+0x336/0x34c [ 105.711744] mark_lock+0x7c9/0x980 [ 105.715500] ? print_shortest_lock_dependencies+0x2e0/0x2e0 [ 105.721424] ? check_usage_forwards+0x230/0x230 [ 105.726315] __lock_acquire+0x923/0x26f0 [ 105.730597] ? debug_show_all_locks+0x240/0x240 [ 105.735478] ? mark_lock+0x493/0x980 [ 105.739412] ? check_chain_key+0x140/0x1f0 [ 105.743861] ? __lock_acquire+0x836/0x26f0 [ 105.748323] ? lock_acquire+0x12e/0x290 [ 105.752516] lock_acquire+0x12e/0x290 [ 105.756539] ? est_fetch_counters+0x3c/0xa0 [ 105.761084] _raw_spin_lock+0x2c/0x40 [ 105.765099] ? est_fetch_counters+0x3c/0xa0 [ 105.769633] est_fetch_counters+0x3c/0xa0 [ 105.773995] est_timer+0x87/0x390 [ 105.777670] ? est_fetch_counters+0xa0/0xa0 [ 105.782210] ? lock_acquire+0x12e/0x290 [ 105.786410] call_timer_fn+0x161/0x550 [ 105.790512] ? est_fetch_counters+0xa0/0xa0 [ 105.795055] ? del_timer_sync+0xd0/0xd0 [ 105.799249] ? __lock_is_held+0x93/0x110 [ 105.803531] ? mark_held_locks+0x20/0xe0 [ 105.807813] ? _raw_spin_unlock_irq+0x29/0x40 [ 105.812525] ? est_fetch_counters+0xa0/0xa0 [ 105.817069] ? est_fetch_counters+0xa0/0xa0 [ 105.821610] run_timer_softirq+0x3c4/0x9f0 [ 105.826064] ? lock_acquire+0x12e/0x290 [ 105.830257] ? __bpf_trace_timer_class+0x10/0x10 [ 105.835237] ? __lock_is_held+0x25/0x110 [ 105.839517] __do_softirq+0x11d/0x7bf [ 105.843542] irq_exit+0x140/0x190 [ 105.847208] smp_apic_timer_interrupt+0xac/0x3b0 [ 105.852182] apic_timer_interrupt+0xf/0x20 [ 105.856628] </IRQ> [ 105.859081] RIP: 0010:cpuidle_enter_state+0xd8/0x4d0 [ 105.864395] Code: 46 ff 48 89 44 24 08 0f 1f 44 00 00 31 ff e8 cf ec 46 ff 80 7c 24 07 00 0f 85 1d 02 00 00 e8 9f 90 4b ff fb 66 0f 1f 44 00 00 <4c> 8b 6c 24 08 4d 29 fd 0f 80 36 03 00 00 4c 89 e8 48 ba cf f7 53 [ 105.884288] RSP: 0018:ffff8803ad94fd20 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 [ 105.892494] RAX: 0000000000000000 RBX: ffffe8fb300829c0 RCX: ffffffffb41e19e1 [ 105.899988] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8803ad9358ac [ 105.907503] RBP: ffffffffb6636300 R08: 0000000000000004 R09: 0000000000000000 [ 105.914997] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000004 [ 105.922487] R13: ffffffffb6636140 R14: ffffffffb66362d8 R15: 000000188d36091b [ 105.929988] ? trace_hardirqs_on_caller+0x141/0x2d0 [ 105.935232] do_idle+0x28e/0x320 [ 105.938817] ? arch_cpu_idle_exit+0x40/0x40 [ 105.943361] ? mark_lock+0x8c1/0x980 [ 105.947295] ? _raw_spin_unlock_irqrestore+0x32/0x60 [ 105.952619] cpu_startup_entry+0xc2/0xd0 [ 105.956900] ? cpu_in_idle+0x20/0x20 [ 105.960830] ? _raw_spin_unlock_irqrestore+0x32/0x60 [ 105.966146] ? trace_hardirqs_on_caller+0x141/0x2d0 [ 105.971391] start_secondary+0x2b5/0x360 [ 105.975669] ? set_cpu_sibling_map+0x1330/0x1330 [ 105.980654] secondary_startup_64+0xa5/0xb0 Taking tcf_lock in sample action with bh disabled causes lockdep to issue a warning regarding possible irq lock inversion dependency between tcf_lock, and psample_groups_lock that is taken when holding tcf_lock in sample init: [ 162.108959] Possible interrupt unsafe locking scenario: [ 162.116386] CPU0 CPU1 [ 162.121277] ---- ---- [ 162.126162] lock(psample_groups_lock); [ 162.130447] local_irq_disable(); [ 162.136772] lock(&(&p->tcfa_lock)->rlock); [ 162.143957] lock(psample_groups_lock); [ 162.150813] <Interrupt> [ 162.153808] lock(&(&p->tcfa_lock)->rlock); [ 162.158608] *** DEADLOCK *** In order to prevent potential lock inversion dependency between tcf_lock and psample_groups_lock, extract call to psample_group_get() from tcf_lock protected section in sample action init function. Fixes: 4e232818bd32 ("net: sched: act_mirred: remove dependency on rtnl lock") Fixes: 764e9a24480f ("net: sched: act_vlan: remove dependency on rtnl lock") Fixes: 729e01260989 ("net: sched: act_tunnel_key: remove dependency on rtnl lock") Fixes: d77284956656 ("net: sched: act_sample: remove dependency on rtnl lock") Fixes: e8917f437006 ("net: sched: act_gact: remove dependency on rtnl lock") Fixes: b6a2b971c0b0 ("net: sched: act_csum: remove dependency on rtnl lock") Fixes: 2142236b4584 ("net: sched: act_bpf: remove dependency on rtnl lock") Signed-off-by: Vlad Buslov <vladbu@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-15 02:46:16 +08:00
spin_lock_bh(&t->tcf_lock);
params = rcu_dereference_protected(t->params,
lockdep_is_held(&t->tcf_lock));
opt.action = t->tcf_action;
opt.t_action = params->tcft_action;
if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
struct ip_tunnel_info *info =
&params->tcft_enc_metadata->u.tun_info;
struct ip_tunnel_key *key = &info->key;
__be32 key_id = tunnel_id_to_key32(key->tun_id);
if (((key->tun_flags & TUNNEL_KEY) &&
nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id)) ||
tunnel_key_dump_addresses(skb,
&params->tcft_enc_metadata->u.tun_info) ||
(key->tp_dst &&
nla_put_be16(skb, TCA_TUNNEL_KEY_ENC_DST_PORT,
key->tp_dst)) ||
nla_put_u8(skb, TCA_TUNNEL_KEY_NO_CSUM,
!(key->tun_flags & TUNNEL_CSUM)) ||
tunnel_key_opts_dump(skb, info))
goto nla_put_failure;
if (key->tos && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TOS, key->tos))
goto nla_put_failure;
if (key->ttl && nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_TTL, key->ttl))
goto nla_put_failure;
}
tcf_tm_dump(&tm, &t->tcf_tm);
if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm),
&tm, TCA_TUNNEL_KEY_PAD))
goto nla_put_failure;
net: sched: always disable bh when taking tcf_lock Recently, ops->init() and ops->dump() of all actions were modified to always obtain tcf_lock when accessing private action state. Actions that don't depend on tcf_lock for synchronization with their data path use non-bh locking API. However, tcf_lock is also used to protect rate estimator stats in softirq context by timer callback. Change ops->init() and ops->dump() of all actions to disable bh when using tcf_lock to prevent deadlock reported by following lockdep warning: [ 105.470398] ================================ [ 105.475014] WARNING: inconsistent lock state [ 105.479628] 4.18.0-rc8+ #664 Not tainted [ 105.483897] -------------------------------- [ 105.488511] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 105.494871] swapper/16/0 [HC0[0]:SC1[1]:HE1:SE0] takes: [ 105.500449] 00000000f86c012e (&(&p->tcfa_lock)->rlock){+.?.}, at: est_fetch_counters+0x3c/0xa0 [ 105.509696] {SOFTIRQ-ON-W} state was registered at: [ 105.514925] _raw_spin_lock+0x2c/0x40 [ 105.519022] tcf_bpf_init+0x579/0x820 [act_bpf] [ 105.523990] tcf_action_init_1+0x4e4/0x660 [ 105.528518] tcf_action_init+0x1ce/0x2d0 [ 105.532880] tcf_exts_validate+0x1d8/0x200 [ 105.537416] fl_change+0x55a/0x268b [cls_flower] [ 105.542469] tc_new_tfilter+0x748/0xa20 [ 105.546738] rtnetlink_rcv_msg+0x56a/0x6d0 [ 105.551268] netlink_rcv_skb+0x18d/0x200 [ 105.555628] netlink_unicast+0x2d0/0x370 [ 105.559990] netlink_sendmsg+0x3b9/0x6a0 [ 105.564349] sock_sendmsg+0x6b/0x80 [ 105.568271] ___sys_sendmsg+0x4a1/0x520 [ 105.572547] __sys_sendmsg+0xd7/0x150 [ 105.576655] do_syscall_64+0x72/0x2c0 [ 105.580757] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 105.586243] irq event stamp: 489296 [ 105.590084] hardirqs last enabled at (489296): [<ffffffffb507e639>] _raw_spin_unlock_irq+0x29/0x40 [ 105.599765] hardirqs last disabled at (489295): [<ffffffffb507e745>] _raw_spin_lock_irq+0x15/0x50 [ 105.609277] softirqs last enabled at (489292): [<ffffffffb413a6a3>] irq_enter+0x83/0xa0 [ 105.618001] softirqs last disabled at (489293): [<ffffffffb413a800>] irq_exit+0x140/0x190 [ 105.626813] other info that might help us debug this: [ 105.633976] Possible unsafe locking scenario: [ 105.640526] CPU0 [ 105.643325] ---- [ 105.646125] lock(&(&p->tcfa_lock)->rlock); [ 105.650747] <Interrupt> [ 105.653717] lock(&(&p->tcfa_lock)->rlock); [ 105.658514] *** DEADLOCK *** [ 105.665349] 1 lock held by swapper/16/0: [ 105.669629] #0: 00000000a640ad99 ((&est->timer)){+.-.}, at: call_timer_fn+0x10b/0x550 [ 105.678200] stack backtrace: [ 105.683194] CPU: 16 PID: 0 Comm: swapper/16 Not tainted 4.18.0-rc8+ #664 [ 105.690249] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 105.698626] Call Trace: [ 105.701421] <IRQ> [ 105.703791] dump_stack+0x92/0xeb [ 105.707461] print_usage_bug+0x336/0x34c [ 105.711744] mark_lock+0x7c9/0x980 [ 105.715500] ? print_shortest_lock_dependencies+0x2e0/0x2e0 [ 105.721424] ? check_usage_forwards+0x230/0x230 [ 105.726315] __lock_acquire+0x923/0x26f0 [ 105.730597] ? debug_show_all_locks+0x240/0x240 [ 105.735478] ? mark_lock+0x493/0x980 [ 105.739412] ? check_chain_key+0x140/0x1f0 [ 105.743861] ? __lock_acquire+0x836/0x26f0 [ 105.748323] ? lock_acquire+0x12e/0x290 [ 105.752516] lock_acquire+0x12e/0x290 [ 105.756539] ? est_fetch_counters+0x3c/0xa0 [ 105.761084] _raw_spin_lock+0x2c/0x40 [ 105.765099] ? est_fetch_counters+0x3c/0xa0 [ 105.769633] est_fetch_counters+0x3c/0xa0 [ 105.773995] est_timer+0x87/0x390 [ 105.777670] ? est_fetch_counters+0xa0/0xa0 [ 105.782210] ? lock_acquire+0x12e/0x290 [ 105.786410] call_timer_fn+0x161/0x550 [ 105.790512] ? est_fetch_counters+0xa0/0xa0 [ 105.795055] ? del_timer_sync+0xd0/0xd0 [ 105.799249] ? __lock_is_held+0x93/0x110 [ 105.803531] ? mark_held_locks+0x20/0xe0 [ 105.807813] ? _raw_spin_unlock_irq+0x29/0x40 [ 105.812525] ? est_fetch_counters+0xa0/0xa0 [ 105.817069] ? est_fetch_counters+0xa0/0xa0 [ 105.821610] run_timer_softirq+0x3c4/0x9f0 [ 105.826064] ? lock_acquire+0x12e/0x290 [ 105.830257] ? __bpf_trace_timer_class+0x10/0x10 [ 105.835237] ? __lock_is_held+0x25/0x110 [ 105.839517] __do_softirq+0x11d/0x7bf [ 105.843542] irq_exit+0x140/0x190 [ 105.847208] smp_apic_timer_interrupt+0xac/0x3b0 [ 105.852182] apic_timer_interrupt+0xf/0x20 [ 105.856628] </IRQ> [ 105.859081] RIP: 0010:cpuidle_enter_state+0xd8/0x4d0 [ 105.864395] Code: 46 ff 48 89 44 24 08 0f 1f 44 00 00 31 ff e8 cf ec 46 ff 80 7c 24 07 00 0f 85 1d 02 00 00 e8 9f 90 4b ff fb 66 0f 1f 44 00 00 <4c> 8b 6c 24 08 4d 29 fd 0f 80 36 03 00 00 4c 89 e8 48 ba cf f7 53 [ 105.884288] RSP: 0018:ffff8803ad94fd20 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 [ 105.892494] RAX: 0000000000000000 RBX: ffffe8fb300829c0 RCX: ffffffffb41e19e1 [ 105.899988] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8803ad9358ac [ 105.907503] RBP: ffffffffb6636300 R08: 0000000000000004 R09: 0000000000000000 [ 105.914997] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000004 [ 105.922487] R13: ffffffffb6636140 R14: ffffffffb66362d8 R15: 000000188d36091b [ 105.929988] ? trace_hardirqs_on_caller+0x141/0x2d0 [ 105.935232] do_idle+0x28e/0x320 [ 105.938817] ? arch_cpu_idle_exit+0x40/0x40 [ 105.943361] ? mark_lock+0x8c1/0x980 [ 105.947295] ? _raw_spin_unlock_irqrestore+0x32/0x60 [ 105.952619] cpu_startup_entry+0xc2/0xd0 [ 105.956900] ? cpu_in_idle+0x20/0x20 [ 105.960830] ? _raw_spin_unlock_irqrestore+0x32/0x60 [ 105.966146] ? trace_hardirqs_on_caller+0x141/0x2d0 [ 105.971391] start_secondary+0x2b5/0x360 [ 105.975669] ? set_cpu_sibling_map+0x1330/0x1330 [ 105.980654] secondary_startup_64+0xa5/0xb0 Taking tcf_lock in sample action with bh disabled causes lockdep to issue a warning regarding possible irq lock inversion dependency between tcf_lock, and psample_groups_lock that is taken when holding tcf_lock in sample init: [ 162.108959] Possible interrupt unsafe locking scenario: [ 162.116386] CPU0 CPU1 [ 162.121277] ---- ---- [ 162.126162] lock(psample_groups_lock); [ 162.130447] local_irq_disable(); [ 162.136772] lock(&(&p->tcfa_lock)->rlock); [ 162.143957] lock(psample_groups_lock); [ 162.150813] <Interrupt> [ 162.153808] lock(&(&p->tcfa_lock)->rlock); [ 162.158608] *** DEADLOCK *** In order to prevent potential lock inversion dependency between tcf_lock and psample_groups_lock, extract call to psample_group_get() from tcf_lock protected section in sample action init function. Fixes: 4e232818bd32 ("net: sched: act_mirred: remove dependency on rtnl lock") Fixes: 764e9a24480f ("net: sched: act_vlan: remove dependency on rtnl lock") Fixes: 729e01260989 ("net: sched: act_tunnel_key: remove dependency on rtnl lock") Fixes: d77284956656 ("net: sched: act_sample: remove dependency on rtnl lock") Fixes: e8917f437006 ("net: sched: act_gact: remove dependency on rtnl lock") Fixes: b6a2b971c0b0 ("net: sched: act_csum: remove dependency on rtnl lock") Fixes: 2142236b4584 ("net: sched: act_bpf: remove dependency on rtnl lock") Signed-off-by: Vlad Buslov <vladbu@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-15 02:46:16 +08:00
spin_unlock_bh(&t->tcf_lock);
return skb->len;
nla_put_failure:
net: sched: always disable bh when taking tcf_lock Recently, ops->init() and ops->dump() of all actions were modified to always obtain tcf_lock when accessing private action state. Actions that don't depend on tcf_lock for synchronization with their data path use non-bh locking API. However, tcf_lock is also used to protect rate estimator stats in softirq context by timer callback. Change ops->init() and ops->dump() of all actions to disable bh when using tcf_lock to prevent deadlock reported by following lockdep warning: [ 105.470398] ================================ [ 105.475014] WARNING: inconsistent lock state [ 105.479628] 4.18.0-rc8+ #664 Not tainted [ 105.483897] -------------------------------- [ 105.488511] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. [ 105.494871] swapper/16/0 [HC0[0]:SC1[1]:HE1:SE0] takes: [ 105.500449] 00000000f86c012e (&(&p->tcfa_lock)->rlock){+.?.}, at: est_fetch_counters+0x3c/0xa0 [ 105.509696] {SOFTIRQ-ON-W} state was registered at: [ 105.514925] _raw_spin_lock+0x2c/0x40 [ 105.519022] tcf_bpf_init+0x579/0x820 [act_bpf] [ 105.523990] tcf_action_init_1+0x4e4/0x660 [ 105.528518] tcf_action_init+0x1ce/0x2d0 [ 105.532880] tcf_exts_validate+0x1d8/0x200 [ 105.537416] fl_change+0x55a/0x268b [cls_flower] [ 105.542469] tc_new_tfilter+0x748/0xa20 [ 105.546738] rtnetlink_rcv_msg+0x56a/0x6d0 [ 105.551268] netlink_rcv_skb+0x18d/0x200 [ 105.555628] netlink_unicast+0x2d0/0x370 [ 105.559990] netlink_sendmsg+0x3b9/0x6a0 [ 105.564349] sock_sendmsg+0x6b/0x80 [ 105.568271] ___sys_sendmsg+0x4a1/0x520 [ 105.572547] __sys_sendmsg+0xd7/0x150 [ 105.576655] do_syscall_64+0x72/0x2c0 [ 105.580757] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 105.586243] irq event stamp: 489296 [ 105.590084] hardirqs last enabled at (489296): [<ffffffffb507e639>] _raw_spin_unlock_irq+0x29/0x40 [ 105.599765] hardirqs last disabled at (489295): [<ffffffffb507e745>] _raw_spin_lock_irq+0x15/0x50 [ 105.609277] softirqs last enabled at (489292): [<ffffffffb413a6a3>] irq_enter+0x83/0xa0 [ 105.618001] softirqs last disabled at (489293): [<ffffffffb413a800>] irq_exit+0x140/0x190 [ 105.626813] other info that might help us debug this: [ 105.633976] Possible unsafe locking scenario: [ 105.640526] CPU0 [ 105.643325] ---- [ 105.646125] lock(&(&p->tcfa_lock)->rlock); [ 105.650747] <Interrupt> [ 105.653717] lock(&(&p->tcfa_lock)->rlock); [ 105.658514] *** DEADLOCK *** [ 105.665349] 1 lock held by swapper/16/0: [ 105.669629] #0: 00000000a640ad99 ((&est->timer)){+.-.}, at: call_timer_fn+0x10b/0x550 [ 105.678200] stack backtrace: [ 105.683194] CPU: 16 PID: 0 Comm: swapper/16 Not tainted 4.18.0-rc8+ #664 [ 105.690249] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017 [ 105.698626] Call Trace: [ 105.701421] <IRQ> [ 105.703791] dump_stack+0x92/0xeb [ 105.707461] print_usage_bug+0x336/0x34c [ 105.711744] mark_lock+0x7c9/0x980 [ 105.715500] ? print_shortest_lock_dependencies+0x2e0/0x2e0 [ 105.721424] ? check_usage_forwards+0x230/0x230 [ 105.726315] __lock_acquire+0x923/0x26f0 [ 105.730597] ? debug_show_all_locks+0x240/0x240 [ 105.735478] ? mark_lock+0x493/0x980 [ 105.739412] ? check_chain_key+0x140/0x1f0 [ 105.743861] ? __lock_acquire+0x836/0x26f0 [ 105.748323] ? lock_acquire+0x12e/0x290 [ 105.752516] lock_acquire+0x12e/0x290 [ 105.756539] ? est_fetch_counters+0x3c/0xa0 [ 105.761084] _raw_spin_lock+0x2c/0x40 [ 105.765099] ? est_fetch_counters+0x3c/0xa0 [ 105.769633] est_fetch_counters+0x3c/0xa0 [ 105.773995] est_timer+0x87/0x390 [ 105.777670] ? est_fetch_counters+0xa0/0xa0 [ 105.782210] ? lock_acquire+0x12e/0x290 [ 105.786410] call_timer_fn+0x161/0x550 [ 105.790512] ? est_fetch_counters+0xa0/0xa0 [ 105.795055] ? del_timer_sync+0xd0/0xd0 [ 105.799249] ? __lock_is_held+0x93/0x110 [ 105.803531] ? mark_held_locks+0x20/0xe0 [ 105.807813] ? _raw_spin_unlock_irq+0x29/0x40 [ 105.812525] ? est_fetch_counters+0xa0/0xa0 [ 105.817069] ? est_fetch_counters+0xa0/0xa0 [ 105.821610] run_timer_softirq+0x3c4/0x9f0 [ 105.826064] ? lock_acquire+0x12e/0x290 [ 105.830257] ? __bpf_trace_timer_class+0x10/0x10 [ 105.835237] ? __lock_is_held+0x25/0x110 [ 105.839517] __do_softirq+0x11d/0x7bf [ 105.843542] irq_exit+0x140/0x190 [ 105.847208] smp_apic_timer_interrupt+0xac/0x3b0 [ 105.852182] apic_timer_interrupt+0xf/0x20 [ 105.856628] </IRQ> [ 105.859081] RIP: 0010:cpuidle_enter_state+0xd8/0x4d0 [ 105.864395] Code: 46 ff 48 89 44 24 08 0f 1f 44 00 00 31 ff e8 cf ec 46 ff 80 7c 24 07 00 0f 85 1d 02 00 00 e8 9f 90 4b ff fb 66 0f 1f 44 00 00 <4c> 8b 6c 24 08 4d 29 fd 0f 80 36 03 00 00 4c 89 e8 48 ba cf f7 53 [ 105.884288] RSP: 0018:ffff8803ad94fd20 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 [ 105.892494] RAX: 0000000000000000 RBX: ffffe8fb300829c0 RCX: ffffffffb41e19e1 [ 105.899988] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8803ad9358ac [ 105.907503] RBP: ffffffffb6636300 R08: 0000000000000004 R09: 0000000000000000 [ 105.914997] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000004 [ 105.922487] R13: ffffffffb6636140 R14: ffffffffb66362d8 R15: 000000188d36091b [ 105.929988] ? trace_hardirqs_on_caller+0x141/0x2d0 [ 105.935232] do_idle+0x28e/0x320 [ 105.938817] ? arch_cpu_idle_exit+0x40/0x40 [ 105.943361] ? mark_lock+0x8c1/0x980 [ 105.947295] ? _raw_spin_unlock_irqrestore+0x32/0x60 [ 105.952619] cpu_startup_entry+0xc2/0xd0 [ 105.956900] ? cpu_in_idle+0x20/0x20 [ 105.960830] ? _raw_spin_unlock_irqrestore+0x32/0x60 [ 105.966146] ? trace_hardirqs_on_caller+0x141/0x2d0 [ 105.971391] start_secondary+0x2b5/0x360 [ 105.975669] ? set_cpu_sibling_map+0x1330/0x1330 [ 105.980654] secondary_startup_64+0xa5/0xb0 Taking tcf_lock in sample action with bh disabled causes lockdep to issue a warning regarding possible irq lock inversion dependency between tcf_lock, and psample_groups_lock that is taken when holding tcf_lock in sample init: [ 162.108959] Possible interrupt unsafe locking scenario: [ 162.116386] CPU0 CPU1 [ 162.121277] ---- ---- [ 162.126162] lock(psample_groups_lock); [ 162.130447] local_irq_disable(); [ 162.136772] lock(&(&p->tcfa_lock)->rlock); [ 162.143957] lock(psample_groups_lock); [ 162.150813] <Interrupt> [ 162.153808] lock(&(&p->tcfa_lock)->rlock); [ 162.158608] *** DEADLOCK *** In order to prevent potential lock inversion dependency between tcf_lock and psample_groups_lock, extract call to psample_group_get() from tcf_lock protected section in sample action init function. Fixes: 4e232818bd32 ("net: sched: act_mirred: remove dependency on rtnl lock") Fixes: 764e9a24480f ("net: sched: act_vlan: remove dependency on rtnl lock") Fixes: 729e01260989 ("net: sched: act_tunnel_key: remove dependency on rtnl lock") Fixes: d77284956656 ("net: sched: act_sample: remove dependency on rtnl lock") Fixes: e8917f437006 ("net: sched: act_gact: remove dependency on rtnl lock") Fixes: b6a2b971c0b0 ("net: sched: act_csum: remove dependency on rtnl lock") Fixes: 2142236b4584 ("net: sched: act_bpf: remove dependency on rtnl lock") Signed-off-by: Vlad Buslov <vladbu@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2018-08-15 02:46:16 +08:00
spin_unlock_bh(&t->tcf_lock);
nlmsg_trim(skb, b);
return -1;
}
static int tunnel_key_walker(struct net *net, struct sk_buff *skb,
struct netlink_callback *cb, int type,
const struct tc_action_ops *ops,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
return tcf_generic_walker(tn, skb, cb, type, ops, extack);
}
static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
return tcf_idr_search(tn, a, index);
}
static struct tc_action_ops act_tunnel_key_ops = {
.kind = "tunnel_key",
.id = TCA_ID_TUNNEL_KEY,
.owner = THIS_MODULE,
.act = tunnel_key_act,
.dump = tunnel_key_dump,
.init = tunnel_key_init,
.cleanup = tunnel_key_release,
.walk = tunnel_key_walker,
.lookup = tunnel_key_search,
.size = sizeof(struct tcf_tunnel_key),
};
static __net_init int tunnel_key_init_net(struct net *net)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
return tc_action_net_init(tn, &act_tunnel_key_ops);
}
static void __net_exit tunnel_key_exit_net(struct list_head *net_list)
{
tc_action_net_exit(net_list, tunnel_key_net_id);
}
static struct pernet_operations tunnel_key_net_ops = {
.init = tunnel_key_init_net,
.exit_batch = tunnel_key_exit_net,
.id = &tunnel_key_net_id,
.size = sizeof(struct tc_action_net),
};
static int __init tunnel_key_init_module(void)
{
return tcf_register_action(&act_tunnel_key_ops, &tunnel_key_net_ops);
}
static void __exit tunnel_key_cleanup_module(void)
{
tcf_unregister_action(&act_tunnel_key_ops, &tunnel_key_net_ops);
}
module_init(tunnel_key_init_module);
module_exit(tunnel_key_cleanup_module);
MODULE_AUTHOR("Amir Vadai <amir@vadai.me>");
MODULE_DESCRIPTION("ip tunnel manipulation actions");
MODULE_LICENSE("GPL v2");