2019-05-19 20:08:20 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2016-10-21 18:03:44 +08:00
|
|
|
#include <linux/module.h>
|
|
|
|
|
|
|
|
#include <linux/inet_diag.h>
|
|
|
|
#include <linux/sock_diag.h>
|
|
|
|
|
2016-10-25 23:53:22 +08:00
|
|
|
#include <net/inet_sock.h>
|
2016-10-21 18:03:44 +08:00
|
|
|
#include <net/raw.h>
|
|
|
|
#include <net/rawv6.h>
|
|
|
|
|
|
|
|
/*
 * Prefix every pr_*() message from this file with the module name.
 * Any earlier definition of pr_fmt is discarded first; #undef on a name
 * that is not currently a macro is a no-op, so no #ifdef guard is needed.
 */
#undef pr_fmt
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
|
|
|
|
static struct raw_hashinfo *
|
|
|
|
raw_get_hashinfo(const struct inet_diag_req_v2 *r)
|
|
|
|
{
|
|
|
|
if (r->sdiag_family == AF_INET) {
|
|
|
|
return &raw_v4_hashinfo;
|
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
|
|
} else if (r->sdiag_family == AF_INET6) {
|
|
|
|
return &raw_v6_hashinfo;
|
|
|
|
#endif
|
|
|
|
} else {
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Due to requirement of not breaking user API we can't simply
|
|
|
|
* rename @pad field in inet_diag_req_v2 structure, instead
|
|
|
|
* use helper to figure it out.
|
|
|
|
*/
|
|
|
|
|
2023-03-16 23:32:02 +08:00
|
|
|
static bool raw_lookup(struct net *net, const struct sock *sk,
|
2022-06-18 11:47:04 +08:00
|
|
|
const struct inet_diag_req_v2 *req)
|
2016-10-21 18:03:44 +08:00
|
|
|
{
|
|
|
|
struct inet_diag_req_raw *r = (void *)req;
|
|
|
|
|
|
|
|
if (r->sdiag_family == AF_INET)
|
2022-06-18 11:47:04 +08:00
|
|
|
return raw_v4_match(net, sk, r->sdiag_raw_protocol,
|
|
|
|
r->id.idiag_dst[0],
|
|
|
|
r->id.idiag_src[0],
|
|
|
|
r->id.idiag_if, 0);
|
2016-10-21 18:03:44 +08:00
|
|
|
#if IS_ENABLED(CONFIG_IPV6)
|
|
|
|
else
|
2022-06-18 11:47:04 +08:00
|
|
|
return raw_v6_match(net, sk, r->sdiag_raw_protocol,
|
|
|
|
(const struct in6_addr *)r->id.idiag_src,
|
|
|
|
(const struct in6_addr *)r->id.idiag_dst,
|
|
|
|
r->id.idiag_if, 0);
|
2016-10-21 18:03:44 +08:00
|
|
|
#endif
|
2022-06-18 11:47:04 +08:00
|
|
|
return false;
|
2016-10-21 18:03:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r)
|
|
|
|
{
|
|
|
|
struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
|
raw: Fix NULL deref in raw_get_next().
Dae R. Jeong reported a NULL deref in raw_get_next() [0].
It seems that the repro was running these sequences in parallel so
that one thread was iterating on a socket that was being freed in
another netns.
unshare(0x40060200)
r0 = syz_open_procfs(0x0, &(0x7f0000002080)='net/raw\x00')
socket$inet_icmp_raw(0x2, 0x3, 0x1)
pread64(r0, &(0x7f0000000000)=""/10, 0xa, 0x10000000007f)
After commit 0daf07e52709 ("raw: convert raw sockets to RCU"), we
use RCU and hlist_nulls_for_each_entry() to iterate over SOCK_RAW
sockets. However, we should use spinlock for slow paths to avoid
the NULL deref.
Also, SOCK_RAW does not use SLAB_TYPESAFE_BY_RCU, and the slab object
is not reused during iteration in the grace period. In fact, the
lockless readers do not check the nulls marker with get_nulls_value().
So, SOCK_RAW should use hlist instead of hlist_nulls.
Instead of adding an unnecessary barrier by sk_nulls_for_each_rcu(),
let's convert hlist_nulls to hlist and use sk_for_each_rcu() for
fast paths and sk_for_each() and spinlock for /proc/net/raw.
[0]:
general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN
KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f]
CPU: 2 PID: 20952 Comm: syz-executor.0 Not tainted 6.2.0-g048ec869bafd-dirty #7
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline]
RIP: 0010:sock_net include/net/sock.h:649 [inline]
RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline]
RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline]
RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995
Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef
RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206
RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000
RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338
RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9
R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78
R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030
FS: 00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055bb9614b35f CR3: 000000003c672000 CR4: 00000000003506e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
seq_read_iter+0x4c6/0x10f0 fs/seq_file.c:225
seq_read+0x224/0x320 fs/seq_file.c:162
pde_read fs/proc/inode.c:316 [inline]
proc_reg_read+0x23f/0x330 fs/proc/inode.c:328
vfs_read+0x31e/0xd30 fs/read_write.c:468
ksys_pread64 fs/read_write.c:665 [inline]
__do_sys_pread64 fs/read_write.c:675 [inline]
__se_sys_pread64 fs/read_write.c:672 [inline]
__x64_sys_pread64+0x1e9/0x280 fs/read_write.c:672
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x4e/0xa0 arch/x86/entry/common.c:82
entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x478d29
Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f843ae8dbe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000011
RAX: ffffffffffffffda RBX: 0000000000791408 RCX: 0000000000478d29
RDX: 000000000000000a RSI: 0000000020000000 RDI: 0000000000000003
RBP: 00000000f477909a R08: 0000000000000000 R09: 0000000000000000
R10: 000010000000007f R11: 0000000000000246 R12: 0000000000791740
R13: 0000000000791414 R14: 0000000000791408 R15: 00007ffc2eb48a50
</TASK>
Modules linked in:
---[ end trace 0000000000000000 ]---
RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline]
RIP: 0010:sock_net include/net/sock.h:649 [inline]
RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline]
RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline]
RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995
Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef
RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206
RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000
RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338
RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9
R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78
R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030
FS: 00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f92ff166000 CR3: 000000003c672000 CR4: 00000000003506e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Fixes: 0daf07e52709 ("raw: convert raw sockets to RCU")
Reported-by: syzbot <syzkaller@googlegroups.com>
Reported-by: Dae R. Jeong <threeearcat@gmail.com>
Link: https://lore.kernel.org/netdev/ZCA2mGV_cmq7lIfV@dragonet/
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-04-04 03:49:58 +08:00
|
|
|
struct hlist_head *hlist;
|
2022-06-18 11:47:04 +08:00
|
|
|
struct sock *sk;
|
2016-10-21 18:03:44 +08:00
|
|
|
int slot;
|
|
|
|
|
|
|
|
if (IS_ERR(hashinfo))
|
|
|
|
return ERR_CAST(hashinfo);
|
|
|
|
|
2022-06-18 11:47:05 +08:00
|
|
|
rcu_read_lock();
|
2016-10-21 18:03:44 +08:00
|
|
|
for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) {
|
2022-06-18 11:47:05 +08:00
|
|
|
hlist = &hashinfo->ht[slot];
|
raw: Fix NULL deref in raw_get_next().
Dae R. Jeong reported a NULL deref in raw_get_next() [0].
It seems that the repro was running these sequences in parallel so
that one thread was iterating on a socket that was being freed in
another netns.
unshare(0x40060200)
r0 = syz_open_procfs(0x0, &(0x7f0000002080)='net/raw\x00')
socket$inet_icmp_raw(0x2, 0x3, 0x1)
pread64(r0, &(0x7f0000000000)=""/10, 0xa, 0x10000000007f)
After commit 0daf07e52709 ("raw: convert raw sockets to RCU"), we
use RCU and hlist_nulls_for_each_entry() to iterate over SOCK_RAW
sockets. However, we should use spinlock for slow paths to avoid
the NULL deref.
Also, SOCK_RAW does not use SLAB_TYPESAFE_BY_RCU, and the slab object
is not reused during iteration in the grace period. In fact, the
lockless readers do not check the nulls marker with get_nulls_value().
So, SOCK_RAW should use hlist instead of hlist_nulls.
Instead of adding an unnecessary barrier by sk_nulls_for_each_rcu(),
let's convert hlist_nulls to hlist and use sk_for_each_rcu() for
fast paths and sk_for_each() and spinlock for /proc/net/raw.
[0]:
general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN
KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f]
CPU: 2 PID: 20952 Comm: syz-executor.0 Not tainted 6.2.0-g048ec869bafd-dirty #7
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline]
RIP: 0010:sock_net include/net/sock.h:649 [inline]
RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline]
RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline]
RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995
Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef
RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206
RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000
RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338
RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9
R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78
R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030
FS: 00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055bb9614b35f CR3: 000000003c672000 CR4: 00000000003506e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
seq_read_iter+0x4c6/0x10f0 fs/seq_file.c:225
seq_read+0x224/0x320 fs/seq_file.c:162
pde_read fs/proc/inode.c:316 [inline]
proc_reg_read+0x23f/0x330 fs/proc/inode.c:328
vfs_read+0x31e/0xd30 fs/read_write.c:468
ksys_pread64 fs/read_write.c:665 [inline]
__do_sys_pread64 fs/read_write.c:675 [inline]
__se_sys_pread64 fs/read_write.c:672 [inline]
__x64_sys_pread64+0x1e9/0x280 fs/read_write.c:672
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x4e/0xa0 arch/x86/entry/common.c:82
entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x478d29
Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f843ae8dbe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000011
RAX: ffffffffffffffda RBX: 0000000000791408 RCX: 0000000000478d29
RDX: 000000000000000a RSI: 0000000020000000 RDI: 0000000000000003
RBP: 00000000f477909a R08: 0000000000000000 R09: 0000000000000000
R10: 000010000000007f R11: 0000000000000246 R12: 0000000000791740
R13: 0000000000791414 R14: 0000000000791408 R15: 00007ffc2eb48a50
</TASK>
Modules linked in:
---[ end trace 0000000000000000 ]---
RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline]
RIP: 0010:sock_net include/net/sock.h:649 [inline]
RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline]
RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline]
RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995
Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef
RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206
RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000
RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338
RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9
R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78
R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030
FS: 00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f92ff166000 CR3: 000000003c672000 CR4: 00000000003506e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Fixes: 0daf07e52709 ("raw: convert raw sockets to RCU")
Reported-by: syzbot <syzkaller@googlegroups.com>
Reported-by: Dae R. Jeong <threeearcat@gmail.com>
Link: https://lore.kernel.org/netdev/ZCA2mGV_cmq7lIfV@dragonet/
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-04-04 03:49:58 +08:00
|
|
|
sk_for_each_rcu(sk, hlist) {
|
2022-06-18 11:47:04 +08:00
|
|
|
if (raw_lookup(net, sk, r)) {
|
2016-10-21 18:03:44 +08:00
|
|
|
/*
|
|
|
|
* Grab it and keep until we fill
|
2022-06-18 11:47:05 +08:00
|
|
|
* diag message to be reported, so
|
2016-10-21 18:03:44 +08:00
|
|
|
* caller should call sock_put then.
|
|
|
|
*/
|
2022-06-18 11:47:05 +08:00
|
|
|
if (refcount_inc_not_zero(&sk->sk_refcnt))
|
|
|
|
goto out_unlock;
|
2016-10-21 18:03:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-06-18 11:47:04 +08:00
|
|
|
sk = ERR_PTR(-ENOENT);
|
2016-11-02 20:36:32 +08:00
|
|
|
out_unlock:
|
2022-06-18 11:47:05 +08:00
|
|
|
rcu_read_unlock();
|
2016-10-21 18:03:44 +08:00
|
|
|
|
2022-06-18 11:47:04 +08:00
|
|
|
return sk;
|
2016-10-21 18:03:44 +08:00
|
|
|
}
|
|
|
|
|
2020-02-26 07:04:09 +08:00
|
|
|
static int raw_diag_dump_one(struct netlink_callback *cb,
|
2016-10-21 18:03:44 +08:00
|
|
|
const struct inet_diag_req_v2 *r)
|
|
|
|
{
|
2020-02-26 07:04:09 +08:00
|
|
|
struct sk_buff *in_skb = cb->skb;
|
2016-10-21 18:03:44 +08:00
|
|
|
struct sk_buff *rep;
|
|
|
|
struct sock *sk;
|
2020-02-26 07:04:09 +08:00
|
|
|
struct net *net;
|
2016-10-21 18:03:44 +08:00
|
|
|
int err;
|
|
|
|
|
2020-02-26 07:04:09 +08:00
|
|
|
net = sock_net(in_skb->sk);
|
2016-10-21 18:03:44 +08:00
|
|
|
sk = raw_sock_get(net, r);
|
|
|
|
if (IS_ERR(sk))
|
|
|
|
return PTR_ERR(sk);
|
|
|
|
|
2020-03-05 20:33:12 +08:00
|
|
|
rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) +
|
|
|
|
inet_diag_msg_attrs_size() +
|
|
|
|
nla_total_size(sizeof(struct inet_diag_meminfo)) + 64,
|
2016-10-21 18:03:44 +08:00
|
|
|
GFP_KERNEL);
|
|
|
|
if (!rep) {
|
|
|
|
sock_put(sk);
|
|
|
|
return -ENOMEM;
|
|
|
|
}
|
|
|
|
|
2020-02-26 07:04:09 +08:00
|
|
|
err = inet_sk_diag_fill(sk, NULL, rep, cb, r, 0,
|
2016-10-21 18:03:44 +08:00
|
|
|
netlink_net_capable(in_skb, CAP_NET_ADMIN));
|
|
|
|
sock_put(sk);
|
|
|
|
|
|
|
|
if (err < 0) {
|
|
|
|
kfree_skb(rep);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2021-07-13 10:48:24 +08:00
|
|
|
err = nlmsg_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid);
|
|
|
|
|
2016-10-21 18:03:44 +08:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Emit one socket into a multi-part dump, subject to the bytecode filter.
 *
 * Returns 0 without writing anything when inet_diag_bc_sk() rejects the
 * socket; otherwise returns whatever inet_sk_diag_fill() returns for an
 * NLM_F_MULTI message.
 */
static int sk_diag_dump(struct sock *sk, struct sk_buff *skb,
			struct netlink_callback *cb,
			const struct inet_diag_req_v2 *r,
			struct nlattr *bc, bool net_admin)
{
	if (inet_diag_bc_sk(bc, sk))
		return inet_sk_diag_fill(sk, NULL, skb, cb, r, NLM_F_MULTI,
					 net_admin);

	return 0;
}
|
|
|
|
|
|
|
|
static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
|
2020-02-26 07:04:15 +08:00
|
|
|
const struct inet_diag_req_v2 *r)
|
2016-10-21 18:03:44 +08:00
|
|
|
{
|
|
|
|
bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN);
|
|
|
|
struct raw_hashinfo *hashinfo = raw_get_hashinfo(r);
|
|
|
|
struct net *net = sock_net(skb->sk);
|
2020-02-26 07:04:15 +08:00
|
|
|
struct inet_diag_dump_data *cb_data;
|
2016-10-21 18:03:44 +08:00
|
|
|
int num, s_num, slot, s_slot;
|
raw: Fix NULL deref in raw_get_next().
Dae R. Jeong reported a NULL deref in raw_get_next() [0].
It seems that the repro was running these sequences in parallel so
that one thread was iterating on a socket that was being freed in
another netns.
unshare(0x40060200)
r0 = syz_open_procfs(0x0, &(0x7f0000002080)='net/raw\x00')
socket$inet_icmp_raw(0x2, 0x3, 0x1)
pread64(r0, &(0x7f0000000000)=""/10, 0xa, 0x10000000007f)
After commit 0daf07e52709 ("raw: convert raw sockets to RCU"), we
use RCU and hlist_nulls_for_each_entry() to iterate over SOCK_RAW
sockets. However, we should use spinlock for slow paths to avoid
the NULL deref.
Also, SOCK_RAW does not use SLAB_TYPESAFE_BY_RCU, and the slab object
is not reused during iteration in the grace period. In fact, the
lockless readers do not check the nulls marker with get_nulls_value().
So, SOCK_RAW should use hlist instead of hlist_nulls.
Instead of adding an unnecessary barrier by sk_nulls_for_each_rcu(),
let's convert hlist_nulls to hlist and use sk_for_each_rcu() for
fast paths and sk_for_each() and spinlock for /proc/net/raw.
[0]:
general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN
KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f]
CPU: 2 PID: 20952 Comm: syz-executor.0 Not tainted 6.2.0-g048ec869bafd-dirty #7
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline]
RIP: 0010:sock_net include/net/sock.h:649 [inline]
RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline]
RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline]
RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995
Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef
RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206
RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000
RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338
RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9
R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78
R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030
FS: 00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055bb9614b35f CR3: 000000003c672000 CR4: 00000000003506e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
seq_read_iter+0x4c6/0x10f0 fs/seq_file.c:225
seq_read+0x224/0x320 fs/seq_file.c:162
pde_read fs/proc/inode.c:316 [inline]
proc_reg_read+0x23f/0x330 fs/proc/inode.c:328
vfs_read+0x31e/0xd30 fs/read_write.c:468
ksys_pread64 fs/read_write.c:665 [inline]
__do_sys_pread64 fs/read_write.c:675 [inline]
__se_sys_pread64 fs/read_write.c:672 [inline]
__x64_sys_pread64+0x1e9/0x280 fs/read_write.c:672
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x4e/0xa0 arch/x86/entry/common.c:82
entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x478d29
Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f843ae8dbe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000011
RAX: ffffffffffffffda RBX: 0000000000791408 RCX: 0000000000478d29
RDX: 000000000000000a RSI: 0000000020000000 RDI: 0000000000000003
RBP: 00000000f477909a R08: 0000000000000000 R09: 0000000000000000
R10: 000010000000007f R11: 0000000000000246 R12: 0000000000791740
R13: 0000000000791414 R14: 0000000000791408 R15: 00007ffc2eb48a50
</TASK>
Modules linked in:
---[ end trace 0000000000000000 ]---
RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline]
RIP: 0010:sock_net include/net/sock.h:649 [inline]
RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline]
RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline]
RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995
Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef
RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206
RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000
RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338
RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9
R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78
R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030
FS: 00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f92ff166000 CR3: 000000003c672000 CR4: 00000000003506e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Fixes: 0daf07e52709 ("raw: convert raw sockets to RCU")
Reported-by: syzbot <syzkaller@googlegroups.com>
Reported-by: Dae R. Jeong <threeearcat@gmail.com>
Link: https://lore.kernel.org/netdev/ZCA2mGV_cmq7lIfV@dragonet/
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-04-04 03:49:58 +08:00
|
|
|
struct hlist_head *hlist;
|
2016-10-21 18:03:44 +08:00
|
|
|
struct sock *sk = NULL;
|
2020-02-26 07:04:15 +08:00
|
|
|
struct nlattr *bc;
|
2016-10-21 18:03:44 +08:00
|
|
|
|
|
|
|
if (IS_ERR(hashinfo))
|
|
|
|
return;
|
|
|
|
|
2020-02-26 07:04:15 +08:00
|
|
|
cb_data = cb->data;
|
|
|
|
bc = cb_data->inet_diag_nla_bc;
|
2016-10-21 18:03:44 +08:00
|
|
|
s_slot = cb->args[0];
|
|
|
|
num = s_num = cb->args[1];
|
|
|
|
|
2022-06-20 18:05:09 +08:00
|
|
|
rcu_read_lock();
|
2016-10-21 18:03:44 +08:00
|
|
|
for (slot = s_slot; slot < RAW_HTABLE_SIZE; s_num = 0, slot++) {
|
|
|
|
num = 0;
|
|
|
|
|
2022-06-18 11:47:05 +08:00
|
|
|
hlist = &hashinfo->ht[slot];
|
raw: Fix NULL deref in raw_get_next().
Dae R. Jeong reported a NULL deref in raw_get_next() [0].
It seems that the repro was running these sequences in parallel so
that one thread was iterating on a socket that was being freed in
another netns.
unshare(0x40060200)
r0 = syz_open_procfs(0x0, &(0x7f0000002080)='net/raw\x00')
socket$inet_icmp_raw(0x2, 0x3, 0x1)
pread64(r0, &(0x7f0000000000)=""/10, 0xa, 0x10000000007f)
After commit 0daf07e52709 ("raw: convert raw sockets to RCU"), we
use RCU and hlist_nulls_for_each_entry() to iterate over SOCK_RAW
sockets. However, we should use spinlock for slow paths to avoid
the NULL deref.
Also, SOCK_RAW does not use SLAB_TYPESAFE_BY_RCU, and the slab object
is not reused during iteration in the grace period. In fact, the
lockless readers do not check the nulls marker with get_nulls_value().
So, SOCK_RAW should use hlist instead of hlist_nulls.
Instead of adding an unnecessary barrier by sk_nulls_for_each_rcu(),
let's convert hlist_nulls to hlist and use sk_for_each_rcu() for
fast paths and sk_for_each() and spinlock for /proc/net/raw.
[0]:
general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN
KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f]
CPU: 2 PID: 20952 Comm: syz-executor.0 Not tainted 6.2.0-g048ec869bafd-dirty #7
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014
RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline]
RIP: 0010:sock_net include/net/sock.h:649 [inline]
RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline]
RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline]
RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995
Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef
RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206
RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000
RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338
RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9
R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78
R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030
FS: 00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000055bb9614b35f CR3: 000000003c672000 CR4: 00000000003506e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
seq_read_iter+0x4c6/0x10f0 fs/seq_file.c:225
seq_read+0x224/0x320 fs/seq_file.c:162
pde_read fs/proc/inode.c:316 [inline]
proc_reg_read+0x23f/0x330 fs/proc/inode.c:328
vfs_read+0x31e/0xd30 fs/read_write.c:468
ksys_pread64 fs/read_write.c:665 [inline]
__do_sys_pread64 fs/read_write.c:675 [inline]
__se_sys_pread64 fs/read_write.c:672 [inline]
__x64_sys_pread64+0x1e9/0x280 fs/read_write.c:672
do_syscall_x64 arch/x86/entry/common.c:51 [inline]
do_syscall_64+0x4e/0xa0 arch/x86/entry/common.c:82
entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x478d29
Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f843ae8dbe8 EFLAGS: 00000246 ORIG_RAX: 0000000000000011
RAX: ffffffffffffffda RBX: 0000000000791408 RCX: 0000000000478d29
RDX: 000000000000000a RSI: 0000000020000000 RDI: 0000000000000003
RBP: 00000000f477909a R08: 0000000000000000 R09: 0000000000000000
R10: 000010000000007f R11: 0000000000000246 R12: 0000000000791740
R13: 0000000000791414 R14: 0000000000791408 R15: 00007ffc2eb48a50
</TASK>
Modules linked in:
---[ end trace 0000000000000000 ]---
RIP: 0010:read_pnet include/net/net_namespace.h:383 [inline]
RIP: 0010:sock_net include/net/sock.h:649 [inline]
RIP: 0010:raw_get_next net/ipv4/raw.c:974 [inline]
RIP: 0010:raw_get_idx net/ipv4/raw.c:986 [inline]
RIP: 0010:raw_seq_start+0x431/0x800 net/ipv4/raw.c:995
Code: ef e8 33 3d 94 f7 49 8b 6d 00 4c 89 ef e8 b7 65 5f f7 49 89 ed 49 83 c5 98 0f 84 9a 00 00 00 48 83 c5 c8 48 89 e8 48 c1 e8 03 <42> 80 3c 30 00 74 08 48 89 ef e8 00 3d 94 f7 4c 8b 7d 00 48 89 ef
RSP: 0018:ffffc9001154f9b0 EFLAGS: 00010206
RAX: 0000000000000005 RBX: 1ffff1100302c8fd RCX: 0000000000000000
RDX: 0000000000000028 RSI: ffffc9001154f988 RDI: ffffc9000f77a338
RBP: 0000000000000029 R08: ffffffff8a50ffb4 R09: fffffbfff24b6bd9
R10: fffffbfff24b6bd9 R11: 0000000000000000 R12: ffff88801db73b78
R13: fffffffffffffff9 R14: dffffc0000000000 R15: 0000000000000030
FS: 00007f843ae8e700(0000) GS:ffff888063700000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f92ff166000 CR3: 000000003c672000 CR4: 00000000003506e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Fixes: 0daf07e52709 ("raw: convert raw sockets to RCU")
Reported-by: syzbot <syzkaller@googlegroups.com>
Reported-by: Dae R. Jeong <threeearcat@gmail.com>
Link: https://lore.kernel.org/netdev/ZCA2mGV_cmq7lIfV@dragonet/
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2023-04-04 03:49:58 +08:00
|
|
|
sk_for_each_rcu(sk, hlist) {
|
2016-10-21 18:03:44 +08:00
|
|
|
struct inet_sock *inet = inet_sk(sk);
|
|
|
|
|
|
|
|
if (!net_eq(sock_net(sk), net))
|
|
|
|
continue;
|
|
|
|
if (num < s_num)
|
|
|
|
goto next;
|
|
|
|
if (sk->sk_family != r->sdiag_family)
|
|
|
|
goto next;
|
|
|
|
if (r->id.idiag_sport != inet->inet_sport &&
|
|
|
|
r->id.idiag_sport)
|
|
|
|
goto next;
|
|
|
|
if (r->id.idiag_dport != inet->inet_dport &&
|
|
|
|
r->id.idiag_dport)
|
|
|
|
goto next;
|
|
|
|
if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0)
|
|
|
|
goto out_unlock;
|
|
|
|
next:
|
|
|
|
num++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
out_unlock:
|
2022-06-20 18:05:09 +08:00
|
|
|
rcu_read_unlock();
|
2016-10-21 18:03:44 +08:00
|
|
|
|
|
|
|
cb->args[0] = slot;
|
|
|
|
cb->args[1] = num;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void raw_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
|
|
|
|
void *info)
|
|
|
|
{
|
|
|
|
r->idiag_rqueue = sk_rmem_alloc_get(sk);
|
|
|
|
r->idiag_wqueue = sk_wmem_alloc_get(sk);
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CONFIG_INET_DIAG_DESTROY
/*
 * Destroy the raw socket identified by the diag request.
 *
 * The socket is looked up in the requester's network namespace, passed to
 * sock_diag_destroy() with ECONNABORTED, and the reference taken by the
 * lookup is dropped afterwards.
 *
 * Returns the lookup error when no socket was found, otherwise the result
 * of sock_diag_destroy().
 */
static int raw_diag_destroy(struct sk_buff *in_skb,
			    const struct inet_diag_req_v2 *r)
{
	struct sock *sk = raw_sock_get(sock_net(in_skb->sk), r);
	int ret;

	if (IS_ERR(sk))
		return PTR_ERR(sk);

	ret = sock_diag_destroy(sk, ECONNABORTED);
	sock_put(sk);

	return ret;
}
#endif
|
|
|
|
|
|
|
|
static const struct inet_diag_handler raw_diag_handler = {
|
2024-01-22 19:25:57 +08:00
|
|
|
.owner = THIS_MODULE,
|
2016-10-21 18:03:44 +08:00
|
|
|
.dump = raw_diag_dump,
|
|
|
|
.dump_one = raw_diag_dump_one,
|
|
|
|
.idiag_get_info = raw_diag_get_info,
|
|
|
|
.idiag_type = IPPROTO_RAW,
|
|
|
|
.idiag_info_size = 0,
|
|
|
|
#ifdef CONFIG_INET_DIAG_DESTROY
|
|
|
|
.destroy = raw_diag_destroy,
|
|
|
|
#endif
|
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Compile-time layout check, never called at run time.
 *
 * struct inet_diag_req_v2 and struct inet_diag_req_raw must have the same
 * size and the same member offsets; the only difference allowed is that
 * the @pad member of the former is named @sdiag_raw_protocol in the
 * latter.  Any mismatch breaks the build.
 */
static void __always_unused __check_inet_diag_req_raw(void)
{
#define __assert_same_offset(v2_member, raw_member)			\
	BUILD_BUG_ON(offsetof(struct inet_diag_req_v2, v2_member) !=	\
		     offsetof(struct inet_diag_req_raw, raw_member))

	BUILD_BUG_ON(sizeof(struct inet_diag_req_v2) !=
		     sizeof(struct inet_diag_req_raw));

	__assert_same_offset(sdiag_family, sdiag_family);
	__assert_same_offset(sdiag_protocol, sdiag_protocol);
	__assert_same_offset(idiag_ext, idiag_ext);
	__assert_same_offset(pad, sdiag_raw_protocol);
	__assert_same_offset(idiag_states, idiag_states);
	__assert_same_offset(id, id);

#undef __assert_same_offset
}
|
|
|
|
|
|
|
|
/*
 * Module entry point: register the raw socket handler with inet_diag.
 * Returns the result of inet_diag_register().
 */
static int __init raw_diag_init(void)
{
	int ret = inet_diag_register(&raw_diag_handler);

	return ret;
}
|
|
|
|
|
|
|
|
/* Module exit point: unregister the raw socket handler from inet_diag. */
static void __exit raw_diag_exit(void)
{
	inet_diag_unregister(&raw_diag_handler);
}
|
|
|
|
|
|
|
|
module_init(raw_diag_init);
|
|
|
|
module_exit(raw_diag_exit);
|
|
|
|
MODULE_LICENSE("GPL");
|
2023-11-19 11:30:06 +08:00
|
|
|
MODULE_DESCRIPTION("RAW socket monitoring via SOCK_DIAG");
|
2016-10-21 18:03:44 +08:00
|
|
|
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-255 /* AF_INET - IPPROTO_RAW */);
|
|
|
|
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 10-255 /* AF_INET6 - IPPROTO_RAW */);
|