bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
|
|
|
|
|
|
|
|
#include <linux/bpf.h>
|
2020-09-10 00:27:11 +08:00
|
|
|
#include <linux/btf_ids.h>
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
#include <linux/filter.h>
|
|
|
|
#include <linux/errno.h>
|
|
|
|
#include <linux/file.h>
|
|
|
|
#include <linux/net.h>
|
|
|
|
#include <linux/workqueue.h>
|
|
|
|
#include <linux/skmsg.h>
|
|
|
|
#include <linux/list.h>
|
|
|
|
#include <linux/jhash.h>
|
2020-02-19 01:10:18 +08:00
|
|
|
#include <linux/sock_diag.h>
|
2020-03-09 19:12:39 +08:00
|
|
|
#include <net/udp.h>
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
|
|
|
|
struct bpf_stab {
|
|
|
|
struct bpf_map map;
|
|
|
|
struct sock **sks;
|
|
|
|
struct sk_psock_progs progs;
|
|
|
|
raw_spinlock_t lock;
|
|
|
|
};
|
|
|
|
|
|
|
|
#define SOCK_CREATE_FLAG_MASK \
|
|
|
|
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
|
|
|
|
|
2021-02-24 02:49:31 +08:00
|
|
|
static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
|
|
|
|
struct bpf_prog *old, u32 which);
|
2021-03-31 10:32:28 +08:00
|
|
|
static struct sk_psock_progs *sock_map_progs(struct bpf_map *map);
|
2021-02-24 02:49:31 +08:00
|
|
|
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
|
|
|
|
{
|
|
|
|
struct bpf_stab *stab;
|
|
|
|
|
|
|
|
if (!capable(CAP_NET_ADMIN))
|
|
|
|
return ERR_PTR(-EPERM);
|
|
|
|
if (attr->max_entries == 0 ||
|
|
|
|
attr->key_size != 4 ||
|
2020-02-19 01:10:18 +08:00
|
|
|
(attr->value_size != sizeof(u32) &&
|
|
|
|
attr->value_size != sizeof(u64)) ||
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
|
2020-12-02 05:58:42 +08:00
|
|
|
stab = kzalloc(sizeof(*stab), GFP_USER | __GFP_ACCOUNT);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
if (!stab)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
|
|
|
bpf_map_init_from_attr(&stab->map, attr);
|
|
|
|
raw_spin_lock_init(&stab->lock);
|
|
|
|
|
|
|
|
stab->sks = bpf_map_area_alloc(stab->map.max_entries *
|
|
|
|
sizeof(struct sock *),
|
|
|
|
stab->map.numa_node);
|
2020-12-02 05:58:54 +08:00
|
|
|
if (!stab->sks) {
|
|
|
|
kfree(stab);
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
}
|
|
|
|
|
|
|
|
return &stab->map;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
|
|
|
|
{
|
|
|
|
u32 ufd = attr->target_fd;
|
|
|
|
struct bpf_map *map;
|
|
|
|
struct fd f;
|
|
|
|
int ret;
|
|
|
|
|
2020-06-29 17:56:27 +08:00
|
|
|
if (attr->attach_flags || attr->replace_bpf_fd)
|
|
|
|
return -EINVAL;
|
|
|
|
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
f = fdget(ufd);
|
|
|
|
map = __bpf_map_get(f);
|
|
|
|
if (IS_ERR(map))
|
|
|
|
return PTR_ERR(map);
|
2020-06-29 17:56:28 +08:00
|
|
|
ret = sock_map_prog_update(map, prog, NULL, attr->attach_type);
|
|
|
|
fdput(f);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
|
|
|
|
{
|
|
|
|
u32 ufd = attr->target_fd;
|
|
|
|
struct bpf_prog *prog;
|
|
|
|
struct bpf_map *map;
|
|
|
|
struct fd f;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (attr->attach_flags || attr->replace_bpf_fd)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
f = fdget(ufd);
|
|
|
|
map = __bpf_map_get(f);
|
|
|
|
if (IS_ERR(map))
|
|
|
|
return PTR_ERR(map);
|
|
|
|
|
|
|
|
prog = bpf_prog_get(attr->attach_bpf_fd);
|
|
|
|
if (IS_ERR(prog)) {
|
|
|
|
ret = PTR_ERR(prog);
|
|
|
|
goto put_map;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (prog->type != ptype) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto put_prog;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = sock_map_prog_update(map, NULL, prog, attr->attach_type);
|
|
|
|
put_prog:
|
|
|
|
bpf_prog_put(prog);
|
|
|
|
put_map:
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
fdput(f);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sock_map_sk_acquire(struct sock *sk)
|
|
|
|
__acquires(&sk->sk_lock.slock)
|
|
|
|
{
|
|
|
|
lock_sock(sk);
|
|
|
|
preempt_disable();
|
|
|
|
rcu_read_lock();
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sock_map_sk_release(struct sock *sk)
|
|
|
|
__releases(&sk->sk_lock.slock)
|
|
|
|
{
|
|
|
|
rcu_read_unlock();
|
|
|
|
preempt_enable();
|
|
|
|
release_sock(sk);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sock_map_add_link(struct sk_psock *psock,
|
|
|
|
struct sk_psock_link *link,
|
|
|
|
struct bpf_map *map, void *link_raw)
|
|
|
|
{
|
|
|
|
link->link_raw = link_raw;
|
|
|
|
link->map = map;
|
|
|
|
spin_lock_bh(&psock->link_lock);
|
|
|
|
list_add_tail(&link->list, &psock->link);
|
|
|
|
spin_unlock_bh(&psock->link_lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sock_map_del_link(struct sock *sk,
|
|
|
|
struct sk_psock *psock, void *link_raw)
|
|
|
|
{
|
2020-10-11 13:09:38 +08:00
|
|
|
bool strp_stop = false, verdict_stop = false;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
struct sk_psock_link *link, *tmp;
|
|
|
|
|
|
|
|
spin_lock_bh(&psock->link_lock);
|
|
|
|
list_for_each_entry_safe(link, tmp, &psock->link, list) {
|
|
|
|
if (link->link_raw == link_raw) {
|
|
|
|
struct bpf_map *map = link->map;
|
|
|
|
struct bpf_stab *stab = container_of(map, struct bpf_stab,
|
|
|
|
map);
|
2021-02-24 02:49:30 +08:00
|
|
|
if (psock->saved_data_ready && stab->progs.stream_parser)
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
strp_stop = true;
|
2021-02-24 02:49:30 +08:00
|
|
|
if (psock->saved_data_ready && stab->progs.stream_verdict)
|
2020-10-11 13:09:38 +08:00
|
|
|
verdict_stop = true;
|
2021-03-31 10:32:30 +08:00
|
|
|
if (psock->saved_data_ready && stab->progs.skb_verdict)
|
|
|
|
verdict_stop = true;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
list_del(&link->list);
|
|
|
|
sk_psock_free_link(link);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
spin_unlock_bh(&psock->link_lock);
|
2020-10-11 13:09:38 +08:00
|
|
|
if (strp_stop || verdict_stop) {
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
write_lock_bh(&sk->sk_callback_lock);
|
2020-10-11 13:09:38 +08:00
|
|
|
if (strp_stop)
|
|
|
|
sk_psock_stop_strp(sk, psock);
|
|
|
|
else
|
|
|
|
sk_psock_stop_verdict(sk, psock);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
write_unlock_bh(&sk->sk_callback_lock);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sock_map_unref(struct sock *sk, void *link_raw)
|
|
|
|
{
|
|
|
|
struct sk_psock *psock = sk_psock(sk);
|
|
|
|
|
|
|
|
if (likely(psock)) {
|
|
|
|
sock_map_del_link(sk, psock, link_raw);
|
|
|
|
sk_psock_put(sk, psock);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-09 19:12:37 +08:00
|
|
|
static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
|
2020-03-09 19:12:36 +08:00
|
|
|
{
|
2021-03-31 10:32:31 +08:00
|
|
|
if (!sk->sk_prot->psock_update_sk_prot)
|
2020-03-09 19:12:39 +08:00
|
|
|
return -EINVAL;
|
2021-03-31 10:32:31 +08:00
|
|
|
psock->psock_update_sk_prot = sk->sk_prot->psock_update_sk_prot;
|
2021-04-07 11:21:11 +08:00
|
|
|
return sk->sk_prot->psock_update_sk_prot(sk, psock, false);
|
2020-03-09 19:12:36 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
|
|
|
|
{
|
|
|
|
struct sk_psock *psock;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
psock = sk_psock(sk);
|
|
|
|
if (psock) {
|
2020-03-09 19:12:39 +08:00
|
|
|
if (sk->sk_prot->close != sock_map_close) {
|
2020-03-09 19:12:36 +08:00
|
|
|
psock = ERR_PTR(-EBUSY);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!refcount_inc_not_zero(&psock->refcnt))
|
|
|
|
psock = ERR_PTR(-EBUSY);
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
rcu_read_unlock();
|
|
|
|
return psock;
|
|
|
|
}
|
|
|
|
|
2021-03-31 10:32:29 +08:00
|
|
|
static bool sock_map_redirect_allowed(const struct sock *sk);
|
|
|
|
|
2021-03-31 10:32:28 +08:00
|
|
|
static int sock_map_link(struct bpf_map *map, struct sock *sk)
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
{
|
2021-03-31 10:32:28 +08:00
|
|
|
struct sk_psock_progs *progs = sock_map_progs(map);
|
2021-03-31 10:32:29 +08:00
|
|
|
struct bpf_prog *stream_verdict = NULL;
|
|
|
|
struct bpf_prog *stream_parser = NULL;
|
2021-03-31 10:32:30 +08:00
|
|
|
struct bpf_prog *skb_verdict = NULL;
|
2021-03-31 10:32:29 +08:00
|
|
|
struct bpf_prog *msg_parser = NULL;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
struct sk_psock *psock;
|
|
|
|
int ret;
|
|
|
|
|
2021-03-31 10:32:29 +08:00
|
|
|
/* Only sockets we can redirect into/from in BPF need to hold
|
|
|
|
* refs to parser/verdict progs and have their sk_data_ready
|
|
|
|
* and sk_write_space callbacks overridden.
|
|
|
|
*/
|
|
|
|
if (!sock_map_redirect_allowed(sk))
|
|
|
|
goto no_progs;
|
|
|
|
|
2021-02-24 02:49:30 +08:00
|
|
|
stream_verdict = READ_ONCE(progs->stream_verdict);
|
|
|
|
if (stream_verdict) {
|
|
|
|
stream_verdict = bpf_prog_inc_not_zero(stream_verdict);
|
|
|
|
if (IS_ERR(stream_verdict))
|
|
|
|
return PTR_ERR(stream_verdict);
|
2020-10-11 13:09:07 +08:00
|
|
|
}
|
2020-10-13 01:09:53 +08:00
|
|
|
|
2021-02-24 02:49:30 +08:00
|
|
|
stream_parser = READ_ONCE(progs->stream_parser);
|
|
|
|
if (stream_parser) {
|
|
|
|
stream_parser = bpf_prog_inc_not_zero(stream_parser);
|
|
|
|
if (IS_ERR(stream_parser)) {
|
|
|
|
ret = PTR_ERR(stream_parser);
|
|
|
|
goto out_put_stream_verdict;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
msg_parser = READ_ONCE(progs->msg_parser);
|
|
|
|
if (msg_parser) {
|
|
|
|
msg_parser = bpf_prog_inc_not_zero(msg_parser);
|
|
|
|
if (IS_ERR(msg_parser)) {
|
|
|
|
ret = PTR_ERR(msg_parser);
|
2021-02-24 02:49:30 +08:00
|
|
|
goto out_put_stream_parser;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-31 10:32:30 +08:00
|
|
|
skb_verdict = READ_ONCE(progs->skb_verdict);
|
|
|
|
if (skb_verdict) {
|
|
|
|
skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
|
|
|
|
if (IS_ERR(skb_verdict)) {
|
|
|
|
ret = PTR_ERR(skb_verdict);
|
|
|
|
goto out_put_msg_parser;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-31 10:32:29 +08:00
|
|
|
no_progs:
|
2020-03-09 19:12:36 +08:00
|
|
|
psock = sock_map_psock_get_checked(sk);
|
2018-10-19 04:58:35 +08:00
|
|
|
if (IS_ERR(psock)) {
|
|
|
|
ret = PTR_ERR(psock);
|
|
|
|
goto out_progs;
|
|
|
|
}
|
|
|
|
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
if (psock) {
|
|
|
|
if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
|
2021-02-24 02:49:30 +08:00
|
|
|
(stream_parser && READ_ONCE(psock->progs.stream_parser)) ||
|
2021-03-31 10:32:30 +08:00
|
|
|
(skb_verdict && READ_ONCE(psock->progs.skb_verdict)) ||
|
|
|
|
(skb_verdict && READ_ONCE(psock->progs.stream_verdict)) ||
|
|
|
|
(stream_verdict && READ_ONCE(psock->progs.skb_verdict)) ||
|
2021-02-24 02:49:30 +08:00
|
|
|
(stream_verdict && READ_ONCE(psock->progs.stream_verdict))) {
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
sk_psock_put(sk, psock);
|
|
|
|
ret = -EBUSY;
|
|
|
|
goto out_progs;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
psock = sk_psock_init(sk, map->numa_node);
|
2020-08-21 18:29:43 +08:00
|
|
|
if (IS_ERR(psock)) {
|
|
|
|
ret = PTR_ERR(psock);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
goto out_progs;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (msg_parser)
|
|
|
|
psock_set_prog(&psock->progs.msg_parser, msg_parser);
|
2020-03-09 19:12:34 +08:00
|
|
|
|
2020-03-09 19:12:37 +08:00
|
|
|
ret = sock_map_init_proto(sk, psock);
|
2020-03-09 19:12:34 +08:00
|
|
|
if (ret < 0)
|
|
|
|
goto out_drop;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
|
|
|
|
write_lock_bh(&sk->sk_callback_lock);
|
2021-02-24 02:49:30 +08:00
|
|
|
if (stream_parser && stream_verdict && !psock->saved_data_ready) {
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
ret = sk_psock_init_strp(sk, psock);
|
2020-10-11 13:09:38 +08:00
|
|
|
if (ret)
|
|
|
|
goto out_unlock_drop;
|
2021-02-24 02:49:30 +08:00
|
|
|
psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
|
|
|
|
psock_set_prog(&psock->progs.stream_parser, stream_parser);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
sk_psock_start_strp(sk, psock);
|
2021-02-24 02:49:30 +08:00
|
|
|
} else if (!stream_parser && stream_verdict && !psock->saved_data_ready) {
|
|
|
|
psock_set_prog(&psock->progs.stream_verdict, stream_verdict);
|
2020-10-11 13:09:38 +08:00
|
|
|
sk_psock_start_verdict(sk,psock);
|
2021-03-31 10:32:30 +08:00
|
|
|
} else if (!stream_verdict && skb_verdict && !psock->saved_data_ready) {
|
|
|
|
psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
|
|
|
|
sk_psock_start_verdict(sk, psock);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
}
|
|
|
|
write_unlock_bh(&sk->sk_callback_lock);
|
|
|
|
return 0;
|
2020-10-11 13:09:38 +08:00
|
|
|
out_unlock_drop:
|
|
|
|
write_unlock_bh(&sk->sk_callback_lock);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
out_drop:
|
|
|
|
sk_psock_put(sk, psock);
|
|
|
|
out_progs:
|
2021-03-31 10:32:30 +08:00
|
|
|
if (skb_verdict)
|
|
|
|
bpf_prog_put(skb_verdict);
|
|
|
|
out_put_msg_parser:
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
if (msg_parser)
|
|
|
|
bpf_prog_put(msg_parser);
|
2021-02-24 02:49:30 +08:00
|
|
|
out_put_stream_parser:
|
|
|
|
if (stream_parser)
|
|
|
|
bpf_prog_put(stream_parser);
|
|
|
|
out_put_stream_verdict:
|
|
|
|
if (stream_verdict)
|
|
|
|
bpf_prog_put(stream_verdict);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sock_map_free(struct bpf_map *map)
|
|
|
|
{
|
|
|
|
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
|
|
|
|
int i;
|
|
|
|
|
bpf, sockmap: Remove bucket->lock from sock_{hash|map}_free
The bucket->lock is not needed in the sock_hash_free and sock_map_free
calls, in fact it is causing a splat due to being inside rcu block.
| BUG: sleeping function called from invalid context at net/core/sock.c:2935
| in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 62, name: kworker/0:1
| 3 locks held by kworker/0:1/62:
| #0: ffff88813b019748 ((wq_completion)events){+.+.}, at: process_one_work+0x1d7/0x5e0
| #1: ffffc900000abe50 ((work_completion)(&map->work)){+.+.}, at: process_one_work+0x1d7/0x5e0
| #2: ffff8881381f6df8 (&stab->lock){+...}, at: sock_map_free+0x26/0x180
| CPU: 0 PID: 62 Comm: kworker/0:1 Not tainted 5.5.0-04008-g7b083332376e #454
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014
| Workqueue: events bpf_map_free_deferred
| Call Trace:
| dump_stack+0x71/0xa0
| ___might_sleep.cold+0xa6/0xb6
| lock_sock_nested+0x28/0x90
| sock_map_free+0x5f/0x180
| bpf_map_free_deferred+0x58/0x80
| process_one_work+0x260/0x5e0
| worker_thread+0x4d/0x3e0
| kthread+0x108/0x140
| ? process_one_work+0x5e0/0x5e0
| ? kthread_park+0x90/0x90
| ret_from_fork+0x3a/0x50
The reason we have stab->lock and bucket->locks in sockmap code is to
handle checking EEXIST in update/delete cases. We need to be careful during
an update operation that we check for EEXIST and we need to ensure that the
psock object is not in some partial state of removal/insertion while we do
this. So both map_update_common and sock_map_delete need to guard from being
run together potentially deleting an entry we are checking, etc. But by the
time we get to the tear-down code in sock_{ma[|hash}_free we have already
disconnected the map and we just did synchronize_rcu() in the line above so
no updates/deletes should be in flight. Because of this we can drop the
bucket locks from the map free'ing code, noting no update/deletes can be
in-flight.
Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
Reported-by: Jakub Sitnicki <jakub@cloudflare.com>
Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/158385850787.30597.8346421465837046618.stgit@john-Precision-5820-Tower
2020-03-11 00:41:48 +08:00
|
|
|
/* After the sync no updates or deletes will be in-flight so it
|
|
|
|
* is safe to walk map and remove entries without risking a race
|
|
|
|
* in EEXIST update case.
|
|
|
|
*/
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
synchronize_rcu();
|
|
|
|
for (i = 0; i < stab->map.max_entries; i++) {
|
|
|
|
struct sock **psk = &stab->sks[i];
|
|
|
|
struct sock *sk;
|
|
|
|
|
|
|
|
sk = xchg(psk, NULL);
|
2020-01-11 14:12:00 +08:00
|
|
|
if (sk) {
|
|
|
|
lock_sock(sk);
|
bpf, sockmap: Don't sleep while holding RCU lock on tear-down
rcu_read_lock is needed to protect access to psock inside sock_map_unref
when tearing down the map. However, we can't afford to sleep in lock_sock
while in RCU read-side critical section. Grab the RCU lock only after we
have locked the socket.
This fixes RCU warnings triggerable on a VM with 1 vCPU when free'ing a
sockmap/sockhash that contains at least one socket:
| =============================
| WARNING: suspicious RCU usage
| 5.5.0-04005-g8fc91b972b73 #450 Not tainted
| -----------------------------
| include/linux/rcupdate.h:272 Illegal context switch in RCU read-side critical section!
|
| other info that might help us debug this:
|
|
| rcu_scheduler_active = 2, debug_locks = 1
| 4 locks held by kworker/0:1/62:
| #0: ffff88813b019748 ((wq_completion)events){+.+.}, at: process_one_work+0x1d7/0x5e0
| #1: ffffc900000abe50 ((work_completion)(&map->work)){+.+.}, at: process_one_work+0x1d7/0x5e0
| #2: ffffffff82065d20 (rcu_read_lock){....}, at: sock_map_free+0x5/0x170
| #3: ffff8881368c5df8 (&stab->lock){+...}, at: sock_map_free+0x64/0x170
|
| stack backtrace:
| CPU: 0 PID: 62 Comm: kworker/0:1 Not tainted 5.5.0-04005-g8fc91b972b73 #450
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014
| Workqueue: events bpf_map_free_deferred
| Call Trace:
| dump_stack+0x71/0xa0
| ___might_sleep+0x105/0x190
| lock_sock_nested+0x28/0x90
| sock_map_free+0x95/0x170
| bpf_map_free_deferred+0x58/0x80
| process_one_work+0x260/0x5e0
| worker_thread+0x4d/0x3e0
| kthread+0x108/0x140
| ? process_one_work+0x5e0/0x5e0
| ? kthread_park+0x90/0x90
| ret_from_fork+0x3a/0x50
| =============================
| WARNING: suspicious RCU usage
| 5.5.0-04005-g8fc91b972b73-dirty #452 Not tainted
| -----------------------------
| include/linux/rcupdate.h:272 Illegal context switch in RCU read-side critical section!
|
| other info that might help us debug this:
|
|
| rcu_scheduler_active = 2, debug_locks = 1
| 4 locks held by kworker/0:1/62:
| #0: ffff88813b019748 ((wq_completion)events){+.+.}, at: process_one_work+0x1d7/0x5e0
| #1: ffffc900000abe50 ((work_completion)(&map->work)){+.+.}, at: process_one_work+0x1d7/0x5e0
| #2: ffffffff82065d20 (rcu_read_lock){....}, at: sock_hash_free+0x5/0x1d0
| #3: ffff888139966e00 (&htab->buckets[i].lock){+...}, at: sock_hash_free+0x92/0x1d0
|
| stack backtrace:
| CPU: 0 PID: 62 Comm: kworker/0:1 Not tainted 5.5.0-04005-g8fc91b972b73-dirty #452
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014
| Workqueue: events bpf_map_free_deferred
| Call Trace:
| dump_stack+0x71/0xa0
| ___might_sleep+0x105/0x190
| lock_sock_nested+0x28/0x90
| sock_hash_free+0xec/0x1d0
| bpf_map_free_deferred+0x58/0x80
| process_one_work+0x260/0x5e0
| worker_thread+0x4d/0x3e0
| kthread+0x108/0x140
| ? process_one_work+0x5e0/0x5e0
| ? kthread_park+0x90/0x90
| ret_from_fork+0x3a/0x50
Fixes: 7e81a3530206 ("bpf: Sockmap, ensure sock lock held during tear down")
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200206111652.694507-2-jakub@cloudflare.com
2020-02-06 19:16:50 +08:00
|
|
|
rcu_read_lock();
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
sock_map_unref(sk, psk);
|
bpf, sockmap: Don't sleep while holding RCU lock on tear-down
rcu_read_lock is needed to protect access to psock inside sock_map_unref
when tearing down the map. However, we can't afford to sleep in lock_sock
while in RCU read-side critical section. Grab the RCU lock only after we
have locked the socket.
This fixes RCU warnings triggerable on a VM with 1 vCPU when free'ing a
sockmap/sockhash that contains at least one socket:
| =============================
| WARNING: suspicious RCU usage
| 5.5.0-04005-g8fc91b972b73 #450 Not tainted
| -----------------------------
| include/linux/rcupdate.h:272 Illegal context switch in RCU read-side critical section!
|
| other info that might help us debug this:
|
|
| rcu_scheduler_active = 2, debug_locks = 1
| 4 locks held by kworker/0:1/62:
| #0: ffff88813b019748 ((wq_completion)events){+.+.}, at: process_one_work+0x1d7/0x5e0
| #1: ffffc900000abe50 ((work_completion)(&map->work)){+.+.}, at: process_one_work+0x1d7/0x5e0
| #2: ffffffff82065d20 (rcu_read_lock){....}, at: sock_map_free+0x5/0x170
| #3: ffff8881368c5df8 (&stab->lock){+...}, at: sock_map_free+0x64/0x170
|
| stack backtrace:
| CPU: 0 PID: 62 Comm: kworker/0:1 Not tainted 5.5.0-04005-g8fc91b972b73 #450
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014
| Workqueue: events bpf_map_free_deferred
| Call Trace:
| dump_stack+0x71/0xa0
| ___might_sleep+0x105/0x190
| lock_sock_nested+0x28/0x90
| sock_map_free+0x95/0x170
| bpf_map_free_deferred+0x58/0x80
| process_one_work+0x260/0x5e0
| worker_thread+0x4d/0x3e0
| kthread+0x108/0x140
| ? process_one_work+0x5e0/0x5e0
| ? kthread_park+0x90/0x90
| ret_from_fork+0x3a/0x50
| =============================
| WARNING: suspicious RCU usage
| 5.5.0-04005-g8fc91b972b73-dirty #452 Not tainted
| -----------------------------
| include/linux/rcupdate.h:272 Illegal context switch in RCU read-side critical section!
|
| other info that might help us debug this:
|
|
| rcu_scheduler_active = 2, debug_locks = 1
| 4 locks held by kworker/0:1/62:
| #0: ffff88813b019748 ((wq_completion)events){+.+.}, at: process_one_work+0x1d7/0x5e0
| #1: ffffc900000abe50 ((work_completion)(&map->work)){+.+.}, at: process_one_work+0x1d7/0x5e0
| #2: ffffffff82065d20 (rcu_read_lock){....}, at: sock_hash_free+0x5/0x1d0
| #3: ffff888139966e00 (&htab->buckets[i].lock){+...}, at: sock_hash_free+0x92/0x1d0
|
| stack backtrace:
| CPU: 0 PID: 62 Comm: kworker/0:1 Not tainted 5.5.0-04005-g8fc91b972b73-dirty #452
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014
| Workqueue: events bpf_map_free_deferred
| Call Trace:
| dump_stack+0x71/0xa0
| ___might_sleep+0x105/0x190
| lock_sock_nested+0x28/0x90
| sock_hash_free+0xec/0x1d0
| bpf_map_free_deferred+0x58/0x80
| process_one_work+0x260/0x5e0
| worker_thread+0x4d/0x3e0
| kthread+0x108/0x140
| ? process_one_work+0x5e0/0x5e0
| ? kthread_park+0x90/0x90
| ret_from_fork+0x3a/0x50
Fixes: 7e81a3530206 ("bpf: Sockmap, ensure sock lock held during tear down")
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200206111652.694507-2-jakub@cloudflare.com
2020-02-06 19:16:50 +08:00
|
|
|
rcu_read_unlock();
|
2020-01-11 14:12:00 +08:00
|
|
|
release_sock(sk);
|
|
|
|
}
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
}
|
|
|
|
|
2020-02-06 19:16:51 +08:00
|
|
|
/* wait for psock readers accessing its map link */
|
2019-07-20 01:29:20 +08:00
|
|
|
synchronize_rcu();
|
|
|
|
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
bpf_map_area_free(stab->sks);
|
|
|
|
kfree(stab);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sock_map_release_progs(struct bpf_map *map)
|
|
|
|
{
|
|
|
|
psock_progs_drop(&container_of(map, struct bpf_stab, map)->progs);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
|
|
|
|
{
|
|
|
|
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
|
|
|
|
|
|
|
|
WARN_ON_ONCE(!rcu_read_lock_held());
|
|
|
|
|
|
|
|
if (unlikely(key >= map->max_entries))
|
|
|
|
return NULL;
|
|
|
|
return READ_ONCE(stab->sks[key]);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *sock_map_lookup(struct bpf_map *map, void *key)
|
|
|
|
{
|
2020-04-30 02:11:52 +08:00
|
|
|
struct sock *sk;
|
|
|
|
|
|
|
|
sk = __sock_map_lookup_elem(map, *(u32 *)key);
|
2020-09-10 00:27:10 +08:00
|
|
|
if (!sk)
|
2020-04-30 02:11:52 +08:00
|
|
|
return NULL;
|
|
|
|
if (sk_is_refcounted(sk) && !refcount_inc_not_zero(&sk->sk_refcnt))
|
|
|
|
return NULL;
|
|
|
|
return sk;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
}
|
|
|
|
|
2020-02-19 01:10:18 +08:00
|
|
|
static void *sock_map_lookup_sys(struct bpf_map *map, void *key)
|
|
|
|
{
|
|
|
|
struct sock *sk;
|
|
|
|
|
|
|
|
if (map->value_size != sizeof(u64))
|
|
|
|
return ERR_PTR(-ENOSPC);
|
|
|
|
|
|
|
|
sk = __sock_map_lookup_elem(map, *(u32 *)key);
|
|
|
|
if (!sk)
|
|
|
|
return ERR_PTR(-ENOENT);
|
|
|
|
|
2020-09-30 23:18:16 +08:00
|
|
|
__sock_gen_cookie(sk);
|
2020-02-19 01:10:18 +08:00
|
|
|
return &sk->sk_cookie;
|
|
|
|
}
|
|
|
|
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
|
|
|
|
struct sock **psk)
|
|
|
|
{
|
|
|
|
struct sock *sk;
|
2019-07-20 01:29:19 +08:00
|
|
|
int err = 0;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
|
|
|
|
raw_spin_lock_bh(&stab->lock);
|
|
|
|
sk = *psk;
|
|
|
|
if (!sk_test || sk_test == sk)
|
2019-07-20 01:29:19 +08:00
|
|
|
sk = xchg(psk, NULL);
|
|
|
|
|
|
|
|
if (likely(sk))
|
|
|
|
sock_map_unref(sk, psk);
|
|
|
|
else
|
|
|
|
err = -EINVAL;
|
|
|
|
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
raw_spin_unlock_bh(&stab->lock);
|
2019-07-20 01:29:19 +08:00
|
|
|
return err;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void sock_map_delete_from_link(struct bpf_map *map, struct sock *sk,
|
|
|
|
void *link_raw)
|
|
|
|
{
|
|
|
|
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
|
|
|
|
|
|
|
|
__sock_map_delete(stab, sk, link_raw);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sock_map_delete_elem(struct bpf_map *map, void *key)
|
|
|
|
{
|
|
|
|
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
|
|
|
|
u32 i = *(u32 *)key;
|
|
|
|
struct sock **psk;
|
|
|
|
|
|
|
|
if (unlikely(i >= map->max_entries))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
psk = &stab->sks[i];
|
|
|
|
return __sock_map_delete(stab, NULL, psk);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next)
|
|
|
|
{
|
|
|
|
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
|
|
|
|
u32 i = key ? *(u32 *)key : U32_MAX;
|
|
|
|
u32 *key_next = next;
|
|
|
|
|
|
|
|
if (i == stab->map.max_entries - 1)
|
|
|
|
return -ENOENT;
|
|
|
|
if (i >= stab->map.max_entries)
|
|
|
|
*key_next = 0;
|
|
|
|
else
|
|
|
|
*key_next = i + 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sock_map_update_common(struct bpf_map *map, u32 idx,
|
|
|
|
struct sock *sk, u64 flags)
|
|
|
|
{
|
|
|
|
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
|
|
|
|
struct sk_psock_link *link;
|
|
|
|
struct sk_psock *psock;
|
|
|
|
struct sock *osk;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
WARN_ON_ONCE(!rcu_read_lock_held());
|
|
|
|
if (unlikely(flags > BPF_EXIST))
|
|
|
|
return -EINVAL;
|
|
|
|
if (unlikely(idx >= map->max_entries))
|
|
|
|
return -E2BIG;
|
|
|
|
|
|
|
|
link = sk_psock_init_link();
|
|
|
|
if (!link)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2021-03-31 10:32:29 +08:00
|
|
|
ret = sock_map_link(map, sk);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
if (ret < 0)
|
|
|
|
goto out_free;
|
|
|
|
|
|
|
|
psock = sk_psock(sk);
|
|
|
|
WARN_ON_ONCE(!psock);
|
|
|
|
|
|
|
|
raw_spin_lock_bh(&stab->lock);
|
|
|
|
osk = stab->sks[idx];
|
|
|
|
if (osk && flags == BPF_NOEXIST) {
|
|
|
|
ret = -EEXIST;
|
|
|
|
goto out_unlock;
|
|
|
|
} else if (!osk && flags == BPF_EXIST) {
|
|
|
|
ret = -ENOENT;
|
|
|
|
goto out_unlock;
|
|
|
|
}
|
|
|
|
|
|
|
|
sock_map_add_link(psock, link, map, &stab->sks[idx]);
|
|
|
|
stab->sks[idx] = sk;
|
|
|
|
if (osk)
|
|
|
|
sock_map_unref(osk, &stab->sks[idx]);
|
|
|
|
raw_spin_unlock_bh(&stab->lock);
|
|
|
|
return 0;
|
|
|
|
out_unlock:
|
|
|
|
raw_spin_unlock_bh(&stab->lock);
|
|
|
|
if (psock)
|
|
|
|
sk_psock_put(sk, psock);
|
|
|
|
out_free:
|
|
|
|
sk_psock_free_link(link);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
|
|
|
|
{
|
|
|
|
return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
|
2020-02-19 01:10:16 +08:00
|
|
|
ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB ||
|
|
|
|
ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
}
|
|
|
|
|
2020-03-09 19:12:39 +08:00
|
|
|
static bool sk_is_tcp(const struct sock *sk)
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
{
|
|
|
|
return sk->sk_type == SOCK_STREAM &&
|
|
|
|
sk->sk_protocol == IPPROTO_TCP;
|
|
|
|
}
|
|
|
|
|
2020-03-09 19:12:39 +08:00
|
|
|
static bool sk_is_udp(const struct sock *sk)
|
|
|
|
{
|
|
|
|
return sk->sk_type == SOCK_DGRAM &&
|
|
|
|
sk->sk_protocol == IPPROTO_UDP;
|
|
|
|
}
|
|
|
|
|
2020-06-12 01:25:20 +08:00
|
|
|
static bool sock_map_redirect_allowed(const struct sock *sk)
|
|
|
|
{
|
2021-03-31 10:32:35 +08:00
|
|
|
if (sk_is_tcp(sk))
|
|
|
|
return sk->sk_state != TCP_LISTEN;
|
|
|
|
else
|
|
|
|
return sk->sk_state == TCP_ESTABLISHED;
|
2020-06-12 01:25:20 +08:00
|
|
|
}
|
|
|
|
|
2020-03-09 19:12:39 +08:00
|
|
|
static bool sock_map_sk_is_suitable(const struct sock *sk)
|
|
|
|
{
|
2021-03-31 10:32:31 +08:00
|
|
|
return !!sk->sk_prot->psock_update_sk_prot;
|
2020-03-09 19:12:39 +08:00
|
|
|
}
|
|
|
|
|
2020-02-19 01:10:16 +08:00
|
|
|
static bool sock_map_sk_state_allowed(const struct sock *sk)
|
|
|
|
{
|
2020-03-09 19:12:39 +08:00
|
|
|
if (sk_is_tcp(sk))
|
|
|
|
return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
|
|
|
|
else if (sk_is_udp(sk))
|
|
|
|
return sk_hashed(sk);
|
|
|
|
|
|
|
|
return false;
|
2020-02-19 01:10:16 +08:00
|
|
|
}
|
|
|
|
|
2020-08-21 18:29:44 +08:00
|
|
|
static int sock_hash_update_common(struct bpf_map *map, void *key,
|
|
|
|
struct sock *sk, u64 flags);
|
|
|
|
|
2020-08-21 18:29:45 +08:00
|
|
|
int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value,
|
|
|
|
u64 flags)
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
{
|
|
|
|
struct socket *sock;
|
|
|
|
struct sock *sk;
|
|
|
|
int ret;
|
2020-02-19 01:10:18 +08:00
|
|
|
u64 ufd;
|
|
|
|
|
|
|
|
if (map->value_size == sizeof(u64))
|
|
|
|
ufd = *(u64 *)value;
|
|
|
|
else
|
|
|
|
ufd = *(u32 *)value;
|
|
|
|
if (ufd > S32_MAX)
|
|
|
|
return -EINVAL;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
|
|
|
|
sock = sockfd_lookup(ufd, &ret);
|
|
|
|
if (!sock)
|
|
|
|
return ret;
|
|
|
|
sk = sock->sk;
|
|
|
|
if (!sk) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
2020-02-07 18:37:12 +08:00
|
|
|
if (!sock_map_sk_is_suitable(sk)) {
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
ret = -EOPNOTSUPP;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
sock_map_sk_acquire(sk);
|
2020-02-19 01:10:16 +08:00
|
|
|
if (!sock_map_sk_state_allowed(sk))
|
2020-02-07 18:37:12 +08:00
|
|
|
ret = -EOPNOTSUPP;
|
2020-08-21 18:29:44 +08:00
|
|
|
else if (map->map_type == BPF_MAP_TYPE_SOCKMAP)
|
|
|
|
ret = sock_map_update_common(map, *(u32 *)key, sk, flags);
|
2020-02-07 18:37:12 +08:00
|
|
|
else
|
2020-08-21 18:29:44 +08:00
|
|
|
ret = sock_hash_update_common(map, key, sk, flags);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
sock_map_sk_release(sk);
|
|
|
|
out:
|
2020-12-29 21:48:34 +08:00
|
|
|
sockfd_put(sock);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2020-08-21 18:29:47 +08:00
|
|
|
static int sock_map_update_elem(struct bpf_map *map, void *key,
|
|
|
|
void *value, u64 flags)
|
|
|
|
{
|
|
|
|
struct sock *sk = (struct sock *)value;
|
|
|
|
int ret;
|
|
|
|
|
2020-09-28 17:08:02 +08:00
|
|
|
if (unlikely(!sk || !sk_fullsock(sk)))
|
|
|
|
return -EINVAL;
|
|
|
|
|
2020-08-21 18:29:47 +08:00
|
|
|
if (!sock_map_sk_is_suitable(sk))
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
|
|
local_bh_disable();
|
|
|
|
bh_lock_sock(sk);
|
|
|
|
if (!sock_map_sk_state_allowed(sk))
|
|
|
|
ret = -EOPNOTSUPP;
|
|
|
|
else if (map->map_type == BPF_MAP_TYPE_SOCKMAP)
|
|
|
|
ret = sock_map_update_common(map, *(u32 *)key, sk, flags);
|
|
|
|
else
|
|
|
|
ret = sock_hash_update_common(map, key, sk, flags);
|
|
|
|
bh_unlock_sock(sk);
|
|
|
|
local_bh_enable();
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, sops,
|
|
|
|
struct bpf_map *, map, void *, key, u64, flags)
|
|
|
|
{
|
|
|
|
WARN_ON_ONCE(!rcu_read_lock_held());
|
|
|
|
|
|
|
|
if (likely(sock_map_sk_is_suitable(sops->sk) &&
|
|
|
|
sock_map_op_okay(sops)))
|
|
|
|
return sock_map_update_common(map, *(u32 *)key, sops->sk,
|
|
|
|
flags);
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_sock_map_update_proto = {
|
|
|
|
.func = bpf_sock_map_update,
|
|
|
|
.gpl_only = false,
|
|
|
|
.pkt_access = true,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
.arg1_type = ARG_PTR_TO_CTX,
|
|
|
|
.arg2_type = ARG_CONST_MAP_PTR,
|
|
|
|
.arg3_type = ARG_PTR_TO_MAP_KEY,
|
|
|
|
.arg4_type = ARG_ANYTHING,
|
|
|
|
};
|
|
|
|
|
|
|
|
BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
|
|
|
|
struct bpf_map *, map, u32, key, u64, flags)
|
|
|
|
{
|
2020-02-19 01:10:16 +08:00
|
|
|
struct sock *sk;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
|
|
|
|
if (unlikely(flags & ~(BPF_F_INGRESS)))
|
|
|
|
return SK_DROP;
|
2020-02-19 01:10:16 +08:00
|
|
|
|
|
|
|
sk = __sock_map_lookup_elem(map, key);
|
|
|
|
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
return SK_DROP;
|
2020-02-19 01:10:16 +08:00
|
|
|
|
2021-02-24 02:49:29 +08:00
|
|
|
skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
return SK_PASS;
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_sk_redirect_map_proto = {
|
|
|
|
.func = bpf_sk_redirect_map,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
.arg1_type = ARG_PTR_TO_CTX,
|
|
|
|
.arg2_type = ARG_CONST_MAP_PTR,
|
|
|
|
.arg3_type = ARG_ANYTHING,
|
|
|
|
.arg4_type = ARG_ANYTHING,
|
|
|
|
};
|
|
|
|
|
|
|
|
BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg,
|
|
|
|
struct bpf_map *, map, u32, key, u64, flags)
|
|
|
|
{
|
2020-02-19 01:10:16 +08:00
|
|
|
struct sock *sk;
|
|
|
|
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
if (unlikely(flags & ~(BPF_F_INGRESS)))
|
|
|
|
return SK_DROP;
|
2020-02-19 01:10:16 +08:00
|
|
|
|
|
|
|
sk = __sock_map_lookup_elem(map, key);
|
|
|
|
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
return SK_DROP;
|
2020-02-19 01:10:16 +08:00
|
|
|
|
|
|
|
msg->flags = flags;
|
|
|
|
msg->sk_redir = sk;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
return SK_PASS;
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_msg_redirect_map_proto = {
|
|
|
|
.func = bpf_msg_redirect_map,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
.arg1_type = ARG_PTR_TO_CTX,
|
|
|
|
.arg2_type = ARG_CONST_MAP_PTR,
|
|
|
|
.arg3_type = ARG_ANYTHING,
|
|
|
|
.arg4_type = ARG_ANYTHING,
|
|
|
|
};
|
|
|
|
|
2020-09-10 00:27:11 +08:00
|
|
|
struct sock_map_seq_info {
|
|
|
|
struct bpf_map *map;
|
|
|
|
struct sock *sk;
|
|
|
|
u32 index;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct bpf_iter__sockmap {
|
|
|
|
__bpf_md_ptr(struct bpf_iter_meta *, meta);
|
|
|
|
__bpf_md_ptr(struct bpf_map *, map);
|
|
|
|
__bpf_md_ptr(void *, key);
|
|
|
|
__bpf_md_ptr(struct sock *, sk);
|
|
|
|
};
|
|
|
|
|
|
|
|
DEFINE_BPF_ITER_FUNC(sockmap, struct bpf_iter_meta *meta,
|
|
|
|
struct bpf_map *map, void *key,
|
|
|
|
struct sock *sk)
|
|
|
|
|
|
|
|
static void *sock_map_seq_lookup_elem(struct sock_map_seq_info *info)
|
|
|
|
{
|
|
|
|
if (unlikely(info->index >= info->map->max_entries))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
info->sk = __sock_map_lookup_elem(info->map, info->index);
|
|
|
|
|
|
|
|
/* can't return sk directly, since that might be NULL */
|
|
|
|
return info;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *sock_map_seq_start(struct seq_file *seq, loff_t *pos)
|
2020-10-12 17:18:50 +08:00
|
|
|
__acquires(rcu)
|
2020-09-10 00:27:11 +08:00
|
|
|
{
|
|
|
|
struct sock_map_seq_info *info = seq->private;
|
|
|
|
|
|
|
|
if (*pos == 0)
|
|
|
|
++*pos;
|
|
|
|
|
|
|
|
/* pairs with sock_map_seq_stop */
|
|
|
|
rcu_read_lock();
|
|
|
|
return sock_map_seq_lookup_elem(info);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *sock_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
|
2020-10-12 17:18:50 +08:00
|
|
|
__must_hold(rcu)
|
2020-09-10 00:27:11 +08:00
|
|
|
{
|
|
|
|
struct sock_map_seq_info *info = seq->private;
|
|
|
|
|
|
|
|
++*pos;
|
|
|
|
++info->index;
|
|
|
|
|
|
|
|
return sock_map_seq_lookup_elem(info);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sock_map_seq_show(struct seq_file *seq, void *v)
|
2020-10-12 17:18:50 +08:00
|
|
|
__must_hold(rcu)
|
2020-09-10 00:27:11 +08:00
|
|
|
{
|
|
|
|
struct sock_map_seq_info *info = seq->private;
|
|
|
|
struct bpf_iter__sockmap ctx = {};
|
|
|
|
struct bpf_iter_meta meta;
|
|
|
|
struct bpf_prog *prog;
|
|
|
|
|
|
|
|
meta.seq = seq;
|
|
|
|
prog = bpf_iter_get_info(&meta, !v);
|
|
|
|
if (!prog)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
ctx.meta = &meta;
|
|
|
|
ctx.map = info->map;
|
|
|
|
if (v) {
|
|
|
|
ctx.key = &info->index;
|
|
|
|
ctx.sk = info->sk;
|
|
|
|
}
|
|
|
|
|
|
|
|
return bpf_iter_run_prog(prog, &ctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sock_map_seq_stop(struct seq_file *seq, void *v)
|
2020-10-12 17:18:50 +08:00
|
|
|
__releases(rcu)
|
2020-09-10 00:27:11 +08:00
|
|
|
{
|
|
|
|
if (!v)
|
|
|
|
(void)sock_map_seq_show(seq, NULL);
|
|
|
|
|
|
|
|
/* pairs with sock_map_seq_start */
|
|
|
|
rcu_read_unlock();
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct seq_operations sock_map_seq_ops = {
|
|
|
|
.start = sock_map_seq_start,
|
|
|
|
.next = sock_map_seq_next,
|
|
|
|
.stop = sock_map_seq_stop,
|
|
|
|
.show = sock_map_seq_show,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int sock_map_init_seq_private(void *priv_data,
|
|
|
|
struct bpf_iter_aux_info *aux)
|
|
|
|
{
|
|
|
|
struct sock_map_seq_info *info = priv_data;
|
|
|
|
|
|
|
|
info->map = aux->map;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct bpf_iter_seq_info sock_map_iter_seq_info = {
|
|
|
|
.seq_ops = &sock_map_seq_ops,
|
|
|
|
.init_seq_private = sock_map_init_seq_private,
|
|
|
|
.seq_priv_size = sizeof(struct sock_map_seq_info),
|
|
|
|
};
|
|
|
|
|
2020-06-20 05:11:44 +08:00
|
|
|
static int sock_map_btf_id;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
const struct bpf_map_ops sock_map_ops = {
|
2020-08-28 09:18:06 +08:00
|
|
|
.map_meta_equal = bpf_map_meta_equal,
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
.map_alloc = sock_map_alloc,
|
|
|
|
.map_free = sock_map_free,
|
|
|
|
.map_get_next_key = sock_map_get_next_key,
|
2020-02-19 01:10:18 +08:00
|
|
|
.map_lookup_elem_sys_only = sock_map_lookup_sys,
|
2020-08-21 18:29:47 +08:00
|
|
|
.map_update_elem = sock_map_update_elem,
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
.map_delete_elem = sock_map_delete_elem,
|
|
|
|
.map_lookup_elem = sock_map_lookup,
|
|
|
|
.map_release_uref = sock_map_release_progs,
|
|
|
|
.map_check_btf = map_check_no_btf,
|
2020-06-20 05:11:44 +08:00
|
|
|
.map_btf_name = "bpf_stab",
|
|
|
|
.map_btf_id = &sock_map_btf_id,
|
2020-09-10 00:27:11 +08:00
|
|
|
.iter_seq_info = &sock_map_iter_seq_info,
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
};
|
|
|
|
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab_elem {
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
struct rcu_head rcu;
|
|
|
|
u32 hash;
|
|
|
|
struct sock *sk;
|
|
|
|
struct hlist_node node;
|
2020-02-18 04:01:11 +08:00
|
|
|
u8 key[];
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
};
|
|
|
|
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab_bucket {
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
struct hlist_head head;
|
|
|
|
raw_spinlock_t lock;
|
|
|
|
};
|
|
|
|
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab {
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
struct bpf_map map;
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab_bucket *buckets;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
u32 buckets_num;
|
|
|
|
u32 elem_size;
|
|
|
|
struct sk_psock_progs progs;
|
|
|
|
atomic_t count;
|
|
|
|
};
|
|
|
|
|
|
|
|
static inline u32 sock_hash_bucket_hash(const void *key, u32 len)
|
|
|
|
{
|
|
|
|
return jhash(key, len, 0);
|
|
|
|
}
|
|
|
|
|
2020-06-20 05:11:42 +08:00
|
|
|
static struct bpf_shtab_bucket *sock_hash_select_bucket(struct bpf_shtab *htab,
|
|
|
|
u32 hash)
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
{
|
|
|
|
return &htab->buckets[hash & (htab->buckets_num - 1)];
|
|
|
|
}
|
|
|
|
|
2020-06-20 05:11:42 +08:00
|
|
|
static struct bpf_shtab_elem *
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key,
|
|
|
|
u32 key_size)
|
|
|
|
{
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab_elem *elem;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
|
|
|
|
hlist_for_each_entry_rcu(elem, head, node) {
|
|
|
|
if (elem->hash == hash &&
|
|
|
|
!memcmp(&elem->key, key, key_size))
|
|
|
|
return elem;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
|
|
|
|
{
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
u32 key_size = map->key_size, hash;
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab_bucket *bucket;
|
|
|
|
struct bpf_shtab_elem *elem;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
|
|
|
|
WARN_ON_ONCE(!rcu_read_lock_held());
|
|
|
|
|
|
|
|
hash = sock_hash_bucket_hash(key, key_size);
|
|
|
|
bucket = sock_hash_select_bucket(htab, hash);
|
|
|
|
elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
|
|
|
|
|
|
|
|
return elem ? elem->sk : NULL;
|
|
|
|
}
|
|
|
|
|
2020-06-20 05:11:42 +08:00
|
|
|
static void sock_hash_free_elem(struct bpf_shtab *htab,
|
|
|
|
struct bpf_shtab_elem *elem)
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
{
|
|
|
|
atomic_dec(&htab->count);
|
|
|
|
kfree_rcu(elem, rcu);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
|
|
|
|
void *link_raw)
|
|
|
|
{
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
|
|
|
|
struct bpf_shtab_elem *elem_probe, *elem = link_raw;
|
|
|
|
struct bpf_shtab_bucket *bucket;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
|
|
|
|
WARN_ON_ONCE(!rcu_read_lock_held());
|
|
|
|
bucket = sock_hash_select_bucket(htab, elem->hash);
|
|
|
|
|
|
|
|
/* elem may be deleted in parallel from the map, but access here
|
|
|
|
* is okay since it's going away only after RCU grace period.
|
|
|
|
* However, we need to check whether it's still present.
|
|
|
|
*/
|
|
|
|
raw_spin_lock_bh(&bucket->lock);
|
|
|
|
elem_probe = sock_hash_lookup_elem_raw(&bucket->head, elem->hash,
|
|
|
|
elem->key, map->key_size);
|
|
|
|
if (elem_probe && elem_probe == elem) {
|
|
|
|
hlist_del_rcu(&elem->node);
|
|
|
|
sock_map_unref(elem->sk, elem);
|
|
|
|
sock_hash_free_elem(htab, elem);
|
|
|
|
}
|
|
|
|
raw_spin_unlock_bh(&bucket->lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sock_hash_delete_elem(struct bpf_map *map, void *key)
|
|
|
|
{
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
u32 hash, key_size = map->key_size;
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab_bucket *bucket;
|
|
|
|
struct bpf_shtab_elem *elem;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
int ret = -ENOENT;
|
|
|
|
|
|
|
|
hash = sock_hash_bucket_hash(key, key_size);
|
|
|
|
bucket = sock_hash_select_bucket(htab, hash);
|
|
|
|
|
|
|
|
raw_spin_lock_bh(&bucket->lock);
|
|
|
|
elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
|
|
|
|
if (elem) {
|
|
|
|
hlist_del_rcu(&elem->node);
|
|
|
|
sock_map_unref(elem->sk, elem);
|
|
|
|
sock_hash_free_elem(htab, elem);
|
|
|
|
ret = 0;
|
|
|
|
}
|
|
|
|
raw_spin_unlock_bh(&bucket->lock);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2020-06-20 05:11:42 +08:00
|
|
|
static struct bpf_shtab_elem *sock_hash_alloc_elem(struct bpf_shtab *htab,
|
|
|
|
void *key, u32 key_size,
|
|
|
|
u32 hash, struct sock *sk,
|
|
|
|
struct bpf_shtab_elem *old)
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
{
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab_elem *new;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
|
|
|
|
if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
|
|
|
|
if (!old) {
|
|
|
|
atomic_dec(&htab->count);
|
|
|
|
return ERR_PTR(-E2BIG);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-12-02 05:58:42 +08:00
|
|
|
new = bpf_map_kmalloc_node(&htab->map, htab->elem_size,
|
|
|
|
GFP_ATOMIC | __GFP_NOWARN,
|
|
|
|
htab->map.numa_node);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
if (!new) {
|
|
|
|
atomic_dec(&htab->count);
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
}
|
|
|
|
memcpy(new->key, key, key_size);
|
|
|
|
new->sk = sk;
|
|
|
|
new->hash = hash;
|
|
|
|
return new;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sock_hash_update_common(struct bpf_map *map, void *key,
|
|
|
|
struct sock *sk, u64 flags)
|
|
|
|
{
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
u32 key_size = map->key_size, hash;
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab_elem *elem, *elem_new;
|
|
|
|
struct bpf_shtab_bucket *bucket;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
struct sk_psock_link *link;
|
|
|
|
struct sk_psock *psock;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
WARN_ON_ONCE(!rcu_read_lock_held());
|
|
|
|
if (unlikely(flags > BPF_EXIST))
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
link = sk_psock_init_link();
|
|
|
|
if (!link)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
2021-03-31 10:32:29 +08:00
|
|
|
ret = sock_map_link(map, sk);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
if (ret < 0)
|
|
|
|
goto out_free;
|
|
|
|
|
|
|
|
psock = sk_psock(sk);
|
|
|
|
WARN_ON_ONCE(!psock);
|
|
|
|
|
|
|
|
hash = sock_hash_bucket_hash(key, key_size);
|
|
|
|
bucket = sock_hash_select_bucket(htab, hash);
|
|
|
|
|
|
|
|
raw_spin_lock_bh(&bucket->lock);
|
|
|
|
elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
|
|
|
|
if (elem && flags == BPF_NOEXIST) {
|
|
|
|
ret = -EEXIST;
|
|
|
|
goto out_unlock;
|
|
|
|
} else if (!elem && flags == BPF_EXIST) {
|
|
|
|
ret = -ENOENT;
|
|
|
|
goto out_unlock;
|
|
|
|
}
|
|
|
|
|
|
|
|
elem_new = sock_hash_alloc_elem(htab, key, key_size, hash, sk, elem);
|
|
|
|
if (IS_ERR(elem_new)) {
|
|
|
|
ret = PTR_ERR(elem_new);
|
|
|
|
goto out_unlock;
|
|
|
|
}
|
|
|
|
|
|
|
|
sock_map_add_link(psock, link, map, elem_new);
|
|
|
|
/* Add new element to the head of the list, so that
|
|
|
|
* concurrent search will find it before old elem.
|
|
|
|
*/
|
|
|
|
hlist_add_head_rcu(&elem_new->node, &bucket->head);
|
|
|
|
if (elem) {
|
|
|
|
hlist_del_rcu(&elem->node);
|
|
|
|
sock_map_unref(elem->sk, elem);
|
|
|
|
sock_hash_free_elem(htab, elem);
|
|
|
|
}
|
|
|
|
raw_spin_unlock_bh(&bucket->lock);
|
|
|
|
return 0;
|
|
|
|
out_unlock:
|
|
|
|
raw_spin_unlock_bh(&bucket->lock);
|
|
|
|
sk_psock_put(sk, psock);
|
|
|
|
out_free:
|
|
|
|
sk_psock_free_link(link);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sock_hash_get_next_key(struct bpf_map *map, void *key,
|
|
|
|
void *key_next)
|
|
|
|
{
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
|
|
|
|
struct bpf_shtab_elem *elem, *elem_next;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
u32 hash, key_size = map->key_size;
|
|
|
|
struct hlist_head *head;
|
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
if (!key)
|
|
|
|
goto find_first_elem;
|
|
|
|
hash = sock_hash_bucket_hash(key, key_size);
|
|
|
|
head = &sock_hash_select_bucket(htab, hash)->head;
|
|
|
|
elem = sock_hash_lookup_elem_raw(head, hash, key, key_size);
|
|
|
|
if (!elem)
|
|
|
|
goto find_first_elem;
|
|
|
|
|
2020-09-10 00:27:11 +08:00
|
|
|
elem_next = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&elem->node)),
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab_elem, node);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
if (elem_next) {
|
|
|
|
memcpy(key_next, elem_next->key, key_size);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
i = hash & (htab->buckets_num - 1);
|
|
|
|
i++;
|
|
|
|
find_first_elem:
|
|
|
|
for (; i < htab->buckets_num; i++) {
|
|
|
|
head = &sock_hash_select_bucket(htab, i)->head;
|
2020-09-10 00:27:11 +08:00
|
|
|
elem_next = hlist_entry_safe(rcu_dereference(hlist_first_rcu(head)),
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab_elem, node);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
if (elem_next) {
|
|
|
|
memcpy(key_next, elem_next->key, key_size);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
|
|
|
|
{
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab *htab;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
int i, err;
|
|
|
|
|
|
|
|
if (!capable(CAP_NET_ADMIN))
|
|
|
|
return ERR_PTR(-EPERM);
|
|
|
|
if (attr->max_entries == 0 ||
|
|
|
|
attr->key_size == 0 ||
|
2020-02-19 01:10:18 +08:00
|
|
|
(attr->value_size != sizeof(u32) &&
|
|
|
|
attr->value_size != sizeof(u64)) ||
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
|
|
|
|
return ERR_PTR(-EINVAL);
|
|
|
|
if (attr->key_size > MAX_BPF_STACK)
|
|
|
|
return ERR_PTR(-E2BIG);
|
|
|
|
|
2020-12-02 05:58:42 +08:00
|
|
|
htab = kzalloc(sizeof(*htab), GFP_USER | __GFP_ACCOUNT);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
if (!htab)
|
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
|
|
|
|
|
bpf_map_init_from_attr(&htab->map, attr);
|
|
|
|
|
|
|
|
htab->buckets_num = roundup_pow_of_two(htab->map.max_entries);
|
2020-06-20 05:11:42 +08:00
|
|
|
htab->elem_size = sizeof(struct bpf_shtab_elem) +
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
round_up(htab->map.key_size, 8);
|
|
|
|
if (htab->buckets_num == 0 ||
|
2020-06-20 05:11:42 +08:00
|
|
|
htab->buckets_num > U32_MAX / sizeof(struct bpf_shtab_bucket)) {
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
err = -EINVAL;
|
|
|
|
goto free_htab;
|
|
|
|
}
|
|
|
|
|
|
|
|
htab->buckets = bpf_map_area_alloc(htab->buckets_num *
|
2020-06-20 05:11:42 +08:00
|
|
|
sizeof(struct bpf_shtab_bucket),
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
htab->map.numa_node);
|
|
|
|
if (!htab->buckets) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto free_htab;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < htab->buckets_num; i++) {
|
|
|
|
INIT_HLIST_HEAD(&htab->buckets[i].head);
|
|
|
|
raw_spin_lock_init(&htab->buckets[i].lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
return &htab->map;
|
|
|
|
free_htab:
|
|
|
|
kfree(htab);
|
|
|
|
return ERR_PTR(err);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sock_hash_free(struct bpf_map *map)
|
|
|
|
{
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab *htab = container_of(map, struct bpf_shtab, map);
|
|
|
|
struct bpf_shtab_bucket *bucket;
|
2020-06-08 04:52:29 +08:00
|
|
|
struct hlist_head unlink_list;
|
2020-06-20 05:11:42 +08:00
|
|
|
struct bpf_shtab_elem *elem;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
struct hlist_node *node;
|
|
|
|
int i;
|
|
|
|
|
bpf, sockmap: Remove bucket->lock from sock_{hash|map}_free
The bucket->lock is not needed in the sock_hash_free and sock_map_free
calls, in fact it is causing a splat due to being inside rcu block.
| BUG: sleeping function called from invalid context at net/core/sock.c:2935
| in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 62, name: kworker/0:1
| 3 locks held by kworker/0:1/62:
| #0: ffff88813b019748 ((wq_completion)events){+.+.}, at: process_one_work+0x1d7/0x5e0
| #1: ffffc900000abe50 ((work_completion)(&map->work)){+.+.}, at: process_one_work+0x1d7/0x5e0
| #2: ffff8881381f6df8 (&stab->lock){+...}, at: sock_map_free+0x26/0x180
| CPU: 0 PID: 62 Comm: kworker/0:1 Not tainted 5.5.0-04008-g7b083332376e #454
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014
| Workqueue: events bpf_map_free_deferred
| Call Trace:
| dump_stack+0x71/0xa0
| ___might_sleep.cold+0xa6/0xb6
| lock_sock_nested+0x28/0x90
| sock_map_free+0x5f/0x180
| bpf_map_free_deferred+0x58/0x80
| process_one_work+0x260/0x5e0
| worker_thread+0x4d/0x3e0
| kthread+0x108/0x140
| ? process_one_work+0x5e0/0x5e0
| ? kthread_park+0x90/0x90
| ret_from_fork+0x3a/0x50
The reason we have stab->lock and bucket->locks in sockmap code is to
handle checking EEXIST in update/delete cases. We need to be careful during
an update operation that we check for EEXIST and we need to ensure that the
psock object is not in some partial state of removal/insertion while we do
this. So both map_update_common and sock_map_delete need to guard from being
run together potentially deleting an entry we are checking, etc. But by the
time we get to the tear-down code in sock_{ma[|hash}_free we have already
disconnected the map and we just did synchronize_rcu() in the line above so
no updates/deletes should be in flight. Because of this we can drop the
bucket locks from the map free'ing code, noting no update/deletes can be
in-flight.
Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
Reported-by: Jakub Sitnicki <jakub@cloudflare.com>
Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/158385850787.30597.8346421465837046618.stgit@john-Precision-5820-Tower
2020-03-11 00:41:48 +08:00
|
|
|
/* After the sync no updates or deletes will be in-flight so it
|
|
|
|
* is safe to walk map and remove entries without risking a race
|
|
|
|
* in EEXIST update case.
|
|
|
|
*/
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
synchronize_rcu();
|
|
|
|
for (i = 0; i < htab->buckets_num; i++) {
|
|
|
|
bucket = sock_hash_select_bucket(htab, i);
|
2020-06-08 04:52:29 +08:00
|
|
|
|
|
|
|
/* We are racing with sock_hash_delete_from_link to
|
|
|
|
* enter the spin-lock critical section. Every socket on
|
|
|
|
* the list is still linked to sockhash. Since link
|
|
|
|
* exists, psock exists and holds a ref to socket. That
|
|
|
|
* lets us to grab a socket ref too.
|
|
|
|
*/
|
|
|
|
raw_spin_lock_bh(&bucket->lock);
|
|
|
|
hlist_for_each_entry(elem, &bucket->head, node)
|
|
|
|
sock_hold(elem->sk);
|
|
|
|
hlist_move_list(&bucket->head, &unlink_list);
|
|
|
|
raw_spin_unlock_bh(&bucket->lock);
|
|
|
|
|
|
|
|
/* Process removed entries out of atomic context to
|
|
|
|
* block for socket lock before deleting the psock's
|
|
|
|
* link to sockhash.
|
|
|
|
*/
|
|
|
|
hlist_for_each_entry_safe(elem, node, &unlink_list, node) {
|
|
|
|
hlist_del(&elem->node);
|
2020-01-11 14:12:00 +08:00
|
|
|
lock_sock(elem->sk);
|
bpf, sockmap: Don't sleep while holding RCU lock on tear-down
rcu_read_lock is needed to protect access to psock inside sock_map_unref
when tearing down the map. However, we can't afford to sleep in lock_sock
while in RCU read-side critical section. Grab the RCU lock only after we
have locked the socket.
This fixes RCU warnings triggerable on a VM with 1 vCPU when free'ing a
sockmap/sockhash that contains at least one socket:
| =============================
| WARNING: suspicious RCU usage
| 5.5.0-04005-g8fc91b972b73 #450 Not tainted
| -----------------------------
| include/linux/rcupdate.h:272 Illegal context switch in RCU read-side critical section!
|
| other info that might help us debug this:
|
|
| rcu_scheduler_active = 2, debug_locks = 1
| 4 locks held by kworker/0:1/62:
| #0: ffff88813b019748 ((wq_completion)events){+.+.}, at: process_one_work+0x1d7/0x5e0
| #1: ffffc900000abe50 ((work_completion)(&map->work)){+.+.}, at: process_one_work+0x1d7/0x5e0
| #2: ffffffff82065d20 (rcu_read_lock){....}, at: sock_map_free+0x5/0x170
| #3: ffff8881368c5df8 (&stab->lock){+...}, at: sock_map_free+0x64/0x170
|
| stack backtrace:
| CPU: 0 PID: 62 Comm: kworker/0:1 Not tainted 5.5.0-04005-g8fc91b972b73 #450
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014
| Workqueue: events bpf_map_free_deferred
| Call Trace:
| dump_stack+0x71/0xa0
| ___might_sleep+0x105/0x190
| lock_sock_nested+0x28/0x90
| sock_map_free+0x95/0x170
| bpf_map_free_deferred+0x58/0x80
| process_one_work+0x260/0x5e0
| worker_thread+0x4d/0x3e0
| kthread+0x108/0x140
| ? process_one_work+0x5e0/0x5e0
| ? kthread_park+0x90/0x90
| ret_from_fork+0x3a/0x50
| =============================
| WARNING: suspicious RCU usage
| 5.5.0-04005-g8fc91b972b73-dirty #452 Not tainted
| -----------------------------
| include/linux/rcupdate.h:272 Illegal context switch in RCU read-side critical section!
|
| other info that might help us debug this:
|
|
| rcu_scheduler_active = 2, debug_locks = 1
| 4 locks held by kworker/0:1/62:
| #0: ffff88813b019748 ((wq_completion)events){+.+.}, at: process_one_work+0x1d7/0x5e0
| #1: ffffc900000abe50 ((work_completion)(&map->work)){+.+.}, at: process_one_work+0x1d7/0x5e0
| #2: ffffffff82065d20 (rcu_read_lock){....}, at: sock_hash_free+0x5/0x1d0
| #3: ffff888139966e00 (&htab->buckets[i].lock){+...}, at: sock_hash_free+0x92/0x1d0
|
| stack backtrace:
| CPU: 0 PID: 62 Comm: kworker/0:1 Not tainted 5.5.0-04005-g8fc91b972b73-dirty #452
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014
| Workqueue: events bpf_map_free_deferred
| Call Trace:
| dump_stack+0x71/0xa0
| ___might_sleep+0x105/0x190
| lock_sock_nested+0x28/0x90
| sock_hash_free+0xec/0x1d0
| bpf_map_free_deferred+0x58/0x80
| process_one_work+0x260/0x5e0
| worker_thread+0x4d/0x3e0
| kthread+0x108/0x140
| ? process_one_work+0x5e0/0x5e0
| ? kthread_park+0x90/0x90
| ret_from_fork+0x3a/0x50
Fixes: 7e81a3530206 ("bpf: Sockmap, ensure sock lock held during tear down")
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200206111652.694507-2-jakub@cloudflare.com
2020-02-06 19:16:50 +08:00
|
|
|
rcu_read_lock();
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
sock_map_unref(elem->sk, elem);
|
bpf, sockmap: Don't sleep while holding RCU lock on tear-down
rcu_read_lock is needed to protect access to psock inside sock_map_unref
when tearing down the map. However, we can't afford to sleep in lock_sock
while in RCU read-side critical section. Grab the RCU lock only after we
have locked the socket.
This fixes RCU warnings triggerable on a VM with 1 vCPU when free'ing a
sockmap/sockhash that contains at least one socket:
| =============================
| WARNING: suspicious RCU usage
| 5.5.0-04005-g8fc91b972b73 #450 Not tainted
| -----------------------------
| include/linux/rcupdate.h:272 Illegal context switch in RCU read-side critical section!
|
| other info that might help us debug this:
|
|
| rcu_scheduler_active = 2, debug_locks = 1
| 4 locks held by kworker/0:1/62:
| #0: ffff88813b019748 ((wq_completion)events){+.+.}, at: process_one_work+0x1d7/0x5e0
| #1: ffffc900000abe50 ((work_completion)(&map->work)){+.+.}, at: process_one_work+0x1d7/0x5e0
| #2: ffffffff82065d20 (rcu_read_lock){....}, at: sock_map_free+0x5/0x170
| #3: ffff8881368c5df8 (&stab->lock){+...}, at: sock_map_free+0x64/0x170
|
| stack backtrace:
| CPU: 0 PID: 62 Comm: kworker/0:1 Not tainted 5.5.0-04005-g8fc91b972b73 #450
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014
| Workqueue: events bpf_map_free_deferred
| Call Trace:
| dump_stack+0x71/0xa0
| ___might_sleep+0x105/0x190
| lock_sock_nested+0x28/0x90
| sock_map_free+0x95/0x170
| bpf_map_free_deferred+0x58/0x80
| process_one_work+0x260/0x5e0
| worker_thread+0x4d/0x3e0
| kthread+0x108/0x140
| ? process_one_work+0x5e0/0x5e0
| ? kthread_park+0x90/0x90
| ret_from_fork+0x3a/0x50
| =============================
| WARNING: suspicious RCU usage
| 5.5.0-04005-g8fc91b972b73-dirty #452 Not tainted
| -----------------------------
| include/linux/rcupdate.h:272 Illegal context switch in RCU read-side critical section!
|
| other info that might help us debug this:
|
|
| rcu_scheduler_active = 2, debug_locks = 1
| 4 locks held by kworker/0:1/62:
| #0: ffff88813b019748 ((wq_completion)events){+.+.}, at: process_one_work+0x1d7/0x5e0
| #1: ffffc900000abe50 ((work_completion)(&map->work)){+.+.}, at: process_one_work+0x1d7/0x5e0
| #2: ffffffff82065d20 (rcu_read_lock){....}, at: sock_hash_free+0x5/0x1d0
| #3: ffff888139966e00 (&htab->buckets[i].lock){+...}, at: sock_hash_free+0x92/0x1d0
|
| stack backtrace:
| CPU: 0 PID: 62 Comm: kworker/0:1 Not tainted 5.5.0-04005-g8fc91b972b73-dirty #452
| Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014
| Workqueue: events bpf_map_free_deferred
| Call Trace:
| dump_stack+0x71/0xa0
| ___might_sleep+0x105/0x190
| lock_sock_nested+0x28/0x90
| sock_hash_free+0xec/0x1d0
| bpf_map_free_deferred+0x58/0x80
| process_one_work+0x260/0x5e0
| worker_thread+0x4d/0x3e0
| kthread+0x108/0x140
| ? process_one_work+0x5e0/0x5e0
| ? kthread_park+0x90/0x90
| ret_from_fork+0x3a/0x50
Fixes: 7e81a3530206 ("bpf: Sockmap, ensure sock lock held during tear down")
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200206111652.694507-2-jakub@cloudflare.com
2020-02-06 19:16:50 +08:00
|
|
|
rcu_read_unlock();
|
2020-01-11 14:12:00 +08:00
|
|
|
release_sock(elem->sk);
|
2020-06-08 04:52:29 +08:00
|
|
|
sock_put(elem->sk);
|
2020-06-08 04:52:28 +08:00
|
|
|
sock_hash_free_elem(htab, elem);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-06 19:16:51 +08:00
|
|
|
/* wait for psock readers accessing its map link */
|
|
|
|
synchronize_rcu();
|
|
|
|
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
bpf_map_area_free(htab->buckets);
|
|
|
|
kfree(htab);
|
|
|
|
}
|
|
|
|
|
2020-02-19 01:10:18 +08:00
|
|
|
static void *sock_hash_lookup_sys(struct bpf_map *map, void *key)
|
|
|
|
{
|
|
|
|
struct sock *sk;
|
|
|
|
|
|
|
|
if (map->value_size != sizeof(u64))
|
|
|
|
return ERR_PTR(-ENOSPC);
|
|
|
|
|
|
|
|
sk = __sock_hash_lookup_elem(map, key);
|
|
|
|
if (!sk)
|
|
|
|
return ERR_PTR(-ENOENT);
|
|
|
|
|
2020-09-30 23:18:16 +08:00
|
|
|
__sock_gen_cookie(sk);
|
2020-02-19 01:10:18 +08:00
|
|
|
return &sk->sk_cookie;
|
|
|
|
}
|
|
|
|
|
2020-02-19 01:10:19 +08:00
|
|
|
static void *sock_hash_lookup(struct bpf_map *map, void *key)
|
|
|
|
{
|
2020-04-30 02:11:52 +08:00
|
|
|
struct sock *sk;
|
|
|
|
|
|
|
|
sk = __sock_hash_lookup_elem(map, key);
|
2020-09-10 00:27:10 +08:00
|
|
|
if (!sk)
|
2020-04-30 02:11:52 +08:00
|
|
|
return NULL;
|
|
|
|
if (sk_is_refcounted(sk) && !refcount_inc_not_zero(&sk->sk_refcnt))
|
|
|
|
return NULL;
|
|
|
|
return sk;
|
2020-02-19 01:10:19 +08:00
|
|
|
}
|
|
|
|
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
static void sock_hash_release_progs(struct bpf_map *map)
|
|
|
|
{
|
2020-06-20 05:11:42 +08:00
|
|
|
psock_progs_drop(&container_of(map, struct bpf_shtab, map)->progs);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, sops,
|
|
|
|
struct bpf_map *, map, void *, key, u64, flags)
|
|
|
|
{
|
|
|
|
WARN_ON_ONCE(!rcu_read_lock_held());
|
|
|
|
|
|
|
|
if (likely(sock_map_sk_is_suitable(sops->sk) &&
|
|
|
|
sock_map_op_okay(sops)))
|
|
|
|
return sock_hash_update_common(map, key, sops->sk, flags);
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_sock_hash_update_proto = {
|
|
|
|
.func = bpf_sock_hash_update,
|
|
|
|
.gpl_only = false,
|
|
|
|
.pkt_access = true,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
.arg1_type = ARG_PTR_TO_CTX,
|
|
|
|
.arg2_type = ARG_CONST_MAP_PTR,
|
|
|
|
.arg3_type = ARG_PTR_TO_MAP_KEY,
|
|
|
|
.arg4_type = ARG_ANYTHING,
|
|
|
|
};
|
|
|
|
|
|
|
|
BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
|
|
|
|
struct bpf_map *, map, void *, key, u64, flags)
|
|
|
|
{
|
2020-02-19 01:10:16 +08:00
|
|
|
struct sock *sk;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
|
|
|
|
if (unlikely(flags & ~(BPF_F_INGRESS)))
|
|
|
|
return SK_DROP;
|
2020-02-19 01:10:16 +08:00
|
|
|
|
|
|
|
sk = __sock_hash_lookup_elem(map, key);
|
|
|
|
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
return SK_DROP;
|
2020-02-19 01:10:16 +08:00
|
|
|
|
2021-02-24 02:49:29 +08:00
|
|
|
skb_bpf_set_redir(skb, sk, flags & BPF_F_INGRESS);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
return SK_PASS;
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_sk_redirect_hash_proto = {
|
|
|
|
.func = bpf_sk_redirect_hash,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
.arg1_type = ARG_PTR_TO_CTX,
|
|
|
|
.arg2_type = ARG_CONST_MAP_PTR,
|
|
|
|
.arg3_type = ARG_PTR_TO_MAP_KEY,
|
|
|
|
.arg4_type = ARG_ANYTHING,
|
|
|
|
};
|
|
|
|
|
|
|
|
BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg *, msg,
|
|
|
|
struct bpf_map *, map, void *, key, u64, flags)
|
|
|
|
{
|
2020-02-19 01:10:16 +08:00
|
|
|
struct sock *sk;
|
|
|
|
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
if (unlikely(flags & ~(BPF_F_INGRESS)))
|
|
|
|
return SK_DROP;
|
2020-02-19 01:10:16 +08:00
|
|
|
|
|
|
|
sk = __sock_hash_lookup_elem(map, key);
|
|
|
|
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
return SK_DROP;
|
2020-02-19 01:10:16 +08:00
|
|
|
|
|
|
|
msg->flags = flags;
|
|
|
|
msg->sk_redir = sk;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
return SK_PASS;
|
|
|
|
}
|
|
|
|
|
|
|
|
const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
|
|
|
|
.func = bpf_msg_redirect_hash,
|
|
|
|
.gpl_only = false,
|
|
|
|
.ret_type = RET_INTEGER,
|
|
|
|
.arg1_type = ARG_PTR_TO_CTX,
|
|
|
|
.arg2_type = ARG_CONST_MAP_PTR,
|
|
|
|
.arg3_type = ARG_PTR_TO_MAP_KEY,
|
|
|
|
.arg4_type = ARG_ANYTHING,
|
|
|
|
};
|
|
|
|
|
2020-09-10 00:27:11 +08:00
|
|
|
struct sock_hash_seq_info {
|
|
|
|
struct bpf_map *map;
|
|
|
|
struct bpf_shtab *htab;
|
|
|
|
u32 bucket_id;
|
|
|
|
};
|
|
|
|
|
|
|
|
static void *sock_hash_seq_find_next(struct sock_hash_seq_info *info,
|
|
|
|
struct bpf_shtab_elem *prev_elem)
|
|
|
|
{
|
|
|
|
const struct bpf_shtab *htab = info->htab;
|
|
|
|
struct bpf_shtab_bucket *bucket;
|
|
|
|
struct bpf_shtab_elem *elem;
|
|
|
|
struct hlist_node *node;
|
|
|
|
|
|
|
|
/* try to find next elem in the same bucket */
|
|
|
|
if (prev_elem) {
|
|
|
|
node = rcu_dereference(hlist_next_rcu(&prev_elem->node));
|
|
|
|
elem = hlist_entry_safe(node, struct bpf_shtab_elem, node);
|
|
|
|
if (elem)
|
|
|
|
return elem;
|
|
|
|
|
|
|
|
/* no more elements, continue in the next bucket */
|
|
|
|
info->bucket_id++;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (; info->bucket_id < htab->buckets_num; info->bucket_id++) {
|
|
|
|
bucket = &htab->buckets[info->bucket_id];
|
|
|
|
node = rcu_dereference(hlist_first_rcu(&bucket->head));
|
|
|
|
elem = hlist_entry_safe(node, struct bpf_shtab_elem, node);
|
|
|
|
if (elem)
|
|
|
|
return elem;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *sock_hash_seq_start(struct seq_file *seq, loff_t *pos)
|
2020-10-12 17:18:50 +08:00
|
|
|
__acquires(rcu)
|
2020-09-10 00:27:11 +08:00
|
|
|
{
|
|
|
|
struct sock_hash_seq_info *info = seq->private;
|
|
|
|
|
|
|
|
if (*pos == 0)
|
|
|
|
++*pos;
|
|
|
|
|
|
|
|
/* pairs with sock_hash_seq_stop */
|
|
|
|
rcu_read_lock();
|
|
|
|
return sock_hash_seq_find_next(info, NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void *sock_hash_seq_next(struct seq_file *seq, void *v, loff_t *pos)
|
2020-10-12 17:18:50 +08:00
|
|
|
__must_hold(rcu)
|
2020-09-10 00:27:11 +08:00
|
|
|
{
|
|
|
|
struct sock_hash_seq_info *info = seq->private;
|
|
|
|
|
|
|
|
++*pos;
|
|
|
|
return sock_hash_seq_find_next(info, v);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int sock_hash_seq_show(struct seq_file *seq, void *v)
|
2020-10-12 17:18:50 +08:00
|
|
|
__must_hold(rcu)
|
2020-09-10 00:27:11 +08:00
|
|
|
{
|
|
|
|
struct sock_hash_seq_info *info = seq->private;
|
|
|
|
struct bpf_iter__sockmap ctx = {};
|
|
|
|
struct bpf_shtab_elem *elem = v;
|
|
|
|
struct bpf_iter_meta meta;
|
|
|
|
struct bpf_prog *prog;
|
|
|
|
|
|
|
|
meta.seq = seq;
|
|
|
|
prog = bpf_iter_get_info(&meta, !elem);
|
|
|
|
if (!prog)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
ctx.meta = &meta;
|
|
|
|
ctx.map = info->map;
|
|
|
|
if (elem) {
|
|
|
|
ctx.key = elem->key;
|
|
|
|
ctx.sk = elem->sk;
|
|
|
|
}
|
|
|
|
|
|
|
|
return bpf_iter_run_prog(prog, &ctx);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sock_hash_seq_stop(struct seq_file *seq, void *v)
|
2020-10-12 17:18:50 +08:00
|
|
|
__releases(rcu)
|
2020-09-10 00:27:11 +08:00
|
|
|
{
|
|
|
|
if (!v)
|
|
|
|
(void)sock_hash_seq_show(seq, NULL);
|
|
|
|
|
|
|
|
/* pairs with sock_hash_seq_start */
|
|
|
|
rcu_read_unlock();
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct seq_operations sock_hash_seq_ops = {
|
|
|
|
.start = sock_hash_seq_start,
|
|
|
|
.next = sock_hash_seq_next,
|
|
|
|
.stop = sock_hash_seq_stop,
|
|
|
|
.show = sock_hash_seq_show,
|
|
|
|
};
|
|
|
|
|
|
|
|
static int sock_hash_init_seq_private(void *priv_data,
|
|
|
|
struct bpf_iter_aux_info *aux)
|
|
|
|
{
|
|
|
|
struct sock_hash_seq_info *info = priv_data;
|
|
|
|
|
|
|
|
info->map = aux->map;
|
|
|
|
info->htab = container_of(aux->map, struct bpf_shtab, map);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static const struct bpf_iter_seq_info sock_hash_iter_seq_info = {
|
|
|
|
.seq_ops = &sock_hash_seq_ops,
|
|
|
|
.init_seq_private = sock_hash_init_seq_private,
|
|
|
|
.seq_priv_size = sizeof(struct sock_hash_seq_info),
|
|
|
|
};
|
|
|
|
|
2020-06-20 05:11:44 +08:00
|
|
|
static int sock_hash_map_btf_id;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
const struct bpf_map_ops sock_hash_ops = {
|
2020-08-28 09:18:06 +08:00
|
|
|
.map_meta_equal = bpf_map_meta_equal,
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
.map_alloc = sock_hash_alloc,
|
|
|
|
.map_free = sock_hash_free,
|
|
|
|
.map_get_next_key = sock_hash_get_next_key,
|
2020-08-21 18:29:47 +08:00
|
|
|
.map_update_elem = sock_map_update_elem,
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
.map_delete_elem = sock_hash_delete_elem,
|
2020-02-19 01:10:19 +08:00
|
|
|
.map_lookup_elem = sock_hash_lookup,
|
2020-02-19 01:10:18 +08:00
|
|
|
.map_lookup_elem_sys_only = sock_hash_lookup_sys,
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
.map_release_uref = sock_hash_release_progs,
|
|
|
|
.map_check_btf = map_check_no_btf,
|
2020-06-20 05:11:44 +08:00
|
|
|
.map_btf_name = "bpf_shtab",
|
|
|
|
.map_btf_id = &sock_hash_map_btf_id,
|
2020-09-10 00:27:11 +08:00
|
|
|
.iter_seq_info = &sock_hash_iter_seq_info,
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
|
|
|
|
{
|
|
|
|
switch (map->map_type) {
|
|
|
|
case BPF_MAP_TYPE_SOCKMAP:
|
|
|
|
return &container_of(map, struct bpf_stab, map)->progs;
|
|
|
|
case BPF_MAP_TYPE_SOCKHASH:
|
2020-06-20 05:11:42 +08:00
|
|
|
return &container_of(map, struct bpf_shtab, map)->progs;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2021-02-24 02:49:31 +08:00
|
|
|
static int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
|
|
|
|
struct bpf_prog *old, u32 which)
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
{
|
|
|
|
struct sk_psock_progs *progs = sock_map_progs(map);
|
2020-06-29 17:56:28 +08:00
|
|
|
struct bpf_prog **pprog;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
|
|
|
|
if (!progs)
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
|
|
|
|
switch (which) {
|
|
|
|
case BPF_SK_MSG_VERDICT:
|
2020-06-29 17:56:28 +08:00
|
|
|
pprog = &progs->msg_parser;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
break;
|
2021-02-24 02:49:26 +08:00
|
|
|
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
case BPF_SK_SKB_STREAM_PARSER:
|
2021-02-24 02:49:30 +08:00
|
|
|
pprog = &progs->stream_parser;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
break;
|
2021-02-24 02:49:26 +08:00
|
|
|
#endif
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
case BPF_SK_SKB_STREAM_VERDICT:
|
2021-03-31 10:32:30 +08:00
|
|
|
if (progs->skb_verdict)
|
|
|
|
return -EBUSY;
|
2021-02-24 02:49:30 +08:00
|
|
|
pprog = &progs->stream_verdict;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
break;
|
2021-03-31 10:32:30 +08:00
|
|
|
case BPF_SK_SKB_VERDICT:
|
|
|
|
if (progs->stream_verdict)
|
|
|
|
return -EBUSY;
|
|
|
|
pprog = &progs->skb_verdict;
|
|
|
|
break;
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
default:
|
|
|
|
return -EOPNOTSUPP;
|
|
|
|
}
|
|
|
|
|
2020-06-29 17:56:28 +08:00
|
|
|
if (old)
|
|
|
|
return psock_replace_prog(pprog, prog, old);
|
|
|
|
|
|
|
|
psock_set_prog(pprog, prog);
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-03-09 19:12:36 +08:00
|
|
|
static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
|
bpf, sockmap: convert to generic sk_msg interface
Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.
This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.
Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.
The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.
Joint work with John.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-10-13 08:45:58 +08:00
|
|
|
{
|
|
|
|
switch (link->map->map_type) {
|
|
|
|
case BPF_MAP_TYPE_SOCKMAP:
|
|
|
|
return sock_map_delete_from_link(link->map, sk,
|
|
|
|
link->link_raw);
|
|
|
|
case BPF_MAP_TYPE_SOCKHASH:
|
|
|
|
return sock_hash_delete_from_link(link->map, sk,
|
|
|
|
link->link_raw);
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2020-03-09 19:12:36 +08:00
|
|
|
|
|
|
|
static void sock_map_remove_links(struct sock *sk, struct sk_psock *psock)
|
|
|
|
{
|
|
|
|
struct sk_psock_link *link;
|
|
|
|
|
|
|
|
while ((link = sk_psock_link_pop(psock))) {
|
|
|
|
sock_map_unlink(sk, link);
|
|
|
|
sk_psock_free_link(link);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void sock_map_unhash(struct sock *sk)
|
|
|
|
{
|
|
|
|
void (*saved_unhash)(struct sock *sk);
|
|
|
|
struct sk_psock *psock;
|
|
|
|
|
|
|
|
rcu_read_lock();
|
|
|
|
psock = sk_psock(sk);
|
|
|
|
if (unlikely(!psock)) {
|
|
|
|
rcu_read_unlock();
|
|
|
|
if (sk->sk_prot->unhash)
|
|
|
|
sk->sk_prot->unhash(sk);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
saved_unhash = psock->saved_unhash;
|
|
|
|
sock_map_remove_links(sk, psock);
|
|
|
|
rcu_read_unlock();
|
|
|
|
saved_unhash(sk);
|
|
|
|
}
|
|
|
|
|
|
|
|
void sock_map_close(struct sock *sk, long timeout)
|
|
|
|
{
|
|
|
|
void (*saved_close)(struct sock *sk, long timeout);
|
|
|
|
struct sk_psock *psock;
|
|
|
|
|
|
|
|
lock_sock(sk);
|
|
|
|
rcu_read_lock();
|
2021-04-08 11:05:56 +08:00
|
|
|
psock = sk_psock_get(sk);
|
2020-03-09 19:12:36 +08:00
|
|
|
if (unlikely(!psock)) {
|
|
|
|
rcu_read_unlock();
|
|
|
|
release_sock(sk);
|
|
|
|
return sk->sk_prot->close(sk, timeout);
|
|
|
|
}
|
|
|
|
|
|
|
|
saved_close = psock->saved_close;
|
|
|
|
sock_map_remove_links(sk, psock);
|
|
|
|
rcu_read_unlock();
|
2021-03-31 10:32:25 +08:00
|
|
|
sk_psock_stop(psock, true);
|
2021-04-08 11:05:56 +08:00
|
|
|
sk_psock_put(sk, psock);
|
2020-03-09 19:12:36 +08:00
|
|
|
release_sock(sk);
|
|
|
|
saved_close(sk, timeout);
|
|
|
|
}
|
2020-09-10 00:27:11 +08:00
|
|
|
|
|
|
|
static int sock_map_iter_attach_target(struct bpf_prog *prog,
|
|
|
|
union bpf_iter_link_info *linfo,
|
|
|
|
struct bpf_iter_aux_info *aux)
|
|
|
|
{
|
|
|
|
struct bpf_map *map;
|
|
|
|
int err = -EINVAL;
|
|
|
|
|
|
|
|
if (!linfo->map.map_fd)
|
|
|
|
return -EBADF;
|
|
|
|
|
|
|
|
map = bpf_map_get_with_uref(linfo->map.map_fd);
|
|
|
|
if (IS_ERR(map))
|
|
|
|
return PTR_ERR(map);
|
|
|
|
|
|
|
|
if (map->map_type != BPF_MAP_TYPE_SOCKMAP &&
|
|
|
|
map->map_type != BPF_MAP_TYPE_SOCKHASH)
|
|
|
|
goto put_map;
|
|
|
|
|
|
|
|
if (prog->aux->max_rdonly_access > map->key_size) {
|
|
|
|
err = -EACCES;
|
|
|
|
goto put_map;
|
|
|
|
}
|
|
|
|
|
|
|
|
aux->map = map;
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
put_map:
|
|
|
|
bpf_map_put_with_uref(map);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sock_map_iter_detach_target(struct bpf_iter_aux_info *aux)
|
|
|
|
{
|
|
|
|
bpf_map_put_with_uref(aux->map);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct bpf_iter_reg sock_map_iter_reg = {
|
|
|
|
.target = "sockmap",
|
|
|
|
.attach_target = sock_map_iter_attach_target,
|
|
|
|
.detach_target = sock_map_iter_detach_target,
|
|
|
|
.show_fdinfo = bpf_iter_map_show_fdinfo,
|
|
|
|
.fill_link_info = bpf_iter_map_fill_link_info,
|
|
|
|
.ctx_arg_info_size = 2,
|
|
|
|
.ctx_arg_info = {
|
|
|
|
{ offsetof(struct bpf_iter__sockmap, key),
|
|
|
|
PTR_TO_RDONLY_BUF_OR_NULL },
|
|
|
|
{ offsetof(struct bpf_iter__sockmap, sk),
|
|
|
|
PTR_TO_BTF_ID_OR_NULL },
|
|
|
|
},
|
|
|
|
};
|
|
|
|
|
|
|
|
static int __init bpf_sockmap_iter_init(void)
|
|
|
|
{
|
|
|
|
sock_map_iter_reg.ctx_arg_info[1].btf_id =
|
|
|
|
btf_sock_ids[BTF_SOCK_TYPE_SOCK];
|
|
|
|
return bpf_iter_reg_target(&sock_map_iter_reg);
|
|
|
|
}
|
|
|
|
late_initcall(bpf_sockmap_iter_init);
|