Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2020-02-21

The following pull-request contains BPF updates for your *net-next* tree.

We've added 25 non-merge commits during the last 4 day(s) which contain
a total of 33 files changed, 2433 insertions(+), 161 deletions(-).

The main changes are:

1) Allow for adding TCP listen sockets into sock_map/hash so they can be used
   with reuseport BPF programs, from Jakub Sitnicki.

2) Add a new bpf_program__set_attach_target() helper for adding libbpf support
   to specify the tracepoint/function dynamically, from Eelco Chaudron.

3) Add bpf_read_branch_records() BPF helper which helps use cases like profile
   guided optimizations, from Daniel Xu.

4) Enable bpf_perf_event_read_value() in all tracing programs, from Song Liu.

5) Relax BTF mandatory check if only used for libbpf itself e.g. to process
   BTF defined maps, from Andrii Nakryiko.

6) Move BPF selftests -mcpu compilation attribute from 'probe' to 'v3' as it has
   been observed that the former fails in environments with low memlock, from Yonghong Song.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2020-02-21 15:22:45 -08:00
commit b105e8e281
33 changed files with 2433 additions and 161 deletions

View File

@ -20,11 +20,11 @@ Reporting bugs
Q: How do I report bugs for BPF kernel code?
--------------------------------------------
A: Since all BPF kernel development as well as bpftool and iproute2 BPF
loader development happens through the netdev kernel mailing list,
loader development happens through the bpf kernel mailing list,
please report any found issues around BPF to the following mailing
list:
netdev@vger.kernel.org
bpf@vger.kernel.org
This may also include issues related to XDP, BPF tracing, etc.
@ -46,17 +46,12 @@ Submitting patches
Q: To which mailing list do I need to submit my BPF patches?
------------------------------------------------------------
A: Please submit your BPF patches to the netdev kernel mailing list:
A: Please submit your BPF patches to the bpf kernel mailing list:
netdev@vger.kernel.org
Historically, BPF came out of networking and has always been maintained
by the kernel networking community. Although these days BPF touches
many other subsystems as well, the patches are still routed mainly
through the networking community.
bpf@vger.kernel.org
In case your patch has changes in various different subsystems (e.g.
tracing, security, etc), make sure to Cc the related kernel mailing
networking, tracing, security, etc), make sure to Cc the related kernel mailing
lists and maintainers from there as well, so they are able to review
the changes and provide their Acked-by's to the patches.
@ -168,7 +163,7 @@ a BPF point of view.
Be aware that this is not a final verdict that the patch will
automatically get accepted into net or net-next trees eventually:
On the netdev kernel mailing list reviews can come in at any point
On the bpf kernel mailing list reviews can come in at any point
in time. If discussions around a patch conclude that they cannot
get included as-is, we will either apply a follow-up fix or drop
them from the trees entirely. Therefore, we also reserve to rebase
@ -494,15 +489,15 @@ A: You need cmake and gcc-c++ as build requisites for LLVM. Once you have
that set up, proceed with building the latest LLVM and clang version
from the git repositories::
$ git clone http://llvm.org/git/llvm.git
$ cd llvm/tools
$ git clone --depth 1 http://llvm.org/git/clang.git
$ cd ..; mkdir build; cd build
$ cmake .. -DLLVM_TARGETS_TO_BUILD="BPF;X86" \
$ git clone https://github.com/llvm/llvm-project.git
$ mkdir -p llvm-project/llvm/build/install
$ cd llvm-project/llvm/build
$ cmake .. -G "Ninja" -DLLVM_TARGETS_TO_BUILD="BPF;X86" \
-DLLVM_ENABLE_PROJECTS="clang" \
-DBUILD_SHARED_LIBS=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DLLVM_BUILD_RUNTIME=OFF
$ make -j $(getconf _NPROCESSORS_ONLN)
$ ninja
The built binaries can then be found in the build/bin/ directory, which
you can add to your PATH variable.

View File

@ -352,29 +352,15 @@ static inline void sk_psock_update_proto(struct sock *sk,
psock->saved_write_space = sk->sk_write_space;
psock->sk_proto = sk->sk_prot;
sk->sk_prot = ops;
/* Pairs with lockless read in sk_clone_lock() */
WRITE_ONCE(sk->sk_prot, ops);
}
static inline void sk_psock_restore_proto(struct sock *sk,
struct sk_psock *psock)
{
sk->sk_prot->unhash = psock->saved_unhash;
if (psock->sk_proto) {
struct inet_connection_sock *icsk = inet_csk(sk);
bool has_ulp = !!icsk->icsk_ulp_data;
if (has_ulp) {
tcp_update_ulp(sk, psock->sk_proto,
psock->saved_write_space);
} else {
sk->sk_prot = psock->sk_proto;
sk->sk_write_space = psock->saved_write_space;
}
psock->sk_proto = NULL;
} else {
sk->sk_write_space = psock->saved_write_space;
}
tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
}
static inline void sk_psock_set_state(struct sk_psock *psock,

View File

@ -527,10 +527,43 @@ enum sk_pacing {
SK_PACING_FQ = 2,
};
/* Pointer stored in sk_user_data might not be suitable for copying
* when cloning the socket. For instance, it can point to a reference
* counted object. sk_user_data bottom bit is set if pointer must not
* be copied.
*/
#define SK_USER_DATA_NOCOPY 1UL
#define SK_USER_DATA_PTRMASK ~(SK_USER_DATA_NOCOPY)
/**
 * sk_user_data_is_nocopy - Check whether sk_user_data is tagged as no-copy
 * @sk: socket whose sk_user_data word is inspected
 *
 * Return: true when the SK_USER_DATA_NOCOPY bit is set in the stored value.
 */
static inline bool sk_user_data_is_nocopy(const struct sock *sk)
{
	const uintptr_t raw = (uintptr_t)sk->sk_user_data;

	return (raw & SK_USER_DATA_NOCOPY) != 0;
}
#define __sk_user_data(sk) ((*((void __rcu **)&(sk)->sk_user_data)))
#define rcu_dereference_sk_user_data(sk) rcu_dereference(__sk_user_data((sk)))
#define rcu_assign_sk_user_data(sk, ptr) rcu_assign_pointer(__sk_user_data((sk)), ptr)
/* Read sk_user_data via rcu_dereference() and return it with the bits
 * outside SK_USER_DATA_PTRMASK cleared, i.e. without the no-copy tag.
 */
#define rcu_dereference_sk_user_data(sk) \
({ \
void *__tmp = rcu_dereference(__sk_user_data((sk))); \
(void *)((uintptr_t)__tmp & SK_USER_DATA_PTRMASK); \
})
/* Publish @ptr as sk_user_data via rcu_assign_pointer(). Warns once if
 * @ptr has any bit set outside SK_USER_DATA_PTRMASK, since those bits
 * are used for tagging.
 */
#define rcu_assign_sk_user_data(sk, ptr) \
({ \
uintptr_t __tmp = (uintptr_t)(ptr); \
WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \
rcu_assign_pointer(__sk_user_data((sk)), __tmp); \
})
/* Like rcu_assign_sk_user_data(), but stores @ptr with the
 * SK_USER_DATA_NOCOPY bit OR-ed in. Warns once if @ptr already has a
 * bit set outside SK_USER_DATA_PTRMASK.
 */
#define rcu_assign_sk_user_data_nocopy(sk, ptr) \
({ \
uintptr_t __tmp = (uintptr_t)(ptr); \
WARN_ON_ONCE(__tmp & ~SK_USER_DATA_PTRMASK); \
rcu_assign_pointer(__sk_user_data((sk)), \
__tmp | SK_USER_DATA_NOCOPY); \
})
/*
* SK_CAN_REUSE and SK_NO_REUSE on a socket mean that the socket is OK

View File

@ -55,6 +55,4 @@ static inline bool reuseport_has_conns(struct sock *sk, bool set)
return ret;
}
int reuseport_get_id(struct sock_reuseport *reuse);
#endif /* _SOCK_REUSEPORT_H */

View File

@ -2203,6 +2203,13 @@ int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int nonblock, int flags, int *addr_len);
int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
struct msghdr *msg, int len, int flags);
/* tcp_bpf_clone() is only declared as an external function when
 * CONFIG_NET_SOCK_MSG is set; otherwise it is an empty inline stub so
 * callers need no #ifdef of their own.
 */
#ifdef CONFIG_NET_SOCK_MSG
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#else
static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
{
}
#endif
/* Call BPF_SOCK_OPS program that returns an int. If the return value
* is < 0, then the BPF op failed (for example if the loaded BPF

View File

@ -2890,6 +2890,25 @@ union bpf_attr {
* Obtain the 64bit jiffies
* Return
* The 64 bit jiffies
*
* int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
* Description
* For an eBPF program attached to a perf event, retrieve the
* branch records (struct perf_branch_entry) associated with *ctx*
* and store them in the buffer pointed to by *buf*, up to
* *size* bytes.
* Return
* On success, number of bytes written to *buf*. On error, a
* negative value.
*
* The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to
* instead return the number of bytes required to store all the
* branch entries. If this flag is set, *buf* may be NULL.
*
* **-EINVAL** if arguments invalid or **size** not a multiple
* of sizeof(struct perf_branch_entry).
*
* **-ENOENT** if architecture does not support branch records.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -3010,7 +3029,8 @@ union bpf_attr {
FN(probe_read_kernel_str), \
FN(tcp_send_ack), \
FN(send_signal_thread), \
FN(jiffies64),
FN(jiffies64), \
FN(read_branch_records),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@ -3089,6 +3109,9 @@ enum bpf_func_id {
/* BPF_FUNC_sk_storage_get flags */
#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0)
/* BPF_FUNC_read_branch_records flags. */
#define BPF_F_GET_BRANCH_RECORDS_SIZE (1ULL << 0)
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,

View File

@ -305,11 +305,6 @@ int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
if (err)
goto put_file_unlock;
/* Ensure reuse->reuseport_id is set */
err = reuseport_get_id(reuse);
if (err < 0)
goto put_file_unlock;
WRITE_ONCE(nsk->sk_user_data, &array->ptrs[index]);
rcu_assign_pointer(array->ptrs[index], nsk);
free_osk = osk;

View File

@ -3693,14 +3693,16 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (func_id != BPF_FUNC_sk_redirect_map &&
func_id != BPF_FUNC_sock_map_update &&
func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_map)
func_id != BPF_FUNC_msg_redirect_map &&
func_id != BPF_FUNC_sk_select_reuseport)
goto error;
break;
case BPF_MAP_TYPE_SOCKHASH:
if (func_id != BPF_FUNC_sk_redirect_hash &&
func_id != BPF_FUNC_sock_hash_update &&
func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_hash)
func_id != BPF_FUNC_msg_redirect_hash &&
func_id != BPF_FUNC_sk_select_reuseport)
goto error;
break;
case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
@ -3774,7 +3776,9 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
goto error;
break;
case BPF_FUNC_sk_select_reuseport:
if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
map->map_type != BPF_MAP_TYPE_SOCKMAP &&
map->map_type != BPF_MAP_TYPE_SOCKHASH)
goto error;
break;
case BPF_FUNC_map_peek_elem:

View File

@ -843,6 +843,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_send_signal_proto;
case BPF_FUNC_send_signal_thread:
return &bpf_send_signal_thread_proto;
case BPF_FUNC_perf_event_read_value:
return &bpf_perf_event_read_value_proto;
default:
return NULL;
}
@ -858,8 +860,6 @@ kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_stackid_proto;
case BPF_FUNC_get_stack:
return &bpf_get_stack_proto;
case BPF_FUNC_perf_event_read_value:
return &bpf_perf_event_read_value_proto;
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
case BPF_FUNC_override_return:
return &bpf_override_return_proto;
@ -1028,6 +1028,45 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
.arg3_type = ARG_CONST_SIZE,
};
/* bpf_read_branch_records() helper body.
 *
 * Copies branch records from ctx->data->br_stack into @buf, or reports
 * the number of bytes needed when BPF_F_GET_BRANCH_RECORDS_SIZE is set.
 *
 * Returns the number of bytes copied (or required, in size-query mode),
 * -EINVAL on bad flags, a NULL br_stack, a NULL @buf or a @size that is
 * not a multiple of sizeof(struct perf_branch_entry), and -ENOENT when
 * built without CONFIG_X86.
 */
BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx,
void *, buf, u32, size, u64, flags)
{
#ifndef CONFIG_X86
/* Builds without CONFIG_X86 always report the helper as unsupported. */
return -ENOENT;
#else
static const u32 br_entry_size = sizeof(struct perf_branch_entry);
struct perf_branch_stack *br_stack = ctx->data->br_stack;
u32 to_copy;
/* Reject any flag bit other than BPF_F_GET_BRANCH_RECORDS_SIZE. */
if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE))
return -EINVAL;
if (unlikely(!br_stack))
return -EINVAL;
/* Size query: @buf and @size are not examined on this path. */
if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE)
return br_stack->nr * br_entry_size;
if (!buf || (size % br_entry_size != 0))
return -EINVAL;
/* Copy whichever is smaller: all recorded entries or @size bytes. */
to_copy = min_t(u32, br_stack->nr * br_entry_size, size);
memcpy(buf, br_stack->entries, to_copy);
return to_copy;
#endif
}
/* Prototype for bpf_read_branch_records(): GPL-only, returns an integer.
 * Going by the ARG_* names, arg2 is a memory pointer that may be NULL and
 * arg3 is a constant size that may be zero; arg4 (flags) is unconstrained.
 */
static const struct bpf_func_proto bpf_read_branch_records_proto = {
.func = bpf_read_branch_records,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_PTR_TO_MEM_OR_NULL,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};
static const struct bpf_func_proto *
pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
@ -1040,6 +1079,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_stack_proto_tp;
case BPF_FUNC_perf_prog_read_value:
return &bpf_perf_prog_read_value_proto;
case BPF_FUNC_read_branch_records:
return &bpf_read_branch_records_proto;
default:
return tracing_func_proto(func_id, prog);
}

View File

@ -8620,6 +8620,7 @@ struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
struct bpf_map *, map, void *, key, u32, flags)
{
bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
struct sock_reuseport *reuse;
struct sock *selected_sk;
@ -8628,26 +8629,20 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
return -ENOENT;
reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
if (!reuse)
/* selected_sk is unhashed (e.g. by close()) after the
* above map_lookup_elem(). Treat selected_sk has already
* been removed from the map.
if (!reuse) {
/* reuseport_array has only sk with non NULL sk_reuseport_cb.
* The only (!reuse) case here is - the sk has already been
* unhashed (e.g. by close()), so treat it as -ENOENT.
*
* Other maps (e.g. sock_map) do not provide this guarantee and
* the sk may never be in the reuseport group to begin with.
*/
return -ENOENT;
return is_sockarray ? -ENOENT : -EINVAL;
}
if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
struct sock *sk;
struct sock *sk = reuse_kern->sk;
if (unlikely(!reuse_kern->reuseport_id))
/* There is a small race between adding the
* sk to the map and setting the
* reuse_kern->reuseport_id.
* Treat it as the sk has not been added to
* the bpf map yet.
*/
return -ENOENT;
sk = reuse_kern->sk;
if (sk->sk_protocol != selected_sk->sk_protocol)
return -EPROTOTYPE;
else if (sk->sk_family != selected_sk->sk_family)

View File

@ -512,7 +512,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
refcount_set(&psock->refcnt, 1);
rcu_assign_sk_user_data(sk, psock);
rcu_assign_sk_user_data_nocopy(sk, psock);
sock_hold(sk);
return psock;

View File

@ -1572,13 +1572,14 @@ static inline void sock_lock_init(struct sock *sk)
*/
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
const struct proto *prot = READ_ONCE(osk->sk_prot);
#ifdef CONFIG_SECURITY_NETWORK
void *sptr = nsk->sk_security;
#endif
memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
osk->sk_prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
#ifdef CONFIG_SECURITY_NETWORK
nsk->sk_security = sptr;
@ -1792,16 +1793,17 @@ static void sk_init_common(struct sock *sk)
*/
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
struct proto *prot = READ_ONCE(sk->sk_prot);
struct sock *newsk;
bool is_charged = true;
newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
newsk = sk_prot_alloc(prot, priority, sk->sk_family);
if (newsk != NULL) {
struct sk_filter *filter;
sock_copy(newsk, sk);
newsk->sk_prot_creator = sk->sk_prot;
newsk->sk_prot_creator = prot;
/* SANITY */
if (likely(newsk->sk_net_refcnt))
@ -1863,6 +1865,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
goto out;
}
/* Clear sk_user_data if parent had the pointer tagged
* as not suitable for copying when cloning.
*/
if (sk_user_data_is_nocopy(newsk))
RCU_INIT_POINTER(newsk->sk_user_data, NULL);
newsk->sk_err = 0;
newsk->sk_err_soft = 0;
newsk->sk_priority = 0;

View File

@ -10,6 +10,7 @@
#include <linux/skmsg.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/sock_diag.h>
struct bpf_stab {
struct bpf_map map;
@ -31,7 +32,8 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
return ERR_PTR(-EPERM);
if (attr->max_entries == 0 ||
attr->key_size != 4 ||
attr->value_size != 4 ||
(attr->value_size != sizeof(u32) &&
attr->value_size != sizeof(u64)) ||
attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
@ -228,6 +230,30 @@ out:
return ret;
}
/* Attach a psock to @sk without linking any parser/verdict programs.
 *
 * If @sk already has a psock, only the tcp_bpf proto is re-initialised.
 * Otherwise a new psock is created on the map's NUMA node and tcp_bpf is
 * initialised for the socket; the new psock is put again if that fails.
 *
 * Returns 0 on success, the error encoded by sk_psock_get_checked(),
 * -ENOMEM if the psock cannot be created, or the negative result of
 * tcp_bpf_init().
 */
static int sock_map_link_no_progs(struct bpf_map *map, struct sock *sk)
{
	struct sk_psock *psock = sk_psock_get_checked(sk);
	int err;

	if (IS_ERR(psock))
		return PTR_ERR(psock);

	if (!psock) {
		psock = sk_psock_init(sk, map->numa_node);
		if (!psock)
			return -ENOMEM;

		err = tcp_bpf_init(sk);
		if (err < 0)
			sk_psock_put(sk, psock);
		return err;
	}

	/* Socket already carries a psock: just refresh its proto hooks. */
	tcp_bpf_reinit(sk);
	return 0;
}
static void sock_map_free(struct bpf_map *map)
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
@ -275,7 +301,22 @@ static struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
static void *sock_map_lookup(struct bpf_map *map, void *key)
{
return ERR_PTR(-EOPNOTSUPP);
return __sock_map_lookup_elem(map, *(u32 *)key);
}
/* Syscall-side lookup for sockmap: returns a pointer to the socket's
 * cookie rather than the socket itself.
 *
 * Only maps created with an 8-byte value can report a cookie; other
 * maps get ERR_PTR(-ENOSPC). An empty slot yields ERR_PTR(-ENOENT).
 */
static void *sock_map_lookup_sys(struct bpf_map *map, void *key)
{
	struct sock *sk;

	if (map->value_size == sizeof(u64)) {
		sk = __sock_map_lookup_elem(map, *(u32 *)key);
		if (sk) {
			/* Make sure a cookie has been generated before exposing it. */
			sock_gen_cookie(sk);
			return &sk->sk_cookie;
		}
		return ERR_PTR(-ENOENT);
	}

	return ERR_PTR(-ENOSPC);
}
static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
@ -334,6 +375,11 @@ static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next)
return 0;
}
/* A socket may be used as a redirect target/source unless it is in the
 * TCP_LISTEN state.
 */
static bool sock_map_redirect_allowed(const struct sock *sk)
{
	if (sk->sk_state == TCP_LISTEN)
		return false;

	return true;
}
static int sock_map_update_common(struct bpf_map *map, u32 idx,
struct sock *sk, u64 flags)
{
@ -356,7 +402,14 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx,
if (!link)
return -ENOMEM;
ret = sock_map_link(map, &stab->progs, sk);
/* Only sockets we can redirect into/from in BPF need to hold
* refs to parser/verdict progs and have their sk_data_ready
* and sk_write_space callbacks overridden.
*/
if (sock_map_redirect_allowed(sk))
ret = sock_map_link(map, &stab->progs, sk);
else
ret = sock_map_link_no_progs(map, sk);
if (ret < 0)
goto out_free;
@ -391,7 +444,8 @@ out_free:
static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
{
return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB;
ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB ||
ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB;
}
static bool sock_map_sk_is_suitable(const struct sock *sk)
@ -400,14 +454,26 @@ static bool sock_map_sk_is_suitable(const struct sock *sk)
sk->sk_protocol == IPPROTO_TCP;
}
/* True when the socket's state is one of the states accepted for map
 * updates: established or listening.
 */
static bool sock_map_sk_state_allowed(const struct sock *sk)
{
	const int allowed_states = TCPF_ESTABLISHED | TCPF_LISTEN;

	return ((1 << sk->sk_state) & allowed_states) != 0;
}
static int sock_map_update_elem(struct bpf_map *map, void *key,
void *value, u64 flags)
{
u32 ufd = *(u32 *)value;
u32 idx = *(u32 *)key;
struct socket *sock;
struct sock *sk;
int ret;
u64 ufd;
if (map->value_size == sizeof(u64))
ufd = *(u64 *)value;
else
ufd = *(u32 *)value;
if (ufd > S32_MAX)
return -EINVAL;
sock = sockfd_lookup(ufd, &ret);
if (!sock)
@ -423,7 +489,7 @@ static int sock_map_update_elem(struct bpf_map *map, void *key,
}
sock_map_sk_acquire(sk);
if (sk->sk_state != TCP_ESTABLISHED)
if (!sock_map_sk_state_allowed(sk))
ret = -EOPNOTSUPP;
else
ret = sock_map_update_common(map, idx, sk, flags);
@ -460,13 +526,17 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
struct bpf_map *, map, u32, key, u64, flags)
{
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
struct sock *sk;
if (unlikely(flags & ~(BPF_F_INGRESS)))
return SK_DROP;
tcb->bpf.flags = flags;
tcb->bpf.sk_redir = __sock_map_lookup_elem(map, key);
if (!tcb->bpf.sk_redir)
sk = __sock_map_lookup_elem(map, key);
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
tcb->bpf.flags = flags;
tcb->bpf.sk_redir = sk;
return SK_PASS;
}
@ -483,12 +553,17 @@ const struct bpf_func_proto bpf_sk_redirect_map_proto = {
BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg,
struct bpf_map *, map, u32, key, u64, flags)
{
struct sock *sk;
if (unlikely(flags & ~(BPF_F_INGRESS)))
return SK_DROP;
msg->flags = flags;
msg->sk_redir = __sock_map_lookup_elem(map, key);
if (!msg->sk_redir)
sk = __sock_map_lookup_elem(map, key);
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
msg->flags = flags;
msg->sk_redir = sk;
return SK_PASS;
}
@ -506,6 +581,7 @@ const struct bpf_map_ops sock_map_ops = {
.map_alloc = sock_map_alloc,
.map_free = sock_map_free,
.map_get_next_key = sock_map_get_next_key,
.map_lookup_elem_sys_only = sock_map_lookup_sys,
.map_update_elem = sock_map_update_elem,
.map_delete_elem = sock_map_delete_elem,
.map_lookup_elem = sock_map_lookup,
@ -680,7 +756,14 @@ static int sock_hash_update_common(struct bpf_map *map, void *key,
if (!link)
return -ENOMEM;
ret = sock_map_link(map, &htab->progs, sk);
/* Only sockets we can redirect into/from in BPF need to hold
* refs to parser/verdict progs and have their sk_data_ready
* and sk_write_space callbacks overridden.
*/
if (sock_map_redirect_allowed(sk))
ret = sock_map_link(map, &htab->progs, sk);
else
ret = sock_map_link_no_progs(map, sk);
if (ret < 0)
goto out_free;
@ -729,10 +812,17 @@ out_free:
static int sock_hash_update_elem(struct bpf_map *map, void *key,
void *value, u64 flags)
{
u32 ufd = *(u32 *)value;
struct socket *sock;
struct sock *sk;
int ret;
u64 ufd;
if (map->value_size == sizeof(u64))
ufd = *(u64 *)value;
else
ufd = *(u32 *)value;
if (ufd > S32_MAX)
return -EINVAL;
sock = sockfd_lookup(ufd, &ret);
if (!sock)
@ -748,7 +838,7 @@ static int sock_hash_update_elem(struct bpf_map *map, void *key,
}
sock_map_sk_acquire(sk);
if (sk->sk_state != TCP_ESTABLISHED)
if (!sock_map_sk_state_allowed(sk))
ret = -EOPNOTSUPP;
else
ret = sock_hash_update_common(map, key, sk, flags);
@ -808,7 +898,8 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
return ERR_PTR(-EPERM);
if (attr->max_entries == 0 ||
attr->key_size == 0 ||
attr->value_size != 4 ||
(attr->value_size != sizeof(u32) &&
attr->value_size != sizeof(u64)) ||
attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
if (attr->key_size > MAX_BPF_STACK)
@ -885,6 +976,26 @@ static void sock_hash_free(struct bpf_map *map)
kfree(htab);
}
/* Syscall-side lookup for sockhash: returns a pointer to the socket's
 * cookie rather than the socket itself.
 *
 * Only maps created with an 8-byte value can report a cookie; other
 * maps get ERR_PTR(-ENOSPC). A missing key yields ERR_PTR(-ENOENT).
 */
static void *sock_hash_lookup_sys(struct bpf_map *map, void *key)
{
	struct sock *sk;

	if (map->value_size == sizeof(u64)) {
		sk = __sock_hash_lookup_elem(map, key);
		if (sk) {
			/* Make sure a cookie has been generated before exposing it. */
			sock_gen_cookie(sk);
			return &sk->sk_cookie;
		}
		return ERR_PTR(-ENOENT);
	}

	return ERR_PTR(-ENOSPC);
}
/* map_lookup_elem callback for sockhash: hands back whatever
 * __sock_hash_lookup_elem() finds for @key (NULL when nothing matches).
 */
static void *sock_hash_lookup(struct bpf_map *map, void *key)
{
	struct sock *found = __sock_hash_lookup_elem(map, key);

	return found;
}
static void sock_hash_release_progs(struct bpf_map *map)
{
psock_progs_drop(&container_of(map, struct bpf_htab, map)->progs);
@ -916,13 +1027,17 @@ BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
struct bpf_map *, map, void *, key, u64, flags)
{
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
struct sock *sk;
if (unlikely(flags & ~(BPF_F_INGRESS)))
return SK_DROP;
tcb->bpf.flags = flags;
tcb->bpf.sk_redir = __sock_hash_lookup_elem(map, key);
if (!tcb->bpf.sk_redir)
sk = __sock_hash_lookup_elem(map, key);
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
tcb->bpf.flags = flags;
tcb->bpf.sk_redir = sk;
return SK_PASS;
}
@ -939,12 +1054,17 @@ const struct bpf_func_proto bpf_sk_redirect_hash_proto = {
BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg *, msg,
struct bpf_map *, map, void *, key, u64, flags)
{
struct sock *sk;
if (unlikely(flags & ~(BPF_F_INGRESS)))
return SK_DROP;
msg->flags = flags;
msg->sk_redir = __sock_hash_lookup_elem(map, key);
if (!msg->sk_redir)
sk = __sock_hash_lookup_elem(map, key);
if (unlikely(!sk || !sock_map_redirect_allowed(sk)))
return SK_DROP;
msg->flags = flags;
msg->sk_redir = sk;
return SK_PASS;
}
@ -964,7 +1084,8 @@ const struct bpf_map_ops sock_hash_ops = {
.map_get_next_key = sock_hash_get_next_key,
.map_update_elem = sock_hash_update_elem,
.map_delete_elem = sock_hash_delete_elem,
.map_lookup_elem = sock_map_lookup,
.map_lookup_elem = sock_hash_lookup,
.map_lookup_elem_sys_only = sock_hash_lookup_sys,
.map_release_uref = sock_hash_release_progs,
.map_check_btf = map_check_no_btf,
};

View File

@ -16,27 +16,8 @@
DEFINE_SPINLOCK(reuseport_lock);
#define REUSEPORT_MIN_ID 1
static DEFINE_IDA(reuseport_ida);
int reuseport_get_id(struct sock_reuseport *reuse)
{
int id;
if (reuse->reuseport_id)
return reuse->reuseport_id;
id = ida_simple_get(&reuseport_ida, REUSEPORT_MIN_ID, 0,
/* Called under reuseport_lock */
GFP_ATOMIC);
if (id < 0)
return id;
reuse->reuseport_id = id;
return reuse->reuseport_id;
}
static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
{
unsigned int size = sizeof(struct sock_reuseport) +
@ -55,6 +36,7 @@ static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
int reuseport_alloc(struct sock *sk, bool bind_inany)
{
struct sock_reuseport *reuse;
int id, ret = 0;
/* bh lock used since this function call may precede hlist lock in
* soft irq of receive path or setsockopt from process context
@ -78,10 +60,18 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
reuse = __reuseport_alloc(INIT_SOCKS);
if (!reuse) {
spin_unlock_bh(&reuseport_lock);
return -ENOMEM;
ret = -ENOMEM;
goto out;
}
id = ida_alloc(&reuseport_ida, GFP_ATOMIC);
if (id < 0) {
kfree(reuse);
ret = id;
goto out;
}
reuse->reuseport_id = id;
reuse->socks[0] = sk;
reuse->num_socks = 1;
reuse->bind_inany = bind_inany;
@ -90,7 +80,7 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
out:
spin_unlock_bh(&reuseport_lock);
return 0;
return ret;
}
EXPORT_SYMBOL(reuseport_alloc);
@ -134,8 +124,7 @@ static void reuseport_free_rcu(struct rcu_head *head)
reuse = container_of(head, struct sock_reuseport, rcu);
sk_reuseport_prog_free(rcu_dereference_protected(reuse->prog, 1));
if (reuse->reuseport_id)
ida_simple_remove(&reuseport_ida, reuse->reuseport_id);
ida_free(&reuseport_ida, reuse->reuseport_id);
kfree(reuse);
}
@ -199,12 +188,15 @@ void reuseport_detach_sock(struct sock *sk)
reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
lockdep_is_held(&reuseport_lock));
/* At least one of the sk in this reuseport group is added to
* a bpf map. Notify the bpf side. The bpf map logic will
* remove the sk if it is indeed added to a bpf map.
/* Notify the bpf side. The sk may be added to a sockarray
* map. If so, sockarray logic will remove it from the map.
*
* Other bpf map types that work with reuseport, like sockmap,
* don't need an explicit callback from here. They override sk
* unhash/close ops to remove the sk from the map before we
* get to this point.
*/
if (reuse->reuseport_id)
bpf_sk_reuseport_detach(sk);
bpf_sk_reuseport_detach(sk);
rcu_assign_pointer(sk->sk_reuseport_cb, NULL);

View File

@ -645,8 +645,10 @@ static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock)
/* Reinit occurs when program types change e.g. TCP_BPF_TX is removed
* or added requiring sk_prot hook updates. We keep original saved
* hooks in this case.
*
* Pairs with lockless read in sk_clone_lock().
*/
sk->sk_prot = &tcp_bpf_prots[family][config];
WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]);
}
static int tcp_bpf_assert_proto_ops(struct proto *ops)
@ -691,3 +693,17 @@ int tcp_bpf_init(struct sock *sk)
rcu_read_unlock();
return 0;
}
/* A child cloned from a listening socket that had the tcp_bpf protocol
 * callbacks installed does not inherit the psock state those callbacks
 * expect. If the child's sk_prot is the base tcp_bpf proto for the
 * listener's address family, switch it back to the listener's
 * sk_prot_creator.
 */
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
{
	int family;

	if (sk->sk_family == AF_INET6)
		family = TCP_BPF_IPV6;
	else
		family = TCP_BPF_IPV4;

	if (newsk->sk_prot != &tcp_bpf_prots[family][TCP_BPF_BASE])
		return;

	newsk->sk_prot = sk->sk_prot_creator;
}

View File

@ -548,6 +548,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->fastopen_req = NULL;
RCU_INIT_POINTER(newtp->fastopen_rsk, NULL);
tcp_bpf_clone(sk, newsk);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS);
return newsk;

View File

@ -106,7 +106,8 @@ void tcp_update_ulp(struct sock *sk, struct proto *proto,
if (!icsk->icsk_ulp_ops) {
sk->sk_write_space = write_space;
sk->sk_prot = proto;
/* Pairs with lockless read in sk_clone_lock() */
WRITE_ONCE(sk->sk_prot, proto);
return;
}

View File

@ -742,7 +742,8 @@ static void tls_update(struct sock *sk, struct proto *p,
ctx->sk_write_space = write_space;
ctx->sk_proto = p;
} else {
sk->sk_prot = p;
/* Pairs with lockless read in sk_clone_lock(). */
WRITE_ONCE(sk->sk_prot, p);
sk->sk_write_space = write_space;
}
}

View File

@ -2890,6 +2890,25 @@ union bpf_attr {
* Obtain the 64bit jiffies
* Return
* The 64 bit jiffies
*
* int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
* Description
* For an eBPF program attached to a perf event, retrieve the
* branch records (struct perf_branch_entry) associated with *ctx*
* and store them in the buffer pointed to by *buf*, up to
* *size* bytes.
* Return
* On success, number of bytes written to *buf*. On error, a
* negative value.
*
* The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to
* instead return the number of bytes required to store all the
* branch entries. If this flag is set, *buf* may be NULL.
*
* **-EINVAL** if arguments invalid or **size** not a multiple
* of sizeof(struct perf_branch_entry).
*
* **-ENOENT** if architecture does not support branch records.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@ -3010,7 +3029,8 @@ union bpf_attr {
FN(probe_read_kernel_str), \
FN(tcp_send_ack), \
FN(send_signal_thread), \
FN(jiffies64),
FN(jiffies64), \
FN(read_branch_records),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@ -3089,6 +3109,9 @@ enum bpf_func_id {
/* BPF_FUNC_sk_storage_get flags */
#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0)
/* BPF_FUNC_read_branch_records flags. */
#define BPF_F_GET_BRANCH_RECORDS_SIZE (1ULL << 0)
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
BPF_ADJ_ROOM_NET,

View File

@ -2286,9 +2286,7 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
{
return obj->efile.btf_maps_shndx >= 0 ||
obj->efile.st_ops_shndx >= 0 ||
obj->nr_extern > 0;
return obj->efile.st_ops_shndx >= 0 || obj->nr_extern > 0;
}
static int bpf_object__init_btf(struct bpf_object *obj,
@ -4945,8 +4943,8 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
{
int err = 0, fd, i, btf_id;
if (prog->type == BPF_PROG_TYPE_TRACING ||
prog->type == BPF_PROG_TYPE_EXT) {
if ((prog->type == BPF_PROG_TYPE_TRACING ||
prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
btf_id = libbpf_find_attach_btf_id(prog);
if (btf_id <= 0)
return btf_id;
@ -6589,6 +6587,9 @@ static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
else
err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
if (err <= 0)
pr_warn("%s is not found in vmlinux BTF\n", name);
return err;
}
@ -6661,8 +6662,6 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog)
err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
name + section_defs[i].len,
attach_type);
if (err <= 0)
pr_warn("%s is not found in vmlinux BTF\n", name);
return err;
}
pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
@ -8138,6 +8137,31 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
}
}
/*
 * Set the attach target of @prog by function name.
 *
 * When @attach_prog_fd is non-zero the name is resolved against that
 * program via libbpf_find_prog_btf_id(); when it is zero the name is
 * resolved against the object's vmlinux BTF using the program's
 * expected attach type.
 *
 * Returns 0 on success, -EINVAL for a NULL @prog, a negative
 * @attach_prog_fd or a NULL @attach_func_name, or the negative value
 * returned by the lookup. On failure @prog is left unmodified.
 */
int bpf_program__set_attach_target(struct bpf_program *prog,
				   int attach_prog_fd,
				   const char *attach_func_name)
{
	int id;

	if (!prog)
		return -EINVAL;
	if (attach_prog_fd < 0)
		return -EINVAL;
	if (!attach_func_name)
		return -EINVAL;

	id = attach_prog_fd
		? libbpf_find_prog_btf_id(attach_func_name, attach_prog_fd)
		: __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
					attach_func_name,
					prog->expected_attach_type);
	if (id < 0)
		return id;

	prog->attach_btf_id = id;
	prog->attach_prog_fd = attach_prog_fd;

	return 0;
}
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
{
int err = 0, n, len, start, end = -1;

View File

@ -334,6 +334,10 @@ LIBBPF_API void
bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type);
LIBBPF_API int
bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
const char *attach_func_name);
LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog);

View File

@ -235,3 +235,8 @@ LIBBPF_0.0.7 {
btf__align_of;
libbpf_find_kernel_btf;
} LIBBPF_0.0.6;
LIBBPF_0.0.8 {
global:
bpf_program__set_attach_target;
} LIBBPF_0.0.7;

View File

@ -209,7 +209,7 @@ define CLANG_BPF_BUILD_RULE
$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
($(CLANG) $3 -O2 -target bpf -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -mattr=dwarfris -march=bpf -mcpu=probe $4 -filetype=obj -o $2
$(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
define CLANG_NOALU32_BPF_BUILD_RULE
@ -223,7 +223,7 @@ define CLANG_NATIVE_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
($(CLANG) $3 -O2 -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2
$(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE

View File

@ -0,0 +1,170 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <sys/socket.h>
#include <test_progs.h>
#include "bpf/libbpf_internal.h"
#include "test_perf_branches.skel.h"
/*
 * Validate the values the BPF program published in .bss for a perf event
 * that was opened with branch-stack sampling.
 *
 * Checks performed:
 *  - the program flagged at least one sample as valid;
 *  - the reported required size is positive;
 *  - both byte counts (stack buffer and global buffer) are non-negative and
 *    a whole multiple of sizeof(struct perf_branch_entry);
 *  - the global buffer received at least as many bytes as the stack buffer.
 */
static void check_good_sample(struct test_perf_branches *skel)
{
	int written_global = skel->bss->written_global_out;
	int required_size = skel->bss->required_size_out;
	int written_stack = skel->bss->written_stack_out;
	int pbe_size = sizeof(struct perf_branch_entry);
	int duration = 0; /* presumably referenced by CHECK() - TODO confirm */

	/* Message now ends in '\n' like every other CHECK() in this file. */
	if (CHECK(!skel->bss->valid, "output not valid",
		  "no valid sample from prog\n"))
		return;

	/*
	 * It's hard to validate the contents of the branch entries b/c it
	 * would require some kind of disassembler and also encoding the
	 * valid jump instructions for supported architectures. So just check
	 * the easy stuff for now.
	 */
	CHECK(required_size <= 0, "read_branches_size", "err %d\n", required_size);
	CHECK(written_stack < 0, "read_branches_stack", "err %d\n", written_stack);
	CHECK(written_stack % pbe_size != 0, "read_branches_stack",
	      "stack bytes written=%d not multiple of struct size=%d\n",
	      written_stack, pbe_size);
	CHECK(written_global < 0, "read_branches_global", "err %d\n", written_global);
	CHECK(written_global % pbe_size != 0, "read_branches_global",
	      "global bytes written=%d not multiple of struct size=%d\n",
	      written_global, pbe_size);
	CHECK(written_global < written_stack, "read_branches_size",
	      "written_global=%d < written_stack=%d\n", written_global, written_stack);
}
/*
 * Validate the values the BPF program published in .bss for a perf event
 * that was NOT set up for branch-stack sampling: every helper result is
 * expected to be either -EINVAL or -ENOENT.
 */
static void check_bad_sample(struct test_perf_branches *skel)
{
	int written_global = skel->bss->written_global_out;
	int required_size = skel->bss->required_size_out;
	int written_stack = skel->bss->written_stack_out;
	int duration = 0; /* presumably referenced by CHECK() - TODO confirm */

	/* Message now ends in '\n' like every other CHECK() in this file. */
	if (CHECK(!skel->bss->valid, "output not valid",
		  "no valid sample from prog\n"))
		return;

	CHECK((required_size != -EINVAL && required_size != -ENOENT),
	      "read_branches_size", "err %d\n", required_size);
	CHECK((written_stack != -EINVAL && written_stack != -ENOENT),
	      "read_branches_stack", "written %d\n", written_stack);
	CHECK((written_global != -EINVAL && written_global != -ENOENT),
	      "read_branches_global", "written %d\n", written_global);
}
/*
 * Shared driver for the perf_branches subtests.
 *
 * Loads the test_perf_branches skeleton, attaches its perf_branches program
 * to @perf_fd, pins the calling thread to CPU 0, burns some cycles so samples
 * are taken, detaches, and finally hands the skeleton to @cb so the caller
 * can validate what the program published.
 *
 * @perf_fd: perf event fd opened by the caller; not closed here.
 * @cb:      validation callback, invoked only after a successful run.
 *
 * NOTE(review): the CPU affinity set below is not restored before returning.
 */
static void test_perf_branches_common(int perf_fd,
void (*cb)(struct test_perf_branches *))
{
struct test_perf_branches *skel;
int err, i, duration = 0;
bool detached = false;
struct bpf_link *link;
/* volatile so the busy loop below is not optimized away */
volatile int j = 0;
cpu_set_t cpu_set;
skel = test_perf_branches__open_and_load();
if (CHECK(!skel, "test_perf_branches_load",
"perf_branches skeleton failed\n"))
return;
/* attach perf_event */
link = bpf_program__attach_perf_event(skel->progs.perf_branches, perf_fd);
/* on failure link is an error pointer, so skip bpf_link__destroy() */
if (CHECK(IS_ERR(link), "attach_perf_event", "err %ld\n", PTR_ERR(link)))
goto out_destroy_skel;
/* generate some branches on cpu 0 */
CPU_ZERO(&cpu_set);
CPU_SET(0, &cpu_set);
err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err))
goto out_destroy;
/* spin the loop for a while (random high number) */
for (i = 0; i < 1000000; ++i)
++j;
/* detach before running the callback that inspects the results */
test_perf_branches__detach(skel);
detached = true;
cb(skel);
out_destroy:
bpf_link__destroy(link);
out_destroy_skel:
/* error paths reach here still attached; the success path already detached */
if (!detached)
test_perf_branches__detach(skel);
test_perf_branches__destroy(skel);
}
/*
 * Positive case: open a hardware cycles event with branch-stack sampling
 * and run the common driver with check_good_sample() as validator.
 * The subtest is skipped when perf_event_open() fails with ENOENT or
 * EOPNOTSUPP.
 */
static void test_perf_branches_hw(void)
{
	struct perf_event_attr attr = {0};
	int duration = 0;
	bool open_failed;
	int perf_fd;

	/* describe the perf event */
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY;
	attr.freq = 1;
	attr.sample_freq = 4000;

	perf_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
	open_failed = perf_fd == -1;

	/*
	 * Some setups don't support branch records (virtual machines, !x86),
	 * so skip test in this case.
	 */
	if (open_failed && (errno == ENOENT || errno == EOPNOTSUPP)) {
		printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
		       __func__);
		test__skip();
		return;
	}

	/* any other open failure is a real test failure */
	if (open_failed &&
	    CHECK(perf_fd < 0, "perf_event_open", "err %d errno %d\n",
		  perf_fd, errno))
		return;

	test_perf_branches_common(perf_fd, check_good_sample);
	close(perf_fd);
}
/*
* Tests negative case -- run bpf_read_branch_records() on improperly configured
* perf event.
*/
/*
 * Negative case: open a software cpu-clock event that carries no
 * branch-stack sampling configuration and run the common driver with
 * check_bad_sample() as validator.
 */
static void test_perf_branches_no_hw(void)
{
	struct perf_event_attr attr = {0};
	int duration = 0;
	int perf_fd;

	/* describe the perf event */
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.freq = 1;
	attr.sample_freq = 4000;

	perf_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
	if (!CHECK(perf_fd < 0, "perf_event_open", "err %d\n", perf_fd)) {
		test_perf_branches_common(perf_fd, check_bad_sample);
		close(perf_fd);
	}
}
void test_perf_branches(void)
{
if (test__start_subtest("perf_branches_hw"))
test_perf_branches_hw();
if (test__start_subtest("perf_branches_no_hw"))
test_perf_branches_no_hw();
}

View File

@ -36,6 +36,7 @@ static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
static __u32 expected_results[NR_RESULTS];
static int sk_fds[REUSEPORT_ARRAY_SIZE];
static int reuseport_array = -1, outer_map = -1;
static enum bpf_map_type inner_map_type;
static int select_by_skb_data_prog;
static int saved_tcp_syncookie = -1;
static struct bpf_object *obj;
@ -63,13 +64,15 @@ static union sa46 {
} \
})
static int create_maps(void)
static int create_maps(enum bpf_map_type inner_type)
{
struct bpf_create_map_attr attr = {};
inner_map_type = inner_type;
/* Creating reuseport_array */
attr.name = "reuseport_array";
attr.map_type = BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
attr.map_type = inner_type;
attr.key_size = sizeof(__u32);
attr.value_size = sizeof(__u32);
attr.max_entries = REUSEPORT_ARRAY_SIZE;
@ -728,12 +731,36 @@ static void cleanup_per_test(bool no_inner_map)
static void cleanup(void)
{
if (outer_map != -1)
if (outer_map != -1) {
close(outer_map);
if (reuseport_array != -1)
outer_map = -1;
}
if (reuseport_array != -1) {
close(reuseport_array);
if (obj)
reuseport_array = -1;
}
if (obj) {
bpf_object__close(obj);
obj = NULL;
}
memset(expected_results, 0, sizeof(expected_results));
}
/*
 * Map a BPF map type to the short label used in subtest names.
 * Any type other than the three handled here yields "unknown".
 */
static const char *maptype_str(enum bpf_map_type type)
{
	if (type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
		return "reuseport_sockarray";
	if (type == BPF_MAP_TYPE_SOCKMAP)
		return "sockmap";
	if (type == BPF_MAP_TYPE_SOCKHASH)
		return "sockhash";
	return "unknown";
}
static const char *family_str(sa_family_t family)
@ -781,13 +808,21 @@ static void test_config(int sotype, sa_family_t family, bool inany)
const struct test *t;
for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
snprintf(s, sizeof(s), "%s/%s %s %s",
snprintf(s, sizeof(s), "%s %s/%s %s %s",
maptype_str(inner_map_type),
family_str(family), sotype_str(sotype),
inany ? "INANY" : "LOOPBACK", t->name);
if (!test__start_subtest(s))
continue;
if (sotype == SOCK_DGRAM &&
inner_map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
/* SOCKMAP/SOCKHASH don't support UDP yet */
test__skip();
continue;
}
setup_per_test(sotype, family, inany, t->no_inner_map);
t->fn(sotype, family);
cleanup_per_test(t->no_inner_map);
@ -816,13 +851,20 @@ static void test_all(void)
test_config(c->sotype, c->family, c->inany);
}
void test_select_reuseport(void)
void test_map_type(enum bpf_map_type mt)
{
if (create_maps())
if (create_maps(mt))
goto out;
if (prepare_bpf_obj())
goto out;
test_all();
out:
cleanup();
}
void test_select_reuseport(void)
{
saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
if (saved_tcp_fo < 0)
goto out;
@ -835,8 +877,9 @@ void test_select_reuseport(void)
if (disable_syncookie())
goto out;
test_all();
test_map_type(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
test_map_type(BPF_MAP_TYPE_SOCKMAP);
test_map_type(BPF_MAP_TYPE_SOCKHASH);
out:
cleanup();
restore_sysctls();
}

View File

@ -0,0 +1,124 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Cloudflare
/*
* Tests for sockmap/sockhash holding kTLS sockets.
*/
#include "test_progs.h"
#define MAX_TEST_NAME 80
#define TCP_ULP 31
/*
 * Create a TCP socket of the given address @family and put it into the
 * listening state (no explicit bind() is performed here).
 *
 * Returns the listening socket fd, or -1 after recording a test failure.
 * On failure no descriptor is left open.
 */
static int tcp_server(int family)
{
	int err, s;

	s = socket(family, SOCK_STREAM, 0);
	if (CHECK_FAIL(s == -1)) {
		perror("socket");
		return -1;
	}

	err = listen(s, SOMAXCONN);
	if (CHECK_FAIL(err)) {
		/* report first so close() cannot clobber errno */
		perror("listen");
		/* don't leak the socket when listen() fails */
		close(s);
		return -1;
	}

	return s;
}
/*
 * Dissolve the association of socket @fd by connecting it to an
 * AF_UNSPEC address. Returns whatever connect() returns.
 */
static int disconnect(int fd)
{
	struct sockaddr unspec = { .sa_family = AF_UNSPEC };
	int rc = connect(fd, &unspec, sizeof(unspec));

	return rc;
}
/*
 * Disconnect (unhash) a kTLS socket after removing it from sockmap.
 *
 * Sequence: start a listening server, connect a client to it, insert the
 * client into @map at key 0, enable the "tls" ULP on the client, delete
 * the map entry again and finally disconnect the client. Every step that
 * fails records a test failure and unwinds the sockets opened so far.
 *
 * @family: address family used for both sockets.
 * @map:    fd of the sockmap/sockhash under test; not closed here.
 */
static void test_sockmap_ktls_disconnect_after_delete(int family, int map)
{
	struct sockaddr_storage addr = {0};
	socklen_t len = sizeof(addr);
	int err, cli, srv, zero = 0;

	srv = tcp_server(family);
	if (srv == -1)
		return;

	/* learn which address the listening socket ended up on */
	err = getsockname(srv, (struct sockaddr *)&addr, &len);
	if (CHECK_FAIL(err)) {
		/* label matches the call that actually failed */
		perror("getsockname");
		goto close_srv;
	}

	cli = socket(family, SOCK_STREAM, 0);
	if (CHECK_FAIL(cli == -1)) {
		perror("socket");
		goto close_srv;
	}

	err = connect(cli, (struct sockaddr *)&addr, len);
	if (CHECK_FAIL(err)) {
		perror("connect");
		goto close_cli;
	}

	err = bpf_map_update_elem(map, &zero, &cli, 0);
	if (CHECK_FAIL(err)) {
		perror("bpf_map_update_elem");
		goto close_cli;
	}

	err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
	if (CHECK_FAIL(err)) {
		perror("setsockopt(TCP_ULP)");
		goto close_cli;
	}

	err = bpf_map_delete_elem(map, &zero);
	if (CHECK_FAIL(err)) {
		perror("bpf_map_delete_elem");
		goto close_cli;
	}

	err = disconnect(cli);
	if (CHECK_FAIL(err))
		perror("disconnect");

close_cli:
	close(cli);
close_srv:
	close(srv);
}
/*
 * Create a one-entry map of @map_type and, if the harness selects the
 * corresponding subtest, run the disconnect-after-delete scenario for
 * @family against it.
 *
 * The map fd is closed on every path once it has been created, including
 * when the subtest is filtered out.
 */
static void run_tests(int family, enum bpf_map_type map_type)
{
	char test_name[MAX_TEST_NAME];
	int map;

	map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
	if (CHECK_FAIL(map == -1)) {
		/* label matches the function that was actually called */
		perror("bpf_create_map");
		return;
	}

	snprintf(test_name, sizeof(test_name),
		 "sockmap_ktls disconnect_after_delete %s %s",
		 family == AF_INET ? "IPv4" : "IPv6",
		 map_type == BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH");

	if (test__start_subtest(test_name))
		test_sockmap_ktls_disconnect_after_delete(family, map);

	/* previously leaked when the subtest was not selected */
	close(map);
}
void test_sockmap_ktls(void)
{
run_tests(AF_INET, BPF_MAP_TYPE_SOCKMAP);
run_tests(AF_INET, BPF_MAP_TYPE_SOCKHASH);
run_tests(AF_INET6, BPF_MAP_TYPE_SOCKMAP);
run_tests(AF_INET6, BPF_MAP_TYPE_SOCKHASH);
}

File diff suppressed because it is too large Load Diff

View File

@ -55,31 +55,40 @@ void test_trampoline_count(void)
/* attach 'allowed' 40 trampoline programs */
for (i = 0; i < MAX_TRAMP_PROGS; i++) {
obj = bpf_object__open_file(object, NULL);
if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
obj = NULL;
goto cleanup;
}
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d\n", err))
goto cleanup;
inst[i].obj = obj;
obj = NULL;
if (rand() % 2) {
link = load(obj, fentry_name);
if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
link = load(inst[i].obj, fentry_name);
if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
link = NULL;
goto cleanup;
}
inst[i].link_fentry = link;
} else {
link = load(obj, fexit_name);
if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
link = load(inst[i].obj, fexit_name);
if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
link = NULL;
goto cleanup;
}
inst[i].link_fexit = link;
}
}
/* and try 1 extra.. */
obj = bpf_object__open_file(object, NULL);
if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
obj = NULL;
goto cleanup;
}
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d\n", err))
@ -104,7 +113,9 @@ void test_trampoline_count(void)
cleanup_extra:
bpf_object__close(obj);
cleanup:
while (--i) {
if (i >= MAX_TRAMP_PROGS)
i = MAX_TRAMP_PROGS - 1;
for (; i >= 0; i--) {
bpf_link__destroy(inst[i].link_fentry);
bpf_link__destroy(inst[i].link_fexit);
bpf_object__close(inst[i].obj);

View File

@ -14,7 +14,7 @@ void test_xdp_bpf2bpf(void)
struct test_xdp *pkt_skel = NULL;
struct test_xdp_bpf2bpf *ftrace_skel = NULL;
struct vip key4 = {.protocol = 6, .family = AF_INET};
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
struct bpf_program *prog;
/* Load XDP program to introspect */
pkt_skel = test_xdp__open_and_load();
@ -27,11 +27,21 @@ void test_xdp_bpf2bpf(void)
bpf_map_update_elem(map_fd, &key4, &value4, 0);
/* Load trace program */
opts.attach_prog_fd = pkt_fd,
ftrace_skel = test_xdp_bpf2bpf__open_opts(&opts);
ftrace_skel = test_xdp_bpf2bpf__open();
if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n"))
goto out;
/* Demonstrate the bpf_program__set_attach_target() API rather than
* the load with options, i.e. opts.attach_prog_fd.
*/
prog = ftrace_skel->progs.trace_on_entry;
bpf_program__set_expected_attach_type(prog, BPF_TRACE_FENTRY);
bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
prog = ftrace_skel->progs.trace_on_exit;
bpf_program__set_expected_attach_type(prog, BPF_TRACE_FEXIT);
bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
err = test_xdp_bpf2bpf__load(ftrace_skel);
if (CHECK(err, "__load", "ftrace skeleton failed\n"))
goto out;

View File

@ -0,0 +1,50 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#include <stddef.h>
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_trace_helpers.h"
/*
 * Results published by the perf_branches program below; each is written
 * only on the path that also sets 'valid' to 1.
 */
int valid = 0;
int required_size_out = 0;
int written_stack_out = 0;
int written_global_out = 0;
/*
 * Global destination buffer for branch records: 30 entries of three __u64
 * words each.
 * NOTE(review): presumably sized to stand in for struct perf_branch_entry -
 * confirm against the kernel definition.
 */
struct {
__u64 _a;
__u64 _b;
__u64 _c;
} fpbe[30] = {0};
/*
 * perf_event program exercising bpf_read_branch_records() three ways:
 * into a stack buffer, in size-query mode (NULL buffer with
 * BPF_F_GET_BRANCH_RECORDS_SIZE), and into the global 'fpbe' buffer.
 *
 * Returns 1 without publishing anything when either buffer read returns 0;
 * otherwise stores all three results in the *_out globals, sets 'valid'
 * and returns 0. Negative (error) results are published as-is.
 */
SEC("perf_event")
int perf_branches(void *ctx)
{
/* stack destination: 12 __u64 words, i.e. four 3-word entries */
__u64 entries[4 * 3] = {0};
int required_size, written_stack, written_global;
/* write to stack */
written_stack = bpf_read_branch_records(ctx, entries, sizeof(entries), 0);
/* ignore spurious events */
if (!written_stack)
return 1;
/* get required size */
required_size = bpf_read_branch_records(ctx, NULL, 0,
BPF_F_GET_BRANCH_RECORDS_SIZE);
/* write to the global buffer */
written_global = bpf_read_branch_records(ctx, fpbe, sizeof(fpbe), 0);
/* ignore spurious events */
if (!written_global)
return 1;
/* publish results; 'valid' is set last */
required_size_out = required_size;
written_stack_out = written_stack;
written_global_out = written_global;
valid = 1;
return 0;
}
char _license[] SEC("license") = "GPL";

View File

@ -0,0 +1,98 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Cloudflare
#include <errno.h>
#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
/* Two-slot SOCKMAP; used by the programs below when test_sockmap is set. */
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 2);
__type(key, __u32);
__type(value, __u64);
} sock_map SEC(".maps");
/* Two-slot SOCKHASH; used by the programs below when test_sockmap is clear. */
struct {
__uint(type, BPF_MAP_TYPE_SOCKHASH);
__uint(max_entries, 2);
__type(key, __u32);
__type(value, __u64);
} sock_hash SEC(".maps");
/*
 * Per-verdict hit counters: each program below looks up the slot indexed by
 * the verdict it is about to return and increments it.
 */
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 2);
__type(key, int);
__type(value, unsigned int);
} verdict_map SEC(".maps");
/* Selects which map the programs operate on: sock_map if true, else sock_hash. */
static volatile bool test_sockmap; /* toggled by user-space */
/* Stream parser: returns the length of the skb it was given. */
SEC("sk_skb/stream_parser")
int prog_skb_parser(struct __sk_buff *skb)
{
return skb->len;
}
/*
 * Stream verdict: redirects the skb to the socket stored under key 0 of
 * sock_map or sock_hash (chosen by test_sockmap), counts the resulting
 * verdict in verdict_map and returns it.
 */
SEC("sk_skb/stream_verdict")
int prog_skb_verdict(struct __sk_buff *skb)
{
unsigned int *count;
__u32 zero = 0;
int verdict;
if (test_sockmap)
verdict = bpf_sk_redirect_map(skb, &sock_map, zero, 0);
else
verdict = bpf_sk_redirect_hash(skb, &sock_hash, &zero, 0);
/* presumably verdict is SK_DROP or SK_PASS, matching the 2 slots - TODO confirm */
count = bpf_map_lookup_elem(&verdict_map, &verdict);
if (count)
(*count)++;
return verdict;
}
/*
 * sk_msg verdict: redirects the message to the socket stored under key 0 of
 * sock_map or sock_hash (chosen by test_sockmap), counts the resulting
 * verdict in verdict_map and returns it.
 */
SEC("sk_msg")
int prog_msg_verdict(struct sk_msg_md *msg)
{
unsigned int *count;
__u32 zero = 0;
int verdict;
if (test_sockmap)
verdict = bpf_msg_redirect_map(msg, &sock_map, zero, 0);
else
verdict = bpf_msg_redirect_hash(msg, &sock_hash, &zero, 0);
/* presumably verdict is SK_DROP or SK_PASS, matching the 2 slots - TODO confirm */
count = bpf_map_lookup_elem(&verdict_map, &verdict);
if (count)
(*count)++;
return verdict;
}
/*
 * Reuseport selector: asks bpf_sk_select_reuseport() to pick the socket
 * stored under key 0 of sock_map or sock_hash (chosen by test_sockmap).
 * A non-zero helper result becomes SK_DROP, zero becomes SK_PASS; the
 * verdict is counted in verdict_map and returned.
 */
SEC("sk_reuseport")
int prog_reuseport(struct sk_reuseport_md *reuse)
{
unsigned int *count;
int err, verdict;
__u32 zero = 0;
if (test_sockmap)
err = bpf_sk_select_reuseport(reuse, &sock_map, &zero, 0);
else
err = bpf_sk_select_reuseport(reuse, &sock_hash, &zero, 0);
verdict = err ? SK_DROP : SK_PASS;
count = bpf_map_lookup_elem(&verdict_map, &verdict);
if (count)
(*count)++;
return verdict;
}
int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";

View File

@ -28,7 +28,7 @@ struct xdp_buff {
} __attribute__((preserve_access_index));
__u64 test_result_fentry = 0;
SEC("fentry/_xdp_tx_iptunnel")
SEC("fentry/FUNC")
int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
{
test_result_fentry = xdp->rxq->dev->ifindex;
@ -36,7 +36,7 @@ int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
}
__u64 test_result_fexit = 0;
SEC("fexit/_xdp_tx_iptunnel")
SEC("fexit/FUNC")
int BPF_PROG(trace_on_exit, struct xdp_buff *xdp, int ret)
{
test_result_fexit = ret;

View File

@ -756,11 +756,7 @@ static void test_sockmap(unsigned int tasks, void *data)
/* Test update without programs */
for (i = 0; i < 6; i++) {
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
if (i < 2 && !err) {
printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n",
i, sfd[i]);
goto out_sockmap;
} else if (i >= 2 && err) {
if (err) {
printf("Failed noprog update sockmap '%i:%i'\n",
i, sfd[i]);
goto out_sockmap;