bpf-for-netdev
-----BEGIN PGP SIGNATURE-----

iHUEABYIAB0WIQTFp0I1jqZrAX+hPRXbK58LschIgwUCZG4AiAAKCRDbK58LschI
g+xlAQCmefGbDuwPckZLnomvt6gl4bkIjs7kc1ySbG9QBnaInwD/WyrJaQIPijuD
qziHPAyx+MEgPseFU1b7Le35SZ66IwM=
=s4R1
-----END PGP SIGNATURE-----

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2023-05-24

We've added 19 non-merge commits during the last 10 day(s) which contain
a total of 20 files changed, 738 insertions(+), 448 deletions(-).

The main changes are:

1) Batch of BPF sockmap fixes found when running against NGINX TCP tests,
   from John Fastabend.

2) Fix a memleak in the LRU{,_PERCPU} hash map when bucket locking fails,
   from Anton Protopopov.

3) Init the BPF offload table earlier than just late_initcall,
   from Jakub Kicinski.

4) Fix ctx access mask generation for 32-bit narrow loads of 64-bit fields,
   from Will Deacon.

5) Remove a now unsupported __fallthrough in BPF samples,
   from Andrii Nakryiko.

6) Fix a typo in pkg-config call for building sign-file,
   from Jeremy Sowden.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf, sockmap: Test progs verifier error with latest clang
  bpf, sockmap: Test FIONREAD returns correct bytes in rx buffer with drops
  bpf, sockmap: Test FIONREAD returns correct bytes in rx buffer
  bpf, sockmap: Test shutdown() correctly exits epoll and recv()=0
  bpf, sockmap: Build helper to create connected socket pair
  bpf, sockmap: Pull socket helpers out of listen test for general use
  bpf, sockmap: Incorrectly handling copied_seq
  bpf, sockmap: Wake up polling after data copy
  bpf, sockmap: TCP data stall on recv before accept
  bpf, sockmap: Handle fin correctly
  bpf, sockmap: Improved check for empty queue
  bpf, sockmap: Reschedule is now done through backlog
  bpf, sockmap: Convert schedule_work into delayed_work
  bpf, sockmap: Pass skb ownership through read_skb
  bpf: fix a memory leak in the LRU and LRU_PERCPU hash maps
  bpf: Fix mask generation for 32-bit narrow loads of 64-bit fields
  samples/bpf: Drop unnecessary fallthrough
  bpf: netdev: init the offload table earlier
  selftests/bpf: Fix pkg-config call building sign-file
====================

Link: https://lore.kernel.org/r/20230524170839.13905-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit 0c615f1cc3
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -71,7 +71,6 @@ struct sk_psock_link {
 };
 
 struct sk_psock_work_state {
-	struct sk_buff			*skb;
 	u32				len;
 	u32				off;
 };
@@ -105,7 +104,7 @@ struct sk_psock {
 	struct proto			*sk_proto;
 	struct mutex			work_mutex;
 	struct sk_psock_work_state	work_state;
-	struct work_struct		work;
+	struct delayed_work		work;
 	struct rcu_work			rwork;
 };
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1470,6 +1470,8 @@ static inline void tcp_adjust_rcv_ssthresh(struct sock *sk)
 }
 
 void tcp_cleanup_rbuf(struct sock *sk, int copied);
+void __tcp_cleanup_rbuf(struct sock *sk, int copied);
+
 
 /* We provision sk_rcvbuf around 200% of sk_rcvlowat.
  * If 87.5 % (7/8) of the space has been consumed, we want to override
@@ -2326,6 +2328,14 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
 void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
 #endif /* CONFIG_BPF_SYSCALL */
 
+#ifdef CONFIG_INET
+void tcp_eat_skb(struct sock *sk, struct sk_buff *skb);
+#else
+static inline void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
+{
+}
+#endif
+
 int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
 			  struct sk_msg *msg, u32 bytes, int flags);
 #endif /* CONFIG_NET_SOCK_MSG */
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1215,7 +1215,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
 
 	ret = htab_lock_bucket(htab, b, hash, &flags);
 	if (ret)
-		return ret;
+		goto err_lock_bucket;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1236,6 +1236,7 @@ static long htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value
 err:
 	htab_unlock_bucket(htab, b, hash, flags);
 
+err_lock_bucket:
 	if (ret)
 		htab_lru_push_free(htab, l_new);
 	else if (l_old)
@@ -1338,7 +1339,7 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 
 	ret = htab_lock_bucket(htab, b, hash, &flags);
 	if (ret)
-		return ret;
+		goto err_lock_bucket;
 
 	l_old = lookup_elem_raw(head, hash, key, key_size);
 
@@ -1361,6 +1362,7 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 	ret = 0;
 err:
 	htab_unlock_bucket(htab, b, hash, flags);
+err_lock_bucket:
 	if (l_new)
		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
	return ret;
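The bug pattern here: htab_lru_map_update_elem() takes l_new off the LRU free
list before it tries to take the bucket lock, so the old early return on a
locking failure leaked the pre-allocated node. A minimal userspace sketch of
the fixed error-path ordering (illustrative names only, not the kernel API;
compiles with any C compiler):

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

struct node { int key; };

static struct node *free_list_pop(void) { return malloc(sizeof(struct node)); }
static void free_list_push(struct node *n) { free(n); }
static int try_lock_bucket(bool contended) { return contended ? -16 /* -EBUSY */ : 0; }
static void unlock_bucket(void) {}

static int update_elem(int key, bool contended)
{
	struct node *l_new = free_list_pop();	/* pre-allocated, must not leak */
	int ret;

	ret = try_lock_bucket(contended);
	if (ret)
		goto err_lock_bucket;	/* old code: return ret; => leak */

	/* ... insert l_new into the bucket ... */
	l_new = NULL;			/* ownership passed to the table */
	unlock_bucket();
err_lock_bucket:
	if (l_new)
		free_list_push(l_new);	/* give the node back on failure */
	return ret;
}

int main(void)
{
	printf("uncontended: %d\n", update_elem(1, false));
	printf("contended:   %d\n", update_elem(2, true));
	return 0;
}

The same reordering is applied to the per-CPU variant above; only the
push-free helper that returns the node differs.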
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -859,4 +859,4 @@ static int __init bpf_offload_init(void)
 	return rhashtable_init(&offdevs, &offdevs_params);
 }
 
-late_initcall(bpf_offload_init);
+core_initcall(bpf_offload_init);
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -17033,7 +17033,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
 								insn->dst_reg,
 								shift);
-			insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
+			insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
 							(1ULL << size * 8) - 1);
 		}
 	}
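Why this one-class change matters: for a 32-bit narrow load the mask
(1ULL << 32) - 1 = 0xffffffff is truncated into the instruction's signed
32-bit immediate and then sign-extended to all-ones by BPF_ALU64, so the AND
never cleared the upper half of the 64-bit field. BPF_ALU32 zero-extends the
destination instead. A small plain-C demonstration of the arithmetic (not
verifier code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t field = 0xdeadbeef00000001ULL;	/* 64-bit ctx field */
	int32_t imm = -1;	/* 0xffffffff truncated into the s32 immediate */

	uint64_t alu64 = field & (uint64_t)(int64_t)imm;  /* sign-extended: AND is a no-op */
	uint64_t alu32 = (uint32_t)field & (uint32_t)imm; /* zero-extends the destination */

	printf("ALU64 AND: %#llx (upper bits leak through)\n",
	       (unsigned long long)alu64);
	printf("ALU32 AND: %#llx (the expected 32-bit load)\n",
	       (unsigned long long)alu32);
	return 0;
}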
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -481,8 +481,6 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
 		msg_rx = sk_psock_peek_msg(psock);
 	}
 out:
-	if (psock->work_state.skb && copied > 0)
-		schedule_work(&psock->work);
 	return copied;
 }
 EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
@@ -624,42 +622,33 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
 
 static void sk_psock_skb_state(struct sk_psock *psock,
 			       struct sk_psock_work_state *state,
-			       struct sk_buff *skb,
 			       int len, int off)
 {
 	spin_lock_bh(&psock->ingress_lock);
 	if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
-		state->skb = skb;
 		state->len = len;
 		state->off = off;
-	} else {
-		sock_drop(psock->sk, skb);
 	}
 	spin_unlock_bh(&psock->ingress_lock);
 }
 
 static void sk_psock_backlog(struct work_struct *work)
 {
-	struct sk_psock *psock = container_of(work, struct sk_psock, work);
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct sk_psock *psock = container_of(dwork, struct sk_psock, work);
 	struct sk_psock_work_state *state = &psock->work_state;
 	struct sk_buff *skb = NULL;
+	u32 len = 0, off = 0;
 	bool ingress;
-	u32 len, off;
 	int ret;
 
 	mutex_lock(&psock->work_mutex);
-	if (unlikely(state->skb)) {
-		spin_lock_bh(&psock->ingress_lock);
-		skb = state->skb;
+	if (unlikely(state->len)) {
 		len = state->len;
 		off = state->off;
-		state->skb = NULL;
-		spin_unlock_bh(&psock->ingress_lock);
 	}
-	if (skb)
-		goto start;
 
-	while ((skb = skb_dequeue(&psock->ingress_skb))) {
+	while ((skb = skb_peek(&psock->ingress_skb))) {
 		len = skb->len;
 		off = 0;
 		if (skb_bpf_strparser(skb)) {
@@ -668,7 +657,6 @@ static void sk_psock_backlog(struct work_struct *work)
 			off = stm->offset;
 			len = stm->full_len;
 		}
-start:
 		ingress = skb_bpf_ingress(skb);
 		skb_bpf_redirect_clear(skb);
 		do {
@@ -678,22 +666,28 @@ start:
 						  len, ingress);
 			if (ret <= 0) {
 				if (ret == -EAGAIN) {
-					sk_psock_skb_state(psock, state, skb,
-							   len, off);
+					sk_psock_skb_state(psock, state, len, off);
+
+					/* Delay slightly to prioritize any
+					 * other work that might be here.
+					 */
+					if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+						schedule_delayed_work(&psock->work, 1);
 					goto end;
 				}
 				/* Hard errors break pipe and stop xmit. */
 				sk_psock_report_error(psock, ret ? -ret : EPIPE);
 				sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
-				sock_drop(psock->sk, skb);
 				goto end;
 			}
 			off += ret;
 			len -= ret;
 		} while (len);
 
-		if (!ingress)
+		skb = skb_dequeue(&psock->ingress_skb);
+		if (!ingress) {
 			kfree_skb(skb);
+		}
 	}
 end:
 	mutex_unlock(&psock->work_mutex);
@@ -734,7 +728,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
 	INIT_LIST_HEAD(&psock->link);
 	spin_lock_init(&psock->link_lock);
 
-	INIT_WORK(&psock->work, sk_psock_backlog);
+	INIT_DELAYED_WORK(&psock->work, sk_psock_backlog);
 	mutex_init(&psock->work_mutex);
 	INIT_LIST_HEAD(&psock->ingress_msg);
 	spin_lock_init(&psock->ingress_lock);
@@ -786,11 +780,6 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
 		skb_bpf_redirect_clear(skb);
 		sock_drop(psock->sk, skb);
 	}
-	kfree_skb(psock->work_state.skb);
-	/* We null the skb here to ensure that calls to sk_psock_backlog
-	 * do not pick up the free'd skb.
-	 */
-	psock->work_state.skb = NULL;
 	__sk_psock_purge_ingress_msg(psock);
 }
 
@@ -809,7 +798,6 @@ void sk_psock_stop(struct sk_psock *psock)
 	spin_lock_bh(&psock->ingress_lock);
 	sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
 	sk_psock_cork_free(psock);
-	__sk_psock_zap_ingress(psock);
 	spin_unlock_bh(&psock->ingress_lock);
 }
 
@@ -823,7 +811,8 @@ static void sk_psock_destroy(struct work_struct *work)
 
 	sk_psock_done_strp(psock);
 
-	cancel_work_sync(&psock->work);
+	cancel_delayed_work_sync(&psock->work);
+	__sk_psock_zap_ingress(psock);
 	mutex_destroy(&psock->work_mutex);
 
 	psock_progs_drop(&psock->progs);
@@ -938,7 +927,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
 	}
 
 	skb_queue_tail(&psock_other->ingress_skb, skb);
-	schedule_work(&psock_other->work);
+	schedule_delayed_work(&psock_other->work, 0);
 	spin_unlock_bh(&psock_other->ingress_lock);
 	return 0;
 }
@@ -990,10 +979,8 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 		err = -EIO;
 		sk_other = psock->sk;
 		if (sock_flag(sk_other, SOCK_DEAD) ||
-		    !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
-			skb_bpf_redirect_clear(skb);
+		    !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
 			goto out_free;
-		}
 
 		skb_bpf_set_ingress(skb);
 
@@ -1018,22 +1005,23 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
 			spin_lock_bh(&psock->ingress_lock);
 			if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
 				skb_queue_tail(&psock->ingress_skb, skb);
-				schedule_work(&psock->work);
+				schedule_delayed_work(&psock->work, 0);
 				err = 0;
 			}
 			spin_unlock_bh(&psock->ingress_lock);
-			if (err < 0) {
-				skb_bpf_redirect_clear(skb);
+			if (err < 0)
 				goto out_free;
-			}
 		}
 		break;
 	case __SK_REDIRECT:
+		tcp_eat_skb(psock->sk, skb);
 		err = sk_psock_skb_redirect(psock, skb);
 		break;
 	case __SK_DROP:
 	default:
 out_free:
+		skb_bpf_redirect_clear(skb);
+		tcp_eat_skb(psock->sk, skb);
 		sock_drop(psock->sk, skb);
 	}
 
@@ -1049,7 +1037,7 @@ static void sk_psock_write_space(struct sock *sk)
 	psock = sk_psock(sk);
 	if (likely(psock)) {
 		if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
-			schedule_work(&psock->work);
+			schedule_delayed_work(&psock->work, 0);
 		write_space = psock->saved_write_space;
 	}
 	rcu_read_unlock();
@@ -1078,8 +1066,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
 		skb_dst_drop(skb);
 		skb_bpf_redirect_clear(skb);
 		ret = bpf_prog_run_pin_on_cpu(prog, skb);
-		if (ret == SK_PASS)
-			skb_bpf_set_strparser(skb);
+		skb_bpf_set_strparser(skb);
 		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
 		skb->sk = NULL;
 	}
@@ -1183,12 +1170,11 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
 	int ret = __SK_DROP;
 	int len = skb->len;
 
-	skb_get(skb);
-
 	rcu_read_lock();
 	psock = sk_psock(sk);
 	if (unlikely(!psock)) {
 		len = 0;
+		tcp_eat_skb(sk, skb);
 		sock_drop(sk, skb);
 		goto out;
 	}
@@ -1212,12 +1198,21 @@ out:
 static void sk_psock_verdict_data_ready(struct sock *sk)
 {
 	struct socket *sock = sk->sk_socket;
+	int copied;
 
 	trace_sk_data_ready(sk);
 
 	if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
 		return;
-	sock->ops->read_skb(sk, sk_psock_verdict_recv);
+	copied = sock->ops->read_skb(sk, sk_psock_verdict_recv);
+	if (copied >= 0) {
+		struct sk_psock *psock;
+
+		rcu_read_lock();
+		psock = sk_psock(sk);
+		psock->saved_data_ready(sk);
+		rcu_read_unlock();
+	}
 }
 
 void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
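The work-item conversion above uses the standard delayed_work API so the
backlog can be rescheduled with a small delay after -EAGAIN. A minimal
kernel-module sketch of that pattern (illustrative only, not part of this
patch):

#include <linux/module.h>
#include <linux/workqueue.h>

static struct delayed_work demo_work;

static void demo_fn(struct work_struct *work)
{
	/* to_delayed_work() recovers the delayed_work, as the new
	 * sk_psock_backlog() does before container_of().
	 */
	struct delayed_work *dwork = to_delayed_work(work);

	pr_info("backlog ran (%p)\n", dwork);
}

static int __init demo_init(void)
{
	INIT_DELAYED_WORK(&demo_work, demo_fn);
	schedule_delayed_work(&demo_work, 0);	/* run as soon as possible */
	schedule_delayed_work(&demo_work, 1);	/* typically a no-op: already pending */
	return 0;
}

static void __exit demo_exit(void)
{
	cancel_delayed_work_sync(&demo_work);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");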
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1644,9 +1644,10 @@ void sock_map_close(struct sock *sk, long timeout)
 		rcu_read_unlock();
 		sk_psock_stop(psock);
 		release_sock(sk);
-		cancel_work_sync(&psock->work);
+		cancel_delayed_work_sync(&psock->work);
 		sk_psock_put(sk, psock);
 	}
+
 	/* Make sure we do not recurse. This is a bug.
 	 * Leak the socket instead of crashing on a stack overflow.
 	 */
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1571,7 +1571,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
  * calculation of whether or not we must ACK for the sake of
  * a window update.
  */
-static void __tcp_cleanup_rbuf(struct sock *sk, int copied)
+void __tcp_cleanup_rbuf(struct sock *sk, int copied)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	bool time_to_ack = false;
@@ -1773,7 +1773,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 		WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
 		tcp_flags = TCP_SKB_CB(skb)->tcp_flags;
 		used = recv_actor(sk, skb);
-		consume_skb(skb);
 		if (used < 0) {
 			if (!copied)
 				copied = used;
@@ -1787,14 +1786,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 			break;
 		}
 	}
-	WRITE_ONCE(tp->copied_seq, seq);
-
-	tcp_rcv_space_adjust(sk);
-
-	/* Clean up data we have read: This will do ACK frames. */
-	if (copied > 0)
-		__tcp_cleanup_rbuf(sk, copied);
-
 	return copied;
 }
 EXPORT_SYMBOL(tcp_read_skb);
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -11,6 +11,24 @@
 #include <net/inet_common.h>
 #include <net/tls.h>
 
+void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
+{
+	struct tcp_sock *tcp;
+	int copied;
+
+	if (!skb || !skb->len || !sk_is_tcp(sk))
+		return;
+
+	if (skb_bpf_strparser(skb))
+		return;
+
+	tcp = tcp_sk(sk);
+	copied = tcp->copied_seq + skb->len;
+	WRITE_ONCE(tcp->copied_seq, copied);
+	tcp_rcv_space_adjust(sk);
+	__tcp_cleanup_rbuf(sk, skb->len);
+}
+
 static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
 			   struct sk_msg *msg, u32 apply_bytes, int flags)
 {
@@ -174,14 +192,34 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
 	return ret;
 }
 
+static bool is_next_msg_fin(struct sk_psock *psock)
+{
+	struct scatterlist *sge;
+	struct sk_msg *msg_rx;
+	int i;
+
+	msg_rx = sk_psock_peek_msg(psock);
+	i = msg_rx->sg.start;
+	sge = sk_msg_elem(msg_rx, i);
+	if (!sge->length) {
+		struct sk_buff *skb = msg_rx->skb;
+
+		if (skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+			return true;
+	}
+	return false;
+}
+
 static int tcp_bpf_recvmsg_parser(struct sock *sk,
 				  struct msghdr *msg,
 				  size_t len,
 				  int flags,
 				  int *addr_len)
 {
+	struct tcp_sock *tcp = tcp_sk(sk);
+	u32 seq = tcp->copied_seq;
 	struct sk_psock *psock;
-	int copied;
+	int copied = 0;
 
 	if (unlikely(flags & MSG_ERRQUEUE))
 		return inet_recv_error(sk, msg, len, addr_len);
@@ -194,8 +232,43 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
 		return tcp_recvmsg(sk, msg, len, flags, addr_len);
 
 	lock_sock(sk);
+
+	/* We may have received data on the sk_receive_queue pre-accept and
+	 * then we can not use read_skb in this context because we haven't
+	 * assigned a sk_socket yet so have no link to the ops. The work-around
+	 * is to check the sk_receive_queue and in these cases read skbs off
+	 * queue again. The read_skb hook is not running at this point because
+	 * of lock_sock so we avoid having multiple runners in read_skb.
+	 */
+	if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) {
+		tcp_data_ready(sk);
+		/* This handles the ENOMEM errors if we both receive data
+		 * pre accept and are already under memory pressure. At least
+		 * let user know to retry.
+		 */
+		if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) {
+			copied = -EAGAIN;
+			goto out;
+		}
+	}
+
 msg_bytes_ready:
 	copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+	/* The typical case for EFAULT is the socket was gracefully
+	 * shutdown with a FIN pkt. So check here the other case is
+	 * some error on copy_page_to_iter which would be unexpected.
+	 * On fin return correct return code to zero.
+	 */
+	if (copied == -EFAULT) {
+		bool is_fin = is_next_msg_fin(psock);
+
+		if (is_fin) {
+			copied = 0;
+			seq++;
+			goto out;
+		}
+	}
+	seq += copied;
 	if (!copied) {
 		long timeo;
 		int data;
@@ -233,6 +306,10 @@ msg_bytes_ready:
 		copied = -EAGAIN;
 	}
 out:
+	WRITE_ONCE(tcp->copied_seq, seq);
+	tcp_rcv_space_adjust(sk);
+	if (copied > 0)
+		__tcp_cleanup_rbuf(sk, copied);
 	release_sock(sk);
 	sk_psock_put(sk, psock);
 	return copied;
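tcp_eat_skb() exists because TCP reports FIONREAD as the distance between
rcv_nxt and copied_seq (see tcp_inq()); if a verdict program consumes or
drops an skb without advancing copied_seq, userspace sees phantom readable
bytes. A userspace sketch of the sequence arithmetic (plain C, illustrative
values):

#include <stdio.h>
#include <stdint.h>

static uint32_t fionread(uint32_t rcv_nxt, uint32_t copied_seq)
{
	return rcv_nxt - copied_seq;	/* wraps correctly in u32 */
}

int main(void)
{
	uint32_t rcv_nxt = 1256;	/* 256 bytes received */
	uint32_t copied_seq = 1000;

	printf("before eat: %u\n", fionread(rcv_nxt, copied_seq));	/* 256 */
	copied_seq += 256;		/* tcp_eat_skb() accounting */
	printf("after eat:  %u\n", fionread(rcv_nxt, copied_seq));	/* 0 */
	return 0;
}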
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1818,7 +1818,7 @@ EXPORT_SYMBOL(__skb_recv_udp);
 int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 {
 	struct sk_buff *skb;
-	int err, copied;
+	int err;
 
 try_again:
 	skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
@@ -1837,10 +1837,7 @@ try_again:
 	}
 
 	WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
-	copied = recv_actor(sk, skb);
-	kfree_skb(skb);
-
-	return copied;
+	return recv_actor(sk, skb);
 }
 EXPORT_SYMBOL(udp_read_skb);
 
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2553,7 +2553,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 {
 	struct unix_sock *u = unix_sk(sk);
 	struct sk_buff *skb;
-	int err, copied;
+	int err;
 
 	mutex_lock(&u->iolock);
 	skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
@@ -2561,10 +2561,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
 	if (!skb)
 		return err;
 
-	copied = recv_actor(sk, skb);
-	kfree_skb(skb);
-
-	return copied;
+	return recv_actor(sk, skb);
 }
 
 /*
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -1441,7 +1441,6 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
 	struct sock *sk = sk_vsock(vsk);
 	struct sk_buff *skb;
 	int off = 0;
-	int copied;
 	int err;
 
 	spin_lock_bh(&vvs->rx_lock);
@@ -1454,9 +1453,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
 	if (!skb)
 		return err;
 
-	copied = recv_actor(sk, skb);
-	kfree_skb(skb);
-	return copied;
+	return recv_actor(sk, skb);
 }
 EXPORT_SYMBOL_GPL(virtio_transport_read_skb);
 
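The UDP, AF_UNIX and vsock read_skb() implementations above all drop their
local kfree_skb() because ownership of the skb now passes to the recv_actor
callback, which consumes it (matching the consume_skb() removal in
tcp_read_skb()). A toy userspace model of the new contract (illustrative
names, not kernel code):

#include <stdio.h>
#include <stdlib.h>

struct buf { int len; };

/* New contract: the actor consumes (frees) the buffer and returns bytes used. */
static int actor(struct buf *b)
{
	int used = b->len;

	free(b);		/* ownership transferred: the actor frees */
	return used;
}

static int read_buf(void)
{
	struct buf *b = malloc(sizeof(*b));

	if (!b)
		return -1;
	b->len = 42;
	return actor(b);	/* the old code also freed b here: double free */
}

int main(void)
{
	printf("read %d bytes\n", read_buf());
	return 0;
}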
--- a/samples/bpf/hbm.c
+++ b/samples/bpf/hbm.c
@@ -498,7 +498,6 @@ int main(int argc, char **argv)
 				"Option -%c requires an argument.\n\n",
 				optopt);
 		case 'h':
-			__fallthrough;
 		default:
 			Usage();
 			return 0;
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -197,7 +197,7 @@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_r
 
 $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c
 	$(call msg,SIGN-FILE,,$@)
-	$(Q)$(CC) $(shell $(HOSTPKG_CONFIG)--cflags libcrypto 2> /dev/null) \
+	$(Q)$(CC) $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) \
 	  $< -o $@ \
 	  $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto)
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -2,6 +2,7 @@
 // Copyright (c) 2020 Cloudflare
 #include <error.h>
 #include <netinet/tcp.h>
+#include <sys/epoll.h>
 
 #include "test_progs.h"
 #include "test_skmsg_load_helpers.skel.h"
@@ -9,8 +10,12 @@
 #include "test_sockmap_invalid_update.skel.h"
 #include "test_sockmap_skb_verdict_attach.skel.h"
 #include "test_sockmap_progs_query.skel.h"
+#include "test_sockmap_pass_prog.skel.h"
+#include "test_sockmap_drop_prog.skel.h"
 #include "bpf_iter_sockmap.skel.h"
 
+#include "sockmap_helpers.h"
+
 #define TCP_REPAIR 19 /* TCP sock is under repair right now */
 
 #define TCP_REPAIR_ON 1
@@ -350,6 +355,126 @@ out:
 	test_sockmap_progs_query__destroy(skel);
 }
 
+#define MAX_EVENTS 10
+static void test_sockmap_skb_verdict_shutdown(void)
+{
+	struct epoll_event ev, events[MAX_EVENTS];
+	int n, err, map, verdict, s, c1, p1;
+	struct test_sockmap_pass_prog *skel;
+	int epollfd;
+	int zero = 0;
+	char b;
+
+	skel = test_sockmap_pass_prog__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
+		return;
+
+	verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+	map = bpf_map__fd(skel->maps.sock_map_rx);
+
+	err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach"))
+		goto out;
+
+	s = socket_loopback(AF_INET, SOCK_STREAM);
+	if (s < 0)
+		goto out;
+	err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
+	if (err < 0)
+		goto out;
+
+	err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+	if (err < 0)
+		goto out_close;
+
+	shutdown(p1, SHUT_WR);
+
+	ev.events = EPOLLIN;
+	ev.data.fd = c1;
+
+	epollfd = epoll_create1(0);
+	if (!ASSERT_GT(epollfd, -1, "epoll_create(0)"))
+		goto out_close;
+	err = epoll_ctl(epollfd, EPOLL_CTL_ADD, c1, &ev);
+	if (!ASSERT_OK(err, "epoll_ctl(EPOLL_CTL_ADD)"))
+		goto out_close;
+	err = epoll_wait(epollfd, events, MAX_EVENTS, -1);
+	if (!ASSERT_EQ(err, 1, "epoll_wait(fd)"))
+		goto out_close;
+
+	n = recv(c1, &b, 1, SOCK_NONBLOCK);
+	ASSERT_EQ(n, 0, "recv_timeout(fin)");
+out_close:
+	close(c1);
+	close(p1);
+out:
+	test_sockmap_pass_prog__destroy(skel);
+}
+
+static void test_sockmap_skb_verdict_fionread(bool pass_prog)
+{
+	int expected, zero = 0, sent, recvd, avail;
+	int err, map, verdict, s, c0, c1, p0, p1;
+	struct test_sockmap_pass_prog *pass;
+	struct test_sockmap_drop_prog *drop;
+	char buf[256] = "0123456789";
+
+	if (pass_prog) {
+		pass = test_sockmap_pass_prog__open_and_load();
+		if (!ASSERT_OK_PTR(pass, "open_and_load"))
+			return;
+		verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
+		map = bpf_map__fd(pass->maps.sock_map_rx);
+		expected = sizeof(buf);
+	} else {
+		drop = test_sockmap_drop_prog__open_and_load();
+		if (!ASSERT_OK_PTR(drop, "open_and_load"))
+			return;
+		verdict = bpf_program__fd(drop->progs.prog_skb_verdict);
+		map = bpf_map__fd(drop->maps.sock_map_rx);
+		/* On drop data is consumed immediately and copied_seq inc'd */
+		expected = 0;
+	}
+
+
+	err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach"))
+		goto out;
+
+	s = socket_loopback(AF_INET, SOCK_STREAM);
+	if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
+		goto out;
+	err = create_socket_pairs(s, AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
+	if (!ASSERT_OK(err, "create_socket_pairs(s)"))
+		goto out;
+
+	err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+	if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
+		goto out_close;
+
+	sent = xsend(p1, &buf, sizeof(buf), 0);
+	ASSERT_EQ(sent, sizeof(buf), "xsend(p0)");
+	err = ioctl(c1, FIONREAD, &avail);
+	ASSERT_OK(err, "ioctl(FIONREAD) error");
+	ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
+	/* On DROP test there will be no data to read */
+	if (pass_prog) {
+		recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC);
+		ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)");
+	}
+
+out_close:
+	close(c0);
+	close(p0);
+	close(c1);
+	close(p1);
+out:
+	if (pass_prog)
+		test_sockmap_pass_prog__destroy(pass);
+	else
+		test_sockmap_drop_prog__destroy(drop);
+}
+
 void test_sockmap_basic(void)
 {
 	if (test__start_subtest("sockmap create_update_free"))
@@ -384,4 +509,10 @@ void test_sockmap_basic(void)
 		test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
 	if (test__start_subtest("sockmap skb_verdict progs query"))
 		test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
+	if (test__start_subtest("sockmap skb_verdict shutdown"))
+		test_sockmap_skb_verdict_shutdown();
+	if (test__start_subtest("sockmap skb_verdict fionread"))
+		test_sockmap_skb_verdict_fionread(true);
+	if (test__start_subtest("sockmap skb_verdict fionread on drop"))
+		test_sockmap_skb_verdict_fionread(false);
 }
tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h (new file, 390 lines)
@@ -0,0 +1,390 @@
#ifndef __SOCKMAP_HELPERS__
#define __SOCKMAP_HELPERS__

#include <linux/vm_sockets.h>

#define IO_TIMEOUT_SEC 30
#define MAX_STRERR_LEN 256
#define MAX_TEST_NAME 80

/* workaround for older vm_sockets.h */
#ifndef VMADDR_CID_LOCAL
#define VMADDR_CID_LOCAL 1
#endif

#define __always_unused __attribute__((__unused__))

#define _FAIL(errnum, fmt...) \
	({ \
		error_at_line(0, (errnum), __func__, __LINE__, fmt); \
		CHECK_FAIL(true); \
	})
#define FAIL(fmt...) _FAIL(0, fmt)
#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
#define FAIL_LIBBPF(err, msg) \
	({ \
		char __buf[MAX_STRERR_LEN]; \
		libbpf_strerror((err), __buf, sizeof(__buf)); \
		FAIL("%s: %s", (msg), __buf); \
	})

/* Wrappers that fail the test on error and report it. */

#define xaccept_nonblock(fd, addr, len) \
	({ \
		int __ret = \
			accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
		if (__ret == -1) \
			FAIL_ERRNO("accept"); \
		__ret; \
	})

#define xbind(fd, addr, len) \
	({ \
		int __ret = bind((fd), (addr), (len)); \
		if (__ret == -1) \
			FAIL_ERRNO("bind"); \
		__ret; \
	})

#define xclose(fd) \
	({ \
		int __ret = close((fd)); \
		if (__ret == -1) \
			FAIL_ERRNO("close"); \
		__ret; \
	})

#define xconnect(fd, addr, len) \
	({ \
		int __ret = connect((fd), (addr), (len)); \
		if (__ret == -1) \
			FAIL_ERRNO("connect"); \
		__ret; \
	})

#define xgetsockname(fd, addr, len) \
	({ \
		int __ret = getsockname((fd), (addr), (len)); \
		if (__ret == -1) \
			FAIL_ERRNO("getsockname"); \
		__ret; \
	})

#define xgetsockopt(fd, level, name, val, len) \
	({ \
		int __ret = getsockopt((fd), (level), (name), (val), (len)); \
		if (__ret == -1) \
			FAIL_ERRNO("getsockopt(" #name ")"); \
		__ret; \
	})

#define xlisten(fd, backlog) \
	({ \
		int __ret = listen((fd), (backlog)); \
		if (__ret == -1) \
			FAIL_ERRNO("listen"); \
		__ret; \
	})

#define xsetsockopt(fd, level, name, val, len) \
	({ \
		int __ret = setsockopt((fd), (level), (name), (val), (len)); \
		if (__ret == -1) \
			FAIL_ERRNO("setsockopt(" #name ")"); \
		__ret; \
	})

#define xsend(fd, buf, len, flags) \
	({ \
		ssize_t __ret = send((fd), (buf), (len), (flags)); \
		if (__ret == -1) \
			FAIL_ERRNO("send"); \
		__ret; \
	})

#define xrecv_nonblock(fd, buf, len, flags) \
	({ \
		ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
					     IO_TIMEOUT_SEC); \
		if (__ret == -1) \
			FAIL_ERRNO("recv"); \
		__ret; \
	})

#define xsocket(family, sotype, flags) \
	({ \
		int __ret = socket(family, sotype, flags); \
		if (__ret == -1) \
			FAIL_ERRNO("socket"); \
		__ret; \
	})

#define xbpf_map_delete_elem(fd, key) \
	({ \
		int __ret = bpf_map_delete_elem((fd), (key)); \
		if (__ret < 0) \
			FAIL_ERRNO("map_delete"); \
		__ret; \
	})

#define xbpf_map_lookup_elem(fd, key, val) \
	({ \
		int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
		if (__ret < 0) \
			FAIL_ERRNO("map_lookup"); \
		__ret; \
	})

#define xbpf_map_update_elem(fd, key, val, flags) \
	({ \
		int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
		if (__ret < 0) \
			FAIL_ERRNO("map_update"); \
		__ret; \
	})

#define xbpf_prog_attach(prog, target, type, flags) \
	({ \
		int __ret = \
			bpf_prog_attach((prog), (target), (type), (flags)); \
		if (__ret < 0) \
			FAIL_ERRNO("prog_attach(" #type ")"); \
		__ret; \
	})

#define xbpf_prog_detach2(prog, target, type) \
	({ \
		int __ret = bpf_prog_detach2((prog), (target), (type)); \
		if (__ret < 0) \
			FAIL_ERRNO("prog_detach2(" #type ")"); \
		__ret; \
	})

#define xpthread_create(thread, attr, func, arg) \
	({ \
		int __ret = pthread_create((thread), (attr), (func), (arg)); \
		errno = __ret; \
		if (__ret) \
			FAIL_ERRNO("pthread_create"); \
		__ret; \
	})

#define xpthread_join(thread, retval) \
	({ \
		int __ret = pthread_join((thread), (retval)); \
		errno = __ret; \
		if (__ret) \
			FAIL_ERRNO("pthread_join"); \
		__ret; \
	})

static inline int poll_read(int fd, unsigned int timeout_sec)
{
	struct timeval timeout = { .tv_sec = timeout_sec };
	fd_set rfds;
	int r;

	FD_ZERO(&rfds);
	FD_SET(fd, &rfds);

	r = select(fd + 1, &rfds, NULL, NULL, &timeout);
	if (r == 0)
		errno = ETIME;

	return r == 1 ? 0 : -1;
}

static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
				 unsigned int timeout_sec)
{
	if (poll_read(fd, timeout_sec))
		return -1;

	return accept(fd, addr, len);
}

static inline int recv_timeout(int fd, void *buf, size_t len, int flags,
			       unsigned int timeout_sec)
{
	if (poll_read(fd, timeout_sec))
		return -1;

	return recv(fd, buf, len, flags);
}

static inline void init_addr_loopback4(struct sockaddr_storage *ss,
				       socklen_t *len)
{
	struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));

	addr4->sin_family = AF_INET;
	addr4->sin_port = 0;
	addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
	*len = sizeof(*addr4);
}

static inline void init_addr_loopback6(struct sockaddr_storage *ss,
				       socklen_t *len)
{
	struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));

	addr6->sin6_family = AF_INET6;
	addr6->sin6_port = 0;
	addr6->sin6_addr = in6addr_loopback;
	*len = sizeof(*addr6);
}

static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
					    socklen_t *len)
{
	struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));

	addr->svm_family = AF_VSOCK;
	addr->svm_port = VMADDR_PORT_ANY;
	addr->svm_cid = VMADDR_CID_LOCAL;
	*len = sizeof(*addr);
}

static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
				      socklen_t *len)
{
	switch (family) {
	case AF_INET:
		init_addr_loopback4(ss, len);
		return;
	case AF_INET6:
		init_addr_loopback6(ss, len);
		return;
	case AF_VSOCK:
		init_addr_loopback_vsock(ss, len);
		return;
	default:
		FAIL("unsupported address family %d", family);
	}
}

static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
{
	return (struct sockaddr *)ss;
}

static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
{
	u64 value;
	u32 key;
	int err;

	key = 0;
	value = fd1;
	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
	if (err)
		return err;

	key = 1;
	value = fd2;
	return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
}

static inline int create_pair(int s, int family, int sotype, int *c, int *p)
{
	struct sockaddr_storage addr;
	socklen_t len;
	int err = 0;

	len = sizeof(addr);
	err = xgetsockname(s, sockaddr(&addr), &len);
	if (err)
		return err;

	*c = xsocket(family, sotype, 0);
	if (*c < 0)
		return errno;
	err = xconnect(*c, sockaddr(&addr), len);
	if (err) {
		err = errno;
		goto close_cli0;
	}

	*p = xaccept_nonblock(s, NULL, NULL);
	if (*p < 0) {
		err = errno;
		goto close_cli0;
	}
	return err;
close_cli0:
	close(*c);
	return err;
}

static inline int create_socket_pairs(int s, int family, int sotype,
				      int *c0, int *c1, int *p0, int *p1)
{
	int err;

	err = create_pair(s, family, sotype, c0, p0);
	if (err)
		return err;

	err = create_pair(s, family, sotype, c1, p1);
	if (err) {
		close(*c0);
		close(*p0);
	}
	return err;
}

static inline int enable_reuseport(int s, int progfd)
{
	int err, one = 1;

	err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
	if (err)
		return -1;
	err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
			  sizeof(progfd));
	if (err)
		return -1;

	return 0;
}

static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
{
	struct sockaddr_storage addr;
	socklen_t len;
	int err, s;

	init_addr_loopback(family, &addr, &len);

	s = xsocket(family, sotype, 0);
	if (s == -1)
		return -1;

	if (progfd >= 0)
		enable_reuseport(s, progfd);

	err = xbind(s, sockaddr(&addr), len);
	if (err)
		goto close;

	if (sotype & SOCK_DGRAM)
		return s;

	err = xlisten(s, SOMAXCONN);
	if (err)
		goto close;

	return s;
close:
	xclose(s);
	return -1;
}

static inline int socket_loopback(int family, int sotype)
{
	return socket_loopback_reuseport(family, sotype, -1);
}

#endif // __SOCKMAP_HELPERS__
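A hypothetical standalone use of the new helpers, mirroring how the FIONREAD
test builds its sockets (assumes the test_progs environment; error handling
trimmed):

#include "sockmap_helpers.h"

static void demo_pairs(void)
{
	int s, c0, c1, p0, p1;

	s = socket_loopback(AF_INET, SOCK_STREAM);
	if (s < 0)
		return;

	/* create_socket_pairs() returns 0 on success */
	if (!create_socket_pairs(s, AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1)) {
		xsend(c0, "ping", 4, 0);	/* travels c0 -> p0 */
		xclose(c0);
		xclose(p0);
		xclose(c1);
		xclose(p1);
	}
	xclose(s);
}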
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -20,11 +20,6 @@
 #include <unistd.h>
 #include <linux/vm_sockets.h>
 
-/* workaround for older vm_sockets.h */
-#ifndef VMADDR_CID_LOCAL
-#define VMADDR_CID_LOCAL 1
-#endif
-
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
@@ -32,315 +27,7 @@
 #include "test_progs.h"
 #include "test_sockmap_listen.skel.h"
 
-[309 deleted lines elided: the IO_TIMEOUT_SEC/FAIL*()/x*() wrapper macros and
- the poll_read/accept_timeout/recv_timeout/init_addr_loopback*/sockaddr/
- enable_reuseport/socket_loopback* helpers, moved verbatim into the new
- sockmap_helpers.h shown above]
+#include "sockmap_helpers.h"
 
 static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused,
 				int family, int sotype, int mapfd)
@@ -984,31 +671,12 @@ static const char *redir_mode_str(enum redir_mode mode)
 	}
 }
 
-static int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
-{
-	u64 value;
-	u32 key;
-	int err;
-
-	key = 0;
-	value = fd1;
-	err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
-	if (err)
-		return err;
-
-	key = 1;
-	value = fd2;
-	return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
-}
-
 static void redir_to_connected(int family, int sotype, int sock_mapfd,
 			       int verd_mapfd, enum redir_mode mode)
 {
 	const char *log_prefix = redir_mode_str(mode);
-	struct sockaddr_storage addr;
 	int s, c0, c1, p0, p1;
 	unsigned int pass;
-	socklen_t len;
 	int err, n;
 	u32 key;
 	char b;
@@ -1019,36 +687,13 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
 	if (s < 0)
 		return;
 
-	len = sizeof(addr);
-	err = xgetsockname(s, sockaddr(&addr), &len);
+	err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
 	if (err)
 		goto close_srv;
 
-	c0 = xsocket(family, sotype, 0);
-	if (c0 < 0)
-		goto close_srv;
-	err = xconnect(c0, sockaddr(&addr), len);
-	if (err)
-		goto close_cli0;
-
-	p0 = xaccept_nonblock(s, NULL, NULL);
-	if (p0 < 0)
-		goto close_cli0;
-
-	c1 = xsocket(family, sotype, 0);
-	if (c1 < 0)
-		goto close_peer0;
-	err = xconnect(c1, sockaddr(&addr), len);
-	if (err)
-		goto close_cli1;
-
-	p1 = xaccept_nonblock(s, NULL, NULL);
-	if (p1 < 0)
-		goto close_cli1;
-
 	err = add_to_sockmap(sock_mapfd, p0, p1);
 	if (err)
-		goto close_peer1;
+		goto close;
 
 	n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
 	if (n < 0)
@@ -1056,12 +701,12 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
 	if (n == 0)
 		FAIL("%s: incomplete write", log_prefix);
 	if (n < 1)
-		goto close_peer1;
+		goto close;
 
 	key = SK_PASS;
 	err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
 	if (err)
-		goto close_peer1;
+		goto close;
 	if (pass != 1)
 		FAIL("%s: want pass count 1, have %d", log_prefix, pass);
 	n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
@@ -1070,13 +715,10 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
 	if (n == 0)
 		FAIL("%s: incomplete recv", log_prefix);
 
-close_peer1:
+close:
 	xclose(p1);
-close_cli1:
 	xclose(c1);
-close_peer0:
 	xclose(p0);
-close_cli0:
 	xclose(c0);
 close_srv:
 	xclose(s);
tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c (new file, 32 lines)
@@ -0,0 +1,32 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 20);
	__type(key, int);
	__type(value, int);
} sock_map_rx SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 20);
	__type(key, int);
	__type(value, int);
} sock_map_tx SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 20);
	__type(key, int);
	__type(value, int);
} sock_map_msg SEC(".maps");

SEC("sk_skb")
int prog_skb_verdict(struct __sk_buff *skb)
{
	return SK_DROP;
}

char _license[] SEC("license") = "GPL";
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -191,7 +191,7 @@ SEC("sockops")
 int bpf_sockmap(struct bpf_sock_ops *skops)
 {
 	__u32 lport, rport;
-	int op, err, ret;
+	int op, ret;
 
 	op = (int) skops->op;
 
@@ -203,10 +203,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 		if (lport == 10000) {
 			ret = 1;
 #ifdef SOCKMAP
-			err = bpf_sock_map_update(skops, &sock_map, &ret,
+			bpf_sock_map_update(skops, &sock_map, &ret,
 					    BPF_NOEXIST);
 #else
-			err = bpf_sock_hash_update(skops, &sock_map, &ret,
+			bpf_sock_hash_update(skops, &sock_map, &ret,
 					     BPF_NOEXIST);
 #endif
 		}
@@ -218,10 +218,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 		if (bpf_ntohl(rport) == 10001) {
 			ret = 10;
 #ifdef SOCKMAP
-			err = bpf_sock_map_update(skops, &sock_map, &ret,
+			bpf_sock_map_update(skops, &sock_map, &ret,
 					    BPF_NOEXIST);
 #else
-			err = bpf_sock_hash_update(skops, &sock_map, &ret,
+			bpf_sock_hash_update(skops, &sock_map, &ret,
 					     BPF_NOEXIST);
 #endif
 		}
@@ -230,8 +230,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 		break;
 	}
 
-	__sink(err);
-
 	return 0;
 }
 
tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c (new file, 32 lines)
@@ -0,0 +1,32 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 20);
	__type(key, int);
	__type(value, int);
} sock_map_rx SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 20);
	__type(key, int);
	__type(value, int);
} sock_map_tx SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 20);
	__type(key, int);
	__type(value, int);
} sock_map_msg SEC(".maps");

SEC("sk_skb")
int prog_skb_verdict(struct __sk_buff *skb)
{
	return SK_PASS;
}

char _license[] SEC("license") = "GPL";