diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index a0990c365a2e..c30405e76833 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -66,6 +66,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED), SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE), SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT), + SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB), SNMP_MIB_SENTINEL }; diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index cae71d947252..dd7fd1f246b5 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -65,6 +65,7 @@ enum linux_mptcp_mib_field { * conflict with another subflow while updating msk rcv wnd */ MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */ + MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */ __MPTCP_MIB_MAX }; @@ -95,4 +96,11 @@ static inline void __MPTCP_INC_STATS(struct net *net, __SNMP_INC_STATS(net->mib.mptcp_statistics, field); } +static inline void MPTCP_DEC_STATS(struct net *net, + enum linux_mptcp_mib_field field) +{ + if (likely(net->mib.mptcp_statistics)) + SNMP_DEC_STATS(net->mib.mptcp_statistics, field); +} + bool mptcp_mib_alloc(struct net *net); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 272e93be1aad..287a60381eae 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1048,6 +1048,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, if (err) return err; + /* We don't use mptcp_set_state() here because it needs to be called + * under the msk socket lock. For the moment, that will not bring + * anything more than only calling inet_sk_state_store(), because the + * old status is known (TCP_CLOSE). + */ inet_sk_state_store(newsk, TCP_LISTEN); lock_sock(ssk); err = __inet_listen_sk(ssk, backlog); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 91e5845d80a9..3ed4709a7509 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -429,11 +429,11 @@ static void mptcp_check_data_fin_ack(struct sock *sk) switch (sk->sk_state) { case TCP_FIN_WAIT1: - inet_sk_state_store(sk, TCP_FIN_WAIT2); + mptcp_set_state(sk, TCP_FIN_WAIT2); break; case TCP_CLOSING: case TCP_LAST_ACK: - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); break; } @@ -594,13 +594,13 @@ static bool mptcp_check_data_fin(struct sock *sk) switch (sk->sk_state) { case TCP_ESTABLISHED: - inet_sk_state_store(sk, TCP_CLOSE_WAIT); + mptcp_set_state(sk, TCP_CLOSE_WAIT); break; case TCP_FIN_WAIT1: - inet_sk_state_store(sk, TCP_CLOSING); + mptcp_set_state(sk, TCP_CLOSING); break; case TCP_FIN_WAIT2: - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); break; default: /* Other states not expected */ @@ -775,7 +775,7 @@ static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk) */ ssk_state = inet_sk_state_load(ssk); if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD)) - inet_sk_state_store(sk, ssk_state); + mptcp_set_state(sk, ssk_state); WRITE_ONCE(sk->sk_err, -err); /* This barrier is coupled with smp_rmb() in mptcp_poll() */ @@ -2463,7 +2463,7 @@ out: inet_sk_state_load(msk->first) == TCP_CLOSE) { if (sk->sk_state != TCP_ESTABLISHED || msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) { - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); mptcp_close_wake_up(sk); } else { mptcp_start_tout_timer(sk); @@ -2558,7 +2558,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) WRITE_ONCE(sk->sk_err, ECONNRESET); } - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK); smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags); @@ -2693,7 +2693,7 @@ static void mptcp_do_fastclose(struct sock *sk) struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); mptcp_for_each_subflow_safe(msk, subflow, tmp) __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, MPTCP_CF_FASTCLOSE); @@ -2871,6 +2871,24 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how) release_sock(ssk); } +void mptcp_set_state(struct sock *sk, int state) +{ + int oldstate = sk->sk_state; + + switch (state) { + case TCP_ESTABLISHED: + if (oldstate != TCP_ESTABLISHED) + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); + break; + + default: + if (oldstate == TCP_ESTABLISHED) + MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); + } + + inet_sk_state_store(sk, state); +} + static const unsigned char new_state[16] = { /* current state: new state: action: */ [0 /* (Invalid) */] = TCP_CLOSE, @@ -2893,7 +2911,7 @@ static int mptcp_close_state(struct sock *sk) int next = (int)new_state[sk->sk_state]; int ns = next & TCP_STATE_MASK; - inet_sk_state_store(sk, ns); + mptcp_set_state(sk, ns); return next & TCP_ACTION_FIN; } @@ -3004,7 +3022,7 @@ bool __mptcp_close(struct sock *sk, long timeout) if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { mptcp_check_listen_stop(sk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); goto cleanup; } @@ -3047,7 +3065,7 @@ cleanup: * state, let's not keep resources busy for no reasons */ if (subflows_alive == 0) - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); sock_hold(sk); pr_debug("msk=%p state=%d", sk, sk->sk_state); @@ -3113,7 +3131,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) return -EBUSY; mptcp_check_listen_stop(sk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); mptcp_stop_rtx_timer(sk); mptcp_stop_tout_timer(sk); @@ -3201,7 +3219,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, /* this can't race with mptcp_close(), as the msk is * not yet exposted to user-space */ - inet_sk_state_store(nsk, TCP_ESTABLISHED); + mptcp_set_state(nsk, TCP_ESTABLISHED); /* The msk maintain a ref to each subflow in the connections list */ WRITE_ONCE(msk->first, ssk); @@ -3622,7 +3640,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(ssk)) return PTR_ERR(ssk); - inet_sk_state_store(sk, TCP_SYN_SENT); + mptcp_set_state(sk, TCP_SYN_SENT); subflow = mptcp_subflow_ctx(ssk); #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of @@ -3672,7 +3690,7 @@ out: if (unlikely(err)) { /* avoid leaving a dangling token in an unconnected socket */ mptcp_token_destroy(msk); - inet_sk_state_store(sk, TCP_CLOSE); + mptcp_set_state(sk, TCP_CLOSE); return err; } @@ -3761,13 +3779,13 @@ static int mptcp_listen(struct socket *sock, int backlog) goto unlock; } - inet_sk_state_store(sk, TCP_LISTEN); + mptcp_set_state(sk, TCP_LISTEN); sock_set_flag(sk, SOCK_RCU_FREE); lock_sock(ssk); err = __inet_listen_sk(ssk, backlog); release_sock(ssk); - inet_sk_state_store(sk, inet_sk_state_load(ssk)); + mptcp_set_state(sk, inet_sk_state_load(ssk)); if (!err) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -3845,7 +3863,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, __mptcp_close_ssk(newsk, msk->first, mptcp_subflow_ctx(msk->first), 0); if (unlikely(list_is_singular(&msk->conn_list))) - inet_sk_state_store(newsk, TCP_CLOSE); + mptcp_set_state(newsk, TCP_CLOSE); } } else { MPTCP_INC_STATS(sock_net(ssk), diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 1240268f9e9e..3517f2d24a22 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -641,6 +641,7 @@ bool __mptcp_close(struct sock *sk, long timeout); void mptcp_cancel_work(struct sock *sk); void __mptcp_unaccepted_force_close(struct sock *sk); void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk); +void mptcp_set_state(struct sock *sk, int state); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 6d7684c35e93..1ef28642afc4 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -425,7 +425,7 @@ void __mptcp_sync_state(struct sock *sk, int state) __mptcp_propagate_sndbuf(sk, msk->first); if (sk->sk_state == TCP_SYN_SENT) { - inet_sk_state_store(sk, state); + mptcp_set_state(sk, state); sk->sk_state_change(sk); } } diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 95b498efacd1..04fcb8a077c9 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -56,7 +56,7 @@ __chk_nr() local command="$1" local expected=$2 local msg="$3" - local skip="${4:-SKIP}" + local skip="${4-SKIP}" local nr nr=$(eval $command) @@ -182,6 +182,15 @@ chk_msk_inuse() __chk_nr get_msk_inuse $expected "$msg" 0 } +# $1: cestab nr +chk_msk_cestab() +{ + local cestab=$1 + + __chk_nr "mptcp_lib_get_counter ${ns} MPTcpExtMPCurrEstab" \ + "${cestab}" "....chk ${cestab} cestab" "" +} + wait_connected() { local listener_ns="${1}" @@ -219,9 +228,11 @@ chk_msk_nr 2 "after MPC handshake " chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" chk_msk_inuse 2 "....chk 2 msk in use" +chk_msk_cestab 2 flush_pids chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_cestab 0 echo "a" | \ timeout ${timeout_test} \ @@ -237,9 +248,11 @@ echo "b" | \ wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" chk_msk_inuse 1 "....chk 1 msk in use" +chk_msk_cestab 1 flush_pids chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_cestab 0 NR_CLIENTS=100 for I in `seq 1 $NR_CLIENTS`; do @@ -261,9 +274,11 @@ done wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" chk_msk_inuse $((NR_CLIENTS*2)) "....chk many msk in use" +chk_msk_cestab $((NR_CLIENTS*2)) flush_pids chk_msk_inuse 0 "....chk 0 msk in use after flush" +chk_msk_cestab 0 mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 87590a43b50d..3a5b63026191 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -56,6 +56,8 @@ unset FAILING_LINKS unset test_linkfail unset addr_nr_ns1 unset addr_nr_ns2 +unset cestab_ns1 +unset cestab_ns2 unset sflags unset fastclose unset fullmesh @@ -976,6 +978,34 @@ pm_nl_set_endpoint() fi } +chk_cestab_nr() +{ + local ns=$1 + local cestab=$2 + local count + + print_check "cestab $cestab" + count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$cestab" ]; then + fail_test "got $count current establish[s] expected $cestab" + else + print_ok + fi +} + +# $1 namespace 1, $2 namespace 2 +check_cestab() +{ + if [ -n "${cestab_ns1}" ]; then + chk_cestab_nr ${1} ${cestab_ns1} + fi + if [ -n "${cestab_ns2}" ]; then + chk_cestab_nr ${2} ${cestab_ns2} + fi +} + do_transfer() { local listener_ns="$1" @@ -1089,6 +1119,7 @@ do_transfer() local cpid=$! pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr + check_cestab $listener_ns $connector_ns wait $cpid local retc=$? @@ -2477,47 +2508,52 @@ add_tests() if reset "add single subflow"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 - addr_nr_ns2=1 speed=slow \ + addr_nr_ns2=1 speed=slow cestab_ns2=1 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 + chk_cestab_nr $ns2 0 fi # add signal address if reset "add signal address"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 - addr_nr_ns1=1 speed=slow \ + addr_nr_ns1=1 speed=slow cestab_ns1=1 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 + chk_cestab_nr $ns1 0 fi # add multiple subflows if reset "add multiple subflows"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - addr_nr_ns2=2 speed=slow \ + addr_nr_ns2=2 speed=slow cestab_ns2=1 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 + chk_cestab_nr $ns2 0 fi # add multiple subflows IPv6 if reset "add multiple subflows IPv6"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - addr_nr_ns2=2 speed=slow \ + addr_nr_ns2=2 speed=slow cestab_ns2=1 \ run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 2 2 2 + chk_cestab_nr $ns2 0 fi # add multiple addresses IPv6 if reset "add multiple addresses IPv6"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 2 2 - addr_nr_ns1=2 speed=slow \ + addr_nr_ns1=2 speed=slow cestab_ns1=1 \ run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 2 2 2 chk_add_nr 2 2 + chk_cestab_nr $ns1 0 fi }