Merge branch 'mptcp-mib-counters'

Matthieu Baerts says:

====================
mptcp: add CurrEstab MIB counter

This MIB counter is similar to the one of TCP -- CurrEstab -- available
in /proc/net/snmp. This is useful to quickly list the number of MPTCP
connections without having to iterate over all of them.

Patch 1 prepares its support by adding new helper functions:

 - MPTCP_DEC_STATS(): similar to MPTCP_INC_STATS(), but this time to
   decrement a counter.

 - mptcp_set_state(): similar to tcp_set_state(), to change the state of
   an MPTCP socket, and to inc/decrement the new counter when needed.

Patch 2 uses mptcp_set_state() instead of directly calling
inet_sk_state_store() to change the state of MPTCP sockets.

Patches 3 and 4 validate the new feature in the MPTCP "join" and "diag"
selftests.
====================

Signed-off-by: Matthieu Baerts <matttbe@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2024-01-02 13:33:58 +00:00
commit 8179cc4764
8 changed files with 110 additions and 26 deletions

View File

@ -66,6 +66,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED),
SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB),
SNMP_MIB_SENTINEL
};

View File

@ -65,6 +65,7 @@ enum linux_mptcp_mib_field {
* conflict with another subflow while updating msk rcv wnd
*/
MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */
MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */
__MPTCP_MIB_MAX
};
@ -95,4 +96,11 @@ static inline void __MPTCP_INC_STATS(struct net *net,
__SNMP_INC_STATS(net->mib.mptcp_statistics, field);
}
/* Decrement the MPTCP MIB counter @field for the network namespace @net.
 *
 * Does nothing when net->mib.mptcp_statistics is NULL.
 */
static inline void MPTCP_DEC_STATS(struct net *net,
				   enum linux_mptcp_mib_field field)
{
	/* No MIB storage attached to this netns: nothing to account. */
	if (unlikely(!net->mib.mptcp_statistics))
		return;

	SNMP_DEC_STATS(net->mib.mptcp_statistics, field);
}
bool mptcp_mib_alloc(struct net *net);

View File

@ -1048,6 +1048,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (err)
return err;
/* We don't use mptcp_set_state() here because it needs to be called
* under the msk socket lock. For the moment, that will not bring
* anything more than only calling inet_sk_state_store(), because the
* old status is known (TCP_CLOSE).
*/
inet_sk_state_store(newsk, TCP_LISTEN);
lock_sock(ssk);
err = __inet_listen_sk(ssk, backlog);

View File

@ -429,11 +429,11 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
switch (sk->sk_state) {
case TCP_FIN_WAIT1:
inet_sk_state_store(sk, TCP_FIN_WAIT2);
mptcp_set_state(sk, TCP_FIN_WAIT2);
break;
case TCP_CLOSING:
case TCP_LAST_ACK:
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_set_state(sk, TCP_CLOSE);
break;
}
@ -594,13 +594,13 @@ static bool mptcp_check_data_fin(struct sock *sk)
switch (sk->sk_state) {
case TCP_ESTABLISHED:
inet_sk_state_store(sk, TCP_CLOSE_WAIT);
mptcp_set_state(sk, TCP_CLOSE_WAIT);
break;
case TCP_FIN_WAIT1:
inet_sk_state_store(sk, TCP_CLOSING);
mptcp_set_state(sk, TCP_CLOSING);
break;
case TCP_FIN_WAIT2:
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_set_state(sk, TCP_CLOSE);
break;
default:
/* Other states not expected */
@ -775,7 +775,7 @@ static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
*/
ssk_state = inet_sk_state_load(ssk);
if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
inet_sk_state_store(sk, ssk_state);
mptcp_set_state(sk, ssk_state);
WRITE_ONCE(sk->sk_err, -err);
/* This barrier is coupled with smp_rmb() in mptcp_poll() */
@ -2463,7 +2463,7 @@ out:
inet_sk_state_load(msk->first) == TCP_CLOSE) {
if (sk->sk_state != TCP_ESTABLISHED ||
msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_set_state(sk, TCP_CLOSE);
mptcp_close_wake_up(sk);
} else {
mptcp_start_tout_timer(sk);
@ -2558,7 +2558,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
WRITE_ONCE(sk->sk_err, ECONNRESET);
}
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_set_state(sk, TCP_CLOSE);
WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
@ -2693,7 +2693,7 @@ static void mptcp_do_fastclose(struct sock *sk)
struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_set_state(sk, TCP_CLOSE);
mptcp_for_each_subflow_safe(msk, subflow, tmp)
__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
subflow, MPTCP_CF_FASTCLOSE);
@ -2871,6 +2871,24 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
release_sock(ssk);
}
void mptcp_set_state(struct sock *sk, int state)
{
int oldstate = sk->sk_state;
switch (state) {
case TCP_ESTABLISHED:
if (oldstate != TCP_ESTABLISHED)
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
break;
default:
if (oldstate == TCP_ESTABLISHED)
MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
}
inet_sk_state_store(sk, state);
}
static const unsigned char new_state[16] = {
/* current state: new state: action: */
[0 /* (Invalid) */] = TCP_CLOSE,
@ -2893,7 +2911,7 @@ static int mptcp_close_state(struct sock *sk)
int next = (int)new_state[sk->sk_state];
int ns = next & TCP_STATE_MASK;
inet_sk_state_store(sk, ns);
mptcp_set_state(sk, ns);
return next & TCP_ACTION_FIN;
}
@ -3004,7 +3022,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
mptcp_check_listen_stop(sk);
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_set_state(sk, TCP_CLOSE);
goto cleanup;
}
@ -3047,7 +3065,7 @@ cleanup:
* state, let's not keep resources busy for no reasons
*/
if (subflows_alive == 0)
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_set_state(sk, TCP_CLOSE);
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
@ -3113,7 +3131,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
return -EBUSY;
mptcp_check_listen_stop(sk);
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_set_state(sk, TCP_CLOSE);
mptcp_stop_rtx_timer(sk);
mptcp_stop_tout_timer(sk);
@ -3201,7 +3219,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
/* this can't race with mptcp_close(), as the msk is
* not yet exposed to user-space
*/
inet_sk_state_store(nsk, TCP_ESTABLISHED);
mptcp_set_state(nsk, TCP_ESTABLISHED);
/* The msk maintain a ref to each subflow in the connections list */
WRITE_ONCE(msk->first, ssk);
@ -3622,7 +3640,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (IS_ERR(ssk))
return PTR_ERR(ssk);
inet_sk_state_store(sk, TCP_SYN_SENT);
mptcp_set_state(sk, TCP_SYN_SENT);
subflow = mptcp_subflow_ctx(ssk);
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
@ -3672,7 +3690,7 @@ out:
if (unlikely(err)) {
/* avoid leaving a dangling token in an unconnected socket */
mptcp_token_destroy(msk);
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_set_state(sk, TCP_CLOSE);
return err;
}
@ -3761,13 +3779,13 @@ static int mptcp_listen(struct socket *sock, int backlog)
goto unlock;
}
inet_sk_state_store(sk, TCP_LISTEN);
mptcp_set_state(sk, TCP_LISTEN);
sock_set_flag(sk, SOCK_RCU_FREE);
lock_sock(ssk);
err = __inet_listen_sk(ssk, backlog);
release_sock(ssk);
inet_sk_state_store(sk, inet_sk_state_load(ssk));
mptcp_set_state(sk, inet_sk_state_load(ssk));
if (!err) {
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@ -3845,7 +3863,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
__mptcp_close_ssk(newsk, msk->first,
mptcp_subflow_ctx(msk->first), 0);
if (unlikely(list_is_singular(&msk->conn_list)))
inet_sk_state_store(newsk, TCP_CLOSE);
mptcp_set_state(newsk, TCP_CLOSE);
}
} else {
MPTCP_INC_STATS(sock_net(ssk),

View File

@ -641,6 +641,7 @@ bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
void __mptcp_unaccepted_force_close(struct sock *sk);
void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
void mptcp_set_state(struct sock *sk, int state);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
const struct mptcp_addr_info *b, bool use_port);

View File

@ -425,7 +425,7 @@ void __mptcp_sync_state(struct sock *sk, int state)
__mptcp_propagate_sndbuf(sk, msk->first);
if (sk->sk_state == TCP_SYN_SENT) {
inet_sk_state_store(sk, state);
mptcp_set_state(sk, state);
sk->sk_state_change(sk);
}
}

View File

@ -56,7 +56,7 @@ __chk_nr()
local command="$1"
local expected=$2
local msg="$3"
local skip="${4:-SKIP}"
local skip="${4-SKIP}"
local nr
nr=$(eval $command)
@ -182,6 +182,15 @@ chk_msk_inuse()
__chk_nr get_msk_inuse $expected "$msg" 0
}
# Check the MPTcpExtMPCurrEstab counter against an expected value.
# $1: cestab nr
#
# The counter is read with mptcp_lib_get_counter in the namespace named by
# ${ns}, which is not set here and is taken from the enclosing scope.
chk_msk_cestab()
{
local cestab=$1
# The 4th argument is passed explicitly empty, so __chk_nr gets an empty
# "skip" value instead of its "SKIP" default.
# NOTE(review): presumably this lets an empty counter value (counter not
# exposed by the kernel) be reported as skipped -- confirm in __chk_nr.
__chk_nr "mptcp_lib_get_counter ${ns} MPTcpExtMPCurrEstab" \
"${cestab}" "....chk ${cestab} cestab" ""
}
wait_connected()
{
local listener_ns="${1}"
@ -219,9 +228,11 @@ chk_msk_nr 2 "after MPC handshake "
chk_msk_remote_key_nr 2 "....chk remote_key"
chk_msk_fallback_nr 0 "....chk no fallback"
chk_msk_inuse 2 "....chk 2 msk in use"
chk_msk_cestab 2
flush_pids
chk_msk_inuse 0 "....chk 0 msk in use after flush"
chk_msk_cestab 0
echo "a" | \
timeout ${timeout_test} \
@ -237,9 +248,11 @@ echo "b" | \
wait_connected $ns 10001
chk_msk_fallback_nr 1 "check fallback"
chk_msk_inuse 1 "....chk 1 msk in use"
chk_msk_cestab 1
flush_pids
chk_msk_inuse 0 "....chk 0 msk in use after flush"
chk_msk_cestab 0
NR_CLIENTS=100
for I in `seq 1 $NR_CLIENTS`; do
@ -261,9 +274,11 @@ done
wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
chk_msk_inuse $((NR_CLIENTS*2)) "....chk many msk in use"
chk_msk_cestab $((NR_CLIENTS*2))
flush_pids
chk_msk_inuse 0 "....chk 0 msk in use after flush"
chk_msk_cestab 0
mptcp_lib_result_print_all_tap
exit $ret

View File

@ -56,6 +56,8 @@ unset FAILING_LINKS
unset test_linkfail
unset addr_nr_ns1
unset addr_nr_ns2
unset cestab_ns1
unset cestab_ns2
unset sflags
unset fastclose
unset fullmesh
@ -976,6 +978,34 @@ pm_nl_set_endpoint()
fi
}
# Compare the MPTcpExtMPCurrEstab counter of a namespace with an expected value.
# $1: namespace, $2: expected counter value
#
# Reports a skip when no counter value could be read, a failure when the value
# differs from the expected one, and OK otherwise.
chk_cestab_nr()
{
	local ns=$1
	local cestab=$2
	local count

	print_check "cestab $cestab"
	count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab")

	# The empty case must stay first: an empty reading is always a skip,
	# even if the expected value is empty too.
	case "$count" in
	"")
		print_skip
		;;
	"$cestab")
		print_ok
		;;
	*)
		fail_test "got $count current establish[s] expected $cestab"
		;;
	esac
}
# Check the current-established counter in each namespace for which an
# expected value was provided.
# $1 namespace 1, $2 namespace 2
#
# cestab_ns1 / cestab_ns2 hold the expected values; a namespace whose variable
# is empty or unset is not checked.
check_cestab()
{
	[ -z "${cestab_ns1}" ] || chk_cestab_nr ${1} ${cestab_ns1}
	[ -z "${cestab_ns2}" ] || chk_cestab_nr ${2} ${cestab_ns2}
}
do_transfer()
{
local listener_ns="$1"
@ -1089,6 +1119,7 @@ do_transfer()
local cpid=$!
pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr
check_cestab $listener_ns $connector_ns
wait $cpid
local retc=$?
@ -2477,47 +2508,52 @@ add_tests()
if reset "add single subflow"; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
addr_nr_ns2=1 speed=slow \
addr_nr_ns2=1 speed=slow cestab_ns2=1 \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_cestab_nr $ns2 0
fi
# add signal address
if reset "add signal address"; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 1 1
addr_nr_ns1=1 speed=slow \
addr_nr_ns1=1 speed=slow cestab_ns1=1 \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_cestab_nr $ns1 0
fi
# add multiple subflows
if reset "add multiple subflows"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
addr_nr_ns2=2 speed=slow \
addr_nr_ns2=2 speed=slow cestab_ns2=1 \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
chk_cestab_nr $ns2 0
fi
# add multiple subflows IPv6
if reset "add multiple subflows IPv6"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
addr_nr_ns2=2 speed=slow \
addr_nr_ns2=2 speed=slow cestab_ns2=1 \
run_tests $ns1 $ns2 dead:beef:1::1
chk_join_nr 2 2 2
chk_cestab_nr $ns2 0
fi
# add multiple addresses IPv6
if reset "add multiple addresses IPv6"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 2 2
addr_nr_ns1=2 speed=slow \
addr_nr_ns1=2 speed=slow cestab_ns1=1 \
run_tests $ns1 $ns2 dead:beef:1::1
chk_join_nr 2 2 2
chk_add_nr 2 2
chk_cestab_nr $ns1 0
fi
}