mptcp: introduce MPTCP_FULL_INFO getsockopt

Some user-space applications want to monitor subflow utilization.

Dumping the per subflow tcp_info is not enough, as the PM could close
and re-create the subflows under-the-hood, fooling the accounting.
Even checking the src/dst addresses used by each subflow could not
be enough, because new subflows could re-use the same address/port of
the just closed one.

This patch introduces a new socket option that allows dumping all the
relevant information at once (everything, everywhere...), in a consistent
manner.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/388
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Paolo Abeni 2023-06-20 18:30:18 +02:00 committed by Jakub Kicinski
parent 6f06b4d4d1
commit 492432074e
2 changed files with 149 additions and 2 deletions

View File

@ -249,9 +249,33 @@ struct mptcp_subflow_addrs {
};
};
/* Per-subflow record written to user space by the MPTCP_FULL_INFO
 * getsockopt(), one entry per reported subflow.
 */
struct mptcp_subflow_info {
/* subflow identifier, taken from the kernel's subflow context */
__u32 id;
/* subflow addresses; left zeroed when the per-entry size requested by
 * user space does not extend past the id field
 */
struct mptcp_subflow_addrs addrs;
};
/* Header exchanged with the MPTCP_FULL_INFO getsockopt().
 *
 * User space fills in the *_user fields and the two array pointers, and
 * must leave the kernel-owned fields at 0. On return the kernel-owned
 * fields carry the kernel's view, and the two size_*info_user fields are
 * clamped to the matching kernel structure sizes.
 */
struct mptcp_full_info {
__u32 size_tcpinfo_kernel; /* must be 0, set by kernel */
/* size of one tcp_info array entry, as seen by user space */
__u32 size_tcpinfo_user;
__u32 size_sfinfo_kernel; /* must be 0, set by kernel */
/* size of one subflow_info array entry, as seen by user space */
__u32 size_sfinfo_user;
__u32 num_subflows; /* must be 0, set by kernel (real subflow count) */
__u32 size_arrays_user; /* max subflows that userspace is interested in;
* the buffers at subflow_info/tcp_info
* are respectively at least:
* size_arrays_user * size_sfinfo_user
* size_arrays_user * size_tcpinfo_user
* bytes wide
*/
/* user-space address of the struct mptcp_subflow_info array */
__aligned_u64 subflow_info;
/* user-space address of the struct tcp_info array */
__aligned_u64 tcp_info;
/* MPTCP-level info; only filled when optlen extends past the fields above */
struct mptcp_info mptcp_info;
};
/* MPTCP socket options */
#define MPTCP_INFO 1
#define MPTCP_TCPINFO 2
#define MPTCP_SUBFLOW_ADDRS 3
/* getsockopt() with MPTCP_FULL_INFO exchanges a struct mptcp_full_info and
 * fills the per-subflow arrays that structure points to
 */
#define MPTCP_FULL_INFO 4
#endif /* _UAPI_MPTCP_H */

View File

@ -14,7 +14,8 @@
#include <net/mptcp.h>
#include "protocol.h"
#define MIN_INFO_OPTLEN_SIZE 16
#define MIN_INFO_OPTLEN_SIZE 16
#define MIN_FULL_INFO_OPTLEN_SIZE 40
static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
@ -981,7 +982,8 @@ static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
}
static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
char __user *optval, int __user *optlen)
char __user *optval,
int __user *optlen)
{
int len, copylen;
@ -1162,6 +1164,125 @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o
return 0;
}
/* Read and validate the fixed-size header of a MPTCP_FULL_INFO request.
 *
 * @mfi:    kernel copy of the request; fully zeroed, then its first
 *          MIN_FULL_INFO_OPTLEN_SIZE bytes are loaded from @optval
 * @optval: user-space buffer holding the request
 * @optlen: user-space pointer to the size of @optval
 *
 * Returns the number of bytes available in the user buffer past the fixed
 * header (>= 0), -EFAULT if user memory cannot be read, or -EINVAL when the
 * buffer is too short, a kernel-owned field is non-zero, or a user-provided
 * entry size exceeds INT_MAX.
 */
static int mptcp_get_full_info(struct mptcp_full_info *mfi,
			       char __user *optval,
			       int __user *optlen)
{
	int optsize;

	/* the fixed header is everything that precedes the embedded mptcp_info */
	BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) !=
		     MIN_FULL_INFO_OPTLEN_SIZE);

	if (get_user(optsize, optlen))
		return -EFAULT;

	if (optsize < MIN_FULL_INFO_OPTLEN_SIZE)
		return -EINVAL;

	/* clear the whole structure, then load only the header from user space */
	memset(mfi, 0, sizeof(*mfi));
	if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE))
		return -EFAULT;

	/* the kernel-owned fields must reach us zeroed */
	if (mfi->num_subflows)
		return -EINVAL;
	if (mfi->size_tcpinfo_kernel || mfi->size_sfinfo_kernel)
		return -EINVAL;

	/* reject entry sizes that cannot be represented as a positive int */
	if (mfi->size_tcpinfo_user > INT_MAX)
		return -EINVAL;
	if (mfi->size_sfinfo_user > INT_MAX)
		return -EINVAL;

	return optsize - MIN_FULL_INFO_OPTLEN_SIZE;
}
/* Write the MPTCP_FULL_INFO reply back to user space.
 *
 * @mfi:     kernel copy of the reply
 * @optval:  user-space destination buffer
 * @copylen: number of bytes to copy past the fixed header
 * @optlen:  user-space location receiving the total number of bytes written
 *
 * The reported length is stored first, then the fixed header plus @copylen
 * bytes of @mfi are copied out. Returns 0 on success, -EFAULT if either
 * user-space write fails.
 */
static int mptcp_put_full_info(struct mptcp_full_info *mfi,
			       char __user *optval,
			       u32 copylen,
			       int __user *optlen)
{
	u32 total = copylen + MIN_FULL_INFO_OPTLEN_SIZE;

	/* the data copy is attempted only once the length has been stored */
	if (put_user(total, optlen) || copy_to_user(optval, mfi, total))
		return -EFAULT;

	return 0;
}
/* MPTCP_FULL_INFO getsockopt() handler.
 *
 * Validates the request header, optionally fills the MPTCP-level info, then
 * walks the subflows under the msk socket lock, writing one
 * struct mptcp_subflow_info and one struct tcp_info entry per subflow into
 * the user-provided arrays, for at most size_arrays_user subflows. Every
 * subflow is counted, including those that do not fit in the arrays, and
 * the total is reported in num_subflows.
 *
 * Returns 0 on success, the negative error from header validation, or
 * -EFAULT if any copy to user space fails.
 */
static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval,
				      int __user *optlen)
{
	struct sock *sk = (struct sock *)msk;
	struct mptcp_subflow_context *subflow;
	void __user *sf_uptr, *tcp_uptr;
	unsigned int nr_subflows = 0;
	unsigned int info_len = 0;
	struct mptcp_full_info mfi;
	int room;

	room = mptcp_get_full_info(&mfi, optval, optlen);
	if (room < 0)
		return room;

	/* skip the mptcp info entirely when user space left no room for it */
	if (room > 0) {
		mptcp_diag_fill_info(msk, &mfi.mptcp_info);
		info_len = min_t(unsigned int, room, sizeof(struct mptcp_info));
	}

	/* report the kernel entry sizes and clamp the user ones to them */
	mfi.size_tcpinfo_kernel = sizeof(struct tcp_info);
	mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user,
				      sizeof(struct tcp_info));
	mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info);
	mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user,
				     sizeof(struct mptcp_subflow_info));

	sf_uptr = u64_to_user_ptr(mfi.subflow_info);
	tcp_uptr = u64_to_user_ptr(mfi.tcp_info);

	lock_sock(sk);
	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		struct mptcp_subflow_info sfinfo;
		struct tcp_info tcp_info;
		unsigned int slot = nr_subflows++;

		/* keep counting, but only the first size_arrays_user are dumped */
		if (slot >= mfi.size_arrays_user)
			continue;

		/* zero the whole entry first: it is copied verbatim to user
		 * space, and the addresses are fetched only when the user
		 * entry is wide enough to hold part of them
		 */
		memset(&sfinfo, 0, sizeof(sfinfo));
		sfinfo.id = subflow->subflow_id;
		if (mfi.size_sfinfo_user >
		    offsetof(struct mptcp_subflow_info, addrs))
			mptcp_get_sub_addrs(ssk, &sfinfo.addrs);
		if (copy_to_user(sf_uptr, &sfinfo, mfi.size_sfinfo_user))
			goto out_fault;

		/* tcp_info is collected only if user space asked for it */
		if (mfi.size_tcpinfo_user) {
			tcp_get_info(ssk, &tcp_info);
			if (copy_to_user(tcp_uptr, &tcp_info,
					 mfi.size_tcpinfo_user))
				goto out_fault;
		}

		/* advance both cursors to the next array slot */
		sf_uptr += mfi.size_sfinfo_user;
		tcp_uptr += mfi.size_tcpinfo_user;
	}
	release_sock(sk);

	mfi.num_subflows = nr_subflows;
	if (mptcp_put_full_info(&mfi, optval, info_len, optlen))
		return -EFAULT;

	return 0;

out_fault:
	release_sock(sk);
	return -EFAULT;
}
static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
int __user *optlen, int val)
{
@ -1235,6 +1356,8 @@ static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
switch (optname) {
case MPTCP_INFO:
return mptcp_getsockopt_info(msk, optval, optlen);
case MPTCP_FULL_INFO:
return mptcp_getsockopt_full_info(msk, optval, optlen);
case MPTCP_TCPINFO:
return mptcp_getsockopt_tcpinfo(msk, optval, optlen);
case MPTCP_SUBFLOW_ADDRS: