Merge branch 'mptcp-path-manager-fixes'

Mat Martineau says:

====================
mptcp: Path manager fixes for 5.19

The MPTCP userspace path manager is new in 5.19, and these patches fix
some issues in that new code.

Patches 1-3 fix path manager locking issues.

Patches 4 and 5 allow userspace path managers to change the priority of
established subflows using the existing MPTCP_PM_CMD_SET_FLAGS generic
netlink command, and include the corresponding selftest update.

Patches 6 and 7 fix accounting of available endpoint IDs and the
MPTCP_MIB_RMSUBFLOW counter.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2022-07-06 12:50:27 +01:00
commit ae9fdf6cb4
7 changed files with 192 additions and 31 deletions

View File

@ -1584,6 +1584,9 @@ mp_rst:
*ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
TCPOLEN_MPTCP_PRIO,
opts->backup, TCPOPT_NOP);
MPTCP_INC_STATS(sock_net((const struct sock *)tp),
MPTCP_MIB_MPPRIOTX);
}
mp_capable_done:

View File

@ -717,9 +717,10 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
}
}
static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct mptcp_addr_info *addr,
u8 bkup)
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct mptcp_addr_info *addr,
struct mptcp_addr_info *rem,
u8 bkup)
{
struct mptcp_subflow_context *subflow;
@ -727,24 +728,29 @@ static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
struct sock *sk = (struct sock *)msk;
struct mptcp_addr_info local;
struct mptcp_addr_info local, remote;
bool slow;
local_address((struct sock_common *)ssk, &local);
if (!mptcp_addresses_equal(&local, addr, addr->port))
continue;
if (rem && rem->family != AF_UNSPEC) {
remote_address((struct sock_common *)ssk, &remote);
if (!mptcp_addresses_equal(&remote, rem, rem->port))
continue;
}
slow = lock_sock_fast(ssk);
if (subflow->backup != bkup)
msk->last_snd = NULL;
subflow->backup = bkup;
subflow->send_mp_prio = 1;
subflow->request_bkup = bkup;
__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIOTX);
spin_unlock_bh(&msk->pm.lock);
pr_debug("send ack for mp_prio");
mptcp_subflow_send_ack(ssk);
spin_lock_bh(&msk->pm.lock);
__mptcp_subflow_send_ack(ssk);
unlock_sock_fast(ssk, slow);
return 0;
}
@ -801,7 +807,8 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
removed = true;
__MPTCP_INC_STATS(sock_net(sk), rm_type);
}
__set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap);
if (rm_type == MPTCP_MIB_RMSUBFLOW)
__set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap);
if (!removed)
continue;
@ -1816,8 +1823,10 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk,
list.ids[list.nr++] = addr->id;
spin_lock_bh(&msk->pm.lock);
mptcp_pm_nl_rm_subflow_received(msk, &list);
mptcp_pm_create_subflow_or_signal_addr(msk);
spin_unlock_bh(&msk->pm.lock);
}
static int mptcp_nl_set_flags(struct net *net,
@ -1835,12 +1844,10 @@ static int mptcp_nl_set_flags(struct net *net,
goto next;
lock_sock(sk);
spin_lock_bh(&msk->pm.lock);
if (changed & MPTCP_PM_ADDR_FLAG_BACKUP)
ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup);
ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, NULL, bkup);
if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)
mptcp_pm_nl_fullmesh(msk, addr);
spin_unlock_bh(&msk->pm.lock);
release_sock(sk);
next:
@ -1854,6 +1861,9 @@ next:
static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
{
struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry;
struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, };
struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
@ -1866,6 +1876,12 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
if (ret < 0)
return ret;
if (attr_rem) {
ret = mptcp_pm_parse_entry(attr_rem, info, false, &remote);
if (ret < 0)
return ret;
}
if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
bkup = 1;
if (addr.addr.family == AF_UNSPEC) {
@ -1874,6 +1890,10 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
return -EOPNOTSUPP;
}
if (token)
return mptcp_userspace_pm_set_flags(sock_net(skb->sk),
token, &addr, &remote, bkup);
spin_lock_bh(&pernet->lock);
entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
if (!entry) {

View File

@ -5,6 +5,7 @@
*/
#include "protocol.h"
#include "mib.h"
void mptcp_free_local_addr_list(struct mptcp_sock *msk)
{
@ -306,15 +307,11 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk,
const struct mptcp_addr_info *local,
const struct mptcp_addr_info *remote)
{
struct sock *sk = &msk->sk.icsk_inet.sk;
struct mptcp_subflow_context *subflow;
struct sock *found = NULL;
if (local->family != remote->family)
return NULL;
lock_sock(sk);
mptcp_for_each_subflow(msk, subflow) {
const struct inet_sock *issk;
struct sock *ssk;
@ -347,16 +344,11 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk,
}
if (issk->inet_sport == local->port &&
issk->inet_dport == remote->port) {
found = ssk;
goto found;
}
issk->inet_dport == remote->port)
return ssk;
}
found:
release_sock(sk);
return found;
return NULL;
}
int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
@ -412,18 +404,51 @@ int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
}
sk = &msk->sk.icsk_inet.sk;
lock_sock(sk);
ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r);
if (ssk) {
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN);
mptcp_close_ssk(sk, ssk, subflow);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW);
err = 0;
} else {
err = -ESRCH;
}
release_sock(sk);
destroy_err:
destroy_err:
sock_put((struct sock *)msk);
return err;
}
/* Apply a backup-priority change to a subflow of the MPTCP socket
 * identified by @token.
 *
 * @net:   network namespace used for the token lookup
 * @token: netlink attribute carrying the u32 connection token
 * @loc:   local address entry of the subflow; family must not be AF_UNSPEC
 * @rem:   remote address entry of the subflow; family must not be AF_UNSPEC
 * @bkup:  backup value handed to mptcp_pm_nl_mp_prio_send_ack()
 *
 * Returns the result of mptcp_pm_nl_mp_prio_send_ack(), or -EINVAL when the
 * token matches no socket, the socket is not managed by the userspace path
 * manager, or either address family is unspecified.
 */
int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
				 struct mptcp_pm_addr_entry *loc,
				 struct mptcp_pm_addr_entry *rem, u8 bkup)
{
	struct mptcp_sock *msk = mptcp_token_get_sock(net, nla_get_u32(token));
	struct sock *sk = (struct sock *)msk;
	int err = -EINVAL;

	if (!msk)
		return -EINVAL;

	if (mptcp_pm_is_userspace(msk) &&
	    loc->addr.family != AF_UNSPEC &&
	    rem->addr.family != AF_UNSPEC) {
		/* The priority update runs with the msk socket lock held. */
		lock_sock(sk);
		err = mptcp_pm_nl_mp_prio_send_ack(msk, &loc->addr, &rem->addr, bkup);
		release_sock(sk);
	}

	/* Every path that found a socket ends with sock_put(). */
	sock_put(sk);
	return err;
}

View File

@ -506,13 +506,18 @@ static inline bool tcp_can_send_ack(const struct sock *ssk)
(TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN));
}
/* Send an ACK on @ssk, but only when tcp_can_send_ack() permits it.
 * No locking is done here; mptcp_subflow_send_ack() is the variant that
 * wraps this call in lock_sock_fast()/unlock_sock_fast().
 */
void __mptcp_subflow_send_ack(struct sock *ssk)
{
	if (!tcp_can_send_ack(ssk))
		return;

	tcp_send_ack(ssk);
}
/* Locked variant of __mptcp_subflow_send_ack(): take the subflow socket
 * lock with lock_sock_fast(), send the ACK if the helper allows it, then
 * release the lock.
 */
void mptcp_subflow_send_ack(struct sock *ssk)
{
	bool slow;

	slow = lock_sock_fast(ssk);
	/* The tcp_can_send_ack() check lives in the helper; keeping an inline
	 * check-and-send here as well would emit the ACK twice.
	 */
	__mptcp_subflow_send_ack(ssk);
	unlock_sock_fast(ssk, slow);
}

View File

@ -607,6 +607,7 @@ void __init mptcp_subflow_init(void);
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_subflow_queue_clean(struct sock *ssk);
@ -771,6 +772,10 @@ void mptcp_pm_rm_addr_received(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list);
void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct mptcp_addr_info *addr,
struct mptcp_addr_info *rem,
u8 bkup);
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
const struct mptcp_pm_addr_entry *entry);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
@ -787,7 +792,9 @@ int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
unsigned int id,
u8 *flags, int *ifindex);
int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
struct mptcp_pm_addr_entry *loc,
struct mptcp_pm_addr_entry *rem, u8 bkup);
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
bool echo);

View File

@ -39,7 +39,7 @@ static void syntax(char *argv[])
fprintf(stderr, "\tdsf lip <local-ip> lport <local-port> rip <remote-ip> rport <remote-port> token <token>\n");
fprintf(stderr, "\tdel <id> [<ip>]\n");
fprintf(stderr, "\tget <id>\n");
fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>]\n");
fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>] [token <token>] [rip <ip>] [rport <port>]\n");
fprintf(stderr, "\tflush\n");
fprintf(stderr, "\tdump\n");
fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
@ -1279,7 +1279,10 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
struct rtattr *rta, *nest;
struct nlmsghdr *nh;
u_int32_t flags = 0;
u_int32_t token = 0;
u_int16_t rport = 0;
u_int16_t family;
void *rip = NULL;
int nest_start;
int use_id = 0;
u_int8_t id;
@ -1339,7 +1342,13 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
error(1, 0, " missing flags keyword");
for (; arg < argc; arg++) {
if (!strcmp(argv[arg], "flags")) {
if (!strcmp(argv[arg], "token")) {
if (++arg >= argc)
error(1, 0, " missing token value");
/* token */
token = atoi(argv[arg]);
} else if (!strcmp(argv[arg], "flags")) {
char *tok, *str;
/* flags */
@ -1378,12 +1387,72 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
rta->rta_len = RTA_LENGTH(2);
memcpy(RTA_DATA(rta), &port, 2);
off += NLMSG_ALIGN(rta->rta_len);
} else if (!strcmp(argv[arg], "rport")) {
if (++arg >= argc)
error(1, 0, " missing remote port");
rport = atoi(argv[arg]);
} else if (!strcmp(argv[arg], "rip")) {
if (++arg >= argc)
error(1, 0, " missing remote ip");
rip = argv[arg];
} else {
error(1, 0, "unknown keyword %s", argv[arg]);
}
}
nest->rta_len = off - nest_start;
/* token */
if (token) {
rta = (void *)(data + off);
rta->rta_type = MPTCP_PM_ATTR_TOKEN;
rta->rta_len = RTA_LENGTH(4);
memcpy(RTA_DATA(rta), &token, 4);
off += NLMSG_ALIGN(rta->rta_len);
}
/* remote addr/port */
if (rip) {
nest_start = off;
nest = (void *)(data + off);
nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR_REMOTE;
nest->rta_len = RTA_LENGTH(0);
off += NLMSG_ALIGN(nest->rta_len);
/* addr data */
rta = (void *)(data + off);
if (inet_pton(AF_INET, rip, RTA_DATA(rta))) {
family = AF_INET;
rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
rta->rta_len = RTA_LENGTH(4);
} else if (inet_pton(AF_INET6, rip, RTA_DATA(rta))) {
family = AF_INET6;
rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
rta->rta_len = RTA_LENGTH(16);
} else {
error(1, errno, "can't parse ip %s", (char *)rip);
}
off += NLMSG_ALIGN(rta->rta_len);
/* family */
rta = (void *)(data + off);
rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
rta->rta_len = RTA_LENGTH(2);
memcpy(RTA_DATA(rta), &family, 2);
off += NLMSG_ALIGN(rta->rta_len);
if (rport) {
rta = (void *)(data + off);
rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
rta->rta_len = RTA_LENGTH(2);
memcpy(RTA_DATA(rta), &rport, 2);
off += NLMSG_ALIGN(rta->rta_len);
}
nest->rta_len = off - nest_start;
}
do_nl_req(fd, nh, off, 0);
return 0;
}

View File

@ -770,10 +770,42 @@ test_subflows()
rm -f "$evts"
}
# Set the backup flag on the client subflow via pm_nl_ctl, then check that
# the MPTcpExtMPPrioTx counter in $ns2 and the MPTcpExtMPPrioRx counter in
# $ns1 each read exactly 1. Exits the script with status 1 on a mismatch.
test_prio()
{
	local count

	# Send MP_PRIO signal from client to server machine
	ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$server4_port"
	sleep 0.5

	# Check TX
	stdbuf -o0 -e0 printf "MP_PRIO TX \t"
	count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}')
	[ -z "$count" ] && count=0
	# Quote $count: if grep matched several lines, an unquoted expansion
	# makes '[' fail with "too many arguments" and the else branch would
	# report a bogus [OK].
	if [ "$count" != 1 ]; then
		stdbuf -o0 -e0 printf "[FAIL]\n"
		exit 1
	else
		stdbuf -o0 -e0 printf "[OK]\n"
	fi

	# Check RX
	stdbuf -o0 -e0 printf "MP_PRIO RX \t"
	count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}')
	[ -z "$count" ] && count=0
	# Same quoting requirement as the TX check above.
	if [ "$count" != 1 ]; then
		stdbuf -o0 -e0 printf "[FAIL]\n"
		exit 1
	else
		stdbuf -o0 -e0 printf "[OK]\n"
	fi
}
make_connection
make_connection "v6"
test_announce
test_remove
test_subflows
test_prio
exit 0