Merge branch 'mptcp-Miscellaneous-fixes'
Mat Martineau says:

====================
mptcp: Miscellaneous fixes

Here are some MPTCP fixes for the -net tree, addressing various issues
we have seen thanks to syzkaller and other testing:

Patch 1 correctly propagates errors at connection time and for TCP
fallback connections.

Patch 2 sets the expected poll() events on SEND_SHUTDOWN.

Patch 3 fixes a retransmit crash and unneeded retransmissions.

Patch 4 fixes possible uninitialized data on the error path during
socket creation.

Patch 5 addresses a problem with MPTCP window updates.

Patch 6 fixes a case where MPTCP retransmission can get stuck.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit 9c899aa6ac
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -498,8 +498,8 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 {
     struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
     struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+    u64 snd_data_fin_enable, ack_seq;
     unsigned int dss_size = 0;
-    u64 snd_data_fin_enable;
     struct mptcp_ext *mpext;
     unsigned int ack_size;
     bool ret = false;
@@ -531,13 +531,14 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
         return ret;
     }
 
+    ack_seq = READ_ONCE(msk->ack_seq);
     if (READ_ONCE(msk->use_64bit_ack)) {
         ack_size = TCPOLEN_MPTCP_DSS_ACK64;
-        opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq);
+        opts->ext_copy.data_ack = ack_seq;
         opts->ext_copy.ack64 = 1;
     } else {
         ack_size = TCPOLEN_MPTCP_DSS_ACK32;
-        opts->ext_copy.data_ack32 = (uint32_t)READ_ONCE(msk->ack_seq);
+        opts->ext_copy.data_ack32 = (uint32_t)ack_seq;
         opts->ext_copy.ack64 = 0;
     }
     opts->ext_copy.use_ack = 1;
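A note on the pattern above: the fix reads msk->ack_seq into a local exactly once, so the 64-bit and 32-bit branches cannot observe two different values of a concurrently updated field. Below is a minimal userspace sketch of the same snapshot idiom; shared_ack_seq and fill_ack are hypothetical names standing in for the kernel structures.

/* snapshot once, use many times; analogous to
 * ack_seq = READ_ONCE(msk->ack_seq) above
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t shared_ack_seq;

static void fill_ack(int use_64bit, uint64_t *ack64, uint32_t *ack32)
{
    uint64_t ack_seq = atomic_load(&shared_ack_seq);    /* one read */

    if (use_64bit)
        *ack64 = ack_seq;
    else
        *ack32 = (uint32_t)ack_seq;    /* truncated view of the same snapshot */
}

int main(void)
{
    uint64_t a64;
    uint32_t a32;

    atomic_store(&shared_ack_seq, 0x100000001ULL);
    fill_ack(1, &a64, &a32);
    fill_ack(0, &a64, &a32);
    printf("ack64=%llx ack32=%x\n", (unsigned long long)a64, a32);
    return 0;
}

Re-reading the shared field in each branch, as the removed lines did, could yield a 32-bit ack taken from a newer value than the 64-bit one if a writer ran in between.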
@@ -879,8 +880,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
         msk->wnd_end = new_wnd_end;
 
     /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
-    if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)) &&
-        sk_stream_memory_free(ssk))
+    if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
         __mptcp_check_push(sk, ssk);
 
     if (after64(new_snd_una, old_snd_una)) {
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -364,8 +364,6 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
 
     /* Look for an acknowledged DATA_FIN */
     if (mptcp_pending_data_fin_ack(sk)) {
-        mptcp_stop_timer(sk);
-
         WRITE_ONCE(msk->snd_data_fin_enable, 0);
 
         switch (sk->sk_state) {
@@ -459,7 +457,18 @@ static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
 static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
 {
     struct sock *ack_hint = READ_ONCE(msk->ack_hint);
+    int old_space = READ_ONCE(msk->old_wspace);
     struct mptcp_subflow_context *subflow;
     struct sock *sk = (struct sock *)msk;
+    bool cleanup;
+
+    /* this is a simple superset of what tcp_cleanup_rbuf() implements
+     * so that we don't have to acquire the ssk socket lock most of the time
+     * to do actually nothing
+     */
+    cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
+    if (!cleanup)
+        return;
 
     /* if the hinted ssk is still active, try to use it */
     if (likely(ack_hint)) {
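The new predicate is the interesting part: mptcp_cleanup_rbuf() now bails out early unless the free receive space has grown by at least the previously advertised amount, mirroring the "significant window update" test in tcp_cleanup_rbuf() while avoiding the ssk lock in the common case. A standalone sketch of that predicate; should_send_window_update is a hypothetical name for what the kernel computes inline.

#include <stdbool.h>

static bool should_send_window_update(int space, int old_space)
{
    /* fire only when free space grew by at least the previously
     * advertised amount, i.e. it roughly doubled; a non-positive
     * old_space means any growth qualifies (the max(0, old_space)
     * bound above)
     */
    int threshold = old_space > 0 ? old_space : 0;

    return space - old_space >= threshold;
}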
@@ -1573,6 +1582,9 @@ out:
     mptcp_set_timeout(sk, ssk);
     tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
          info.size_goal);
+    if (!mptcp_timer_pending(sk))
+        mptcp_reset_timer(sk);
+
     if (msk->snd_data_fin_enable &&
         msk->snd_nxt + 1 == msk->write_seq)
         mptcp_schedule_work(sk);
@@ -1867,7 +1879,7 @@ static void __mptcp_splice_receive_queue(struct sock *sk)
     skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue);
 }
 
-static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
+static bool __mptcp_move_skbs(struct mptcp_sock *msk)
 {
     struct sock *sk = (struct sock *)msk;
     unsigned int moved = 0;
@@ -1887,13 +1899,10 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
 
         slowpath = lock_sock_fast(ssk);
         mptcp_data_lock(sk);
         __mptcp_update_rmem(sk);
         done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
         mptcp_data_unlock(sk);
-        if (moved && rcv) {
-            WRITE_ONCE(msk->rmem_pending, min(rcv, moved));
-            tcp_cleanup_rbuf(ssk, 1);
-            WRITE_ONCE(msk->rmem_pending, 0);
-        }
+        tcp_cleanup_rbuf(ssk, moved);
         unlock_sock_fast(ssk, slowpath);
     } while (!done);
 
@@ -1906,6 +1915,7 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
         ret |= __mptcp_ofo_queue(msk);
         __mptcp_splice_receive_queue(sk);
         mptcp_data_unlock(sk);
+        mptcp_cleanup_rbuf(msk);
     }
     if (ret)
         mptcp_check_data_fin((struct sock *)msk);
@@ -1935,7 +1945,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
     target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 
     while (copied < len) {
-        int bytes_read, old_space;
+        int bytes_read;
 
         bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied);
         if (unlikely(bytes_read < 0)) {
@@ -1946,15 +1956,12 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 
         copied += bytes_read;
 
-        if (skb_queue_empty(&msk->receive_queue) &&
-            __mptcp_move_skbs(msk, len - copied))
-            continue;
-
         /* be sure to advertise window change */
-        old_space = READ_ONCE(msk->old_wspace);
-        if ((tcp_space(sk) - old_space) >= old_space)
-            mptcp_cleanup_rbuf(msk);
+        mptcp_cleanup_rbuf(msk);
+
+        if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk))
+            continue;
 
         /* only the master socket status is relevant here. The exit
          * conditions mirror closely tcp_recvmsg()
          */
@@ -1981,7 +1988,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
             /* race breaker: the shutdown could be after the
              * previous receive queue check
              */
-            if (__mptcp_move_skbs(msk, len - copied))
+            if (__mptcp_move_skbs(msk))
                 continue;
             break;
         }
@@ -2014,7 +2021,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
         /* .. race-breaker: ssk might have gotten new data
          * after last __mptcp_move_skbs() returned false.
          */
-        if (unlikely(__mptcp_move_skbs(msk, 0)))
+        if (unlikely(__mptcp_move_skbs(msk)))
             set_bit(MPTCP_DATA_READY, &msk->flags);
     } else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) {
         /* data to read but mptcp_wait_data() cleared DATA_READY */
@@ -2299,6 +2306,7 @@ static void mptcp_worker(struct work_struct *work)
     if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
         goto unlock;
 
+    __mptcp_clean_una(sk);
     dfrag = mptcp_rtx_head(sk);
     if (!dfrag)
         goto unlock;
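The single added line matters: the worker now purges fully acknowledged data before picking the retransmit head, so it can neither retransmit data that was already acked nor operate on a stale fragment. A minimal sketch of that ordering over a toy fragment list; all types and helpers here are hypothetical stand-ins, not the kernel's.

#include <stdint.h>
#include <stdlib.h>

struct frag {
    uint64_t end_seq;
    struct frag *next;
};

/* drop everything covered by the cumulative ack first ... */
static struct frag *clean_una(struct frag *head, uint64_t snd_una)
{
    while (head && head->end_seq <= snd_una) {
        struct frag *acked = head;

        head = head->next;
        free(acked);
    }
    return head;
}

/* ... so that the head of the list really is the oldest unacked fragment */
static struct frag *rtx_head(struct frag *head, uint64_t snd_una,
                 uint64_t snd_nxt)
{
    if (snd_una == snd_nxt)    /* nothing in flight */
        return NULL;
    return head;
}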
@@ -2959,6 +2967,8 @@ static void mptcp_release_cb(struct sock *sk)
         mptcp_push_pending(sk, 0);
         spin_lock_bh(&sk->sk_lock.slock);
     }
+    if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
+        __mptcp_error_report(sk);
 
     /* clear any wmem reservation and errors */
     __mptcp_update_wmem(sk);
@@ -3319,7 +3329,7 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
     struct sock *sk = (struct sock *)msk;
 
     if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN))
-        return 0;
+        return EPOLLOUT | EPOLLWRNORM;
 
     if (sk_stream_is_writeable(sk))
         return EPOLLOUT | EPOLLWRNORM;
@@ -3352,9 +3362,16 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
         mask |= mptcp_check_readable(msk);
         mask |= mptcp_check_writeable(msk);
     }
+    if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
+        mask |= EPOLLHUP;
     if (sk->sk_shutdown & RCV_SHUTDOWN)
         mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
 
+    /* This barrier is coupled with smp_wmb() in tcp_reset() */
+    smp_rmb();
+    if (sk->sk_err)
+        mask |= EPOLLERR;
+
     return mask;
 }
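For reference, the user-visible effect of the poll changes can be probed from userspace: after shutdown(SHUT_WR) a poller should still see POLLOUT, so a blocked writer wakes up and gets EPIPE, and a subflow error should surface as EPOLLERR. A rough sketch, with the actual connect() and error handling elided; IPPROTO_MPTCP assumes Linux 5.6+.

#include <poll.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif

int main(void)
{
    int fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
    struct pollfd pfd = { .fd = fd, .events = POLLOUT };

    /* ... connect(fd, ...) would go here ... */
    shutdown(fd, SHUT_WR);

    /* with the fix, this reports POLLOUT instead of hanging */
    if (poll(&pfd, 1, 1000) > 0 && (pfd.revents & POLLOUT))
        puts("writable (writes will now fail with EPIPE)");

    close(fd);
    return 0;
}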
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -95,6 +95,7 @@
 #define MPTCP_WORK_CLOSE_SUBFLOW 5
 #define MPTCP_PUSH_PENDING 6
 #define MPTCP_CLEAN_UNA 7
+#define MPTCP_ERROR_REPORT 8
 
 static inline bool before64(__u64 seq1, __u64 seq2)
 {
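The before64() helper visible in the context lines is the standard wrap-safe serial-number comparison used throughout these fixes. For reference, an equivalent standalone definition:

#include <stdbool.h>
#include <stdint.h>

/* true when seq1 precedes seq2, valid even across u64 wraparound */
static bool before64(uint64_t seq1, uint64_t seq2)
{
    return (int64_t)(seq1 - seq2) < 0;
}

#define after64(seq2, seq1)    before64(seq1, seq2)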
@@ -233,7 +234,6 @@ struct mptcp_sock {
     u64 wnd_end;
     unsigned long timer_ival;
     u32 token;
-    int rmem_pending;
     int rmem_released;
     unsigned long flags;
     bool can_ack;
@@ -292,7 +292,7 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
 
 static inline int __mptcp_space(const struct sock *sk)
 {
-    return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_pending);
+    return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released);
 }
 
 static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
@@ -325,20 +325,13 @@ static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
     return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
 }
 
-static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk)
-{
-    struct mptcp_sock *msk = mptcp_sk(sk);
-
-    if (!before64(msk->snd_nxt, READ_ONCE(msk->snd_una)))
-        return NULL;
-
-    return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
-}
-
 static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
 {
     struct mptcp_sock *msk = mptcp_sk(sk);
 
+    if (msk->snd_una == READ_ONCE(msk->snd_nxt))
+        return NULL;
+
     return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list);
 }
@@ -414,6 +407,7 @@ struct mptcp_subflow_context {
     void (*tcp_data_ready)(struct sock *sk);
     void (*tcp_state_change)(struct sock *sk);
     void (*tcp_write_space)(struct sock *sk);
+    void (*tcp_error_report)(struct sock *sk);
 
     struct rcu_head rcu;
 };
@@ -478,6 +472,7 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
     sk->sk_data_ready = ctx->tcp_data_ready;
     sk->sk_state_change = ctx->tcp_state_change;
     sk->sk_write_space = ctx->tcp_write_space;
+    sk->sk_error_report = ctx->tcp_error_report;
 
     inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
 }
@@ -505,6 +500,7 @@ bool mptcp_finish_join(struct sock *sk);
 bool mptcp_schedule_work(struct sock *sk);
 void __mptcp_check_push(struct sock *sk, struct sock *ssk);
 void __mptcp_data_acked(struct sock *sk);
+void __mptcp_error_report(struct sock *sk);
 void mptcp_subflow_eof(struct sock *sk);
 bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
 void __mptcp_flush_join_list(struct mptcp_sock *msk);
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -92,7 +92,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
     return msk;
 }
 
-static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
+static void subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
 {
     struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
 
@@ -100,6 +100,22 @@ static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
     subflow_req->mp_join = 0;
     subflow_req->msk = NULL;
     mptcp_token_init_request(req);
+}
+
+/* Init mptcp request socket.
+ *
+ * Returns an error code if a JOIN has failed and a TCP reset
+ * should be sent.
+ */
+static int subflow_check_req(struct request_sock *req,
+                 const struct sock *sk_listener,
+                 struct sk_buff *skb)
+{
+    struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
+    struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
+    struct mptcp_options_received mp_opt;
+
+    pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
 
 #ifdef CONFIG_TCP_MD5SIG
     /* no MPTCP if MD5SIG is enabled on this socket or we may run out of
@@ -109,29 +125,6 @@ static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
         return -EINVAL;
 #endif
 
-    return 0;
-}
-
-/* Init mptcp request socket.
- *
- * Returns an error code if a JOIN has failed and a TCP reset
- * should be sent.
- */
-static int subflow_init_req(struct request_sock *req,
-                const struct sock *sk_listener,
-                struct sk_buff *skb)
-{
-    struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
-    struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
-    struct mptcp_options_received mp_opt;
-    int ret;
-
-    pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
-
-    ret = __subflow_init_req(req, sk_listener);
-    if (ret)
-        return 0;
-
     mptcp_get_options(skb, &mp_opt);
 
     if (mp_opt.mp_capable) {
@@ -205,10 +198,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
     struct mptcp_options_received mp_opt;
     int err;
 
-    err = __subflow_init_req(req, sk_listener);
-    if (err)
-        return err;
-
+    subflow_init_req(req, sk_listener);
     mptcp_get_options(skb, &mp_opt);
 
     if (mp_opt.mp_capable && mp_opt.mp_join)
@@ -248,12 +238,13 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
     int err;
 
     tcp_rsk(req)->is_mptcp = 1;
+    subflow_init_req(req, sk);
 
     dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req);
     if (!dst)
         return NULL;
 
-    err = subflow_init_req(req, sk, skb);
+    err = subflow_check_req(req, sk, skb);
     if (err == 0)
         return dst;
 
@@ -273,12 +264,13 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
     int err;
 
     tcp_rsk(req)->is_mptcp = 1;
+    subflow_init_req(req, sk);
 
     dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req);
     if (!dst)
         return NULL;
 
-    err = subflow_init_req(req, sk, skb);
+    err = subflow_check_req(req, sk, skb);
     if (err == 0)
         return dst;
 
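The shape of the fix is the same in both route_req paths: the unconditional field initialization (subflow_init_req()) now happens before route_req() can fail, and only the fallible validation (subflow_check_req()) runs afterwards, so no error path ever sees a partially initialized request. Sketched below with hypothetical stand-in types:

#include <stddef.h>

struct req_like {
    int mp_capable;
    int mp_join;
    void *msk;
};

/* cannot fail: every field gets a defined value up front */
static void init_req(struct req_like *req)
{
    req->mp_capable = 0;
    req->mp_join = 0;
    req->msk = NULL;
}

/* may fail, but by now *req is fully initialized, so whatever
 * teardown the caller does on error acts on consistent state
 */
static int check_req(struct req_like *req)
{
    (void)req;    /* option parsing and validation would go here */
    return 0;
}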
@@ -1043,6 +1035,46 @@ static void subflow_write_space(struct sock *ssk)
     /* we take action in __mptcp_clean_una() */
 }
 
+void __mptcp_error_report(struct sock *sk)
+{
+    struct mptcp_subflow_context *subflow;
+    struct mptcp_sock *msk = mptcp_sk(sk);
+
+    mptcp_for_each_subflow(msk, subflow) {
+        struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+        int err = sock_error(ssk);
+
+        if (!err)
+            continue;
+
+        /* only propagate errors on fallen-back sockets or
+         * on MPC connect
+         */
+        if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
+            continue;
+
+        inet_sk_state_store(sk, inet_sk_state_load(ssk));
+        sk->sk_err = -err;
+
+        /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+        smp_wmb();
+        sk->sk_error_report(sk);
+        break;
+    }
+}
+
+static void subflow_error_report(struct sock *ssk)
+{
+    struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+
+    mptcp_data_lock(sk);
+    if (!sock_owned_by_user(sk))
+        __mptcp_error_report(sk);
+    else
+        set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags);
+    mptcp_data_unlock(sk);
+}
+
 static struct inet_connection_sock_af_ops *
 subflow_default_af_ops(struct sock *sk)
 {
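subflow_error_report() follows the usual "run now or flag for the lock owner" pattern: if the msk is owned by a user context, the report is deferred via the new MPTCP_ERROR_REPORT bit and replayed from mptcp_release_cb(). A userspace-flavored sketch of that pattern; msk_like, do_error_report and friends are hypothetical stand-ins for the kernel primitives.

#include <stdatomic.h>
#include <stdbool.h>

struct msk_like {
    atomic_bool owned_by_user;
    atomic_int deferred;    /* bit 0: error report pending */
};

static void do_error_report(struct msk_like *msk)
{
    (void)msk;    /* would propagate ssk errors to the msk here */
}

static void error_report(struct msk_like *msk)
{
    if (!atomic_load(&msk->owned_by_user))
        do_error_report(msk);            /* fast path: run immediately */
    else
        atomic_fetch_or(&msk->deferred, 1);    /* defer to the owner */
}

/* the lock owner drains deferred work when releasing the socket */
static void release_cb(struct msk_like *msk)
{
    if (atomic_fetch_and(&msk->deferred, ~1) & 1)
        do_error_report(msk);
}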
@@ -1352,9 +1384,11 @@ static int subflow_ulp_init(struct sock *sk)
     ctx->tcp_data_ready = sk->sk_data_ready;
     ctx->tcp_state_change = sk->sk_state_change;
     ctx->tcp_write_space = sk->sk_write_space;
+    ctx->tcp_error_report = sk->sk_error_report;
     sk->sk_data_ready = subflow_data_ready;
     sk->sk_write_space = subflow_write_space;
     sk->sk_state_change = subflow_state_change;
+    sk->sk_error_report = subflow_error_report;
 out:
     return err;
 }
@@ -1407,6 +1441,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
     new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
     new_ctx->tcp_state_change = old_ctx->tcp_state_change;
     new_ctx->tcp_write_space = old_ctx->tcp_write_space;
+    new_ctx->tcp_error_report = old_ctx->tcp_error_report;
     new_ctx->rel_write_seq = 1;
     new_ctx->tcp_sock = newsk;
 