From 78eb3a5379a52f291556483ea55b8a37e2ed4d5b Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 17 Jan 2014 09:50:03 +0800 Subject: [PATCH 1/5] tipc: standardize connect routine Comparing the behaviour of how to wait for events in TIPC connect() with other stacks, the TIPC implementation might be perceived as different, and sometimes even incorrect. For instance, as both sock->state and sk_sleep() are directly fed to wait_event_interruptible_timeout() as its arguments, and socket lock has to be released before we call wait_event_interruptible_timeout(), the two variables associated with socket are exposed out of socket lock protection, thereby probably getting stale values so that the process of calling connect() cannot be woken up exactly even if correct event arrives or it is woken up improperly even if the wake condition is not satisfied in practice. Therefore, standardizing its behaviour with sk_stream_wait_connect routine can avoid these risks. Additionally the implementation of connect routine is simplified as a whole, allowing it to return correct values in all different cases. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 63 +++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c8341d1f995e..b2ae25ae3038 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1438,6 +1438,28 @@ static void wakeupdispatch(struct tipc_port *tport) sk->sk_write_space(sk); } +static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) +{ + struct sock *sk = sock->sk; + DEFINE_WAIT(wait); + int done; + + do { + int err = sock_error(sk); + if (err) + return err; + if (!*timeo_p) + return -ETIMEDOUT; + if (signal_pending(current)) + return sock_intr_errno(*timeo_p); + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + done = sk_wait_event(sk, timeo_p, sock->state != SS_CONNECTING); + finish_wait(sk_sleep(sk), &wait); + } while (!done); + return 0; +} + /** * connect - establish a connection to another TIPC port * @sock: socket structure @@ -1453,7 +1475,8 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, struct sock *sk = sock->sk; struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest; struct msghdr m = {NULL,}; - unsigned int timeout; + long timeout = (flags & O_NONBLOCK) ? 0 : tipc_sk(sk)->conn_timeout; + socket_state previous; int res; lock_sock(sk); @@ -1475,8 +1498,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, goto exit; } - timeout = (flags & O_NONBLOCK) ? 0 : tipc_sk(sk)->conn_timeout; - + previous = sock->state; switch (sock->state) { case SS_UNCONNECTED: /* Send a 'SYN-' to destination */ @@ -1498,41 +1520,22 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen, * case is EINPROGRESS, rather than EALREADY. */ res = -EINPROGRESS; - break; case SS_CONNECTING: - res = -EALREADY; + if (previous == SS_CONNECTING) + res = -EALREADY; + if (!timeout) + goto exit; + timeout = msecs_to_jiffies(timeout); + /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ + res = tipc_wait_for_connect(sock, &timeout); break; case SS_CONNECTED: res = -EISCONN; break; default: res = -EINVAL; - goto exit; + break; } - - if (sock->state == SS_CONNECTING) { - if (!timeout) - goto exit; - - /* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */ - release_sock(sk); - res = wait_event_interruptible_timeout(*sk_sleep(sk), - sock->state != SS_CONNECTING, - timeout ? (long)msecs_to_jiffies(timeout) - : MAX_SCHEDULE_TIMEOUT); - if (res <= 0) { - if (res == 0) - res = -ETIMEDOUT; - return res; - } - lock_sock(sk); - } - - if (unlikely(sock->state == SS_DISCONNECTING)) - res = sock_error(sk); - else - res = 0; - exit: release_sock(sk); return res; From 6398e23cdb1d807132c1d3d007d6b1ec87b511af Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 17 Jan 2014 09:50:04 +0800 Subject: [PATCH 2/5] tipc: standardize accept routine Comparing the behaviour of how to wait for events in TIPC accept() with other stacks, the TIPC implementation might be perceived as different, and sometimes even incorrect. As sk_sleep() and sk->sk_receive_queue variables associated with socket are not protected by socket lock, the process of calling accept() may be woken up improperly or sometimes cannot be woken up at all. After standardizing it with inet_csk_wait_for_connect routine, we can get benefits including: avoiding 'thundering herd' phenomenon, adding a timeout mechanism for accept(), coping with a pending signal, and having sk_sleep() and sk->sk_receive_queue being always protected within socket lock scope and so on. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 54 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b2ae25ae3038..008f6fdf95c3 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1566,6 +1566,42 @@ static int listen(struct socket *sock, int len) return res; } +static int tipc_wait_for_accept(struct socket *sock, long timeo) +{ + struct sock *sk = sock->sk; + DEFINE_WAIT(wait); + int err; + + /* True wake-one mechanism for incoming connections: only + * one process gets woken up, not the 'whole herd'. + * Since we do not 'race & poll' for established sockets + * anymore, the common case will execute the loop only once. + */ + for (;;) { + prepare_to_wait_exclusive(sk_sleep(sk), &wait, + TASK_INTERRUPTIBLE); + if (skb_queue_empty(&sk->sk_receive_queue)) { + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + } + err = 0; + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; + err = -EINVAL; + if (sock->state != SS_LISTENING) + break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; + err = -EAGAIN; + if (!timeo) + break; + } + finish_wait(sk_sleep(sk), &wait); + return err; +} + /** * accept - wait for connection request * @sock: listening socket @@ -1582,7 +1618,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) struct tipc_port *new_tport; struct tipc_msg *msg; u32 new_ref; - + long timeo; int res; lock_sock(sk); @@ -1592,18 +1628,10 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags) goto exit; } - while (skb_queue_empty(&sk->sk_receive_queue)) { - if (flags & O_NONBLOCK) { - res = -EWOULDBLOCK; - goto exit; - } - release_sock(sk); - res = wait_event_interruptible(*sk_sleep(sk), - (!skb_queue_empty(&sk->sk_receive_queue))); - lock_sock(sk); - if (res) - goto exit; - } + timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + res = tipc_wait_for_accept(sock, timeo); + if (res) + goto exit; buf = skb_peek(&sk->sk_receive_queue); From 3f40504f7ee419eda0068e55e0ee2622e1fce40b Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 17 Jan 2014 09:50:05 +0800 Subject: [PATCH 3/5] tipc: standardize sendmsg routine of connectionless socket Comparing the behaviour of how to wait for events in TIPC sendmsg() with other stacks, the TIPC implementation might be perceived as different, and sometimes even incorrect. For instance, sk_sleep() and tport->congested variables associated with socket are exposed without socket lock protection while wait_event_interruptible_timeout() accesses them. So standardizing it with similar implementation in other stacks can help us correct these errors which the process of calling sendmsg() cannot be woken up event if an expected event arrive at socket or improperly woken up although the wake condition doesn't match. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 008f6fdf95c3..3e019737e4b3 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -567,6 +567,31 @@ static int dest_name_check(struct sockaddr_tipc *dest, struct msghdr *m) return 0; } +static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) +{ + struct sock *sk = sock->sk; + struct tipc_port *tport = tipc_sk_port(sk); + DEFINE_WAIT(wait); + int done; + + do { + int err = sock_error(sk); + if (err) + return err; + if (sock->state == SS_DISCONNECTING) + return -EPIPE; + if (!*timeo_p) + return -EAGAIN; + if (signal_pending(current)) + return sock_intr_errno(*timeo_p); + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + done = sk_wait_event(sk, timeo_p, !tport->congested); + finish_wait(sk_sleep(sk), &wait); + } while (!done); + return 0; +} + /** * send_msg - send message in connectionless manner * @iocb: if NULL, indicates that socket lock is already held @@ -588,7 +613,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, struct tipc_port *tport = tipc_sk_port(sk); struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name; int needs_conn; - long timeout_val; + long timeo; int res = -EINVAL; if (unlikely(!dest)) @@ -625,8 +650,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, reject_rx_queue(sk); } - timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); - + timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); do { if (dest->addrtype == TIPC_ADDR_NAME) { res = dest_name_check(dest, m); @@ -660,14 +684,9 @@ static int send_msg(struct kiocb *iocb, struct socket *sock, sock->state = SS_CONNECTING; break; } - if (timeout_val <= 0L) { - res = timeout_val ? timeout_val : -EWOULDBLOCK; + res = tipc_wait_for_sndmsg(sock, &timeo); + if (res) break; - } - release_sock(sk); - timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk), - !tport->congested, timeout_val); - lock_sock(sk); } while (1); exit: From 391a6dd1da3979be7b6fc0d9ded1b0b906bd068f Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 17 Jan 2014 09:50:06 +0800 Subject: [PATCH 4/5] tipc: standardize sendmsg routine of connected socket Standardize the behaviour of waiting for events in TIPC send_packet() so that all variables of socket or port structures are protected within socket lock, allowing the process of calling sendmsg() to be woken up at appropriate time. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 60 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 19 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3e019737e4b3..c4803101fbdf 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -695,6 +695,34 @@ exit: return res; } +static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) +{ + struct sock *sk = sock->sk; + struct tipc_port *tport = tipc_sk_port(sk); + DEFINE_WAIT(wait); + int done; + + do { + int err = sock_error(sk); + if (err) + return err; + if (sock->state == SS_DISCONNECTING) + return -EPIPE; + else if (sock->state != SS_CONNECTED) + return -ENOTCONN; + if (!*timeo_p) + return -EAGAIN; + if (signal_pending(current)) + return sock_intr_errno(*timeo_p); + + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + done = sk_wait_event(sk, timeo_p, + (!tport->congested || !tport->connected)); + finish_wait(sk_sleep(sk), &wait); + } while (!done); + return 0; +} + /** * send_packet - send a connection-oriented message * @iocb: if NULL, indicates that socket lock is already held @@ -712,8 +740,8 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct tipc_port *tport = tipc_sk_port(sk); struct sockaddr_tipc *dest = (struct sockaddr_tipc *)m->msg_name; - long timeout_val; - int res; + int res = -EINVAL; + long timeo; /* Handle implied connection establishment */ if (unlikely(dest)) @@ -725,30 +753,24 @@ static int send_packet(struct kiocb *iocb, struct socket *sock, if (iocb) lock_sock(sk); - timeout_val = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + if (unlikely(sock->state != SS_CONNECTED)) { + if (sock->state == SS_DISCONNECTING) + res = -EPIPE; + else + res = -ENOTCONN; + goto exit; + } + timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); do { - if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_DISCONNECTING) - res = -EPIPE; - else - res = -ENOTCONN; - break; - } - res = tipc_send(tport->ref, m->msg_iov, total_len); if (likely(res != -ELINKCONG)) break; - if (timeout_val <= 0L) { - res = timeout_val ? timeout_val : -EWOULDBLOCK; + res = tipc_wait_for_sndpkt(sock, &timeo); + if (res) break; - } - release_sock(sk); - timeout_val = wait_event_interruptible_timeout(*sk_sleep(sk), - (!tport->congested || !tport->connected), timeout_val); - lock_sock(sk); } while (1); - +exit: if (iocb) release_sock(sk); return res; From 9bbb4ecc681968b7cf781e08e3918604c3b699d3 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 17 Jan 2014 09:50:07 +0800 Subject: [PATCH 5/5] tipc: standardize recvmsg routine Standardize the behaviour of waiting for events in TIPC recvmsg() so that all variables of socket or port structures are protected within socket lock, allowing the process of calling recvmsg() to be woken up at appropriate time. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 80 +++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 38 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c4803101fbdf..eab17eb9ca1d 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -55,9 +55,6 @@ struct tipc_sock { #define tipc_sk(sk) ((struct tipc_sock *)(sk)) #define tipc_sk_port(sk) (tipc_sk(sk)->p) -#define tipc_rx_ready(sock) (!skb_queue_empty(&sock->sk->sk_receive_queue) || \ - (sock->state == SS_DISCONNECTING)) - static int backlog_rcv(struct sock *sk, struct sk_buff *skb); static u32 dispatch(struct tipc_port *tport, struct sk_buff *buf); static void wakeupdispatch(struct tipc_port *tport); @@ -994,6 +991,37 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg, return 0; } +static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo) +{ + struct sock *sk = sock->sk; + DEFINE_WAIT(wait); + int err; + + for (;;) { + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + if (skb_queue_empty(&sk->sk_receive_queue)) { + if (sock->state == SS_DISCONNECTING) { + err = -ENOTCONN; + break; + } + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + } + err = 0; + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; + err = -EAGAIN; + if (!timeo) + break; + } + finish_wait(sk_sleep(sk), &wait); + return err; +} + /** * recv_msg - receive packet-oriented message * @iocb: (unused) @@ -1013,7 +1041,7 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, struct tipc_port *tport = tipc_sk_port(sk); struct sk_buff *buf; struct tipc_msg *msg; - long timeout; + long timeo; unsigned int sz; u32 err; int res; @@ -1029,25 +1057,13 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, goto exit; } - timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: /* Look for a message in receive queue; wait if necessary */ - while (skb_queue_empty(&sk->sk_receive_queue)) { - if (sock->state == SS_DISCONNECTING) { - res = -ENOTCONN; - goto exit; - } - if (timeout <= 0L) { - res = timeout ? timeout : -EWOULDBLOCK; - goto exit; - } - release_sock(sk); - timeout = wait_event_interruptible_timeout(*sk_sleep(sk), - tipc_rx_ready(sock), - timeout); - lock_sock(sk); - } + res = tipc_wait_for_rcvmsg(sock, timeo); + if (res) + goto exit; /* Look at first message in receive queue */ buf = skb_peek(&sk->sk_receive_queue); @@ -1119,7 +1135,7 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, struct tipc_port *tport = tipc_sk_port(sk); struct sk_buff *buf; struct tipc_msg *msg; - long timeout; + long timeo; unsigned int sz; int sz_to_copy, target, needed; int sz_copied = 0; @@ -1132,31 +1148,19 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, lock_sock(sk); - if (unlikely((sock->state == SS_UNCONNECTED))) { + if (unlikely(sock->state == SS_UNCONNECTED)) { res = -ENOTCONN; goto exit; } target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); - timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: /* Look for a message in receive queue; wait if necessary */ - while (skb_queue_empty(&sk->sk_receive_queue)) { - if (sock->state == SS_DISCONNECTING) { - res = -ENOTCONN; - goto exit; - } - if (timeout <= 0L) { - res = timeout ? timeout : -EWOULDBLOCK; - goto exit; - } - release_sock(sk); - timeout = wait_event_interruptible_timeout(*sk_sleep(sk), - tipc_rx_ready(sock), - timeout); - lock_sock(sk); - } + res = tipc_wait_for_rcvmsg(sock, timeo); + if (res) + goto exit; /* Look at first message in receive queue */ buf = skb_peek(&sk->sk_receive_queue);