RDS: TCP: fix race windows in send-path quiescence by rds_tcp_accept_one()
The send path needs to be quiesced before resetting callbacks from
rds_tcp_accept_one(), and commit eb19284026 ("RDS:TCP: Synchronize
rds_tcp_accept_one with rds_send_xmit when resetting t_sock") achieves
this using the c_state and the RDS_IN_XMIT bit, following the pattern
used by rds_conn_shutdown(). However, this still leaves a race window,
as shown in the sequence below:
  take t_conn_lock in rds_tcp_conn_connect
  send outgoing syn to peer
  drop t_conn_lock in rds_tcp_conn_connect
  incoming from peer triggers rds_tcp_accept_one, conn is marked CONNECTING
  wait for RDS_IN_XMIT to quiesce any rds_send_xmit threads
  call rds_tcp_reset_callbacks
  [.. race window where an incoming syn-ack can cause the conn
     to be marked UP from rds_tcp_state_change ..]
  lock_sock called from rds_tcp_reset_callbacks, and we set t_sock to NULL
As soon as the conn is marked UP in the race window above, rds_send_xmit()
threads will proceed to rds_tcp_xmit() and may hit a NULL-pointer
dereference on t_sock.
Given that rds_tcp_state_change() is invoked in softirq context while
rds_tcp_reset_callbacks() runs in workqueue context, and that testing for
RDS_IN_XMIT after lock_sock() could deadlock with tcp_sendmsg(), this
commit fixes the race by using a new c_state, RDS_CONN_RESETTING, which
prevents a transition to RDS_CONN_UP from rds_tcp_state_change().
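
For illustration, the sketch below models the guard this fix relies on. It is
a minimal user-space approximation, not kernel code: conn_transition() and
connect_path_complete() are hypothetical stand-ins for rds_conn_transition()
and rds_connect_path_complete(), and C11 atomics replace the kernel's
atomic_t and cmpxchg. The point is that once the resetter parks c_state in
RDS_CONN_RESETTING and waits out RDS_IN_XMIT, a racing CONNECTING -> UP
transition from the state-change callback fails its compare-and-swap, so
senders cannot be re-enabled while t_sock is being reset.

/* Minimal single-threaded sketch of the state-machine guard; illustrative
 * only, not the RDS implementation.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum {
	RDS_CONN_DOWN,
	RDS_CONN_CONNECTING,
	RDS_CONN_DISCONNECTING,
	RDS_CONN_UP,
	RDS_CONN_RESETTING,
	RDS_CONN_ERROR,
};

struct conn {
	atomic_int c_state;
	atomic_bool in_xmit;	/* stand-in for the RDS_IN_XMIT flag bit */
};

/* cmpxchg-style transition, analogous to rds_conn_transition() */
static bool conn_transition(struct conn *c, int old, int new)
{
	return atomic_compare_exchange_strong(&c->c_state, &old, new);
}

/* analogous to rds_connect_path_complete(conn, curr): only curr -> UP */
static void connect_path_complete(struct conn *c, int curr)
{
	if (!conn_transition(c, curr, RDS_CONN_UP))
		printf("cannot transition to UP, current state is %d\n",
		       atomic_load(&c->c_state));
}

int main(void)
{
	struct conn c = { .c_state = RDS_CONN_CONNECTING, .in_xmit = false };

	/* reset-callbacks path: take the conn off the UP/CONNECTING track
	 * first, then wait until no sender holds the in-xmit flag (the
	 * kernel does this with wait_event() on conn->c_waitq). */
	atomic_store(&c.c_state, RDS_CONN_RESETTING);
	while (atomic_load(&c.in_xmit))
		;	/* senders finish via release_in_xmit() */

	/* racing syn-ack -> state-change callback: the CONNECTING -> UP
	 * compare-and-swap fails, so the conn is NOT marked UP while
	 * t_sock is being reset. */
	connect_path_complete(&c, RDS_CONN_CONNECTING);
	return 0;
}

In the actual patch the same effect falls out of rds_connect_path_complete()
only succeeding when c_state still equals its curr argument, so
RDS_CONN_RESETTING blocks the UP transition until rds_tcp_reset_callbacks()
has finished its work under lock_sock().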
Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
commit 9c79440e2c
parent 0b6f760cff
@@ -74,6 +74,7 @@ enum {
 	RDS_CONN_CONNECTING,
 	RDS_CONN_DISCONNECTING,
 	RDS_CONN_UP,
+	RDS_CONN_RESETTING,
 	RDS_CONN_ERROR,
 };
@@ -813,6 +814,7 @@ void rds_connect_worker(struct work_struct *);
 void rds_shutdown_worker(struct work_struct *);
 void rds_send_worker(struct work_struct *);
 void rds_recv_worker(struct work_struct *);
+void rds_connect_path_complete(struct rds_connection *conn, int curr);
 void rds_connect_complete(struct rds_connection *conn);
 
 /* transport.c */
@@ -148,11 +148,23 @@ void rds_tcp_reset_callbacks(struct socket *sock,
 	 * potentially have transitioned to the RDS_CONN_UP state,
 	 * so we must quiesce any send threads before resetting
 	 * c_transport_data. We quiesce these threads by setting
-	 * cp_state to something other than RDS_CONN_UP, and then
+	 * c_state to something other than RDS_CONN_UP, and then
 	 * waiting for any existing threads in rds_send_xmit to
 	 * complete release_in_xmit(). (Subsequent threads entering
 	 * rds_send_xmit() will bail on !rds_conn_up().
+	 *
+	 * However an incoming syn-ack at this point would end up
+	 * marking the conn as RDS_CONN_UP, and would again permit
+	 * rds_send_xmi() threads through, so ideally we would
+	 * synchronize on RDS_CONN_UP after lock_sock(), but cannot
+	 * do that: waiting on !RDS_IN_XMIT after lock_sock() may
+	 * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT
+	 * would not get set. As a result, we set c_state to
+	 * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change
+	 * cannot mark rds_conn_path_up() in the window before lock_sock()
 	 */
+	atomic_set(&conn->c_state, RDS_CONN_RESETTING);
+	wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags));
 	lock_sock(osock->sk);
 	/* reset receive side state for rds_tcp_data_recv() for osock */
 	if (tc->t_tinc) {
@@ -60,7 +60,7 @@ void rds_tcp_state_change(struct sock *sk)
 	case TCP_SYN_RECV:
 		break;
 	case TCP_ESTABLISHED:
-		rds_connect_complete(conn);
+		rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
 		break;
 	case TCP_CLOSE_WAIT:
 	case TCP_CLOSE:
@@ -135,16 +135,15 @@ int rds_tcp_accept_one(struct socket *sock)
 		    !conn->c_outgoing) {
 			goto rst_nsk;
 		} else {
-			atomic_set(&conn->c_state, RDS_CONN_CONNECTING);
-			wait_event(conn->c_waitq,
-				   !test_bit(RDS_IN_XMIT, &conn->c_flags));
 			rds_tcp_reset_callbacks(new_sock, conn);
 			conn->c_outgoing = 0;
+			/* rds_connect_path_complete() marks RDS_CONN_UP */
+			rds_connect_path_complete(conn, RDS_CONN_DISCONNECTING);
 		}
 	} else {
 		rds_tcp_set_callbacks(new_sock, conn);
+		rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
 	}
-	rds_connect_complete(conn); /* marks RDS_CONN_UP */
 	new_sock = NULL;
 	ret = 0;
 	goto out;
@@ -71,9 +71,9 @@
 struct workqueue_struct *rds_wq;
 EXPORT_SYMBOL_GPL(rds_wq);
 
-void rds_connect_complete(struct rds_connection *conn)
+void rds_connect_path_complete(struct rds_connection *conn, int curr)
 {
-	if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) {
+	if (!rds_conn_transition(conn, curr, RDS_CONN_UP)) {
 		printk(KERN_WARNING "%s: Cannot transition to state UP, "
 				"current state is %d\n",
 				__func__,
@@ -90,6 +90,12 @@ void rds_connect_complete(struct rds_connection *conn)
 	queue_delayed_work(rds_wq, &conn->c_send_w, 0);
 	queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
 }
+EXPORT_SYMBOL_GPL(rds_connect_path_complete);
+
+void rds_connect_complete(struct rds_connection *conn)
+{
+	rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
+}
 EXPORT_SYMBOL_GPL(rds_connect_complete);
 
 /*