From bd7c5f983f3185b75cc23bdd5dbc3a676aef3d1e Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Mon, 2 May 2016 11:24:52 -0700 Subject: [PATCH] RDS: TCP: Synchronize accept() and connect() paths on t_conn_lock. An arbitration scheme for duelling SYNs is implemented as part of commit 241b271952eb ("RDS-TCP: Reset tcp callbacks if re-using an outgoing socket in rds_tcp_accept_one()") which ensures that both nodes involved will arrive at the same arbitration decision. However, this needs to be synchronized with an outgoing SYN to be generated by rds_tcp_conn_connect(). This commit achieves the synchronization through the t_conn_lock mutex in struct rds_tcp_connection. The rds_conn_state is checked in rds_tcp_conn_connect() after acquiring the t_conn_lock mutex. A SYN is sent out only if the RDS connection is not already UP (an UP would indicate that rds_tcp_accept_one() has completed 3WH, so no SYN needs to be generated). Similarly, the rds_conn_state is checked in rds_tcp_accept_one() after acquiring the t_conn_lock mutex. The only acceptable states (to allow continuation of the arbitration logic) are UP (i.e., outgoing SYN was SYN-ACKed by peer after it sent us the SYN) or CONNECTING (we sent outgoing SYN before we saw incoming SYN). Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/tcp.c | 1 + net/rds/tcp.h | 4 ++++ net/rds/tcp_connect.c | 8 ++++++++ net/rds/tcp_listen.c | 30 ++++++++++++++++++++---------- 4 files changed, 33 insertions(+), 10 deletions(-) diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 9134544941c2..86187dad1440 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -216,6 +216,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) if (!tc) return -ENOMEM; + mutex_init(&tc->t_conn_lock); tc->t_sock = NULL; tc->t_tinc = NULL; tc->t_tinc_hdr_rem = sizeof(struct rds_header); diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 64f873c0c6b6..41c228300525 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -12,6 +12,10 @@ struct rds_tcp_connection { struct list_head t_tcp_node; struct rds_connection *conn; + /* t_conn_lock synchronizes the connection establishment between + * rds_tcp_accept_one and rds_tcp_conn_connect + */ + struct mutex t_conn_lock; struct socket *t_sock; void *t_orig_write_space; void *t_orig_data_ready; diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 5cb16875c460..49a3fcfed360 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -78,7 +78,14 @@ int rds_tcp_conn_connect(struct rds_connection *conn) struct socket *sock = NULL; struct sockaddr_in src, dest; int ret; + struct rds_tcp_connection *tc = conn->c_transport_data; + + mutex_lock(&tc->t_conn_lock); + if (rds_conn_up(conn)) { + mutex_unlock(&tc->t_conn_lock); + return 0; + } ret = sock_create_kern(rds_conn_net(conn), PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret < 0) @@ -120,6 +127,7 @@ int rds_tcp_conn_connect(struct rds_connection *conn) } out: + mutex_unlock(&tc->t_conn_lock); if (sock) sock_release(sock); return ret; diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 0896187243d6..be263cdf268b 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -76,7 +76,9 @@ int rds_tcp_accept_one(struct socket *sock) struct rds_connection *conn; int ret; struct inet_sock *inet; - struct rds_tcp_connection *rs_tcp; + struct rds_tcp_connection *rs_tcp = NULL; + int conn_state; + struct sock *nsk; ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family, sock->sk->sk_type, sock->sk->sk_protocol, @@ -116,6 +118,10 @@ int rds_tcp_accept_one(struct socket *sock) */ rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data; rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING); + mutex_lock(&rs_tcp->t_conn_lock); + conn_state = rds_conn_state(conn); + if (conn_state != RDS_CONN_CONNECTING && conn_state != RDS_CONN_UP) + goto rst_nsk; if (rs_tcp->t_sock) { /* Need to resolve a duelling SYN between peers. * We have an outstanding SYN to this peer, which may @@ -126,14 +132,7 @@ int rds_tcp_accept_one(struct socket *sock) wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags)); if (ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) { - struct sock *nsk = new_sock->sk; - - nsk->sk_user_data = NULL; - nsk->sk_prot->disconnect(nsk, 0); - tcp_done(nsk); - new_sock = NULL; - ret = 0; - goto out; + goto rst_nsk; } else if (rs_tcp->t_sock) { rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp); conn->c_outgoing = 0; @@ -143,8 +142,19 @@ int rds_tcp_accept_one(struct socket *sock) rds_connect_complete(conn); /* marks RDS_CONN_UP */ new_sock = NULL; ret = 0; - + goto out; +rst_nsk: + /* reset the newly returned accept sock and bail */ + nsk = new_sock->sk; + rds_tcp_stats_inc(s_tcp_listen_closed_stale); + nsk->sk_user_data = NULL; + nsk->sk_prot->disconnect(nsk, 0); + tcp_done(nsk); + new_sock = NULL; + ret = 0; out: + if (rs_tcp) + mutex_unlock(&rs_tcp->t_conn_lock); if (new_sock) sock_release(new_sock); return ret; -- 2.34.1