/* xref: /openbmc/linux/net/rds/tcp_connect.c (revision c889a99a) */
/*
 * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/kernel.h>
#include <linux/in.h>
#include <net/tcp.h>

#include "rds.h"
#include "tcp.h"

rds_tcp_state_change(struct sock * sk)4070041088SAndy Grover void rds_tcp_state_change(struct sock *sk)
4170041088SAndy Grover {
4270041088SAndy Grover 	void (*state_change)(struct sock *sk);
43ea3b1ea5SSowmini Varadhan 	struct rds_conn_path *cp;
4470041088SAndy Grover 	struct rds_tcp_connection *tc;
4570041088SAndy Grover 
4638036629SEric Dumazet 	read_lock_bh(&sk->sk_callback_lock);
47ea3b1ea5SSowmini Varadhan 	cp = sk->sk_user_data;
48ea3b1ea5SSowmini Varadhan 	if (!cp) {
4970041088SAndy Grover 		state_change = sk->sk_state_change;
5070041088SAndy Grover 		goto out;
5170041088SAndy Grover 	}
52ea3b1ea5SSowmini Varadhan 	tc = cp->cp_transport_data;
5370041088SAndy Grover 	state_change = tc->t_orig_state_change;
5470041088SAndy Grover 
5570041088SAndy Grover 	rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state);
5670041088SAndy Grover 
5770041088SAndy Grover 	switch (sk->sk_state) {
5870041088SAndy Grover 	/* ignore connecting sockets as they make progress */
5970041088SAndy Grover 	case TCP_SYN_SENT:
6070041088SAndy Grover 	case TCP_SYN_RECV:
6170041088SAndy Grover 		break;
6270041088SAndy Grover 	case TCP_ESTABLISHED:
631a0e100fSSowmini Varadhan 		/* Force the peer to reconnect so that we have the
641a0e100fSSowmini Varadhan 		 * TCP ports going from <smaller-ip>.<transient> to
651a0e100fSSowmini Varadhan 		 * <larger-ip>.<RDS_TCP_PORT>. We avoid marking the
661a0e100fSSowmini Varadhan 		 * RDS connection as RDS_CONN_UP until the reconnect,
671a0e100fSSowmini Varadhan 		 * to avoid RDS datagram loss.
681a0e100fSSowmini Varadhan 		 */
69eee2fa6aSKa-Cheong Poon 		if (rds_addr_cmp(&cp->cp_conn->c_laddr,
70eee2fa6aSKa-Cheong Poon 				 &cp->cp_conn->c_faddr) >= 0 &&
711a0e100fSSowmini Varadhan 		    rds_conn_path_transition(cp, RDS_CONN_CONNECTING,
721a0e100fSSowmini Varadhan 					     RDS_CONN_ERROR)) {
73aed20a53SSowmini Varadhan 			rds_conn_path_drop(cp, false);
741a0e100fSSowmini Varadhan 		} else {
75ea3b1ea5SSowmini Varadhan 			rds_connect_path_complete(cp, RDS_CONN_CONNECTING);
761a0e100fSSowmini Varadhan 		}
7770041088SAndy Grover 		break;
78f711a6aeSSowmini Varadhan 	case TCP_CLOSE_WAIT:
7970041088SAndy Grover 	case TCP_CLOSE:
80aed20a53SSowmini Varadhan 		rds_conn_path_drop(cp, false);
813754fa74SGustavo A. R. Silva 		break;
8270041088SAndy Grover 	default:
8370041088SAndy Grover 		break;
8470041088SAndy Grover 	}
8570041088SAndy Grover out:
8638036629SEric Dumazet 	read_unlock_bh(&sk->sk_callback_lock);
8770041088SAndy Grover 	state_change(sk);
8870041088SAndy Grover }
8970041088SAndy Grover 
rds_tcp_conn_path_connect(struct rds_conn_path * cp)90b04e8554SSowmini Varadhan int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
9170041088SAndy Grover {
9270041088SAndy Grover 	struct socket *sock = NULL;
931e2b44e7SKa-Cheong Poon 	struct sockaddr_in6 sin6;
94eee2fa6aSKa-Cheong Poon 	struct sockaddr_in sin;
95eee2fa6aSKa-Cheong Poon 	struct sockaddr *addr;
96eee2fa6aSKa-Cheong Poon 	int addrlen;
971e2b44e7SKa-Cheong Poon 	bool isv6;
9870041088SAndy Grover 	int ret;
99b04e8554SSowmini Varadhan 	struct rds_connection *conn = cp->cp_conn;
100b04e8554SSowmini Varadhan 	struct rds_tcp_connection *tc = cp->cp_transport_data;
10170041088SAndy Grover 
1025916e2c1SSowmini Varadhan 	/* for multipath rds,we only trigger the connection after
1035916e2c1SSowmini Varadhan 	 * the handshake probe has determined the number of paths.
1045916e2c1SSowmini Varadhan 	 */
1055916e2c1SSowmini Varadhan 	if (cp->cp_index > 0 && cp->cp_conn->c_npaths < 2)
1065916e2c1SSowmini Varadhan 		return -EAGAIN;
1075916e2c1SSowmini Varadhan 
10802105b2cSSowmini Varadhan 	mutex_lock(&tc->t_conn_path_lock);
109bd7c5f98SSowmini Varadhan 
110b04e8554SSowmini Varadhan 	if (rds_conn_path_up(cp)) {
11102105b2cSSowmini Varadhan 		mutex_unlock(&tc->t_conn_path_lock);
112bd7c5f98SSowmini Varadhan 		return 0;
113bd7c5f98SSowmini Varadhan 	}
1141e2b44e7SKa-Cheong Poon 	if (ipv6_addr_v4mapped(&conn->c_laddr)) {
115d5a8ac28SSowmini Varadhan 		ret = sock_create_kern(rds_conn_net(conn), PF_INET,
116d5a8ac28SSowmini Varadhan 				       SOCK_STREAM, IPPROTO_TCP, &sock);
1171e2b44e7SKa-Cheong Poon 		isv6 = false;
1181e2b44e7SKa-Cheong Poon 	} else {
1191e2b44e7SKa-Cheong Poon 		ret = sock_create_kern(rds_conn_net(conn), PF_INET6,
1201e2b44e7SKa-Cheong Poon 				       SOCK_STREAM, IPPROTO_TCP, &sock);
1211e2b44e7SKa-Cheong Poon 		isv6 = true;
1221e2b44e7SKa-Cheong Poon 	}
1231e2b44e7SKa-Cheong Poon 
12470041088SAndy Grover 	if (ret < 0)
12570041088SAndy Grover 		goto out;
12670041088SAndy Grover 
1276997fbd7STetsuo Handa 	if (!rds_tcp_tune(sock)) {
1286997fbd7STetsuo Handa 		ret = -EINVAL;
1296997fbd7STetsuo Handa 		goto out;
1306997fbd7STetsuo Handa 	}
13170041088SAndy Grover 
1321e2b44e7SKa-Cheong Poon 	if (isv6) {
1331e2b44e7SKa-Cheong Poon 		sin6.sin6_family = AF_INET6;
1341e2b44e7SKa-Cheong Poon 		sin6.sin6_addr = conn->c_laddr;
1351e2b44e7SKa-Cheong Poon 		sin6.sin6_port = 0;
1361e2b44e7SKa-Cheong Poon 		sin6.sin6_flowinfo = 0;
1371e2b44e7SKa-Cheong Poon 		sin6.sin6_scope_id = conn->c_dev_if;
1381e2b44e7SKa-Cheong Poon 		addr = (struct sockaddr *)&sin6;
1391e2b44e7SKa-Cheong Poon 		addrlen = sizeof(sin6);
1401e2b44e7SKa-Cheong Poon 	} else {
141eee2fa6aSKa-Cheong Poon 		sin.sin_family = AF_INET;
142eee2fa6aSKa-Cheong Poon 		sin.sin_addr.s_addr = conn->c_laddr.s6_addr32[3];
143eee2fa6aSKa-Cheong Poon 		sin.sin_port = 0;
144eee2fa6aSKa-Cheong Poon 		addr = (struct sockaddr *)&sin;
145eee2fa6aSKa-Cheong Poon 		addrlen = sizeof(sin);
1461e2b44e7SKa-Cheong Poon 	}
14770041088SAndy Grover 
148*c889a99aSJordan Rife 	ret = kernel_bind(sock, addr, addrlen);
14970041088SAndy Grover 	if (ret) {
150eee2fa6aSKa-Cheong Poon 		rdsdebug("bind failed with %d at address %pI6c\n",
1516884b348SJoe Perches 			 ret, &conn->c_laddr);
15270041088SAndy Grover 		goto out;
15370041088SAndy Grover 	}
15470041088SAndy Grover 
1551e2b44e7SKa-Cheong Poon 	if (isv6) {
1561e2b44e7SKa-Cheong Poon 		sin6.sin6_family = AF_INET6;
1571e2b44e7SKa-Cheong Poon 		sin6.sin6_addr = conn->c_faddr;
1581e2b44e7SKa-Cheong Poon 		sin6.sin6_port = htons(RDS_TCP_PORT);
1591e2b44e7SKa-Cheong Poon 		sin6.sin6_flowinfo = 0;
1601e2b44e7SKa-Cheong Poon 		sin6.sin6_scope_id = conn->c_dev_if;
1611e2b44e7SKa-Cheong Poon 		addr = (struct sockaddr *)&sin6;
1621e2b44e7SKa-Cheong Poon 		addrlen = sizeof(sin6);
1631e2b44e7SKa-Cheong Poon 	} else {
164eee2fa6aSKa-Cheong Poon 		sin.sin_family = AF_INET;
165eee2fa6aSKa-Cheong Poon 		sin.sin_addr.s_addr = conn->c_faddr.s6_addr32[3];
166eee2fa6aSKa-Cheong Poon 		sin.sin_port = htons(RDS_TCP_PORT);
167eee2fa6aSKa-Cheong Poon 		addr = (struct sockaddr *)&sin;
168eee2fa6aSKa-Cheong Poon 		addrlen = sizeof(sin);
1691e2b44e7SKa-Cheong Poon 	}
17070041088SAndy Grover 
17170041088SAndy Grover 	/*
17270041088SAndy Grover 	 * once we call connect() we can start getting callbacks and they
17370041088SAndy Grover 	 * own the socket
17470041088SAndy Grover 	 */
175ea3b1ea5SSowmini Varadhan 	rds_tcp_set_callbacks(sock, cp);
17626297b4cSJordan Rife 	ret = kernel_connect(sock, addr, addrlen, O_NONBLOCK);
17770041088SAndy Grover 
178eee2fa6aSKa-Cheong Poon 	rdsdebug("connect to address %pI6c returned %d\n", &conn->c_faddr, ret);
17970041088SAndy Grover 	if (ret == -EINPROGRESS)
18070041088SAndy Grover 		ret = 0;
181467fa153SSowmini Varadhan 	if (ret == 0) {
182467fa153SSowmini Varadhan 		rds_tcp_keepalive(sock);
183eb74cc97SHerton R. Krzesinski 		sock = NULL;
184467fa153SSowmini Varadhan 	} else {
185b04e8554SSowmini Varadhan 		rds_tcp_restore_callbacks(sock, cp->cp_transport_data);
186467fa153SSowmini Varadhan 	}
18770041088SAndy Grover 
18870041088SAndy Grover out:
18902105b2cSSowmini Varadhan 	mutex_unlock(&tc->t_conn_path_lock);
19070041088SAndy Grover 	if (sock)
19170041088SAndy Grover 		sock_release(sock);
19270041088SAndy Grover 	return ret;
19370041088SAndy Grover }
19470041088SAndy Grover 
19570041088SAndy Grover /*
19670041088SAndy Grover  * Before killing the tcp socket this needs to serialize with callbacks.  The
19770041088SAndy Grover  * caller has already grabbed the sending sem so we're serialized with other
19870041088SAndy Grover  * senders.
19970041088SAndy Grover  *
20070041088SAndy Grover  * TCP calls the callbacks with the sock lock so we hold it while we reset the
20170041088SAndy Grover  * callbacks to those set by TCP.  Our callbacks won't execute again once we
20270041088SAndy Grover  * hold the sock lock.
20370041088SAndy Grover  */
rds_tcp_conn_path_shutdown(struct rds_conn_path * cp)204226f7a7dSSowmini Varadhan void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
20570041088SAndy Grover {
206226f7a7dSSowmini Varadhan 	struct rds_tcp_connection *tc = cp->cp_transport_data;
20770041088SAndy Grover 	struct socket *sock = tc->t_sock;
20870041088SAndy Grover 
209226f7a7dSSowmini Varadhan 	rdsdebug("shutting down conn %p tc %p sock %p\n",
210226f7a7dSSowmini Varadhan 		 cp->cp_conn, tc, sock);
21170041088SAndy Grover 
21270041088SAndy Grover 	if (sock) {
213ebeeb1adSSowmini Varadhan 		if (rds_destroy_pending(cp->cp_conn))
214c433594cSChristoph Hellwig 			sock_no_linger(sock->sk);
21570041088SAndy Grover 		sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
21670041088SAndy Grover 		lock_sock(sock->sk);
21770041088SAndy Grover 		rds_tcp_restore_callbacks(sock, tc); /* tc->tc_sock = NULL */
21870041088SAndy Grover 
21970041088SAndy Grover 		release_sock(sock->sk);
22070041088SAndy Grover 		sock_release(sock);
221ccbd6a5aSJoe Perches 	}
22270041088SAndy Grover 
22370041088SAndy Grover 	if (tc->t_tinc) {
22470041088SAndy Grover 		rds_inc_put(&tc->t_tinc->ti_inc);
22570041088SAndy Grover 		tc->t_tinc = NULL;
22670041088SAndy Grover 	}
22770041088SAndy Grover 	tc->t_tinc_hdr_rem = sizeof(struct rds_header);
22870041088SAndy Grover 	tc->t_tinc_data_rem = 0;
22970041088SAndy Grover }
230