xref: /openbmc/linux/net/mptcp/protocol.c (revision 648ef4b88673dadb8463bf0d4b10fbf33d55def8)
1f870fa0bSMat Martineau // SPDX-License-Identifier: GPL-2.0
2f870fa0bSMat Martineau /* Multipath TCP
3f870fa0bSMat Martineau  *
4f870fa0bSMat Martineau  * Copyright (c) 2017 - 2019, Intel Corporation.
5f870fa0bSMat Martineau  */
6f870fa0bSMat Martineau 
7f870fa0bSMat Martineau #define pr_fmt(fmt) "MPTCP: " fmt
8f870fa0bSMat Martineau 
9f870fa0bSMat Martineau #include <linux/kernel.h>
10f870fa0bSMat Martineau #include <linux/module.h>
11f870fa0bSMat Martineau #include <linux/netdevice.h>
12f870fa0bSMat Martineau #include <net/sock.h>
13f870fa0bSMat Martineau #include <net/inet_common.h>
14f870fa0bSMat Martineau #include <net/inet_hashtables.h>
15f870fa0bSMat Martineau #include <net/protocol.h>
16f870fa0bSMat Martineau #include <net/tcp.h>
17cf7da0d6SPeter Krystad #if IS_ENABLED(CONFIG_MPTCP_IPV6)
18cf7da0d6SPeter Krystad #include <net/transp_v6.h>
19cf7da0d6SPeter Krystad #endif
20f870fa0bSMat Martineau #include <net/mptcp.h>
21f870fa0bSMat Martineau #include "protocol.h"
22f870fa0bSMat Martineau 
232303f994SPeter Krystad #define MPTCP_SAME_STATE TCP_MAX_STATES
242303f994SPeter Krystad 
252303f994SPeter Krystad /* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not
262303f994SPeter Krystad  * completed yet or has failed, return the subflow socket.
272303f994SPeter Krystad  * Otherwise return NULL.
282303f994SPeter Krystad  */
292303f994SPeter Krystad static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk)
302303f994SPeter Krystad {
31cec37a6eSPeter Krystad 	if (!msk->subflow || mptcp_subflow_ctx(msk->subflow->sk)->fourth_ack)
322303f994SPeter Krystad 		return NULL;
332303f994SPeter Krystad 
342303f994SPeter Krystad 	return msk->subflow;
352303f994SPeter Krystad }
362303f994SPeter Krystad 
37cec37a6eSPeter Krystad /* if msk has a single subflow, and the mp_capable handshake is failed,
38cec37a6eSPeter Krystad  * return it.
39cec37a6eSPeter Krystad  * Otherwise returns NULL
40cec37a6eSPeter Krystad  */
41cec37a6eSPeter Krystad static struct socket *__mptcp_tcp_fallback(const struct mptcp_sock *msk)
42cec37a6eSPeter Krystad {
43cec37a6eSPeter Krystad 	struct socket *ssock = __mptcp_nmpc_socket(msk);
44cec37a6eSPeter Krystad 
45cec37a6eSPeter Krystad 	sock_owned_by_me((const struct sock *)msk);
46cec37a6eSPeter Krystad 
47cec37a6eSPeter Krystad 	if (!ssock || sk_is_mptcp(ssock->sk))
48cec37a6eSPeter Krystad 		return NULL;
49cec37a6eSPeter Krystad 
50cec37a6eSPeter Krystad 	return ssock;
51cec37a6eSPeter Krystad }
52cec37a6eSPeter Krystad 
532303f994SPeter Krystad static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk)
542303f994SPeter Krystad {
552303f994SPeter Krystad 	return ((struct sock *)msk)->sk_state == TCP_CLOSE;
562303f994SPeter Krystad }
572303f994SPeter Krystad 
582303f994SPeter Krystad static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state)
592303f994SPeter Krystad {
602303f994SPeter Krystad 	struct mptcp_subflow_context *subflow;
612303f994SPeter Krystad 	struct sock *sk = (struct sock *)msk;
622303f994SPeter Krystad 	struct socket *ssock;
632303f994SPeter Krystad 	int err;
642303f994SPeter Krystad 
652303f994SPeter Krystad 	ssock = __mptcp_nmpc_socket(msk);
662303f994SPeter Krystad 	if (ssock)
672303f994SPeter Krystad 		goto set_state;
682303f994SPeter Krystad 
692303f994SPeter Krystad 	if (!__mptcp_can_create_subflow(msk))
702303f994SPeter Krystad 		return ERR_PTR(-EINVAL);
712303f994SPeter Krystad 
722303f994SPeter Krystad 	err = mptcp_subflow_create_socket(sk, &ssock);
732303f994SPeter Krystad 	if (err)
742303f994SPeter Krystad 		return ERR_PTR(err);
752303f994SPeter Krystad 
762303f994SPeter Krystad 	msk->subflow = ssock;
772303f994SPeter Krystad 	subflow = mptcp_subflow_ctx(ssock->sk);
78cec37a6eSPeter Krystad 	list_add(&subflow->node, &msk->conn_list);
792303f994SPeter Krystad 	subflow->request_mptcp = 1;
802303f994SPeter Krystad 
812303f994SPeter Krystad set_state:
822303f994SPeter Krystad 	if (state != MPTCP_SAME_STATE)
832303f994SPeter Krystad 		inet_sk_state_store(sk, state);
842303f994SPeter Krystad 	return ssock;
852303f994SPeter Krystad }
862303f994SPeter Krystad 
87cec37a6eSPeter Krystad static struct sock *mptcp_subflow_get(const struct mptcp_sock *msk)
88cec37a6eSPeter Krystad {
89cec37a6eSPeter Krystad 	struct mptcp_subflow_context *subflow;
90cec37a6eSPeter Krystad 
91cec37a6eSPeter Krystad 	sock_owned_by_me((const struct sock *)msk);
92cec37a6eSPeter Krystad 
93cec37a6eSPeter Krystad 	mptcp_for_each_subflow(msk, subflow) {
94cec37a6eSPeter Krystad 		return mptcp_subflow_tcp_sock(subflow);
95cec37a6eSPeter Krystad 	}
96cec37a6eSPeter Krystad 
97cec37a6eSPeter Krystad 	return NULL;
98cec37a6eSPeter Krystad }
99cec37a6eSPeter Krystad 
1006d0060f6SMat Martineau static bool mptcp_ext_cache_refill(struct mptcp_sock *msk)
1016d0060f6SMat Martineau {
1026d0060f6SMat Martineau 	if (!msk->cached_ext)
1036d0060f6SMat Martineau 		msk->cached_ext = __skb_ext_alloc();
1046d0060f6SMat Martineau 
1056d0060f6SMat Martineau 	return !!msk->cached_ext;
1066d0060f6SMat Martineau }
1076d0060f6SMat Martineau 
1086d0060f6SMat Martineau static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
1096d0060f6SMat Martineau 			      struct msghdr *msg, long *timeo)
1106d0060f6SMat Martineau {
1116d0060f6SMat Martineau 	int mss_now = 0, size_goal = 0, ret = 0;
1126d0060f6SMat Martineau 	struct mptcp_sock *msk = mptcp_sk(sk);
1136d0060f6SMat Martineau 	struct mptcp_ext *mpext = NULL;
1146d0060f6SMat Martineau 	struct page_frag *pfrag;
1156d0060f6SMat Martineau 	struct sk_buff *skb;
1166d0060f6SMat Martineau 	size_t psize;
1176d0060f6SMat Martineau 
1186d0060f6SMat Martineau 	/* use the mptcp page cache so that we can easily move the data
1196d0060f6SMat Martineau 	 * from one substream to another, but do per subflow memory accounting
1206d0060f6SMat Martineau 	 */
1216d0060f6SMat Martineau 	pfrag = sk_page_frag(sk);
1226d0060f6SMat Martineau 	while (!sk_page_frag_refill(ssk, pfrag) ||
1236d0060f6SMat Martineau 	       !mptcp_ext_cache_refill(msk)) {
1246d0060f6SMat Martineau 		ret = sk_stream_wait_memory(ssk, timeo);
1256d0060f6SMat Martineau 		if (ret)
1266d0060f6SMat Martineau 			return ret;
1276d0060f6SMat Martineau 	}
1286d0060f6SMat Martineau 
1296d0060f6SMat Martineau 	/* compute copy limit */
1306d0060f6SMat Martineau 	mss_now = tcp_send_mss(ssk, &size_goal, msg->msg_flags);
1316d0060f6SMat Martineau 	psize = min_t(int, pfrag->size - pfrag->offset, size_goal);
1326d0060f6SMat Martineau 
1336d0060f6SMat Martineau 	pr_debug("left=%zu", msg_data_left(msg));
1346d0060f6SMat Martineau 	psize = copy_page_from_iter(pfrag->page, pfrag->offset,
1356d0060f6SMat Martineau 				    min_t(size_t, msg_data_left(msg), psize),
1366d0060f6SMat Martineau 				    &msg->msg_iter);
1376d0060f6SMat Martineau 	pr_debug("left=%zu", msg_data_left(msg));
1386d0060f6SMat Martineau 	if (!psize)
1396d0060f6SMat Martineau 		return -EINVAL;
1406d0060f6SMat Martineau 
1416d0060f6SMat Martineau 	/* Mark the end of the previous write so the beginning of the
1426d0060f6SMat Martineau 	 * next write (with its own mptcp skb extension data) is not
1436d0060f6SMat Martineau 	 * collapsed.
1446d0060f6SMat Martineau 	 */
1456d0060f6SMat Martineau 	skb = tcp_write_queue_tail(ssk);
1466d0060f6SMat Martineau 	if (skb)
1476d0060f6SMat Martineau 		TCP_SKB_CB(skb)->eor = 1;
1486d0060f6SMat Martineau 
1496d0060f6SMat Martineau 	ret = do_tcp_sendpages(ssk, pfrag->page, pfrag->offset, psize,
1506d0060f6SMat Martineau 			       msg->msg_flags | MSG_SENDPAGE_NOTLAST);
1516d0060f6SMat Martineau 	if (ret <= 0)
1526d0060f6SMat Martineau 		return ret;
1536d0060f6SMat Martineau 	if (unlikely(ret < psize))
1546d0060f6SMat Martineau 		iov_iter_revert(&msg->msg_iter, psize - ret);
1556d0060f6SMat Martineau 
1566d0060f6SMat Martineau 	skb = tcp_write_queue_tail(ssk);
1576d0060f6SMat Martineau 	mpext = __skb_ext_set(skb, SKB_EXT_MPTCP, msk->cached_ext);
1586d0060f6SMat Martineau 	msk->cached_ext = NULL;
1596d0060f6SMat Martineau 
1606d0060f6SMat Martineau 	memset(mpext, 0, sizeof(*mpext));
1616d0060f6SMat Martineau 	mpext->data_seq = msk->write_seq;
1626d0060f6SMat Martineau 	mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq;
1636d0060f6SMat Martineau 	mpext->data_len = ret;
1646d0060f6SMat Martineau 	mpext->use_map = 1;
1656d0060f6SMat Martineau 	mpext->dsn64 = 1;
1666d0060f6SMat Martineau 
1676d0060f6SMat Martineau 	pr_debug("data_seq=%llu subflow_seq=%u data_len=%u dsn64=%d",
1686d0060f6SMat Martineau 		 mpext->data_seq, mpext->subflow_seq, mpext->data_len,
1696d0060f6SMat Martineau 		 mpext->dsn64);
1706d0060f6SMat Martineau 
1716d0060f6SMat Martineau 	pfrag->offset += ret;
1726d0060f6SMat Martineau 	msk->write_seq += ret;
1736d0060f6SMat Martineau 	mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
1746d0060f6SMat Martineau 
1756d0060f6SMat Martineau 	tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle, size_goal);
1766d0060f6SMat Martineau 	return ret;
1776d0060f6SMat Martineau }
1786d0060f6SMat Martineau 
179f870fa0bSMat Martineau static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
180f870fa0bSMat Martineau {
181f870fa0bSMat Martineau 	struct mptcp_sock *msk = mptcp_sk(sk);
182cec37a6eSPeter Krystad 	struct socket *ssock;
1836d0060f6SMat Martineau 	size_t copied = 0;
184cec37a6eSPeter Krystad 	struct sock *ssk;
1856d0060f6SMat Martineau 	int ret = 0;
1866d0060f6SMat Martineau 	long timeo;
187f870fa0bSMat Martineau 
188f870fa0bSMat Martineau 	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
189f870fa0bSMat Martineau 		return -EOPNOTSUPP;
190f870fa0bSMat Martineau 
191cec37a6eSPeter Krystad 	lock_sock(sk);
192cec37a6eSPeter Krystad 	ssock = __mptcp_tcp_fallback(msk);
193cec37a6eSPeter Krystad 	if (ssock) {
194cec37a6eSPeter Krystad 		pr_debug("fallback passthrough");
195cec37a6eSPeter Krystad 		ret = sock_sendmsg(ssock, msg);
196cec37a6eSPeter Krystad 		release_sock(sk);
197cec37a6eSPeter Krystad 		return ret;
198cec37a6eSPeter Krystad 	}
199cec37a6eSPeter Krystad 
2006d0060f6SMat Martineau 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
2016d0060f6SMat Martineau 
202cec37a6eSPeter Krystad 	ssk = mptcp_subflow_get(msk);
203cec37a6eSPeter Krystad 	if (!ssk) {
204cec37a6eSPeter Krystad 		release_sock(sk);
205cec37a6eSPeter Krystad 		return -ENOTCONN;
206cec37a6eSPeter Krystad 	}
207cec37a6eSPeter Krystad 
2086d0060f6SMat Martineau 	pr_debug("conn_list->subflow=%p", ssk);
209cec37a6eSPeter Krystad 
2106d0060f6SMat Martineau 	lock_sock(ssk);
2116d0060f6SMat Martineau 	while (msg_data_left(msg)) {
2126d0060f6SMat Martineau 		ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo);
2136d0060f6SMat Martineau 		if (ret < 0)
2146d0060f6SMat Martineau 			break;
2156d0060f6SMat Martineau 
2166d0060f6SMat Martineau 		copied += ret;
2176d0060f6SMat Martineau 	}
2186d0060f6SMat Martineau 
2196d0060f6SMat Martineau 	if (copied > 0)
2206d0060f6SMat Martineau 		ret = copied;
2216d0060f6SMat Martineau 
2226d0060f6SMat Martineau 	release_sock(ssk);
223cec37a6eSPeter Krystad 	release_sock(sk);
224cec37a6eSPeter Krystad 	return ret;
225f870fa0bSMat Martineau }
226f870fa0bSMat Martineau 
227*648ef4b8SMat Martineau int mptcp_read_actor(read_descriptor_t *desc, struct sk_buff *skb,
228*648ef4b8SMat Martineau 		     unsigned int offset, size_t len)
229*648ef4b8SMat Martineau {
230*648ef4b8SMat Martineau 	struct mptcp_read_arg *arg = desc->arg.data;
231*648ef4b8SMat Martineau 	size_t copy_len;
232*648ef4b8SMat Martineau 
233*648ef4b8SMat Martineau 	copy_len = min(desc->count, len);
234*648ef4b8SMat Martineau 
235*648ef4b8SMat Martineau 	if (likely(arg->msg)) {
236*648ef4b8SMat Martineau 		int err;
237*648ef4b8SMat Martineau 
238*648ef4b8SMat Martineau 		err = skb_copy_datagram_msg(skb, offset, arg->msg, copy_len);
239*648ef4b8SMat Martineau 		if (err) {
240*648ef4b8SMat Martineau 			pr_debug("error path");
241*648ef4b8SMat Martineau 			desc->error = err;
242*648ef4b8SMat Martineau 			return err;
243*648ef4b8SMat Martineau 		}
244*648ef4b8SMat Martineau 	} else {
245*648ef4b8SMat Martineau 		pr_debug("Flushing skb payload");
246*648ef4b8SMat Martineau 	}
247*648ef4b8SMat Martineau 
248*648ef4b8SMat Martineau 	desc->count -= copy_len;
249*648ef4b8SMat Martineau 
250*648ef4b8SMat Martineau 	pr_debug("consumed %zu bytes, %zu left", copy_len, desc->count);
251*648ef4b8SMat Martineau 	return copy_len;
252*648ef4b8SMat Martineau }
253*648ef4b8SMat Martineau 
254f870fa0bSMat Martineau static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
255f870fa0bSMat Martineau 			 int nonblock, int flags, int *addr_len)
256f870fa0bSMat Martineau {
257f870fa0bSMat Martineau 	struct mptcp_sock *msk = mptcp_sk(sk);
258cec37a6eSPeter Krystad 	struct socket *ssock;
259cec37a6eSPeter Krystad 	struct sock *ssk;
260cec37a6eSPeter Krystad 	int copied = 0;
261f870fa0bSMat Martineau 
262f870fa0bSMat Martineau 	if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT))
263f870fa0bSMat Martineau 		return -EOPNOTSUPP;
264f870fa0bSMat Martineau 
265cec37a6eSPeter Krystad 	lock_sock(sk);
266cec37a6eSPeter Krystad 	ssock = __mptcp_tcp_fallback(msk);
267cec37a6eSPeter Krystad 	if (ssock) {
268cec37a6eSPeter Krystad 		pr_debug("fallback-read subflow=%p",
269cec37a6eSPeter Krystad 			 mptcp_subflow_ctx(ssock->sk));
270cec37a6eSPeter Krystad 		copied = sock_recvmsg(ssock, msg, flags);
271cec37a6eSPeter Krystad 		release_sock(sk);
272cec37a6eSPeter Krystad 		return copied;
273cec37a6eSPeter Krystad 	}
274cec37a6eSPeter Krystad 
275cec37a6eSPeter Krystad 	ssk = mptcp_subflow_get(msk);
276cec37a6eSPeter Krystad 	if (!ssk) {
277cec37a6eSPeter Krystad 		release_sock(sk);
278cec37a6eSPeter Krystad 		return -ENOTCONN;
279cec37a6eSPeter Krystad 	}
280cec37a6eSPeter Krystad 
281cec37a6eSPeter Krystad 	copied = sock_recvmsg(ssk->sk_socket, msg, flags);
282cec37a6eSPeter Krystad 
283cec37a6eSPeter Krystad 	release_sock(sk);
284cec37a6eSPeter Krystad 
285cec37a6eSPeter Krystad 	return copied;
286cec37a6eSPeter Krystad }
287cec37a6eSPeter Krystad 
288cec37a6eSPeter Krystad /* subflow sockets can be either outgoing (connect) or incoming
289cec37a6eSPeter Krystad  * (accept).
290cec37a6eSPeter Krystad  *
291cec37a6eSPeter Krystad  * Outgoing subflows use in-kernel sockets.
292cec37a6eSPeter Krystad  * Incoming subflows do not have their own 'struct socket' allocated,
293cec37a6eSPeter Krystad  * so we need to use tcp_close() after detaching them from the mptcp
294cec37a6eSPeter Krystad  * parent socket.
295cec37a6eSPeter Krystad  */
296cec37a6eSPeter Krystad static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
297cec37a6eSPeter Krystad 			      struct mptcp_subflow_context *subflow,
298cec37a6eSPeter Krystad 			      long timeout)
299cec37a6eSPeter Krystad {
300cec37a6eSPeter Krystad 	struct socket *sock = READ_ONCE(ssk->sk_socket);
301cec37a6eSPeter Krystad 
302cec37a6eSPeter Krystad 	list_del(&subflow->node);
303cec37a6eSPeter Krystad 
304cec37a6eSPeter Krystad 	if (sock && sock != sk->sk_socket) {
305cec37a6eSPeter Krystad 		/* outgoing subflow */
306cec37a6eSPeter Krystad 		sock_release(sock);
307cec37a6eSPeter Krystad 	} else {
308cec37a6eSPeter Krystad 		/* incoming subflow */
309cec37a6eSPeter Krystad 		tcp_close(ssk, timeout);
310cec37a6eSPeter Krystad 	}
311f870fa0bSMat Martineau }
312f870fa0bSMat Martineau 
313f870fa0bSMat Martineau static int mptcp_init_sock(struct sock *sk)
314f870fa0bSMat Martineau {
315cec37a6eSPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sk);
316cec37a6eSPeter Krystad 
317cec37a6eSPeter Krystad 	INIT_LIST_HEAD(&msk->conn_list);
318cec37a6eSPeter Krystad 
319f870fa0bSMat Martineau 	return 0;
320f870fa0bSMat Martineau }
321f870fa0bSMat Martineau 
32221498490SPeter Krystad static void mptcp_subflow_shutdown(struct sock *ssk, int how)
32321498490SPeter Krystad {
32421498490SPeter Krystad 	lock_sock(ssk);
32521498490SPeter Krystad 
32621498490SPeter Krystad 	switch (ssk->sk_state) {
32721498490SPeter Krystad 	case TCP_LISTEN:
32821498490SPeter Krystad 		if (!(how & RCV_SHUTDOWN))
32921498490SPeter Krystad 			break;
33021498490SPeter Krystad 		/* fall through */
33121498490SPeter Krystad 	case TCP_SYN_SENT:
33221498490SPeter Krystad 		tcp_disconnect(ssk, O_NONBLOCK);
33321498490SPeter Krystad 		break;
33421498490SPeter Krystad 	default:
33521498490SPeter Krystad 		ssk->sk_shutdown |= how;
33621498490SPeter Krystad 		tcp_shutdown(ssk, how);
33721498490SPeter Krystad 		break;
33821498490SPeter Krystad 	}
33921498490SPeter Krystad 
34021498490SPeter Krystad 	/* Wake up anyone sleeping in poll. */
34121498490SPeter Krystad 	ssk->sk_state_change(ssk);
34221498490SPeter Krystad 	release_sock(ssk);
34321498490SPeter Krystad }
34421498490SPeter Krystad 
345f870fa0bSMat Martineau static void mptcp_close(struct sock *sk, long timeout)
346f870fa0bSMat Martineau {
347cec37a6eSPeter Krystad 	struct mptcp_subflow_context *subflow, *tmp;
348f870fa0bSMat Martineau 	struct mptcp_sock *msk = mptcp_sk(sk);
349f870fa0bSMat Martineau 
35079c0949eSPeter Krystad 	mptcp_token_destroy(msk->token);
351f870fa0bSMat Martineau 	inet_sk_state_store(sk, TCP_CLOSE);
352f870fa0bSMat Martineau 
353cec37a6eSPeter Krystad 	lock_sock(sk);
354cec37a6eSPeter Krystad 
355cec37a6eSPeter Krystad 	list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
356cec37a6eSPeter Krystad 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
357cec37a6eSPeter Krystad 
358cec37a6eSPeter Krystad 		__mptcp_close_ssk(sk, ssk, subflow, timeout);
359f870fa0bSMat Martineau 	}
360f870fa0bSMat Martineau 
3616d0060f6SMat Martineau 	if (msk->cached_ext)
3626d0060f6SMat Martineau 		__skb_ext_put(msk->cached_ext);
363cec37a6eSPeter Krystad 	release_sock(sk);
364cec37a6eSPeter Krystad 	sk_common_release(sk);
365f870fa0bSMat Martineau }
366f870fa0bSMat Martineau 
367cf7da0d6SPeter Krystad static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
368cf7da0d6SPeter Krystad {
369cf7da0d6SPeter Krystad #if IS_ENABLED(CONFIG_MPTCP_IPV6)
370cf7da0d6SPeter Krystad 	const struct ipv6_pinfo *ssk6 = inet6_sk(ssk);
371cf7da0d6SPeter Krystad 	struct ipv6_pinfo *msk6 = inet6_sk(msk);
372cf7da0d6SPeter Krystad 
373cf7da0d6SPeter Krystad 	msk->sk_v6_daddr = ssk->sk_v6_daddr;
374cf7da0d6SPeter Krystad 	msk->sk_v6_rcv_saddr = ssk->sk_v6_rcv_saddr;
375cf7da0d6SPeter Krystad 
376cf7da0d6SPeter Krystad 	if (msk6 && ssk6) {
377cf7da0d6SPeter Krystad 		msk6->saddr = ssk6->saddr;
378cf7da0d6SPeter Krystad 		msk6->flow_label = ssk6->flow_label;
379cf7da0d6SPeter Krystad 	}
380cf7da0d6SPeter Krystad #endif
381cf7da0d6SPeter Krystad 
382cf7da0d6SPeter Krystad 	inet_sk(msk)->inet_num = inet_sk(ssk)->inet_num;
383cf7da0d6SPeter Krystad 	inet_sk(msk)->inet_dport = inet_sk(ssk)->inet_dport;
384cf7da0d6SPeter Krystad 	inet_sk(msk)->inet_sport = inet_sk(ssk)->inet_sport;
385cf7da0d6SPeter Krystad 	inet_sk(msk)->inet_daddr = inet_sk(ssk)->inet_daddr;
386cf7da0d6SPeter Krystad 	inet_sk(msk)->inet_saddr = inet_sk(ssk)->inet_saddr;
387cf7da0d6SPeter Krystad 	inet_sk(msk)->inet_rcv_saddr = inet_sk(ssk)->inet_rcv_saddr;
388cf7da0d6SPeter Krystad }
389cf7da0d6SPeter Krystad 
390cf7da0d6SPeter Krystad static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
391cf7da0d6SPeter Krystad 				 bool kern)
392cf7da0d6SPeter Krystad {
393cf7da0d6SPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sk);
394cf7da0d6SPeter Krystad 	struct socket *listener;
395cf7da0d6SPeter Krystad 	struct sock *newsk;
396cf7da0d6SPeter Krystad 
397cf7da0d6SPeter Krystad 	listener = __mptcp_nmpc_socket(msk);
398cf7da0d6SPeter Krystad 	if (WARN_ON_ONCE(!listener)) {
399cf7da0d6SPeter Krystad 		*err = -EINVAL;
400cf7da0d6SPeter Krystad 		return NULL;
401cf7da0d6SPeter Krystad 	}
402cf7da0d6SPeter Krystad 
403cf7da0d6SPeter Krystad 	pr_debug("msk=%p, listener=%p", msk, mptcp_subflow_ctx(listener->sk));
404cf7da0d6SPeter Krystad 	newsk = inet_csk_accept(listener->sk, flags, err, kern);
405cf7da0d6SPeter Krystad 	if (!newsk)
406cf7da0d6SPeter Krystad 		return NULL;
407cf7da0d6SPeter Krystad 
408cf7da0d6SPeter Krystad 	pr_debug("msk=%p, subflow is mptcp=%d", msk, sk_is_mptcp(newsk));
409cf7da0d6SPeter Krystad 
410cf7da0d6SPeter Krystad 	if (sk_is_mptcp(newsk)) {
411cf7da0d6SPeter Krystad 		struct mptcp_subflow_context *subflow;
412cf7da0d6SPeter Krystad 		struct sock *new_mptcp_sock;
413cf7da0d6SPeter Krystad 		struct sock *ssk = newsk;
4146d0060f6SMat Martineau 		u64 ack_seq;
415cf7da0d6SPeter Krystad 
416cf7da0d6SPeter Krystad 		subflow = mptcp_subflow_ctx(newsk);
417cf7da0d6SPeter Krystad 		lock_sock(sk);
418cf7da0d6SPeter Krystad 
419cf7da0d6SPeter Krystad 		local_bh_disable();
420cf7da0d6SPeter Krystad 		new_mptcp_sock = sk_clone_lock(sk, GFP_ATOMIC);
421cf7da0d6SPeter Krystad 		if (!new_mptcp_sock) {
422cf7da0d6SPeter Krystad 			*err = -ENOBUFS;
423cf7da0d6SPeter Krystad 			local_bh_enable();
424cf7da0d6SPeter Krystad 			release_sock(sk);
42521498490SPeter Krystad 			mptcp_subflow_shutdown(newsk, SHUT_RDWR + 1);
426cf7da0d6SPeter Krystad 			tcp_close(newsk, 0);
427cf7da0d6SPeter Krystad 			return NULL;
428cf7da0d6SPeter Krystad 		}
429cf7da0d6SPeter Krystad 
430cf7da0d6SPeter Krystad 		mptcp_init_sock(new_mptcp_sock);
431cf7da0d6SPeter Krystad 
432cf7da0d6SPeter Krystad 		msk = mptcp_sk(new_mptcp_sock);
433cf7da0d6SPeter Krystad 		msk->remote_key = subflow->remote_key;
434cf7da0d6SPeter Krystad 		msk->local_key = subflow->local_key;
43579c0949eSPeter Krystad 		msk->token = subflow->token;
436cf7da0d6SPeter Krystad 		msk->subflow = NULL;
437cf7da0d6SPeter Krystad 
43879c0949eSPeter Krystad 		mptcp_token_update_accept(newsk, new_mptcp_sock);
4396d0060f6SMat Martineau 
4406d0060f6SMat Martineau 		mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
4416d0060f6SMat Martineau 		msk->write_seq = subflow->idsn + 1;
4426d0060f6SMat Martineau 		ack_seq++;
4436d0060f6SMat Martineau 		msk->ack_seq = ack_seq;
444*648ef4b8SMat Martineau 		subflow->map_seq = ack_seq;
445*648ef4b8SMat Martineau 		subflow->map_subflow_seq = 1;
4466d0060f6SMat Martineau 		subflow->rel_write_seq = 1;
447*648ef4b8SMat Martineau 		subflow->tcp_sock = ssk;
448cf7da0d6SPeter Krystad 		newsk = new_mptcp_sock;
449cf7da0d6SPeter Krystad 		mptcp_copy_inaddrs(newsk, ssk);
450cf7da0d6SPeter Krystad 		list_add(&subflow->node, &msk->conn_list);
451cf7da0d6SPeter Krystad 
452cf7da0d6SPeter Krystad 		/* will be fully established at mptcp_stream_accept()
453cf7da0d6SPeter Krystad 		 * completion.
454cf7da0d6SPeter Krystad 		 */
455cf7da0d6SPeter Krystad 		inet_sk_state_store(new_mptcp_sock, TCP_SYN_RECV);
456cf7da0d6SPeter Krystad 		bh_unlock_sock(new_mptcp_sock);
457cf7da0d6SPeter Krystad 		local_bh_enable();
458cf7da0d6SPeter Krystad 		release_sock(sk);
459cf7da0d6SPeter Krystad 	}
460cf7da0d6SPeter Krystad 
461cf7da0d6SPeter Krystad 	return newsk;
462cf7da0d6SPeter Krystad }
463cf7da0d6SPeter Krystad 
/* Protocol destroy hook: intentionally empty, there is nothing to do. */
static void mptcp_destroy(struct sock *sk)
{
}
46779c0949eSPeter Krystad 
468717e79c8SPeter Krystad static int mptcp_setsockopt(struct sock *sk, int level, int optname,
469717e79c8SPeter Krystad 			    char __user *uoptval, unsigned int optlen)
470717e79c8SPeter Krystad {
471717e79c8SPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sk);
472717e79c8SPeter Krystad 	char __kernel *optval;
473717e79c8SPeter Krystad 	int ret = -EOPNOTSUPP;
474717e79c8SPeter Krystad 	struct socket *ssock;
475717e79c8SPeter Krystad 
476717e79c8SPeter Krystad 	/* will be treated as __user in tcp_setsockopt */
477717e79c8SPeter Krystad 	optval = (char __kernel __force *)uoptval;
478717e79c8SPeter Krystad 
479717e79c8SPeter Krystad 	pr_debug("msk=%p", msk);
480717e79c8SPeter Krystad 
481717e79c8SPeter Krystad 	/* @@ the meaning of setsockopt() when the socket is connected and
482717e79c8SPeter Krystad 	 * there are multiple subflows is not defined.
483717e79c8SPeter Krystad 	 */
484717e79c8SPeter Krystad 	lock_sock(sk);
485717e79c8SPeter Krystad 	ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
486717e79c8SPeter Krystad 	if (!IS_ERR(ssock)) {
487717e79c8SPeter Krystad 		pr_debug("subflow=%p", ssock->sk);
488717e79c8SPeter Krystad 		ret = kernel_setsockopt(ssock, level, optname, optval, optlen);
489717e79c8SPeter Krystad 	}
490717e79c8SPeter Krystad 	release_sock(sk);
491717e79c8SPeter Krystad 
492717e79c8SPeter Krystad 	return ret;
493717e79c8SPeter Krystad }
494717e79c8SPeter Krystad 
495717e79c8SPeter Krystad static int mptcp_getsockopt(struct sock *sk, int level, int optname,
496717e79c8SPeter Krystad 			    char __user *uoptval, int __user *uoption)
497717e79c8SPeter Krystad {
498717e79c8SPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sk);
499717e79c8SPeter Krystad 	char __kernel *optval;
500717e79c8SPeter Krystad 	int ret = -EOPNOTSUPP;
501717e79c8SPeter Krystad 	int __kernel *option;
502717e79c8SPeter Krystad 	struct socket *ssock;
503717e79c8SPeter Krystad 
504717e79c8SPeter Krystad 	/* will be treated as __user in tcp_getsockopt */
505717e79c8SPeter Krystad 	optval = (char __kernel __force *)uoptval;
506717e79c8SPeter Krystad 	option = (int __kernel __force *)uoption;
507717e79c8SPeter Krystad 
508717e79c8SPeter Krystad 	pr_debug("msk=%p", msk);
509717e79c8SPeter Krystad 
510717e79c8SPeter Krystad 	/* @@ the meaning of getsockopt() when the socket is connected and
511717e79c8SPeter Krystad 	 * there are multiple subflows is not defined.
512717e79c8SPeter Krystad 	 */
513717e79c8SPeter Krystad 	lock_sock(sk);
514717e79c8SPeter Krystad 	ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
515717e79c8SPeter Krystad 	if (!IS_ERR(ssock)) {
516717e79c8SPeter Krystad 		pr_debug("subflow=%p", ssock->sk);
517717e79c8SPeter Krystad 		ret = kernel_getsockopt(ssock, level, optname, optval, option);
518717e79c8SPeter Krystad 	}
519717e79c8SPeter Krystad 	release_sock(sk);
520717e79c8SPeter Krystad 
521717e79c8SPeter Krystad 	return ret;
522717e79c8SPeter Krystad }
523717e79c8SPeter Krystad 
524cec37a6eSPeter Krystad static int mptcp_get_port(struct sock *sk, unsigned short snum)
525f870fa0bSMat Martineau {
526f870fa0bSMat Martineau 	struct mptcp_sock *msk = mptcp_sk(sk);
527cec37a6eSPeter Krystad 	struct socket *ssock;
528f870fa0bSMat Martineau 
529cec37a6eSPeter Krystad 	ssock = __mptcp_nmpc_socket(msk);
530cec37a6eSPeter Krystad 	pr_debug("msk=%p, subflow=%p", msk, ssock);
531cec37a6eSPeter Krystad 	if (WARN_ON_ONCE(!ssock))
532cec37a6eSPeter Krystad 		return -EINVAL;
533f870fa0bSMat Martineau 
534cec37a6eSPeter Krystad 	return inet_csk_get_port(ssock->sk, snum);
535cec37a6eSPeter Krystad }
536f870fa0bSMat Martineau 
537cec37a6eSPeter Krystad void mptcp_finish_connect(struct sock *ssk)
538cec37a6eSPeter Krystad {
539cec37a6eSPeter Krystad 	struct mptcp_subflow_context *subflow;
540cec37a6eSPeter Krystad 	struct mptcp_sock *msk;
541cec37a6eSPeter Krystad 	struct sock *sk;
5426d0060f6SMat Martineau 	u64 ack_seq;
543f870fa0bSMat Martineau 
544cec37a6eSPeter Krystad 	subflow = mptcp_subflow_ctx(ssk);
545f870fa0bSMat Martineau 
546cec37a6eSPeter Krystad 	if (!subflow->mp_capable)
547cec37a6eSPeter Krystad 		return;
548cec37a6eSPeter Krystad 
549cec37a6eSPeter Krystad 	sk = subflow->conn;
550cec37a6eSPeter Krystad 	msk = mptcp_sk(sk);
551cec37a6eSPeter Krystad 
552*648ef4b8SMat Martineau 	pr_debug("msk=%p, token=%u", sk, subflow->token);
553*648ef4b8SMat Martineau 
5546d0060f6SMat Martineau 	mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
5556d0060f6SMat Martineau 	ack_seq++;
556*648ef4b8SMat Martineau 	subflow->map_seq = ack_seq;
557*648ef4b8SMat Martineau 	subflow->map_subflow_seq = 1;
5586d0060f6SMat Martineau 	subflow->rel_write_seq = 1;
5596d0060f6SMat Martineau 
560cec37a6eSPeter Krystad 	/* the socket is not connected yet, no msk/subflow ops can access/race
561cec37a6eSPeter Krystad 	 * accessing the field below
562cec37a6eSPeter Krystad 	 */
563cec37a6eSPeter Krystad 	WRITE_ONCE(msk->remote_key, subflow->remote_key);
564cec37a6eSPeter Krystad 	WRITE_ONCE(msk->local_key, subflow->local_key);
56579c0949eSPeter Krystad 	WRITE_ONCE(msk->token, subflow->token);
5666d0060f6SMat Martineau 	WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
5676d0060f6SMat Martineau 	WRITE_ONCE(msk->ack_seq, ack_seq);
568f870fa0bSMat Martineau }
569f870fa0bSMat Martineau 
570cf7da0d6SPeter Krystad static void mptcp_sock_graft(struct sock *sk, struct socket *parent)
571cf7da0d6SPeter Krystad {
572cf7da0d6SPeter Krystad 	write_lock_bh(&sk->sk_callback_lock);
573cf7da0d6SPeter Krystad 	rcu_assign_pointer(sk->sk_wq, &parent->wq);
574cf7da0d6SPeter Krystad 	sk_set_socket(sk, parent);
575cf7da0d6SPeter Krystad 	sk->sk_uid = SOCK_INODE(parent)->i_uid;
576cf7da0d6SPeter Krystad 	write_unlock_bh(&sk->sk_callback_lock);
577cf7da0d6SPeter Krystad }
578cf7da0d6SPeter Krystad 
579f870fa0bSMat Martineau static struct proto mptcp_prot = {
580f870fa0bSMat Martineau 	.name		= "MPTCP",
581f870fa0bSMat Martineau 	.owner		= THIS_MODULE,
582f870fa0bSMat Martineau 	.init		= mptcp_init_sock,
583f870fa0bSMat Martineau 	.close		= mptcp_close,
584cf7da0d6SPeter Krystad 	.accept		= mptcp_accept,
585717e79c8SPeter Krystad 	.setsockopt	= mptcp_setsockopt,
586717e79c8SPeter Krystad 	.getsockopt	= mptcp_getsockopt,
587f870fa0bSMat Martineau 	.shutdown	= tcp_shutdown,
58879c0949eSPeter Krystad 	.destroy	= mptcp_destroy,
589f870fa0bSMat Martineau 	.sendmsg	= mptcp_sendmsg,
590f870fa0bSMat Martineau 	.recvmsg	= mptcp_recvmsg,
591f870fa0bSMat Martineau 	.hash		= inet_hash,
592f870fa0bSMat Martineau 	.unhash		= inet_unhash,
593cec37a6eSPeter Krystad 	.get_port	= mptcp_get_port,
594f870fa0bSMat Martineau 	.obj_size	= sizeof(struct mptcp_sock),
595f870fa0bSMat Martineau 	.no_autobind	= true,
596f870fa0bSMat Martineau };
597f870fa0bSMat Martineau 
5982303f994SPeter Krystad static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
5992303f994SPeter Krystad {
6002303f994SPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sock->sk);
6012303f994SPeter Krystad 	struct socket *ssock;
602cf7da0d6SPeter Krystad 	int err;
6032303f994SPeter Krystad 
6042303f994SPeter Krystad 	lock_sock(sock->sk);
6052303f994SPeter Krystad 	ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE);
6062303f994SPeter Krystad 	if (IS_ERR(ssock)) {
6072303f994SPeter Krystad 		err = PTR_ERR(ssock);
6082303f994SPeter Krystad 		goto unlock;
6092303f994SPeter Krystad 	}
6102303f994SPeter Krystad 
6112303f994SPeter Krystad 	err = ssock->ops->bind(ssock, uaddr, addr_len);
612cf7da0d6SPeter Krystad 	if (!err)
613cf7da0d6SPeter Krystad 		mptcp_copy_inaddrs(sock->sk, ssock->sk);
6142303f994SPeter Krystad 
6152303f994SPeter Krystad unlock:
6162303f994SPeter Krystad 	release_sock(sock->sk);
6172303f994SPeter Krystad 	return err;
6182303f994SPeter Krystad }
6192303f994SPeter Krystad 
6202303f994SPeter Krystad static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
6212303f994SPeter Krystad 				int addr_len, int flags)
6222303f994SPeter Krystad {
6232303f994SPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sock->sk);
6242303f994SPeter Krystad 	struct socket *ssock;
6252303f994SPeter Krystad 	int err;
6262303f994SPeter Krystad 
6272303f994SPeter Krystad 	lock_sock(sock->sk);
6282303f994SPeter Krystad 	ssock = __mptcp_socket_create(msk, TCP_SYN_SENT);
6292303f994SPeter Krystad 	if (IS_ERR(ssock)) {
6302303f994SPeter Krystad 		err = PTR_ERR(ssock);
6312303f994SPeter Krystad 		goto unlock;
6322303f994SPeter Krystad 	}
6332303f994SPeter Krystad 
634cf7da0d6SPeter Krystad #ifdef CONFIG_TCP_MD5SIG
635cf7da0d6SPeter Krystad 	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
636cf7da0d6SPeter Krystad 	 * TCP option space.
637cf7da0d6SPeter Krystad 	 */
638cf7da0d6SPeter Krystad 	if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
639cf7da0d6SPeter Krystad 		mptcp_subflow_ctx(ssock->sk)->request_mptcp = 0;
640cf7da0d6SPeter Krystad #endif
641cf7da0d6SPeter Krystad 
6422303f994SPeter Krystad 	err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
6432303f994SPeter Krystad 	inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
644cf7da0d6SPeter Krystad 	mptcp_copy_inaddrs(sock->sk, ssock->sk);
6452303f994SPeter Krystad 
6462303f994SPeter Krystad unlock:
6472303f994SPeter Krystad 	release_sock(sock->sk);
6482303f994SPeter Krystad 	return err;
6492303f994SPeter Krystad }
6502303f994SPeter Krystad 
651cf7da0d6SPeter Krystad static int mptcp_v4_getname(struct socket *sock, struct sockaddr *uaddr,
652cf7da0d6SPeter Krystad 			    int peer)
653cf7da0d6SPeter Krystad {
654cf7da0d6SPeter Krystad 	if (sock->sk->sk_prot == &tcp_prot) {
655cf7da0d6SPeter Krystad 		/* we are being invoked from __sys_accept4, after
656cf7da0d6SPeter Krystad 		 * mptcp_accept() has just accepted a non-mp-capable
657cf7da0d6SPeter Krystad 		 * flow: sk is a tcp_sk, not an mptcp one.
658cf7da0d6SPeter Krystad 		 *
659cf7da0d6SPeter Krystad 		 * Hand the socket over to tcp so all further socket ops
660cf7da0d6SPeter Krystad 		 * bypass mptcp.
661cf7da0d6SPeter Krystad 		 */
662cf7da0d6SPeter Krystad 		sock->ops = &inet_stream_ops;
663cf7da0d6SPeter Krystad 	}
664cf7da0d6SPeter Krystad 
665cf7da0d6SPeter Krystad 	return inet_getname(sock, uaddr, peer);
666cf7da0d6SPeter Krystad }
667cf7da0d6SPeter Krystad 
668cf7da0d6SPeter Krystad #if IS_ENABLED(CONFIG_MPTCP_IPV6)
669cf7da0d6SPeter Krystad static int mptcp_v6_getname(struct socket *sock, struct sockaddr *uaddr,
670cf7da0d6SPeter Krystad 			    int peer)
671cf7da0d6SPeter Krystad {
672cf7da0d6SPeter Krystad 	if (sock->sk->sk_prot == &tcpv6_prot) {
673cf7da0d6SPeter Krystad 		/* we are being invoked from __sys_accept4 after
674cf7da0d6SPeter Krystad 		 * mptcp_accept() has accepted a non-mp-capable
675cf7da0d6SPeter Krystad 		 * subflow: sk is a tcp_sk, not mptcp.
676cf7da0d6SPeter Krystad 		 *
677cf7da0d6SPeter Krystad 		 * Hand the socket over to tcp so all further
678cf7da0d6SPeter Krystad 		 * socket ops bypass mptcp.
679cf7da0d6SPeter Krystad 		 */
680cf7da0d6SPeter Krystad 		sock->ops = &inet6_stream_ops;
681cf7da0d6SPeter Krystad 	}
682cf7da0d6SPeter Krystad 
683cf7da0d6SPeter Krystad 	return inet6_getname(sock, uaddr, peer);
684cf7da0d6SPeter Krystad }
685cf7da0d6SPeter Krystad #endif
686cf7da0d6SPeter Krystad 
687cf7da0d6SPeter Krystad static int mptcp_listen(struct socket *sock, int backlog)
688cf7da0d6SPeter Krystad {
689cf7da0d6SPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sock->sk);
690cf7da0d6SPeter Krystad 	struct socket *ssock;
691cf7da0d6SPeter Krystad 	int err;
692cf7da0d6SPeter Krystad 
693cf7da0d6SPeter Krystad 	pr_debug("msk=%p", msk);
694cf7da0d6SPeter Krystad 
695cf7da0d6SPeter Krystad 	lock_sock(sock->sk);
696cf7da0d6SPeter Krystad 	ssock = __mptcp_socket_create(msk, TCP_LISTEN);
697cf7da0d6SPeter Krystad 	if (IS_ERR(ssock)) {
698cf7da0d6SPeter Krystad 		err = PTR_ERR(ssock);
699cf7da0d6SPeter Krystad 		goto unlock;
700cf7da0d6SPeter Krystad 	}
701cf7da0d6SPeter Krystad 
702cf7da0d6SPeter Krystad 	err = ssock->ops->listen(ssock, backlog);
703cf7da0d6SPeter Krystad 	inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
704cf7da0d6SPeter Krystad 	if (!err)
705cf7da0d6SPeter Krystad 		mptcp_copy_inaddrs(sock->sk, ssock->sk);
706cf7da0d6SPeter Krystad 
707cf7da0d6SPeter Krystad unlock:
708cf7da0d6SPeter Krystad 	release_sock(sock->sk);
709cf7da0d6SPeter Krystad 	return err;
710cf7da0d6SPeter Krystad }
711cf7da0d6SPeter Krystad 
712cf7da0d6SPeter Krystad static bool is_tcp_proto(const struct proto *p)
713cf7da0d6SPeter Krystad {
714cf7da0d6SPeter Krystad #if IS_ENABLED(CONFIG_MPTCP_IPV6)
715cf7da0d6SPeter Krystad 	return p == &tcp_prot || p == &tcpv6_prot;
716cf7da0d6SPeter Krystad #else
717cf7da0d6SPeter Krystad 	return p == &tcp_prot;
718cf7da0d6SPeter Krystad #endif
719cf7da0d6SPeter Krystad }
720cf7da0d6SPeter Krystad 
721cf7da0d6SPeter Krystad static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
722cf7da0d6SPeter Krystad 			       int flags, bool kern)
723cf7da0d6SPeter Krystad {
724cf7da0d6SPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sock->sk);
725cf7da0d6SPeter Krystad 	struct socket *ssock;
726cf7da0d6SPeter Krystad 	int err;
727cf7da0d6SPeter Krystad 
728cf7da0d6SPeter Krystad 	pr_debug("msk=%p", msk);
729cf7da0d6SPeter Krystad 
730cf7da0d6SPeter Krystad 	lock_sock(sock->sk);
731cf7da0d6SPeter Krystad 	if (sock->sk->sk_state != TCP_LISTEN)
732cf7da0d6SPeter Krystad 		goto unlock_fail;
733cf7da0d6SPeter Krystad 
734cf7da0d6SPeter Krystad 	ssock = __mptcp_nmpc_socket(msk);
735cf7da0d6SPeter Krystad 	if (!ssock)
736cf7da0d6SPeter Krystad 		goto unlock_fail;
737cf7da0d6SPeter Krystad 
738cf7da0d6SPeter Krystad 	sock_hold(ssock->sk);
739cf7da0d6SPeter Krystad 	release_sock(sock->sk);
740cf7da0d6SPeter Krystad 
741cf7da0d6SPeter Krystad 	err = ssock->ops->accept(sock, newsock, flags, kern);
742cf7da0d6SPeter Krystad 	if (err == 0 && !is_tcp_proto(newsock->sk->sk_prot)) {
743cf7da0d6SPeter Krystad 		struct mptcp_sock *msk = mptcp_sk(newsock->sk);
744cf7da0d6SPeter Krystad 		struct mptcp_subflow_context *subflow;
745cf7da0d6SPeter Krystad 
746cf7da0d6SPeter Krystad 		/* set ssk->sk_socket of accept()ed flows to mptcp socket.
747cf7da0d6SPeter Krystad 		 * This is needed so NOSPACE flag can be set from tcp stack.
748cf7da0d6SPeter Krystad 		 */
749cf7da0d6SPeter Krystad 		list_for_each_entry(subflow, &msk->conn_list, node) {
750cf7da0d6SPeter Krystad 			struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
751cf7da0d6SPeter Krystad 
752cf7da0d6SPeter Krystad 			if (!ssk->sk_socket)
753cf7da0d6SPeter Krystad 				mptcp_sock_graft(ssk, newsock);
754cf7da0d6SPeter Krystad 		}
755cf7da0d6SPeter Krystad 
756cf7da0d6SPeter Krystad 		inet_sk_state_store(newsock->sk, TCP_ESTABLISHED);
757cf7da0d6SPeter Krystad 	}
758cf7da0d6SPeter Krystad 
759cf7da0d6SPeter Krystad 	sock_put(ssock->sk);
760cf7da0d6SPeter Krystad 	return err;
761cf7da0d6SPeter Krystad 
762cf7da0d6SPeter Krystad unlock_fail:
763cf7da0d6SPeter Krystad 	release_sock(sock->sk);
764cf7da0d6SPeter Krystad 	return -EINVAL;
765cf7da0d6SPeter Krystad }
766cf7da0d6SPeter Krystad 
7672303f994SPeter Krystad static __poll_t mptcp_poll(struct file *file, struct socket *sock,
7682303f994SPeter Krystad 			   struct poll_table_struct *wait)
7692303f994SPeter Krystad {
7702303f994SPeter Krystad 	__poll_t mask = 0;
7712303f994SPeter Krystad 
7722303f994SPeter Krystad 	return mask;
7732303f994SPeter Krystad }
7742303f994SPeter Krystad 
77521498490SPeter Krystad static int mptcp_shutdown(struct socket *sock, int how)
77621498490SPeter Krystad {
77721498490SPeter Krystad 	struct mptcp_sock *msk = mptcp_sk(sock->sk);
77821498490SPeter Krystad 	struct mptcp_subflow_context *subflow;
77921498490SPeter Krystad 	int ret = 0;
78021498490SPeter Krystad 
78121498490SPeter Krystad 	pr_debug("sk=%p, how=%d", msk, how);
78221498490SPeter Krystad 
78321498490SPeter Krystad 	lock_sock(sock->sk);
78421498490SPeter Krystad 
78521498490SPeter Krystad 	if (how == SHUT_WR || how == SHUT_RDWR)
78621498490SPeter Krystad 		inet_sk_state_store(sock->sk, TCP_FIN_WAIT1);
78721498490SPeter Krystad 
78821498490SPeter Krystad 	how++;
78921498490SPeter Krystad 
79021498490SPeter Krystad 	if ((how & ~SHUTDOWN_MASK) || !how) {
79121498490SPeter Krystad 		ret = -EINVAL;
79221498490SPeter Krystad 		goto out_unlock;
79321498490SPeter Krystad 	}
79421498490SPeter Krystad 
79521498490SPeter Krystad 	if (sock->state == SS_CONNECTING) {
79621498490SPeter Krystad 		if ((1 << sock->sk->sk_state) &
79721498490SPeter Krystad 		    (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_CLOSE))
79821498490SPeter Krystad 			sock->state = SS_DISCONNECTING;
79921498490SPeter Krystad 		else
80021498490SPeter Krystad 			sock->state = SS_CONNECTED;
80121498490SPeter Krystad 	}
80221498490SPeter Krystad 
80321498490SPeter Krystad 	mptcp_for_each_subflow(msk, subflow) {
80421498490SPeter Krystad 		struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
80521498490SPeter Krystad 
80621498490SPeter Krystad 		mptcp_subflow_shutdown(tcp_sk, how);
80721498490SPeter Krystad 	}
80821498490SPeter Krystad 
80921498490SPeter Krystad out_unlock:
81021498490SPeter Krystad 	release_sock(sock->sk);
81121498490SPeter Krystad 
81221498490SPeter Krystad 	return ret;
81321498490SPeter Krystad }
81421498490SPeter Krystad 
8152303f994SPeter Krystad static struct proto_ops mptcp_stream_ops;
8162303f994SPeter Krystad 
817f870fa0bSMat Martineau static struct inet_protosw mptcp_protosw = {
818f870fa0bSMat Martineau 	.type		= SOCK_STREAM,
819f870fa0bSMat Martineau 	.protocol	= IPPROTO_MPTCP,
820f870fa0bSMat Martineau 	.prot		= &mptcp_prot,
8212303f994SPeter Krystad 	.ops		= &mptcp_stream_ops,
8222303f994SPeter Krystad 	.flags		= INET_PROTOSW_ICSK,
823f870fa0bSMat Martineau };
824f870fa0bSMat Martineau 
825f870fa0bSMat Martineau void __init mptcp_init(void)
826f870fa0bSMat Martineau {
8272303f994SPeter Krystad 	mptcp_prot.h.hashinfo = tcp_prot.h.hashinfo;
8282303f994SPeter Krystad 	mptcp_stream_ops = inet_stream_ops;
8292303f994SPeter Krystad 	mptcp_stream_ops.bind = mptcp_bind;
8302303f994SPeter Krystad 	mptcp_stream_ops.connect = mptcp_stream_connect;
8312303f994SPeter Krystad 	mptcp_stream_ops.poll = mptcp_poll;
832cf7da0d6SPeter Krystad 	mptcp_stream_ops.accept = mptcp_stream_accept;
833cf7da0d6SPeter Krystad 	mptcp_stream_ops.getname = mptcp_v4_getname;
834cf7da0d6SPeter Krystad 	mptcp_stream_ops.listen = mptcp_listen;
83521498490SPeter Krystad 	mptcp_stream_ops.shutdown = mptcp_shutdown;
8362303f994SPeter Krystad 
8372303f994SPeter Krystad 	mptcp_subflow_init();
8382303f994SPeter Krystad 
839f870fa0bSMat Martineau 	if (proto_register(&mptcp_prot, 1) != 0)
840f870fa0bSMat Martineau 		panic("Failed to register MPTCP proto.\n");
841f870fa0bSMat Martineau 
842f870fa0bSMat Martineau 	inet_register_protosw(&mptcp_protosw);
843f870fa0bSMat Martineau }
844f870fa0bSMat Martineau 
845f870fa0bSMat Martineau #if IS_ENABLED(CONFIG_MPTCP_IPV6)
8462303f994SPeter Krystad static struct proto_ops mptcp_v6_stream_ops;
847f870fa0bSMat Martineau static struct proto mptcp_v6_prot;
848f870fa0bSMat Martineau 
/* destroy hook for IPv6 MPTCP sockets: run the common MPTCP teardown
 * first, then the inet6 socket teardown.
 */
static void mptcp_v6_destroy(struct sock *sk)
{
	mptcp_destroy(sk);

	inet6_destroy_sock(sk);
}
85479c0949eSPeter Krystad 
855f870fa0bSMat Martineau static struct inet_protosw mptcp_v6_protosw = {
856f870fa0bSMat Martineau 	.type		= SOCK_STREAM,
857f870fa0bSMat Martineau 	.protocol	= IPPROTO_MPTCP,
858f870fa0bSMat Martineau 	.prot		= &mptcp_v6_prot,
8592303f994SPeter Krystad 	.ops		= &mptcp_v6_stream_ops,
860f870fa0bSMat Martineau 	.flags		= INET_PROTOSW_ICSK,
861f870fa0bSMat Martineau };
862f870fa0bSMat Martineau 
863f870fa0bSMat Martineau int mptcpv6_init(void)
864f870fa0bSMat Martineau {
865f870fa0bSMat Martineau 	int err;
866f870fa0bSMat Martineau 
867f870fa0bSMat Martineau 	mptcp_v6_prot = mptcp_prot;
868f870fa0bSMat Martineau 	strcpy(mptcp_v6_prot.name, "MPTCPv6");
869f870fa0bSMat Martineau 	mptcp_v6_prot.slab = NULL;
87079c0949eSPeter Krystad 	mptcp_v6_prot.destroy = mptcp_v6_destroy;
871f870fa0bSMat Martineau 	mptcp_v6_prot.obj_size = sizeof(struct mptcp_sock) +
872f870fa0bSMat Martineau 				 sizeof(struct ipv6_pinfo);
873f870fa0bSMat Martineau 
874f870fa0bSMat Martineau 	err = proto_register(&mptcp_v6_prot, 1);
875f870fa0bSMat Martineau 	if (err)
876f870fa0bSMat Martineau 		return err;
877f870fa0bSMat Martineau 
8782303f994SPeter Krystad 	mptcp_v6_stream_ops = inet6_stream_ops;
8792303f994SPeter Krystad 	mptcp_v6_stream_ops.bind = mptcp_bind;
8802303f994SPeter Krystad 	mptcp_v6_stream_ops.connect = mptcp_stream_connect;
8812303f994SPeter Krystad 	mptcp_v6_stream_ops.poll = mptcp_poll;
882cf7da0d6SPeter Krystad 	mptcp_v6_stream_ops.accept = mptcp_stream_accept;
883cf7da0d6SPeter Krystad 	mptcp_v6_stream_ops.getname = mptcp_v6_getname;
884cf7da0d6SPeter Krystad 	mptcp_v6_stream_ops.listen = mptcp_listen;
88521498490SPeter Krystad 	mptcp_v6_stream_ops.shutdown = mptcp_shutdown;
8862303f994SPeter Krystad 
887f870fa0bSMat Martineau 	err = inet6_register_protosw(&mptcp_v6_protosw);
888f870fa0bSMat Martineau 	if (err)
889f870fa0bSMat Martineau 		proto_unregister(&mptcp_v6_prot);
890f870fa0bSMat Martineau 
891f870fa0bSMat Martineau 	return err;
892f870fa0bSMat Martineau }
893f870fa0bSMat Martineau #endif
894