1f870fa0bSMat Martineau // SPDX-License-Identifier: GPL-2.0 2f870fa0bSMat Martineau /* Multipath TCP 3f870fa0bSMat Martineau * 4f870fa0bSMat Martineau * Copyright (c) 2017 - 2019, Intel Corporation. 5f870fa0bSMat Martineau */ 6f870fa0bSMat Martineau 7f870fa0bSMat Martineau #define pr_fmt(fmt) "MPTCP: " fmt 8f870fa0bSMat Martineau 9f870fa0bSMat Martineau #include <linux/kernel.h> 10f870fa0bSMat Martineau #include <linux/module.h> 11f870fa0bSMat Martineau #include <linux/netdevice.h> 12f870fa0bSMat Martineau #include <net/sock.h> 13f870fa0bSMat Martineau #include <net/inet_common.h> 14f870fa0bSMat Martineau #include <net/inet_hashtables.h> 15f870fa0bSMat Martineau #include <net/protocol.h> 16f870fa0bSMat Martineau #include <net/tcp.h> 17f870fa0bSMat Martineau #include <net/mptcp.h> 18f870fa0bSMat Martineau #include "protocol.h" 19f870fa0bSMat Martineau 20*2303f994SPeter Krystad #define MPTCP_SAME_STATE TCP_MAX_STATES 21*2303f994SPeter Krystad 22*2303f994SPeter Krystad /* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not 23*2303f994SPeter Krystad * completed yet or has failed, return the subflow socket. 24*2303f994SPeter Krystad * Otherwise return NULL. 25*2303f994SPeter Krystad */ 26*2303f994SPeter Krystad static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk) 27*2303f994SPeter Krystad { 28*2303f994SPeter Krystad if (!msk->subflow) 29*2303f994SPeter Krystad return NULL; 30*2303f994SPeter Krystad 31*2303f994SPeter Krystad return msk->subflow; 32*2303f994SPeter Krystad } 33*2303f994SPeter Krystad 34*2303f994SPeter Krystad static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk) 35*2303f994SPeter Krystad { 36*2303f994SPeter Krystad return ((struct sock *)msk)->sk_state == TCP_CLOSE; 37*2303f994SPeter Krystad } 38*2303f994SPeter Krystad 39*2303f994SPeter Krystad static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state) 40*2303f994SPeter Krystad { 41*2303f994SPeter Krystad struct mptcp_subflow_context *subflow; 42*2303f994SPeter Krystad struct sock *sk = (struct sock *)msk; 43*2303f994SPeter Krystad struct socket *ssock; 44*2303f994SPeter Krystad int err; 45*2303f994SPeter Krystad 46*2303f994SPeter Krystad ssock = __mptcp_nmpc_socket(msk); 47*2303f994SPeter Krystad if (ssock) 48*2303f994SPeter Krystad goto set_state; 49*2303f994SPeter Krystad 50*2303f994SPeter Krystad if (!__mptcp_can_create_subflow(msk)) 51*2303f994SPeter Krystad return ERR_PTR(-EINVAL); 52*2303f994SPeter Krystad 53*2303f994SPeter Krystad err = mptcp_subflow_create_socket(sk, &ssock); 54*2303f994SPeter Krystad if (err) 55*2303f994SPeter Krystad return ERR_PTR(err); 56*2303f994SPeter Krystad 57*2303f994SPeter Krystad msk->subflow = ssock; 58*2303f994SPeter Krystad subflow = mptcp_subflow_ctx(ssock->sk); 59*2303f994SPeter Krystad subflow->request_mptcp = 1; 60*2303f994SPeter Krystad 61*2303f994SPeter Krystad set_state: 62*2303f994SPeter Krystad if (state != MPTCP_SAME_STATE) 63*2303f994SPeter Krystad inet_sk_state_store(sk, state); 64*2303f994SPeter Krystad return ssock; 65*2303f994SPeter Krystad } 66*2303f994SPeter Krystad 67f870fa0bSMat Martineau static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) 68f870fa0bSMat Martineau { 69f870fa0bSMat Martineau struct mptcp_sock *msk = mptcp_sk(sk); 70f870fa0bSMat Martineau struct socket *subflow = msk->subflow; 71f870fa0bSMat Martineau 72f870fa0bSMat Martineau if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) 73f870fa0bSMat Martineau return -EOPNOTSUPP; 74f870fa0bSMat Martineau 75f870fa0bSMat Martineau return sock_sendmsg(subflow, msg); 76f870fa0bSMat Martineau } 77f870fa0bSMat Martineau 78f870fa0bSMat Martineau static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, 79f870fa0bSMat Martineau int nonblock, int flags, int *addr_len) 80f870fa0bSMat Martineau { 81f870fa0bSMat Martineau struct mptcp_sock *msk = mptcp_sk(sk); 82f870fa0bSMat Martineau struct socket *subflow = msk->subflow; 83f870fa0bSMat Martineau 84f870fa0bSMat Martineau if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT)) 85f870fa0bSMat Martineau return -EOPNOTSUPP; 86f870fa0bSMat Martineau 87f870fa0bSMat Martineau return sock_recvmsg(subflow, msg, flags); 88f870fa0bSMat Martineau } 89f870fa0bSMat Martineau 90f870fa0bSMat Martineau static int mptcp_init_sock(struct sock *sk) 91f870fa0bSMat Martineau { 92f870fa0bSMat Martineau return 0; 93f870fa0bSMat Martineau } 94f870fa0bSMat Martineau 95f870fa0bSMat Martineau static void mptcp_close(struct sock *sk, long timeout) 96f870fa0bSMat Martineau { 97f870fa0bSMat Martineau struct mptcp_sock *msk = mptcp_sk(sk); 98*2303f994SPeter Krystad struct socket *ssock; 99f870fa0bSMat Martineau 100f870fa0bSMat Martineau inet_sk_state_store(sk, TCP_CLOSE); 101f870fa0bSMat Martineau 102*2303f994SPeter Krystad ssock = __mptcp_nmpc_socket(msk); 103*2303f994SPeter Krystad if (ssock) { 104*2303f994SPeter Krystad pr_debug("subflow=%p", mptcp_subflow_ctx(ssock->sk)); 105*2303f994SPeter Krystad sock_release(ssock); 106f870fa0bSMat Martineau } 107f870fa0bSMat Martineau 108f870fa0bSMat Martineau sock_orphan(sk); 109f870fa0bSMat Martineau sock_put(sk); 110f870fa0bSMat Martineau } 111f870fa0bSMat Martineau 112f870fa0bSMat Martineau static int mptcp_connect(struct sock *sk, struct sockaddr *saddr, int len) 113f870fa0bSMat Martineau { 114f870fa0bSMat Martineau struct mptcp_sock *msk = mptcp_sk(sk); 115f870fa0bSMat Martineau int err; 116f870fa0bSMat Martineau 117f870fa0bSMat Martineau saddr->sa_family = AF_INET; 118f870fa0bSMat Martineau 119*2303f994SPeter Krystad pr_debug("msk=%p, subflow=%p", msk, 120*2303f994SPeter Krystad mptcp_subflow_ctx(msk->subflow->sk)); 121f870fa0bSMat Martineau 122f870fa0bSMat Martineau err = kernel_connect(msk->subflow, saddr, len, 0); 123f870fa0bSMat Martineau 124f870fa0bSMat Martineau sk->sk_state = TCP_ESTABLISHED; 125f870fa0bSMat Martineau 126f870fa0bSMat Martineau return err; 127f870fa0bSMat Martineau } 128f870fa0bSMat Martineau 129f870fa0bSMat Martineau static struct proto mptcp_prot = { 130f870fa0bSMat Martineau .name = "MPTCP", 131f870fa0bSMat Martineau .owner = THIS_MODULE, 132f870fa0bSMat Martineau .init = mptcp_init_sock, 133f870fa0bSMat Martineau .close = mptcp_close, 134f870fa0bSMat Martineau .accept = inet_csk_accept, 135f870fa0bSMat Martineau .connect = mptcp_connect, 136f870fa0bSMat Martineau .shutdown = tcp_shutdown, 137f870fa0bSMat Martineau .sendmsg = mptcp_sendmsg, 138f870fa0bSMat Martineau .recvmsg = mptcp_recvmsg, 139f870fa0bSMat Martineau .hash = inet_hash, 140f870fa0bSMat Martineau .unhash = inet_unhash, 141f870fa0bSMat Martineau .get_port = inet_csk_get_port, 142f870fa0bSMat Martineau .obj_size = sizeof(struct mptcp_sock), 143f870fa0bSMat Martineau .no_autobind = true, 144f870fa0bSMat Martineau }; 145f870fa0bSMat Martineau 146*2303f994SPeter Krystad static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 147*2303f994SPeter Krystad { 148*2303f994SPeter Krystad struct mptcp_sock *msk = mptcp_sk(sock->sk); 149*2303f994SPeter Krystad struct socket *ssock; 150*2303f994SPeter Krystad int err = -ENOTSUPP; 151*2303f994SPeter Krystad 152*2303f994SPeter Krystad if (uaddr->sa_family != AF_INET) // @@ allow only IPv4 for now 153*2303f994SPeter Krystad return err; 154*2303f994SPeter Krystad 155*2303f994SPeter Krystad lock_sock(sock->sk); 156*2303f994SPeter Krystad ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE); 157*2303f994SPeter Krystad if (IS_ERR(ssock)) { 158*2303f994SPeter Krystad err = PTR_ERR(ssock); 159*2303f994SPeter Krystad goto unlock; 160*2303f994SPeter Krystad } 161*2303f994SPeter Krystad 162*2303f994SPeter Krystad err = ssock->ops->bind(ssock, uaddr, addr_len); 163*2303f994SPeter Krystad 164*2303f994SPeter Krystad unlock: 165*2303f994SPeter Krystad release_sock(sock->sk); 166*2303f994SPeter Krystad return err; 167*2303f994SPeter Krystad } 168*2303f994SPeter Krystad 169*2303f994SPeter Krystad static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, 170*2303f994SPeter Krystad int addr_len, int flags) 171*2303f994SPeter Krystad { 172*2303f994SPeter Krystad struct mptcp_sock *msk = mptcp_sk(sock->sk); 173*2303f994SPeter Krystad struct socket *ssock; 174*2303f994SPeter Krystad int err; 175*2303f994SPeter Krystad 176*2303f994SPeter Krystad lock_sock(sock->sk); 177*2303f994SPeter Krystad ssock = __mptcp_socket_create(msk, TCP_SYN_SENT); 178*2303f994SPeter Krystad if (IS_ERR(ssock)) { 179*2303f994SPeter Krystad err = PTR_ERR(ssock); 180*2303f994SPeter Krystad goto unlock; 181*2303f994SPeter Krystad } 182*2303f994SPeter Krystad 183*2303f994SPeter Krystad err = ssock->ops->connect(ssock, uaddr, addr_len, flags); 184*2303f994SPeter Krystad inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk)); 185*2303f994SPeter Krystad 186*2303f994SPeter Krystad unlock: 187*2303f994SPeter Krystad release_sock(sock->sk); 188*2303f994SPeter Krystad return err; 189*2303f994SPeter Krystad } 190*2303f994SPeter Krystad 191*2303f994SPeter Krystad static __poll_t mptcp_poll(struct file *file, struct socket *sock, 192*2303f994SPeter Krystad struct poll_table_struct *wait) 193*2303f994SPeter Krystad { 194*2303f994SPeter Krystad __poll_t mask = 0; 195*2303f994SPeter Krystad 196*2303f994SPeter Krystad return mask; 197*2303f994SPeter Krystad } 198*2303f994SPeter Krystad 199*2303f994SPeter Krystad static struct proto_ops mptcp_stream_ops; 200*2303f994SPeter Krystad 201f870fa0bSMat Martineau static struct inet_protosw mptcp_protosw = { 202f870fa0bSMat Martineau .type = SOCK_STREAM, 203f870fa0bSMat Martineau .protocol = IPPROTO_MPTCP, 204f870fa0bSMat Martineau .prot = &mptcp_prot, 205*2303f994SPeter Krystad .ops = &mptcp_stream_ops, 206*2303f994SPeter Krystad .flags = INET_PROTOSW_ICSK, 207f870fa0bSMat Martineau }; 208f870fa0bSMat Martineau 209f870fa0bSMat Martineau void __init mptcp_init(void) 210f870fa0bSMat Martineau { 211*2303f994SPeter Krystad mptcp_prot.h.hashinfo = tcp_prot.h.hashinfo; 212*2303f994SPeter Krystad mptcp_stream_ops = inet_stream_ops; 213*2303f994SPeter Krystad mptcp_stream_ops.bind = mptcp_bind; 214*2303f994SPeter Krystad mptcp_stream_ops.connect = mptcp_stream_connect; 215*2303f994SPeter Krystad mptcp_stream_ops.poll = mptcp_poll; 216*2303f994SPeter Krystad 217*2303f994SPeter Krystad mptcp_subflow_init(); 218*2303f994SPeter Krystad 219f870fa0bSMat Martineau if (proto_register(&mptcp_prot, 1) != 0) 220f870fa0bSMat Martineau panic("Failed to register MPTCP proto.\n"); 221f870fa0bSMat Martineau 222f870fa0bSMat Martineau inet_register_protosw(&mptcp_protosw); 223f870fa0bSMat Martineau } 224f870fa0bSMat Martineau 225f870fa0bSMat Martineau #if IS_ENABLED(CONFIG_MPTCP_IPV6) 226*2303f994SPeter Krystad static struct proto_ops mptcp_v6_stream_ops; 227f870fa0bSMat Martineau static struct proto mptcp_v6_prot; 228f870fa0bSMat Martineau 229f870fa0bSMat Martineau static struct inet_protosw mptcp_v6_protosw = { 230f870fa0bSMat Martineau .type = SOCK_STREAM, 231f870fa0bSMat Martineau .protocol = IPPROTO_MPTCP, 232f870fa0bSMat Martineau .prot = &mptcp_v6_prot, 233*2303f994SPeter Krystad .ops = &mptcp_v6_stream_ops, 234f870fa0bSMat Martineau .flags = INET_PROTOSW_ICSK, 235f870fa0bSMat Martineau }; 236f870fa0bSMat Martineau 237f870fa0bSMat Martineau int mptcpv6_init(void) 238f870fa0bSMat Martineau { 239f870fa0bSMat Martineau int err; 240f870fa0bSMat Martineau 241f870fa0bSMat Martineau mptcp_v6_prot = mptcp_prot; 242f870fa0bSMat Martineau strcpy(mptcp_v6_prot.name, "MPTCPv6"); 243f870fa0bSMat Martineau mptcp_v6_prot.slab = NULL; 244f870fa0bSMat Martineau mptcp_v6_prot.obj_size = sizeof(struct mptcp_sock) + 245f870fa0bSMat Martineau sizeof(struct ipv6_pinfo); 246f870fa0bSMat Martineau 247f870fa0bSMat Martineau err = proto_register(&mptcp_v6_prot, 1); 248f870fa0bSMat Martineau if (err) 249f870fa0bSMat Martineau return err; 250f870fa0bSMat Martineau 251*2303f994SPeter Krystad mptcp_v6_stream_ops = inet6_stream_ops; 252*2303f994SPeter Krystad mptcp_v6_stream_ops.bind = mptcp_bind; 253*2303f994SPeter Krystad mptcp_v6_stream_ops.connect = mptcp_stream_connect; 254*2303f994SPeter Krystad mptcp_v6_stream_ops.poll = mptcp_poll; 255*2303f994SPeter Krystad 256f870fa0bSMat Martineau err = inet6_register_protosw(&mptcp_v6_protosw); 257f870fa0bSMat Martineau if (err) 258f870fa0bSMat Martineau proto_unregister(&mptcp_v6_prot); 259f870fa0bSMat Martineau 260f870fa0bSMat Martineau return err; 261f870fa0bSMat Martineau } 262f870fa0bSMat Martineau #endif 263