1f870fa0bSMat Martineau // SPDX-License-Identifier: GPL-2.0 2f870fa0bSMat Martineau /* Multipath TCP 3f870fa0bSMat Martineau * 4f870fa0bSMat Martineau * Copyright (c) 2017 - 2019, Intel Corporation. 5f870fa0bSMat Martineau */ 6f870fa0bSMat Martineau 7f870fa0bSMat Martineau #define pr_fmt(fmt) "MPTCP: " fmt 8f870fa0bSMat Martineau 9f870fa0bSMat Martineau #include <linux/kernel.h> 10f870fa0bSMat Martineau #include <linux/module.h> 11f870fa0bSMat Martineau #include <linux/netdevice.h> 12f870fa0bSMat Martineau #include <net/sock.h> 13f870fa0bSMat Martineau #include <net/inet_common.h> 14f870fa0bSMat Martineau #include <net/inet_hashtables.h> 15f870fa0bSMat Martineau #include <net/protocol.h> 16f870fa0bSMat Martineau #include <net/tcp.h> 17f870fa0bSMat Martineau #include <net/mptcp.h> 18f870fa0bSMat Martineau #include "protocol.h" 19f870fa0bSMat Martineau 202303f994SPeter Krystad #define MPTCP_SAME_STATE TCP_MAX_STATES 212303f994SPeter Krystad 222303f994SPeter Krystad /* If msk has an initial subflow socket, and the MP_CAPABLE handshake has not 232303f994SPeter Krystad * completed yet or has failed, return the subflow socket. 242303f994SPeter Krystad * Otherwise return NULL. 252303f994SPeter Krystad */ 262303f994SPeter Krystad static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk) 272303f994SPeter Krystad { 28*cec37a6eSPeter Krystad if (!msk->subflow || mptcp_subflow_ctx(msk->subflow->sk)->fourth_ack) 292303f994SPeter Krystad return NULL; 302303f994SPeter Krystad 312303f994SPeter Krystad return msk->subflow; 322303f994SPeter Krystad } 332303f994SPeter Krystad 34*cec37a6eSPeter Krystad /* if msk has a single subflow, and the mp_capable handshake is failed, 35*cec37a6eSPeter Krystad * return it. 36*cec37a6eSPeter Krystad * Otherwise returns NULL 37*cec37a6eSPeter Krystad */ 38*cec37a6eSPeter Krystad static struct socket *__mptcp_tcp_fallback(const struct mptcp_sock *msk) 39*cec37a6eSPeter Krystad { 40*cec37a6eSPeter Krystad struct socket *ssock = __mptcp_nmpc_socket(msk); 41*cec37a6eSPeter Krystad 42*cec37a6eSPeter Krystad sock_owned_by_me((const struct sock *)msk); 43*cec37a6eSPeter Krystad 44*cec37a6eSPeter Krystad if (!ssock || sk_is_mptcp(ssock->sk)) 45*cec37a6eSPeter Krystad return NULL; 46*cec37a6eSPeter Krystad 47*cec37a6eSPeter Krystad return ssock; 48*cec37a6eSPeter Krystad } 49*cec37a6eSPeter Krystad 502303f994SPeter Krystad static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk) 512303f994SPeter Krystad { 522303f994SPeter Krystad return ((struct sock *)msk)->sk_state == TCP_CLOSE; 532303f994SPeter Krystad } 542303f994SPeter Krystad 552303f994SPeter Krystad static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state) 562303f994SPeter Krystad { 572303f994SPeter Krystad struct mptcp_subflow_context *subflow; 582303f994SPeter Krystad struct sock *sk = (struct sock *)msk; 592303f994SPeter Krystad struct socket *ssock; 602303f994SPeter Krystad int err; 612303f994SPeter Krystad 622303f994SPeter Krystad ssock = __mptcp_nmpc_socket(msk); 632303f994SPeter Krystad if (ssock) 642303f994SPeter Krystad goto set_state; 652303f994SPeter Krystad 662303f994SPeter Krystad if (!__mptcp_can_create_subflow(msk)) 672303f994SPeter Krystad return ERR_PTR(-EINVAL); 682303f994SPeter Krystad 692303f994SPeter Krystad err = mptcp_subflow_create_socket(sk, &ssock); 702303f994SPeter Krystad if (err) 712303f994SPeter Krystad return ERR_PTR(err); 722303f994SPeter Krystad 732303f994SPeter Krystad msk->subflow = ssock; 742303f994SPeter Krystad subflow = mptcp_subflow_ctx(ssock->sk); 75*cec37a6eSPeter Krystad list_add(&subflow->node, &msk->conn_list); 762303f994SPeter Krystad subflow->request_mptcp = 1; 772303f994SPeter Krystad 782303f994SPeter Krystad set_state: 792303f994SPeter Krystad if (state != MPTCP_SAME_STATE) 802303f994SPeter Krystad inet_sk_state_store(sk, state); 812303f994SPeter Krystad return ssock; 822303f994SPeter Krystad } 832303f994SPeter Krystad 84*cec37a6eSPeter Krystad static struct sock *mptcp_subflow_get(const struct mptcp_sock *msk) 85*cec37a6eSPeter Krystad { 86*cec37a6eSPeter Krystad struct mptcp_subflow_context *subflow; 87*cec37a6eSPeter Krystad 88*cec37a6eSPeter Krystad sock_owned_by_me((const struct sock *)msk); 89*cec37a6eSPeter Krystad 90*cec37a6eSPeter Krystad mptcp_for_each_subflow(msk, subflow) { 91*cec37a6eSPeter Krystad return mptcp_subflow_tcp_sock(subflow); 92*cec37a6eSPeter Krystad } 93*cec37a6eSPeter Krystad 94*cec37a6eSPeter Krystad return NULL; 95*cec37a6eSPeter Krystad } 96*cec37a6eSPeter Krystad 97f870fa0bSMat Martineau static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) 98f870fa0bSMat Martineau { 99f870fa0bSMat Martineau struct mptcp_sock *msk = mptcp_sk(sk); 100*cec37a6eSPeter Krystad struct socket *ssock; 101*cec37a6eSPeter Krystad struct sock *ssk; 102*cec37a6eSPeter Krystad int ret; 103f870fa0bSMat Martineau 104f870fa0bSMat Martineau if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) 105f870fa0bSMat Martineau return -EOPNOTSUPP; 106f870fa0bSMat Martineau 107*cec37a6eSPeter Krystad lock_sock(sk); 108*cec37a6eSPeter Krystad ssock = __mptcp_tcp_fallback(msk); 109*cec37a6eSPeter Krystad if (ssock) { 110*cec37a6eSPeter Krystad pr_debug("fallback passthrough"); 111*cec37a6eSPeter Krystad ret = sock_sendmsg(ssock, msg); 112*cec37a6eSPeter Krystad release_sock(sk); 113*cec37a6eSPeter Krystad return ret; 114*cec37a6eSPeter Krystad } 115*cec37a6eSPeter Krystad 116*cec37a6eSPeter Krystad ssk = mptcp_subflow_get(msk); 117*cec37a6eSPeter Krystad if (!ssk) { 118*cec37a6eSPeter Krystad release_sock(sk); 119*cec37a6eSPeter Krystad return -ENOTCONN; 120*cec37a6eSPeter Krystad } 121*cec37a6eSPeter Krystad 122*cec37a6eSPeter Krystad ret = sock_sendmsg(ssk->sk_socket, msg); 123*cec37a6eSPeter Krystad 124*cec37a6eSPeter Krystad release_sock(sk); 125*cec37a6eSPeter Krystad return ret; 126f870fa0bSMat Martineau } 127f870fa0bSMat Martineau 128f870fa0bSMat Martineau static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, 129f870fa0bSMat Martineau int nonblock, int flags, int *addr_len) 130f870fa0bSMat Martineau { 131f870fa0bSMat Martineau struct mptcp_sock *msk = mptcp_sk(sk); 132*cec37a6eSPeter Krystad struct socket *ssock; 133*cec37a6eSPeter Krystad struct sock *ssk; 134*cec37a6eSPeter Krystad int copied = 0; 135f870fa0bSMat Martineau 136f870fa0bSMat Martineau if (msg->msg_flags & ~(MSG_WAITALL | MSG_DONTWAIT)) 137f870fa0bSMat Martineau return -EOPNOTSUPP; 138f870fa0bSMat Martineau 139*cec37a6eSPeter Krystad lock_sock(sk); 140*cec37a6eSPeter Krystad ssock = __mptcp_tcp_fallback(msk); 141*cec37a6eSPeter Krystad if (ssock) { 142*cec37a6eSPeter Krystad pr_debug("fallback-read subflow=%p", 143*cec37a6eSPeter Krystad mptcp_subflow_ctx(ssock->sk)); 144*cec37a6eSPeter Krystad copied = sock_recvmsg(ssock, msg, flags); 145*cec37a6eSPeter Krystad release_sock(sk); 146*cec37a6eSPeter Krystad return copied; 147*cec37a6eSPeter Krystad } 148*cec37a6eSPeter Krystad 149*cec37a6eSPeter Krystad ssk = mptcp_subflow_get(msk); 150*cec37a6eSPeter Krystad if (!ssk) { 151*cec37a6eSPeter Krystad release_sock(sk); 152*cec37a6eSPeter Krystad return -ENOTCONN; 153*cec37a6eSPeter Krystad } 154*cec37a6eSPeter Krystad 155*cec37a6eSPeter Krystad copied = sock_recvmsg(ssk->sk_socket, msg, flags); 156*cec37a6eSPeter Krystad 157*cec37a6eSPeter Krystad release_sock(sk); 158*cec37a6eSPeter Krystad 159*cec37a6eSPeter Krystad return copied; 160*cec37a6eSPeter Krystad } 161*cec37a6eSPeter Krystad 162*cec37a6eSPeter Krystad /* subflow sockets can be either outgoing (connect) or incoming 163*cec37a6eSPeter Krystad * (accept). 164*cec37a6eSPeter Krystad * 165*cec37a6eSPeter Krystad * Outgoing subflows use in-kernel sockets. 166*cec37a6eSPeter Krystad * Incoming subflows do not have their own 'struct socket' allocated, 167*cec37a6eSPeter Krystad * so we need to use tcp_close() after detaching them from the mptcp 168*cec37a6eSPeter Krystad * parent socket. 169*cec37a6eSPeter Krystad */ 170*cec37a6eSPeter Krystad static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, 171*cec37a6eSPeter Krystad struct mptcp_subflow_context *subflow, 172*cec37a6eSPeter Krystad long timeout) 173*cec37a6eSPeter Krystad { 174*cec37a6eSPeter Krystad struct socket *sock = READ_ONCE(ssk->sk_socket); 175*cec37a6eSPeter Krystad 176*cec37a6eSPeter Krystad list_del(&subflow->node); 177*cec37a6eSPeter Krystad 178*cec37a6eSPeter Krystad if (sock && sock != sk->sk_socket) { 179*cec37a6eSPeter Krystad /* outgoing subflow */ 180*cec37a6eSPeter Krystad sock_release(sock); 181*cec37a6eSPeter Krystad } else { 182*cec37a6eSPeter Krystad /* incoming subflow */ 183*cec37a6eSPeter Krystad tcp_close(ssk, timeout); 184*cec37a6eSPeter Krystad } 185f870fa0bSMat Martineau } 186f870fa0bSMat Martineau 187f870fa0bSMat Martineau static int mptcp_init_sock(struct sock *sk) 188f870fa0bSMat Martineau { 189*cec37a6eSPeter Krystad struct mptcp_sock *msk = mptcp_sk(sk); 190*cec37a6eSPeter Krystad 191*cec37a6eSPeter Krystad INIT_LIST_HEAD(&msk->conn_list); 192*cec37a6eSPeter Krystad 193f870fa0bSMat Martineau return 0; 194f870fa0bSMat Martineau } 195f870fa0bSMat Martineau 196f870fa0bSMat Martineau static void mptcp_close(struct sock *sk, long timeout) 197f870fa0bSMat Martineau { 198*cec37a6eSPeter Krystad struct mptcp_subflow_context *subflow, *tmp; 199f870fa0bSMat Martineau struct mptcp_sock *msk = mptcp_sk(sk); 200f870fa0bSMat Martineau 201f870fa0bSMat Martineau inet_sk_state_store(sk, TCP_CLOSE); 202f870fa0bSMat Martineau 203*cec37a6eSPeter Krystad lock_sock(sk); 204*cec37a6eSPeter Krystad 205*cec37a6eSPeter Krystad list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { 206*cec37a6eSPeter Krystad struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 207*cec37a6eSPeter Krystad 208*cec37a6eSPeter Krystad __mptcp_close_ssk(sk, ssk, subflow, timeout); 209f870fa0bSMat Martineau } 210f870fa0bSMat Martineau 211*cec37a6eSPeter Krystad release_sock(sk); 212*cec37a6eSPeter Krystad sk_common_release(sk); 213f870fa0bSMat Martineau } 214f870fa0bSMat Martineau 215*cec37a6eSPeter Krystad static int mptcp_get_port(struct sock *sk, unsigned short snum) 216f870fa0bSMat Martineau { 217f870fa0bSMat Martineau struct mptcp_sock *msk = mptcp_sk(sk); 218*cec37a6eSPeter Krystad struct socket *ssock; 219f870fa0bSMat Martineau 220*cec37a6eSPeter Krystad ssock = __mptcp_nmpc_socket(msk); 221*cec37a6eSPeter Krystad pr_debug("msk=%p, subflow=%p", msk, ssock); 222*cec37a6eSPeter Krystad if (WARN_ON_ONCE(!ssock)) 223*cec37a6eSPeter Krystad return -EINVAL; 224f870fa0bSMat Martineau 225*cec37a6eSPeter Krystad return inet_csk_get_port(ssock->sk, snum); 226*cec37a6eSPeter Krystad } 227f870fa0bSMat Martineau 228*cec37a6eSPeter Krystad void mptcp_finish_connect(struct sock *ssk) 229*cec37a6eSPeter Krystad { 230*cec37a6eSPeter Krystad struct mptcp_subflow_context *subflow; 231*cec37a6eSPeter Krystad struct mptcp_sock *msk; 232*cec37a6eSPeter Krystad struct sock *sk; 233f870fa0bSMat Martineau 234*cec37a6eSPeter Krystad subflow = mptcp_subflow_ctx(ssk); 235f870fa0bSMat Martineau 236*cec37a6eSPeter Krystad if (!subflow->mp_capable) 237*cec37a6eSPeter Krystad return; 238*cec37a6eSPeter Krystad 239*cec37a6eSPeter Krystad sk = subflow->conn; 240*cec37a6eSPeter Krystad msk = mptcp_sk(sk); 241*cec37a6eSPeter Krystad 242*cec37a6eSPeter Krystad /* the socket is not connected yet, no msk/subflow ops can access/race 243*cec37a6eSPeter Krystad * accessing the field below 244*cec37a6eSPeter Krystad */ 245*cec37a6eSPeter Krystad WRITE_ONCE(msk->remote_key, subflow->remote_key); 246*cec37a6eSPeter Krystad WRITE_ONCE(msk->local_key, subflow->local_key); 247f870fa0bSMat Martineau } 248f870fa0bSMat Martineau 249f870fa0bSMat Martineau static struct proto mptcp_prot = { 250f870fa0bSMat Martineau .name = "MPTCP", 251f870fa0bSMat Martineau .owner = THIS_MODULE, 252f870fa0bSMat Martineau .init = mptcp_init_sock, 253f870fa0bSMat Martineau .close = mptcp_close, 254f870fa0bSMat Martineau .accept = inet_csk_accept, 255f870fa0bSMat Martineau .shutdown = tcp_shutdown, 256f870fa0bSMat Martineau .sendmsg = mptcp_sendmsg, 257f870fa0bSMat Martineau .recvmsg = mptcp_recvmsg, 258f870fa0bSMat Martineau .hash = inet_hash, 259f870fa0bSMat Martineau .unhash = inet_unhash, 260*cec37a6eSPeter Krystad .get_port = mptcp_get_port, 261f870fa0bSMat Martineau .obj_size = sizeof(struct mptcp_sock), 262f870fa0bSMat Martineau .no_autobind = true, 263f870fa0bSMat Martineau }; 264f870fa0bSMat Martineau 2652303f994SPeter Krystad static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 2662303f994SPeter Krystad { 2672303f994SPeter Krystad struct mptcp_sock *msk = mptcp_sk(sock->sk); 2682303f994SPeter Krystad struct socket *ssock; 2692303f994SPeter Krystad int err = -ENOTSUPP; 2702303f994SPeter Krystad 2712303f994SPeter Krystad if (uaddr->sa_family != AF_INET) // @@ allow only IPv4 for now 2722303f994SPeter Krystad return err; 2732303f994SPeter Krystad 2742303f994SPeter Krystad lock_sock(sock->sk); 2752303f994SPeter Krystad ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE); 2762303f994SPeter Krystad if (IS_ERR(ssock)) { 2772303f994SPeter Krystad err = PTR_ERR(ssock); 2782303f994SPeter Krystad goto unlock; 2792303f994SPeter Krystad } 2802303f994SPeter Krystad 2812303f994SPeter Krystad err = ssock->ops->bind(ssock, uaddr, addr_len); 2822303f994SPeter Krystad 2832303f994SPeter Krystad unlock: 2842303f994SPeter Krystad release_sock(sock->sk); 2852303f994SPeter Krystad return err; 2862303f994SPeter Krystad } 2872303f994SPeter Krystad 2882303f994SPeter Krystad static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, 2892303f994SPeter Krystad int addr_len, int flags) 2902303f994SPeter Krystad { 2912303f994SPeter Krystad struct mptcp_sock *msk = mptcp_sk(sock->sk); 2922303f994SPeter Krystad struct socket *ssock; 2932303f994SPeter Krystad int err; 2942303f994SPeter Krystad 2952303f994SPeter Krystad lock_sock(sock->sk); 2962303f994SPeter Krystad ssock = __mptcp_socket_create(msk, TCP_SYN_SENT); 2972303f994SPeter Krystad if (IS_ERR(ssock)) { 2982303f994SPeter Krystad err = PTR_ERR(ssock); 2992303f994SPeter Krystad goto unlock; 3002303f994SPeter Krystad } 3012303f994SPeter Krystad 3022303f994SPeter Krystad err = ssock->ops->connect(ssock, uaddr, addr_len, flags); 3032303f994SPeter Krystad inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk)); 3042303f994SPeter Krystad 3052303f994SPeter Krystad unlock: 3062303f994SPeter Krystad release_sock(sock->sk); 3072303f994SPeter Krystad return err; 3082303f994SPeter Krystad } 3092303f994SPeter Krystad 3102303f994SPeter Krystad static __poll_t mptcp_poll(struct file *file, struct socket *sock, 3112303f994SPeter Krystad struct poll_table_struct *wait) 3122303f994SPeter Krystad { 3132303f994SPeter Krystad __poll_t mask = 0; 3142303f994SPeter Krystad 3152303f994SPeter Krystad return mask; 3162303f994SPeter Krystad } 3172303f994SPeter Krystad 3182303f994SPeter Krystad static struct proto_ops mptcp_stream_ops; 3192303f994SPeter Krystad 320f870fa0bSMat Martineau static struct inet_protosw mptcp_protosw = { 321f870fa0bSMat Martineau .type = SOCK_STREAM, 322f870fa0bSMat Martineau .protocol = IPPROTO_MPTCP, 323f870fa0bSMat Martineau .prot = &mptcp_prot, 3242303f994SPeter Krystad .ops = &mptcp_stream_ops, 3252303f994SPeter Krystad .flags = INET_PROTOSW_ICSK, 326f870fa0bSMat Martineau }; 327f870fa0bSMat Martineau 328f870fa0bSMat Martineau void __init mptcp_init(void) 329f870fa0bSMat Martineau { 3302303f994SPeter Krystad mptcp_prot.h.hashinfo = tcp_prot.h.hashinfo; 3312303f994SPeter Krystad mptcp_stream_ops = inet_stream_ops; 3322303f994SPeter Krystad mptcp_stream_ops.bind = mptcp_bind; 3332303f994SPeter Krystad mptcp_stream_ops.connect = mptcp_stream_connect; 3342303f994SPeter Krystad mptcp_stream_ops.poll = mptcp_poll; 3352303f994SPeter Krystad 3362303f994SPeter Krystad mptcp_subflow_init(); 3372303f994SPeter Krystad 338f870fa0bSMat Martineau if (proto_register(&mptcp_prot, 1) != 0) 339f870fa0bSMat Martineau panic("Failed to register MPTCP proto.\n"); 340f870fa0bSMat Martineau 341f870fa0bSMat Martineau inet_register_protosw(&mptcp_protosw); 342f870fa0bSMat Martineau } 343f870fa0bSMat Martineau 344f870fa0bSMat Martineau #if IS_ENABLED(CONFIG_MPTCP_IPV6) 3452303f994SPeter Krystad static struct proto_ops mptcp_v6_stream_ops; 346f870fa0bSMat Martineau static struct proto mptcp_v6_prot; 347f870fa0bSMat Martineau 348f870fa0bSMat Martineau static struct inet_protosw mptcp_v6_protosw = { 349f870fa0bSMat Martineau .type = SOCK_STREAM, 350f870fa0bSMat Martineau .protocol = IPPROTO_MPTCP, 351f870fa0bSMat Martineau .prot = &mptcp_v6_prot, 3522303f994SPeter Krystad .ops = &mptcp_v6_stream_ops, 353f870fa0bSMat Martineau .flags = INET_PROTOSW_ICSK, 354f870fa0bSMat Martineau }; 355f870fa0bSMat Martineau 356f870fa0bSMat Martineau int mptcpv6_init(void) 357f870fa0bSMat Martineau { 358f870fa0bSMat Martineau int err; 359f870fa0bSMat Martineau 360f870fa0bSMat Martineau mptcp_v6_prot = mptcp_prot; 361f870fa0bSMat Martineau strcpy(mptcp_v6_prot.name, "MPTCPv6"); 362f870fa0bSMat Martineau mptcp_v6_prot.slab = NULL; 363f870fa0bSMat Martineau mptcp_v6_prot.obj_size = sizeof(struct mptcp_sock) + 364f870fa0bSMat Martineau sizeof(struct ipv6_pinfo); 365f870fa0bSMat Martineau 366f870fa0bSMat Martineau err = proto_register(&mptcp_v6_prot, 1); 367f870fa0bSMat Martineau if (err) 368f870fa0bSMat Martineau return err; 369f870fa0bSMat Martineau 3702303f994SPeter Krystad mptcp_v6_stream_ops = inet6_stream_ops; 3712303f994SPeter Krystad mptcp_v6_stream_ops.bind = mptcp_bind; 3722303f994SPeter Krystad mptcp_v6_stream_ops.connect = mptcp_stream_connect; 3732303f994SPeter Krystad mptcp_v6_stream_ops.poll = mptcp_poll; 3742303f994SPeter Krystad 375f870fa0bSMat Martineau err = inet6_register_protosw(&mptcp_v6_protosw); 376f870fa0bSMat Martineau if (err) 377f870fa0bSMat Martineau proto_unregister(&mptcp_v6_prot); 378f870fa0bSMat Martineau 379f870fa0bSMat Martineau return err; 380f870fa0bSMat Martineau } 381f870fa0bSMat Martineau #endif 382