1eda7acddSPeter Krystad // SPDX-License-Identifier: GPL-2.0 2eda7acddSPeter Krystad /* Multipath TCP 3eda7acddSPeter Krystad * 4eda7acddSPeter Krystad * Copyright (c) 2017 - 2019, Intel Corporation. 5eda7acddSPeter Krystad */ 6eda7acddSPeter Krystad 7eda7acddSPeter Krystad #include <linux/kernel.h> 8eda7acddSPeter Krystad #include <net/tcp.h> 9eda7acddSPeter Krystad #include <net/mptcp.h> 10eda7acddSPeter Krystad #include "protocol.h" 11eda7acddSPeter Krystad 12eda7acddSPeter Krystad void mptcp_parse_option(const unsigned char *ptr, int opsize, 13eda7acddSPeter Krystad struct tcp_options_received *opt_rx) 14eda7acddSPeter Krystad { 15eda7acddSPeter Krystad struct mptcp_options_received *mp_opt = &opt_rx->mptcp; 16eda7acddSPeter Krystad u8 subtype = *ptr >> 4; 17eda7acddSPeter Krystad u8 version; 18eda7acddSPeter Krystad u8 flags; 19eda7acddSPeter Krystad 20eda7acddSPeter Krystad switch (subtype) { 21eda7acddSPeter Krystad case MPTCPOPT_MP_CAPABLE: 22eda7acddSPeter Krystad if (opsize != TCPOLEN_MPTCP_MPC_SYN && 23eda7acddSPeter Krystad opsize != TCPOLEN_MPTCP_MPC_ACK) 24eda7acddSPeter Krystad break; 25eda7acddSPeter Krystad 26eda7acddSPeter Krystad version = *ptr++ & MPTCP_VERSION_MASK; 27eda7acddSPeter Krystad if (version != MPTCP_SUPPORTED_VERSION) 28eda7acddSPeter Krystad break; 29eda7acddSPeter Krystad 30eda7acddSPeter Krystad flags = *ptr++; 31eda7acddSPeter Krystad if (!((flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA1) || 32eda7acddSPeter Krystad (flags & MPTCP_CAP_EXTENSIBILITY)) 33eda7acddSPeter Krystad break; 34eda7acddSPeter Krystad 35eda7acddSPeter Krystad /* RFC 6824, Section 3.1: 36eda7acddSPeter Krystad * "For the Checksum Required bit (labeled "A"), if either 37eda7acddSPeter Krystad * host requires the use of checksums, checksums MUST be used. 38eda7acddSPeter Krystad * In other words, the only way for checksums not to be used 39eda7acddSPeter Krystad * is if both hosts in their SYNs set A=0." 40eda7acddSPeter Krystad * 41eda7acddSPeter Krystad * Section 3.3.0: 42eda7acddSPeter Krystad * "If a checksum is not present when its use has been 43eda7acddSPeter Krystad * negotiated, the receiver MUST close the subflow with a RST as 44eda7acddSPeter Krystad * it is considered broken." 45eda7acddSPeter Krystad * 46eda7acddSPeter Krystad * We don't implement DSS checksum - fall back to TCP. 47eda7acddSPeter Krystad */ 48eda7acddSPeter Krystad if (flags & MPTCP_CAP_CHECKSUM_REQD) 49eda7acddSPeter Krystad break; 50eda7acddSPeter Krystad 51eda7acddSPeter Krystad mp_opt->mp_capable = 1; 52eda7acddSPeter Krystad mp_opt->sndr_key = get_unaligned_be64(ptr); 53eda7acddSPeter Krystad ptr += 8; 54eda7acddSPeter Krystad 55eda7acddSPeter Krystad if (opsize == TCPOLEN_MPTCP_MPC_ACK) { 56eda7acddSPeter Krystad mp_opt->rcvr_key = get_unaligned_be64(ptr); 57eda7acddSPeter Krystad ptr += 8; 58eda7acddSPeter Krystad pr_debug("MP_CAPABLE sndr=%llu, rcvr=%llu", 59eda7acddSPeter Krystad mp_opt->sndr_key, mp_opt->rcvr_key); 60eda7acddSPeter Krystad } else { 61eda7acddSPeter Krystad pr_debug("MP_CAPABLE sndr=%llu", mp_opt->sndr_key); 62eda7acddSPeter Krystad } 63eda7acddSPeter Krystad break; 64eda7acddSPeter Krystad 65eda7acddSPeter Krystad case MPTCPOPT_DSS: 66eda7acddSPeter Krystad pr_debug("DSS"); 67eda7acddSPeter Krystad mp_opt->dss = 1; 68eda7acddSPeter Krystad break; 69eda7acddSPeter Krystad 70eda7acddSPeter Krystad default: 71eda7acddSPeter Krystad break; 72eda7acddSPeter Krystad } 73eda7acddSPeter Krystad } 74eda7acddSPeter Krystad 75cec37a6eSPeter Krystad void mptcp_get_options(const struct sk_buff *skb, 76cec37a6eSPeter Krystad struct tcp_options_received *opt_rx) 77cec37a6eSPeter Krystad { 78cec37a6eSPeter Krystad const unsigned char *ptr; 79cec37a6eSPeter Krystad const struct tcphdr *th = tcp_hdr(skb); 80cec37a6eSPeter Krystad int length = (th->doff * 4) - sizeof(struct tcphdr); 81cec37a6eSPeter Krystad 82cec37a6eSPeter Krystad ptr = (const unsigned char *)(th + 1); 83cec37a6eSPeter Krystad 84cec37a6eSPeter Krystad while (length > 0) { 85cec37a6eSPeter Krystad int opcode = *ptr++; 86cec37a6eSPeter Krystad int opsize; 87cec37a6eSPeter Krystad 88cec37a6eSPeter Krystad switch (opcode) { 89cec37a6eSPeter Krystad case TCPOPT_EOL: 90cec37a6eSPeter Krystad return; 91cec37a6eSPeter Krystad case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ 92cec37a6eSPeter Krystad length--; 93cec37a6eSPeter Krystad continue; 94cec37a6eSPeter Krystad default: 95cec37a6eSPeter Krystad opsize = *ptr++; 96cec37a6eSPeter Krystad if (opsize < 2) /* "silly options" */ 97cec37a6eSPeter Krystad return; 98cec37a6eSPeter Krystad if (opsize > length) 99cec37a6eSPeter Krystad return; /* don't parse partial options */ 100cec37a6eSPeter Krystad if (opcode == TCPOPT_MPTCP) 101cec37a6eSPeter Krystad mptcp_parse_option(ptr, opsize, opt_rx); 102cec37a6eSPeter Krystad ptr += opsize - 2; 103cec37a6eSPeter Krystad length -= opsize; 104cec37a6eSPeter Krystad } 105cec37a6eSPeter Krystad } 106cec37a6eSPeter Krystad } 107cec37a6eSPeter Krystad 108cec37a6eSPeter Krystad bool mptcp_syn_options(struct sock *sk, unsigned int *size, 109cec37a6eSPeter Krystad struct mptcp_out_options *opts) 110cec37a6eSPeter Krystad { 111cec37a6eSPeter Krystad struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); 112cec37a6eSPeter Krystad 113cec37a6eSPeter Krystad if (subflow->request_mptcp) { 114cec37a6eSPeter Krystad pr_debug("local_key=%llu", subflow->local_key); 115cec37a6eSPeter Krystad opts->suboptions = OPTION_MPTCP_MPC_SYN; 116cec37a6eSPeter Krystad opts->sndr_key = subflow->local_key; 117cec37a6eSPeter Krystad *size = TCPOLEN_MPTCP_MPC_SYN; 118cec37a6eSPeter Krystad return true; 119cec37a6eSPeter Krystad } 120cec37a6eSPeter Krystad return false; 121cec37a6eSPeter Krystad } 122cec37a6eSPeter Krystad 123cec37a6eSPeter Krystad void mptcp_rcv_synsent(struct sock *sk) 124cec37a6eSPeter Krystad { 125cec37a6eSPeter Krystad struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); 126cec37a6eSPeter Krystad struct tcp_sock *tp = tcp_sk(sk); 127cec37a6eSPeter Krystad 128cec37a6eSPeter Krystad pr_debug("subflow=%p", subflow); 129cec37a6eSPeter Krystad if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) { 130cec37a6eSPeter Krystad subflow->mp_capable = 1; 131cec37a6eSPeter Krystad subflow->remote_key = tp->rx_opt.mptcp.sndr_key; 132cec37a6eSPeter Krystad } else { 133cec37a6eSPeter Krystad tcp_sk(sk)->is_mptcp = 0; 134cec37a6eSPeter Krystad } 135cec37a6eSPeter Krystad } 136cec37a6eSPeter Krystad 137*6d0060f6SMat Martineau static bool mptcp_established_options_mp(struct sock *sk, unsigned int *size, 138*6d0060f6SMat Martineau unsigned int remaining, 139cec37a6eSPeter Krystad struct mptcp_out_options *opts) 140cec37a6eSPeter Krystad { 141cec37a6eSPeter Krystad struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); 142cec37a6eSPeter Krystad 143*6d0060f6SMat Martineau if (!subflow->fourth_ack) { 144cec37a6eSPeter Krystad opts->suboptions = OPTION_MPTCP_MPC_ACK; 145cec37a6eSPeter Krystad opts->sndr_key = subflow->local_key; 146cec37a6eSPeter Krystad opts->rcvr_key = subflow->remote_key; 147cec37a6eSPeter Krystad *size = TCPOLEN_MPTCP_MPC_ACK; 148cec37a6eSPeter Krystad subflow->fourth_ack = 1; 149cec37a6eSPeter Krystad pr_debug("subflow=%p, local_key=%llu, remote_key=%llu", 150cec37a6eSPeter Krystad subflow, subflow->local_key, subflow->remote_key); 151cec37a6eSPeter Krystad return true; 152cec37a6eSPeter Krystad } 153cec37a6eSPeter Krystad return false; 154cec37a6eSPeter Krystad } 155cec37a6eSPeter Krystad 156*6d0060f6SMat Martineau static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, 157*6d0060f6SMat Martineau struct mptcp_ext *ext) 158*6d0060f6SMat Martineau { 159*6d0060f6SMat Martineau ext->data_fin = 1; 160*6d0060f6SMat Martineau 161*6d0060f6SMat Martineau if (!ext->use_map) { 162*6d0060f6SMat Martineau /* RFC6824 requires a DSS mapping with specific values 163*6d0060f6SMat Martineau * if DATA_FIN is set but no data payload is mapped 164*6d0060f6SMat Martineau */ 165*6d0060f6SMat Martineau ext->use_map = 1; 166*6d0060f6SMat Martineau ext->dsn64 = 1; 167*6d0060f6SMat Martineau ext->data_seq = mptcp_sk(subflow->conn)->write_seq; 168*6d0060f6SMat Martineau ext->subflow_seq = 0; 169*6d0060f6SMat Martineau ext->data_len = 1; 170*6d0060f6SMat Martineau } else { 171*6d0060f6SMat Martineau /* If there's an existing DSS mapping, DATA_FIN consumes 172*6d0060f6SMat Martineau * 1 additional byte of mapping space. 173*6d0060f6SMat Martineau */ 174*6d0060f6SMat Martineau ext->data_len++; 175*6d0060f6SMat Martineau } 176*6d0060f6SMat Martineau } 177*6d0060f6SMat Martineau 178*6d0060f6SMat Martineau static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, 179*6d0060f6SMat Martineau unsigned int *size, 180*6d0060f6SMat Martineau unsigned int remaining, 181*6d0060f6SMat Martineau struct mptcp_out_options *opts) 182*6d0060f6SMat Martineau { 183*6d0060f6SMat Martineau struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); 184*6d0060f6SMat Martineau unsigned int dss_size = 0; 185*6d0060f6SMat Martineau struct mptcp_ext *mpext; 186*6d0060f6SMat Martineau struct mptcp_sock *msk; 187*6d0060f6SMat Martineau unsigned int ack_size; 188*6d0060f6SMat Martineau u8 tcp_fin; 189*6d0060f6SMat Martineau 190*6d0060f6SMat Martineau if (skb) { 191*6d0060f6SMat Martineau mpext = mptcp_get_ext(skb); 192*6d0060f6SMat Martineau tcp_fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; 193*6d0060f6SMat Martineau } else { 194*6d0060f6SMat Martineau mpext = NULL; 195*6d0060f6SMat Martineau tcp_fin = 0; 196*6d0060f6SMat Martineau } 197*6d0060f6SMat Martineau 198*6d0060f6SMat Martineau if (!skb || (mpext && mpext->use_map) || tcp_fin) { 199*6d0060f6SMat Martineau unsigned int map_size; 200*6d0060f6SMat Martineau 201*6d0060f6SMat Martineau map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; 202*6d0060f6SMat Martineau 203*6d0060f6SMat Martineau remaining -= map_size; 204*6d0060f6SMat Martineau dss_size = map_size; 205*6d0060f6SMat Martineau if (mpext) 206*6d0060f6SMat Martineau opts->ext_copy = *mpext; 207*6d0060f6SMat Martineau 208*6d0060f6SMat Martineau if (skb && tcp_fin && 209*6d0060f6SMat Martineau subflow->conn->sk_state != TCP_ESTABLISHED) 210*6d0060f6SMat Martineau mptcp_write_data_fin(subflow, &opts->ext_copy); 211*6d0060f6SMat Martineau } 212*6d0060f6SMat Martineau 213*6d0060f6SMat Martineau ack_size = TCPOLEN_MPTCP_DSS_ACK64; 214*6d0060f6SMat Martineau 215*6d0060f6SMat Martineau /* Add kind/length/subtype/flag overhead if mapping is not populated */ 216*6d0060f6SMat Martineau if (dss_size == 0) 217*6d0060f6SMat Martineau ack_size += TCPOLEN_MPTCP_DSS_BASE; 218*6d0060f6SMat Martineau 219*6d0060f6SMat Martineau dss_size += ack_size; 220*6d0060f6SMat Martineau 221*6d0060f6SMat Martineau msk = mptcp_sk(mptcp_subflow_ctx(sk)->conn); 222*6d0060f6SMat Martineau if (msk) { 223*6d0060f6SMat Martineau opts->ext_copy.data_ack = msk->ack_seq; 224*6d0060f6SMat Martineau } else { 225*6d0060f6SMat Martineau mptcp_crypto_key_sha(mptcp_subflow_ctx(sk)->remote_key, 226*6d0060f6SMat Martineau NULL, &opts->ext_copy.data_ack); 227*6d0060f6SMat Martineau opts->ext_copy.data_ack++; 228*6d0060f6SMat Martineau } 229*6d0060f6SMat Martineau 230*6d0060f6SMat Martineau opts->ext_copy.ack64 = 1; 231*6d0060f6SMat Martineau opts->ext_copy.use_ack = 1; 232*6d0060f6SMat Martineau 233*6d0060f6SMat Martineau *size = ALIGN(dss_size, 4); 234*6d0060f6SMat Martineau return true; 235*6d0060f6SMat Martineau } 236*6d0060f6SMat Martineau 237*6d0060f6SMat Martineau bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, 238*6d0060f6SMat Martineau unsigned int *size, unsigned int remaining, 239*6d0060f6SMat Martineau struct mptcp_out_options *opts) 240*6d0060f6SMat Martineau { 241*6d0060f6SMat Martineau unsigned int opt_size = 0; 242*6d0060f6SMat Martineau bool ret = false; 243*6d0060f6SMat Martineau 244*6d0060f6SMat Martineau if (mptcp_established_options_mp(sk, &opt_size, remaining, opts)) 245*6d0060f6SMat Martineau ret = true; 246*6d0060f6SMat Martineau else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining, 247*6d0060f6SMat Martineau opts)) 248*6d0060f6SMat Martineau ret = true; 249*6d0060f6SMat Martineau 250*6d0060f6SMat Martineau /* we reserved enough space for the above options, and exceeding the 251*6d0060f6SMat Martineau * TCP option space would be fatal 252*6d0060f6SMat Martineau */ 253*6d0060f6SMat Martineau if (WARN_ON_ONCE(opt_size > remaining)) 254*6d0060f6SMat Martineau return false; 255*6d0060f6SMat Martineau 256*6d0060f6SMat Martineau *size += opt_size; 257*6d0060f6SMat Martineau remaining -= opt_size; 258*6d0060f6SMat Martineau 259*6d0060f6SMat Martineau return ret; 260*6d0060f6SMat Martineau } 261*6d0060f6SMat Martineau 262cec37a6eSPeter Krystad bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, 263cec37a6eSPeter Krystad struct mptcp_out_options *opts) 264cec37a6eSPeter Krystad { 265cec37a6eSPeter Krystad struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); 266cec37a6eSPeter Krystad 267cec37a6eSPeter Krystad if (subflow_req->mp_capable) { 268cec37a6eSPeter Krystad opts->suboptions = OPTION_MPTCP_MPC_SYNACK; 269cec37a6eSPeter Krystad opts->sndr_key = subflow_req->local_key; 270cec37a6eSPeter Krystad *size = TCPOLEN_MPTCP_MPC_SYNACK; 271cec37a6eSPeter Krystad pr_debug("subflow_req=%p, local_key=%llu", 272cec37a6eSPeter Krystad subflow_req, subflow_req->local_key); 273cec37a6eSPeter Krystad return true; 274cec37a6eSPeter Krystad } 275cec37a6eSPeter Krystad return false; 276cec37a6eSPeter Krystad } 277cec37a6eSPeter Krystad 278eda7acddSPeter Krystad void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) 279eda7acddSPeter Krystad { 280eda7acddSPeter Krystad if ((OPTION_MPTCP_MPC_SYN | 281cec37a6eSPeter Krystad OPTION_MPTCP_MPC_SYNACK | 282eda7acddSPeter Krystad OPTION_MPTCP_MPC_ACK) & opts->suboptions) { 283eda7acddSPeter Krystad u8 len; 284eda7acddSPeter Krystad 285eda7acddSPeter Krystad if (OPTION_MPTCP_MPC_SYN & opts->suboptions) 286eda7acddSPeter Krystad len = TCPOLEN_MPTCP_MPC_SYN; 287cec37a6eSPeter Krystad else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) 288cec37a6eSPeter Krystad len = TCPOLEN_MPTCP_MPC_SYNACK; 289eda7acddSPeter Krystad else 290eda7acddSPeter Krystad len = TCPOLEN_MPTCP_MPC_ACK; 291eda7acddSPeter Krystad 292eda7acddSPeter Krystad *ptr++ = htonl((TCPOPT_MPTCP << 24) | (len << 16) | 293eda7acddSPeter Krystad (MPTCPOPT_MP_CAPABLE << 12) | 294eda7acddSPeter Krystad (MPTCP_SUPPORTED_VERSION << 8) | 295eda7acddSPeter Krystad MPTCP_CAP_HMAC_SHA1); 296eda7acddSPeter Krystad put_unaligned_be64(opts->sndr_key, ptr); 297eda7acddSPeter Krystad ptr += 2; 298eda7acddSPeter Krystad if (OPTION_MPTCP_MPC_ACK & opts->suboptions) { 299eda7acddSPeter Krystad put_unaligned_be64(opts->rcvr_key, ptr); 300eda7acddSPeter Krystad ptr += 2; 301eda7acddSPeter Krystad } 302eda7acddSPeter Krystad } 303*6d0060f6SMat Martineau 304*6d0060f6SMat Martineau if (opts->ext_copy.use_ack || opts->ext_copy.use_map) { 305*6d0060f6SMat Martineau struct mptcp_ext *mpext = &opts->ext_copy; 306*6d0060f6SMat Martineau u8 len = TCPOLEN_MPTCP_DSS_BASE; 307*6d0060f6SMat Martineau u8 flags = 0; 308*6d0060f6SMat Martineau 309*6d0060f6SMat Martineau if (mpext->use_ack) { 310*6d0060f6SMat Martineau len += TCPOLEN_MPTCP_DSS_ACK64; 311*6d0060f6SMat Martineau flags = MPTCP_DSS_HAS_ACK | MPTCP_DSS_ACK64; 312*6d0060f6SMat Martineau } 313*6d0060f6SMat Martineau 314*6d0060f6SMat Martineau if (mpext->use_map) { 315*6d0060f6SMat Martineau len += TCPOLEN_MPTCP_DSS_MAP64; 316*6d0060f6SMat Martineau 317*6d0060f6SMat Martineau /* Use only 64-bit mapping flags for now, add 318*6d0060f6SMat Martineau * support for optional 32-bit mappings later. 319*6d0060f6SMat Martineau */ 320*6d0060f6SMat Martineau flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64; 321*6d0060f6SMat Martineau if (mpext->data_fin) 322*6d0060f6SMat Martineau flags |= MPTCP_DSS_DATA_FIN; 323*6d0060f6SMat Martineau } 324*6d0060f6SMat Martineau 325*6d0060f6SMat Martineau *ptr++ = htonl((TCPOPT_MPTCP << 24) | 326*6d0060f6SMat Martineau (len << 16) | 327*6d0060f6SMat Martineau (MPTCPOPT_DSS << 12) | 328*6d0060f6SMat Martineau (flags)); 329*6d0060f6SMat Martineau 330*6d0060f6SMat Martineau if (mpext->use_ack) { 331*6d0060f6SMat Martineau put_unaligned_be64(mpext->data_ack, ptr); 332*6d0060f6SMat Martineau ptr += 2; 333*6d0060f6SMat Martineau } 334*6d0060f6SMat Martineau 335*6d0060f6SMat Martineau if (mpext->use_map) { 336*6d0060f6SMat Martineau put_unaligned_be64(mpext->data_seq, ptr); 337*6d0060f6SMat Martineau ptr += 2; 338*6d0060f6SMat Martineau put_unaligned_be32(mpext->subflow_seq, ptr); 339*6d0060f6SMat Martineau ptr += 1; 340*6d0060f6SMat Martineau put_unaligned_be32(mpext->data_len << 16 | 341*6d0060f6SMat Martineau TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); 342*6d0060f6SMat Martineau } 343*6d0060f6SMat Martineau } 344eda7acddSPeter Krystad } 345