1eda7acddSPeter Krystad // SPDX-License-Identifier: GPL-2.0 2eda7acddSPeter Krystad /* Multipath TCP 3eda7acddSPeter Krystad * 4eda7acddSPeter Krystad * Copyright (c) 2017 - 2019, Intel Corporation. 5eda7acddSPeter Krystad */ 6eda7acddSPeter Krystad 7eda7acddSPeter Krystad #include <linux/kernel.h> 8eda7acddSPeter Krystad #include <net/tcp.h> 9eda7acddSPeter Krystad #include <net/mptcp.h> 10eda7acddSPeter Krystad #include "protocol.h" 11eda7acddSPeter Krystad 1265492c5aSPaolo Abeni static bool mptcp_cap_flag_sha256(u8 flags) 1365492c5aSPaolo Abeni { 1465492c5aSPaolo Abeni return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256; 1565492c5aSPaolo Abeni } 1665492c5aSPaolo Abeni 17cc7972eaSChristoph Paasch void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr, 18cc7972eaSChristoph Paasch int opsize, struct tcp_options_received *opt_rx) 19eda7acddSPeter Krystad { 20eda7acddSPeter Krystad struct mptcp_options_received *mp_opt = &opt_rx->mptcp; 21eda7acddSPeter Krystad u8 subtype = *ptr >> 4; 22648ef4b8SMat Martineau int expected_opsize; 23eda7acddSPeter Krystad u8 version; 24eda7acddSPeter Krystad u8 flags; 25eda7acddSPeter Krystad 26eda7acddSPeter Krystad switch (subtype) { 27eda7acddSPeter Krystad case MPTCPOPT_MP_CAPABLE: 28cc7972eaSChristoph Paasch /* strict size checking */ 29cc7972eaSChristoph Paasch if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { 30cc7972eaSChristoph Paasch if (skb->len > tcp_hdr(skb)->doff << 2) 31cc7972eaSChristoph Paasch expected_opsize = TCPOLEN_MPTCP_MPC_ACK_DATA; 32cc7972eaSChristoph Paasch else 33cc7972eaSChristoph Paasch expected_opsize = TCPOLEN_MPTCP_MPC_ACK; 34cc7972eaSChristoph Paasch } else { 35cc7972eaSChristoph Paasch if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK) 36cc7972eaSChristoph Paasch expected_opsize = TCPOLEN_MPTCP_MPC_SYNACK; 37cc7972eaSChristoph Paasch else 38cc7972eaSChristoph Paasch expected_opsize = TCPOLEN_MPTCP_MPC_SYN; 39cc7972eaSChristoph Paasch } 40cc7972eaSChristoph Paasch if (opsize != expected_opsize) 41eda7acddSPeter Krystad break; 42eda7acddSPeter Krystad 43cc7972eaSChristoph Paasch /* try to be gentle vs future versions on the initial syn */ 44eda7acddSPeter Krystad version = *ptr++ & MPTCP_VERSION_MASK; 45cc7972eaSChristoph Paasch if (opsize != TCPOLEN_MPTCP_MPC_SYN) { 46eda7acddSPeter Krystad if (version != MPTCP_SUPPORTED_VERSION) 47eda7acddSPeter Krystad break; 48cc7972eaSChristoph Paasch } else if (version < MPTCP_SUPPORTED_VERSION) { 49cc7972eaSChristoph Paasch break; 50cc7972eaSChristoph Paasch } 51eda7acddSPeter Krystad 52eda7acddSPeter Krystad flags = *ptr++; 5365492c5aSPaolo Abeni if (!mptcp_cap_flag_sha256(flags) || 54eda7acddSPeter Krystad (flags & MPTCP_CAP_EXTENSIBILITY)) 55eda7acddSPeter Krystad break; 56eda7acddSPeter Krystad 57eda7acddSPeter Krystad /* RFC 6824, Section 3.1: 58eda7acddSPeter Krystad * "For the Checksum Required bit (labeled "A"), if either 59eda7acddSPeter Krystad * host requires the use of checksums, checksums MUST be used. 60eda7acddSPeter Krystad * In other words, the only way for checksums not to be used 61eda7acddSPeter Krystad * is if both hosts in their SYNs set A=0." 62eda7acddSPeter Krystad * 63eda7acddSPeter Krystad * Section 3.3.0: 64eda7acddSPeter Krystad * "If a checksum is not present when its use has been 65eda7acddSPeter Krystad * negotiated, the receiver MUST close the subflow with a RST as 66eda7acddSPeter Krystad * it is considered broken." 67eda7acddSPeter Krystad * 68eda7acddSPeter Krystad * We don't implement DSS checksum - fall back to TCP. 69eda7acddSPeter Krystad */ 70eda7acddSPeter Krystad if (flags & MPTCP_CAP_CHECKSUM_REQD) 71eda7acddSPeter Krystad break; 72eda7acddSPeter Krystad 73eda7acddSPeter Krystad mp_opt->mp_capable = 1; 74cc7972eaSChristoph Paasch if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) { 75eda7acddSPeter Krystad mp_opt->sndr_key = get_unaligned_be64(ptr); 76eda7acddSPeter Krystad ptr += 8; 77cc7972eaSChristoph Paasch } 78cc7972eaSChristoph Paasch if (opsize >= TCPOLEN_MPTCP_MPC_ACK) { 79eda7acddSPeter Krystad mp_opt->rcvr_key = get_unaligned_be64(ptr); 80eda7acddSPeter Krystad ptr += 8; 81eda7acddSPeter Krystad } 82cc7972eaSChristoph Paasch if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) { 83cc7972eaSChristoph Paasch /* Section 3.1.: 84cc7972eaSChristoph Paasch * "the data parameters in a MP_CAPABLE are semantically 85cc7972eaSChristoph Paasch * equivalent to those in a DSS option and can be used 86cc7972eaSChristoph Paasch * interchangeably." 87cc7972eaSChristoph Paasch */ 88cc7972eaSChristoph Paasch mp_opt->dss = 1; 89cc7972eaSChristoph Paasch mp_opt->use_map = 1; 90cc7972eaSChristoph Paasch mp_opt->mpc_map = 1; 91cc7972eaSChristoph Paasch mp_opt->data_len = get_unaligned_be16(ptr); 92cc7972eaSChristoph Paasch ptr += 2; 93cc7972eaSChristoph Paasch } 94cc7972eaSChristoph Paasch pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d", 95cc7972eaSChristoph Paasch version, flags, opsize, mp_opt->sndr_key, 96cc7972eaSChristoph Paasch mp_opt->rcvr_key, mp_opt->data_len); 97eda7acddSPeter Krystad break; 98eda7acddSPeter Krystad 99eda7acddSPeter Krystad case MPTCPOPT_DSS: 100eda7acddSPeter Krystad pr_debug("DSS"); 101648ef4b8SMat Martineau ptr++; 102648ef4b8SMat Martineau 103cc7972eaSChristoph Paasch /* we must clear 'mpc_map' be able to detect MP_CAPABLE 104cc7972eaSChristoph Paasch * map vs DSS map in mptcp_incoming_options(), and reconstruct 105cc7972eaSChristoph Paasch * map info accordingly 106cc7972eaSChristoph Paasch */ 107cc7972eaSChristoph Paasch mp_opt->mpc_map = 0; 108648ef4b8SMat Martineau flags = (*ptr++) & MPTCP_DSS_FLAG_MASK; 109648ef4b8SMat Martineau mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0; 110648ef4b8SMat Martineau mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0; 111648ef4b8SMat Martineau mp_opt->use_map = (flags & MPTCP_DSS_HAS_MAP) != 0; 112648ef4b8SMat Martineau mp_opt->ack64 = (flags & MPTCP_DSS_ACK64) != 0; 113648ef4b8SMat Martineau mp_opt->use_ack = (flags & MPTCP_DSS_HAS_ACK); 114648ef4b8SMat Martineau 115648ef4b8SMat Martineau pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d", 116648ef4b8SMat Martineau mp_opt->data_fin, mp_opt->dsn64, 117648ef4b8SMat Martineau mp_opt->use_map, mp_opt->ack64, 118648ef4b8SMat Martineau mp_opt->use_ack); 119648ef4b8SMat Martineau 120648ef4b8SMat Martineau expected_opsize = TCPOLEN_MPTCP_DSS_BASE; 121648ef4b8SMat Martineau 122648ef4b8SMat Martineau if (mp_opt->use_ack) { 123648ef4b8SMat Martineau if (mp_opt->ack64) 124648ef4b8SMat Martineau expected_opsize += TCPOLEN_MPTCP_DSS_ACK64; 125648ef4b8SMat Martineau else 126648ef4b8SMat Martineau expected_opsize += TCPOLEN_MPTCP_DSS_ACK32; 127648ef4b8SMat Martineau } 128648ef4b8SMat Martineau 129648ef4b8SMat Martineau if (mp_opt->use_map) { 130648ef4b8SMat Martineau if (mp_opt->dsn64) 131648ef4b8SMat Martineau expected_opsize += TCPOLEN_MPTCP_DSS_MAP64; 132648ef4b8SMat Martineau else 133648ef4b8SMat Martineau expected_opsize += TCPOLEN_MPTCP_DSS_MAP32; 134648ef4b8SMat Martineau } 135648ef4b8SMat Martineau 136648ef4b8SMat Martineau /* RFC 6824, Section 3.3: 137648ef4b8SMat Martineau * If a checksum is present, but its use had 138648ef4b8SMat Martineau * not been negotiated in the MP_CAPABLE handshake, 139648ef4b8SMat Martineau * the checksum field MUST be ignored. 140648ef4b8SMat Martineau */ 141648ef4b8SMat Martineau if (opsize != expected_opsize && 142648ef4b8SMat Martineau opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) 143648ef4b8SMat Martineau break; 144648ef4b8SMat Martineau 145eda7acddSPeter Krystad mp_opt->dss = 1; 146648ef4b8SMat Martineau 147648ef4b8SMat Martineau if (mp_opt->use_ack) { 148648ef4b8SMat Martineau if (mp_opt->ack64) { 149648ef4b8SMat Martineau mp_opt->data_ack = get_unaligned_be64(ptr); 150648ef4b8SMat Martineau ptr += 8; 151648ef4b8SMat Martineau } else { 152648ef4b8SMat Martineau mp_opt->data_ack = get_unaligned_be32(ptr); 153648ef4b8SMat Martineau ptr += 4; 154648ef4b8SMat Martineau } 155648ef4b8SMat Martineau 156648ef4b8SMat Martineau pr_debug("data_ack=%llu", mp_opt->data_ack); 157648ef4b8SMat Martineau } 158648ef4b8SMat Martineau 159648ef4b8SMat Martineau if (mp_opt->use_map) { 160648ef4b8SMat Martineau if (mp_opt->dsn64) { 161648ef4b8SMat Martineau mp_opt->data_seq = get_unaligned_be64(ptr); 162648ef4b8SMat Martineau ptr += 8; 163648ef4b8SMat Martineau } else { 164648ef4b8SMat Martineau mp_opt->data_seq = get_unaligned_be32(ptr); 165648ef4b8SMat Martineau ptr += 4; 166648ef4b8SMat Martineau } 167648ef4b8SMat Martineau 168648ef4b8SMat Martineau mp_opt->subflow_seq = get_unaligned_be32(ptr); 169648ef4b8SMat Martineau ptr += 4; 170648ef4b8SMat Martineau 171648ef4b8SMat Martineau mp_opt->data_len = get_unaligned_be16(ptr); 172648ef4b8SMat Martineau ptr += 2; 173648ef4b8SMat Martineau 174648ef4b8SMat Martineau pr_debug("data_seq=%llu subflow_seq=%u data_len=%u", 175648ef4b8SMat Martineau mp_opt->data_seq, mp_opt->subflow_seq, 176648ef4b8SMat Martineau mp_opt->data_len); 177648ef4b8SMat Martineau } 178648ef4b8SMat Martineau 179eda7acddSPeter Krystad break; 180eda7acddSPeter Krystad 181eda7acddSPeter Krystad default: 182eda7acddSPeter Krystad break; 183eda7acddSPeter Krystad } 184eda7acddSPeter Krystad } 185eda7acddSPeter Krystad 186cec37a6eSPeter Krystad void mptcp_get_options(const struct sk_buff *skb, 187cec37a6eSPeter Krystad struct tcp_options_received *opt_rx) 188cec37a6eSPeter Krystad { 189cec37a6eSPeter Krystad const unsigned char *ptr; 190cec37a6eSPeter Krystad const struct tcphdr *th = tcp_hdr(skb); 191cec37a6eSPeter Krystad int length = (th->doff * 4) - sizeof(struct tcphdr); 192cec37a6eSPeter Krystad 193cec37a6eSPeter Krystad ptr = (const unsigned char *)(th + 1); 194cec37a6eSPeter Krystad 195cec37a6eSPeter Krystad while (length > 0) { 196cec37a6eSPeter Krystad int opcode = *ptr++; 197cec37a6eSPeter Krystad int opsize; 198cec37a6eSPeter Krystad 199cec37a6eSPeter Krystad switch (opcode) { 200cec37a6eSPeter Krystad case TCPOPT_EOL: 201cec37a6eSPeter Krystad return; 202cec37a6eSPeter Krystad case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ 203cec37a6eSPeter Krystad length--; 204cec37a6eSPeter Krystad continue; 205cec37a6eSPeter Krystad default: 206cec37a6eSPeter Krystad opsize = *ptr++; 207cec37a6eSPeter Krystad if (opsize < 2) /* "silly options" */ 208cec37a6eSPeter Krystad return; 209cec37a6eSPeter Krystad if (opsize > length) 210cec37a6eSPeter Krystad return; /* don't parse partial options */ 211cec37a6eSPeter Krystad if (opcode == TCPOPT_MPTCP) 212cc7972eaSChristoph Paasch mptcp_parse_option(skb, ptr, opsize, opt_rx); 213cec37a6eSPeter Krystad ptr += opsize - 2; 214cec37a6eSPeter Krystad length -= opsize; 215cec37a6eSPeter Krystad } 216cec37a6eSPeter Krystad } 217cec37a6eSPeter Krystad } 218cec37a6eSPeter Krystad 219cc7972eaSChristoph Paasch bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, 220cc7972eaSChristoph Paasch unsigned int *size, struct mptcp_out_options *opts) 221cec37a6eSPeter Krystad { 222cec37a6eSPeter Krystad struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); 223cec37a6eSPeter Krystad 224cc7972eaSChristoph Paasch /* we will use snd_isn to detect first pkt [re]transmission 225cc7972eaSChristoph Paasch * in mptcp_established_options_mp() 226cc7972eaSChristoph Paasch */ 227cc7972eaSChristoph Paasch subflow->snd_isn = TCP_SKB_CB(skb)->end_seq; 228cec37a6eSPeter Krystad if (subflow->request_mptcp) { 229cec37a6eSPeter Krystad pr_debug("local_key=%llu", subflow->local_key); 230cec37a6eSPeter Krystad opts->suboptions = OPTION_MPTCP_MPC_SYN; 231cec37a6eSPeter Krystad opts->sndr_key = subflow->local_key; 232cec37a6eSPeter Krystad *size = TCPOLEN_MPTCP_MPC_SYN; 233cec37a6eSPeter Krystad return true; 234cec37a6eSPeter Krystad } 235cec37a6eSPeter Krystad return false; 236cec37a6eSPeter Krystad } 237cec37a6eSPeter Krystad 238cec37a6eSPeter Krystad void mptcp_rcv_synsent(struct sock *sk) 239cec37a6eSPeter Krystad { 240cec37a6eSPeter Krystad struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); 241cec37a6eSPeter Krystad struct tcp_sock *tp = tcp_sk(sk); 242cec37a6eSPeter Krystad 243cec37a6eSPeter Krystad pr_debug("subflow=%p", subflow); 244cec37a6eSPeter Krystad if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) { 245cec37a6eSPeter Krystad subflow->mp_capable = 1; 246d22f4988SChristoph Paasch subflow->can_ack = 1; 247cec37a6eSPeter Krystad subflow->remote_key = tp->rx_opt.mptcp.sndr_key; 248cec37a6eSPeter Krystad } else { 249cec37a6eSPeter Krystad tcp_sk(sk)->is_mptcp = 0; 250cec37a6eSPeter Krystad } 251cec37a6eSPeter Krystad } 252cec37a6eSPeter Krystad 253cc7972eaSChristoph Paasch static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, 254cc7972eaSChristoph Paasch unsigned int *size, 2556d0060f6SMat Martineau unsigned int remaining, 256cec37a6eSPeter Krystad struct mptcp_out_options *opts) 257cec37a6eSPeter Krystad { 258cec37a6eSPeter Krystad struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); 259cc7972eaSChristoph Paasch struct mptcp_ext *mpext; 260cc7972eaSChristoph Paasch unsigned int data_len; 261cec37a6eSPeter Krystad 262cc7972eaSChristoph Paasch pr_debug("subflow=%p fourth_ack=%d seq=%x:%x remaining=%d", subflow, 263cc7972eaSChristoph Paasch subflow->fourth_ack, subflow->snd_isn, 264cc7972eaSChristoph Paasch skb ? TCP_SKB_CB(skb)->seq : 0, remaining); 265cc7972eaSChristoph Paasch 266cc7972eaSChristoph Paasch if (subflow->mp_capable && !subflow->fourth_ack && skb && 267cc7972eaSChristoph Paasch subflow->snd_isn == TCP_SKB_CB(skb)->seq) { 268cc7972eaSChristoph Paasch /* When skb is not available, we better over-estimate the 269cc7972eaSChristoph Paasch * emitted options len. A full DSS option is longer than 270cc7972eaSChristoph Paasch * TCPOLEN_MPTCP_MPC_ACK_DATA, so let's the caller try to fit 271cc7972eaSChristoph Paasch * that. 272cc7972eaSChristoph Paasch */ 273cc7972eaSChristoph Paasch mpext = mptcp_get_ext(skb); 274cc7972eaSChristoph Paasch data_len = mpext ? mpext->data_len : 0; 275cc7972eaSChristoph Paasch 276cc7972eaSChristoph Paasch /* we will check ext_copy.data_len in mptcp_write_options() to 277cc7972eaSChristoph Paasch * discriminate between TCPOLEN_MPTCP_MPC_ACK_DATA and 278cc7972eaSChristoph Paasch * TCPOLEN_MPTCP_MPC_ACK 279cc7972eaSChristoph Paasch */ 280cc7972eaSChristoph Paasch opts->ext_copy.data_len = data_len; 281cec37a6eSPeter Krystad opts->suboptions = OPTION_MPTCP_MPC_ACK; 282cec37a6eSPeter Krystad opts->sndr_key = subflow->local_key; 283cec37a6eSPeter Krystad opts->rcvr_key = subflow->remote_key; 284cc7972eaSChristoph Paasch 285cc7972eaSChristoph Paasch /* Section 3.1. 286cc7972eaSChristoph Paasch * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK 287cc7972eaSChristoph Paasch * packets that start the first subflow of an MPTCP connection, 288cc7972eaSChristoph Paasch * as well as the first packet that carries data 289cc7972eaSChristoph Paasch */ 290cc7972eaSChristoph Paasch if (data_len > 0) 291cc7972eaSChristoph Paasch *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4); 292cc7972eaSChristoph Paasch else 293cec37a6eSPeter Krystad *size = TCPOLEN_MPTCP_MPC_ACK; 294cc7972eaSChristoph Paasch 295cc7972eaSChristoph Paasch pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d", 296cc7972eaSChristoph Paasch subflow, subflow->local_key, subflow->remote_key, 297cc7972eaSChristoph Paasch data_len); 298cc7972eaSChristoph Paasch 299cec37a6eSPeter Krystad return true; 300cec37a6eSPeter Krystad } 301cec37a6eSPeter Krystad return false; 302cec37a6eSPeter Krystad } 303cec37a6eSPeter Krystad 3046d0060f6SMat Martineau static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, 3056d0060f6SMat Martineau struct mptcp_ext *ext) 3066d0060f6SMat Martineau { 3076d0060f6SMat Martineau ext->data_fin = 1; 3086d0060f6SMat Martineau 3096d0060f6SMat Martineau if (!ext->use_map) { 3106d0060f6SMat Martineau /* RFC6824 requires a DSS mapping with specific values 3116d0060f6SMat Martineau * if DATA_FIN is set but no data payload is mapped 3126d0060f6SMat Martineau */ 3136d0060f6SMat Martineau ext->use_map = 1; 3146d0060f6SMat Martineau ext->dsn64 = 1; 3156d0060f6SMat Martineau ext->data_seq = mptcp_sk(subflow->conn)->write_seq; 3166d0060f6SMat Martineau ext->subflow_seq = 0; 3176d0060f6SMat Martineau ext->data_len = 1; 3186d0060f6SMat Martineau } else { 3196d0060f6SMat Martineau /* If there's an existing DSS mapping, DATA_FIN consumes 3206d0060f6SMat Martineau * 1 additional byte of mapping space. 3216d0060f6SMat Martineau */ 3226d0060f6SMat Martineau ext->data_len++; 3236d0060f6SMat Martineau } 3246d0060f6SMat Martineau } 3256d0060f6SMat Martineau 3266d0060f6SMat Martineau static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, 3276d0060f6SMat Martineau unsigned int *size, 3286d0060f6SMat Martineau unsigned int remaining, 3296d0060f6SMat Martineau struct mptcp_out_options *opts) 3306d0060f6SMat Martineau { 3316d0060f6SMat Martineau struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); 3326d0060f6SMat Martineau unsigned int dss_size = 0; 3336d0060f6SMat Martineau struct mptcp_ext *mpext; 3346d0060f6SMat Martineau struct mptcp_sock *msk; 3356d0060f6SMat Martineau unsigned int ack_size; 336d22f4988SChristoph Paasch bool ret = false; 337*2398e399SPaolo Abeni bool can_ack; 338*2398e399SPaolo Abeni u64 ack_seq; 3396d0060f6SMat Martineau u8 tcp_fin; 3406d0060f6SMat Martineau 3416d0060f6SMat Martineau if (skb) { 3426d0060f6SMat Martineau mpext = mptcp_get_ext(skb); 3436d0060f6SMat Martineau tcp_fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; 3446d0060f6SMat Martineau } else { 3456d0060f6SMat Martineau mpext = NULL; 3466d0060f6SMat Martineau tcp_fin = 0; 3476d0060f6SMat Martineau } 3486d0060f6SMat Martineau 3496d0060f6SMat Martineau if (!skb || (mpext && mpext->use_map) || tcp_fin) { 3506d0060f6SMat Martineau unsigned int map_size; 3516d0060f6SMat Martineau 3526d0060f6SMat Martineau map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; 3536d0060f6SMat Martineau 3546d0060f6SMat Martineau remaining -= map_size; 3556d0060f6SMat Martineau dss_size = map_size; 3566d0060f6SMat Martineau if (mpext) 3576d0060f6SMat Martineau opts->ext_copy = *mpext; 3586d0060f6SMat Martineau 3596d0060f6SMat Martineau if (skb && tcp_fin && 3606d0060f6SMat Martineau subflow->conn->sk_state != TCP_ESTABLISHED) 3616d0060f6SMat Martineau mptcp_write_data_fin(subflow, &opts->ext_copy); 362d22f4988SChristoph Paasch ret = true; 363d22f4988SChristoph Paasch } 364d22f4988SChristoph Paasch 365*2398e399SPaolo Abeni /* passive sockets msk will set the 'can_ack' after accept(), even 366*2398e399SPaolo Abeni * if the first subflow may have the already the remote key handy 367*2398e399SPaolo Abeni */ 368*2398e399SPaolo Abeni can_ack = true; 369d22f4988SChristoph Paasch opts->ext_copy.use_ack = 0; 370d22f4988SChristoph Paasch msk = mptcp_sk(subflow->conn); 371*2398e399SPaolo Abeni if (likely(msk && READ_ONCE(msk->can_ack))) { 372*2398e399SPaolo Abeni ack_seq = msk->ack_seq; 373*2398e399SPaolo Abeni } else if (subflow->can_ack) { 374*2398e399SPaolo Abeni mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq); 375*2398e399SPaolo Abeni ack_seq++; 376*2398e399SPaolo Abeni } else { 377*2398e399SPaolo Abeni can_ack = false; 378*2398e399SPaolo Abeni } 379*2398e399SPaolo Abeni 380*2398e399SPaolo Abeni if (unlikely(!can_ack)) { 381d22f4988SChristoph Paasch *size = ALIGN(dss_size, 4); 382d22f4988SChristoph Paasch return ret; 3836d0060f6SMat Martineau } 3846d0060f6SMat Martineau 3856d0060f6SMat Martineau ack_size = TCPOLEN_MPTCP_DSS_ACK64; 3866d0060f6SMat Martineau 3876d0060f6SMat Martineau /* Add kind/length/subtype/flag overhead if mapping is not populated */ 3886d0060f6SMat Martineau if (dss_size == 0) 3896d0060f6SMat Martineau ack_size += TCPOLEN_MPTCP_DSS_BASE; 3906d0060f6SMat Martineau 3916d0060f6SMat Martineau dss_size += ack_size; 3926d0060f6SMat Martineau 393*2398e399SPaolo Abeni opts->ext_copy.data_ack = ack_seq; 3946d0060f6SMat Martineau opts->ext_copy.ack64 = 1; 3956d0060f6SMat Martineau opts->ext_copy.use_ack = 1; 3966d0060f6SMat Martineau 3976d0060f6SMat Martineau *size = ALIGN(dss_size, 4); 3986d0060f6SMat Martineau return true; 3996d0060f6SMat Martineau } 4006d0060f6SMat Martineau 4016d0060f6SMat Martineau bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, 4026d0060f6SMat Martineau unsigned int *size, unsigned int remaining, 4036d0060f6SMat Martineau struct mptcp_out_options *opts) 4046d0060f6SMat Martineau { 4056d0060f6SMat Martineau unsigned int opt_size = 0; 4066d0060f6SMat Martineau bool ret = false; 4076d0060f6SMat Martineau 408cc7972eaSChristoph Paasch if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts)) 4096d0060f6SMat Martineau ret = true; 4106d0060f6SMat Martineau else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining, 4116d0060f6SMat Martineau opts)) 4126d0060f6SMat Martineau ret = true; 4136d0060f6SMat Martineau 4146d0060f6SMat Martineau /* we reserved enough space for the above options, and exceeding the 4156d0060f6SMat Martineau * TCP option space would be fatal 4166d0060f6SMat Martineau */ 4176d0060f6SMat Martineau if (WARN_ON_ONCE(opt_size > remaining)) 4186d0060f6SMat Martineau return false; 4196d0060f6SMat Martineau 4206d0060f6SMat Martineau *size += opt_size; 4216d0060f6SMat Martineau remaining -= opt_size; 4226d0060f6SMat Martineau 4236d0060f6SMat Martineau return ret; 4246d0060f6SMat Martineau } 4256d0060f6SMat Martineau 426cec37a6eSPeter Krystad bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, 427cec37a6eSPeter Krystad struct mptcp_out_options *opts) 428cec37a6eSPeter Krystad { 429cec37a6eSPeter Krystad struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); 430cec37a6eSPeter Krystad 431cec37a6eSPeter Krystad if (subflow_req->mp_capable) { 432cec37a6eSPeter Krystad opts->suboptions = OPTION_MPTCP_MPC_SYNACK; 433cec37a6eSPeter Krystad opts->sndr_key = subflow_req->local_key; 434cec37a6eSPeter Krystad *size = TCPOLEN_MPTCP_MPC_SYNACK; 435cec37a6eSPeter Krystad pr_debug("subflow_req=%p, local_key=%llu", 436cec37a6eSPeter Krystad subflow_req, subflow_req->local_key); 437cec37a6eSPeter Krystad return true; 438cec37a6eSPeter Krystad } 439cec37a6eSPeter Krystad return false; 440cec37a6eSPeter Krystad } 441cec37a6eSPeter Krystad 442d22f4988SChristoph Paasch static bool check_fourth_ack(struct mptcp_subflow_context *subflow, 443d22f4988SChristoph Paasch struct sk_buff *skb, 444d22f4988SChristoph Paasch struct mptcp_options_received *mp_opt) 445d22f4988SChristoph Paasch { 446d22f4988SChristoph Paasch /* here we can process OoO, in-window pkts, only in-sequence 4th ack 447d22f4988SChristoph Paasch * are relevant 448d22f4988SChristoph Paasch */ 449d22f4988SChristoph Paasch if (likely(subflow->fourth_ack || 450d22f4988SChristoph Paasch TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1)) 451d22f4988SChristoph Paasch return true; 452d22f4988SChristoph Paasch 453d22f4988SChristoph Paasch if (mp_opt->use_ack) 454d22f4988SChristoph Paasch subflow->fourth_ack = 1; 455d22f4988SChristoph Paasch 456d22f4988SChristoph Paasch if (subflow->can_ack) 457d22f4988SChristoph Paasch return true; 458d22f4988SChristoph Paasch 459d22f4988SChristoph Paasch /* If the first established packet does not contain MP_CAPABLE + data 460d22f4988SChristoph Paasch * then fallback to TCP 461d22f4988SChristoph Paasch */ 462d22f4988SChristoph Paasch if (!mp_opt->mp_capable) { 463d22f4988SChristoph Paasch subflow->mp_capable = 0; 464d22f4988SChristoph Paasch tcp_sk(mptcp_subflow_tcp_sock(subflow))->is_mptcp = 0; 465d22f4988SChristoph Paasch return false; 466d22f4988SChristoph Paasch } 467d22f4988SChristoph Paasch subflow->remote_key = mp_opt->sndr_key; 468d22f4988SChristoph Paasch subflow->can_ack = 1; 469d22f4988SChristoph Paasch return true; 470d22f4988SChristoph Paasch } 471d22f4988SChristoph Paasch 472648ef4b8SMat Martineau void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, 473648ef4b8SMat Martineau struct tcp_options_received *opt_rx) 474648ef4b8SMat Martineau { 475d22f4988SChristoph Paasch struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); 476648ef4b8SMat Martineau struct mptcp_options_received *mp_opt; 477648ef4b8SMat Martineau struct mptcp_ext *mpext; 478648ef4b8SMat Martineau 479648ef4b8SMat Martineau mp_opt = &opt_rx->mptcp; 480d22f4988SChristoph Paasch if (!check_fourth_ack(subflow, skb, mp_opt)) 481d22f4988SChristoph Paasch return; 482648ef4b8SMat Martineau 483648ef4b8SMat Martineau if (!mp_opt->dss) 484648ef4b8SMat Martineau return; 485648ef4b8SMat Martineau 486648ef4b8SMat Martineau mpext = skb_ext_add(skb, SKB_EXT_MPTCP); 487648ef4b8SMat Martineau if (!mpext) 488648ef4b8SMat Martineau return; 489648ef4b8SMat Martineau 490648ef4b8SMat Martineau memset(mpext, 0, sizeof(*mpext)); 491648ef4b8SMat Martineau 492648ef4b8SMat Martineau if (mp_opt->use_map) { 493cc7972eaSChristoph Paasch if (mp_opt->mpc_map) { 494cc7972eaSChristoph Paasch /* this is an MP_CAPABLE carrying MPTCP data 495cc7972eaSChristoph Paasch * we know this map the first chunk of data 496cc7972eaSChristoph Paasch */ 497cc7972eaSChristoph Paasch mptcp_crypto_key_sha(subflow->remote_key, NULL, 498cc7972eaSChristoph Paasch &mpext->data_seq); 499cc7972eaSChristoph Paasch mpext->data_seq++; 500cc7972eaSChristoph Paasch mpext->subflow_seq = 1; 501cc7972eaSChristoph Paasch mpext->dsn64 = 1; 502cc7972eaSChristoph Paasch mpext->mpc_map = 1; 503cc7972eaSChristoph Paasch } else { 504648ef4b8SMat Martineau mpext->data_seq = mp_opt->data_seq; 505648ef4b8SMat Martineau mpext->subflow_seq = mp_opt->subflow_seq; 506cc7972eaSChristoph Paasch mpext->dsn64 = mp_opt->dsn64; 507cc7972eaSChristoph Paasch } 508648ef4b8SMat Martineau mpext->data_len = mp_opt->data_len; 509648ef4b8SMat Martineau mpext->use_map = 1; 510648ef4b8SMat Martineau } 511648ef4b8SMat Martineau 512648ef4b8SMat Martineau if (mp_opt->use_ack) { 513648ef4b8SMat Martineau mpext->data_ack = mp_opt->data_ack; 514648ef4b8SMat Martineau mpext->use_ack = 1; 515648ef4b8SMat Martineau mpext->ack64 = mp_opt->ack64; 516648ef4b8SMat Martineau } 517648ef4b8SMat Martineau 518648ef4b8SMat Martineau mpext->data_fin = mp_opt->data_fin; 519648ef4b8SMat Martineau } 520648ef4b8SMat Martineau 521eda7acddSPeter Krystad void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) 522eda7acddSPeter Krystad { 523cc7972eaSChristoph Paasch if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | 524eda7acddSPeter Krystad OPTION_MPTCP_MPC_ACK) & opts->suboptions) { 525eda7acddSPeter Krystad u8 len; 526eda7acddSPeter Krystad 527eda7acddSPeter Krystad if (OPTION_MPTCP_MPC_SYN & opts->suboptions) 528eda7acddSPeter Krystad len = TCPOLEN_MPTCP_MPC_SYN; 529cec37a6eSPeter Krystad else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) 530cec37a6eSPeter Krystad len = TCPOLEN_MPTCP_MPC_SYNACK; 531cc7972eaSChristoph Paasch else if (opts->ext_copy.data_len) 532cc7972eaSChristoph Paasch len = TCPOLEN_MPTCP_MPC_ACK_DATA; 533eda7acddSPeter Krystad else 534eda7acddSPeter Krystad len = TCPOLEN_MPTCP_MPC_ACK; 535eda7acddSPeter Krystad 536eda7acddSPeter Krystad *ptr++ = htonl((TCPOPT_MPTCP << 24) | (len << 16) | 537eda7acddSPeter Krystad (MPTCPOPT_MP_CAPABLE << 12) | 538eda7acddSPeter Krystad (MPTCP_SUPPORTED_VERSION << 8) | 53965492c5aSPaolo Abeni MPTCP_CAP_HMAC_SHA256); 540cc7972eaSChristoph Paasch 541cc7972eaSChristoph Paasch if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & 542cc7972eaSChristoph Paasch opts->suboptions)) 543cc7972eaSChristoph Paasch goto mp_capable_done; 544cc7972eaSChristoph Paasch 545eda7acddSPeter Krystad put_unaligned_be64(opts->sndr_key, ptr); 546eda7acddSPeter Krystad ptr += 2; 547cc7972eaSChristoph Paasch if (!((OPTION_MPTCP_MPC_ACK) & opts->suboptions)) 548cc7972eaSChristoph Paasch goto mp_capable_done; 549cc7972eaSChristoph Paasch 550eda7acddSPeter Krystad put_unaligned_be64(opts->rcvr_key, ptr); 551eda7acddSPeter Krystad ptr += 2; 552cc7972eaSChristoph Paasch if (!opts->ext_copy.data_len) 553cc7972eaSChristoph Paasch goto mp_capable_done; 554cc7972eaSChristoph Paasch 555cc7972eaSChristoph Paasch put_unaligned_be32(opts->ext_copy.data_len << 16 | 556cc7972eaSChristoph Paasch TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); 557cc7972eaSChristoph Paasch ptr += 1; 558eda7acddSPeter Krystad } 5596d0060f6SMat Martineau 560cc7972eaSChristoph Paasch mp_capable_done: 5616d0060f6SMat Martineau if (opts->ext_copy.use_ack || opts->ext_copy.use_map) { 5626d0060f6SMat Martineau struct mptcp_ext *mpext = &opts->ext_copy; 5636d0060f6SMat Martineau u8 len = TCPOLEN_MPTCP_DSS_BASE; 5646d0060f6SMat Martineau u8 flags = 0; 5656d0060f6SMat Martineau 5666d0060f6SMat Martineau if (mpext->use_ack) { 5676d0060f6SMat Martineau len += TCPOLEN_MPTCP_DSS_ACK64; 5686d0060f6SMat Martineau flags = MPTCP_DSS_HAS_ACK | MPTCP_DSS_ACK64; 5696d0060f6SMat Martineau } 5706d0060f6SMat Martineau 5716d0060f6SMat Martineau if (mpext->use_map) { 5726d0060f6SMat Martineau len += TCPOLEN_MPTCP_DSS_MAP64; 5736d0060f6SMat Martineau 5746d0060f6SMat Martineau /* Use only 64-bit mapping flags for now, add 5756d0060f6SMat Martineau * support for optional 32-bit mappings later. 5766d0060f6SMat Martineau */ 5776d0060f6SMat Martineau flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64; 5786d0060f6SMat Martineau if (mpext->data_fin) 5796d0060f6SMat Martineau flags |= MPTCP_DSS_DATA_FIN; 5806d0060f6SMat Martineau } 5816d0060f6SMat Martineau 5826d0060f6SMat Martineau *ptr++ = htonl((TCPOPT_MPTCP << 24) | 5836d0060f6SMat Martineau (len << 16) | 5846d0060f6SMat Martineau (MPTCPOPT_DSS << 12) | 5856d0060f6SMat Martineau (flags)); 5866d0060f6SMat Martineau 5876d0060f6SMat Martineau if (mpext->use_ack) { 5886d0060f6SMat Martineau put_unaligned_be64(mpext->data_ack, ptr); 5896d0060f6SMat Martineau ptr += 2; 5906d0060f6SMat Martineau } 5916d0060f6SMat Martineau 5926d0060f6SMat Martineau if (mpext->use_map) { 5936d0060f6SMat Martineau put_unaligned_be64(mpext->data_seq, ptr); 5946d0060f6SMat Martineau ptr += 2; 5956d0060f6SMat Martineau put_unaligned_be32(mpext->subflow_seq, ptr); 5966d0060f6SMat Martineau ptr += 1; 5976d0060f6SMat Martineau put_unaligned_be32(mpext->data_len << 16 | 5986d0060f6SMat Martineau TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); 5996d0060f6SMat Martineau } 6006d0060f6SMat Martineau } 601eda7acddSPeter Krystad } 602