// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#endif
#include <net/mptcp.h>
#include <uapi/linux/mptcp.h>
#include "protocol.h"
#include "mib.h"

static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
				  enum linux_mptcp_mib_field field)
{
	MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
}

static void subflow_req_destructor(struct request_sock *req)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	pr_debug("subflow_req=%p", subflow_req);

	if (subflow_req->msk)
		sock_put((struct sock *)subflow_req->msk);

	mptcp_token_destroy_request(req);
	tcp_request_sock_ops.destructor(req);
}

static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
				  void *hmac)
{
	u8 msg[8];

	put_unaligned_be32(nonce1, &msg[0]);
	put_unaligned_be32(nonce2, &msg[4]);

	mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}

static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
{
	return mptcp_is_fully_established((void *)msk) &&
	       READ_ONCE(msk->pm.accept_subflow);
}

/* validate received token and create truncated hmac and nonce for SYN-ACK */
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
						     const struct sk_buff *skb)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;
	int local_id;

	msk = mptcp_token_get_sock(subflow_req->token);
	if (!msk) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
		return NULL;
	}

	local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
	if (local_id < 0) {
		sock_put((struct sock *)msk);
		return NULL;
	}
	subflow_req->local_id = local_id;

	get_random_bytes(&subflow_req->local_nonce, sizeof(u32));

	subflow_generate_hmac(msk->local_key, msk->remote_key,
			      subflow_req->local_nonce,
			      subflow_req->remote_nonce, hmac);

	subflow_req->thmac = get_unaligned_be64(hmac);
	return msk;
}

static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	subflow_req->mp_capable = 0;
	subflow_req->mp_join = 0;
	subflow_req->msk = NULL;
	mptcp_token_init_request(req);

#ifdef CONFIG_TCP_MD5SIG
	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
	 * TCP option space.
	 */
	if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
		return -EINVAL;
#endif

	return 0;
}

static void subflow_init_req(struct request_sock *req,
			     const struct sock *sk_listener,
			     struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;
	int ret;

	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);

	ret = __subflow_init_req(req, sk_listener);
	if (ret)
		return;

	mptcp_get_options(skb, &mp_opt);

	if (mp_opt.mp_capable) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);

		if (mp_opt.mp_join)
			return;
	} else if (mp_opt.mp_join) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
	}

	if (mp_opt.mp_capable && listener->request_mptcp) {
		int err, retries = 4;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
again:
		do {
			get_random_bytes(&subflow_req->local_key, sizeof(subflow_req->local_key));
		} while (subflow_req->local_key == 0);

		if (unlikely(req->syncookie)) {
			mptcp_crypto_key_sha(subflow_req->local_key,
					     &subflow_req->token,
					     &subflow_req->idsn);
			if (mptcp_token_exists(subflow_req->token)) {
				if (retries-- > 0)
					goto again;
			} else {
				subflow_req->mp_capable = 1;
			}
			return;
		}

		err = mptcp_token_new_request(req);
		if (err == 0)
			subflow_req->mp_capable = 1;
		else if (retries-- > 0)
			goto again;

	} else if (mp_opt.mp_join && listener->request_mptcp) {
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
		subflow_req->mp_join = 1;
		subflow_req->backup = mp_opt.backup;
		subflow_req->remote_id = mp_opt.join_id;
		subflow_req->token = mp_opt.token;
		subflow_req->remote_nonce = mp_opt.nonce;
		subflow_req->msk = subflow_token_join_request(req, skb);

		if (unlikely(req->syncookie) && subflow_req->msk) {
			if (mptcp_can_accept_new_subflow(subflow_req->msk))
				subflow_init_req_cookie_join_save(subflow_req, skb);
		}

		pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
			 subflow_req->remote_nonce, subflow_req->msk);
	}
}

int mptcp_subflow_init_cookie_req(struct request_sock *req,
				  const struct sock *sk_listener,
				  struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;
	int err;

	err = __subflow_init_req(req, sk_listener);
	if (err)
		return err;

	mptcp_get_options(skb, &mp_opt);

	if (mp_opt.mp_capable && mp_opt.mp_join)
		return -EINVAL;

	if (mp_opt.mp_capable && listener->request_mptcp) {
		if (mp_opt.sndr_key == 0)
			return -EINVAL;

		subflow_req->local_key = mp_opt.rcvr_key;
		err = mptcp_token_new_request(req);
		if (err)
			return err;

		subflow_req->mp_capable = 1;
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
	} else if (mp_opt.mp_join && listener->request_mptcp) {
		if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
			return -EINVAL;

		if (mptcp_can_accept_new_subflow(subflow_req->msk))
			subflow_req->mp_join = 1;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req);

static void subflow_v4_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static void subflow_v6_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}
#endif

/* validate received truncated hmac and create hmac for third ACK */
static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
{
	u8 hmac[SHA256_DIGEST_SIZE];
	u64 thmac;

	subflow_generate_hmac(subflow->remote_key, subflow->local_key,
			      subflow->remote_nonce, subflow->local_nonce,
			      hmac);

	thmac = get_unaligned_be64(hmac);
	pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
		 subflow, subflow->token,
		 (unsigned long long)thmac,
		 (unsigned long long)subflow->thmac);

	return thmac == subflow->thmac;
}

void mptcp_subflow_reset(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct sock *sk = subflow->conn;

	tcp_set_state(ssk, TCP_CLOSE);
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	tcp_done(ssk);
	if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
	    schedule_work(&mptcp_sk(sk)->work))
		sock_hold(sk);
}

static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_options_received mp_opt;
	struct sock *parent = subflow->conn;

	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

	if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
		inet_sk_state_store(parent, TCP_ESTABLISHED);
		parent->sk_state_change(parent);
	}

	/* be sure no special action on any packet other than syn-ack */
	if (subflow->conn_finished)
		return;

	subflow->rel_write_seq = 1;
	subflow->conn_finished = 1;
	subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
	pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);

	mptcp_get_options(skb, &mp_opt);
	if (subflow->request_mptcp) {
		if (!mp_opt.mp_capable) {
			MPTCP_INC_STATS(sock_net(sk),
					MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
			mptcp_do_fallback(sk);
			pr_fallback(mptcp_sk(subflow->conn));
			goto fallback;
		}

		subflow->mp_capable = 1;
		subflow->can_ack = 1;
		subflow->remote_key = mp_opt.sndr_key;
		pr_debug("subflow=%p, remote_key=%llu", subflow,
			 subflow->remote_key);
		mptcp_finish_connect(sk);
	} else if (subflow->request_join) {
		u8 hmac[SHA256_DIGEST_SIZE];

		if (!mp_opt.mp_join)
			goto do_reset;

		subflow->thmac = mp_opt.thmac;
		subflow->remote_nonce = mp_opt.nonce;
		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
			 subflow->thmac, subflow->remote_nonce);

		if (!subflow_thmac_valid(subflow)) {
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
			goto do_reset;
		}

		subflow_generate_hmac(subflow->local_key, subflow->remote_key,
				      subflow->local_nonce,
				      subflow->remote_nonce,
				      hmac);
		memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

		if (!mptcp_finish_join(sk))
			goto do_reset;

		subflow->mp_join = 1;
		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
	} else if (mptcp_check_fallback(sk)) {
fallback:
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
	}
	return;

do_reset:
	mptcp_subflow_reset(sk);
}

struct request_sock_ops mptcp_subflow_request_sock_ops;
EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops);
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;

static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&mptcp_subflow_request_sock_ops,
				&subflow_request_sock_ipv4_ops,
				sk, skb);
drop:
	tcp_listendrop(sk);
	return 0;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;

static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	if (skb->protocol == htons(ETH_P_IP))
		return subflow_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	return tcp_conn_request(&mptcp_subflow_request_sock_ops,
				&subflow_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}
#endif

/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
			       const struct mptcp_options_received *mp_opt)
{
	const struct mptcp_subflow_request_sock *subflow_req;
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;

	subflow_req = mptcp_subflow_rsk(req);
	msk = subflow_req->msk;
	if (!msk)
		return false;

	subflow_generate_hmac(msk->remote_key, msk->local_key,
			      subflow_req->remote_nonce,
			      subflow_req->local_nonce, hmac);

	return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}

static void mptcp_sock_destruct(struct sock *sk)
{
	/* if the new mptcp socket isn't accepted, it is freed
	 * from the tcp listener socket's request queue, linked
	 * from req->sk. The tcp socket is released.
	 * This calls the ULP release function which will
	 * also remove the mptcp socket, via
	 * sock_put(ctx->conn).
	 *
	 * Problem is that the mptcp socket will be in
	 * ESTABLISHED state and will not have the SOCK_DEAD flag.
	 * Both result in warnings from inet_sock_destruct.
	 */

	if (sk->sk_state == TCP_ESTABLISHED) {
		sk->sk_state = TCP_CLOSE;
		WARN_ON_ONCE(sk->sk_socket);
		sock_orphan(sk);
	}

	mptcp_destroy_common(mptcp_sk(sk));
	inet_sock_destruct(sk);
}

static void mptcp_force_close(struct sock *sk)
{
	inet_sk_state_store(sk, TCP_CLOSE);
	sk_common_release(sk);
}

static void subflow_ulp_fallback(struct sock *sk,
				 struct mptcp_subflow_context *old_ctx)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	mptcp_subflow_tcp_fallback(sk, old_ctx);
	icsk->icsk_ulp_ops = NULL;
	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
	tcp_sk(sk)->is_mptcp = 0;
}

static void subflow_drop_ctx(struct sock *ssk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);

	if (!ctx)
		return;

	subflow_ulp_fallback(ssk, ctx);
	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}

void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
				     struct mptcp_options_received *mp_opt)
{
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	subflow->remote_key = mp_opt->sndr_key;
	subflow->fully_established = 1;
	subflow->can_ack = 1;
	WRITE_ONCE(msk->fully_established, true);
}

static struct sock *subflow_syn_recv_sock(const struct sock *sk,
					  struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst,
					  struct request_sock *req_unhash,
					  bool *own_req)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
	struct mptcp_subflow_request_sock *subflow_req;
	struct mptcp_options_received mp_opt;
	bool fallback, fallback_is_fatal;
	struct sock *new_msk = NULL;
	struct sock *child;

	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);

	/* After child creation we must look for 'mp_capable' even when options
	 * are not parsed
	 */
	mp_opt.mp_capable = 0;

	/* hopefully temporary handling for MP_JOIN+syncookie */
	subflow_req = mptcp_subflow_rsk(req);
	fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
	fallback = !tcp_rsk(req)->is_mptcp;
	if (fallback)
		goto create_child;

	/* if the sk is MP_CAPABLE, we try to fetch the client key */
	if (subflow_req->mp_capable) {
		if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
			/* here we can receive and accept an in-window,
			 * out-of-order pkt, which will not carry the MP_CAPABLE
			 * opt even on mptcp enabled paths
			 */
			goto create_msk;
		}

		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_capable) {
			fallback = true;
			goto create_child;
		}

create_msk:
		new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
		if (!new_msk)
			fallback = true;
	} else if (subflow_req->mp_join) {
		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) ||
		    !mptcp_can_accept_new_subflow(subflow_req->msk)) {
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
			fallback = true;
		}
	}

create_child:
	child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
						     req_unhash, own_req);

	if (child && *own_req) {
		struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);

		tcp_rsk(req)->drop_req = false;

		/* we need to fall back on ctx allocation failure and on pre-reqs
		 * checking above.
		 * In the latter scenario we additionally need
		 * to reset the context to non MPTCP status.
		 */
		if (!ctx || fallback) {
			if (fallback_is_fatal)
				goto dispose_child;

			subflow_drop_ctx(child);
			goto out;
		}

		if (ctx->mp_capable) {
			/* this can't race with mptcp_close(), as the msk is
			 * not yet exposed to user-space
			 */
			inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);

			/* new mpc subflow takes ownership of the newly
			 * created mptcp socket
			 */
			new_msk->sk_destruct = mptcp_sock_destruct;
			mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
			mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
			ctx->conn = new_msk;
			new_msk = NULL;

			/* with OoO packets we can reach here without ingress
			 * mpc option
			 */
			if (mp_opt.mp_capable)
				mptcp_subflow_fully_established(ctx, &mp_opt);
		} else if (ctx->mp_join) {
			struct mptcp_sock *owner;

			owner = subflow_req->msk;
			if (!owner)
				goto dispose_child;

			/* move the msk reference ownership to the subflow */
			subflow_req->msk = NULL;
			ctx->conn = (struct sock *)owner;
			if (!mptcp_finish_join(child))
				goto dispose_child;

			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
			tcp_rsk(req)->drop_req = true;
		}
	}

out:
	/* dispose of the left over mptcp master, if any */
	if (unlikely(new_msk))
		mptcp_force_close(new_msk);

	/* check for expected invariant - should never trigger, just help
	 * catching earlier subtle bugs
	 */
	WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
		     (!mptcp_subflow_ctx(child) ||
		      !mptcp_subflow_ctx(child)->conn));
	return child;

dispose_child:
	subflow_drop_ctx(child);
	tcp_rsk(req)->drop_req = true;
	inet_csk_prepare_for_destroy_sock(child);
	tcp_done(child);
	req->rsk_ops->send_reset(sk, skb);

	/* The last child reference will be released by the caller */
	return child;
}

static struct inet_connection_sock_af_ops subflow_specific;

enum mapping_status {
	MAPPING_OK,
	MAPPING_INVALID,
	MAPPING_EMPTY,
	MAPPING_DATA_FIN,
	MAPPING_DUMMY
};

static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
{
	if ((u32)seq == (u32)old_seq)
		return old_seq;

	/* Assume map covers data not mapped yet. */
	return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}

static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
	WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
		  ssn, subflow->map_subflow_seq, subflow->map_data_len);
}

static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	unsigned int skb_consumed;

	skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
	if (WARN_ON_ONCE(skb_consumed >= skb->len))
		return true;

	return skb->len - skb_consumed <= subflow->map_data_len -
					  mptcp_subflow_get_map_offset(subflow);
}

static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;

	if (unlikely(before(ssn, subflow->map_subflow_seq))) {
		/* Mapping covers data later in the subflow stream,
		 * currently unsupported.
		 */
		warn_bad_map(subflow, ssn);
		return false;
	}
	if (unlikely(!before(ssn, subflow->map_subflow_seq +
			     subflow->map_data_len))) {
		/* Mapping covers past subflow data, invalid */
		warn_bad_map(subflow, ssn + skb->len);
		return false;
	}
	return true;
}

static enum mapping_status get_mapping_status(struct sock *ssk,
					      struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct mptcp_ext *mpext;
	struct sk_buff *skb;
	u16 data_len;
	u64 map_seq;

	skb = skb_peek(&ssk->sk_receive_queue);
	if (!skb)
		return MAPPING_EMPTY;

	if (mptcp_check_fallback(ssk))
		return MAPPING_DUMMY;

	mpext = mptcp_get_ext(skb);
	if (!mpext || !mpext->use_map) {
		if (!subflow->map_valid && !skb->len) {
			/* the TCP stack delivers 0 len FIN pkts to the receive
			 * queue; those are the only 0len pkts ever expected here,
			 * and we can admit no mapping only for 0 len pkts
			 */
			if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
				WARN_ONCE(1, "0len seq %d:%d flags %x",
					  TCP_SKB_CB(skb)->seq,
					  TCP_SKB_CB(skb)->end_seq,
					  TCP_SKB_CB(skb)->tcp_flags);
			sk_eat_skb(ssk, skb);
			return MAPPING_EMPTY;
		}

		if (!subflow->map_valid)
			return MAPPING_INVALID;

		goto validate_seq;
	}

	pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
		 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
		 mpext->data_len, mpext->data_fin);

	data_len = mpext->data_len;
	if (data_len == 0) {
		pr_err("Infinite mapping not handled");
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
		return MAPPING_INVALID;
	}

	if (mpext->data_fin == 1) {
		if (data_len == 1) {
			bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq,
								 mpext->dsn64);
			pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq);
			if (subflow->map_valid) {
				/* A DATA_FIN might arrive in a DSS
				 * option before the previous mapping
				 * has been fully consumed. Continue
				 * handling the existing mapping.
				 */
				skb_ext_del(skb, SKB_EXT_MPTCP);
				return MAPPING_OK;
			} else {
				if (updated && schedule_work(&msk->work))
					sock_hold((struct sock *)msk);

				return MAPPING_DATA_FIN;
			}
		} else {
			u64 data_fin_seq = mpext->data_seq + data_len - 1;

			/* If mpext->data_seq is a 32-bit value, data_fin_seq
			 * must also be limited to 32 bits.
			 */
			if (!mpext->dsn64)
				data_fin_seq &= GENMASK_ULL(31, 0);

			mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
			pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d",
				 data_fin_seq, mpext->dsn64);
		}

		/* Adjust for DATA_FIN using 1 byte of sequence space */
		data_len--;
	}

	if (!mpext->dsn64) {
		map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
				     mpext->data_seq);
		pr_debug("expanded seq=%llu", subflow->map_seq);
	} else {
		map_seq = mpext->data_seq;
	}
	WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64);

	if (subflow->map_valid) {
		/* Allow replacing only with an identical map */
		if (subflow->map_seq == map_seq &&
		    subflow->map_subflow_seq == mpext->subflow_seq &&
		    subflow->map_data_len == data_len) {
			skb_ext_del(skb, SKB_EXT_MPTCP);
			return MAPPING_OK;
		}

		/* If this skb data are fully covered by the current mapping,
		 * the new map would need caching, which is not supported
		 */
		if (skb_is_fully_mapped(ssk, skb)) {
			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
			return MAPPING_INVALID;
		}

		/* will validate the next map after consuming the current one */
		return MAPPING_OK;
	}

	subflow->map_seq = map_seq;
	subflow->map_subflow_seq = mpext->subflow_seq;
	subflow->map_data_len = data_len;
	subflow->map_valid = 1;
	subflow->mpc_map = mpext->mpc_map;
	pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
		 subflow->map_seq, subflow->map_subflow_seq,
		 subflow->map_data_len);

validate_seq:
	/* we revalidate valid mapping on new skb, because we must ensure
	 * the current skb is completely covered by the available mapping
	 */
	if (!validate_mapping(ssk, skb))
		return MAPPING_INVALID;

	skb_ext_del(skb, SKB_EXT_MPTCP);
	return MAPPING_OK;
}

static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
				       u64 limit)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
	u32 incr;

	incr = limit >= skb->len ? skb->len + fin : limit;

	pr_debug("discarding=%d len=%d seq=%d", incr, skb->len,
		 subflow->map_subflow_seq);
	MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA);
	tcp_sk(ssk)->copied_seq += incr;
	if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq))
		sk_eat_skb(ssk, skb);
	if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len)
		subflow->map_valid = 0;
	if (incr)
		tcp_cleanup_rbuf(ssk, incr);
}

static bool subflow_check_data_avail(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	enum mapping_status status;
	struct mptcp_sock *msk;
	struct sk_buff *skb;

	pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
		 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
	if (!skb_peek(&ssk->sk_receive_queue))
		subflow->data_avail = 0;
	if (subflow->data_avail)
		return true;

	msk = mptcp_sk(subflow->conn);
	for (;;) {
		u64 ack_seq;
		u64 old_ack;

		status = get_mapping_status(ssk, msk);
		pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
		if (status == MAPPING_INVALID) {
			ssk->sk_err = EBADMSG;
			goto fatal;
		}
		if (status == MAPPING_DUMMY) {
			__mptcp_do_fallback(msk);
			skb = skb_peek(&ssk->sk_receive_queue);
			subflow->map_valid = 1;
			subflow->map_seq = READ_ONCE(msk->ack_seq);
			subflow->map_data_len = skb->len;
			subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
						   subflow->ssn_offset;
			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
			return true;
		}

		if (status != MAPPING_OK)
			return false;

		skb = skb_peek(&ssk->sk_receive_queue);
		if (WARN_ON_ONCE(!skb))
			return false;

		/* if msk lacks the remote key, this subflow must provide an
		 * MP_CAPABLE-based mapping
		 */
		if (unlikely(!READ_ONCE(msk->can_ack))) {
			if (!subflow->mpc_map) {
				ssk->sk_err = EBADMSG;
				goto fatal;
			}
			WRITE_ONCE(msk->remote_key, subflow->remote_key);
			WRITE_ONCE(msk->ack_seq, subflow->map_seq);
			WRITE_ONCE(msk->can_ack, true);
		}

		old_ack = READ_ONCE(msk->ack_seq);
		ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
		pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
			 ack_seq);
		if (ack_seq == old_ack) {
			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
			break;
		} else if (after64(ack_seq, old_ack)) {
			subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
			break;
		}

		/* only accept in-sequence mapping. Old values are spurious
		 * retransmissions
		 */
		mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
	}
	return true;

fatal:
	/* fatal protocol error, close the socket */
	/* This barrier is coupled with smp_rmb() in tcp_poll() */
	smp_wmb();
	ssk->sk_error_report(ssk);
	tcp_set_state(ssk, TCP_CLOSE);
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	subflow->data_avail = 0;
	return false;
}

bool mptcp_subflow_data_available(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	/* check if current mapping is still valid */
	if (subflow->map_valid &&
	    mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
		subflow->map_valid = 0;
		subflow->data_avail = 0;

		pr_debug("Done with mapping: seq=%u data_len=%u",
			 subflow->map_subflow_seq,
			 subflow->map_data_len);
	}

	return subflow_check_data_avail(sk);
}

/* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
 * not the ssk one.
 *
 * In mptcp, rwin is about the mptcp-level connection data.
 *
 * Data that is still on the ssk rx queue can thus be ignored;
 * as far as the mptcp peer is concerned, that data is still in flight.
 * The DSS ACK is updated when the skb is moved to the mptcp rx queue.
 */
void mptcp_space(const struct sock *ssk, int *space, int *full_space)
{
	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	const struct sock *sk = subflow->conn;

	*space = tcp_space(sk);
	*full_space = tcp_full_space(sk);
}

static void subflow_data_ready(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	u16 state = 1 << inet_sk_state_load(sk);
	struct sock *parent = subflow->conn;
	struct mptcp_sock *msk;

	msk = mptcp_sk(parent);
	if (state & TCPF_LISTEN) {
		set_bit(MPTCP_DATA_READY, &msk->flags);
		parent->sk_data_ready(parent);
		return;
	}

	WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
		     !subflow->mp_join && !(state & TCPF_CLOSE));

	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);
}

static void subflow_write_space(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	if (!sk_stream_is_writeable(sk))
		return;

	if (sk_stream_is_writeable(parent)) {
		set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
		smp_mb__after_atomic();
		/* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
		sk_stream_write_space(parent);
	}
}

static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (sk->sk_family == AF_INET6)
		return &subflow_v6_specific;
#endif
	return &subflow_specific;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_connection_sock_af_ops *target;

	target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);

	pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
		 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

	if (likely(icsk->icsk_af_ops == target))
		return;

	subflow->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = target;
}
#endif

static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
				struct sockaddr_storage *addr)
{
	memset(addr, 0, sizeof(*addr));
	addr->ss_family = info->family;
	if (addr->ss_family == AF_INET) {
		struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;

		in_addr->sin_addr = info->addr;
		in_addr->sin_port = info->port;
	}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (addr->ss_family == AF_INET6) {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;

		in6_addr->sin6_addr = info->addr6;
		in6_addr->sin6_port = info->port;
	}
#endif
}

int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
			    const struct mptcp_addr_info *remote)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_subflow_context *subflow;
	struct sockaddr_storage addr;
	int remote_id = remote->id;
	int local_id = loc->id;
	struct socket *sf;
	struct sock *ssk;
	u32 remote_token;
	int addrlen;
	int err;

	if (!mptcp_is_fully_established(sk))
		return -ENOTCONN;

	err = mptcp_subflow_create_socket(sk, &sf);
	if (err)
		return err;

	ssk = sf->sk;
	subflow = mptcp_subflow_ctx(ssk);
	do {
		get_random_bytes(&subflow->local_nonce, sizeof(u32));
	} while (!subflow->local_nonce);

	if (!local_id) {
		err = mptcp_pm_get_local_id(msk, (struct sock_common *)ssk);
		if (err < 0)
			goto failed;

		local_id = err;
	}

	subflow->remote_key = msk->remote_key;
	subflow->local_key = msk->local_key;
	subflow->token = msk->token;
	mptcp_info2sockaddr(loc, &addr);

	addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (loc->family == AF_INET6)
		addrlen = sizeof(struct sockaddr_in6);
#endif
	ssk->sk_bound_dev_if = loc->ifindex;
	err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
	if (err)
		goto failed;

	mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
	pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
		 remote_token, local_id, remote_id);
	subflow->remote_token = remote_token;
	subflow->local_id = local_id;
	subflow->remote_id = remote_id;
	subflow->request_join = 1;
	subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
	mptcp_info2sockaddr(remote, &addr);

	err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
	if (err && err != -EINPROGRESS)
		goto failed;

	spin_lock_bh(&msk->join_list_lock);
	list_add_tail(&subflow->node, &msk->join_list);
	spin_unlock_bh(&msk->join_list_lock);

	return err;

failed:
	sock_release(sf);
	return err;
}

int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
	struct mptcp_subflow_context *subflow;
	struct net *net = sock_net(sk);
	struct socket *sf;
	int err;

	/* un-accepted server sockets can reach here - on bad configuration
	 * bail early to avoid greater trouble later
	 */
	if (unlikely(!sk->sk_socket))
		return -EINVAL;

	err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
			       &sf);
	if (err)
		return err;

	lock_sock(sf->sk);

	/* kernel sockets do not by default acquire net ref, but TCP timer
	 * needs it.
	 */
	sf->sk->sk_net_refcnt = 1;
	get_net(net);
#ifdef CONFIG_PROC_FS
	this_cpu_add(*net->core.sock_inuse, 1);
#endif
	err = tcp_set_ulp(sf->sk, "mptcp");
	release_sock(sf->sk);

	if (err) {
		sock_release(sf);
		return err;
	}

	/* the newly created socket really belongs to the owning MPTCP master
	 * socket, even if for additional subflows the allocation is performed
	 * by a kernel workqueue. Adjust inode references, so that the
	 * procfs/diag interfaces really show this one belonging to the correct
	 * user.
	 */
	SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
	SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
	SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;

	subflow = mptcp_subflow_ctx(sf->sk);
	pr_debug("subflow=%p", subflow);

	*new_sock = sf;
	sock_hold(sk);
	subflow->conn = sk;

	return 0;
}

static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
							 gfp_t priority)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;

	ctx = kzalloc(sizeof(*ctx), priority);
	if (!ctx)
		return NULL;

	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
	INIT_LIST_HEAD(&ctx->node);

	pr_debug("subflow=%p", ctx);

	ctx->tcp_sock = sk;

	return ctx;
}

static void __subflow_state_change(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static bool subflow_is_done(const struct sock *sk)
{
	return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
}

static void subflow_state_change(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	__subflow_state_change(sk);

	if (subflow_simultaneous_connect(sk)) {
		mptcp_do_fallback(sk);
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
		pr_fallback(mptcp_sk(parent));
		subflow->conn_finished = 1;
		if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
			inet_sk_state_store(parent, TCP_ESTABLISHED);
			parent->sk_state_change(parent);
		}
	}

	/* as recvmsg() does not acquire the subflow socket for ssk selection,
	 * a fin packet carrying a DSS can go unnoticed if we don't trigger
	 * the data available machinery here.
	 */
	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);

	if (__mptcp_check_fallback(mptcp_sk(parent)) &&
	    !(parent->sk_shutdown & RCV_SHUTDOWN) &&
	    !subflow->rx_eof && subflow_is_done(sk)) {
		subflow->rx_eof = 1;
		mptcp_subflow_eof(parent);
	}
}

static int subflow_ulp_init(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;
	struct tcp_sock *tp = tcp_sk(sk);
	int err = 0;

	/* disallow attaching ULP to a socket unless it has been
	 * created with sock_create_kern()
	 */
	if (!sk->sk_kern_sock) {
		err = -EOPNOTSUPP;
		goto out;
	}

	ctx = subflow_create_ctx(sk, GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out;
	}

	pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);

	tp->is_mptcp = 1;
	ctx->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = subflow_default_af_ops(sk);
	ctx->tcp_data_ready = sk->sk_data_ready;
	ctx->tcp_state_change = sk->sk_state_change;
	ctx->tcp_write_space = sk->sk_write_space;
	sk->sk_data_ready = subflow_data_ready;
	sk->sk_write_space = subflow_write_space;
	sk->sk_state_change = subflow_state_change;
out:
	return err;
}

static void subflow_ulp_release(struct sock *sk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);

	if (!ctx)
		return;

	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}

static void subflow_ulp_clone(const struct request_sock *req,
			      struct sock *newsk,
			      const gfp_t priority)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
	struct mptcp_subflow_context *new_ctx;

	if (!tcp_rsk(req)->is_mptcp ||
	    (!subflow_req->mp_capable && !subflow_req->mp_join)) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx = subflow_create_ctx(newsk, priority);
	if (!new_ctx) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx->conn_finished = 1;
	new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
	new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
	new_ctx->tcp_state_change = old_ctx->tcp_state_change;
	new_ctx->tcp_write_space = old_ctx->tcp_write_space;
	new_ctx->rel_write_seq = 1;
	new_ctx->tcp_sock = newsk;

	if (subflow_req->mp_capable) {
		/* see comments in subflow_syn_recv_sock(), MPTCP connection
		 * is fully established only after we receive the remote key
		 */
		new_ctx->mp_capable = 1;
		new_ctx->local_key = subflow_req->local_key;
		new_ctx->token = subflow_req->token;
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->idsn = subflow_req->idsn;
	} else if (subflow_req->mp_join) {
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->mp_join = 1;
		new_ctx->fully_established = 1;
		new_ctx->backup = subflow_req->backup;
		new_ctx->local_id = subflow_req->local_id;
		new_ctx->remote_id = subflow_req->remote_id;
		new_ctx->token = subflow_req->token;
		new_ctx->thmac = subflow_req->thmac;
	}
}

static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
	.name		= "mptcp",
	.owner		= THIS_MODULE,
	.init		= subflow_ulp_init,
	.release	= subflow_ulp_release,
	.clone		= subflow_ulp_clone,
};

static int subflow_ops_init(struct request_sock_ops *subflow_ops)
{
	subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
	subflow_ops->slab_name = "request_sock_subflow";

	subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
					      subflow_ops->obj_size, 0,
					      SLAB_ACCOUNT |
					      SLAB_TYPESAFE_BY_RCU,
					      NULL);
	if (!subflow_ops->slab)
		return -ENOMEM;

	subflow_ops->destructor = subflow_req_destructor;

	return 0;
}

void __init mptcp_subflow_init(void)
{
	mptcp_subflow_request_sock_ops = tcp_request_sock_ops;
	if (subflow_ops_init(&mptcp_subflow_request_sock_ops) != 0)
		panic("MPTCP: failed to init subflow request sock ops\n");

	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;

	subflow_specific = ipv4_specific;
	subflow_specific.conn_request = subflow_v4_conn_request;
	subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_specific.sk_rx_dst_set = subflow_finish_connect;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
	subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;

	subflow_v6_specific = ipv6_specific;
	subflow_v6_specific.conn_request = subflow_v6_conn_request;
	subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;

	subflow_v6m_specific = subflow_v6_specific;
	subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
	subflow_v6m_specific.send_check = ipv4_specific.send_check;
	subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
	subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
	subflow_v6m_specific.net_frag_header_len = 0;
#endif

	mptcp_diag_subflow_init(&subflow_ulp_ops);

	if (tcp_register_ulp(&subflow_ulp_ops) != 0)
		panic("MPTCP: failed to register subflows to ULP\n");
}