// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#endif
#include <net/mptcp.h>
#include <uapi/linux/mptcp.h>
#include "protocol.h"
#include "mib.h"

static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
				  enum linux_mptcp_mib_field field)
{
	MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
}

static void subflow_req_destructor(struct request_sock *req)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	pr_debug("subflow_req=%p", subflow_req);

	if (subflow_req->msk)
		sock_put((struct sock *)subflow_req->msk);

	mptcp_token_destroy_request(req);
	tcp_request_sock_ops.destructor(req);
}

static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
				  void *hmac)
{
	u8 msg[8];

	put_unaligned_be32(nonce1, &msg[0]);
	put_unaligned_be32(nonce2, &msg[4]);

	mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}

static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
{
	return mptcp_is_fully_established((void *)msk) &&
	       READ_ONCE(msk->pm.accept_subflow);
}

/* validate received token and create truncated hmac and nonce for SYN-ACK */
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
						     const struct sk_buff *skb)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;
	int local_id;

	msk = mptcp_token_get_sock(subflow_req->token);
	if (!msk) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
		return NULL;
	}

	local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
	if (local_id < 0) {
		sock_put((struct sock *)msk);
		return NULL;
	}
	subflow_req->local_id = local_id;

	get_random_bytes(&subflow_req->local_nonce, sizeof(u32));

	subflow_generate_hmac(msk->local_key, msk->remote_key,
			      subflow_req->local_nonce,
			      subflow_req->remote_nonce, hmac);

	subflow_req->thmac = get_unaligned_be64(hmac);
	return msk;
}

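/* Common request_sock initialization, shared by the 3WHS and the
 * syncookie paths: clear the MPTCP-specific fields and rule out
 * TCP option space conflicts.
 */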
static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	subflow_req->mp_capable = 0;
	subflow_req->mp_join = 0;
	subflow_req->msk = NULL;
	mptcp_token_init_request(req);

#ifdef CONFIG_TCP_MD5SIG
	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
	 * TCP option space.
	 */
	if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
		return -EINVAL;
#endif

	return 0;
}

static void subflow_init_req(struct request_sock *req,
			     const struct sock *sk_listener,
			     struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;
	int ret;

	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);

	ret = __subflow_init_req(req, sk_listener);
	if (ret)
		return;

	mptcp_get_options(skb, &mp_opt);

	if (mp_opt.mp_capable) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);

		if (mp_opt.mp_join)
			return;
	} else if (mp_opt.mp_join) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
	}

	if (mp_opt.mp_capable && listener->request_mptcp) {
		int err, retries = 4;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
again:
		do {
			get_random_bytes(&subflow_req->local_key, sizeof(subflow_req->local_key));
		} while (subflow_req->local_key == 0);

		if (unlikely(req->syncookie)) {
			mptcp_crypto_key_sha(subflow_req->local_key,
					     &subflow_req->token,
					     &subflow_req->idsn);
			if (mptcp_token_exists(subflow_req->token)) {
				if (retries-- > 0)
					goto again;
			} else {
				subflow_req->mp_capable = 1;
			}
			return;
		}

		err = mptcp_token_new_request(req);
		if (err == 0)
			subflow_req->mp_capable = 1;
		else if (retries-- > 0)
			goto again;

	} else if (mp_opt.mp_join && listener->request_mptcp) {
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
		subflow_req->mp_join = 1;
		subflow_req->backup = mp_opt.backup;
		subflow_req->remote_id = mp_opt.join_id;
		subflow_req->token = mp_opt.token;
		subflow_req->remote_nonce = mp_opt.nonce;
		subflow_req->msk = subflow_token_join_request(req, skb);

		if (unlikely(req->syncookie) && subflow_req->msk) {
			if (mptcp_can_accept_new_subflow(subflow_req->msk))
				subflow_init_req_cookie_join_save(subflow_req, skb);
		}

		pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
			 subflow_req->remote_nonce, subflow_req->msk);
	}
}

int mptcp_subflow_init_cookie_req(struct request_sock *req,
				  const struct sock *sk_listener,
				  struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;
	int err;

	err = __subflow_init_req(req, sk_listener);
	if (err)
		return err;

	mptcp_get_options(skb, &mp_opt);

	if (mp_opt.mp_capable && mp_opt.mp_join)
		return -EINVAL;

	if (mp_opt.mp_capable && listener->request_mptcp) {
		if (mp_opt.sndr_key == 0)
			return -EINVAL;

		subflow_req->local_key = mp_opt.rcvr_key;
		err = mptcp_token_new_request(req);
		if (err)
			return err;

		subflow_req->mp_capable = 1;
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
	} else if (mp_opt.mp_join && listener->request_mptcp) {
		if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
			return -EINVAL;

		if (mptcp_can_accept_new_subflow(subflow_req->msk))
			subflow_req->mp_join = 1;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req);

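/* Address-family specific init_req wrappers: tag the request as MPTCP
 * aware, run the stock TCP initialization, then parse the MPTCP options.
 */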
static void subflow_v4_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static void subflow_v6_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}
#endif

/* validate received truncated hmac and create hmac for third ACK */
static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
{
	u8 hmac[SHA256_DIGEST_SIZE];
	u64 thmac;

	subflow_generate_hmac(subflow->remote_key, subflow->local_key,
			      subflow->remote_nonce, subflow->local_nonce,
			      hmac);

	thmac = get_unaligned_be64(hmac);
	pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
		 subflow, subflow->token,
		 (unsigned long long)thmac,
		 (unsigned long long)subflow->thmac);

	return thmac == subflow->thmac;
}

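/* Abort the subflow: send an active reset to the peer, mark the TCP
 * socket closed and let the msk worker dispose of this subflow.
 */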
void mptcp_subflow_reset(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct sock *sk = subflow->conn;

	tcp_set_state(ssk, TCP_CLOSE);
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	tcp_done(ssk);
	if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
	    schedule_work(&mptcp_sk(sk)->work))
		sock_hold(sk);
}

static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_options_received mp_opt;
	struct sock *parent = subflow->conn;

	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

	if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
		inet_sk_state_store(parent, TCP_ESTABLISHED);
		parent->sk_state_change(parent);
	}

	/* be sure no special action on any packet other than syn-ack */
	if (subflow->conn_finished)
		return;

	subflow->rel_write_seq = 1;
	subflow->conn_finished = 1;
	subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
	pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);

	mptcp_get_options(skb, &mp_opt);
	if (subflow->request_mptcp) {
		if (!mp_opt.mp_capable) {
			MPTCP_INC_STATS(sock_net(sk),
					MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
			mptcp_do_fallback(sk);
			pr_fallback(mptcp_sk(subflow->conn));
			goto fallback;
		}

		subflow->mp_capable = 1;
		subflow->can_ack = 1;
		subflow->remote_key = mp_opt.sndr_key;
		pr_debug("subflow=%p, remote_key=%llu", subflow,
			 subflow->remote_key);
		mptcp_finish_connect(sk);
	} else if (subflow->request_join) {
		u8 hmac[SHA256_DIGEST_SIZE];

		if (!mp_opt.mp_join)
			goto do_reset;

		subflow->thmac = mp_opt.thmac;
		subflow->remote_nonce = mp_opt.nonce;
		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
			 subflow->thmac, subflow->remote_nonce);

		if (!subflow_thmac_valid(subflow)) {
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
			goto do_reset;
		}

		subflow_generate_hmac(subflow->local_key, subflow->remote_key,
				      subflow->local_nonce,
				      subflow->remote_nonce,
				      hmac);
		memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

		if (!mptcp_finish_join(sk))
			goto do_reset;

		subflow->mp_join = 1;
		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
	} else if (mptcp_check_fallback(sk)) {
fallback:
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
	}
	return;

do_reset:
	mptcp_subflow_reset(sk);
}

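/* ops used by the MPTCP listeners; both the shared request_sock_ops and
 * the per-family tcp_request_sock_ops copies are filled in by
 * mptcp_subflow_init().
 */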
struct request_sock_ops mptcp_subflow_request_sock_ops;
EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops);
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;

static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&mptcp_subflow_request_sock_ops,
				&subflow_request_sock_ipv4_ops,
				sk, skb);
drop:
	tcp_listendrop(sk);
	return 0;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;

static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	if (skb->protocol == htons(ETH_P_IP))
		return subflow_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	return tcp_conn_request(&mptcp_subflow_request_sock_ops,
				&subflow_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}
#endif

/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
			       const struct mptcp_options_received *mp_opt)
{
	const struct mptcp_subflow_request_sock *subflow_req;
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;

	subflow_req = mptcp_subflow_rsk(req);
	msk = subflow_req->msk;
	if (!msk)
		return false;

	subflow_generate_hmac(msk->remote_key, msk->local_key,
			      subflow_req->remote_nonce,
			      subflow_req->local_nonce, hmac);

	return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}

static void mptcp_sock_destruct(struct sock *sk)
{
	/* if the new mptcp socket isn't accepted, it is freed
	 * from the tcp listener sockets request queue, linked
	 * from req->sk. The tcp socket is released.
	 * This calls the ULP release function which will
	 * also remove the mptcp socket, via
	 * sock_put(ctx->conn).
	 *
	 * Problem is that the mptcp socket will be in
	 * ESTABLISHED state and will not have the SOCK_DEAD flag.
	 * Both result in warnings from inet_sock_destruct.
	 */

	if (sk->sk_state == TCP_ESTABLISHED) {
		sk->sk_state = TCP_CLOSE;
		WARN_ON_ONCE(sk->sk_socket);
		sock_orphan(sk);
	}

	mptcp_destroy_common(mptcp_sk(sk));
	inet_sock_destruct(sk);
}

static void mptcp_force_close(struct sock *sk)
{
	inet_sk_state_store(sk, TCP_CLOSE);
	sk_common_release(sk);
}

static void subflow_ulp_fallback(struct sock *sk,
				 struct mptcp_subflow_context *old_ctx)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	mptcp_subflow_tcp_fallback(sk, old_ctx);
	icsk->icsk_ulp_ops = NULL;
	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
	tcp_sk(sk)->is_mptcp = 0;
}

static void subflow_drop_ctx(struct sock *ssk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);

	if (!ctx)
		return;

	subflow_ulp_fallback(ssk, ctx);
	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}

void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
				     struct mptcp_options_received *mp_opt)
{
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	subflow->remote_key = mp_opt->sndr_key;
	subflow->fully_established = 1;
	subflow->can_ack = 1;
	WRITE_ONCE(msk->fully_established, true);
}

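/* Build the child socket for a passively established subflow. On MPC
 * SYNs this also clones the msk that user space will later accept(); on
 * MP_JOIN the child is attached to the existing msk found via the token.
 * Falls back to plain TCP when the handshake did not complete as MPTCP.
 */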
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
					  struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst,
					  struct request_sock *req_unhash,
					  bool *own_req)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
	struct mptcp_subflow_request_sock *subflow_req;
	struct mptcp_options_received mp_opt;
	bool fallback, fallback_is_fatal;
	struct sock *new_msk = NULL;
	struct sock *child;

	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);

	/* After child creation we must look for 'mp_capable' even when options
	 * are not parsed
	 */
	mp_opt.mp_capable = 0;

	/* hopefully temporary handling for MP_JOIN+syncookie */
	subflow_req = mptcp_subflow_rsk(req);
	fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
	fallback = !tcp_rsk(req)->is_mptcp;
	if (fallback)
		goto create_child;

	/* if the sk is MP_CAPABLE, we try to fetch the client key */
	if (subflow_req->mp_capable) {
		if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
			/* here we can receive and accept an in-window,
			 * out-of-order pkt, which will not carry the MP_CAPABLE
			 * opt even on mptcp enabled paths
			 */
			goto create_msk;
		}

		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_capable) {
			fallback = true;
			goto create_child;
		}

create_msk:
		new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
		if (!new_msk)
			fallback = true;
	} else if (subflow_req->mp_join) {
		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_join ||
		    !mptcp_can_accept_new_subflow(subflow_req->msk) ||
		    !subflow_hmac_valid(req, &mp_opt)) {
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
			fallback = true;
		}
	}

create_child:
	child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
						     req_unhash, own_req);

	if (child && *own_req) {
		struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);

		tcp_rsk(req)->drop_req = false;

		/* we need to fallback on ctx allocation failure and on pre-reqs
		 * checking above. In the latter scenario we additionally need
		 * to reset the context to non MPTCP status.
		 */
		if (!ctx || fallback) {
			if (fallback_is_fatal)
				goto dispose_child;

			subflow_drop_ctx(child);
			goto out;
		}

		if (ctx->mp_capable) {
			/* this can't race with mptcp_close(), as the msk is
			 * not yet exposed to user-space
			 */
			inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);

			/* link the newly created socket to the msk */
			mptcp_add_pending_subflow(mptcp_sk(new_msk), ctx);
			WRITE_ONCE(mptcp_sk(new_msk)->first, child);

			/* new mpc subflow takes ownership of the newly
			 * created mptcp socket
			 */
			new_msk->sk_destruct = mptcp_sock_destruct;
			mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
			mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
			ctx->conn = new_msk;
			new_msk = NULL;

			/* with OoO packets we can reach here without ingress
			 * mpc option
			 */
			if (mp_opt.mp_capable)
				mptcp_subflow_fully_established(ctx, &mp_opt);
		} else if (ctx->mp_join) {
			struct mptcp_sock *owner;

			owner = subflow_req->msk;
			if (!owner)
				goto dispose_child;

			/* move the msk reference ownership to the subflow */
			subflow_req->msk = NULL;
			ctx->conn = (struct sock *)owner;
			if (!mptcp_finish_join(child))
				goto dispose_child;

			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
			tcp_rsk(req)->drop_req = true;
		}
	}

out:
	/* dispose of the left over mptcp master, if any */
	if (unlikely(new_msk))
		mptcp_force_close(new_msk);

	/* check for expected invariant - should never trigger, just help
	 * catching earlier subtle bugs
	 */
	WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
		     (!mptcp_subflow_ctx(child) ||
		      !mptcp_subflow_ctx(child)->conn));
	return child;

dispose_child:
	subflow_drop_ctx(child);
	tcp_rsk(req)->drop_req = true;
	inet_csk_prepare_for_destroy_sock(child);
	tcp_done(child);
	req->rsk_ops->send_reset(sk, skb);

	/* The last child reference will be released by the caller */
	return child;
}

static struct inet_connection_sock_af_ops subflow_specific;

enum mapping_status {
	MAPPING_OK,
	MAPPING_INVALID,
	MAPPING_EMPTY,
	MAPPING_DATA_FIN,
	MAPPING_DUMMY
};

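/* Expand a 32-bit data sequence number to 64 bits, taking the upper bits
 * from the end of the previous mapping; assumes the new map starts close
 * to where the old one ended.
 */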
static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
{
	if ((u32)seq == (u32)old_seq)
		return old_seq;

	/* Assume map covers data not mapped yet. */
	return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}

static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
	WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
		  ssn, subflow->map_subflow_seq, subflow->map_data_len);
}

static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	unsigned int skb_consumed;

	skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
	if (WARN_ON_ONCE(skb_consumed >= skb->len))
		return true;

	return skb->len - skb_consumed <= subflow->map_data_len -
					  mptcp_subflow_get_map_offset(subflow);
}

static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;

	if (unlikely(before(ssn, subflow->map_subflow_seq))) {
		/* Mapping covers data later in the subflow stream,
		 * currently unsupported.
		 */
		warn_bad_map(subflow, ssn);
		return false;
	}
	if (unlikely(!before(ssn, subflow->map_subflow_seq +
			     subflow->map_data_len))) {
		/* Mapping covers only past subflow data, invalid */
		warn_bad_map(subflow, ssn + skb->len);
		return false;
	}
	return true;
}

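/* Validate the DSS mapping (if any) carried by the skb at the head of the
 * subflow receive queue and install it as the current map; the return
 * value tells the caller how the data can be consumed.
 */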
static enum mapping_status get_mapping_status(struct sock *ssk,
					      struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct mptcp_ext *mpext;
	struct sk_buff *skb;
	u16 data_len;
	u64 map_seq;

	skb = skb_peek(&ssk->sk_receive_queue);
	if (!skb)
		return MAPPING_EMPTY;

	if (mptcp_check_fallback(ssk))
		return MAPPING_DUMMY;

	mpext = mptcp_get_ext(skb);
	if (!mpext || !mpext->use_map) {
		if (!subflow->map_valid && !skb->len) {
			/* the TCP stack delivers 0 len FIN pkts to the receive
			 * queue, those are the only 0len pkts ever expected here,
			 * and we can admit no mapping only for 0 len pkts
			 */
			if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
				WARN_ONCE(1, "0len seq %d:%d flags %x",
					  TCP_SKB_CB(skb)->seq,
					  TCP_SKB_CB(skb)->end_seq,
					  TCP_SKB_CB(skb)->tcp_flags);
			sk_eat_skb(ssk, skb);
			return MAPPING_EMPTY;
		}

		if (!subflow->map_valid)
			return MAPPING_INVALID;

		goto validate_seq;
	}

	pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
		 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
		 mpext->data_len, mpext->data_fin);

	data_len = mpext->data_len;
	if (data_len == 0) {
		pr_err("Infinite mapping not handled");
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
		return MAPPING_INVALID;
	}

	if (mpext->data_fin == 1) {
		if (data_len == 1) {
			bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq,
								 mpext->dsn64);
			pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq);
			if (subflow->map_valid) {
				/* A DATA_FIN might arrive in a DSS
				 * option before the previous mapping
				 * has been fully consumed. Continue
				 * handling the existing mapping.
				 */
				skb_ext_del(skb, SKB_EXT_MPTCP);
				return MAPPING_OK;
			} else {
				if (updated && schedule_work(&msk->work))
					sock_hold((struct sock *)msk);

				return MAPPING_DATA_FIN;
			}
		} else {
			u64 data_fin_seq = mpext->data_seq + data_len - 1;

			/* If mpext->data_seq is a 32-bit value, data_fin_seq
			 * must also be limited to 32 bits.
			 */
			if (!mpext->dsn64)
				data_fin_seq &= GENMASK_ULL(31, 0);

			mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
			pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d",
				 data_fin_seq, mpext->dsn64);
		}

		/* Adjust for DATA_FIN using 1 byte of sequence space */
		data_len--;
	}

	if (!mpext->dsn64) {
		map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
				     mpext->data_seq);
		pr_debug("expanded seq=%llu", subflow->map_seq);
	} else {
		map_seq = mpext->data_seq;
	}
	WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64);

	if (subflow->map_valid) {
		/* Allow replacing only with an identical map */
		if (subflow->map_seq == map_seq &&
		    subflow->map_subflow_seq == mpext->subflow_seq &&
		    subflow->map_data_len == data_len) {
			skb_ext_del(skb, SKB_EXT_MPTCP);
			return MAPPING_OK;
		}

		/* If this skb data are fully covered by the current mapping,
		 * the new map would need caching, which is not supported
		 */
		if (skb_is_fully_mapped(ssk, skb)) {
			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
			return MAPPING_INVALID;
		}

		/* will validate the next map after consuming the current one */
		return MAPPING_OK;
	}

	subflow->map_seq = map_seq;
	subflow->map_subflow_seq = mpext->subflow_seq;
	subflow->map_data_len = data_len;
	subflow->map_valid = 1;
	subflow->mpc_map = mpext->mpc_map;
	pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
		 subflow->map_seq, subflow->map_subflow_seq,
		 subflow->map_data_len);

validate_seq:
	/* we revalidate valid mapping on new skb, because we must ensure
	 * the current skb is completely covered by the available mapping
	 */
	if (!validate_mapping(ssk, skb))
		return MAPPING_INVALID;

	skb_ext_del(skb, SKB_EXT_MPTCP);
	return MAPPING_OK;
}

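/* Drop data already covered at the MPTCP level (e.g. a spurious
 * retransmission), advancing copied_seq as if it had been read.
 */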
static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
				       u64 limit)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
	u32 incr;

	incr = limit >= skb->len ? skb->len + fin : limit;

	pr_debug("discarding=%d len=%d seq=%d", incr, skb->len,
		 subflow->map_subflow_seq);
	MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA);
	tcp_sk(ssk)->copied_seq += incr;
	if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq))
		sk_eat_skb(ssk, skb);
	if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len)
		subflow->map_valid = 0;
}

static bool subflow_check_data_avail(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	enum mapping_status status;
	struct mptcp_sock *msk;
	struct sk_buff *skb;

	pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
		 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
	if (!skb_peek(&ssk->sk_receive_queue))
		subflow->data_avail = 0;
	if (subflow->data_avail)
		return true;

	msk = mptcp_sk(subflow->conn);
	for (;;) {
		u64 ack_seq;
		u64 old_ack;

		status = get_mapping_status(ssk, msk);
		pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
		if (status == MAPPING_INVALID) {
			ssk->sk_err = EBADMSG;
			goto fatal;
		}
		if (status == MAPPING_DUMMY) {
			__mptcp_do_fallback(msk);
			skb = skb_peek(&ssk->sk_receive_queue);
			subflow->map_valid = 1;
			subflow->map_seq = READ_ONCE(msk->ack_seq);
			subflow->map_data_len = skb->len;
			subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
						   subflow->ssn_offset;
			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
			return true;
		}

		if (status != MAPPING_OK)
			return false;

		skb = skb_peek(&ssk->sk_receive_queue);
		if (WARN_ON_ONCE(!skb))
			return false;

		/* if msk lacks the remote key, this subflow must provide an
		 * MP_CAPABLE-based mapping
		 */
		if (unlikely(!READ_ONCE(msk->can_ack))) {
			if (!subflow->mpc_map) {
				ssk->sk_err = EBADMSG;
				goto fatal;
			}
			WRITE_ONCE(msk->remote_key, subflow->remote_key);
			WRITE_ONCE(msk->ack_seq, subflow->map_seq);
			WRITE_ONCE(msk->can_ack, true);
		}

		old_ack = READ_ONCE(msk->ack_seq);
		ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
		pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
			 ack_seq);
		if (ack_seq == old_ack) {
			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
			break;
		} else if (after64(ack_seq, old_ack)) {
			subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
			break;
		}

		/* only accept in-sequence mapping. Old values are spurious
		 * retransmissions
		 */
		mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
	}
	return true;

fatal:
	/* fatal protocol error, close the socket */
	/* This barrier is coupled with smp_rmb() in tcp_poll() */
	smp_wmb();
	ssk->sk_error_report(ssk);
	tcp_set_state(ssk, TCP_CLOSE);
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	subflow->data_avail = 0;
	return false;
}

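/* Drop the current mapping once it has been fully consumed, then check
 * whether more in-sequence data is available on this subflow.
 */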
bool mptcp_subflow_data_available(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	/* check if current mapping is still valid */
	if (subflow->map_valid &&
	    mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
		subflow->map_valid = 0;
		subflow->data_avail = 0;

		pr_debug("Done with mapping: seq=%u data_len=%u",
			 subflow->map_subflow_seq,
			 subflow->map_data_len);
	}

	return subflow_check_data_avail(sk);
}

/* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
 * not the ssk one.
 *
 * In mptcp, rwin is about the mptcp-level connection data.
 *
 * Data that is still on the ssk rx queue can thus be ignored,
 * as far as the mptcp peer is concerned that data is still inflight.
 * DSS ACK is updated when skb is moved to the mptcp rx queue.
 */
void mptcp_space(const struct sock *ssk, int *space, int *full_space)
{
	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	const struct sock *sk = subflow->conn;

	*space = __mptcp_space(sk);
	*full_space = tcp_full_space(sk);
}

static void subflow_data_ready(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	u16 state = 1 << inet_sk_state_load(sk);
	struct sock *parent = subflow->conn;
	struct mptcp_sock *msk;

	msk = mptcp_sk(parent);
	if (state & TCPF_LISTEN) {
		set_bit(MPTCP_DATA_READY, &msk->flags);
		parent->sk_data_ready(parent);
		return;
	}

	WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
		     !subflow->mp_join && !(state & TCPF_CLOSE));

	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);
}

static void subflow_write_space(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct socket *sock = READ_ONCE(sk->sk_socket);
	struct sock *parent = subflow->conn;

	if (!sk_stream_is_writeable(sk))
		return;

	if (sock && sk_stream_is_writeable(parent))
		clear_bit(SOCK_NOSPACE, &sock->flags);

	sk_stream_write_space(parent);
}

static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (sk->sk_family == AF_INET6)
		return &subflow_v6_specific;
#endif
	return &subflow_specific;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
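/* Switch an AF_INET6 subflow carrying a v4-mapped address to the hybrid
 * v6m af_ops (which reuse the IPv4 transmit paths) and back.
 */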
void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_connection_sock_af_ops *target;

	target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);

	pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
		 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

	if (likely(icsk->icsk_af_ops == target))
		return;

	subflow->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = target;
}
#endif

static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
				struct sockaddr_storage *addr)
{
	memset(addr, 0, sizeof(*addr));
	addr->ss_family = info->family;
	if (addr->ss_family == AF_INET) {
		struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;

		in_addr->sin_addr = info->addr;
		in_addr->sin_port = info->port;
	}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (addr->ss_family == AF_INET6) {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;

		in6_addr->sin6_addr = info->addr6;
		in6_addr->sin6_port = info->port;
	}
#endif
}

int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
			    const struct mptcp_addr_info *remote)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_subflow_context *subflow;
	struct sockaddr_storage addr;
	int remote_id = remote->id;
	int local_id = loc->id;
	struct socket *sf;
	struct sock *ssk;
	u32 remote_token;
	int addrlen;
	int err;

	if (!mptcp_is_fully_established(sk))
		return -ENOTCONN;

	err = mptcp_subflow_create_socket(sk, &sf);
	if (err)
		return err;

	ssk = sf->sk;
	subflow = mptcp_subflow_ctx(ssk);
	do {
		get_random_bytes(&subflow->local_nonce, sizeof(u32));
	} while (!subflow->local_nonce);

	if (!local_id) {
		err = mptcp_pm_get_local_id(msk, (struct sock_common *)ssk);
		if (err < 0)
			goto failed;

		local_id = err;
	}

	subflow->remote_key = msk->remote_key;
	subflow->local_key = msk->local_key;
	subflow->token = msk->token;
	mptcp_info2sockaddr(loc, &addr);

	addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (loc->family == AF_INET6)
		addrlen = sizeof(struct sockaddr_in6);
#endif
	ssk->sk_bound_dev_if = loc->ifindex;
	err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
	if (err)
		goto failed;

	mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
	pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
		 remote_token, local_id, remote_id);
	subflow->remote_token = remote_token;
	subflow->local_id = local_id;
	subflow->remote_id = remote_id;
	subflow->request_join = 1;
	subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
	mptcp_info2sockaddr(remote, &addr);

	err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
	if (err && err != -EINPROGRESS)
		goto failed;

	mptcp_add_pending_subflow(msk, subflow);
	return err;

failed:
	subflow->disposable = 1;
	sock_release(sf);
	return err;
}

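/* Create a kernel TCP socket to back a new subflow, attach the "mptcp"
 * ULP and account it against the owning MPTCP socket.
 */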
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
	struct mptcp_subflow_context *subflow;
	struct net *net = sock_net(sk);
	struct socket *sf;
	int err;

	/* un-accepted server sockets can reach here - on bad configuration
	 * bail early to avoid greater trouble later
	 */
	if (unlikely(!sk->sk_socket))
		return -EINVAL;

	err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
			       &sf);
	if (err)
		return err;

	lock_sock(sf->sk);

	/* kernel sockets do not by default acquire net ref, but TCP timer
	 * needs it.
	 */
	sf->sk->sk_net_refcnt = 1;
	get_net(net);
#ifdef CONFIG_PROC_FS
	this_cpu_add(*net->core.sock_inuse, 1);
#endif
	err = tcp_set_ulp(sf->sk, "mptcp");
	release_sock(sf->sk);

	if (err) {
		sock_release(sf);
		return err;
	}

	/* the newly created socket really belongs to the owning MPTCP master
	 * socket, even if for additional subflows the allocation is performed
	 * by a kernel workqueue. Adjust inode references, so that the
	 * procfs/diag interfaces really show this one belonging to the correct
	 * user.
	 */
	SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
	SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
	SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;

	subflow = mptcp_subflow_ctx(sf->sk);
	pr_debug("subflow=%p", subflow);

	*new_sock = sf;
	sock_hold(sk);
	subflow->conn = sk;

	return 0;
}

static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
							gfp_t priority)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;

	ctx = kzalloc(sizeof(*ctx), priority);
	if (!ctx)
		return NULL;

	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
	INIT_LIST_HEAD(&ctx->node);

	pr_debug("subflow=%p", ctx);

	ctx->tcp_sock = sk;

	return ctx;
}

static void __subflow_state_change(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static bool subflow_is_done(const struct sock *sk)
{
	return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
}

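/* Propagate TCP state changes to the msk: handle fallback on simultaneous
 * connect, kick the data-available machinery and report subflow EOF.
 */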
static void subflow_state_change(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	__subflow_state_change(sk);

	if (subflow_simultaneous_connect(sk)) {
		mptcp_do_fallback(sk);
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
		pr_fallback(mptcp_sk(parent));
		subflow->conn_finished = 1;
		if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
			inet_sk_state_store(parent, TCP_ESTABLISHED);
			parent->sk_state_change(parent);
		}
	}

	/* as recvmsg() does not acquire the subflow socket for ssk selection
	 * a fin packet carrying a DSS can go unnoticed if we don't trigger
	 * the data available machinery here.
	 */
	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);

	if (__mptcp_check_fallback(mptcp_sk(parent)) &&
	    !subflow->rx_eof && subflow_is_done(sk)) {
		subflow->rx_eof = 1;
		mptcp_subflow_eof(parent);
	}
}

static int subflow_ulp_init(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;
	struct tcp_sock *tp = tcp_sk(sk);
	int err = 0;

	/* disallow attaching ULP to a socket unless it has been
	 * created with sock_create_kern()
	 */
	if (!sk->sk_kern_sock) {
		err = -EOPNOTSUPP;
		goto out;
	}

	ctx = subflow_create_ctx(sk, GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out;
	}

	pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);

	tp->is_mptcp = 1;
	ctx->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = subflow_default_af_ops(sk);
	ctx->tcp_data_ready = sk->sk_data_ready;
	ctx->tcp_state_change = sk->sk_state_change;
	ctx->tcp_write_space = sk->sk_write_space;
	sk->sk_data_ready = subflow_data_ready;
	sk->sk_write_space = subflow_write_space;
	sk->sk_state_change = subflow_state_change;
out:
	return err;
}

static void subflow_ulp_release(struct sock *ssk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
	bool release = true;
	struct sock *sk;

	if (!ctx)
		return;

	sk = ctx->conn;
	if (sk) {
		/* if the msk has been orphaned, keep the ctx
		 * alive, will be freed by mptcp_done()
		 */
		release = ctx->disposable;
		sock_put(sk);
	}

	if (release)
		kfree_rcu(ctx, rcu);
}

static void subflow_ulp_clone(const struct request_sock *req,
			      struct sock *newsk,
			      const gfp_t priority)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
	struct mptcp_subflow_context *new_ctx;

	if (!tcp_rsk(req)->is_mptcp ||
	    (!subflow_req->mp_capable && !subflow_req->mp_join)) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx = subflow_create_ctx(newsk, priority);
	if (!new_ctx) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx->conn_finished = 1;
	new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
	new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
	new_ctx->tcp_state_change = old_ctx->tcp_state_change;
	new_ctx->tcp_write_space = old_ctx->tcp_write_space;
	new_ctx->rel_write_seq = 1;
	new_ctx->tcp_sock = newsk;

	if (subflow_req->mp_capable) {
		/* see comments in subflow_syn_recv_sock(), MPTCP connection
		 * is fully established only after we receive the remote key
		 */
		new_ctx->mp_capable = 1;
		new_ctx->local_key = subflow_req->local_key;
		new_ctx->token = subflow_req->token;
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->idsn = subflow_req->idsn;
	} else if (subflow_req->mp_join) {
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->mp_join = 1;
		new_ctx->fully_established = 1;
		new_ctx->backup = subflow_req->backup;
		new_ctx->local_id = subflow_req->local_id;
		new_ctx->remote_id = subflow_req->remote_id;
		new_ctx->token = subflow_req->token;
		new_ctx->thmac = subflow_req->thmac;
	}
}

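/* ULP hooks tying the MPTCP subflow context to the TCP socket life cycle */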
static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
	.name		= "mptcp",
	.owner		= THIS_MODULE,
	.init		= subflow_ulp_init,
	.release	= subflow_ulp_release,
	.clone		= subflow_ulp_clone,
};

static int subflow_ops_init(struct request_sock_ops *subflow_ops)
{
	subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
	subflow_ops->slab_name = "request_sock_subflow";

	subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
					      subflow_ops->obj_size, 0,
					      SLAB_ACCOUNT |
					      SLAB_TYPESAFE_BY_RCU,
					      NULL);
	if (!subflow_ops->slab)
		return -ENOMEM;

	subflow_ops->destructor = subflow_req_destructor;

	return 0;
}

void __init mptcp_subflow_init(void)
{
	mptcp_subflow_request_sock_ops = tcp_request_sock_ops;
	if (subflow_ops_init(&mptcp_subflow_request_sock_ops) != 0)
		panic("MPTCP: failed to init subflow request sock ops\n");

	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;

	subflow_specific = ipv4_specific;
	subflow_specific.conn_request = subflow_v4_conn_request;
	subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_specific.sk_rx_dst_set = subflow_finish_connect;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
	subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;

	subflow_v6_specific = ipv6_specific;
	subflow_v6_specific.conn_request = subflow_v6_conn_request;
	subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;

	subflow_v6m_specific = subflow_v6_specific;
	subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
	subflow_v6m_specific.send_check = ipv4_specific.send_check;
	subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
	subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
	subflow_v6m_specific.net_frag_header_len = 0;
#endif

	mptcp_diag_subflow_init(&subflow_ulp_ops);

	if (tcp_register_ulp(&subflow_ulp_ops) != 0)
		panic("MPTCP: failed to register subflows to ULP\n");
}