// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP
 *
 * Copyright (c) 2017 - 2019, Intel Corporation.
 */

#define pr_fmt(fmt) "MPTCP: " fmt

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <crypto/algapi.h>
#include <crypto/sha.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/inet_hashtables.h>
#include <net/protocol.h>
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
#endif
#include <net/mptcp.h>
#include <uapi/linux/mptcp.h>
#include "protocol.h"
#include "mib.h"

static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
				  enum linux_mptcp_mib_field field)
{
	MPTCP_INC_STATS(sock_net(req_to_sk(req)), field);
}

static void subflow_req_destructor(struct request_sock *req)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	pr_debug("subflow_req=%p", subflow_req);

	if (subflow_req->msk)
		sock_put((struct sock *)subflow_req->msk);

	mptcp_token_destroy_request(req);
	tcp_request_sock_ops.destructor(req);
}

static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
				  void *hmac)
{
	u8 msg[8];

	put_unaligned_be32(nonce1, &msg[0]);
	put_unaligned_be32(nonce2, &msg[4]);

	mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac);
}

static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
{
	return mptcp_is_fully_established((void *)msk) &&
	       READ_ONCE(msk->pm.accept_subflow);
}

/* validate received token and create truncated hmac and nonce for SYN-ACK */
static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
						     const struct sk_buff *skb)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;
	int local_id;

	msk = mptcp_token_get_sock(subflow_req->token);
	if (!msk) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN);
		return NULL;
	}

	local_id = mptcp_pm_get_local_id(msk, (struct sock_common *)req);
	if (local_id < 0) {
		sock_put((struct sock *)msk);
		return NULL;
	}
	subflow_req->local_id = local_id;

	get_random_bytes(&subflow_req->local_nonce, sizeof(u32));

	subflow_generate_hmac(msk->local_key, msk->remote_key,
			      subflow_req->local_nonce,
			      subflow_req->remote_nonce, hmac);

	subflow_req->thmac = get_unaligned_be64(hmac);
	return msk;
}

static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);

	subflow_req->mp_capable = 0;
	subflow_req->mp_join = 0;
	subflow_req->msk = NULL;
	mptcp_token_init_request(req);

#ifdef CONFIG_TCP_MD5SIG
	/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
	 * TCP option space.
	 */
	if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
		return -EINVAL;
#endif

	return 0;
}

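/* Initialize the listener side request socket for an incoming SYN: parse
 * the MPTCP options and set up the MP_CAPABLE or MP_JOIN specific state.
 */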
static void subflow_init_req(struct request_sock *req,
			     const struct sock *sk_listener,
			     struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;
	int ret;

	pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);

	ret = __subflow_init_req(req, sk_listener);
	if (ret)
		return;

	mptcp_get_options(skb, &mp_opt);

	if (mp_opt.mp_capable) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);

		if (mp_opt.mp_join)
			return;
	} else if (mp_opt.mp_join) {
		SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX);
	}

	if (mp_opt.mp_capable && listener->request_mptcp) {
		int err, retries = 4;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
again:
		do {
			get_random_bytes(&subflow_req->local_key, sizeof(subflow_req->local_key));
		} while (subflow_req->local_key == 0);

		if (unlikely(req->syncookie)) {
			mptcp_crypto_key_sha(subflow_req->local_key,
					     &subflow_req->token,
					     &subflow_req->idsn);
			if (mptcp_token_exists(subflow_req->token)) {
				if (retries-- > 0)
					goto again;
			} else {
				subflow_req->mp_capable = 1;
			}
			return;
		}

		err = mptcp_token_new_request(req);
		if (err == 0)
			subflow_req->mp_capable = 1;
		else if (retries-- > 0)
			goto again;

	} else if (mp_opt.mp_join && listener->request_mptcp) {
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq;
		subflow_req->mp_join = 1;
		subflow_req->backup = mp_opt.backup;
		subflow_req->remote_id = mp_opt.join_id;
		subflow_req->token = mp_opt.token;
		subflow_req->remote_nonce = mp_opt.nonce;
		subflow_req->msk = subflow_token_join_request(req, skb);

		if (unlikely(req->syncookie) && subflow_req->msk) {
			if (mptcp_can_accept_new_subflow(subflow_req->msk))
				subflow_init_req_cookie_join_save(subflow_req, skb);
		}

		pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token,
			 subflow_req->remote_nonce, subflow_req->msk);
	}
}

int mptcp_subflow_init_cookie_req(struct request_sock *req,
				  const struct sock *sk_listener,
				  struct sk_buff *skb)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_options_received mp_opt;
	int err;

	err = __subflow_init_req(req, sk_listener);
	if (err)
		return err;

	mptcp_get_options(skb, &mp_opt);

	if (mp_opt.mp_capable && mp_opt.mp_join)
		return -EINVAL;

	if (mp_opt.mp_capable && listener->request_mptcp) {
		if (mp_opt.sndr_key == 0)
			return -EINVAL;

		subflow_req->local_key = mp_opt.rcvr_key;
		err = mptcp_token_new_request(req);
		if (err)
			return err;

		subflow_req->mp_capable = 1;
		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
	} else if (mp_opt.mp_join && listener->request_mptcp) {
		if (!mptcp_token_join_cookie_init_state(subflow_req, skb))
			return -EINVAL;

		if (mptcp_can_accept_new_subflow(subflow_req->msk))
			subflow_req->mp_join = 1;

		subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req);

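/* init_req hooks for the passive open: mark the request as MPTCP, run the
 * regular TCP request initialization, then parse the MPTCP options.
 */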
static void subflow_v4_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv4_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static void subflow_v6_init_req(struct request_sock *req,
				const struct sock *sk_listener,
				struct sk_buff *skb)
{
	tcp_rsk(req)->is_mptcp = 1;

	tcp_request_sock_ipv6_ops.init_req(req, sk_listener, skb);

	subflow_init_req(req, sk_listener, skb);
}
#endif

/* validate received truncated hmac and create hmac for third ACK */
static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
{
	u8 hmac[SHA256_DIGEST_SIZE];
	u64 thmac;

	subflow_generate_hmac(subflow->remote_key, subflow->local_key,
			      subflow->remote_nonce, subflow->local_nonce,
			      hmac);

	thmac = get_unaligned_be64(hmac);
	pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
		 subflow, subflow->token,
		 (unsigned long long)thmac,
		 (unsigned long long)subflow->thmac);

	return thmac == subflow->thmac;
}

void mptcp_subflow_reset(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct sock *sk = subflow->conn;

	tcp_set_state(ssk, TCP_CLOSE);
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	tcp_done(ssk);
	if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
	    schedule_work(&mptcp_sk(sk)->work))
		sock_hold(sk);
}

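/* Active side handling of the SYN-ACK: complete the MP_CAPABLE handshake,
 * or validate the MP_JOIN truncated HMAC and compute the HMAC for the
 * third ACK; fall back to plain TCP when the expected options are missing.
 */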
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct mptcp_options_received mp_opt;
	struct sock *parent = subflow->conn;

	subflow->icsk_af_ops->sk_rx_dst_set(sk, skb);

	if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
		inet_sk_state_store(parent, TCP_ESTABLISHED);
		parent->sk_state_change(parent);
	}

	/* be sure no special action on any packet other than syn-ack */
	if (subflow->conn_finished)
		return;

	subflow->rel_write_seq = 1;
	subflow->conn_finished = 1;
	subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
	pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);

	mptcp_get_options(skb, &mp_opt);
	if (subflow->request_mptcp) {
		if (!mp_opt.mp_capable) {
			MPTCP_INC_STATS(sock_net(sk),
					MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
			mptcp_do_fallback(sk);
			pr_fallback(mptcp_sk(subflow->conn));
			goto fallback;
		}

		subflow->mp_capable = 1;
		subflow->can_ack = 1;
		subflow->remote_key = mp_opt.sndr_key;
		pr_debug("subflow=%p, remote_key=%llu", subflow,
			 subflow->remote_key);
		mptcp_finish_connect(sk);
	} else if (subflow->request_join) {
		u8 hmac[SHA256_DIGEST_SIZE];

		if (!mp_opt.mp_join)
			goto do_reset;

		subflow->thmac = mp_opt.thmac;
		subflow->remote_nonce = mp_opt.nonce;
		pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow,
			 subflow->thmac, subflow->remote_nonce);

		if (!subflow_thmac_valid(subflow)) {
			MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINACKMAC);
			goto do_reset;
		}

		subflow_generate_hmac(subflow->local_key, subflow->remote_key,
				      subflow->local_nonce,
				      subflow->remote_nonce,
				      hmac);
		memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);

		if (!mptcp_finish_join(sk))
			goto do_reset;

		subflow->mp_join = 1;
		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
	} else if (mptcp_check_fallback(sk)) {
fallback:
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
	}
	return;

do_reset:
	mptcp_subflow_reset(sk);
}

struct request_sock_ops mptcp_subflow_request_sock_ops;
EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops);
static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;

static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	/* Never answer to SYNs sent to broadcast or multicast */
	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
		goto drop;

	return tcp_conn_request(&mptcp_subflow_request_sock_ops,
				&subflow_request_sock_ipv4_ops,
				sk, skb);
drop:
	tcp_listendrop(sk);
	return 0;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;

static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	pr_debug("subflow=%p", subflow);

	if (skb->protocol == htons(ETH_P_IP))
		return subflow_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	return tcp_conn_request(&mptcp_subflow_request_sock_ops,
				&subflow_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}
#endif

/* validate hmac received in third ACK */
static bool subflow_hmac_valid(const struct request_sock *req,
			       const struct mptcp_options_received *mp_opt)
{
	const struct mptcp_subflow_request_sock *subflow_req;
	u8 hmac[SHA256_DIGEST_SIZE];
	struct mptcp_sock *msk;

	subflow_req = mptcp_subflow_rsk(req);
	msk = subflow_req->msk;
	if (!msk)
		return false;

	subflow_generate_hmac(msk->remote_key, msk->local_key,
			      subflow_req->remote_nonce,
			      subflow_req->local_nonce, hmac);

	return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}

static void mptcp_sock_destruct(struct sock *sk)
{
	/* if the new mptcp socket isn't accepted, it is freed
	 * from the tcp listener socket's request queue, linked
	 * from req->sk.  The tcp socket is released.
	 * This calls the ULP release function which will
	 * also remove the mptcp socket, via
	 * sock_put(ctx->conn).
	 *
	 * Problem is that the mptcp socket will be in
	 * ESTABLISHED state and will not have the SOCK_DEAD flag.
	 * Both result in warnings from inet_sock_destruct.
	 */

	if (sk->sk_state == TCP_ESTABLISHED) {
		sk->sk_state = TCP_CLOSE;
		WARN_ON_ONCE(sk->sk_socket);
		sock_orphan(sk);
	}

	mptcp_destroy_common(mptcp_sk(sk));
	inet_sock_destruct(sk);
}

static void mptcp_force_close(struct sock *sk)
{
	inet_sk_state_store(sk, TCP_CLOSE);
	sk_common_release(sk);
}

static void subflow_ulp_fallback(struct sock *sk,
				 struct mptcp_subflow_context *old_ctx)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	mptcp_subflow_tcp_fallback(sk, old_ctx);
	icsk->icsk_ulp_ops = NULL;
	rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
	tcp_sk(sk)->is_mptcp = 0;
}

static void subflow_drop_ctx(struct sock *ssk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);

	if (!ctx)
		return;

	subflow_ulp_fallback(ssk, ctx);
	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}

void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
				     struct mptcp_options_received *mp_opt)
{
	struct mptcp_sock *msk = mptcp_sk(subflow->conn);

	subflow->remote_key = mp_opt->sndr_key;
	subflow->fully_established = 1;
	subflow->can_ack = 1;
	WRITE_ONCE(msk->fully_established, true);
}

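/* Create the child socket when the third ACK of the handshake is received:
 * attach the newly cloned MPTCP master socket for MP_CAPABLE requests, or
 * validate the HMAC and join the existing MPTCP connection for MP_JOIN
 * requests; otherwise fall back to plain TCP.
 */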
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
					  struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst,
					  struct request_sock *req_unhash,
					  bool *own_req)
{
	struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk);
	struct mptcp_subflow_request_sock *subflow_req;
	struct mptcp_options_received mp_opt;
	bool fallback, fallback_is_fatal;
	struct sock *new_msk = NULL;
	struct sock *child;

	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);

	/* After child creation we must look for 'mp_capable' even when options
	 * are not parsed
	 */
	mp_opt.mp_capable = 0;

	/* hopefully temporary handling for MP_JOIN+syncookie */
	subflow_req = mptcp_subflow_rsk(req);
	fallback_is_fatal = tcp_rsk(req)->is_mptcp && subflow_req->mp_join;
	fallback = !tcp_rsk(req)->is_mptcp;
	if (fallback)
		goto create_child;

	/* if the sk is MP_CAPABLE, we try to fetch the client key */
	if (subflow_req->mp_capable) {
		if (TCP_SKB_CB(skb)->seq != subflow_req->ssn_offset + 1) {
			/* here we can receive and accept an in-window,
			 * out-of-order pkt, which will not carry the MP_CAPABLE
			 * opt even on mptcp enabled paths
			 */
			goto create_msk;
		}

		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_capable) {
			fallback = true;
			goto create_child;
		}

create_msk:
		new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req);
		if (!new_msk)
			fallback = true;
	} else if (subflow_req->mp_join) {
		mptcp_get_options(skb, &mp_opt);
		if (!mp_opt.mp_join ||
		    !mptcp_can_accept_new_subflow(subflow_req->msk) ||
		    !subflow_hmac_valid(req, &mp_opt)) {
			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
			fallback = true;
		}
	}

create_child:
	child = listener->icsk_af_ops->syn_recv_sock(sk, skb, req, dst,
						     req_unhash, own_req);

	if (child && *own_req) {
		struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);

		tcp_rsk(req)->drop_req = false;

		/* we need to fall back on ctx allocation failure and on pre-reqs
		 * checking above. In the latter scenario we additionally need
		 * to reset the context to non MPTCP status.
		 */
		if (!ctx || fallback) {
			if (fallback_is_fatal)
				goto dispose_child;

			subflow_drop_ctx(child);
			goto out;
		}

		if (ctx->mp_capable) {
			/* this can't race with mptcp_close(), as the msk is
			 * not yet exposed to user-space
			 */
			inet_sk_state_store((void *)new_msk, TCP_ESTABLISHED);

			/* new mpc subflow takes ownership of the newly
			 * created mptcp socket
			 */
			new_msk->sk_destruct = mptcp_sock_destruct;
			mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
			mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
			ctx->conn = new_msk;
			new_msk = NULL;

			/* with OoO packets we can reach here without ingress
			 * mpc option
			 */
			if (mp_opt.mp_capable)
				mptcp_subflow_fully_established(ctx, &mp_opt);
		} else if (ctx->mp_join) {
			struct mptcp_sock *owner;

			owner = subflow_req->msk;
			if (!owner)
				goto dispose_child;

			/* move the msk reference ownership to the subflow */
			subflow_req->msk = NULL;
			ctx->conn = (struct sock *)owner;
			if (!mptcp_finish_join(child))
				goto dispose_child;

			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
			tcp_rsk(req)->drop_req = true;
		}
	}

out:
	/* dispose of the left over mptcp master, if any */
	if (unlikely(new_msk))
		mptcp_force_close(new_msk);

	/* check for expected invariant - should never trigger, just help
	 * catching earlier subtle bugs
	 */
	WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp &&
		     (!mptcp_subflow_ctx(child) ||
		      !mptcp_subflow_ctx(child)->conn));
	return child;

dispose_child:
	subflow_drop_ctx(child);
	tcp_rsk(req)->drop_req = true;
	inet_csk_prepare_for_destroy_sock(child);
	tcp_done(child);
	req->rsk_ops->send_reset(sk, skb);

	/* The last child reference will be released by the caller */
	return child;
}

static struct inet_connection_sock_af_ops subflow_specific;

enum mapping_status {
	MAPPING_OK,
	MAPPING_INVALID,
	MAPPING_EMPTY,
	MAPPING_DATA_FIN,
	MAPPING_DUMMY
};

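/* Expand a 32-bit data sequence number carried by the DSS option to the
 * full 64-bit MPTCP sequence space, using the previous mapping as a
 * reference and assuming the new map covers data not mapped yet.
 */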
static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq)
{
	if ((u32)seq == (u32)old_seq)
		return old_seq;

	/* Assume map covers data not mapped yet. */
	return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32));
}

static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
{
	WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d",
		  ssn, subflow->map_subflow_seq, subflow->map_data_len);
}

static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	unsigned int skb_consumed;

	skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq;
	if (WARN_ON_ONCE(skb_consumed >= skb->len))
		return true;

	return skb->len - skb_consumed <= subflow->map_data_len -
					  mptcp_subflow_get_map_offset(subflow);
}

static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	u32 ssn = tcp_sk(ssk)->copied_seq - subflow->ssn_offset;

	if (unlikely(before(ssn, subflow->map_subflow_seq))) {
		/* Mapping covers data later in the subflow stream,
		 * currently unsupported.
		 */
		warn_bad_map(subflow, ssn);
		return false;
	}
	if (unlikely(!before(ssn, subflow->map_subflow_seq +
			     subflow->map_data_len))) {
		/* Mapping only covers past subflow data, invalid */
		warn_bad_map(subflow, ssn + skb->len);
		return false;
	}
	return true;
}

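/* Parse the DSS option carried by the skb at the head of the subflow
 * receive queue and, if valid, make it the current subflow mapping;
 * DATA_FIN notifications are propagated to the msk from here, too.
 */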
static enum mapping_status get_mapping_status(struct sock *ssk,
					      struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	struct mptcp_ext *mpext;
	struct sk_buff *skb;
	u16 data_len;
	u64 map_seq;

	skb = skb_peek(&ssk->sk_receive_queue);
	if (!skb)
		return MAPPING_EMPTY;

	if (mptcp_check_fallback(ssk))
		return MAPPING_DUMMY;

	mpext = mptcp_get_ext(skb);
	if (!mpext || !mpext->use_map) {
		if (!subflow->map_valid && !skb->len) {
			/* the TCP stack delivers 0 len FIN pkts to the receive
			 * queue, those are the only 0 len pkts ever expected here,
			 * and we can admit no mapping only for 0 len pkts
			 */
			if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
				WARN_ONCE(1, "0len seq %d:%d flags %x",
					  TCP_SKB_CB(skb)->seq,
					  TCP_SKB_CB(skb)->end_seq,
					  TCP_SKB_CB(skb)->tcp_flags);
			sk_eat_skb(ssk, skb);
			return MAPPING_EMPTY;
		}

		if (!subflow->map_valid)
			return MAPPING_INVALID;

		goto validate_seq;
	}

	pr_debug("seq=%llu is64=%d ssn=%u data_len=%u data_fin=%d",
		 mpext->data_seq, mpext->dsn64, mpext->subflow_seq,
		 mpext->data_len, mpext->data_fin);

	data_len = mpext->data_len;
	if (data_len == 0) {
		pr_err("Infinite mapping not handled");
		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
		return MAPPING_INVALID;
	}

	if (mpext->data_fin == 1) {
		if (data_len == 1) {
			bool updated = mptcp_update_rcv_data_fin(msk, mpext->data_seq,
								 mpext->dsn64);
			pr_debug("DATA_FIN with no payload seq=%llu", mpext->data_seq);
			if (subflow->map_valid) {
				/* A DATA_FIN might arrive in a DSS
				 * option before the previous mapping
				 * has been fully consumed. Continue
				 * handling the existing mapping.
				 */
				skb_ext_del(skb, SKB_EXT_MPTCP);
				return MAPPING_OK;
			} else {
				if (updated && schedule_work(&msk->work))
					sock_hold((struct sock *)msk);

				return MAPPING_DATA_FIN;
			}
		} else {
			u64 data_fin_seq = mpext->data_seq + data_len - 1;

			/* If mpext->data_seq is a 32-bit value, data_fin_seq
			 * must also be limited to 32 bits.
			 */
			if (!mpext->dsn64)
				data_fin_seq &= GENMASK_ULL(31, 0);

			mptcp_update_rcv_data_fin(msk, data_fin_seq, mpext->dsn64);
			pr_debug("DATA_FIN with mapping seq=%llu dsn64=%d",
				 data_fin_seq, mpext->dsn64);
		}

		/* Adjust for DATA_FIN using 1 byte of sequence space */
		data_len--;
	}

	if (!mpext->dsn64) {
		map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
				     mpext->data_seq);
		pr_debug("expanded seq=%llu", subflow->map_seq);
	} else {
		map_seq = mpext->data_seq;
	}
	WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64);

	if (subflow->map_valid) {
		/* Allow replacing only with an identical map */
		if (subflow->map_seq == map_seq &&
		    subflow->map_subflow_seq == mpext->subflow_seq &&
		    subflow->map_data_len == data_len) {
			skb_ext_del(skb, SKB_EXT_MPTCP);
			return MAPPING_OK;
		}

		/* If the data in this skb is fully covered by the current mapping,
		 * the new map would need caching, which is not supported
		 */
		if (skb_is_fully_mapped(ssk, skb)) {
			MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSNOMATCH);
			return MAPPING_INVALID;
		}

		/* will validate the next map after consuming the current one */
		return MAPPING_OK;
	}

	subflow->map_seq = map_seq;
	subflow->map_subflow_seq = mpext->subflow_seq;
	subflow->map_data_len = data_len;
	subflow->map_valid = 1;
	subflow->mpc_map = mpext->mpc_map;
	pr_debug("new map seq=%llu subflow_seq=%u data_len=%u",
		 subflow->map_seq, subflow->map_subflow_seq,
		 subflow->map_data_len);

validate_seq:
	/* we revalidate valid mapping on new skb, because we must ensure
	 * the current skb is completely covered by the available mapping
	 */
	if (!validate_mapping(ssk, skb))
		return MAPPING_INVALID;

	skb_ext_del(skb, SKB_EXT_MPTCP);
	return MAPPING_OK;
}

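/* Drop data already received at the MPTCP level (spurious retransmission):
 * advance copied_seq past up to @limit bytes and free fully consumed skbs.
 */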
static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
				       u64 limit)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
	u32 incr;

	incr = limit >= skb->len ? skb->len + fin : limit;

	pr_debug("discarding=%d len=%d seq=%d", incr, skb->len,
		 subflow->map_subflow_seq);
	MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA);
	tcp_sk(ssk)->copied_seq += incr;
	if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq))
		sk_eat_skb(ssk, skb);
	if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len)
		subflow->map_valid = 0;
	if (incr)
		tcp_cleanup_rbuf(ssk, incr);
}

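/* Check whether in-sequence MPTCP-level data is available on this subflow,
 * updating subflow->data_avail accordingly; handles fallback sockets,
 * out-of-order mappings and duplicate data.
 */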
static bool subflow_check_data_avail(struct sock *ssk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	enum mapping_status status;
	struct mptcp_sock *msk;
	struct sk_buff *skb;

	pr_debug("msk=%p ssk=%p data_avail=%d skb=%p", subflow->conn, ssk,
		 subflow->data_avail, skb_peek(&ssk->sk_receive_queue));
	if (!skb_peek(&ssk->sk_receive_queue))
		subflow->data_avail = 0;
	if (subflow->data_avail)
		return true;

	msk = mptcp_sk(subflow->conn);
	for (;;) {
		u64 ack_seq;
		u64 old_ack;

		status = get_mapping_status(ssk, msk);
		pr_debug("msk=%p ssk=%p status=%d", msk, ssk, status);
		if (status == MAPPING_INVALID) {
			ssk->sk_err = EBADMSG;
			goto fatal;
		}
		if (status == MAPPING_DUMMY) {
			__mptcp_do_fallback(msk);
			skb = skb_peek(&ssk->sk_receive_queue);
			subflow->map_valid = 1;
			subflow->map_seq = READ_ONCE(msk->ack_seq);
			subflow->map_data_len = skb->len;
			subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
						   subflow->ssn_offset;
			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
			return true;
		}

		if (status != MAPPING_OK)
			return false;

		skb = skb_peek(&ssk->sk_receive_queue);
		if (WARN_ON_ONCE(!skb))
			return false;

		/* if msk lacks the remote key, this subflow must provide an
		 * MP_CAPABLE-based mapping
		 */
		if (unlikely(!READ_ONCE(msk->can_ack))) {
			if (!subflow->mpc_map) {
				ssk->sk_err = EBADMSG;
				goto fatal;
			}
			WRITE_ONCE(msk->remote_key, subflow->remote_key);
			WRITE_ONCE(msk->ack_seq, subflow->map_seq);
			WRITE_ONCE(msk->can_ack, true);
		}

		old_ack = READ_ONCE(msk->ack_seq);
		ack_seq = mptcp_subflow_get_mapped_dsn(subflow);
		pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
			 ack_seq);
		if (ack_seq == old_ack) {
			subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
			break;
		} else if (after64(ack_seq, old_ack)) {
			subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
			break;
		}

		/* only accept in-sequence mapping. Old values are spurious
		 * retransmissions
		 */
		mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq);
	}
	return true;

fatal:
	/* fatal protocol error, close the socket */
	/* This barrier is coupled with smp_rmb() in tcp_poll() */
	smp_wmb();
	ssk->sk_error_report(ssk);
	tcp_set_state(ssk, TCP_CLOSE);
	tcp_send_active_reset(ssk, GFP_ATOMIC);
	subflow->data_avail = 0;
	return false;
}

bool mptcp_subflow_data_available(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);

	/* check if current mapping is still valid */
	if (subflow->map_valid &&
	    mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
		subflow->map_valid = 0;
		subflow->data_avail = 0;

		pr_debug("Done with mapping: seq=%u data_len=%u",
			 subflow->map_subflow_seq,
			 subflow->map_data_len);
	}

	return subflow_check_data_avail(sk);
}

/* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
 * not the ssk one.
 *
 * In mptcp, rwin is about the mptcp-level connection data.
 *
 * Data that is still on the ssk rx queue can thus be ignored,
 * as far as the mptcp peer is concerned that data is still inflight.
 * DSS ACK is updated when skb is moved to the mptcp rx queue.
 */
void mptcp_space(const struct sock *ssk, int *space, int *full_space)
{
	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
	const struct sock *sk = subflow->conn;

	*space = tcp_space(sk);
	*full_space = tcp_full_space(sk);
}

static void subflow_data_ready(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	u16 state = 1 << inet_sk_state_load(sk);
	struct sock *parent = subflow->conn;
	struct mptcp_sock *msk;

	msk = mptcp_sk(parent);
	if (state & TCPF_LISTEN) {
		set_bit(MPTCP_DATA_READY, &msk->flags);
		parent->sk_data_ready(parent);
		return;
	}

	WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable &&
		     !subflow->mp_join && !(state & TCPF_CLOSE));

	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);
}

static void subflow_write_space(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	if (!sk_stream_is_writeable(sk))
		return;

	if (sk_stream_is_writeable(parent)) {
		set_bit(MPTCP_SEND_SPACE, &mptcp_sk(parent)->flags);
		smp_mb__after_atomic();
		/* set SEND_SPACE before sk_stream_write_space clears NOSPACE */
		sk_stream_write_space(parent);
	}
}

static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (sk->sk_family == AF_INET6)
		return &subflow_v6_specific;
#endif
	return &subflow_specific;
}

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_connection_sock_af_ops *target;

	target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);

	pr_debug("subflow=%p family=%d ops=%p target=%p mapped=%d",
		 subflow, sk->sk_family, icsk->icsk_af_ops, target, mapped);

	if (likely(icsk->icsk_af_ops == target))
		return;

	subflow->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = target;
}
#endif

static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
				struct sockaddr_storage *addr)
{
	memset(addr, 0, sizeof(*addr));
	addr->ss_family = info->family;
	if (addr->ss_family == AF_INET) {
		struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;

		in_addr->sin_addr = info->addr;
		in_addr->sin_port = info->port;
	}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	else if (addr->ss_family == AF_INET6) {
		struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;

		in6_addr->sin6_addr = info->addr6;
		in6_addr->sin6_port = info->port;
	}
#endif
}

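/* Open an additional subflow bound to @loc towards @remote: create a
 * kernel TCP socket with the MPTCP ULP attached and connect it, requesting
 * the MP_JOIN handshake for the MPTCP connection owning @sk.
 */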
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
			    const struct mptcp_addr_info *remote)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_subflow_context *subflow;
	struct sockaddr_storage addr;
	int remote_id = remote->id;
	int local_id = loc->id;
	struct socket *sf;
	struct sock *ssk;
	u32 remote_token;
	int addrlen;
	int err;

	if (!mptcp_is_fully_established(sk))
		return -ENOTCONN;

	err = mptcp_subflow_create_socket(sk, &sf);
	if (err)
		return err;

	ssk = sf->sk;
	subflow = mptcp_subflow_ctx(ssk);
	do {
		get_random_bytes(&subflow->local_nonce, sizeof(u32));
	} while (!subflow->local_nonce);

	if (!local_id) {
		err = mptcp_pm_get_local_id(msk, (struct sock_common *)ssk);
		if (err < 0)
			goto failed;

		local_id = err;
	}

	subflow->remote_key = msk->remote_key;
	subflow->local_key = msk->local_key;
	subflow->token = msk->token;
	mptcp_info2sockaddr(loc, &addr);

	addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	if (loc->family == AF_INET6)
		addrlen = sizeof(struct sockaddr_in6);
#endif
	ssk->sk_bound_dev_if = loc->ifindex;
	err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
	if (err)
		goto failed;

	mptcp_crypto_key_sha(subflow->remote_key, &remote_token, NULL);
	pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
		 remote_token, local_id, remote_id);
	subflow->remote_token = remote_token;
	subflow->local_id = local_id;
	subflow->remote_id = remote_id;
	subflow->request_join = 1;
	subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
	mptcp_info2sockaddr(remote, &addr);

	err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
	if (err && err != -EINPROGRESS)
		goto failed;

	spin_lock_bh(&msk->join_list_lock);
	list_add_tail(&subflow->node, &msk->join_list);
	spin_unlock_bh(&msk->join_list_lock);

	return err;

failed:
	sock_release(sf);
	return err;
}

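/* Create the kernel socket backing a new subflow and attach the "mptcp"
 * ULP to it; the new subflow context keeps a reference on the msk socket.
 */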
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
	struct mptcp_subflow_context *subflow;
	struct net *net = sock_net(sk);
	struct socket *sf;
	int err;

	/* un-accepted server sockets can reach here - on bad configuration
	 * bail early to avoid greater trouble later
	 */
	if (unlikely(!sk->sk_socket))
		return -EINVAL;

	err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
			       &sf);
	if (err)
		return err;

	lock_sock(sf->sk);

	/* kernel sockets do not by default acquire net ref, but TCP timer
	 * needs it.
	 */
	sf->sk->sk_net_refcnt = 1;
	get_net(net);
#ifdef CONFIG_PROC_FS
	this_cpu_add(*net->core.sock_inuse, 1);
#endif
	err = tcp_set_ulp(sf->sk, "mptcp");
	release_sock(sf->sk);

	if (err) {
		sock_release(sf);
		return err;
	}

	/* the newly created socket really belongs to the owning MPTCP master
	 * socket, even if for additional subflows the allocation is performed
	 * by a kernel workqueue. Adjust inode references, so that the
	 * procfs/diag interfaces really show this one belonging to the correct
	 * user.
	 */
	SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
	SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid;
	SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid;

	subflow = mptcp_subflow_ctx(sf->sk);
	pr_debug("subflow=%p", subflow);

	*new_sock = sf;
	sock_hold(sk);
	subflow->conn = sk;

	return 0;
}

static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
							 gfp_t priority)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;

	ctx = kzalloc(sizeof(*ctx), priority);
	if (!ctx)
		return NULL;

	rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
	INIT_LIST_HEAD(&ctx->node);

	pr_debug("subflow=%p", ctx);

	ctx->tcp_sock = sk;

	return ctx;
}

static void __subflow_state_change(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	wq = rcu_dereference(sk->sk_wq);
	if (skwq_has_sleeper(wq))
		wake_up_interruptible_all(&wq->wait);
	rcu_read_unlock();
}

static bool subflow_is_done(const struct sock *sk)
{
	return sk->sk_shutdown & RCV_SHUTDOWN || sk->sk_state == TCP_CLOSE;
}

static void subflow_state_change(struct sock *sk)
{
	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
	struct sock *parent = subflow->conn;

	__subflow_state_change(sk);

	if (subflow_simultaneous_connect(sk)) {
		mptcp_do_fallback(sk);
		mptcp_rcv_space_init(mptcp_sk(parent), sk);
		pr_fallback(mptcp_sk(parent));
		subflow->conn_finished = 1;
		if (inet_sk_state_load(parent) == TCP_SYN_SENT) {
			inet_sk_state_store(parent, TCP_ESTABLISHED);
			parent->sk_state_change(parent);
		}
	}

	/* as recvmsg() does not acquire the subflow socket for ssk selection
	 * a fin packet carrying a DSS can go unnoticed if we don't trigger
	 * the data available machinery here.
	 */
	if (mptcp_subflow_data_available(sk))
		mptcp_data_ready(parent, sk);

	if (__mptcp_check_fallback(mptcp_sk(parent)) &&
	    !(parent->sk_shutdown & RCV_SHUTDOWN) &&
	    !subflow->rx_eof && subflow_is_done(sk)) {
		subflow->rx_eof = 1;
		mptcp_subflow_eof(parent);
	}
}

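/* ULP init hook: attach a subflow context to a kernel-created TCP socket
 * and divert its data_ready/write_space/state_change callbacks to the
 * MPTCP aware handlers.
 */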
static int subflow_ulp_init(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct mptcp_subflow_context *ctx;
	struct tcp_sock *tp = tcp_sk(sk);
	int err = 0;

	/* disallow attaching ULP to a socket unless it has been
	 * created with sock_create_kern()
	 */
	if (!sk->sk_kern_sock) {
		err = -EOPNOTSUPP;
		goto out;
	}

	ctx = subflow_create_ctx(sk, GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out;
	}

	pr_debug("subflow=%p, family=%d", ctx, sk->sk_family);

	tp->is_mptcp = 1;
	ctx->icsk_af_ops = icsk->icsk_af_ops;
	icsk->icsk_af_ops = subflow_default_af_ops(sk);
	ctx->tcp_data_ready = sk->sk_data_ready;
	ctx->tcp_state_change = sk->sk_state_change;
	ctx->tcp_write_space = sk->sk_write_space;
	sk->sk_data_ready = subflow_data_ready;
	sk->sk_write_space = subflow_write_space;
	sk->sk_state_change = subflow_state_change;
out:
	return err;
}

static void subflow_ulp_release(struct sock *sk)
{
	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(sk);

	if (!ctx)
		return;

	if (ctx->conn)
		sock_put(ctx->conn);

	kfree_rcu(ctx, rcu);
}

static void subflow_ulp_clone(const struct request_sock *req,
			      struct sock *newsk,
			      const gfp_t priority)
{
	struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
	struct mptcp_subflow_context *old_ctx = mptcp_subflow_ctx(newsk);
	struct mptcp_subflow_context *new_ctx;

	if (!tcp_rsk(req)->is_mptcp ||
	    (!subflow_req->mp_capable && !subflow_req->mp_join)) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx = subflow_create_ctx(newsk, priority);
	if (!new_ctx) {
		subflow_ulp_fallback(newsk, old_ctx);
		return;
	}

	new_ctx->conn_finished = 1;
	new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
	new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
	new_ctx->tcp_state_change = old_ctx->tcp_state_change;
	new_ctx->tcp_write_space = old_ctx->tcp_write_space;
	new_ctx->rel_write_seq = 1;
	new_ctx->tcp_sock = newsk;

	if (subflow_req->mp_capable) {
		/* see comments in subflow_syn_recv_sock(), MPTCP connection
		 * is fully established only after we receive the remote key
		 */
		new_ctx->mp_capable = 1;
		new_ctx->local_key = subflow_req->local_key;
		new_ctx->token = subflow_req->token;
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->idsn = subflow_req->idsn;
	} else if (subflow_req->mp_join) {
		new_ctx->ssn_offset = subflow_req->ssn_offset;
		new_ctx->mp_join = 1;
		new_ctx->fully_established = 1;
		new_ctx->backup = subflow_req->backup;
		new_ctx->local_id = subflow_req->local_id;
		new_ctx->remote_id = subflow_req->remote_id;
		new_ctx->token = subflow_req->token;
		new_ctx->thmac = subflow_req->thmac;
	}
}

static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
	.name		= "mptcp",
	.owner		= THIS_MODULE,
	.init		= subflow_ulp_init,
	.release	= subflow_ulp_release,
	.clone		= subflow_ulp_clone,
};

static int subflow_ops_init(struct request_sock_ops *subflow_ops)
{
	subflow_ops->obj_size = sizeof(struct mptcp_subflow_request_sock);
	subflow_ops->slab_name = "request_sock_subflow";

	subflow_ops->slab = kmem_cache_create(subflow_ops->slab_name,
					      subflow_ops->obj_size, 0,
					      SLAB_ACCOUNT |
					      SLAB_TYPESAFE_BY_RCU,
					      NULL);
	if (!subflow_ops->slab)
		return -ENOMEM;

	subflow_ops->destructor = subflow_req_destructor;

	return 0;
}

void __init mptcp_subflow_init(void)
{
	mptcp_subflow_request_sock_ops = tcp_request_sock_ops;
	if (subflow_ops_init(&mptcp_subflow_request_sock_ops) != 0)
		panic("MPTCP: failed to init subflow request sock ops\n");

	subflow_request_sock_ipv4_ops = tcp_request_sock_ipv4_ops;
	subflow_request_sock_ipv4_ops.init_req = subflow_v4_init_req;

	subflow_specific = ipv4_specific;
	subflow_specific.conn_request = subflow_v4_conn_request;
	subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_specific.sk_rx_dst_set = subflow_finish_connect;

#if IS_ENABLED(CONFIG_MPTCP_IPV6)
	subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
	subflow_request_sock_ipv6_ops.init_req = subflow_v6_init_req;

	subflow_v6_specific = ipv6_specific;
	subflow_v6_specific.conn_request = subflow_v6_conn_request;
	subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
	subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;

	subflow_v6m_specific = subflow_v6_specific;
	subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
	subflow_v6m_specific.send_check = ipv4_specific.send_check;
	subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
	subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
	subflow_v6m_specific.net_frag_header_len = 0;
#endif

	mptcp_diag_subflow_init(&subflow_ulp_ops);

	if (tcp_register_ulp(&subflow_ulp_ops) != 0)
		panic("MPTCP: failed to register subflows to ULP\n");
}