1 // SPDX-License-Identifier: GPL-2.0 2 /* Multipath TCP 3 * 4 * Copyright (c) 2021, Red Hat. 5 */ 6 7 #define pr_fmt(fmt) "MPTCP: " fmt 8 9 #include <linux/kernel.h> 10 #include <linux/module.h> 11 #include <net/sock.h> 12 #include <net/protocol.h> 13 #include <net/tcp.h> 14 #include <net/mptcp.h> 15 #include "protocol.h" 16 17 #define MIN_INFO_OPTLEN_SIZE 16 18 #define MIN_FULL_INFO_OPTLEN_SIZE 40 19 20 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) 21 { 22 msk_owned_by_me(msk); 23 24 if (likely(!__mptcp_check_fallback(msk))) 25 return NULL; 26 27 return msk->first; 28 } 29 30 static u32 sockopt_seq_reset(const struct sock *sk) 31 { 32 sock_owned_by_me(sk); 33 34 /* Highbits contain state. Allows to distinguish sockopt_seq 35 * of listener and established: 36 * s0 = new_listener() 37 * sockopt(s0) - seq is 1 38 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) 39 * sockopt(s0) - seq increments to 2 on s0 40 * sockopt(s1) // seq increments to 2 on s1 (different option) 41 * new ssk completes join, inherits options from s0 // seq 2 42 * Needs sync from mptcp join logic, but ssk->seq == msk->seq 43 * 44 * Set High order bits to sk_state so ssk->seq == msk->seq test 45 * will fail. 46 */ 47 48 return (u32)sk->sk_state << 24u; 49 } 50 51 static void sockopt_seq_inc(struct mptcp_sock *msk) 52 { 53 u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; 54 55 msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; 56 } 57 58 static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, 59 unsigned int optlen, int *val) 60 { 61 if (optlen < sizeof(int)) 62 return -EINVAL; 63 64 if (copy_from_sockptr(val, optval, sizeof(*val))) 65 return -EFAULT; 66 67 return 0; 68 } 69 70 static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) 71 { 72 struct mptcp_subflow_context *subflow; 73 struct sock *sk = (struct sock *)msk; 74 75 lock_sock(sk); 76 sockopt_seq_inc(msk); 77 78 mptcp_for_each_subflow(msk, subflow) { 79 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 80 bool slow = lock_sock_fast(ssk); 81 82 switch (optname) { 83 case SO_DEBUG: 84 sock_valbool_flag(ssk, SOCK_DBG, !!val); 85 break; 86 case SO_KEEPALIVE: 87 if (ssk->sk_prot->keepalive) 88 ssk->sk_prot->keepalive(ssk, !!val); 89 sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); 90 break; 91 case SO_PRIORITY: 92 ssk->sk_priority = val; 93 break; 94 case SO_SNDBUF: 95 case SO_SNDBUFFORCE: 96 ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; 97 WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); 98 break; 99 case SO_RCVBUF: 100 case SO_RCVBUFFORCE: 101 ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; 102 WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); 103 break; 104 case SO_MARK: 105 if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { 106 WRITE_ONCE(ssk->sk_mark, sk->sk_mark); 107 sk_dst_reset(ssk); 108 } 109 break; 110 case SO_INCOMING_CPU: 111 WRITE_ONCE(ssk->sk_incoming_cpu, val); 112 break; 113 } 114 115 subflow->setsockopt_seq = msk->setsockopt_seq; 116 unlock_sock_fast(ssk, slow); 117 } 118 119 release_sock(sk); 120 } 121 122 static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) 123 { 124 sockptr_t optval = KERNEL_SOCKPTR(&val); 125 struct sock *sk = (struct sock *)msk; 126 int ret; 127 128 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 129 optval, sizeof(val)); 130 if (ret) 131 return ret; 132 133 mptcp_sol_socket_sync_intval(msk, optname, val); 134 return 0; 135 } 136 137 static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) 138 { 139 struct sock *sk = (struct sock *)msk; 140 141 WRITE_ONCE(sk->sk_incoming_cpu, val); 142 143 mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); 144 } 145 146 static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) 147 { 148 sockptr_t optval = KERNEL_SOCKPTR(&val); 149 struct mptcp_subflow_context *subflow; 150 struct sock *sk = (struct sock *)msk; 151 int ret; 152 153 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 154 optval, sizeof(val)); 155 if (ret) 156 return ret; 157 158 lock_sock(sk); 159 mptcp_for_each_subflow(msk, subflow) { 160 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 161 bool slow = lock_sock_fast(ssk); 162 163 sock_set_timestamp(sk, optname, !!val); 164 unlock_sock_fast(ssk, slow); 165 } 166 167 release_sock(sk); 168 return 0; 169 } 170 171 static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, 172 sockptr_t optval, 173 unsigned int optlen) 174 { 175 int val, ret; 176 177 ret = mptcp_get_int_option(msk, optval, optlen, &val); 178 if (ret) 179 return ret; 180 181 switch (optname) { 182 case SO_KEEPALIVE: 183 mptcp_sol_socket_sync_intval(msk, optname, val); 184 return 0; 185 case SO_DEBUG: 186 case SO_MARK: 187 case SO_PRIORITY: 188 case SO_SNDBUF: 189 case SO_SNDBUFFORCE: 190 case SO_RCVBUF: 191 case SO_RCVBUFFORCE: 192 return mptcp_sol_socket_intval(msk, optname, val); 193 case SO_INCOMING_CPU: 194 mptcp_so_incoming_cpu(msk, val); 195 return 0; 196 case SO_TIMESTAMP_OLD: 197 case SO_TIMESTAMP_NEW: 198 case SO_TIMESTAMPNS_OLD: 199 case SO_TIMESTAMPNS_NEW: 200 return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); 201 } 202 203 return -ENOPROTOOPT; 204 } 205 206 static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, 207 int optname, 208 sockptr_t optval, 209 unsigned int optlen) 210 { 211 struct mptcp_subflow_context *subflow; 212 struct sock *sk = (struct sock *)msk; 213 struct so_timestamping timestamping; 214 int ret; 215 216 if (optlen == sizeof(timestamping)) { 217 if (copy_from_sockptr(×tamping, optval, 218 sizeof(timestamping))) 219 return -EFAULT; 220 } else if (optlen == sizeof(int)) { 221 memset(×tamping, 0, sizeof(timestamping)); 222 223 if (copy_from_sockptr(×tamping.flags, optval, sizeof(int))) 224 return -EFAULT; 225 } else { 226 return -EINVAL; 227 } 228 229 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 230 KERNEL_SOCKPTR(×tamping), 231 sizeof(timestamping)); 232 if (ret) 233 return ret; 234 235 lock_sock(sk); 236 237 mptcp_for_each_subflow(msk, subflow) { 238 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 239 bool slow = lock_sock_fast(ssk); 240 241 sock_set_timestamping(sk, optname, timestamping); 242 unlock_sock_fast(ssk, slow); 243 } 244 245 release_sock(sk); 246 247 return 0; 248 } 249 250 static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, 251 unsigned int optlen) 252 { 253 struct mptcp_subflow_context *subflow; 254 struct sock *sk = (struct sock *)msk; 255 struct linger ling; 256 sockptr_t kopt; 257 int ret; 258 259 if (optlen < sizeof(ling)) 260 return -EINVAL; 261 262 if (copy_from_sockptr(&ling, optval, sizeof(ling))) 263 return -EFAULT; 264 265 kopt = KERNEL_SOCKPTR(&ling); 266 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); 267 if (ret) 268 return ret; 269 270 lock_sock(sk); 271 sockopt_seq_inc(msk); 272 mptcp_for_each_subflow(msk, subflow) { 273 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 274 bool slow = lock_sock_fast(ssk); 275 276 if (!ling.l_onoff) { 277 sock_reset_flag(ssk, SOCK_LINGER); 278 } else { 279 ssk->sk_lingertime = sk->sk_lingertime; 280 sock_set_flag(ssk, SOCK_LINGER); 281 } 282 283 subflow->setsockopt_seq = msk->setsockopt_seq; 284 unlock_sock_fast(ssk, slow); 285 } 286 287 release_sock(sk); 288 return 0; 289 } 290 291 static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, 292 sockptr_t optval, unsigned int optlen) 293 { 294 struct sock *sk = (struct sock *)msk; 295 struct sock *ssk; 296 int ret; 297 298 switch (optname) { 299 case SO_REUSEPORT: 300 case SO_REUSEADDR: 301 case SO_BINDTODEVICE: 302 case SO_BINDTOIFINDEX: 303 lock_sock(sk); 304 ssk = __mptcp_nmpc_sk(msk); 305 if (IS_ERR(ssk)) { 306 release_sock(sk); 307 return PTR_ERR(ssk); 308 } 309 310 ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen); 311 if (ret == 0) { 312 if (optname == SO_REUSEPORT) 313 sk->sk_reuseport = ssk->sk_reuseport; 314 else if (optname == SO_REUSEADDR) 315 sk->sk_reuse = ssk->sk_reuse; 316 else if (optname == SO_BINDTODEVICE) 317 sk->sk_bound_dev_if = ssk->sk_bound_dev_if; 318 else if (optname == SO_BINDTOIFINDEX) 319 sk->sk_bound_dev_if = ssk->sk_bound_dev_if; 320 } 321 release_sock(sk); 322 return ret; 323 case SO_KEEPALIVE: 324 case SO_PRIORITY: 325 case SO_SNDBUF: 326 case SO_SNDBUFFORCE: 327 case SO_RCVBUF: 328 case SO_RCVBUFFORCE: 329 case SO_MARK: 330 case SO_INCOMING_CPU: 331 case SO_DEBUG: 332 case SO_TIMESTAMP_OLD: 333 case SO_TIMESTAMP_NEW: 334 case SO_TIMESTAMPNS_OLD: 335 case SO_TIMESTAMPNS_NEW: 336 return mptcp_setsockopt_sol_socket_int(msk, optname, optval, 337 optlen); 338 case SO_TIMESTAMPING_OLD: 339 case SO_TIMESTAMPING_NEW: 340 return mptcp_setsockopt_sol_socket_timestamping(msk, optname, 341 optval, optlen); 342 case SO_LINGER: 343 return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); 344 case SO_RCVLOWAT: 345 case SO_RCVTIMEO_OLD: 346 case SO_RCVTIMEO_NEW: 347 case SO_SNDTIMEO_OLD: 348 case SO_SNDTIMEO_NEW: 349 case SO_BUSY_POLL: 350 case SO_PREFER_BUSY_POLL: 351 case SO_BUSY_POLL_BUDGET: 352 /* No need to copy: only relevant for msk */ 353 return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); 354 case SO_NO_CHECK: 355 case SO_DONTROUTE: 356 case SO_BROADCAST: 357 case SO_BSDCOMPAT: 358 case SO_PASSCRED: 359 case SO_PASSPIDFD: 360 case SO_PASSSEC: 361 case SO_RXQ_OVFL: 362 case SO_WIFI_STATUS: 363 case SO_NOFCS: 364 case SO_SELECT_ERR_QUEUE: 365 return 0; 366 } 367 368 /* SO_OOBINLINE is not supported, let's avoid the related mess 369 * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, 370 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, 371 * we must be careful with subflows 372 * 373 * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks 374 * explicitly the sk_protocol field 375 * 376 * SO_PEEK_OFF is unsupported, as it is for plain TCP 377 * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows 378 * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, 379 * but likely needs careful design 380 * 381 * SO_ZEROCOPY is currently unsupported, TODO in sndmsg 382 * SO_TXTIME is currently unsupported 383 */ 384 385 return -EOPNOTSUPP; 386 } 387 388 static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, 389 sockptr_t optval, unsigned int optlen) 390 { 391 struct sock *sk = (struct sock *)msk; 392 int ret = -EOPNOTSUPP; 393 struct sock *ssk; 394 395 switch (optname) { 396 case IPV6_V6ONLY: 397 case IPV6_TRANSPARENT: 398 case IPV6_FREEBIND: 399 lock_sock(sk); 400 ssk = __mptcp_nmpc_sk(msk); 401 if (IS_ERR(ssk)) { 402 release_sock(sk); 403 return PTR_ERR(ssk); 404 } 405 406 ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen); 407 if (ret != 0) { 408 release_sock(sk); 409 return ret; 410 } 411 412 sockopt_seq_inc(msk); 413 414 switch (optname) { 415 case IPV6_V6ONLY: 416 sk->sk_ipv6only = ssk->sk_ipv6only; 417 break; 418 case IPV6_TRANSPARENT: 419 inet_assign_bit(TRANSPARENT, sk, 420 inet_test_bit(TRANSPARENT, ssk)); 421 break; 422 case IPV6_FREEBIND: 423 inet_assign_bit(FREEBIND, sk, 424 inet_test_bit(FREEBIND, ssk)); 425 break; 426 } 427 428 release_sock(sk); 429 break; 430 } 431 432 return ret; 433 } 434 435 static bool mptcp_supported_sockopt(int level, int optname) 436 { 437 if (level == SOL_IP) { 438 switch (optname) { 439 /* should work fine */ 440 case IP_FREEBIND: 441 case IP_TRANSPARENT: 442 443 /* the following are control cmsg related */ 444 case IP_PKTINFO: 445 case IP_RECVTTL: 446 case IP_RECVTOS: 447 case IP_RECVOPTS: 448 case IP_RETOPTS: 449 case IP_PASSSEC: 450 case IP_RECVORIGDSTADDR: 451 case IP_CHECKSUM: 452 case IP_RECVFRAGSIZE: 453 454 /* common stuff that need some love */ 455 case IP_TOS: 456 case IP_TTL: 457 case IP_BIND_ADDRESS_NO_PORT: 458 case IP_MTU_DISCOVER: 459 case IP_RECVERR: 460 461 /* possibly less common may deserve some love */ 462 case IP_MINTTL: 463 464 /* the following is apparently a no-op for plain TCP */ 465 case IP_RECVERR_RFC4884: 466 return true; 467 } 468 469 /* IP_OPTIONS is not supported, needs subflow care */ 470 /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ 471 /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, 472 * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, 473 * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, 474 * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, 475 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, 476 * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal 477 * with mcast stuff 478 */ 479 /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ 480 return false; 481 } 482 if (level == SOL_IPV6) { 483 switch (optname) { 484 case IPV6_V6ONLY: 485 486 /* the following are control cmsg related */ 487 case IPV6_RECVPKTINFO: 488 case IPV6_2292PKTINFO: 489 case IPV6_RECVHOPLIMIT: 490 case IPV6_2292HOPLIMIT: 491 case IPV6_RECVRTHDR: 492 case IPV6_2292RTHDR: 493 case IPV6_RECVHOPOPTS: 494 case IPV6_2292HOPOPTS: 495 case IPV6_RECVDSTOPTS: 496 case IPV6_2292DSTOPTS: 497 case IPV6_RECVTCLASS: 498 case IPV6_FLOWINFO: 499 case IPV6_RECVPATHMTU: 500 case IPV6_RECVORIGDSTADDR: 501 case IPV6_RECVFRAGSIZE: 502 503 /* the following ones need some love but are quite common */ 504 case IPV6_TCLASS: 505 case IPV6_TRANSPARENT: 506 case IPV6_FREEBIND: 507 case IPV6_PKTINFO: 508 case IPV6_2292PKTOPTIONS: 509 case IPV6_UNICAST_HOPS: 510 case IPV6_MTU_DISCOVER: 511 case IPV6_MTU: 512 case IPV6_RECVERR: 513 case IPV6_FLOWINFO_SEND: 514 case IPV6_FLOWLABEL_MGR: 515 case IPV6_MINHOPCOUNT: 516 case IPV6_DONTFRAG: 517 case IPV6_AUTOFLOWLABEL: 518 519 /* the following one is a no-op for plain TCP */ 520 case IPV6_RECVERR_RFC4884: 521 return true; 522 } 523 524 /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are 525 * not supported 526 */ 527 /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, 528 * IPV6_MULTICAST_IF, IPV6_ADDRFORM, 529 * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, 530 * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, 531 * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, 532 * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER 533 * are not supported better not deal with mcast 534 */ 535 /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ 536 537 /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ 538 /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ 539 return false; 540 } 541 if (level == SOL_TCP) { 542 switch (optname) { 543 /* the following are no-op or should work just fine */ 544 case TCP_THIN_DUPACK: 545 case TCP_DEFER_ACCEPT: 546 547 /* the following need some love */ 548 case TCP_MAXSEG: 549 case TCP_NODELAY: 550 case TCP_THIN_LINEAR_TIMEOUTS: 551 case TCP_CONGESTION: 552 case TCP_CORK: 553 case TCP_KEEPIDLE: 554 case TCP_KEEPINTVL: 555 case TCP_KEEPCNT: 556 case TCP_SYNCNT: 557 case TCP_SAVE_SYN: 558 case TCP_LINGER2: 559 case TCP_WINDOW_CLAMP: 560 case TCP_QUICKACK: 561 case TCP_USER_TIMEOUT: 562 case TCP_TIMESTAMP: 563 case TCP_NOTSENT_LOWAT: 564 case TCP_TX_DELAY: 565 case TCP_INQ: 566 case TCP_FASTOPEN: 567 case TCP_FASTOPEN_CONNECT: 568 case TCP_FASTOPEN_KEY: 569 case TCP_FASTOPEN_NO_COOKIE: 570 return true; 571 } 572 573 /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ 574 575 /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, 576 * TCP_REPAIR_WINDOW are not supported, better avoid this mess 577 */ 578 } 579 return false; 580 } 581 582 static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, 583 unsigned int optlen) 584 { 585 struct mptcp_subflow_context *subflow; 586 struct sock *sk = (struct sock *)msk; 587 char name[TCP_CA_NAME_MAX]; 588 bool cap_net_admin; 589 int ret; 590 591 if (optlen < 1) 592 return -EINVAL; 593 594 ret = strncpy_from_sockptr(name, optval, 595 min_t(long, TCP_CA_NAME_MAX - 1, optlen)); 596 if (ret < 0) 597 return -EFAULT; 598 599 name[ret] = 0; 600 601 cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); 602 603 ret = 0; 604 lock_sock(sk); 605 sockopt_seq_inc(msk); 606 mptcp_for_each_subflow(msk, subflow) { 607 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 608 int err; 609 610 lock_sock(ssk); 611 err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); 612 if (err < 0 && ret == 0) 613 ret = err; 614 subflow->setsockopt_seq = msk->setsockopt_seq; 615 release_sock(ssk); 616 } 617 618 if (ret == 0) 619 strcpy(msk->ca_name, name); 620 621 release_sock(sk); 622 return ret; 623 } 624 625 static int mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, sockptr_t optval, 626 unsigned int optlen) 627 { 628 struct mptcp_subflow_context *subflow; 629 struct sock *sk = (struct sock *)msk; 630 int val; 631 632 if (optlen < sizeof(int)) 633 return -EINVAL; 634 635 if (copy_from_sockptr(&val, optval, sizeof(val))) 636 return -EFAULT; 637 638 lock_sock(sk); 639 sockopt_seq_inc(msk); 640 msk->cork = !!val; 641 mptcp_for_each_subflow(msk, subflow) { 642 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 643 644 lock_sock(ssk); 645 __tcp_sock_set_cork(ssk, !!val); 646 release_sock(ssk); 647 } 648 if (!val) 649 mptcp_check_and_set_pending(sk); 650 release_sock(sk); 651 652 return 0; 653 } 654 655 static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t optval, 656 unsigned int optlen) 657 { 658 struct mptcp_subflow_context *subflow; 659 struct sock *sk = (struct sock *)msk; 660 int val; 661 662 if (optlen < sizeof(int)) 663 return -EINVAL; 664 665 if (copy_from_sockptr(&val, optval, sizeof(val))) 666 return -EFAULT; 667 668 lock_sock(sk); 669 sockopt_seq_inc(msk); 670 msk->nodelay = !!val; 671 mptcp_for_each_subflow(msk, subflow) { 672 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 673 674 lock_sock(ssk); 675 __tcp_sock_set_nodelay(ssk, !!val); 676 release_sock(ssk); 677 } 678 if (val) 679 mptcp_check_and_set_pending(sk); 680 release_sock(sk); 681 682 return 0; 683 } 684 685 static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname, 686 sockptr_t optval, unsigned int optlen) 687 { 688 struct sock *sk = (struct sock *)msk; 689 struct sock *ssk; 690 int err; 691 692 err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); 693 if (err != 0) 694 return err; 695 696 lock_sock(sk); 697 698 ssk = __mptcp_nmpc_sk(msk); 699 if (IS_ERR(ssk)) { 700 release_sock(sk); 701 return PTR_ERR(ssk); 702 } 703 704 switch (optname) { 705 case IP_FREEBIND: 706 inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); 707 break; 708 case IP_TRANSPARENT: 709 inet_assign_bit(TRANSPARENT, ssk, 710 inet_test_bit(TRANSPARENT, sk)); 711 break; 712 default: 713 release_sock(sk); 714 WARN_ON_ONCE(1); 715 return -EOPNOTSUPP; 716 } 717 718 sockopt_seq_inc(msk); 719 release_sock(sk); 720 return 0; 721 } 722 723 static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, 724 sockptr_t optval, unsigned int optlen) 725 { 726 struct mptcp_subflow_context *subflow; 727 struct sock *sk = (struct sock *)msk; 728 int err, val; 729 730 err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); 731 732 if (err != 0) 733 return err; 734 735 lock_sock(sk); 736 sockopt_seq_inc(msk); 737 val = inet_sk(sk)->tos; 738 mptcp_for_each_subflow(msk, subflow) { 739 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 740 bool slow; 741 742 slow = lock_sock_fast(ssk); 743 __ip_sock_set_tos(ssk, val); 744 unlock_sock_fast(ssk, slow); 745 } 746 release_sock(sk); 747 748 return 0; 749 } 750 751 static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, 752 sockptr_t optval, unsigned int optlen) 753 { 754 switch (optname) { 755 case IP_FREEBIND: 756 case IP_TRANSPARENT: 757 return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); 758 case IP_TOS: 759 return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); 760 } 761 762 return -EOPNOTSUPP; 763 } 764 765 static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, 766 sockptr_t optval, unsigned int optlen) 767 { 768 struct sock *sk = (struct sock *)msk; 769 struct sock *ssk; 770 int ret; 771 772 /* Limit to first subflow, before the connection establishment */ 773 lock_sock(sk); 774 ssk = __mptcp_nmpc_sk(msk); 775 if (IS_ERR(ssk)) { 776 ret = PTR_ERR(ssk); 777 goto unlock; 778 } 779 780 ret = tcp_setsockopt(ssk, level, optname, optval, optlen); 781 782 unlock: 783 release_sock(sk); 784 return ret; 785 } 786 787 static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 788 sockptr_t optval, unsigned int optlen) 789 { 790 struct sock *sk = (void *)msk; 791 int ret, val; 792 793 switch (optname) { 794 case TCP_INQ: 795 ret = mptcp_get_int_option(msk, optval, optlen, &val); 796 if (ret) 797 return ret; 798 if (val < 0 || val > 1) 799 return -EINVAL; 800 801 lock_sock(sk); 802 msk->recvmsg_inq = !!val; 803 release_sock(sk); 804 return 0; 805 case TCP_ULP: 806 return -EOPNOTSUPP; 807 case TCP_CONGESTION: 808 return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); 809 case TCP_CORK: 810 return mptcp_setsockopt_sol_tcp_cork(msk, optval, optlen); 811 case TCP_NODELAY: 812 return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); 813 case TCP_DEFER_ACCEPT: 814 /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ 815 mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); 816 return 0; 817 case TCP_FASTOPEN: 818 case TCP_FASTOPEN_CONNECT: 819 case TCP_FASTOPEN_KEY: 820 case TCP_FASTOPEN_NO_COOKIE: 821 return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, 822 optval, optlen); 823 } 824 825 return -EOPNOTSUPP; 826 } 827 828 int mptcp_setsockopt(struct sock *sk, int level, int optname, 829 sockptr_t optval, unsigned int optlen) 830 { 831 struct mptcp_sock *msk = mptcp_sk(sk); 832 struct sock *ssk; 833 834 pr_debug("msk=%p", msk); 835 836 if (level == SOL_SOCKET) 837 return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); 838 839 if (!mptcp_supported_sockopt(level, optname)) 840 return -ENOPROTOOPT; 841 842 /* @@ the meaning of setsockopt() when the socket is connected and 843 * there are multiple subflows is not yet defined. It is up to the 844 * MPTCP-level socket to configure the subflows until the subflow 845 * is in TCP fallback, when TCP socket options are passed through 846 * to the one remaining subflow. 847 */ 848 lock_sock(sk); 849 ssk = __mptcp_tcp_fallback(msk); 850 release_sock(sk); 851 if (ssk) 852 return tcp_setsockopt(ssk, level, optname, optval, optlen); 853 854 if (level == SOL_IP) 855 return mptcp_setsockopt_v4(msk, optname, optval, optlen); 856 857 if (level == SOL_IPV6) 858 return mptcp_setsockopt_v6(msk, optname, optval, optlen); 859 860 if (level == SOL_TCP) 861 return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); 862 863 return -EOPNOTSUPP; 864 } 865 866 static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, 867 char __user *optval, int __user *optlen) 868 { 869 struct sock *sk = (struct sock *)msk; 870 struct sock *ssk; 871 int ret; 872 873 lock_sock(sk); 874 ssk = msk->first; 875 if (ssk) { 876 ret = tcp_getsockopt(ssk, level, optname, optval, optlen); 877 goto out; 878 } 879 880 ssk = __mptcp_nmpc_sk(msk); 881 if (IS_ERR(ssk)) { 882 ret = PTR_ERR(ssk); 883 goto out; 884 } 885 886 ret = tcp_getsockopt(ssk, level, optname, optval, optlen); 887 888 out: 889 release_sock(sk); 890 return ret; 891 } 892 893 void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) 894 { 895 struct sock *sk = (struct sock *)msk; 896 u32 flags = 0; 897 bool slow; 898 899 memset(info, 0, sizeof(*info)); 900 901 info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); 902 info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); 903 info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); 904 info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); 905 906 if (inet_sk_state_load(sk) == TCP_LISTEN) 907 return; 908 909 /* The following limits only make sense for the in-kernel PM */ 910 if (mptcp_pm_is_kernel(msk)) { 911 info->mptcpi_subflows_max = 912 mptcp_pm_get_subflows_max(msk); 913 info->mptcpi_add_addr_signal_max = 914 mptcp_pm_get_add_addr_signal_max(msk); 915 info->mptcpi_add_addr_accepted_max = 916 mptcp_pm_get_add_addr_accept_max(msk); 917 info->mptcpi_local_addr_max = 918 mptcp_pm_get_local_addr_max(msk); 919 } 920 921 if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) 922 flags |= MPTCP_INFO_FLAG_FALLBACK; 923 if (READ_ONCE(msk->can_ack)) 924 flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; 925 info->mptcpi_flags = flags; 926 mptcp_data_lock(sk); 927 info->mptcpi_snd_una = msk->snd_una; 928 info->mptcpi_rcv_nxt = msk->ack_seq; 929 info->mptcpi_bytes_acked = msk->bytes_acked; 930 mptcp_data_unlock(sk); 931 932 slow = lock_sock_fast(sk); 933 info->mptcpi_csum_enabled = msk->csum_enabled; 934 info->mptcpi_token = msk->token; 935 info->mptcpi_write_seq = msk->write_seq; 936 info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; 937 info->mptcpi_bytes_sent = msk->bytes_sent; 938 info->mptcpi_bytes_received = msk->bytes_received; 939 info->mptcpi_bytes_retrans = msk->bytes_retrans; 940 unlock_sock_fast(sk, slow); 941 } 942 EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); 943 944 static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) 945 { 946 struct mptcp_info m_info; 947 int len; 948 949 if (get_user(len, optlen)) 950 return -EFAULT; 951 952 len = min_t(unsigned int, len, sizeof(struct mptcp_info)); 953 954 mptcp_diag_fill_info(msk, &m_info); 955 956 if (put_user(len, optlen)) 957 return -EFAULT; 958 959 if (copy_to_user(optval, &m_info, len)) 960 return -EFAULT; 961 962 return 0; 963 } 964 965 static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, 966 char __user *optval, 967 u32 copied, 968 int __user *optlen) 969 { 970 u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); 971 972 if (copied) 973 copied += sfd->size_subflow_data; 974 else 975 copied = copylen; 976 977 if (put_user(copied, optlen)) 978 return -EFAULT; 979 980 if (copy_to_user(optval, sfd, copylen)) 981 return -EFAULT; 982 983 return 0; 984 } 985 986 static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, 987 char __user *optval, 988 int __user *optlen) 989 { 990 int len, copylen; 991 992 if (get_user(len, optlen)) 993 return -EFAULT; 994 995 /* if mptcp_subflow_data size is changed, need to adjust 996 * this function to deal with programs using old version. 997 */ 998 BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); 999 1000 if (len < MIN_INFO_OPTLEN_SIZE) 1001 return -EINVAL; 1002 1003 memset(sfd, 0, sizeof(*sfd)); 1004 1005 copylen = min_t(unsigned int, len, sizeof(*sfd)); 1006 if (copy_from_user(sfd, optval, copylen)) 1007 return -EFAULT; 1008 1009 /* size_subflow_data is u32, but len is signed */ 1010 if (sfd->size_subflow_data > INT_MAX || 1011 sfd->size_user > INT_MAX) 1012 return -EINVAL; 1013 1014 if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || 1015 sfd->size_subflow_data > len) 1016 return -EINVAL; 1017 1018 if (sfd->num_subflows || sfd->size_kernel) 1019 return -EINVAL; 1020 1021 return len - sfd->size_subflow_data; 1022 } 1023 1024 static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, 1025 int __user *optlen) 1026 { 1027 struct mptcp_subflow_context *subflow; 1028 struct sock *sk = (struct sock *)msk; 1029 unsigned int sfcount = 0, copied = 0; 1030 struct mptcp_subflow_data sfd; 1031 char __user *infoptr; 1032 int len; 1033 1034 len = mptcp_get_subflow_data(&sfd, optval, optlen); 1035 if (len < 0) 1036 return len; 1037 1038 sfd.size_kernel = sizeof(struct tcp_info); 1039 sfd.size_user = min_t(unsigned int, sfd.size_user, 1040 sizeof(struct tcp_info)); 1041 1042 infoptr = optval + sfd.size_subflow_data; 1043 1044 lock_sock(sk); 1045 1046 mptcp_for_each_subflow(msk, subflow) { 1047 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1048 1049 ++sfcount; 1050 1051 if (len && len >= sfd.size_user) { 1052 struct tcp_info info; 1053 1054 tcp_get_info(ssk, &info); 1055 1056 if (copy_to_user(infoptr, &info, sfd.size_user)) { 1057 release_sock(sk); 1058 return -EFAULT; 1059 } 1060 1061 infoptr += sfd.size_user; 1062 copied += sfd.size_user; 1063 len -= sfd.size_user; 1064 } 1065 } 1066 1067 release_sock(sk); 1068 1069 sfd.num_subflows = sfcount; 1070 1071 if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) 1072 return -EFAULT; 1073 1074 return 0; 1075 } 1076 1077 static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) 1078 { 1079 const struct inet_sock *inet = inet_sk(sk); 1080 1081 memset(a, 0, sizeof(*a)); 1082 1083 if (sk->sk_family == AF_INET) { 1084 a->sin_local.sin_family = AF_INET; 1085 a->sin_local.sin_port = inet->inet_sport; 1086 a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; 1087 1088 if (!a->sin_local.sin_addr.s_addr) 1089 a->sin_local.sin_addr.s_addr = inet->inet_saddr; 1090 1091 a->sin_remote.sin_family = AF_INET; 1092 a->sin_remote.sin_port = inet->inet_dport; 1093 a->sin_remote.sin_addr.s_addr = inet->inet_daddr; 1094 #if IS_ENABLED(CONFIG_IPV6) 1095 } else if (sk->sk_family == AF_INET6) { 1096 const struct ipv6_pinfo *np = inet6_sk(sk); 1097 1098 if (WARN_ON_ONCE(!np)) 1099 return; 1100 1101 a->sin6_local.sin6_family = AF_INET6; 1102 a->sin6_local.sin6_port = inet->inet_sport; 1103 1104 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 1105 a->sin6_local.sin6_addr = np->saddr; 1106 else 1107 a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; 1108 1109 a->sin6_remote.sin6_family = AF_INET6; 1110 a->sin6_remote.sin6_port = inet->inet_dport; 1111 a->sin6_remote.sin6_addr = sk->sk_v6_daddr; 1112 #endif 1113 } 1114 } 1115 1116 static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, 1117 int __user *optlen) 1118 { 1119 struct mptcp_subflow_context *subflow; 1120 struct sock *sk = (struct sock *)msk; 1121 unsigned int sfcount = 0, copied = 0; 1122 struct mptcp_subflow_data sfd; 1123 char __user *addrptr; 1124 int len; 1125 1126 len = mptcp_get_subflow_data(&sfd, optval, optlen); 1127 if (len < 0) 1128 return len; 1129 1130 sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); 1131 sfd.size_user = min_t(unsigned int, sfd.size_user, 1132 sizeof(struct mptcp_subflow_addrs)); 1133 1134 addrptr = optval + sfd.size_subflow_data; 1135 1136 lock_sock(sk); 1137 1138 mptcp_for_each_subflow(msk, subflow) { 1139 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1140 1141 ++sfcount; 1142 1143 if (len && len >= sfd.size_user) { 1144 struct mptcp_subflow_addrs a; 1145 1146 mptcp_get_sub_addrs(ssk, &a); 1147 1148 if (copy_to_user(addrptr, &a, sfd.size_user)) { 1149 release_sock(sk); 1150 return -EFAULT; 1151 } 1152 1153 addrptr += sfd.size_user; 1154 copied += sfd.size_user; 1155 len -= sfd.size_user; 1156 } 1157 } 1158 1159 release_sock(sk); 1160 1161 sfd.num_subflows = sfcount; 1162 1163 if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) 1164 return -EFAULT; 1165 1166 return 0; 1167 } 1168 1169 static int mptcp_get_full_info(struct mptcp_full_info *mfi, 1170 char __user *optval, 1171 int __user *optlen) 1172 { 1173 int len; 1174 1175 BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != 1176 MIN_FULL_INFO_OPTLEN_SIZE); 1177 1178 if (get_user(len, optlen)) 1179 return -EFAULT; 1180 1181 if (len < MIN_FULL_INFO_OPTLEN_SIZE) 1182 return -EINVAL; 1183 1184 memset(mfi, 0, sizeof(*mfi)); 1185 if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE)) 1186 return -EFAULT; 1187 1188 if (mfi->size_tcpinfo_kernel || 1189 mfi->size_sfinfo_kernel || 1190 mfi->num_subflows) 1191 return -EINVAL; 1192 1193 if (mfi->size_sfinfo_user > INT_MAX || 1194 mfi->size_tcpinfo_user > INT_MAX) 1195 return -EINVAL; 1196 1197 return len - MIN_FULL_INFO_OPTLEN_SIZE; 1198 } 1199 1200 static int mptcp_put_full_info(struct mptcp_full_info *mfi, 1201 char __user *optval, 1202 u32 copylen, 1203 int __user *optlen) 1204 { 1205 copylen += MIN_FULL_INFO_OPTLEN_SIZE; 1206 if (put_user(copylen, optlen)) 1207 return -EFAULT; 1208 1209 if (copy_to_user(optval, mfi, copylen)) 1210 return -EFAULT; 1211 return 0; 1212 } 1213 1214 static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, 1215 int __user *optlen) 1216 { 1217 unsigned int sfcount = 0, copylen = 0; 1218 struct mptcp_subflow_context *subflow; 1219 struct sock *sk = (struct sock *)msk; 1220 void __user *tcpinfoptr, *sfinfoptr; 1221 struct mptcp_full_info mfi; 1222 int len; 1223 1224 len = mptcp_get_full_info(&mfi, optval, optlen); 1225 if (len < 0) 1226 return len; 1227 1228 /* don't bother filling the mptcp info if there is not enough 1229 * user-space-provided storage 1230 */ 1231 if (len > 0) { 1232 mptcp_diag_fill_info(msk, &mfi.mptcp_info); 1233 copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); 1234 } 1235 1236 mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); 1237 mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, 1238 sizeof(struct tcp_info)); 1239 sfinfoptr = u64_to_user_ptr(mfi.subflow_info); 1240 mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); 1241 mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, 1242 sizeof(struct mptcp_subflow_info)); 1243 tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); 1244 1245 lock_sock(sk); 1246 mptcp_for_each_subflow(msk, subflow) { 1247 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1248 struct mptcp_subflow_info sfinfo; 1249 struct tcp_info tcp_info; 1250 1251 if (sfcount++ >= mfi.size_arrays_user) 1252 continue; 1253 1254 /* fetch addr/tcp_info only if the user space buffers 1255 * are wide enough 1256 */ 1257 memset(&sfinfo, 0, sizeof(sfinfo)); 1258 sfinfo.id = subflow->subflow_id; 1259 if (mfi.size_sfinfo_user > 1260 offsetof(struct mptcp_subflow_info, addrs)) 1261 mptcp_get_sub_addrs(ssk, &sfinfo.addrs); 1262 if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user)) 1263 goto fail_release; 1264 1265 if (mfi.size_tcpinfo_user) { 1266 tcp_get_info(ssk, &tcp_info); 1267 if (copy_to_user(tcpinfoptr, &tcp_info, 1268 mfi.size_tcpinfo_user)) 1269 goto fail_release; 1270 } 1271 1272 tcpinfoptr += mfi.size_tcpinfo_user; 1273 sfinfoptr += mfi.size_sfinfo_user; 1274 } 1275 release_sock(sk); 1276 1277 mfi.num_subflows = sfcount; 1278 if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) 1279 return -EFAULT; 1280 1281 return 0; 1282 1283 fail_release: 1284 release_sock(sk); 1285 return -EFAULT; 1286 } 1287 1288 static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, 1289 int __user *optlen, int val) 1290 { 1291 int len; 1292 1293 if (get_user(len, optlen)) 1294 return -EFAULT; 1295 if (len < 0) 1296 return -EINVAL; 1297 1298 if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { 1299 unsigned char ucval = (unsigned char)val; 1300 1301 len = 1; 1302 if (put_user(len, optlen)) 1303 return -EFAULT; 1304 if (copy_to_user(optval, &ucval, 1)) 1305 return -EFAULT; 1306 } else { 1307 len = min_t(unsigned int, len, sizeof(int)); 1308 if (put_user(len, optlen)) 1309 return -EFAULT; 1310 if (copy_to_user(optval, &val, len)) 1311 return -EFAULT; 1312 } 1313 1314 return 0; 1315 } 1316 1317 static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 1318 char __user *optval, int __user *optlen) 1319 { 1320 switch (optname) { 1321 case TCP_ULP: 1322 case TCP_CONGESTION: 1323 case TCP_INFO: 1324 case TCP_CC_INFO: 1325 case TCP_DEFER_ACCEPT: 1326 case TCP_FASTOPEN: 1327 case TCP_FASTOPEN_CONNECT: 1328 case TCP_FASTOPEN_KEY: 1329 case TCP_FASTOPEN_NO_COOKIE: 1330 return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, 1331 optval, optlen); 1332 case TCP_INQ: 1333 return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq); 1334 case TCP_CORK: 1335 return mptcp_put_int_option(msk, optval, optlen, msk->cork); 1336 case TCP_NODELAY: 1337 return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); 1338 } 1339 return -EOPNOTSUPP; 1340 } 1341 1342 static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, 1343 char __user *optval, int __user *optlen) 1344 { 1345 struct sock *sk = (void *)msk; 1346 1347 switch (optname) { 1348 case IP_TOS: 1349 return mptcp_put_int_option(msk, optval, optlen, inet_sk(sk)->tos); 1350 } 1351 1352 return -EOPNOTSUPP; 1353 } 1354 1355 static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, 1356 char __user *optval, int __user *optlen) 1357 { 1358 switch (optname) { 1359 case MPTCP_INFO: 1360 return mptcp_getsockopt_info(msk, optval, optlen); 1361 case MPTCP_FULL_INFO: 1362 return mptcp_getsockopt_full_info(msk, optval, optlen); 1363 case MPTCP_TCPINFO: 1364 return mptcp_getsockopt_tcpinfo(msk, optval, optlen); 1365 case MPTCP_SUBFLOW_ADDRS: 1366 return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); 1367 } 1368 1369 return -EOPNOTSUPP; 1370 } 1371 1372 int mptcp_getsockopt(struct sock *sk, int level, int optname, 1373 char __user *optval, int __user *option) 1374 { 1375 struct mptcp_sock *msk = mptcp_sk(sk); 1376 struct sock *ssk; 1377 1378 pr_debug("msk=%p", msk); 1379 1380 /* @@ the meaning of setsockopt() when the socket is connected and 1381 * there are multiple subflows is not yet defined. It is up to the 1382 * MPTCP-level socket to configure the subflows until the subflow 1383 * is in TCP fallback, when socket options are passed through 1384 * to the one remaining subflow. 1385 */ 1386 lock_sock(sk); 1387 ssk = __mptcp_tcp_fallback(msk); 1388 release_sock(sk); 1389 if (ssk) 1390 return tcp_getsockopt(ssk, level, optname, optval, option); 1391 1392 if (level == SOL_IP) 1393 return mptcp_getsockopt_v4(msk, optname, optval, option); 1394 if (level == SOL_TCP) 1395 return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); 1396 if (level == SOL_MPTCP) 1397 return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option); 1398 return -EOPNOTSUPP; 1399 } 1400 1401 static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) 1402 { 1403 static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; 1404 struct sock *sk = (struct sock *)msk; 1405 1406 if (ssk->sk_prot->keepalive) { 1407 if (sock_flag(sk, SOCK_KEEPOPEN)) 1408 ssk->sk_prot->keepalive(ssk, 1); 1409 else 1410 ssk->sk_prot->keepalive(ssk, 0); 1411 } 1412 1413 ssk->sk_priority = sk->sk_priority; 1414 ssk->sk_bound_dev_if = sk->sk_bound_dev_if; 1415 ssk->sk_incoming_cpu = sk->sk_incoming_cpu; 1416 ssk->sk_ipv6only = sk->sk_ipv6only; 1417 __ip_sock_set_tos(ssk, inet_sk(sk)->tos); 1418 1419 if (sk->sk_userlocks & tx_rx_locks) { 1420 ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; 1421 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) 1422 WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); 1423 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1424 WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); 1425 } 1426 1427 if (sock_flag(sk, SOCK_LINGER)) { 1428 ssk->sk_lingertime = sk->sk_lingertime; 1429 sock_set_flag(ssk, SOCK_LINGER); 1430 } else { 1431 sock_reset_flag(ssk, SOCK_LINGER); 1432 } 1433 1434 if (sk->sk_mark != ssk->sk_mark) { 1435 ssk->sk_mark = sk->sk_mark; 1436 sk_dst_reset(ssk); 1437 } 1438 1439 sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); 1440 1441 if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) 1442 tcp_set_congestion_control(ssk, msk->ca_name, false, true); 1443 __tcp_sock_set_cork(ssk, !!msk->cork); 1444 __tcp_sock_set_nodelay(ssk, !!msk->nodelay); 1445 1446 inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); 1447 inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); 1448 } 1449 1450 static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) 1451 { 1452 bool slow = lock_sock_fast(ssk); 1453 1454 sync_socket_options(msk, ssk); 1455 1456 unlock_sock_fast(ssk, slow); 1457 } 1458 1459 void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) 1460 { 1461 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 1462 1463 msk_owned_by_me(msk); 1464 1465 if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { 1466 __mptcp_sockopt_sync(msk, ssk); 1467 1468 subflow->setsockopt_seq = msk->setsockopt_seq; 1469 } 1470 } 1471 1472 void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) 1473 { 1474 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 1475 1476 msk_owned_by_me(msk); 1477 1478 if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { 1479 sync_socket_options(msk, ssk); 1480 1481 subflow->setsockopt_seq = msk->setsockopt_seq; 1482 } 1483 } 1484