1 // SPDX-License-Identifier: GPL-2.0 2 /* Multipath TCP 3 * 4 * Copyright (c) 2021, Red Hat. 5 */ 6 7 #define pr_fmt(fmt) "MPTCP: " fmt 8 9 #include <linux/kernel.h> 10 #include <linux/module.h> 11 #include <net/sock.h> 12 #include <net/protocol.h> 13 #include <net/tcp.h> 14 #include <net/mptcp.h> 15 #include "protocol.h" 16 17 #define MIN_INFO_OPTLEN_SIZE 16 18 #define MIN_FULL_INFO_OPTLEN_SIZE 40 19 20 static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) 21 { 22 msk_owned_by_me(msk); 23 24 if (likely(!__mptcp_check_fallback(msk))) 25 return NULL; 26 27 return msk->first; 28 } 29 30 static u32 sockopt_seq_reset(const struct sock *sk) 31 { 32 sock_owned_by_me(sk); 33 34 /* Highbits contain state. Allows to distinguish sockopt_seq 35 * of listener and established: 36 * s0 = new_listener() 37 * sockopt(s0) - seq is 1 38 * s1 = accept(s0) - s1 inherits seq 1 if listener sk (s0) 39 * sockopt(s0) - seq increments to 2 on s0 40 * sockopt(s1) // seq increments to 2 on s1 (different option) 41 * new ssk completes join, inherits options from s0 // seq 2 42 * Needs sync from mptcp join logic, but ssk->seq == msk->seq 43 * 44 * Set High order bits to sk_state so ssk->seq == msk->seq test 45 * will fail. 46 */ 47 48 return (u32)sk->sk_state << 24u; 49 } 50 51 static void sockopt_seq_inc(struct mptcp_sock *msk) 52 { 53 u32 seq = (msk->setsockopt_seq + 1) & 0x00ffffff; 54 55 msk->setsockopt_seq = sockopt_seq_reset((struct sock *)msk) + seq; 56 } 57 58 static int mptcp_get_int_option(struct mptcp_sock *msk, sockptr_t optval, 59 unsigned int optlen, int *val) 60 { 61 if (optlen < sizeof(int)) 62 return -EINVAL; 63 64 if (copy_from_sockptr(val, optval, sizeof(*val))) 65 return -EFAULT; 66 67 return 0; 68 } 69 70 static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, int val) 71 { 72 struct mptcp_subflow_context *subflow; 73 struct sock *sk = (struct sock *)msk; 74 75 lock_sock(sk); 76 sockopt_seq_inc(msk); 77 78 mptcp_for_each_subflow(msk, subflow) { 79 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 80 bool slow = lock_sock_fast(ssk); 81 82 switch (optname) { 83 case SO_DEBUG: 84 sock_valbool_flag(ssk, SOCK_DBG, !!val); 85 break; 86 case SO_KEEPALIVE: 87 if (ssk->sk_prot->keepalive) 88 ssk->sk_prot->keepalive(ssk, !!val); 89 sock_valbool_flag(ssk, SOCK_KEEPOPEN, !!val); 90 break; 91 case SO_PRIORITY: 92 ssk->sk_priority = val; 93 break; 94 case SO_SNDBUF: 95 case SO_SNDBUFFORCE: 96 ssk->sk_userlocks |= SOCK_SNDBUF_LOCK; 97 WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); 98 mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; 99 break; 100 case SO_RCVBUF: 101 case SO_RCVBUFFORCE: 102 ssk->sk_userlocks |= SOCK_RCVBUF_LOCK; 103 WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); 104 break; 105 case SO_MARK: 106 if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) { 107 WRITE_ONCE(ssk->sk_mark, sk->sk_mark); 108 sk_dst_reset(ssk); 109 } 110 break; 111 case SO_INCOMING_CPU: 112 WRITE_ONCE(ssk->sk_incoming_cpu, val); 113 break; 114 } 115 116 subflow->setsockopt_seq = msk->setsockopt_seq; 117 unlock_sock_fast(ssk, slow); 118 } 119 120 release_sock(sk); 121 } 122 123 static int mptcp_sol_socket_intval(struct mptcp_sock *msk, int optname, int val) 124 { 125 sockptr_t optval = KERNEL_SOCKPTR(&val); 126 struct sock *sk = (struct sock *)msk; 127 int ret; 128 129 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 130 optval, sizeof(val)); 131 if (ret) 132 return ret; 133 134 mptcp_sol_socket_sync_intval(msk, optname, val); 135 return 0; 136 } 137 138 static void mptcp_so_incoming_cpu(struct mptcp_sock *msk, int val) 139 { 140 struct sock *sk = (struct sock *)msk; 141 142 WRITE_ONCE(sk->sk_incoming_cpu, val); 143 144 mptcp_sol_socket_sync_intval(msk, SO_INCOMING_CPU, val); 145 } 146 147 static int mptcp_setsockopt_sol_socket_tstamp(struct mptcp_sock *msk, int optname, int val) 148 { 149 sockptr_t optval = KERNEL_SOCKPTR(&val); 150 struct mptcp_subflow_context *subflow; 151 struct sock *sk = (struct sock *)msk; 152 int ret; 153 154 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 155 optval, sizeof(val)); 156 if (ret) 157 return ret; 158 159 lock_sock(sk); 160 mptcp_for_each_subflow(msk, subflow) { 161 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 162 bool slow = lock_sock_fast(ssk); 163 164 sock_set_timestamp(sk, optname, !!val); 165 unlock_sock_fast(ssk, slow); 166 } 167 168 release_sock(sk); 169 return 0; 170 } 171 172 static int mptcp_setsockopt_sol_socket_int(struct mptcp_sock *msk, int optname, 173 sockptr_t optval, 174 unsigned int optlen) 175 { 176 int val, ret; 177 178 ret = mptcp_get_int_option(msk, optval, optlen, &val); 179 if (ret) 180 return ret; 181 182 switch (optname) { 183 case SO_KEEPALIVE: 184 case SO_DEBUG: 185 case SO_MARK: 186 case SO_PRIORITY: 187 case SO_SNDBUF: 188 case SO_SNDBUFFORCE: 189 case SO_RCVBUF: 190 case SO_RCVBUFFORCE: 191 return mptcp_sol_socket_intval(msk, optname, val); 192 case SO_INCOMING_CPU: 193 mptcp_so_incoming_cpu(msk, val); 194 return 0; 195 case SO_TIMESTAMP_OLD: 196 case SO_TIMESTAMP_NEW: 197 case SO_TIMESTAMPNS_OLD: 198 case SO_TIMESTAMPNS_NEW: 199 return mptcp_setsockopt_sol_socket_tstamp(msk, optname, val); 200 } 201 202 return -ENOPROTOOPT; 203 } 204 205 static int mptcp_setsockopt_sol_socket_timestamping(struct mptcp_sock *msk, 206 int optname, 207 sockptr_t optval, 208 unsigned int optlen) 209 { 210 struct mptcp_subflow_context *subflow; 211 struct sock *sk = (struct sock *)msk; 212 struct so_timestamping timestamping; 213 int ret; 214 215 if (optlen == sizeof(timestamping)) { 216 if (copy_from_sockptr(×tamping, optval, 217 sizeof(timestamping))) 218 return -EFAULT; 219 } else if (optlen == sizeof(int)) { 220 memset(×tamping, 0, sizeof(timestamping)); 221 222 if (copy_from_sockptr(×tamping.flags, optval, sizeof(int))) 223 return -EFAULT; 224 } else { 225 return -EINVAL; 226 } 227 228 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, 229 KERNEL_SOCKPTR(×tamping), 230 sizeof(timestamping)); 231 if (ret) 232 return ret; 233 234 lock_sock(sk); 235 236 mptcp_for_each_subflow(msk, subflow) { 237 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 238 bool slow = lock_sock_fast(ssk); 239 240 sock_set_timestamping(sk, optname, timestamping); 241 unlock_sock_fast(ssk, slow); 242 } 243 244 release_sock(sk); 245 246 return 0; 247 } 248 249 static int mptcp_setsockopt_sol_socket_linger(struct mptcp_sock *msk, sockptr_t optval, 250 unsigned int optlen) 251 { 252 struct mptcp_subflow_context *subflow; 253 struct sock *sk = (struct sock *)msk; 254 struct linger ling; 255 sockptr_t kopt; 256 int ret; 257 258 if (optlen < sizeof(ling)) 259 return -EINVAL; 260 261 if (copy_from_sockptr(&ling, optval, sizeof(ling))) 262 return -EFAULT; 263 264 kopt = KERNEL_SOCKPTR(&ling); 265 ret = sock_setsockopt(sk->sk_socket, SOL_SOCKET, SO_LINGER, kopt, sizeof(ling)); 266 if (ret) 267 return ret; 268 269 lock_sock(sk); 270 sockopt_seq_inc(msk); 271 mptcp_for_each_subflow(msk, subflow) { 272 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 273 bool slow = lock_sock_fast(ssk); 274 275 if (!ling.l_onoff) { 276 sock_reset_flag(ssk, SOCK_LINGER); 277 } else { 278 ssk->sk_lingertime = sk->sk_lingertime; 279 sock_set_flag(ssk, SOCK_LINGER); 280 } 281 282 subflow->setsockopt_seq = msk->setsockopt_seq; 283 unlock_sock_fast(ssk, slow); 284 } 285 286 release_sock(sk); 287 return 0; 288 } 289 290 static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname, 291 sockptr_t optval, unsigned int optlen) 292 { 293 struct sock *sk = (struct sock *)msk; 294 struct sock *ssk; 295 int ret; 296 297 switch (optname) { 298 case SO_REUSEPORT: 299 case SO_REUSEADDR: 300 case SO_BINDTODEVICE: 301 case SO_BINDTOIFINDEX: 302 lock_sock(sk); 303 ssk = __mptcp_nmpc_sk(msk); 304 if (IS_ERR(ssk)) { 305 release_sock(sk); 306 return PTR_ERR(ssk); 307 } 308 309 ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen); 310 if (ret == 0) { 311 if (optname == SO_REUSEPORT) 312 sk->sk_reuseport = ssk->sk_reuseport; 313 else if (optname == SO_REUSEADDR) 314 sk->sk_reuse = ssk->sk_reuse; 315 else if (optname == SO_BINDTODEVICE) 316 sk->sk_bound_dev_if = ssk->sk_bound_dev_if; 317 else if (optname == SO_BINDTOIFINDEX) 318 sk->sk_bound_dev_if = ssk->sk_bound_dev_if; 319 } 320 release_sock(sk); 321 return ret; 322 case SO_KEEPALIVE: 323 case SO_PRIORITY: 324 case SO_SNDBUF: 325 case SO_SNDBUFFORCE: 326 case SO_RCVBUF: 327 case SO_RCVBUFFORCE: 328 case SO_MARK: 329 case SO_INCOMING_CPU: 330 case SO_DEBUG: 331 case SO_TIMESTAMP_OLD: 332 case SO_TIMESTAMP_NEW: 333 case SO_TIMESTAMPNS_OLD: 334 case SO_TIMESTAMPNS_NEW: 335 return mptcp_setsockopt_sol_socket_int(msk, optname, optval, 336 optlen); 337 case SO_TIMESTAMPING_OLD: 338 case SO_TIMESTAMPING_NEW: 339 return mptcp_setsockopt_sol_socket_timestamping(msk, optname, 340 optval, optlen); 341 case SO_LINGER: 342 return mptcp_setsockopt_sol_socket_linger(msk, optval, optlen); 343 case SO_RCVLOWAT: 344 case SO_RCVTIMEO_OLD: 345 case SO_RCVTIMEO_NEW: 346 case SO_SNDTIMEO_OLD: 347 case SO_SNDTIMEO_NEW: 348 case SO_BUSY_POLL: 349 case SO_PREFER_BUSY_POLL: 350 case SO_BUSY_POLL_BUDGET: 351 /* No need to copy: only relevant for msk */ 352 return sock_setsockopt(sk->sk_socket, SOL_SOCKET, optname, optval, optlen); 353 case SO_NO_CHECK: 354 case SO_DONTROUTE: 355 case SO_BROADCAST: 356 case SO_BSDCOMPAT: 357 case SO_PASSCRED: 358 case SO_PASSPIDFD: 359 case SO_PASSSEC: 360 case SO_RXQ_OVFL: 361 case SO_WIFI_STATUS: 362 case SO_NOFCS: 363 case SO_SELECT_ERR_QUEUE: 364 return 0; 365 } 366 367 /* SO_OOBINLINE is not supported, let's avoid the related mess 368 * SO_ATTACH_FILTER, SO_ATTACH_BPF, SO_ATTACH_REUSEPORT_CBPF, 369 * SO_DETACH_REUSEPORT_BPF, SO_DETACH_FILTER, SO_LOCK_FILTER, 370 * we must be careful with subflows 371 * 372 * SO_ATTACH_REUSEPORT_EBPF is not supported, at it checks 373 * explicitly the sk_protocol field 374 * 375 * SO_PEEK_OFF is unsupported, as it is for plain TCP 376 * SO_MAX_PACING_RATE is unsupported, we must be careful with subflows 377 * SO_CNX_ADVICE is currently unsupported, could possibly be relevant, 378 * but likely needs careful design 379 * 380 * SO_ZEROCOPY is currently unsupported, TODO in sndmsg 381 * SO_TXTIME is currently unsupported 382 */ 383 384 return -EOPNOTSUPP; 385 } 386 387 static int mptcp_setsockopt_v6(struct mptcp_sock *msk, int optname, 388 sockptr_t optval, unsigned int optlen) 389 { 390 struct sock *sk = (struct sock *)msk; 391 int ret = -EOPNOTSUPP; 392 struct sock *ssk; 393 394 switch (optname) { 395 case IPV6_V6ONLY: 396 case IPV6_TRANSPARENT: 397 case IPV6_FREEBIND: 398 lock_sock(sk); 399 ssk = __mptcp_nmpc_sk(msk); 400 if (IS_ERR(ssk)) { 401 release_sock(sk); 402 return PTR_ERR(ssk); 403 } 404 405 ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen); 406 if (ret != 0) { 407 release_sock(sk); 408 return ret; 409 } 410 411 sockopt_seq_inc(msk); 412 413 switch (optname) { 414 case IPV6_V6ONLY: 415 sk->sk_ipv6only = ssk->sk_ipv6only; 416 break; 417 case IPV6_TRANSPARENT: 418 inet_assign_bit(TRANSPARENT, sk, 419 inet_test_bit(TRANSPARENT, ssk)); 420 break; 421 case IPV6_FREEBIND: 422 inet_assign_bit(FREEBIND, sk, 423 inet_test_bit(FREEBIND, ssk)); 424 break; 425 } 426 427 release_sock(sk); 428 break; 429 } 430 431 return ret; 432 } 433 434 static bool mptcp_supported_sockopt(int level, int optname) 435 { 436 if (level == SOL_IP) { 437 switch (optname) { 438 /* should work fine */ 439 case IP_FREEBIND: 440 case IP_TRANSPARENT: 441 442 /* the following are control cmsg related */ 443 case IP_PKTINFO: 444 case IP_RECVTTL: 445 case IP_RECVTOS: 446 case IP_RECVOPTS: 447 case IP_RETOPTS: 448 case IP_PASSSEC: 449 case IP_RECVORIGDSTADDR: 450 case IP_CHECKSUM: 451 case IP_RECVFRAGSIZE: 452 453 /* common stuff that need some love */ 454 case IP_TOS: 455 case IP_TTL: 456 case IP_BIND_ADDRESS_NO_PORT: 457 case IP_MTU_DISCOVER: 458 case IP_RECVERR: 459 460 /* possibly less common may deserve some love */ 461 case IP_MINTTL: 462 463 /* the following is apparently a no-op for plain TCP */ 464 case IP_RECVERR_RFC4884: 465 return true; 466 } 467 468 /* IP_OPTIONS is not supported, needs subflow care */ 469 /* IP_HDRINCL, IP_NODEFRAG are not supported, RAW specific */ 470 /* IP_MULTICAST_TTL, IP_MULTICAST_LOOP, IP_UNICAST_IF, 471 * IP_ADD_MEMBERSHIP, IP_ADD_SOURCE_MEMBERSHIP, IP_DROP_MEMBERSHIP, 472 * IP_DROP_SOURCE_MEMBERSHIP, IP_BLOCK_SOURCE, IP_UNBLOCK_SOURCE, 473 * MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP MCAST_JOIN_SOURCE_GROUP, 474 * MCAST_LEAVE_SOURCE_GROUP, MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, 475 * MCAST_MSFILTER, IP_MULTICAST_ALL are not supported, better not deal 476 * with mcast stuff 477 */ 478 /* IP_IPSEC_POLICY, IP_XFRM_POLICY are nut supported, unrelated here */ 479 return false; 480 } 481 if (level == SOL_IPV6) { 482 switch (optname) { 483 case IPV6_V6ONLY: 484 485 /* the following are control cmsg related */ 486 case IPV6_RECVPKTINFO: 487 case IPV6_2292PKTINFO: 488 case IPV6_RECVHOPLIMIT: 489 case IPV6_2292HOPLIMIT: 490 case IPV6_RECVRTHDR: 491 case IPV6_2292RTHDR: 492 case IPV6_RECVHOPOPTS: 493 case IPV6_2292HOPOPTS: 494 case IPV6_RECVDSTOPTS: 495 case IPV6_2292DSTOPTS: 496 case IPV6_RECVTCLASS: 497 case IPV6_FLOWINFO: 498 case IPV6_RECVPATHMTU: 499 case IPV6_RECVORIGDSTADDR: 500 case IPV6_RECVFRAGSIZE: 501 502 /* the following ones need some love but are quite common */ 503 case IPV6_TCLASS: 504 case IPV6_TRANSPARENT: 505 case IPV6_FREEBIND: 506 case IPV6_PKTINFO: 507 case IPV6_2292PKTOPTIONS: 508 case IPV6_UNICAST_HOPS: 509 case IPV6_MTU_DISCOVER: 510 case IPV6_MTU: 511 case IPV6_RECVERR: 512 case IPV6_FLOWINFO_SEND: 513 case IPV6_FLOWLABEL_MGR: 514 case IPV6_MINHOPCOUNT: 515 case IPV6_DONTFRAG: 516 case IPV6_AUTOFLOWLABEL: 517 518 /* the following one is a no-op for plain TCP */ 519 case IPV6_RECVERR_RFC4884: 520 return true; 521 } 522 523 /* IPV6_HOPOPTS, IPV6_RTHDRDSTOPTS, IPV6_RTHDR, IPV6_DSTOPTS are 524 * not supported 525 */ 526 /* IPV6_MULTICAST_HOPS, IPV6_MULTICAST_LOOP, IPV6_UNICAST_IF, 527 * IPV6_MULTICAST_IF, IPV6_ADDRFORM, 528 * IPV6_ADD_MEMBERSHIP, IPV6_DROP_MEMBERSHIP, IPV6_JOIN_ANYCAST, 529 * IPV6_LEAVE_ANYCAST, IPV6_MULTICAST_ALL, MCAST_JOIN_GROUP, MCAST_LEAVE_GROUP, 530 * MCAST_JOIN_SOURCE_GROUP, MCAST_LEAVE_SOURCE_GROUP, 531 * MCAST_BLOCK_SOURCE, MCAST_UNBLOCK_SOURCE, MCAST_MSFILTER 532 * are not supported better not deal with mcast 533 */ 534 /* IPV6_ROUTER_ALERT, IPV6_ROUTER_ALERT_ISOLATE are not supported, since are evil */ 535 536 /* IPV6_IPSEC_POLICY, IPV6_XFRM_POLICY are not supported */ 537 /* IPV6_ADDR_PREFERENCES is not supported, we must be careful with subflows */ 538 return false; 539 } 540 if (level == SOL_TCP) { 541 switch (optname) { 542 /* the following are no-op or should work just fine */ 543 case TCP_THIN_DUPACK: 544 case TCP_DEFER_ACCEPT: 545 546 /* the following need some love */ 547 case TCP_MAXSEG: 548 case TCP_NODELAY: 549 case TCP_THIN_LINEAR_TIMEOUTS: 550 case TCP_CONGESTION: 551 case TCP_CORK: 552 case TCP_KEEPIDLE: 553 case TCP_KEEPINTVL: 554 case TCP_KEEPCNT: 555 case TCP_SYNCNT: 556 case TCP_SAVE_SYN: 557 case TCP_LINGER2: 558 case TCP_WINDOW_CLAMP: 559 case TCP_QUICKACK: 560 case TCP_USER_TIMEOUT: 561 case TCP_TIMESTAMP: 562 case TCP_NOTSENT_LOWAT: 563 case TCP_TX_DELAY: 564 case TCP_INQ: 565 case TCP_FASTOPEN: 566 case TCP_FASTOPEN_CONNECT: 567 case TCP_FASTOPEN_KEY: 568 case TCP_FASTOPEN_NO_COOKIE: 569 return true; 570 } 571 572 /* TCP_MD5SIG, TCP_MD5SIG_EXT are not supported, MD5 is not compatible with MPTCP */ 573 574 /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, 575 * TCP_REPAIR_WINDOW are not supported, better avoid this mess 576 */ 577 } 578 return false; 579 } 580 581 static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t optval, 582 unsigned int optlen) 583 { 584 struct mptcp_subflow_context *subflow; 585 struct sock *sk = (struct sock *)msk; 586 char name[TCP_CA_NAME_MAX]; 587 bool cap_net_admin; 588 int ret; 589 590 if (optlen < 1) 591 return -EINVAL; 592 593 ret = strncpy_from_sockptr(name, optval, 594 min_t(long, TCP_CA_NAME_MAX - 1, optlen)); 595 if (ret < 0) 596 return -EFAULT; 597 598 name[ret] = 0; 599 600 cap_net_admin = ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN); 601 602 ret = 0; 603 lock_sock(sk); 604 sockopt_seq_inc(msk); 605 mptcp_for_each_subflow(msk, subflow) { 606 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 607 int err; 608 609 lock_sock(ssk); 610 err = tcp_set_congestion_control(ssk, name, true, cap_net_admin); 611 if (err < 0 && ret == 0) 612 ret = err; 613 subflow->setsockopt_seq = msk->setsockopt_seq; 614 release_sock(ssk); 615 } 616 617 if (ret == 0) 618 strcpy(msk->ca_name, name); 619 620 release_sock(sk); 621 return ret; 622 } 623 624 static int __mptcp_setsockopt_set_val(struct mptcp_sock *msk, int max, 625 int (*set_val)(struct sock *, int), 626 int *msk_val, int val) 627 { 628 struct mptcp_subflow_context *subflow; 629 int err = 0; 630 631 mptcp_for_each_subflow(msk, subflow) { 632 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 633 int ret; 634 635 lock_sock(ssk); 636 ret = set_val(ssk, val); 637 err = err ? : ret; 638 release_sock(ssk); 639 } 640 641 if (!err) { 642 *msk_val = val; 643 sockopt_seq_inc(msk); 644 } 645 646 return err; 647 } 648 649 static int __mptcp_setsockopt_sol_tcp_cork(struct mptcp_sock *msk, int val) 650 { 651 struct mptcp_subflow_context *subflow; 652 struct sock *sk = (struct sock *)msk; 653 654 sockopt_seq_inc(msk); 655 msk->cork = !!val; 656 mptcp_for_each_subflow(msk, subflow) { 657 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 658 659 lock_sock(ssk); 660 __tcp_sock_set_cork(ssk, !!val); 661 release_sock(ssk); 662 } 663 if (!val) 664 mptcp_check_and_set_pending(sk); 665 666 return 0; 667 } 668 669 static int __mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, int val) 670 { 671 struct mptcp_subflow_context *subflow; 672 struct sock *sk = (struct sock *)msk; 673 674 sockopt_seq_inc(msk); 675 msk->nodelay = !!val; 676 mptcp_for_each_subflow(msk, subflow) { 677 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 678 679 lock_sock(ssk); 680 __tcp_sock_set_nodelay(ssk, !!val); 681 release_sock(ssk); 682 } 683 if (val) 684 mptcp_check_and_set_pending(sk); 685 return 0; 686 } 687 688 static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname, 689 sockptr_t optval, unsigned int optlen) 690 { 691 struct sock *sk = (struct sock *)msk; 692 struct sock *ssk; 693 int err; 694 695 err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); 696 if (err != 0) 697 return err; 698 699 lock_sock(sk); 700 701 ssk = __mptcp_nmpc_sk(msk); 702 if (IS_ERR(ssk)) { 703 release_sock(sk); 704 return PTR_ERR(ssk); 705 } 706 707 switch (optname) { 708 case IP_FREEBIND: 709 inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); 710 break; 711 case IP_TRANSPARENT: 712 inet_assign_bit(TRANSPARENT, ssk, 713 inet_test_bit(TRANSPARENT, sk)); 714 break; 715 default: 716 release_sock(sk); 717 WARN_ON_ONCE(1); 718 return -EOPNOTSUPP; 719 } 720 721 sockopt_seq_inc(msk); 722 release_sock(sk); 723 return 0; 724 } 725 726 static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, 727 sockptr_t optval, unsigned int optlen) 728 { 729 struct mptcp_subflow_context *subflow; 730 struct sock *sk = (struct sock *)msk; 731 int err, val; 732 733 err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); 734 735 if (err != 0) 736 return err; 737 738 lock_sock(sk); 739 sockopt_seq_inc(msk); 740 val = inet_sk(sk)->tos; 741 mptcp_for_each_subflow(msk, subflow) { 742 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 743 bool slow; 744 745 slow = lock_sock_fast(ssk); 746 __ip_sock_set_tos(ssk, val); 747 unlock_sock_fast(ssk, slow); 748 } 749 release_sock(sk); 750 751 return 0; 752 } 753 754 static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, 755 sockptr_t optval, unsigned int optlen) 756 { 757 switch (optname) { 758 case IP_FREEBIND: 759 case IP_TRANSPARENT: 760 return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen); 761 case IP_TOS: 762 return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen); 763 } 764 765 return -EOPNOTSUPP; 766 } 767 768 static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, 769 sockptr_t optval, unsigned int optlen) 770 { 771 struct sock *sk = (struct sock *)msk; 772 struct sock *ssk; 773 int ret; 774 775 /* Limit to first subflow, before the connection establishment */ 776 lock_sock(sk); 777 ssk = __mptcp_nmpc_sk(msk); 778 if (IS_ERR(ssk)) { 779 ret = PTR_ERR(ssk); 780 goto unlock; 781 } 782 783 ret = tcp_setsockopt(ssk, level, optname, optval, optlen); 784 785 unlock: 786 release_sock(sk); 787 return ret; 788 } 789 790 static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 791 sockptr_t optval, unsigned int optlen) 792 { 793 struct sock *sk = (void *)msk; 794 int ret, val; 795 796 switch (optname) { 797 case TCP_ULP: 798 return -EOPNOTSUPP; 799 case TCP_CONGESTION: 800 return mptcp_setsockopt_sol_tcp_congestion(msk, optval, optlen); 801 case TCP_DEFER_ACCEPT: 802 /* See tcp.c: TCP_DEFER_ACCEPT does not fail */ 803 mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); 804 return 0; 805 case TCP_FASTOPEN: 806 case TCP_FASTOPEN_CONNECT: 807 case TCP_FASTOPEN_KEY: 808 case TCP_FASTOPEN_NO_COOKIE: 809 return mptcp_setsockopt_first_sf_only(msk, SOL_TCP, optname, 810 optval, optlen); 811 } 812 813 ret = mptcp_get_int_option(msk, optval, optlen, &val); 814 if (ret) 815 return ret; 816 817 lock_sock(sk); 818 switch (optname) { 819 case TCP_INQ: 820 if (val < 0 || val > 1) 821 ret = -EINVAL; 822 else 823 msk->recvmsg_inq = !!val; 824 break; 825 case TCP_CORK: 826 ret = __mptcp_setsockopt_sol_tcp_cork(msk, val); 827 break; 828 case TCP_NODELAY: 829 ret = __mptcp_setsockopt_sol_tcp_nodelay(msk, val); 830 break; 831 case TCP_KEEPIDLE: 832 ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPIDLE, 833 &tcp_sock_set_keepidle_locked, 834 &msk->keepalive_idle, val); 835 break; 836 case TCP_KEEPINTVL: 837 ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPINTVL, 838 &tcp_sock_set_keepintvl, 839 &msk->keepalive_intvl, val); 840 break; 841 case TCP_KEEPCNT: 842 ret = __mptcp_setsockopt_set_val(msk, MAX_TCP_KEEPCNT, 843 &tcp_sock_set_keepcnt, 844 &msk->keepalive_cnt, 845 val); 846 break; 847 default: 848 ret = -ENOPROTOOPT; 849 } 850 851 release_sock(sk); 852 return ret; 853 } 854 855 int mptcp_setsockopt(struct sock *sk, int level, int optname, 856 sockptr_t optval, unsigned int optlen) 857 { 858 struct mptcp_sock *msk = mptcp_sk(sk); 859 struct sock *ssk; 860 861 pr_debug("msk=%p\n", msk); 862 863 if (level == SOL_SOCKET) 864 return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen); 865 866 if (!mptcp_supported_sockopt(level, optname)) 867 return -ENOPROTOOPT; 868 869 /* @@ the meaning of setsockopt() when the socket is connected and 870 * there are multiple subflows is not yet defined. It is up to the 871 * MPTCP-level socket to configure the subflows until the subflow 872 * is in TCP fallback, when TCP socket options are passed through 873 * to the one remaining subflow. 874 */ 875 lock_sock(sk); 876 ssk = __mptcp_tcp_fallback(msk); 877 release_sock(sk); 878 if (ssk) 879 return tcp_setsockopt(ssk, level, optname, optval, optlen); 880 881 if (level == SOL_IP) 882 return mptcp_setsockopt_v4(msk, optname, optval, optlen); 883 884 if (level == SOL_IPV6) 885 return mptcp_setsockopt_v6(msk, optname, optval, optlen); 886 887 if (level == SOL_TCP) 888 return mptcp_setsockopt_sol_tcp(msk, optname, optval, optlen); 889 890 return -EOPNOTSUPP; 891 } 892 893 static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname, 894 char __user *optval, int __user *optlen) 895 { 896 struct sock *sk = (struct sock *)msk; 897 struct sock *ssk; 898 int ret; 899 900 lock_sock(sk); 901 ssk = msk->first; 902 if (ssk) { 903 ret = tcp_getsockopt(ssk, level, optname, optval, optlen); 904 goto out; 905 } 906 907 ssk = __mptcp_nmpc_sk(msk); 908 if (IS_ERR(ssk)) { 909 ret = PTR_ERR(ssk); 910 goto out; 911 } 912 913 ret = tcp_getsockopt(ssk, level, optname, optval, optlen); 914 915 out: 916 release_sock(sk); 917 return ret; 918 } 919 920 void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) 921 { 922 struct sock *sk = (struct sock *)msk; 923 u32 flags = 0; 924 bool slow; 925 926 memset(info, 0, sizeof(*info)); 927 928 info->mptcpi_subflows = READ_ONCE(msk->pm.subflows); 929 info->mptcpi_add_addr_signal = READ_ONCE(msk->pm.add_addr_signaled); 930 info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted); 931 info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used); 932 933 if (inet_sk_state_load(sk) == TCP_LISTEN) 934 return; 935 936 /* The following limits only make sense for the in-kernel PM */ 937 if (mptcp_pm_is_kernel(msk)) { 938 info->mptcpi_subflows_max = 939 mptcp_pm_get_subflows_max(msk); 940 info->mptcpi_add_addr_signal_max = 941 mptcp_pm_get_add_addr_signal_max(msk); 942 info->mptcpi_add_addr_accepted_max = 943 mptcp_pm_get_add_addr_accept_max(msk); 944 info->mptcpi_local_addr_max = 945 mptcp_pm_get_local_addr_max(msk); 946 } 947 948 if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) 949 flags |= MPTCP_INFO_FLAG_FALLBACK; 950 if (READ_ONCE(msk->can_ack)) 951 flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; 952 info->mptcpi_flags = flags; 953 mptcp_data_lock(sk); 954 info->mptcpi_snd_una = msk->snd_una; 955 info->mptcpi_rcv_nxt = msk->ack_seq; 956 info->mptcpi_bytes_acked = msk->bytes_acked; 957 mptcp_data_unlock(sk); 958 959 slow = lock_sock_fast(sk); 960 info->mptcpi_csum_enabled = msk->csum_enabled; 961 info->mptcpi_token = msk->token; 962 info->mptcpi_write_seq = msk->write_seq; 963 info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits; 964 info->mptcpi_bytes_sent = msk->bytes_sent; 965 info->mptcpi_bytes_received = msk->bytes_received; 966 info->mptcpi_bytes_retrans = msk->bytes_retrans; 967 unlock_sock_fast(sk, slow); 968 } 969 EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); 970 971 static int mptcp_getsockopt_info(struct mptcp_sock *msk, char __user *optval, int __user *optlen) 972 { 973 struct mptcp_info m_info; 974 int len; 975 976 if (get_user(len, optlen)) 977 return -EFAULT; 978 979 len = min_t(unsigned int, len, sizeof(struct mptcp_info)); 980 981 mptcp_diag_fill_info(msk, &m_info); 982 983 if (put_user(len, optlen)) 984 return -EFAULT; 985 986 if (copy_to_user(optval, &m_info, len)) 987 return -EFAULT; 988 989 return 0; 990 } 991 992 static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd, 993 char __user *optval, 994 u32 copied, 995 int __user *optlen) 996 { 997 u32 copylen = min_t(u32, sfd->size_subflow_data, sizeof(*sfd)); 998 999 if (copied) 1000 copied += sfd->size_subflow_data; 1001 else 1002 copied = copylen; 1003 1004 if (put_user(copied, optlen)) 1005 return -EFAULT; 1006 1007 if (copy_to_user(optval, sfd, copylen)) 1008 return -EFAULT; 1009 1010 return 0; 1011 } 1012 1013 static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd, 1014 char __user *optval, 1015 int __user *optlen) 1016 { 1017 int len, copylen; 1018 1019 if (get_user(len, optlen)) 1020 return -EFAULT; 1021 1022 /* if mptcp_subflow_data size is changed, need to adjust 1023 * this function to deal with programs using old version. 1024 */ 1025 BUILD_BUG_ON(sizeof(*sfd) != MIN_INFO_OPTLEN_SIZE); 1026 1027 if (len < MIN_INFO_OPTLEN_SIZE) 1028 return -EINVAL; 1029 1030 memset(sfd, 0, sizeof(*sfd)); 1031 1032 copylen = min_t(unsigned int, len, sizeof(*sfd)); 1033 if (copy_from_user(sfd, optval, copylen)) 1034 return -EFAULT; 1035 1036 /* size_subflow_data is u32, but len is signed */ 1037 if (sfd->size_subflow_data > INT_MAX || 1038 sfd->size_user > INT_MAX) 1039 return -EINVAL; 1040 1041 if (sfd->size_subflow_data < MIN_INFO_OPTLEN_SIZE || 1042 sfd->size_subflow_data > len) 1043 return -EINVAL; 1044 1045 if (sfd->num_subflows || sfd->size_kernel) 1046 return -EINVAL; 1047 1048 return len - sfd->size_subflow_data; 1049 } 1050 1051 static int mptcp_getsockopt_tcpinfo(struct mptcp_sock *msk, char __user *optval, 1052 int __user *optlen) 1053 { 1054 struct mptcp_subflow_context *subflow; 1055 struct sock *sk = (struct sock *)msk; 1056 unsigned int sfcount = 0, copied = 0; 1057 struct mptcp_subflow_data sfd; 1058 char __user *infoptr; 1059 int len; 1060 1061 len = mptcp_get_subflow_data(&sfd, optval, optlen); 1062 if (len < 0) 1063 return len; 1064 1065 sfd.size_kernel = sizeof(struct tcp_info); 1066 sfd.size_user = min_t(unsigned int, sfd.size_user, 1067 sizeof(struct tcp_info)); 1068 1069 infoptr = optval + sfd.size_subflow_data; 1070 1071 lock_sock(sk); 1072 1073 mptcp_for_each_subflow(msk, subflow) { 1074 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1075 1076 ++sfcount; 1077 1078 if (len && len >= sfd.size_user) { 1079 struct tcp_info info; 1080 1081 tcp_get_info(ssk, &info); 1082 1083 if (copy_to_user(infoptr, &info, sfd.size_user)) { 1084 release_sock(sk); 1085 return -EFAULT; 1086 } 1087 1088 infoptr += sfd.size_user; 1089 copied += sfd.size_user; 1090 len -= sfd.size_user; 1091 } 1092 } 1093 1094 release_sock(sk); 1095 1096 sfd.num_subflows = sfcount; 1097 1098 if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) 1099 return -EFAULT; 1100 1101 return 0; 1102 } 1103 1104 static void mptcp_get_sub_addrs(const struct sock *sk, struct mptcp_subflow_addrs *a) 1105 { 1106 const struct inet_sock *inet = inet_sk(sk); 1107 1108 memset(a, 0, sizeof(*a)); 1109 1110 if (sk->sk_family == AF_INET) { 1111 a->sin_local.sin_family = AF_INET; 1112 a->sin_local.sin_port = inet->inet_sport; 1113 a->sin_local.sin_addr.s_addr = inet->inet_rcv_saddr; 1114 1115 if (!a->sin_local.sin_addr.s_addr) 1116 a->sin_local.sin_addr.s_addr = inet->inet_saddr; 1117 1118 a->sin_remote.sin_family = AF_INET; 1119 a->sin_remote.sin_port = inet->inet_dport; 1120 a->sin_remote.sin_addr.s_addr = inet->inet_daddr; 1121 #if IS_ENABLED(CONFIG_IPV6) 1122 } else if (sk->sk_family == AF_INET6) { 1123 const struct ipv6_pinfo *np = inet6_sk(sk); 1124 1125 if (WARN_ON_ONCE(!np)) 1126 return; 1127 1128 a->sin6_local.sin6_family = AF_INET6; 1129 a->sin6_local.sin6_port = inet->inet_sport; 1130 1131 if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 1132 a->sin6_local.sin6_addr = np->saddr; 1133 else 1134 a->sin6_local.sin6_addr = sk->sk_v6_rcv_saddr; 1135 1136 a->sin6_remote.sin6_family = AF_INET6; 1137 a->sin6_remote.sin6_port = inet->inet_dport; 1138 a->sin6_remote.sin6_addr = sk->sk_v6_daddr; 1139 #endif 1140 } 1141 } 1142 1143 static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *optval, 1144 int __user *optlen) 1145 { 1146 struct mptcp_subflow_context *subflow; 1147 struct sock *sk = (struct sock *)msk; 1148 unsigned int sfcount = 0, copied = 0; 1149 struct mptcp_subflow_data sfd; 1150 char __user *addrptr; 1151 int len; 1152 1153 len = mptcp_get_subflow_data(&sfd, optval, optlen); 1154 if (len < 0) 1155 return len; 1156 1157 sfd.size_kernel = sizeof(struct mptcp_subflow_addrs); 1158 sfd.size_user = min_t(unsigned int, sfd.size_user, 1159 sizeof(struct mptcp_subflow_addrs)); 1160 1161 addrptr = optval + sfd.size_subflow_data; 1162 1163 lock_sock(sk); 1164 1165 mptcp_for_each_subflow(msk, subflow) { 1166 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1167 1168 ++sfcount; 1169 1170 if (len && len >= sfd.size_user) { 1171 struct mptcp_subflow_addrs a; 1172 1173 mptcp_get_sub_addrs(ssk, &a); 1174 1175 if (copy_to_user(addrptr, &a, sfd.size_user)) { 1176 release_sock(sk); 1177 return -EFAULT; 1178 } 1179 1180 addrptr += sfd.size_user; 1181 copied += sfd.size_user; 1182 len -= sfd.size_user; 1183 } 1184 } 1185 1186 release_sock(sk); 1187 1188 sfd.num_subflows = sfcount; 1189 1190 if (mptcp_put_subflow_data(&sfd, optval, copied, optlen)) 1191 return -EFAULT; 1192 1193 return 0; 1194 } 1195 1196 static int mptcp_get_full_info(struct mptcp_full_info *mfi, 1197 char __user *optval, 1198 int __user *optlen) 1199 { 1200 int len; 1201 1202 BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) != 1203 MIN_FULL_INFO_OPTLEN_SIZE); 1204 1205 if (get_user(len, optlen)) 1206 return -EFAULT; 1207 1208 if (len < MIN_FULL_INFO_OPTLEN_SIZE) 1209 return -EINVAL; 1210 1211 memset(mfi, 0, sizeof(*mfi)); 1212 if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE)) 1213 return -EFAULT; 1214 1215 if (mfi->size_tcpinfo_kernel || 1216 mfi->size_sfinfo_kernel || 1217 mfi->num_subflows) 1218 return -EINVAL; 1219 1220 if (mfi->size_sfinfo_user > INT_MAX || 1221 mfi->size_tcpinfo_user > INT_MAX) 1222 return -EINVAL; 1223 1224 return len - MIN_FULL_INFO_OPTLEN_SIZE; 1225 } 1226 1227 static int mptcp_put_full_info(struct mptcp_full_info *mfi, 1228 char __user *optval, 1229 u32 copylen, 1230 int __user *optlen) 1231 { 1232 copylen += MIN_FULL_INFO_OPTLEN_SIZE; 1233 if (put_user(copylen, optlen)) 1234 return -EFAULT; 1235 1236 if (copy_to_user(optval, mfi, copylen)) 1237 return -EFAULT; 1238 return 0; 1239 } 1240 1241 static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval, 1242 int __user *optlen) 1243 { 1244 unsigned int sfcount = 0, copylen = 0; 1245 struct mptcp_subflow_context *subflow; 1246 struct sock *sk = (struct sock *)msk; 1247 void __user *tcpinfoptr, *sfinfoptr; 1248 struct mptcp_full_info mfi; 1249 int len; 1250 1251 len = mptcp_get_full_info(&mfi, optval, optlen); 1252 if (len < 0) 1253 return len; 1254 1255 /* don't bother filling the mptcp info if there is not enough 1256 * user-space-provided storage 1257 */ 1258 if (len > 0) { 1259 mptcp_diag_fill_info(msk, &mfi.mptcp_info); 1260 copylen += min_t(unsigned int, len, sizeof(struct mptcp_info)); 1261 } 1262 1263 mfi.size_tcpinfo_kernel = sizeof(struct tcp_info); 1264 mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user, 1265 sizeof(struct tcp_info)); 1266 sfinfoptr = u64_to_user_ptr(mfi.subflow_info); 1267 mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info); 1268 mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user, 1269 sizeof(struct mptcp_subflow_info)); 1270 tcpinfoptr = u64_to_user_ptr(mfi.tcp_info); 1271 1272 lock_sock(sk); 1273 mptcp_for_each_subflow(msk, subflow) { 1274 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1275 struct mptcp_subflow_info sfinfo; 1276 struct tcp_info tcp_info; 1277 1278 if (sfcount++ >= mfi.size_arrays_user) 1279 continue; 1280 1281 /* fetch addr/tcp_info only if the user space buffers 1282 * are wide enough 1283 */ 1284 memset(&sfinfo, 0, sizeof(sfinfo)); 1285 sfinfo.id = subflow->subflow_id; 1286 if (mfi.size_sfinfo_user > 1287 offsetof(struct mptcp_subflow_info, addrs)) 1288 mptcp_get_sub_addrs(ssk, &sfinfo.addrs); 1289 if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user)) 1290 goto fail_release; 1291 1292 if (mfi.size_tcpinfo_user) { 1293 tcp_get_info(ssk, &tcp_info); 1294 if (copy_to_user(tcpinfoptr, &tcp_info, 1295 mfi.size_tcpinfo_user)) 1296 goto fail_release; 1297 } 1298 1299 tcpinfoptr += mfi.size_tcpinfo_user; 1300 sfinfoptr += mfi.size_sfinfo_user; 1301 } 1302 release_sock(sk); 1303 1304 mfi.num_subflows = sfcount; 1305 if (mptcp_put_full_info(&mfi, optval, copylen, optlen)) 1306 return -EFAULT; 1307 1308 return 0; 1309 1310 fail_release: 1311 release_sock(sk); 1312 return -EFAULT; 1313 } 1314 1315 static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval, 1316 int __user *optlen, int val) 1317 { 1318 int len; 1319 1320 if (get_user(len, optlen)) 1321 return -EFAULT; 1322 if (len < 0) 1323 return -EINVAL; 1324 1325 if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) { 1326 unsigned char ucval = (unsigned char)val; 1327 1328 len = 1; 1329 if (put_user(len, optlen)) 1330 return -EFAULT; 1331 if (copy_to_user(optval, &ucval, 1)) 1332 return -EFAULT; 1333 } else { 1334 len = min_t(unsigned int, len, sizeof(int)); 1335 if (put_user(len, optlen)) 1336 return -EFAULT; 1337 if (copy_to_user(optval, &val, len)) 1338 return -EFAULT; 1339 } 1340 1341 return 0; 1342 } 1343 1344 static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, 1345 char __user *optval, int __user *optlen) 1346 { 1347 struct sock *sk = (void *)msk; 1348 1349 switch (optname) { 1350 case TCP_ULP: 1351 case TCP_CONGESTION: 1352 case TCP_INFO: 1353 case TCP_CC_INFO: 1354 case TCP_DEFER_ACCEPT: 1355 case TCP_FASTOPEN: 1356 case TCP_FASTOPEN_CONNECT: 1357 case TCP_FASTOPEN_KEY: 1358 case TCP_FASTOPEN_NO_COOKIE: 1359 return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, 1360 optval, optlen); 1361 case TCP_INQ: 1362 return mptcp_put_int_option(msk, optval, optlen, msk->recvmsg_inq); 1363 case TCP_CORK: 1364 return mptcp_put_int_option(msk, optval, optlen, msk->cork); 1365 case TCP_NODELAY: 1366 return mptcp_put_int_option(msk, optval, optlen, msk->nodelay); 1367 case TCP_KEEPIDLE: 1368 return mptcp_put_int_option(msk, optval, optlen, 1369 msk->keepalive_idle ? : 1370 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_time) / HZ); 1371 case TCP_KEEPINTVL: 1372 return mptcp_put_int_option(msk, optval, optlen, 1373 msk->keepalive_intvl ? : 1374 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_intvl) / HZ); 1375 case TCP_KEEPCNT: 1376 return mptcp_put_int_option(msk, optval, optlen, 1377 msk->keepalive_cnt ? : 1378 READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_keepalive_probes)); 1379 } 1380 return -EOPNOTSUPP; 1381 } 1382 1383 static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname, 1384 char __user *optval, int __user *optlen) 1385 { 1386 struct sock *sk = (void *)msk; 1387 1388 switch (optname) { 1389 case IP_TOS: 1390 return mptcp_put_int_option(msk, optval, optlen, inet_sk(sk)->tos); 1391 } 1392 1393 return -EOPNOTSUPP; 1394 } 1395 1396 static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname, 1397 char __user *optval, int __user *optlen) 1398 { 1399 switch (optname) { 1400 case MPTCP_INFO: 1401 return mptcp_getsockopt_info(msk, optval, optlen); 1402 case MPTCP_FULL_INFO: 1403 return mptcp_getsockopt_full_info(msk, optval, optlen); 1404 case MPTCP_TCPINFO: 1405 return mptcp_getsockopt_tcpinfo(msk, optval, optlen); 1406 case MPTCP_SUBFLOW_ADDRS: 1407 return mptcp_getsockopt_subflow_addrs(msk, optval, optlen); 1408 } 1409 1410 return -EOPNOTSUPP; 1411 } 1412 1413 int mptcp_getsockopt(struct sock *sk, int level, int optname, 1414 char __user *optval, int __user *option) 1415 { 1416 struct mptcp_sock *msk = mptcp_sk(sk); 1417 struct sock *ssk; 1418 1419 pr_debug("msk=%p\n", msk); 1420 1421 /* @@ the meaning of setsockopt() when the socket is connected and 1422 * there are multiple subflows is not yet defined. It is up to the 1423 * MPTCP-level socket to configure the subflows until the subflow 1424 * is in TCP fallback, when socket options are passed through 1425 * to the one remaining subflow. 1426 */ 1427 lock_sock(sk); 1428 ssk = __mptcp_tcp_fallback(msk); 1429 release_sock(sk); 1430 if (ssk) 1431 return tcp_getsockopt(ssk, level, optname, optval, option); 1432 1433 if (level == SOL_IP) 1434 return mptcp_getsockopt_v4(msk, optname, optval, option); 1435 if (level == SOL_TCP) 1436 return mptcp_getsockopt_sol_tcp(msk, optname, optval, option); 1437 if (level == SOL_MPTCP) 1438 return mptcp_getsockopt_sol_mptcp(msk, optname, optval, option); 1439 return -EOPNOTSUPP; 1440 } 1441 1442 static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) 1443 { 1444 static const unsigned int tx_rx_locks = SOCK_RCVBUF_LOCK | SOCK_SNDBUF_LOCK; 1445 struct sock *sk = (struct sock *)msk; 1446 1447 if (ssk->sk_prot->keepalive) { 1448 if (sock_flag(sk, SOCK_KEEPOPEN)) 1449 ssk->sk_prot->keepalive(ssk, 1); 1450 else 1451 ssk->sk_prot->keepalive(ssk, 0); 1452 } 1453 1454 ssk->sk_priority = sk->sk_priority; 1455 ssk->sk_bound_dev_if = sk->sk_bound_dev_if; 1456 ssk->sk_incoming_cpu = sk->sk_incoming_cpu; 1457 ssk->sk_ipv6only = sk->sk_ipv6only; 1458 __ip_sock_set_tos(ssk, inet_sk(sk)->tos); 1459 1460 if (sk->sk_userlocks & tx_rx_locks) { 1461 ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks; 1462 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK) { 1463 WRITE_ONCE(ssk->sk_sndbuf, sk->sk_sndbuf); 1464 mptcp_subflow_ctx(ssk)->cached_sndbuf = sk->sk_sndbuf; 1465 } 1466 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1467 WRITE_ONCE(ssk->sk_rcvbuf, sk->sk_rcvbuf); 1468 } 1469 1470 if (sock_flag(sk, SOCK_LINGER)) { 1471 ssk->sk_lingertime = sk->sk_lingertime; 1472 sock_set_flag(ssk, SOCK_LINGER); 1473 } else { 1474 sock_reset_flag(ssk, SOCK_LINGER); 1475 } 1476 1477 if (sk->sk_mark != ssk->sk_mark) { 1478 ssk->sk_mark = sk->sk_mark; 1479 sk_dst_reset(ssk); 1480 } 1481 1482 sock_valbool_flag(ssk, SOCK_DBG, sock_flag(sk, SOCK_DBG)); 1483 1484 if (inet_csk(sk)->icsk_ca_ops != inet_csk(ssk)->icsk_ca_ops) 1485 tcp_set_congestion_control(ssk, msk->ca_name, false, true); 1486 __tcp_sock_set_cork(ssk, !!msk->cork); 1487 __tcp_sock_set_nodelay(ssk, !!msk->nodelay); 1488 tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle); 1489 tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl); 1490 tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt); 1491 1492 inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); 1493 inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); 1494 } 1495 1496 static void __mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) 1497 { 1498 bool slow = lock_sock_fast(ssk); 1499 1500 sync_socket_options(msk, ssk); 1501 1502 unlock_sock_fast(ssk, slow); 1503 } 1504 1505 void mptcp_sockopt_sync(struct mptcp_sock *msk, struct sock *ssk) 1506 { 1507 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 1508 1509 msk_owned_by_me(msk); 1510 1511 if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { 1512 __mptcp_sockopt_sync(msk, ssk); 1513 1514 subflow->setsockopt_seq = msk->setsockopt_seq; 1515 } 1516 } 1517 1518 void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) 1519 { 1520 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 1521 1522 msk_owned_by_me(msk); 1523 1524 ssk->sk_rcvlowat = 0; 1525 1526 if (READ_ONCE(subflow->setsockopt_seq) != msk->setsockopt_seq) { 1527 sync_socket_options(msk, ssk); 1528 1529 subflow->setsockopt_seq = msk->setsockopt_seq; 1530 } 1531 } 1532 1533 /* unfortunately this is different enough from the tcp version so 1534 * that we can't factor it out 1535 */ 1536 int mptcp_set_rcvlowat(struct sock *sk, int val) 1537 { 1538 struct mptcp_subflow_context *subflow; 1539 int space, cap; 1540 1541 /* bpf can land here with a wrong sk type */ 1542 if (sk->sk_protocol == IPPROTO_TCP) 1543 return -EINVAL; 1544 1545 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1546 cap = sk->sk_rcvbuf >> 1; 1547 else 1548 cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; 1549 val = min(val, cap); 1550 WRITE_ONCE(sk->sk_rcvlowat, val ? : 1); 1551 1552 /* Check if we need to signal EPOLLIN right now */ 1553 if (mptcp_epollin_ready(sk)) 1554 sk->sk_data_ready(sk); 1555 1556 if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) 1557 return 0; 1558 1559 space = __tcp_space_from_win(mptcp_sk(sk)->scaling_ratio, val); 1560 if (space <= sk->sk_rcvbuf) 1561 return 0; 1562 1563 /* propagate the rcvbuf changes to all the subflows */ 1564 WRITE_ONCE(sk->sk_rcvbuf, space); 1565 mptcp_for_each_subflow(mptcp_sk(sk), subflow) { 1566 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 1567 bool slow; 1568 1569 slow = lock_sock_fast(ssk); 1570 WRITE_ONCE(ssk->sk_rcvbuf, space); 1571 WRITE_ONCE(tcp_sk(ssk)->window_clamp, val); 1572 unlock_sock_fast(ssk, slow); 1573 } 1574 return 0; 1575 } 1576