1 // SPDX-License-Identifier: GPL-2.0 2 /* Multipath TCP 3 * 4 * Copyright (c) 2020, Red Hat, Inc. 5 */ 6 7 #define pr_fmt(fmt) "MPTCP: " fmt 8 9 #include <linux/inet.h> 10 #include <linux/kernel.h> 11 #include <net/tcp.h> 12 #include <net/netns/generic.h> 13 #include <net/mptcp.h> 14 #include <net/genetlink.h> 15 #include <uapi/linux/mptcp.h> 16 17 #include "protocol.h" 18 #include "mib.h" 19 20 /* forward declaration */ 21 static struct genl_family mptcp_genl_family; 22 23 static int pm_nl_pernet_id; 24 25 struct mptcp_pm_add_entry { 26 struct list_head list; 27 struct mptcp_addr_info addr; 28 struct timer_list add_timer; 29 struct mptcp_sock *sock; 30 u8 retrans_times; 31 }; 32 33 struct pm_nl_pernet { 34 /* protects pernet updates */ 35 spinlock_t lock; 36 struct list_head local_addr_list; 37 unsigned int addrs; 38 unsigned int stale_loss_cnt; 39 unsigned int add_addr_signal_max; 40 unsigned int add_addr_accept_max; 41 unsigned int local_addr_max; 42 unsigned int subflows_max; 43 unsigned int next_id; 44 DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 45 }; 46 47 #define MPTCP_PM_ADDR_MAX 8 48 #define ADD_ADDR_RETRANS_MAX 3 49 50 static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) 51 { 52 return net_generic(net, pm_nl_pernet_id); 53 } 54 55 static struct pm_nl_pernet * 56 pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk) 57 { 58 return pm_nl_get_pernet(sock_net((struct sock *)msk)); 59 } 60 61 bool mptcp_addresses_equal(const struct mptcp_addr_info *a, 62 const struct mptcp_addr_info *b, bool use_port) 63 { 64 bool addr_equals = false; 65 66 if (a->family == b->family) { 67 if (a->family == AF_INET) 68 addr_equals = a->addr.s_addr == b->addr.s_addr; 69 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 70 else 71 addr_equals = !ipv6_addr_cmp(&a->addr6, &b->addr6); 72 } else if (a->family == AF_INET) { 73 if (ipv6_addr_v4mapped(&b->addr6)) 74 addr_equals = a->addr.s_addr == b->addr6.s6_addr32[3]; 75 } else if (b->family == AF_INET) { 76 if (ipv6_addr_v4mapped(&a->addr6)) 77 addr_equals = a->addr6.s6_addr32[3] == b->addr.s_addr; 78 #endif 79 } 80 81 if (!addr_equals) 82 return false; 83 if (!use_port) 84 return true; 85 86 return a->port == b->port; 87 } 88 89 static void local_address(const struct sock_common *skc, 90 struct mptcp_addr_info *addr) 91 { 92 addr->family = skc->skc_family; 93 addr->port = htons(skc->skc_num); 94 if (addr->family == AF_INET) 95 addr->addr.s_addr = skc->skc_rcv_saddr; 96 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 97 else if (addr->family == AF_INET6) 98 addr->addr6 = skc->skc_v6_rcv_saddr; 99 #endif 100 } 101 102 static void remote_address(const struct sock_common *skc, 103 struct mptcp_addr_info *addr) 104 { 105 addr->family = skc->skc_family; 106 addr->port = skc->skc_dport; 107 if (addr->family == AF_INET) 108 addr->addr.s_addr = skc->skc_daddr; 109 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 110 else if (addr->family == AF_INET6) 111 addr->addr6 = skc->skc_v6_daddr; 112 #endif 113 } 114 115 static bool lookup_subflow_by_saddr(const struct list_head *list, 116 const struct mptcp_addr_info *saddr) 117 { 118 struct mptcp_subflow_context *subflow; 119 struct mptcp_addr_info cur; 120 struct sock_common *skc; 121 122 list_for_each_entry(subflow, list, node) { 123 skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); 124 125 local_address(skc, &cur); 126 if (mptcp_addresses_equal(&cur, saddr, saddr->port)) 127 return true; 128 } 129 130 return false; 131 } 132 133 static bool lookup_subflow_by_daddr(const struct list_head *list, 134 const struct mptcp_addr_info *daddr) 135 { 136 struct mptcp_subflow_context *subflow; 137 struct mptcp_addr_info cur; 138 struct sock_common *skc; 139 140 list_for_each_entry(subflow, list, node) { 141 skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); 142 143 remote_address(skc, &cur); 144 if (mptcp_addresses_equal(&cur, daddr, daddr->port)) 145 return true; 146 } 147 148 return false; 149 } 150 151 static struct mptcp_pm_addr_entry * 152 select_local_address(const struct pm_nl_pernet *pernet, 153 const struct mptcp_sock *msk) 154 { 155 const struct sock *sk = (const struct sock *)msk; 156 struct mptcp_pm_addr_entry *entry, *ret = NULL; 157 158 msk_owned_by_me(msk); 159 160 rcu_read_lock(); 161 list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 162 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) 163 continue; 164 165 if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) 166 continue; 167 168 if (entry->addr.family != sk->sk_family) { 169 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 170 if ((entry->addr.family == AF_INET && 171 !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) || 172 (sk->sk_family == AF_INET && 173 !ipv6_addr_v4mapped(&entry->addr.addr6))) 174 #endif 175 continue; 176 } 177 178 ret = entry; 179 break; 180 } 181 rcu_read_unlock(); 182 return ret; 183 } 184 185 static struct mptcp_pm_addr_entry * 186 select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk) 187 { 188 struct mptcp_pm_addr_entry *entry, *ret = NULL; 189 190 rcu_read_lock(); 191 /* do not keep any additional per socket state, just signal 192 * the address list in order. 193 * Note: removal from the local address list during the msk life-cycle 194 * can lead to additional addresses not being announced. 195 */ 196 list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 197 if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) 198 continue; 199 200 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) 201 continue; 202 203 ret = entry; 204 break; 205 } 206 rcu_read_unlock(); 207 return ret; 208 } 209 210 unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) 211 { 212 const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 213 214 return READ_ONCE(pernet->add_addr_signal_max); 215 } 216 EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max); 217 218 unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk) 219 { 220 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 221 222 return READ_ONCE(pernet->add_addr_accept_max); 223 } 224 EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max); 225 226 unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk) 227 { 228 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 229 230 return READ_ONCE(pernet->subflows_max); 231 } 232 EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max); 233 234 unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk) 235 { 236 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 237 238 return READ_ONCE(pernet->local_addr_max); 239 } 240 EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max); 241 242 bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk) 243 { 244 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 245 246 if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) || 247 (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap, 248 MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) { 249 WRITE_ONCE(msk->pm.work_pending, false); 250 return false; 251 } 252 return true; 253 } 254 255 struct mptcp_pm_add_entry * 256 mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, 257 const struct mptcp_addr_info *addr) 258 { 259 struct mptcp_pm_add_entry *entry; 260 261 lockdep_assert_held(&msk->pm.lock); 262 263 list_for_each_entry(entry, &msk->pm.anno_list, list) { 264 if (mptcp_addresses_equal(&entry->addr, addr, true)) 265 return entry; 266 } 267 268 return NULL; 269 } 270 271 bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) 272 { 273 struct mptcp_pm_add_entry *entry; 274 struct mptcp_addr_info saddr; 275 bool ret = false; 276 277 local_address((struct sock_common *)sk, &saddr); 278 279 spin_lock_bh(&msk->pm.lock); 280 list_for_each_entry(entry, &msk->pm.anno_list, list) { 281 if (mptcp_addresses_equal(&entry->addr, &saddr, true)) { 282 ret = true; 283 goto out; 284 } 285 } 286 287 out: 288 spin_unlock_bh(&msk->pm.lock); 289 return ret; 290 } 291 292 static void mptcp_pm_add_timer(struct timer_list *timer) 293 { 294 struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer); 295 struct mptcp_sock *msk = entry->sock; 296 struct sock *sk = (struct sock *)msk; 297 298 pr_debug("msk=%p", msk); 299 300 if (!msk) 301 return; 302 303 if (inet_sk_state_load(sk) == TCP_CLOSE) 304 return; 305 306 if (!entry->addr.id) 307 return; 308 309 if (mptcp_pm_should_add_signal_addr(msk)) { 310 sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); 311 goto out; 312 } 313 314 spin_lock_bh(&msk->pm.lock); 315 316 if (!mptcp_pm_should_add_signal_addr(msk)) { 317 pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id); 318 mptcp_pm_announce_addr(msk, &entry->addr, false); 319 mptcp_pm_add_addr_send_ack(msk); 320 entry->retrans_times++; 321 } 322 323 if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) 324 sk_reset_timer(sk, timer, 325 jiffies + mptcp_get_add_addr_timeout(sock_net(sk))); 326 327 spin_unlock_bh(&msk->pm.lock); 328 329 if (entry->retrans_times == ADD_ADDR_RETRANS_MAX) 330 mptcp_pm_subflow_established(msk); 331 332 out: 333 __sock_put(sk); 334 } 335 336 struct mptcp_pm_add_entry * 337 mptcp_pm_del_add_timer(struct mptcp_sock *msk, 338 const struct mptcp_addr_info *addr, bool check_id) 339 { 340 struct mptcp_pm_add_entry *entry; 341 struct sock *sk = (struct sock *)msk; 342 343 spin_lock_bh(&msk->pm.lock); 344 entry = mptcp_lookup_anno_list_by_saddr(msk, addr); 345 if (entry && (!check_id || entry->addr.id == addr->id)) 346 entry->retrans_times = ADD_ADDR_RETRANS_MAX; 347 spin_unlock_bh(&msk->pm.lock); 348 349 if (entry && (!check_id || entry->addr.id == addr->id)) 350 sk_stop_timer_sync(sk, &entry->add_timer); 351 352 return entry; 353 } 354 355 bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, 356 const struct mptcp_pm_addr_entry *entry) 357 { 358 struct mptcp_pm_add_entry *add_entry = NULL; 359 struct sock *sk = (struct sock *)msk; 360 struct net *net = sock_net(sk); 361 362 lockdep_assert_held(&msk->pm.lock); 363 364 add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr); 365 366 if (add_entry) { 367 if (mptcp_pm_is_kernel(msk)) 368 return false; 369 370 sk_reset_timer(sk, &add_entry->add_timer, 371 jiffies + mptcp_get_add_addr_timeout(net)); 372 return true; 373 } 374 375 add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); 376 if (!add_entry) 377 return false; 378 379 list_add(&add_entry->list, &msk->pm.anno_list); 380 381 add_entry->addr = entry->addr; 382 add_entry->sock = msk; 383 add_entry->retrans_times = 0; 384 385 timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); 386 sk_reset_timer(sk, &add_entry->add_timer, 387 jiffies + mptcp_get_add_addr_timeout(net)); 388 389 return true; 390 } 391 392 void mptcp_pm_free_anno_list(struct mptcp_sock *msk) 393 { 394 struct mptcp_pm_add_entry *entry, *tmp; 395 struct sock *sk = (struct sock *)msk; 396 LIST_HEAD(free_list); 397 398 pr_debug("msk=%p", msk); 399 400 spin_lock_bh(&msk->pm.lock); 401 list_splice_init(&msk->pm.anno_list, &free_list); 402 spin_unlock_bh(&msk->pm.lock); 403 404 list_for_each_entry_safe(entry, tmp, &free_list, list) { 405 sk_stop_timer_sync(sk, &entry->add_timer); 406 kfree(entry); 407 } 408 } 409 410 static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr, 411 const struct mptcp_addr_info *addr) 412 { 413 int i; 414 415 for (i = 0; i < nr; i++) { 416 if (mptcp_addresses_equal(&addrs[i], addr, addr->port)) 417 return true; 418 } 419 420 return false; 421 } 422 423 /* Fill all the remote addresses into the array addrs[], 424 * and return the array size. 425 */ 426 static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh, 427 struct mptcp_addr_info *addrs) 428 { 429 bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); 430 struct sock *sk = (struct sock *)msk, *ssk; 431 struct mptcp_subflow_context *subflow; 432 struct mptcp_addr_info remote = { 0 }; 433 unsigned int subflows_max; 434 int i = 0; 435 436 subflows_max = mptcp_pm_get_subflows_max(msk); 437 remote_address((struct sock_common *)sk, &remote); 438 439 /* Non-fullmesh endpoint, fill in the single entry 440 * corresponding to the primary MPC subflow remote address 441 */ 442 if (!fullmesh) { 443 if (deny_id0) 444 return 0; 445 446 msk->pm.subflows++; 447 addrs[i++] = remote; 448 } else { 449 mptcp_for_each_subflow(msk, subflow) { 450 ssk = mptcp_subflow_tcp_sock(subflow); 451 remote_address((struct sock_common *)ssk, &addrs[i]); 452 if (deny_id0 && mptcp_addresses_equal(&addrs[i], &remote, false)) 453 continue; 454 455 if (!lookup_address_in_vec(addrs, i, &addrs[i]) && 456 msk->pm.subflows < subflows_max) { 457 msk->pm.subflows++; 458 i++; 459 } 460 } 461 } 462 463 return i; 464 } 465 466 static struct mptcp_pm_addr_entry * 467 __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) 468 { 469 struct mptcp_pm_addr_entry *entry; 470 471 list_for_each_entry(entry, &pernet->local_addr_list, list) { 472 if (entry->addr.id == id) 473 return entry; 474 } 475 return NULL; 476 } 477 478 static struct mptcp_pm_addr_entry * 479 __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info, 480 bool lookup_by_id) 481 { 482 struct mptcp_pm_addr_entry *entry; 483 484 list_for_each_entry(entry, &pernet->local_addr_list, list) { 485 if ((!lookup_by_id && mptcp_addresses_equal(&entry->addr, info, true)) || 486 (lookup_by_id && entry->addr.id == info->id)) 487 return entry; 488 } 489 return NULL; 490 } 491 492 static int 493 lookup_id_by_addr(const struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr) 494 { 495 const struct mptcp_pm_addr_entry *entry; 496 int ret = -1; 497 498 rcu_read_lock(); 499 list_for_each_entry(entry, &pernet->local_addr_list, list) { 500 if (mptcp_addresses_equal(&entry->addr, addr, entry->addr.port)) { 501 ret = entry->addr.id; 502 break; 503 } 504 } 505 rcu_read_unlock(); 506 return ret; 507 } 508 509 static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) 510 { 511 struct sock *sk = (struct sock *)msk; 512 struct mptcp_pm_addr_entry *local; 513 unsigned int add_addr_signal_max; 514 unsigned int local_addr_max; 515 struct pm_nl_pernet *pernet; 516 unsigned int subflows_max; 517 518 pernet = pm_nl_get_pernet(sock_net(sk)); 519 520 add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk); 521 local_addr_max = mptcp_pm_get_local_addr_max(msk); 522 subflows_max = mptcp_pm_get_subflows_max(msk); 523 524 /* do lazy endpoint usage accounting for the MPC subflows */ 525 if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) { 526 struct mptcp_addr_info mpc_addr; 527 int mpc_id; 528 529 local_address((struct sock_common *)msk->first, &mpc_addr); 530 mpc_id = lookup_id_by_addr(pernet, &mpc_addr); 531 if (mpc_id >= 0) 532 __clear_bit(mpc_id, msk->pm.id_avail_bitmap); 533 534 msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED); 535 } 536 537 pr_debug("local %d:%d signal %d:%d subflows %d:%d\n", 538 msk->pm.local_addr_used, local_addr_max, 539 msk->pm.add_addr_signaled, add_addr_signal_max, 540 msk->pm.subflows, subflows_max); 541 542 /* check first for announce */ 543 if (msk->pm.add_addr_signaled < add_addr_signal_max) { 544 local = select_signal_address(pernet, msk); 545 546 /* due to racing events on both ends we can reach here while 547 * previous add address is still running: if we invoke now 548 * mptcp_pm_announce_addr(), that will fail and the 549 * corresponding id will be marked as used. 550 * Instead let the PM machinery reschedule us when the 551 * current address announce will be completed. 552 */ 553 if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) 554 return; 555 556 if (local) { 557 if (mptcp_pm_alloc_anno_list(msk, local)) { 558 __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); 559 msk->pm.add_addr_signaled++; 560 mptcp_pm_announce_addr(msk, &local->addr, false); 561 mptcp_pm_nl_addr_send_ack(msk); 562 } 563 } 564 } 565 566 /* check if should create a new subflow */ 567 while (msk->pm.local_addr_used < local_addr_max && 568 msk->pm.subflows < subflows_max) { 569 struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; 570 bool fullmesh; 571 int i, nr; 572 573 local = select_local_address(pernet, msk); 574 if (!local) 575 break; 576 577 fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); 578 579 msk->pm.local_addr_used++; 580 nr = fill_remote_addresses_vec(msk, fullmesh, addrs); 581 if (nr) 582 __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); 583 spin_unlock_bh(&msk->pm.lock); 584 for (i = 0; i < nr; i++) 585 __mptcp_subflow_connect(sk, &local->addr, &addrs[i]); 586 spin_lock_bh(&msk->pm.lock); 587 } 588 mptcp_pm_nl_check_work_pending(msk); 589 } 590 591 static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) 592 { 593 mptcp_pm_create_subflow_or_signal_addr(msk); 594 } 595 596 static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) 597 { 598 mptcp_pm_create_subflow_or_signal_addr(msk); 599 } 600 601 /* Fill all the local addresses into the array addrs[], 602 * and return the array size. 603 */ 604 static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, 605 struct mptcp_addr_info *addrs) 606 { 607 struct sock *sk = (struct sock *)msk; 608 struct mptcp_pm_addr_entry *entry; 609 struct mptcp_addr_info local; 610 struct pm_nl_pernet *pernet; 611 unsigned int subflows_max; 612 int i = 0; 613 614 pernet = pm_nl_get_pernet_from_msk(msk); 615 subflows_max = mptcp_pm_get_subflows_max(msk); 616 617 rcu_read_lock(); 618 list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 619 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) 620 continue; 621 622 if (entry->addr.family != sk->sk_family) { 623 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 624 if ((entry->addr.family == AF_INET && 625 !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) || 626 (sk->sk_family == AF_INET && 627 !ipv6_addr_v4mapped(&entry->addr.addr6))) 628 #endif 629 continue; 630 } 631 632 if (msk->pm.subflows < subflows_max) { 633 msk->pm.subflows++; 634 addrs[i++] = entry->addr; 635 } 636 } 637 rcu_read_unlock(); 638 639 /* If the array is empty, fill in the single 640 * 'IPADDRANY' local address 641 */ 642 if (!i) { 643 memset(&local, 0, sizeof(local)); 644 local.family = msk->pm.remote.family; 645 646 msk->pm.subflows++; 647 addrs[i++] = local; 648 } 649 650 return i; 651 } 652 653 static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) 654 { 655 struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; 656 struct sock *sk = (struct sock *)msk; 657 unsigned int add_addr_accept_max; 658 struct mptcp_addr_info remote; 659 unsigned int subflows_max; 660 int i, nr; 661 662 add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); 663 subflows_max = mptcp_pm_get_subflows_max(msk); 664 665 pr_debug("accepted %d:%d remote family %d", 666 msk->pm.add_addr_accepted, add_addr_accept_max, 667 msk->pm.remote.family); 668 669 remote = msk->pm.remote; 670 mptcp_pm_announce_addr(msk, &remote, true); 671 mptcp_pm_nl_addr_send_ack(msk); 672 673 if (lookup_subflow_by_daddr(&msk->conn_list, &remote)) 674 return; 675 676 /* pick id 0 port, if none is provided the remote address */ 677 if (!remote.port) 678 remote.port = sk->sk_dport; 679 680 /* connect to the specified remote address, using whatever 681 * local address the routing configuration will pick. 682 */ 683 nr = fill_local_addresses_vec(msk, addrs); 684 685 msk->pm.add_addr_accepted++; 686 if (msk->pm.add_addr_accepted >= add_addr_accept_max || 687 msk->pm.subflows >= subflows_max) 688 WRITE_ONCE(msk->pm.accept_addr, false); 689 690 spin_unlock_bh(&msk->pm.lock); 691 for (i = 0; i < nr; i++) 692 __mptcp_subflow_connect(sk, &addrs[i], &remote); 693 spin_lock_bh(&msk->pm.lock); 694 } 695 696 void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) 697 { 698 struct mptcp_subflow_context *subflow; 699 700 msk_owned_by_me(msk); 701 lockdep_assert_held(&msk->pm.lock); 702 703 if (!mptcp_pm_should_add_signal(msk) && 704 !mptcp_pm_should_rm_signal(msk)) 705 return; 706 707 subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node); 708 if (subflow) { 709 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 710 711 spin_unlock_bh(&msk->pm.lock); 712 pr_debug("send ack for %s", 713 mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr"); 714 715 mptcp_subflow_send_ack(ssk); 716 spin_lock_bh(&msk->pm.lock); 717 } 718 } 719 720 static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, 721 struct mptcp_addr_info *addr, 722 u8 bkup) 723 { 724 struct mptcp_subflow_context *subflow; 725 726 pr_debug("bkup=%d", bkup); 727 728 mptcp_for_each_subflow(msk, subflow) { 729 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 730 struct sock *sk = (struct sock *)msk; 731 struct mptcp_addr_info local; 732 733 local_address((struct sock_common *)ssk, &local); 734 if (!mptcp_addresses_equal(&local, addr, addr->port)) 735 continue; 736 737 if (subflow->backup != bkup) 738 msk->last_snd = NULL; 739 subflow->backup = bkup; 740 subflow->send_mp_prio = 1; 741 subflow->request_bkup = bkup; 742 __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIOTX); 743 744 spin_unlock_bh(&msk->pm.lock); 745 pr_debug("send ack for mp_prio"); 746 mptcp_subflow_send_ack(ssk); 747 spin_lock_bh(&msk->pm.lock); 748 749 return 0; 750 } 751 752 return -EINVAL; 753 } 754 755 static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, 756 const struct mptcp_rm_list *rm_list, 757 enum linux_mptcp_mib_field rm_type) 758 { 759 struct mptcp_subflow_context *subflow, *tmp; 760 struct sock *sk = (struct sock *)msk; 761 u8 i; 762 763 pr_debug("%s rm_list_nr %d", 764 rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", rm_list->nr); 765 766 msk_owned_by_me(msk); 767 768 if (sk->sk_state == TCP_LISTEN) 769 return; 770 771 if (!rm_list->nr) 772 return; 773 774 if (list_empty(&msk->conn_list)) 775 return; 776 777 for (i = 0; i < rm_list->nr; i++) { 778 bool removed = false; 779 780 list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { 781 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 782 int how = RCV_SHUTDOWN | SEND_SHUTDOWN; 783 u8 id = subflow->local_id; 784 785 if (rm_type == MPTCP_MIB_RMADDR) 786 id = subflow->remote_id; 787 788 if (rm_list->ids[i] != id) 789 continue; 790 791 pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u", 792 rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", 793 i, rm_list->ids[i], subflow->local_id, subflow->remote_id); 794 spin_unlock_bh(&msk->pm.lock); 795 mptcp_subflow_shutdown(sk, ssk, how); 796 797 /* the following takes care of updating the subflows counter */ 798 mptcp_close_ssk(sk, ssk, subflow); 799 spin_lock_bh(&msk->pm.lock); 800 801 removed = true; 802 __MPTCP_INC_STATS(sock_net(sk), rm_type); 803 } 804 __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap); 805 if (!removed) 806 continue; 807 808 if (!mptcp_pm_is_kernel(msk)) 809 continue; 810 811 if (rm_type == MPTCP_MIB_RMADDR) { 812 msk->pm.add_addr_accepted--; 813 WRITE_ONCE(msk->pm.accept_addr, true); 814 } else if (rm_type == MPTCP_MIB_RMSUBFLOW) { 815 msk->pm.local_addr_used--; 816 } 817 } 818 } 819 820 static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) 821 { 822 mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); 823 } 824 825 void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, 826 const struct mptcp_rm_list *rm_list) 827 { 828 mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); 829 } 830 831 void mptcp_pm_nl_work(struct mptcp_sock *msk) 832 { 833 struct mptcp_pm_data *pm = &msk->pm; 834 835 msk_owned_by_me(msk); 836 837 if (!(pm->status & MPTCP_PM_WORK_MASK)) 838 return; 839 840 spin_lock_bh(&msk->pm.lock); 841 842 pr_debug("msk=%p status=%x", msk, pm->status); 843 if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { 844 pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); 845 mptcp_pm_nl_add_addr_received(msk); 846 } 847 if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { 848 pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); 849 mptcp_pm_nl_addr_send_ack(msk); 850 } 851 if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { 852 pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); 853 mptcp_pm_nl_rm_addr_received(msk); 854 } 855 if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { 856 pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); 857 mptcp_pm_nl_fully_established(msk); 858 } 859 if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { 860 pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); 861 mptcp_pm_nl_subflow_established(msk); 862 } 863 864 spin_unlock_bh(&msk->pm.lock); 865 } 866 867 static bool address_use_port(struct mptcp_pm_addr_entry *entry) 868 { 869 return (entry->flags & 870 (MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) == 871 MPTCP_PM_ADDR_FLAG_SIGNAL; 872 } 873 874 /* caller must ensure the RCU grace period is already elapsed */ 875 static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) 876 { 877 if (entry->lsk) 878 sock_release(entry->lsk); 879 kfree(entry); 880 } 881 882 static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, 883 struct mptcp_pm_addr_entry *entry) 884 { 885 struct mptcp_pm_addr_entry *cur, *del_entry = NULL; 886 unsigned int addr_max; 887 int ret = -EINVAL; 888 889 spin_lock_bh(&pernet->lock); 890 /* to keep the code simple, don't do IDR-like allocation for address ID, 891 * just bail when we exceed limits 892 */ 893 if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID) 894 pernet->next_id = 1; 895 if (pernet->addrs >= MPTCP_PM_ADDR_MAX) 896 goto out; 897 if (test_bit(entry->addr.id, pernet->id_bitmap)) 898 goto out; 899 900 /* do not insert duplicate address, differentiate on port only 901 * singled addresses 902 */ 903 list_for_each_entry(cur, &pernet->local_addr_list, list) { 904 if (mptcp_addresses_equal(&cur->addr, &entry->addr, 905 address_use_port(entry) && 906 address_use_port(cur))) { 907 /* allow replacing the exiting endpoint only if such 908 * endpoint is an implicit one and the user-space 909 * did not provide an endpoint id 910 */ 911 if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) 912 goto out; 913 if (entry->addr.id) 914 goto out; 915 916 pernet->addrs--; 917 entry->addr.id = cur->addr.id; 918 list_del_rcu(&cur->list); 919 del_entry = cur; 920 break; 921 } 922 } 923 924 if (!entry->addr.id) { 925 find_next: 926 entry->addr.id = find_next_zero_bit(pernet->id_bitmap, 927 MPTCP_PM_MAX_ADDR_ID + 1, 928 pernet->next_id); 929 if (!entry->addr.id && pernet->next_id != 1) { 930 pernet->next_id = 1; 931 goto find_next; 932 } 933 } 934 935 if (!entry->addr.id) 936 goto out; 937 938 __set_bit(entry->addr.id, pernet->id_bitmap); 939 if (entry->addr.id > pernet->next_id) 940 pernet->next_id = entry->addr.id; 941 942 if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { 943 addr_max = pernet->add_addr_signal_max; 944 WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1); 945 } 946 if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 947 addr_max = pernet->local_addr_max; 948 WRITE_ONCE(pernet->local_addr_max, addr_max + 1); 949 } 950 951 pernet->addrs++; 952 list_add_tail_rcu(&entry->list, &pernet->local_addr_list); 953 ret = entry->addr.id; 954 955 out: 956 spin_unlock_bh(&pernet->lock); 957 958 /* just replaced an existing entry, free it */ 959 if (del_entry) { 960 synchronize_rcu(); 961 __mptcp_pm_release_addr_entry(del_entry); 962 } 963 return ret; 964 } 965 966 static int mptcp_pm_nl_create_listen_socket(struct sock *sk, 967 struct mptcp_pm_addr_entry *entry) 968 { 969 int addrlen = sizeof(struct sockaddr_in); 970 struct sockaddr_storage addr; 971 struct mptcp_sock *msk; 972 struct socket *ssock; 973 int backlog = 1024; 974 int err; 975 976 err = sock_create_kern(sock_net(sk), entry->addr.family, 977 SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk); 978 if (err) 979 return err; 980 981 msk = mptcp_sk(entry->lsk->sk); 982 if (!msk) { 983 err = -EINVAL; 984 goto out; 985 } 986 987 ssock = __mptcp_nmpc_socket(msk); 988 if (!ssock) { 989 err = -EINVAL; 990 goto out; 991 } 992 993 mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); 994 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 995 if (entry->addr.family == AF_INET6) 996 addrlen = sizeof(struct sockaddr_in6); 997 #endif 998 err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen); 999 if (err) { 1000 pr_warn("kernel_bind error, err=%d", err); 1001 goto out; 1002 } 1003 1004 err = kernel_listen(ssock, backlog); 1005 if (err) { 1006 pr_warn("kernel_listen error, err=%d", err); 1007 goto out; 1008 } 1009 1010 return 0; 1011 1012 out: 1013 sock_release(entry->lsk); 1014 return err; 1015 } 1016 1017 int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) 1018 { 1019 struct mptcp_pm_addr_entry *entry; 1020 struct mptcp_addr_info skc_local; 1021 struct mptcp_addr_info msk_local; 1022 struct pm_nl_pernet *pernet; 1023 int ret = -1; 1024 1025 if (WARN_ON_ONCE(!msk)) 1026 return -1; 1027 1028 /* The 0 ID mapping is defined by the first subflow, copied into the msk 1029 * addr 1030 */ 1031 local_address((struct sock_common *)msk, &msk_local); 1032 local_address((struct sock_common *)skc, &skc_local); 1033 if (mptcp_addresses_equal(&msk_local, &skc_local, false)) 1034 return 0; 1035 1036 if (mptcp_pm_is_userspace(msk)) 1037 return mptcp_userspace_pm_get_local_id(msk, &skc_local); 1038 1039 pernet = pm_nl_get_pernet_from_msk(msk); 1040 1041 rcu_read_lock(); 1042 list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 1043 if (mptcp_addresses_equal(&entry->addr, &skc_local, entry->addr.port)) { 1044 ret = entry->addr.id; 1045 break; 1046 } 1047 } 1048 rcu_read_unlock(); 1049 if (ret >= 0) 1050 return ret; 1051 1052 /* address not found, add to local list */ 1053 entry = kmalloc(sizeof(*entry), GFP_ATOMIC); 1054 if (!entry) 1055 return -ENOMEM; 1056 1057 entry->addr = skc_local; 1058 entry->addr.id = 0; 1059 entry->addr.port = 0; 1060 entry->ifindex = 0; 1061 entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; 1062 entry->lsk = NULL; 1063 ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); 1064 if (ret < 0) 1065 kfree(entry); 1066 1067 return ret; 1068 } 1069 1070 #define MPTCP_PM_CMD_GRP_OFFSET 0 1071 #define MPTCP_PM_EV_GRP_OFFSET 1 1072 1073 static const struct genl_multicast_group mptcp_pm_mcgrps[] = { 1074 [MPTCP_PM_CMD_GRP_OFFSET] = { .name = MPTCP_PM_CMD_GRP_NAME, }, 1075 [MPTCP_PM_EV_GRP_OFFSET] = { .name = MPTCP_PM_EV_GRP_NAME, 1076 .flags = GENL_UNS_ADMIN_PERM, 1077 }, 1078 }; 1079 1080 static const struct nla_policy 1081 mptcp_pm_addr_policy[MPTCP_PM_ADDR_ATTR_MAX + 1] = { 1082 [MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, }, 1083 [MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, }, 1084 [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, }, 1085 [MPTCP_PM_ADDR_ATTR_ADDR6] = 1086 NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 1087 [MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16 }, 1088 [MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32 }, 1089 [MPTCP_PM_ADDR_ATTR_IF_IDX] = { .type = NLA_S32 }, 1090 }; 1091 1092 static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = { 1093 [MPTCP_PM_ATTR_ADDR] = 1094 NLA_POLICY_NESTED(mptcp_pm_addr_policy), 1095 [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, }, 1096 [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, 1097 [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, 1098 [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, }, 1099 [MPTCP_PM_ATTR_ADDR_REMOTE] = 1100 NLA_POLICY_NESTED(mptcp_pm_addr_policy), 1101 }; 1102 1103 void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) 1104 { 1105 struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk); 1106 struct sock *sk = (struct sock *)msk; 1107 unsigned int active_max_loss_cnt; 1108 struct net *net = sock_net(sk); 1109 unsigned int stale_loss_cnt; 1110 bool slow; 1111 1112 stale_loss_cnt = mptcp_stale_loss_cnt(net); 1113 if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt) 1114 return; 1115 1116 /* look for another available subflow not in loss state */ 1117 active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1); 1118 mptcp_for_each_subflow(msk, iter) { 1119 if (iter != subflow && mptcp_subflow_active(iter) && 1120 iter->stale_count < active_max_loss_cnt) { 1121 /* we have some alternatives, try to mark this subflow as idle ...*/ 1122 slow = lock_sock_fast(ssk); 1123 if (!tcp_rtx_and_write_queues_empty(ssk)) { 1124 subflow->stale = 1; 1125 __mptcp_retransmit_pending_data(sk); 1126 MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_SUBFLOWSTALE); 1127 } 1128 unlock_sock_fast(ssk, slow); 1129 1130 /* always try to push the pending data regarless of re-injections: 1131 * we can possibly use backup subflows now, and subflow selection 1132 * is cheap under the msk socket lock 1133 */ 1134 __mptcp_push_pending(sk, 0); 1135 return; 1136 } 1137 } 1138 } 1139 1140 static int mptcp_pm_family_to_addr(int family) 1141 { 1142 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 1143 if (family == AF_INET6) 1144 return MPTCP_PM_ADDR_ATTR_ADDR6; 1145 #endif 1146 return MPTCP_PM_ADDR_ATTR_ADDR4; 1147 } 1148 1149 static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[], 1150 const struct nlattr *attr, 1151 struct genl_info *info, 1152 struct mptcp_addr_info *addr, 1153 bool require_family) 1154 { 1155 int err, addr_addr; 1156 1157 if (!attr) { 1158 GENL_SET_ERR_MSG(info, "missing address info"); 1159 return -EINVAL; 1160 } 1161 1162 /* no validation needed - was already done via nested policy */ 1163 err = nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, 1164 mptcp_pm_addr_policy, info->extack); 1165 if (err) 1166 return err; 1167 1168 if (tb[MPTCP_PM_ADDR_ATTR_ID]) 1169 addr->id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]); 1170 1171 if (!tb[MPTCP_PM_ADDR_ATTR_FAMILY]) { 1172 if (!require_family) 1173 return err; 1174 1175 NL_SET_ERR_MSG_ATTR(info->extack, attr, 1176 "missing family"); 1177 return -EINVAL; 1178 } 1179 1180 addr->family = nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_FAMILY]); 1181 if (addr->family != AF_INET 1182 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 1183 && addr->family != AF_INET6 1184 #endif 1185 ) { 1186 NL_SET_ERR_MSG_ATTR(info->extack, attr, 1187 "unknown address family"); 1188 return -EINVAL; 1189 } 1190 addr_addr = mptcp_pm_family_to_addr(addr->family); 1191 if (!tb[addr_addr]) { 1192 NL_SET_ERR_MSG_ATTR(info->extack, attr, 1193 "missing address data"); 1194 return -EINVAL; 1195 } 1196 1197 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 1198 if (addr->family == AF_INET6) 1199 addr->addr6 = nla_get_in6_addr(tb[addr_addr]); 1200 else 1201 #endif 1202 addr->addr.s_addr = nla_get_in_addr(tb[addr_addr]); 1203 1204 if (tb[MPTCP_PM_ADDR_ATTR_PORT]) 1205 addr->port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); 1206 1207 return err; 1208 } 1209 1210 int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, 1211 struct mptcp_addr_info *addr) 1212 { 1213 struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; 1214 1215 memset(addr, 0, sizeof(*addr)); 1216 1217 return mptcp_pm_parse_pm_addr_attr(tb, attr, info, addr, true); 1218 } 1219 1220 int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, 1221 bool require_family, 1222 struct mptcp_pm_addr_entry *entry) 1223 { 1224 struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; 1225 int err; 1226 1227 memset(entry, 0, sizeof(*entry)); 1228 1229 err = mptcp_pm_parse_pm_addr_attr(tb, attr, info, &entry->addr, require_family); 1230 if (err) 1231 return err; 1232 1233 if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) { 1234 u32 val = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]); 1235 1236 entry->ifindex = val; 1237 } 1238 1239 if (tb[MPTCP_PM_ADDR_ATTR_FLAGS]) 1240 entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]); 1241 1242 if (tb[MPTCP_PM_ADDR_ATTR_PORT]) 1243 entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); 1244 1245 return 0; 1246 } 1247 1248 static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) 1249 { 1250 return pm_nl_get_pernet(genl_info_net(info)); 1251 } 1252 1253 static int mptcp_nl_add_subflow_or_signal_addr(struct net *net) 1254 { 1255 struct mptcp_sock *msk; 1256 long s_slot = 0, s_num = 0; 1257 1258 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1259 struct sock *sk = (struct sock *)msk; 1260 1261 if (!READ_ONCE(msk->fully_established) || 1262 mptcp_pm_is_userspace(msk)) 1263 goto next; 1264 1265 lock_sock(sk); 1266 spin_lock_bh(&msk->pm.lock); 1267 mptcp_pm_create_subflow_or_signal_addr(msk); 1268 spin_unlock_bh(&msk->pm.lock); 1269 release_sock(sk); 1270 1271 next: 1272 sock_put(sk); 1273 cond_resched(); 1274 } 1275 1276 return 0; 1277 } 1278 1279 static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) 1280 { 1281 struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; 1282 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1283 struct mptcp_pm_addr_entry addr, *entry; 1284 int ret; 1285 1286 ret = mptcp_pm_parse_entry(attr, info, true, &addr); 1287 if (ret < 0) 1288 return ret; 1289 1290 if (addr.addr.port && !(addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { 1291 GENL_SET_ERR_MSG(info, "flags must have signal when using port"); 1292 return -EINVAL; 1293 } 1294 1295 if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL && 1296 addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { 1297 GENL_SET_ERR_MSG(info, "flags mustn't have both signal and fullmesh"); 1298 return -EINVAL; 1299 } 1300 1301 if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) { 1302 GENL_SET_ERR_MSG(info, "can't create IMPLICIT endpoint"); 1303 return -EINVAL; 1304 } 1305 1306 entry = kmalloc(sizeof(*entry), GFP_KERNEL); 1307 if (!entry) { 1308 GENL_SET_ERR_MSG(info, "can't allocate addr"); 1309 return -ENOMEM; 1310 } 1311 1312 *entry = addr; 1313 if (entry->addr.port) { 1314 ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry); 1315 if (ret) { 1316 GENL_SET_ERR_MSG(info, "create listen socket error"); 1317 kfree(entry); 1318 return ret; 1319 } 1320 } 1321 ret = mptcp_pm_nl_append_new_local_addr(pernet, entry); 1322 if (ret < 0) { 1323 GENL_SET_ERR_MSG(info, "too many addresses or duplicate one"); 1324 if (entry->lsk) 1325 sock_release(entry->lsk); 1326 kfree(entry); 1327 return ret; 1328 } 1329 1330 mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk)); 1331 1332 return 0; 1333 } 1334 1335 int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, 1336 u8 *flags, int *ifindex) 1337 { 1338 struct mptcp_pm_addr_entry *entry; 1339 struct sock *sk = (struct sock *)msk; 1340 struct net *net = sock_net(sk); 1341 1342 *flags = 0; 1343 *ifindex = 0; 1344 1345 if (id) { 1346 if (mptcp_pm_is_userspace(msk)) 1347 return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, 1348 id, 1349 flags, 1350 ifindex); 1351 1352 rcu_read_lock(); 1353 entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); 1354 if (entry) { 1355 *flags = entry->flags; 1356 *ifindex = entry->ifindex; 1357 } 1358 rcu_read_unlock(); 1359 } 1360 1361 return 0; 1362 } 1363 1364 static bool remove_anno_list_by_saddr(struct mptcp_sock *msk, 1365 const struct mptcp_addr_info *addr) 1366 { 1367 struct mptcp_pm_add_entry *entry; 1368 1369 entry = mptcp_pm_del_add_timer(msk, addr, false); 1370 if (entry) { 1371 list_del(&entry->list); 1372 kfree(entry); 1373 return true; 1374 } 1375 1376 return false; 1377 } 1378 1379 static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, 1380 const struct mptcp_addr_info *addr, 1381 bool force) 1382 { 1383 struct mptcp_rm_list list = { .nr = 0 }; 1384 bool ret; 1385 1386 list.ids[list.nr++] = addr->id; 1387 1388 ret = remove_anno_list_by_saddr(msk, addr); 1389 if (ret || force) { 1390 spin_lock_bh(&msk->pm.lock); 1391 mptcp_pm_remove_addr(msk, &list); 1392 spin_unlock_bh(&msk->pm.lock); 1393 } 1394 return ret; 1395 } 1396 1397 static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, 1398 const struct mptcp_pm_addr_entry *entry) 1399 { 1400 const struct mptcp_addr_info *addr = &entry->addr; 1401 struct mptcp_rm_list list = { .nr = 0 }; 1402 long s_slot = 0, s_num = 0; 1403 struct mptcp_sock *msk; 1404 1405 pr_debug("remove_id=%d", addr->id); 1406 1407 list.ids[list.nr++] = addr->id; 1408 1409 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1410 struct sock *sk = (struct sock *)msk; 1411 bool remove_subflow; 1412 1413 if (mptcp_pm_is_userspace(msk)) 1414 goto next; 1415 1416 if (list_empty(&msk->conn_list)) { 1417 mptcp_pm_remove_anno_addr(msk, addr, false); 1418 goto next; 1419 } 1420 1421 lock_sock(sk); 1422 remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr); 1423 mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && 1424 !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); 1425 if (remove_subflow) 1426 mptcp_pm_remove_subflow(msk, &list); 1427 release_sock(sk); 1428 1429 next: 1430 sock_put(sk); 1431 cond_resched(); 1432 } 1433 1434 return 0; 1435 } 1436 1437 static int mptcp_nl_remove_id_zero_address(struct net *net, 1438 struct mptcp_addr_info *addr) 1439 { 1440 struct mptcp_rm_list list = { .nr = 0 }; 1441 long s_slot = 0, s_num = 0; 1442 struct mptcp_sock *msk; 1443 1444 list.ids[list.nr++] = 0; 1445 1446 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1447 struct sock *sk = (struct sock *)msk; 1448 struct mptcp_addr_info msk_local; 1449 1450 if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) 1451 goto next; 1452 1453 local_address((struct sock_common *)msk, &msk_local); 1454 if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) 1455 goto next; 1456 1457 lock_sock(sk); 1458 spin_lock_bh(&msk->pm.lock); 1459 mptcp_pm_remove_addr(msk, &list); 1460 mptcp_pm_nl_rm_subflow_received(msk, &list); 1461 spin_unlock_bh(&msk->pm.lock); 1462 release_sock(sk); 1463 1464 next: 1465 sock_put(sk); 1466 cond_resched(); 1467 } 1468 1469 return 0; 1470 } 1471 1472 static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) 1473 { 1474 struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; 1475 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1476 struct mptcp_pm_addr_entry addr, *entry; 1477 unsigned int addr_max; 1478 int ret; 1479 1480 ret = mptcp_pm_parse_entry(attr, info, false, &addr); 1481 if (ret < 0) 1482 return ret; 1483 1484 /* the zero id address is special: the first address used by the msk 1485 * always gets such an id, so different subflows can have different zero 1486 * id addresses. Additionally zero id is not accounted for in id_bitmap. 1487 * Let's use an 'mptcp_rm_list' instead of the common remove code. 1488 */ 1489 if (addr.addr.id == 0) 1490 return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr); 1491 1492 spin_lock_bh(&pernet->lock); 1493 entry = __lookup_addr_by_id(pernet, addr.addr.id); 1494 if (!entry) { 1495 GENL_SET_ERR_MSG(info, "address not found"); 1496 spin_unlock_bh(&pernet->lock); 1497 return -EINVAL; 1498 } 1499 if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { 1500 addr_max = pernet->add_addr_signal_max; 1501 WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1); 1502 } 1503 if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 1504 addr_max = pernet->local_addr_max; 1505 WRITE_ONCE(pernet->local_addr_max, addr_max - 1); 1506 } 1507 1508 pernet->addrs--; 1509 list_del_rcu(&entry->list); 1510 __clear_bit(entry->addr.id, pernet->id_bitmap); 1511 spin_unlock_bh(&pernet->lock); 1512 1513 mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry); 1514 synchronize_rcu(); 1515 __mptcp_pm_release_addr_entry(entry); 1516 1517 return ret; 1518 } 1519 1520 void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, 1521 struct list_head *rm_list) 1522 { 1523 struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; 1524 struct mptcp_pm_addr_entry *entry; 1525 1526 list_for_each_entry(entry, rm_list, list) { 1527 if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) && 1528 slist.nr < MPTCP_RM_IDS_MAX) 1529 slist.ids[slist.nr++] = entry->addr.id; 1530 1531 if (remove_anno_list_by_saddr(msk, &entry->addr) && 1532 alist.nr < MPTCP_RM_IDS_MAX) 1533 alist.ids[alist.nr++] = entry->addr.id; 1534 } 1535 1536 if (alist.nr) { 1537 spin_lock_bh(&msk->pm.lock); 1538 mptcp_pm_remove_addr(msk, &alist); 1539 spin_unlock_bh(&msk->pm.lock); 1540 } 1541 if (slist.nr) 1542 mptcp_pm_remove_subflow(msk, &slist); 1543 } 1544 1545 static void mptcp_nl_remove_addrs_list(struct net *net, 1546 struct list_head *rm_list) 1547 { 1548 long s_slot = 0, s_num = 0; 1549 struct mptcp_sock *msk; 1550 1551 if (list_empty(rm_list)) 1552 return; 1553 1554 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1555 struct sock *sk = (struct sock *)msk; 1556 1557 if (!mptcp_pm_is_userspace(msk)) { 1558 lock_sock(sk); 1559 mptcp_pm_remove_addrs_and_subflows(msk, rm_list); 1560 release_sock(sk); 1561 } 1562 1563 sock_put(sk); 1564 cond_resched(); 1565 } 1566 } 1567 1568 /* caller must ensure the RCU grace period is already elapsed */ 1569 static void __flush_addrs(struct list_head *list) 1570 { 1571 while (!list_empty(list)) { 1572 struct mptcp_pm_addr_entry *cur; 1573 1574 cur = list_entry(list->next, 1575 struct mptcp_pm_addr_entry, list); 1576 list_del_rcu(&cur->list); 1577 __mptcp_pm_release_addr_entry(cur); 1578 } 1579 } 1580 1581 static void __reset_counters(struct pm_nl_pernet *pernet) 1582 { 1583 WRITE_ONCE(pernet->add_addr_signal_max, 0); 1584 WRITE_ONCE(pernet->add_addr_accept_max, 0); 1585 WRITE_ONCE(pernet->local_addr_max, 0); 1586 pernet->addrs = 0; 1587 } 1588 1589 static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info) 1590 { 1591 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1592 LIST_HEAD(free_list); 1593 1594 spin_lock_bh(&pernet->lock); 1595 list_splice_init(&pernet->local_addr_list, &free_list); 1596 __reset_counters(pernet); 1597 pernet->next_id = 1; 1598 bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 1599 spin_unlock_bh(&pernet->lock); 1600 mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list); 1601 synchronize_rcu(); 1602 __flush_addrs(&free_list); 1603 return 0; 1604 } 1605 1606 static int mptcp_nl_fill_addr(struct sk_buff *skb, 1607 struct mptcp_pm_addr_entry *entry) 1608 { 1609 struct mptcp_addr_info *addr = &entry->addr; 1610 struct nlattr *attr; 1611 1612 attr = nla_nest_start(skb, MPTCP_PM_ATTR_ADDR); 1613 if (!attr) 1614 return -EMSGSIZE; 1615 1616 if (nla_put_u16(skb, MPTCP_PM_ADDR_ATTR_FAMILY, addr->family)) 1617 goto nla_put_failure; 1618 if (nla_put_u16(skb, MPTCP_PM_ADDR_ATTR_PORT, ntohs(addr->port))) 1619 goto nla_put_failure; 1620 if (nla_put_u8(skb, MPTCP_PM_ADDR_ATTR_ID, addr->id)) 1621 goto nla_put_failure; 1622 if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->flags)) 1623 goto nla_put_failure; 1624 if (entry->ifindex && 1625 nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->ifindex)) 1626 goto nla_put_failure; 1627 1628 if (addr->family == AF_INET && 1629 nla_put_in_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR4, 1630 addr->addr.s_addr)) 1631 goto nla_put_failure; 1632 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 1633 else if (addr->family == AF_INET6 && 1634 nla_put_in6_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR6, &addr->addr6)) 1635 goto nla_put_failure; 1636 #endif 1637 nla_nest_end(skb, attr); 1638 return 0; 1639 1640 nla_put_failure: 1641 nla_nest_cancel(skb, attr); 1642 return -EMSGSIZE; 1643 } 1644 1645 static int mptcp_nl_cmd_get_addr(struct sk_buff *skb, struct genl_info *info) 1646 { 1647 struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; 1648 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1649 struct mptcp_pm_addr_entry addr, *entry; 1650 struct sk_buff *msg; 1651 void *reply; 1652 int ret; 1653 1654 ret = mptcp_pm_parse_entry(attr, info, false, &addr); 1655 if (ret < 0) 1656 return ret; 1657 1658 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1659 if (!msg) 1660 return -ENOMEM; 1661 1662 reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, 1663 info->genlhdr->cmd); 1664 if (!reply) { 1665 GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); 1666 ret = -EMSGSIZE; 1667 goto fail; 1668 } 1669 1670 spin_lock_bh(&pernet->lock); 1671 entry = __lookup_addr_by_id(pernet, addr.addr.id); 1672 if (!entry) { 1673 GENL_SET_ERR_MSG(info, "address not found"); 1674 ret = -EINVAL; 1675 goto unlock_fail; 1676 } 1677 1678 ret = mptcp_nl_fill_addr(msg, entry); 1679 if (ret) 1680 goto unlock_fail; 1681 1682 genlmsg_end(msg, reply); 1683 ret = genlmsg_reply(msg, info); 1684 spin_unlock_bh(&pernet->lock); 1685 return ret; 1686 1687 unlock_fail: 1688 spin_unlock_bh(&pernet->lock); 1689 1690 fail: 1691 nlmsg_free(msg); 1692 return ret; 1693 } 1694 1695 static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg, 1696 struct netlink_callback *cb) 1697 { 1698 struct net *net = sock_net(msg->sk); 1699 struct mptcp_pm_addr_entry *entry; 1700 struct pm_nl_pernet *pernet; 1701 int id = cb->args[0]; 1702 void *hdr; 1703 int i; 1704 1705 pernet = pm_nl_get_pernet(net); 1706 1707 spin_lock_bh(&pernet->lock); 1708 for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { 1709 if (test_bit(i, pernet->id_bitmap)) { 1710 entry = __lookup_addr_by_id(pernet, i); 1711 if (!entry) 1712 break; 1713 1714 if (entry->addr.id <= id) 1715 continue; 1716 1717 hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, 1718 cb->nlh->nlmsg_seq, &mptcp_genl_family, 1719 NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); 1720 if (!hdr) 1721 break; 1722 1723 if (mptcp_nl_fill_addr(msg, entry) < 0) { 1724 genlmsg_cancel(msg, hdr); 1725 break; 1726 } 1727 1728 id = entry->addr.id; 1729 genlmsg_end(msg, hdr); 1730 } 1731 } 1732 spin_unlock_bh(&pernet->lock); 1733 1734 cb->args[0] = id; 1735 return msg->len; 1736 } 1737 1738 static int parse_limit(struct genl_info *info, int id, unsigned int *limit) 1739 { 1740 struct nlattr *attr = info->attrs[id]; 1741 1742 if (!attr) 1743 return 0; 1744 1745 *limit = nla_get_u32(attr); 1746 if (*limit > MPTCP_PM_ADDR_MAX) { 1747 GENL_SET_ERR_MSG(info, "limit greater than maximum"); 1748 return -EINVAL; 1749 } 1750 return 0; 1751 } 1752 1753 static int 1754 mptcp_nl_cmd_set_limits(struct sk_buff *skb, struct genl_info *info) 1755 { 1756 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1757 unsigned int rcv_addrs, subflows; 1758 int ret; 1759 1760 spin_lock_bh(&pernet->lock); 1761 rcv_addrs = pernet->add_addr_accept_max; 1762 ret = parse_limit(info, MPTCP_PM_ATTR_RCV_ADD_ADDRS, &rcv_addrs); 1763 if (ret) 1764 goto unlock; 1765 1766 subflows = pernet->subflows_max; 1767 ret = parse_limit(info, MPTCP_PM_ATTR_SUBFLOWS, &subflows); 1768 if (ret) 1769 goto unlock; 1770 1771 WRITE_ONCE(pernet->add_addr_accept_max, rcv_addrs); 1772 WRITE_ONCE(pernet->subflows_max, subflows); 1773 1774 unlock: 1775 spin_unlock_bh(&pernet->lock); 1776 return ret; 1777 } 1778 1779 static int 1780 mptcp_nl_cmd_get_limits(struct sk_buff *skb, struct genl_info *info) 1781 { 1782 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1783 struct sk_buff *msg; 1784 void *reply; 1785 1786 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1787 if (!msg) 1788 return -ENOMEM; 1789 1790 reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, 1791 MPTCP_PM_CMD_GET_LIMITS); 1792 if (!reply) 1793 goto fail; 1794 1795 if (nla_put_u32(msg, MPTCP_PM_ATTR_RCV_ADD_ADDRS, 1796 READ_ONCE(pernet->add_addr_accept_max))) 1797 goto fail; 1798 1799 if (nla_put_u32(msg, MPTCP_PM_ATTR_SUBFLOWS, 1800 READ_ONCE(pernet->subflows_max))) 1801 goto fail; 1802 1803 genlmsg_end(msg, reply); 1804 return genlmsg_reply(msg, info); 1805 1806 fail: 1807 GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); 1808 nlmsg_free(msg); 1809 return -EMSGSIZE; 1810 } 1811 1812 static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, 1813 struct mptcp_addr_info *addr) 1814 { 1815 struct mptcp_rm_list list = { .nr = 0 }; 1816 1817 list.ids[list.nr++] = addr->id; 1818 1819 mptcp_pm_nl_rm_subflow_received(msk, &list); 1820 mptcp_pm_create_subflow_or_signal_addr(msk); 1821 } 1822 1823 static int mptcp_nl_set_flags(struct net *net, 1824 struct mptcp_addr_info *addr, 1825 u8 bkup, u8 changed) 1826 { 1827 long s_slot = 0, s_num = 0; 1828 struct mptcp_sock *msk; 1829 int ret = -EINVAL; 1830 1831 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1832 struct sock *sk = (struct sock *)msk; 1833 1834 if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) 1835 goto next; 1836 1837 lock_sock(sk); 1838 spin_lock_bh(&msk->pm.lock); 1839 if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) 1840 ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup); 1841 if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH) 1842 mptcp_pm_nl_fullmesh(msk, addr); 1843 spin_unlock_bh(&msk->pm.lock); 1844 release_sock(sk); 1845 1846 next: 1847 sock_put(sk); 1848 cond_resched(); 1849 } 1850 1851 return ret; 1852 } 1853 1854 static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) 1855 { 1856 struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry; 1857 struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; 1858 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1859 u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | 1860 MPTCP_PM_ADDR_FLAG_FULLMESH; 1861 struct net *net = sock_net(skb->sk); 1862 u8 bkup = 0, lookup_by_id = 0; 1863 int ret; 1864 1865 ret = mptcp_pm_parse_entry(attr, info, false, &addr); 1866 if (ret < 0) 1867 return ret; 1868 1869 if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) 1870 bkup = 1; 1871 if (addr.addr.family == AF_UNSPEC) { 1872 lookup_by_id = 1; 1873 if (!addr.addr.id) 1874 return -EOPNOTSUPP; 1875 } 1876 1877 spin_lock_bh(&pernet->lock); 1878 entry = __lookup_addr(pernet, &addr.addr, lookup_by_id); 1879 if (!entry) { 1880 spin_unlock_bh(&pernet->lock); 1881 return -EINVAL; 1882 } 1883 if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && 1884 (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { 1885 spin_unlock_bh(&pernet->lock); 1886 return -EINVAL; 1887 } 1888 1889 changed = (addr.flags ^ entry->flags) & mask; 1890 entry->flags = (entry->flags & ~mask) | (addr.flags & mask); 1891 addr = *entry; 1892 spin_unlock_bh(&pernet->lock); 1893 1894 mptcp_nl_set_flags(net, &addr.addr, bkup, changed); 1895 return 0; 1896 } 1897 1898 static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) 1899 { 1900 genlmsg_multicast_netns(&mptcp_genl_family, net, 1901 nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp); 1902 } 1903 1904 bool mptcp_userspace_pm_active(const struct mptcp_sock *msk) 1905 { 1906 return genl_has_listeners(&mptcp_genl_family, 1907 sock_net((const struct sock *)msk), 1908 MPTCP_PM_EV_GRP_OFFSET); 1909 } 1910 1911 static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) 1912 { 1913 const struct inet_sock *issk = inet_sk(ssk); 1914 const struct mptcp_subflow_context *sf; 1915 1916 if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family)) 1917 return -EMSGSIZE; 1918 1919 switch (ssk->sk_family) { 1920 case AF_INET: 1921 if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr)) 1922 return -EMSGSIZE; 1923 if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, issk->inet_daddr)) 1924 return -EMSGSIZE; 1925 break; 1926 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 1927 case AF_INET6: { 1928 const struct ipv6_pinfo *np = inet6_sk(ssk); 1929 1930 if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr)) 1931 return -EMSGSIZE; 1932 if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &ssk->sk_v6_daddr)) 1933 return -EMSGSIZE; 1934 break; 1935 } 1936 #endif 1937 default: 1938 WARN_ON_ONCE(1); 1939 return -EMSGSIZE; 1940 } 1941 1942 if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport)) 1943 return -EMSGSIZE; 1944 if (nla_put_be16(skb, MPTCP_ATTR_DPORT, issk->inet_dport)) 1945 return -EMSGSIZE; 1946 1947 sf = mptcp_subflow_ctx(ssk); 1948 if (WARN_ON_ONCE(!sf)) 1949 return -EINVAL; 1950 1951 if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id)) 1952 return -EMSGSIZE; 1953 1954 if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id)) 1955 return -EMSGSIZE; 1956 1957 return 0; 1958 } 1959 1960 static int mptcp_event_put_token_and_ssk(struct sk_buff *skb, 1961 const struct mptcp_sock *msk, 1962 const struct sock *ssk) 1963 { 1964 const struct sock *sk = (const struct sock *)msk; 1965 const struct mptcp_subflow_context *sf; 1966 u8 sk_err; 1967 1968 if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) 1969 return -EMSGSIZE; 1970 1971 if (mptcp_event_add_subflow(skb, ssk)) 1972 return -EMSGSIZE; 1973 1974 sf = mptcp_subflow_ctx(ssk); 1975 if (WARN_ON_ONCE(!sf)) 1976 return -EINVAL; 1977 1978 if (nla_put_u8(skb, MPTCP_ATTR_BACKUP, sf->backup)) 1979 return -EMSGSIZE; 1980 1981 if (ssk->sk_bound_dev_if && 1982 nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if)) 1983 return -EMSGSIZE; 1984 1985 sk_err = ssk->sk_err; 1986 if (sk_err && sk->sk_state == TCP_ESTABLISHED && 1987 nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err)) 1988 return -EMSGSIZE; 1989 1990 return 0; 1991 } 1992 1993 static int mptcp_event_sub_established(struct sk_buff *skb, 1994 const struct mptcp_sock *msk, 1995 const struct sock *ssk) 1996 { 1997 return mptcp_event_put_token_and_ssk(skb, msk, ssk); 1998 } 1999 2000 static int mptcp_event_sub_closed(struct sk_buff *skb, 2001 const struct mptcp_sock *msk, 2002 const struct sock *ssk) 2003 { 2004 const struct mptcp_subflow_context *sf; 2005 2006 if (mptcp_event_put_token_and_ssk(skb, msk, ssk)) 2007 return -EMSGSIZE; 2008 2009 sf = mptcp_subflow_ctx(ssk); 2010 if (!sf->reset_seen) 2011 return 0; 2012 2013 if (nla_put_u32(skb, MPTCP_ATTR_RESET_REASON, sf->reset_reason)) 2014 return -EMSGSIZE; 2015 2016 if (nla_put_u32(skb, MPTCP_ATTR_RESET_FLAGS, sf->reset_transient)) 2017 return -EMSGSIZE; 2018 2019 return 0; 2020 } 2021 2022 static int mptcp_event_created(struct sk_buff *skb, 2023 const struct mptcp_sock *msk, 2024 const struct sock *ssk) 2025 { 2026 int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token); 2027 2028 if (err) 2029 return err; 2030 2031 if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side))) 2032 return -EMSGSIZE; 2033 2034 return mptcp_event_add_subflow(skb, ssk); 2035 } 2036 2037 void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id) 2038 { 2039 struct net *net = sock_net((const struct sock *)msk); 2040 struct nlmsghdr *nlh; 2041 struct sk_buff *skb; 2042 2043 if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) 2044 return; 2045 2046 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 2047 if (!skb) 2048 return; 2049 2050 nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, MPTCP_EVENT_REMOVED); 2051 if (!nlh) 2052 goto nla_put_failure; 2053 2054 if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) 2055 goto nla_put_failure; 2056 2057 if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, id)) 2058 goto nla_put_failure; 2059 2060 genlmsg_end(skb, nlh); 2061 mptcp_nl_mcast_send(net, skb, GFP_ATOMIC); 2062 return; 2063 2064 nla_put_failure: 2065 kfree_skb(skb); 2066 } 2067 2068 void mptcp_event_addr_announced(const struct sock *ssk, 2069 const struct mptcp_addr_info *info) 2070 { 2071 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 2072 struct mptcp_sock *msk = mptcp_sk(subflow->conn); 2073 struct net *net = sock_net(ssk); 2074 struct nlmsghdr *nlh; 2075 struct sk_buff *skb; 2076 2077 if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) 2078 return; 2079 2080 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 2081 if (!skb) 2082 return; 2083 2084 nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, 2085 MPTCP_EVENT_ANNOUNCED); 2086 if (!nlh) 2087 goto nla_put_failure; 2088 2089 if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) 2090 goto nla_put_failure; 2091 2092 if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id)) 2093 goto nla_put_failure; 2094 2095 if (nla_put_be16(skb, MPTCP_ATTR_DPORT, 2096 info->port == 0 ? 2097 inet_sk(ssk)->inet_dport : 2098 info->port)) 2099 goto nla_put_failure; 2100 2101 switch (info->family) { 2102 case AF_INET: 2103 if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, info->addr.s_addr)) 2104 goto nla_put_failure; 2105 break; 2106 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 2107 case AF_INET6: 2108 if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &info->addr6)) 2109 goto nla_put_failure; 2110 break; 2111 #endif 2112 default: 2113 WARN_ON_ONCE(1); 2114 goto nla_put_failure; 2115 } 2116 2117 genlmsg_end(skb, nlh); 2118 mptcp_nl_mcast_send(net, skb, GFP_ATOMIC); 2119 return; 2120 2121 nla_put_failure: 2122 kfree_skb(skb); 2123 } 2124 2125 void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, 2126 const struct sock *ssk, gfp_t gfp) 2127 { 2128 struct net *net = sock_net((const struct sock *)msk); 2129 struct nlmsghdr *nlh; 2130 struct sk_buff *skb; 2131 2132 if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) 2133 return; 2134 2135 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); 2136 if (!skb) 2137 return; 2138 2139 nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, type); 2140 if (!nlh) 2141 goto nla_put_failure; 2142 2143 switch (type) { 2144 case MPTCP_EVENT_UNSPEC: 2145 WARN_ON_ONCE(1); 2146 break; 2147 case MPTCP_EVENT_CREATED: 2148 case MPTCP_EVENT_ESTABLISHED: 2149 if (mptcp_event_created(skb, msk, ssk) < 0) 2150 goto nla_put_failure; 2151 break; 2152 case MPTCP_EVENT_CLOSED: 2153 if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token) < 0) 2154 goto nla_put_failure; 2155 break; 2156 case MPTCP_EVENT_ANNOUNCED: 2157 case MPTCP_EVENT_REMOVED: 2158 /* call mptcp_event_addr_announced()/removed instead */ 2159 WARN_ON_ONCE(1); 2160 break; 2161 case MPTCP_EVENT_SUB_ESTABLISHED: 2162 case MPTCP_EVENT_SUB_PRIORITY: 2163 if (mptcp_event_sub_established(skb, msk, ssk) < 0) 2164 goto nla_put_failure; 2165 break; 2166 case MPTCP_EVENT_SUB_CLOSED: 2167 if (mptcp_event_sub_closed(skb, msk, ssk) < 0) 2168 goto nla_put_failure; 2169 break; 2170 } 2171 2172 genlmsg_end(skb, nlh); 2173 mptcp_nl_mcast_send(net, skb, gfp); 2174 return; 2175 2176 nla_put_failure: 2177 kfree_skb(skb); 2178 } 2179 2180 static const struct genl_small_ops mptcp_pm_ops[] = { 2181 { 2182 .cmd = MPTCP_PM_CMD_ADD_ADDR, 2183 .doit = mptcp_nl_cmd_add_addr, 2184 .flags = GENL_ADMIN_PERM, 2185 }, 2186 { 2187 .cmd = MPTCP_PM_CMD_DEL_ADDR, 2188 .doit = mptcp_nl_cmd_del_addr, 2189 .flags = GENL_ADMIN_PERM, 2190 }, 2191 { 2192 .cmd = MPTCP_PM_CMD_FLUSH_ADDRS, 2193 .doit = mptcp_nl_cmd_flush_addrs, 2194 .flags = GENL_ADMIN_PERM, 2195 }, 2196 { 2197 .cmd = MPTCP_PM_CMD_GET_ADDR, 2198 .doit = mptcp_nl_cmd_get_addr, 2199 .dumpit = mptcp_nl_cmd_dump_addrs, 2200 }, 2201 { 2202 .cmd = MPTCP_PM_CMD_SET_LIMITS, 2203 .doit = mptcp_nl_cmd_set_limits, 2204 .flags = GENL_ADMIN_PERM, 2205 }, 2206 { 2207 .cmd = MPTCP_PM_CMD_GET_LIMITS, 2208 .doit = mptcp_nl_cmd_get_limits, 2209 }, 2210 { 2211 .cmd = MPTCP_PM_CMD_SET_FLAGS, 2212 .doit = mptcp_nl_cmd_set_flags, 2213 .flags = GENL_ADMIN_PERM, 2214 }, 2215 { 2216 .cmd = MPTCP_PM_CMD_ANNOUNCE, 2217 .doit = mptcp_nl_cmd_announce, 2218 .flags = GENL_ADMIN_PERM, 2219 }, 2220 { 2221 .cmd = MPTCP_PM_CMD_REMOVE, 2222 .doit = mptcp_nl_cmd_remove, 2223 .flags = GENL_ADMIN_PERM, 2224 }, 2225 { 2226 .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE, 2227 .doit = mptcp_nl_cmd_sf_create, 2228 .flags = GENL_ADMIN_PERM, 2229 }, 2230 { 2231 .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY, 2232 .doit = mptcp_nl_cmd_sf_destroy, 2233 .flags = GENL_ADMIN_PERM, 2234 }, 2235 }; 2236 2237 static struct genl_family mptcp_genl_family __ro_after_init = { 2238 .name = MPTCP_PM_NAME, 2239 .version = MPTCP_PM_VER, 2240 .maxattr = MPTCP_PM_ATTR_MAX, 2241 .policy = mptcp_pm_policy, 2242 .netnsok = true, 2243 .module = THIS_MODULE, 2244 .small_ops = mptcp_pm_ops, 2245 .n_small_ops = ARRAY_SIZE(mptcp_pm_ops), 2246 .mcgrps = mptcp_pm_mcgrps, 2247 .n_mcgrps = ARRAY_SIZE(mptcp_pm_mcgrps), 2248 }; 2249 2250 static int __net_init pm_nl_init_net(struct net *net) 2251 { 2252 struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); 2253 2254 INIT_LIST_HEAD_RCU(&pernet->local_addr_list); 2255 2256 /* Cit. 2 subflows ought to be enough for anybody. */ 2257 pernet->subflows_max = 2; 2258 pernet->next_id = 1; 2259 pernet->stale_loss_cnt = 4; 2260 spin_lock_init(&pernet->lock); 2261 2262 /* No need to initialize other pernet fields, the struct is zeroed at 2263 * allocation time. 2264 */ 2265 2266 return 0; 2267 } 2268 2269 static void __net_exit pm_nl_exit_net(struct list_head *net_list) 2270 { 2271 struct net *net; 2272 2273 list_for_each_entry(net, net_list, exit_list) { 2274 struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); 2275 2276 /* net is removed from namespace list, can't race with 2277 * other modifiers, also netns core already waited for a 2278 * RCU grace period. 2279 */ 2280 __flush_addrs(&pernet->local_addr_list); 2281 } 2282 } 2283 2284 static struct pernet_operations mptcp_pm_pernet_ops = { 2285 .init = pm_nl_init_net, 2286 .exit_batch = pm_nl_exit_net, 2287 .id = &pm_nl_pernet_id, 2288 .size = sizeof(struct pm_nl_pernet), 2289 }; 2290 2291 void __init mptcp_pm_nl_init(void) 2292 { 2293 if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0) 2294 panic("Failed to register MPTCP PM pernet subsystem.\n"); 2295 2296 if (genl_register_family(&mptcp_genl_family)) 2297 panic("Failed to register MPTCP PM netlink family\n"); 2298 } 2299