1 // SPDX-License-Identifier: GPL-2.0 2 /* Multipath TCP 3 * 4 * Copyright (c) 2020, Red Hat, Inc. 5 */ 6 7 #define pr_fmt(fmt) "MPTCP: " fmt 8 9 #include <linux/inet.h> 10 #include <linux/kernel.h> 11 #include <net/tcp.h> 12 #include <net/inet_common.h> 13 #include <net/netns/generic.h> 14 #include <net/mptcp.h> 15 #include <net/genetlink.h> 16 #include <uapi/linux/mptcp.h> 17 18 #include "protocol.h" 19 #include "mib.h" 20 21 /* forward declaration */ 22 static struct genl_family mptcp_genl_family; 23 24 static int pm_nl_pernet_id; 25 26 struct mptcp_pm_add_entry { 27 struct list_head list; 28 struct mptcp_addr_info addr; 29 u8 retrans_times; 30 struct timer_list add_timer; 31 struct mptcp_sock *sock; 32 }; 33 34 struct pm_nl_pernet { 35 /* protects pernet updates */ 36 spinlock_t lock; 37 struct list_head local_addr_list; 38 unsigned int addrs; 39 unsigned int stale_loss_cnt; 40 unsigned int add_addr_signal_max; 41 unsigned int add_addr_accept_max; 42 unsigned int local_addr_max; 43 unsigned int subflows_max; 44 unsigned int next_id; 45 DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 46 }; 47 48 #define MPTCP_PM_ADDR_MAX 8 49 #define ADD_ADDR_RETRANS_MAX 3 50 51 static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) 52 { 53 return net_generic(net, pm_nl_pernet_id); 54 } 55 56 static struct pm_nl_pernet * 57 pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk) 58 { 59 return pm_nl_get_pernet(sock_net((struct sock *)msk)); 60 } 61 62 bool mptcp_addresses_equal(const struct mptcp_addr_info *a, 63 const struct mptcp_addr_info *b, bool use_port) 64 { 65 bool addr_equals = false; 66 67 if (a->family == b->family) { 68 if (a->family == AF_INET) 69 addr_equals = a->addr.s_addr == b->addr.s_addr; 70 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 71 else 72 addr_equals = !ipv6_addr_cmp(&a->addr6, &b->addr6); 73 } else if (a->family == AF_INET) { 74 if (ipv6_addr_v4mapped(&b->addr6)) 75 addr_equals = a->addr.s_addr == b->addr6.s6_addr32[3]; 76 } else if (b->family == AF_INET) { 77 if (ipv6_addr_v4mapped(&a->addr6)) 78 addr_equals = a->addr6.s6_addr32[3] == b->addr.s_addr; 79 #endif 80 } 81 82 if (!addr_equals) 83 return false; 84 if (!use_port) 85 return true; 86 87 return a->port == b->port; 88 } 89 90 void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr) 91 { 92 addr->family = skc->skc_family; 93 addr->port = htons(skc->skc_num); 94 if (addr->family == AF_INET) 95 addr->addr.s_addr = skc->skc_rcv_saddr; 96 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 97 else if (addr->family == AF_INET6) 98 addr->addr6 = skc->skc_v6_rcv_saddr; 99 #endif 100 } 101 102 static void remote_address(const struct sock_common *skc, 103 struct mptcp_addr_info *addr) 104 { 105 addr->family = skc->skc_family; 106 addr->port = skc->skc_dport; 107 if (addr->family == AF_INET) 108 addr->addr.s_addr = skc->skc_daddr; 109 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 110 else if (addr->family == AF_INET6) 111 addr->addr6 = skc->skc_v6_daddr; 112 #endif 113 } 114 115 static bool lookup_subflow_by_saddr(const struct list_head *list, 116 const struct mptcp_addr_info *saddr) 117 { 118 struct mptcp_subflow_context *subflow; 119 struct mptcp_addr_info cur; 120 struct sock_common *skc; 121 122 list_for_each_entry(subflow, list, node) { 123 skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); 124 125 mptcp_local_address(skc, &cur); 126 if (mptcp_addresses_equal(&cur, saddr, saddr->port)) 127 return true; 128 } 129 130 return false; 131 } 132 133 static bool lookup_subflow_by_daddr(const struct list_head *list, 134 const struct mptcp_addr_info *daddr) 135 { 136 struct mptcp_subflow_context *subflow; 137 struct mptcp_addr_info cur; 138 struct sock_common *skc; 139 140 list_for_each_entry(subflow, list, node) { 141 skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); 142 143 remote_address(skc, &cur); 144 if (mptcp_addresses_equal(&cur, daddr, daddr->port)) 145 return true; 146 } 147 148 return false; 149 } 150 151 static bool 152 select_local_address(const struct pm_nl_pernet *pernet, 153 const struct mptcp_sock *msk, 154 struct mptcp_pm_addr_entry *new_entry) 155 { 156 struct mptcp_pm_addr_entry *entry; 157 bool found = false; 158 159 msk_owned_by_me(msk); 160 161 rcu_read_lock(); 162 list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 163 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) 164 continue; 165 166 if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) 167 continue; 168 169 *new_entry = *entry; 170 found = true; 171 break; 172 } 173 rcu_read_unlock(); 174 175 return found; 176 } 177 178 static bool 179 select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, 180 struct mptcp_pm_addr_entry *new_entry) 181 { 182 struct mptcp_pm_addr_entry *entry; 183 bool found = false; 184 185 rcu_read_lock(); 186 /* do not keep any additional per socket state, just signal 187 * the address list in order. 188 * Note: removal from the local address list during the msk life-cycle 189 * can lead to additional addresses not being announced. 190 */ 191 list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 192 if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) 193 continue; 194 195 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) 196 continue; 197 198 *new_entry = *entry; 199 found = true; 200 break; 201 } 202 rcu_read_unlock(); 203 204 return found; 205 } 206 207 unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) 208 { 209 const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 210 211 return READ_ONCE(pernet->add_addr_signal_max); 212 } 213 EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max); 214 215 unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk) 216 { 217 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 218 219 return READ_ONCE(pernet->add_addr_accept_max); 220 } 221 EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max); 222 223 unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk) 224 { 225 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 226 227 return READ_ONCE(pernet->subflows_max); 228 } 229 EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max); 230 231 unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk) 232 { 233 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 234 235 return READ_ONCE(pernet->local_addr_max); 236 } 237 EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max); 238 239 bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk) 240 { 241 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 242 243 if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) || 244 (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap, 245 MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) { 246 WRITE_ONCE(msk->pm.work_pending, false); 247 return false; 248 } 249 return true; 250 } 251 252 struct mptcp_pm_add_entry * 253 mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, 254 const struct mptcp_addr_info *addr) 255 { 256 struct mptcp_pm_add_entry *entry; 257 258 lockdep_assert_held(&msk->pm.lock); 259 260 list_for_each_entry(entry, &msk->pm.anno_list, list) { 261 if (mptcp_addresses_equal(&entry->addr, addr, true)) 262 return entry; 263 } 264 265 return NULL; 266 } 267 268 bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) 269 { 270 struct mptcp_pm_add_entry *entry; 271 struct mptcp_addr_info saddr; 272 bool ret = false; 273 274 mptcp_local_address((struct sock_common *)sk, &saddr); 275 276 spin_lock_bh(&msk->pm.lock); 277 list_for_each_entry(entry, &msk->pm.anno_list, list) { 278 if (mptcp_addresses_equal(&entry->addr, &saddr, true)) { 279 ret = true; 280 goto out; 281 } 282 } 283 284 out: 285 spin_unlock_bh(&msk->pm.lock); 286 return ret; 287 } 288 289 static void mptcp_pm_add_timer(struct timer_list *timer) 290 { 291 struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer); 292 struct mptcp_sock *msk = entry->sock; 293 struct sock *sk = (struct sock *)msk; 294 295 pr_debug("msk=%p", msk); 296 297 if (!msk) 298 return; 299 300 if (inet_sk_state_load(sk) == TCP_CLOSE) 301 return; 302 303 if (!entry->addr.id) 304 return; 305 306 if (mptcp_pm_should_add_signal_addr(msk)) { 307 sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); 308 goto out; 309 } 310 311 spin_lock_bh(&msk->pm.lock); 312 313 if (!mptcp_pm_should_add_signal_addr(msk)) { 314 pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id); 315 mptcp_pm_announce_addr(msk, &entry->addr, false); 316 mptcp_pm_add_addr_send_ack(msk); 317 entry->retrans_times++; 318 } 319 320 if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) 321 sk_reset_timer(sk, timer, 322 jiffies + mptcp_get_add_addr_timeout(sock_net(sk))); 323 324 spin_unlock_bh(&msk->pm.lock); 325 326 if (entry->retrans_times == ADD_ADDR_RETRANS_MAX) 327 mptcp_pm_subflow_established(msk); 328 329 out: 330 __sock_put(sk); 331 } 332 333 struct mptcp_pm_add_entry * 334 mptcp_pm_del_add_timer(struct mptcp_sock *msk, 335 const struct mptcp_addr_info *addr, bool check_id) 336 { 337 struct mptcp_pm_add_entry *entry; 338 struct sock *sk = (struct sock *)msk; 339 340 spin_lock_bh(&msk->pm.lock); 341 entry = mptcp_lookup_anno_list_by_saddr(msk, addr); 342 if (entry && (!check_id || entry->addr.id == addr->id)) 343 entry->retrans_times = ADD_ADDR_RETRANS_MAX; 344 spin_unlock_bh(&msk->pm.lock); 345 346 if (entry && (!check_id || entry->addr.id == addr->id)) 347 sk_stop_timer_sync(sk, &entry->add_timer); 348 349 return entry; 350 } 351 352 bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, 353 const struct mptcp_addr_info *addr) 354 { 355 struct mptcp_pm_add_entry *add_entry = NULL; 356 struct sock *sk = (struct sock *)msk; 357 struct net *net = sock_net(sk); 358 359 lockdep_assert_held(&msk->pm.lock); 360 361 add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); 362 363 if (add_entry) { 364 if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) 365 return false; 366 367 sk_reset_timer(sk, &add_entry->add_timer, 368 jiffies + mptcp_get_add_addr_timeout(net)); 369 return true; 370 } 371 372 add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); 373 if (!add_entry) 374 return false; 375 376 list_add(&add_entry->list, &msk->pm.anno_list); 377 378 add_entry->addr = *addr; 379 add_entry->sock = msk; 380 add_entry->retrans_times = 0; 381 382 timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); 383 sk_reset_timer(sk, &add_entry->add_timer, 384 jiffies + mptcp_get_add_addr_timeout(net)); 385 386 return true; 387 } 388 389 void mptcp_pm_free_anno_list(struct mptcp_sock *msk) 390 { 391 struct mptcp_pm_add_entry *entry, *tmp; 392 struct sock *sk = (struct sock *)msk; 393 LIST_HEAD(free_list); 394 395 pr_debug("msk=%p", msk); 396 397 spin_lock_bh(&msk->pm.lock); 398 list_splice_init(&msk->pm.anno_list, &free_list); 399 spin_unlock_bh(&msk->pm.lock); 400 401 list_for_each_entry_safe(entry, tmp, &free_list, list) { 402 sk_stop_timer_sync(sk, &entry->add_timer); 403 kfree(entry); 404 } 405 } 406 407 /* Fill all the remote addresses into the array addrs[], 408 * and return the array size. 409 */ 410 static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, 411 struct mptcp_addr_info *local, 412 bool fullmesh, 413 struct mptcp_addr_info *addrs) 414 { 415 bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); 416 struct sock *sk = (struct sock *)msk, *ssk; 417 struct mptcp_subflow_context *subflow; 418 struct mptcp_addr_info remote = { 0 }; 419 unsigned int subflows_max; 420 int i = 0; 421 422 subflows_max = mptcp_pm_get_subflows_max(msk); 423 remote_address((struct sock_common *)sk, &remote); 424 425 /* Non-fullmesh endpoint, fill in the single entry 426 * corresponding to the primary MPC subflow remote address 427 */ 428 if (!fullmesh) { 429 if (deny_id0) 430 return 0; 431 432 if (!mptcp_pm_addr_families_match(sk, local, &remote)) 433 return 0; 434 435 msk->pm.subflows++; 436 addrs[i++] = remote; 437 } else { 438 DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 439 440 /* Forbid creation of new subflows matching existing 441 * ones, possibly already created by incoming ADD_ADDR 442 */ 443 bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 444 mptcp_for_each_subflow(msk, subflow) 445 if (READ_ONCE(subflow->local_id) == local->id) 446 __set_bit(subflow->remote_id, unavail_id); 447 448 mptcp_for_each_subflow(msk, subflow) { 449 ssk = mptcp_subflow_tcp_sock(subflow); 450 remote_address((struct sock_common *)ssk, &addrs[i]); 451 addrs[i].id = READ_ONCE(subflow->remote_id); 452 if (deny_id0 && !addrs[i].id) 453 continue; 454 455 if (test_bit(addrs[i].id, unavail_id)) 456 continue; 457 458 if (!mptcp_pm_addr_families_match(sk, local, &addrs[i])) 459 continue; 460 461 if (msk->pm.subflows < subflows_max) { 462 /* forbid creating multiple address towards 463 * this id 464 */ 465 __set_bit(addrs[i].id, unavail_id); 466 msk->pm.subflows++; 467 i++; 468 } 469 } 470 } 471 472 return i; 473 } 474 475 static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, 476 bool prio, bool backup) 477 { 478 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 479 bool slow; 480 481 pr_debug("send ack for %s", 482 prio ? "mp_prio" : (mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr")); 483 484 slow = lock_sock_fast(ssk); 485 if (prio) { 486 subflow->send_mp_prio = 1; 487 subflow->request_bkup = backup; 488 } 489 490 __mptcp_subflow_send_ack(ssk); 491 unlock_sock_fast(ssk, slow); 492 } 493 494 static void mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, 495 bool prio, bool backup) 496 { 497 spin_unlock_bh(&msk->pm.lock); 498 __mptcp_pm_send_ack(msk, subflow, prio, backup); 499 spin_lock_bh(&msk->pm.lock); 500 } 501 502 static struct mptcp_pm_addr_entry * 503 __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) 504 { 505 struct mptcp_pm_addr_entry *entry; 506 507 list_for_each_entry(entry, &pernet->local_addr_list, list) { 508 if (entry->addr.id == id) 509 return entry; 510 } 511 return NULL; 512 } 513 514 static struct mptcp_pm_addr_entry * 515 __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info, 516 bool lookup_by_id) 517 { 518 struct mptcp_pm_addr_entry *entry; 519 520 list_for_each_entry(entry, &pernet->local_addr_list, list) { 521 if ((!lookup_by_id && 522 mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) || 523 (lookup_by_id && entry->addr.id == info->id)) 524 return entry; 525 } 526 return NULL; 527 } 528 529 static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) 530 { 531 struct sock *sk = (struct sock *)msk; 532 struct mptcp_pm_addr_entry local; 533 unsigned int add_addr_signal_max; 534 bool signal_and_subflow = false; 535 unsigned int local_addr_max; 536 struct pm_nl_pernet *pernet; 537 unsigned int subflows_max; 538 539 pernet = pm_nl_get_pernet(sock_net(sk)); 540 541 add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk); 542 local_addr_max = mptcp_pm_get_local_addr_max(msk); 543 subflows_max = mptcp_pm_get_subflows_max(msk); 544 545 /* do lazy endpoint usage accounting for the MPC subflows */ 546 if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) { 547 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(msk->first); 548 struct mptcp_pm_addr_entry *entry; 549 struct mptcp_addr_info mpc_addr; 550 bool backup = false; 551 552 mptcp_local_address((struct sock_common *)msk->first, &mpc_addr); 553 rcu_read_lock(); 554 entry = __lookup_addr(pernet, &mpc_addr, false); 555 if (entry) { 556 __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); 557 msk->mpc_endpoint_id = entry->addr.id; 558 backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 559 } 560 rcu_read_unlock(); 561 562 if (backup) 563 mptcp_pm_send_ack(msk, subflow, true, backup); 564 565 msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED); 566 } 567 568 pr_debug("local %d:%d signal %d:%d subflows %d:%d\n", 569 msk->pm.local_addr_used, local_addr_max, 570 msk->pm.add_addr_signaled, add_addr_signal_max, 571 msk->pm.subflows, subflows_max); 572 573 /* check first for announce */ 574 if (msk->pm.add_addr_signaled < add_addr_signal_max) { 575 /* due to racing events on both ends we can reach here while 576 * previous add address is still running: if we invoke now 577 * mptcp_pm_announce_addr(), that will fail and the 578 * corresponding id will be marked as used. 579 * Instead let the PM machinery reschedule us when the 580 * current address announce will be completed. 581 */ 582 if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) 583 return; 584 585 if (!select_signal_address(pernet, msk, &local)) 586 goto subflow; 587 588 /* If the alloc fails, we are on memory pressure, not worth 589 * continuing, and trying to create subflows. 590 */ 591 if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) 592 return; 593 594 __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); 595 msk->pm.add_addr_signaled++; 596 mptcp_pm_announce_addr(msk, &local.addr, false); 597 mptcp_pm_nl_addr_send_ack(msk); 598 599 if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) 600 signal_and_subflow = true; 601 } 602 603 subflow: 604 /* check if should create a new subflow */ 605 while (msk->pm.local_addr_used < local_addr_max && 606 msk->pm.subflows < subflows_max) { 607 struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; 608 bool fullmesh; 609 int i, nr; 610 611 if (signal_and_subflow) 612 signal_and_subflow = false; 613 else if (!select_local_address(pernet, msk, &local)) 614 break; 615 616 fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); 617 618 msk->pm.local_addr_used++; 619 __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); 620 nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); 621 if (nr == 0) 622 continue; 623 624 spin_unlock_bh(&msk->pm.lock); 625 for (i = 0; i < nr; i++) 626 __mptcp_subflow_connect(sk, &local.addr, &addrs[i]); 627 spin_lock_bh(&msk->pm.lock); 628 } 629 mptcp_pm_nl_check_work_pending(msk); 630 } 631 632 static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) 633 { 634 mptcp_pm_create_subflow_or_signal_addr(msk); 635 } 636 637 static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) 638 { 639 mptcp_pm_create_subflow_or_signal_addr(msk); 640 } 641 642 /* Fill all the local addresses into the array addrs[], 643 * and return the array size. 644 */ 645 static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, 646 struct mptcp_addr_info *remote, 647 struct mptcp_addr_info *addrs) 648 { 649 struct sock *sk = (struct sock *)msk; 650 struct mptcp_pm_addr_entry *entry; 651 struct mptcp_addr_info mpc_addr; 652 struct pm_nl_pernet *pernet; 653 unsigned int subflows_max; 654 int i = 0; 655 656 pernet = pm_nl_get_pernet_from_msk(msk); 657 subflows_max = mptcp_pm_get_subflows_max(msk); 658 659 mptcp_local_address((struct sock_common *)msk, &mpc_addr); 660 661 rcu_read_lock(); 662 list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 663 if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) 664 continue; 665 666 if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote)) 667 continue; 668 669 if (msk->pm.subflows < subflows_max) { 670 msk->pm.subflows++; 671 addrs[i] = entry->addr; 672 673 /* Special case for ID0: set the correct ID */ 674 if (mptcp_addresses_equal(&entry->addr, &mpc_addr, entry->addr.port)) 675 addrs[i].id = 0; 676 677 i++; 678 } 679 } 680 rcu_read_unlock(); 681 682 /* If the array is empty, fill in the single 683 * 'IPADDRANY' local address 684 */ 685 if (!i) { 686 struct mptcp_addr_info local; 687 688 memset(&local, 0, sizeof(local)); 689 local.family = 690 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 691 remote->family == AF_INET6 && 692 ipv6_addr_v4mapped(&remote->addr6) ? AF_INET : 693 #endif 694 remote->family; 695 696 if (!mptcp_pm_addr_families_match(sk, &local, remote)) 697 return 0; 698 699 msk->pm.subflows++; 700 addrs[i++] = local; 701 } 702 703 return i; 704 } 705 706 static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) 707 { 708 struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; 709 struct sock *sk = (struct sock *)msk; 710 unsigned int add_addr_accept_max; 711 struct mptcp_addr_info remote; 712 unsigned int subflows_max; 713 bool sf_created = false; 714 int i, nr; 715 716 add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); 717 subflows_max = mptcp_pm_get_subflows_max(msk); 718 719 pr_debug("accepted %d:%d remote family %d", 720 msk->pm.add_addr_accepted, add_addr_accept_max, 721 msk->pm.remote.family); 722 723 remote = msk->pm.remote; 724 mptcp_pm_announce_addr(msk, &remote, true); 725 mptcp_pm_nl_addr_send_ack(msk); 726 727 if (lookup_subflow_by_daddr(&msk->conn_list, &remote)) 728 return; 729 730 /* pick id 0 port, if none is provided the remote address */ 731 if (!remote.port) 732 remote.port = sk->sk_dport; 733 734 /* connect to the specified remote address, using whatever 735 * local address the routing configuration will pick. 736 */ 737 nr = fill_local_addresses_vec(msk, &remote, addrs); 738 if (nr == 0) 739 return; 740 741 spin_unlock_bh(&msk->pm.lock); 742 for (i = 0; i < nr; i++) 743 if (__mptcp_subflow_connect(sk, &addrs[i], &remote) == 0) 744 sf_created = true; 745 spin_lock_bh(&msk->pm.lock); 746 747 if (sf_created) { 748 msk->pm.add_addr_accepted++; 749 if (msk->pm.add_addr_accepted >= add_addr_accept_max || 750 msk->pm.subflows >= subflows_max) 751 WRITE_ONCE(msk->pm.accept_addr, false); 752 } 753 } 754 755 void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) 756 { 757 struct mptcp_subflow_context *subflow; 758 759 msk_owned_by_me(msk); 760 lockdep_assert_held(&msk->pm.lock); 761 762 if (!mptcp_pm_should_add_signal(msk) && 763 !mptcp_pm_should_rm_signal(msk)) 764 return; 765 766 subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node); 767 if (subflow) 768 mptcp_pm_send_ack(msk, subflow, false, false); 769 } 770 771 int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, 772 struct mptcp_addr_info *addr, 773 struct mptcp_addr_info *rem, 774 u8 bkup) 775 { 776 struct mptcp_subflow_context *subflow; 777 778 pr_debug("bkup=%d", bkup); 779 780 mptcp_for_each_subflow(msk, subflow) { 781 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 782 struct mptcp_addr_info local, remote; 783 784 mptcp_local_address((struct sock_common *)ssk, &local); 785 if (!mptcp_addresses_equal(&local, addr, addr->port)) 786 continue; 787 788 if (rem && rem->family != AF_UNSPEC) { 789 remote_address((struct sock_common *)ssk, &remote); 790 if (!mptcp_addresses_equal(&remote, rem, rem->port)) 791 continue; 792 } 793 794 __mptcp_pm_send_ack(msk, subflow, true, bkup); 795 return 0; 796 } 797 798 return -EINVAL; 799 } 800 801 static bool mptcp_local_id_match(const struct mptcp_sock *msk, u8 local_id, u8 id) 802 { 803 return local_id == id || (!local_id && msk->mpc_endpoint_id == id); 804 } 805 806 static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, 807 const struct mptcp_rm_list *rm_list, 808 enum linux_mptcp_mib_field rm_type) 809 { 810 struct mptcp_subflow_context *subflow, *tmp; 811 struct sock *sk = (struct sock *)msk; 812 u8 i; 813 814 pr_debug("%s rm_list_nr %d", 815 rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", rm_list->nr); 816 817 msk_owned_by_me(msk); 818 819 if (sk->sk_state == TCP_LISTEN) 820 return; 821 822 if (!rm_list->nr) 823 return; 824 825 if (list_empty(&msk->conn_list)) 826 return; 827 828 for (i = 0; i < rm_list->nr; i++) { 829 u8 rm_id = rm_list->ids[i]; 830 bool removed = false; 831 832 mptcp_for_each_subflow_safe(msk, subflow, tmp) { 833 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 834 u8 remote_id = READ_ONCE(subflow->remote_id); 835 int how = RCV_SHUTDOWN | SEND_SHUTDOWN; 836 u8 id = subflow_get_local_id(subflow); 837 838 if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) 839 continue; 840 if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) 841 continue; 842 843 pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", 844 rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", 845 i, rm_id, id, remote_id, msk->mpc_endpoint_id); 846 spin_unlock_bh(&msk->pm.lock); 847 mptcp_subflow_shutdown(sk, ssk, how); 848 849 /* the following takes care of updating the subflows counter */ 850 mptcp_close_ssk(sk, ssk, subflow); 851 spin_lock_bh(&msk->pm.lock); 852 853 removed |= subflow->request_join; 854 if (rm_type == MPTCP_MIB_RMSUBFLOW) 855 __MPTCP_INC_STATS(sock_net(sk), rm_type); 856 } 857 858 if (rm_type == MPTCP_MIB_RMADDR) 859 __MPTCP_INC_STATS(sock_net(sk), rm_type); 860 861 if (!removed) 862 continue; 863 864 if (!mptcp_pm_is_kernel(msk)) 865 continue; 866 867 if (rm_type == MPTCP_MIB_RMADDR && rm_id && 868 !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { 869 /* Note: if the subflow has been closed before, this 870 * add_addr_accepted counter will not be decremented. 871 */ 872 if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk)) 873 WRITE_ONCE(msk->pm.accept_addr, true); 874 } 875 } 876 } 877 878 static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) 879 { 880 mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); 881 } 882 883 static void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, 884 const struct mptcp_rm_list *rm_list) 885 { 886 mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); 887 } 888 889 void mptcp_pm_nl_work(struct mptcp_sock *msk) 890 { 891 struct mptcp_pm_data *pm = &msk->pm; 892 893 msk_owned_by_me(msk); 894 895 if (!(pm->status & MPTCP_PM_WORK_MASK)) 896 return; 897 898 spin_lock_bh(&msk->pm.lock); 899 900 pr_debug("msk=%p status=%x", msk, pm->status); 901 if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { 902 pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); 903 mptcp_pm_nl_add_addr_received(msk); 904 } 905 if (pm->status & BIT(MPTCP_PM_ADD_ADDR_SEND_ACK)) { 906 pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_SEND_ACK); 907 mptcp_pm_nl_addr_send_ack(msk); 908 } 909 if (pm->status & BIT(MPTCP_PM_RM_ADDR_RECEIVED)) { 910 pm->status &= ~BIT(MPTCP_PM_RM_ADDR_RECEIVED); 911 mptcp_pm_nl_rm_addr_received(msk); 912 } 913 if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { 914 pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); 915 mptcp_pm_nl_fully_established(msk); 916 } 917 if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { 918 pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); 919 mptcp_pm_nl_subflow_established(msk); 920 } 921 922 spin_unlock_bh(&msk->pm.lock); 923 } 924 925 static bool address_use_port(struct mptcp_pm_addr_entry *entry) 926 { 927 return (entry->flags & 928 (MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) == 929 MPTCP_PM_ADDR_FLAG_SIGNAL; 930 } 931 932 /* caller must ensure the RCU grace period is already elapsed */ 933 static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) 934 { 935 if (entry->lsk) 936 sock_release(entry->lsk); 937 kfree(entry); 938 } 939 940 static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, 941 struct mptcp_pm_addr_entry *entry, 942 bool needs_id) 943 { 944 struct mptcp_pm_addr_entry *cur, *del_entry = NULL; 945 unsigned int addr_max; 946 int ret = -EINVAL; 947 948 spin_lock_bh(&pernet->lock); 949 /* to keep the code simple, don't do IDR-like allocation for address ID, 950 * just bail when we exceed limits 951 */ 952 if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID) 953 pernet->next_id = 1; 954 if (pernet->addrs >= MPTCP_PM_ADDR_MAX) { 955 ret = -ERANGE; 956 goto out; 957 } 958 if (test_bit(entry->addr.id, pernet->id_bitmap)) { 959 ret = -EBUSY; 960 goto out; 961 } 962 963 /* do not insert duplicate address, differentiate on port only 964 * singled addresses 965 */ 966 if (!address_use_port(entry)) 967 entry->addr.port = 0; 968 list_for_each_entry(cur, &pernet->local_addr_list, list) { 969 if (mptcp_addresses_equal(&cur->addr, &entry->addr, 970 cur->addr.port || entry->addr.port)) { 971 /* allow replacing the exiting endpoint only if such 972 * endpoint is an implicit one and the user-space 973 * did not provide an endpoint id 974 */ 975 if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) { 976 ret = -EEXIST; 977 goto out; 978 } 979 if (entry->addr.id) 980 goto out; 981 982 pernet->addrs--; 983 entry->addr.id = cur->addr.id; 984 list_del_rcu(&cur->list); 985 del_entry = cur; 986 break; 987 } 988 } 989 990 if (!entry->addr.id && needs_id) { 991 find_next: 992 entry->addr.id = find_next_zero_bit(pernet->id_bitmap, 993 MPTCP_PM_MAX_ADDR_ID + 1, 994 pernet->next_id); 995 if (!entry->addr.id && pernet->next_id != 1) { 996 pernet->next_id = 1; 997 goto find_next; 998 } 999 } 1000 1001 if (!entry->addr.id && needs_id) 1002 goto out; 1003 1004 __set_bit(entry->addr.id, pernet->id_bitmap); 1005 if (entry->addr.id > pernet->next_id) 1006 pernet->next_id = entry->addr.id; 1007 1008 if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { 1009 addr_max = pernet->add_addr_signal_max; 1010 WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1); 1011 } 1012 if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 1013 addr_max = pernet->local_addr_max; 1014 WRITE_ONCE(pernet->local_addr_max, addr_max + 1); 1015 } 1016 1017 pernet->addrs++; 1018 if (!entry->addr.port) 1019 list_add_tail_rcu(&entry->list, &pernet->local_addr_list); 1020 else 1021 list_add_rcu(&entry->list, &pernet->local_addr_list); 1022 ret = entry->addr.id; 1023 1024 out: 1025 spin_unlock_bh(&pernet->lock); 1026 1027 /* just replaced an existing entry, free it */ 1028 if (del_entry) { 1029 synchronize_rcu(); 1030 __mptcp_pm_release_addr_entry(del_entry); 1031 } 1032 return ret; 1033 } 1034 1035 static struct lock_class_key mptcp_slock_keys[2]; 1036 static struct lock_class_key mptcp_keys[2]; 1037 1038 static int mptcp_pm_nl_create_listen_socket(struct sock *sk, 1039 struct mptcp_pm_addr_entry *entry) 1040 { 1041 bool is_ipv6 = sk->sk_family == AF_INET6; 1042 int addrlen = sizeof(struct sockaddr_in); 1043 struct sockaddr_storage addr; 1044 struct sock *newsk, *ssk; 1045 int backlog = 1024; 1046 int err; 1047 1048 err = sock_create_kern(sock_net(sk), entry->addr.family, 1049 SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk); 1050 if (err) 1051 return err; 1052 1053 newsk = entry->lsk->sk; 1054 if (!newsk) 1055 return -EINVAL; 1056 1057 /* The subflow socket lock is acquired in a nested to the msk one 1058 * in several places, even by the TCP stack, and this msk is a kernel 1059 * socket: lockdep complains. Instead of propagating the _nested 1060 * modifiers in several places, re-init the lock class for the msk 1061 * socket to an mptcp specific one. 1062 */ 1063 sock_lock_init_class_and_name(newsk, 1064 is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET", 1065 &mptcp_slock_keys[is_ipv6], 1066 is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET", 1067 &mptcp_keys[is_ipv6]); 1068 1069 lock_sock(newsk); 1070 ssk = __mptcp_nmpc_sk(mptcp_sk(newsk)); 1071 release_sock(newsk); 1072 if (IS_ERR(ssk)) 1073 return PTR_ERR(ssk); 1074 1075 mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); 1076 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 1077 if (entry->addr.family == AF_INET6) 1078 addrlen = sizeof(struct sockaddr_in6); 1079 #endif 1080 if (ssk->sk_family == AF_INET) 1081 err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); 1082 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 1083 else if (ssk->sk_family == AF_INET6) 1084 err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); 1085 #endif 1086 if (err) 1087 return err; 1088 1089 /* We don't use mptcp_set_state() here because it needs to be called 1090 * under the msk socket lock. For the moment, that will not bring 1091 * anything more than only calling inet_sk_state_store(), because the 1092 * old status is known (TCP_CLOSE). 1093 */ 1094 inet_sk_state_store(newsk, TCP_LISTEN); 1095 lock_sock(ssk); 1096 err = __inet_listen_sk(ssk, backlog); 1097 if (!err) 1098 mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); 1099 release_sock(ssk); 1100 return err; 1101 } 1102 1103 int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) 1104 { 1105 struct mptcp_pm_addr_entry *entry; 1106 struct pm_nl_pernet *pernet; 1107 int ret = -1; 1108 1109 pernet = pm_nl_get_pernet_from_msk(msk); 1110 1111 rcu_read_lock(); 1112 list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 1113 if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) { 1114 ret = entry->addr.id; 1115 break; 1116 } 1117 } 1118 rcu_read_unlock(); 1119 if (ret >= 0) 1120 return ret; 1121 1122 /* address not found, add to local list */ 1123 entry = kmalloc(sizeof(*entry), GFP_ATOMIC); 1124 if (!entry) 1125 return -ENOMEM; 1126 1127 entry->addr = *skc; 1128 entry->addr.id = 0; 1129 entry->addr.port = 0; 1130 entry->ifindex = 0; 1131 entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT; 1132 entry->lsk = NULL; 1133 ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true); 1134 if (ret < 0) 1135 kfree(entry); 1136 1137 return ret; 1138 } 1139 1140 bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) 1141 { 1142 struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 1143 struct mptcp_pm_addr_entry *entry; 1144 bool backup = false; 1145 1146 rcu_read_lock(); 1147 list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 1148 if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) { 1149 backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 1150 break; 1151 } 1152 } 1153 rcu_read_unlock(); 1154 1155 return backup; 1156 } 1157 1158 #define MPTCP_PM_CMD_GRP_OFFSET 0 1159 #define MPTCP_PM_EV_GRP_OFFSET 1 1160 1161 static const struct genl_multicast_group mptcp_pm_mcgrps[] = { 1162 [MPTCP_PM_CMD_GRP_OFFSET] = { .name = MPTCP_PM_CMD_GRP_NAME, }, 1163 [MPTCP_PM_EV_GRP_OFFSET] = { .name = MPTCP_PM_EV_GRP_NAME, 1164 .flags = GENL_UNS_ADMIN_PERM, 1165 }, 1166 }; 1167 1168 static const struct nla_policy 1169 mptcp_pm_addr_policy[MPTCP_PM_ADDR_ATTR_MAX + 1] = { 1170 [MPTCP_PM_ADDR_ATTR_FAMILY] = { .type = NLA_U16, }, 1171 [MPTCP_PM_ADDR_ATTR_ID] = { .type = NLA_U8, }, 1172 [MPTCP_PM_ADDR_ATTR_ADDR4] = { .type = NLA_U32, }, 1173 [MPTCP_PM_ADDR_ATTR_ADDR6] = 1174 NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), 1175 [MPTCP_PM_ADDR_ATTR_PORT] = { .type = NLA_U16 }, 1176 [MPTCP_PM_ADDR_ATTR_FLAGS] = { .type = NLA_U32 }, 1177 [MPTCP_PM_ADDR_ATTR_IF_IDX] = { .type = NLA_S32 }, 1178 }; 1179 1180 static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = { 1181 [MPTCP_PM_ATTR_ADDR] = 1182 NLA_POLICY_NESTED(mptcp_pm_addr_policy), 1183 [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, }, 1184 [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, 1185 [MPTCP_PM_ATTR_TOKEN] = { .type = NLA_U32, }, 1186 [MPTCP_PM_ATTR_LOC_ID] = { .type = NLA_U8, }, 1187 [MPTCP_PM_ATTR_ADDR_REMOTE] = 1188 NLA_POLICY_NESTED(mptcp_pm_addr_policy), 1189 }; 1190 1191 void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) 1192 { 1193 struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk); 1194 struct sock *sk = (struct sock *)msk; 1195 unsigned int active_max_loss_cnt; 1196 struct net *net = sock_net(sk); 1197 unsigned int stale_loss_cnt; 1198 bool slow; 1199 1200 stale_loss_cnt = mptcp_stale_loss_cnt(net); 1201 if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt) 1202 return; 1203 1204 /* look for another available subflow not in loss state */ 1205 active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1); 1206 mptcp_for_each_subflow(msk, iter) { 1207 if (iter != subflow && mptcp_subflow_active(iter) && 1208 iter->stale_count < active_max_loss_cnt) { 1209 /* we have some alternatives, try to mark this subflow as idle ...*/ 1210 slow = lock_sock_fast(ssk); 1211 if (!tcp_rtx_and_write_queues_empty(ssk)) { 1212 subflow->stale = 1; 1213 __mptcp_retransmit_pending_data(sk); 1214 MPTCP_INC_STATS(net, MPTCP_MIB_SUBFLOWSTALE); 1215 } 1216 unlock_sock_fast(ssk, slow); 1217 1218 /* always try to push the pending data regardless of re-injections: 1219 * we can possibly use backup subflows now, and subflow selection 1220 * is cheap under the msk socket lock 1221 */ 1222 __mptcp_push_pending(sk, 0); 1223 return; 1224 } 1225 } 1226 } 1227 1228 static int mptcp_pm_family_to_addr(int family) 1229 { 1230 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 1231 if (family == AF_INET6) 1232 return MPTCP_PM_ADDR_ATTR_ADDR6; 1233 #endif 1234 return MPTCP_PM_ADDR_ATTR_ADDR4; 1235 } 1236 1237 static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[], 1238 const struct nlattr *attr, 1239 struct genl_info *info, 1240 struct mptcp_addr_info *addr, 1241 bool require_family) 1242 { 1243 int err, addr_addr; 1244 1245 if (!attr) { 1246 GENL_SET_ERR_MSG(info, "missing address info"); 1247 return -EINVAL; 1248 } 1249 1250 /* no validation needed - was already done via nested policy */ 1251 err = nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, 1252 mptcp_pm_addr_policy, info->extack); 1253 if (err) 1254 return err; 1255 1256 if (tb[MPTCP_PM_ADDR_ATTR_ID]) 1257 addr->id = nla_get_u8(tb[MPTCP_PM_ADDR_ATTR_ID]); 1258 1259 if (!tb[MPTCP_PM_ADDR_ATTR_FAMILY]) { 1260 if (!require_family) 1261 return 0; 1262 1263 NL_SET_ERR_MSG_ATTR(info->extack, attr, 1264 "missing family"); 1265 return -EINVAL; 1266 } 1267 1268 addr->family = nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_FAMILY]); 1269 if (addr->family != AF_INET 1270 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 1271 && addr->family != AF_INET6 1272 #endif 1273 ) { 1274 NL_SET_ERR_MSG_ATTR(info->extack, attr, 1275 "unknown address family"); 1276 return -EINVAL; 1277 } 1278 addr_addr = mptcp_pm_family_to_addr(addr->family); 1279 if (!tb[addr_addr]) { 1280 NL_SET_ERR_MSG_ATTR(info->extack, attr, 1281 "missing address data"); 1282 return -EINVAL; 1283 } 1284 1285 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 1286 if (addr->family == AF_INET6) 1287 addr->addr6 = nla_get_in6_addr(tb[addr_addr]); 1288 else 1289 #endif 1290 addr->addr.s_addr = nla_get_in_addr(tb[addr_addr]); 1291 1292 if (tb[MPTCP_PM_ADDR_ATTR_PORT]) 1293 addr->port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); 1294 1295 return 0; 1296 } 1297 1298 int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, 1299 struct mptcp_addr_info *addr) 1300 { 1301 struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; 1302 1303 memset(addr, 0, sizeof(*addr)); 1304 1305 return mptcp_pm_parse_pm_addr_attr(tb, attr, info, addr, true); 1306 } 1307 1308 int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info, 1309 bool require_family, 1310 struct mptcp_pm_addr_entry *entry) 1311 { 1312 struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; 1313 int err; 1314 1315 memset(entry, 0, sizeof(*entry)); 1316 1317 err = mptcp_pm_parse_pm_addr_attr(tb, attr, info, &entry->addr, require_family); 1318 if (err) 1319 return err; 1320 1321 if (tb[MPTCP_PM_ADDR_ATTR_IF_IDX]) { 1322 u32 val = nla_get_s32(tb[MPTCP_PM_ADDR_ATTR_IF_IDX]); 1323 1324 entry->ifindex = val; 1325 } 1326 1327 if (tb[MPTCP_PM_ADDR_ATTR_FLAGS]) 1328 entry->flags = nla_get_u32(tb[MPTCP_PM_ADDR_ATTR_FLAGS]); 1329 1330 if (tb[MPTCP_PM_ADDR_ATTR_PORT]) 1331 entry->addr.port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); 1332 1333 return 0; 1334 } 1335 1336 static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) 1337 { 1338 return pm_nl_get_pernet(genl_info_net(info)); 1339 } 1340 1341 static int mptcp_nl_add_subflow_or_signal_addr(struct net *net) 1342 { 1343 struct mptcp_sock *msk; 1344 long s_slot = 0, s_num = 0; 1345 1346 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1347 struct sock *sk = (struct sock *)msk; 1348 1349 if (!READ_ONCE(msk->fully_established) || 1350 mptcp_pm_is_userspace(msk)) 1351 goto next; 1352 1353 lock_sock(sk); 1354 spin_lock_bh(&msk->pm.lock); 1355 mptcp_pm_create_subflow_or_signal_addr(msk); 1356 spin_unlock_bh(&msk->pm.lock); 1357 release_sock(sk); 1358 1359 next: 1360 sock_put(sk); 1361 cond_resched(); 1362 } 1363 1364 return 0; 1365 } 1366 1367 static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, 1368 struct genl_info *info) 1369 { 1370 struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; 1371 1372 if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, 1373 mptcp_pm_addr_policy, info->extack) && 1374 tb[MPTCP_PM_ADDR_ATTR_ID]) 1375 return true; 1376 return false; 1377 } 1378 1379 static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) 1380 { 1381 struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; 1382 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1383 struct mptcp_pm_addr_entry addr, *entry; 1384 int ret; 1385 1386 ret = mptcp_pm_parse_entry(attr, info, true, &addr); 1387 if (ret < 0) 1388 return ret; 1389 1390 if (addr.addr.port && !address_use_port(&addr)) { 1391 GENL_SET_ERR_MSG(info, "flags must have signal and not subflow when using port"); 1392 return -EINVAL; 1393 } 1394 1395 if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL && 1396 addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { 1397 GENL_SET_ERR_MSG(info, "flags mustn't have both signal and fullmesh"); 1398 return -EINVAL; 1399 } 1400 1401 if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) { 1402 GENL_SET_ERR_MSG(info, "can't create IMPLICIT endpoint"); 1403 return -EINVAL; 1404 } 1405 1406 entry = kzalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT); 1407 if (!entry) { 1408 GENL_SET_ERR_MSG(info, "can't allocate addr"); 1409 return -ENOMEM; 1410 } 1411 1412 *entry = addr; 1413 if (entry->addr.port) { 1414 ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry); 1415 if (ret) { 1416 GENL_SET_ERR_MSG_FMT(info, "create listen socket error: %d", ret); 1417 goto out_free; 1418 } 1419 } 1420 ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, 1421 !mptcp_pm_has_addr_attr_id(attr, info)); 1422 if (ret < 0) { 1423 GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); 1424 goto out_free; 1425 } 1426 1427 mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk)); 1428 return 0; 1429 1430 out_free: 1431 __mptcp_pm_release_addr_entry(entry); 1432 return ret; 1433 } 1434 1435 int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, 1436 u8 *flags, int *ifindex) 1437 { 1438 struct mptcp_pm_addr_entry *entry; 1439 struct sock *sk = (struct sock *)msk; 1440 struct net *net = sock_net(sk); 1441 1442 /* No entries with ID 0 */ 1443 if (id == 0) 1444 return 0; 1445 1446 rcu_read_lock(); 1447 entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); 1448 if (entry) { 1449 *flags = entry->flags; 1450 *ifindex = entry->ifindex; 1451 } 1452 rcu_read_unlock(); 1453 1454 return 0; 1455 } 1456 1457 static bool remove_anno_list_by_saddr(struct mptcp_sock *msk, 1458 const struct mptcp_addr_info *addr) 1459 { 1460 struct mptcp_pm_add_entry *entry; 1461 1462 entry = mptcp_pm_del_add_timer(msk, addr, false); 1463 if (entry) { 1464 list_del(&entry->list); 1465 kfree(entry); 1466 return true; 1467 } 1468 1469 return false; 1470 } 1471 1472 static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, 1473 const struct mptcp_addr_info *addr, 1474 bool force) 1475 { 1476 struct mptcp_rm_list list = { .nr = 0 }; 1477 bool ret; 1478 1479 list.ids[list.nr++] = addr->id; 1480 1481 ret = remove_anno_list_by_saddr(msk, addr); 1482 if (ret || force) { 1483 spin_lock_bh(&msk->pm.lock); 1484 if (ret) { 1485 __set_bit(addr->id, msk->pm.id_avail_bitmap); 1486 msk->pm.add_addr_signaled--; 1487 } 1488 mptcp_pm_remove_addr(msk, &list); 1489 spin_unlock_bh(&msk->pm.lock); 1490 } 1491 return ret; 1492 } 1493 1494 static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id) 1495 { 1496 /* If it was marked as used, and not ID 0, decrement local_addr_used */ 1497 if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) && 1498 id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0)) 1499 msk->pm.local_addr_used--; 1500 } 1501 1502 static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, 1503 const struct mptcp_pm_addr_entry *entry) 1504 { 1505 const struct mptcp_addr_info *addr = &entry->addr; 1506 struct mptcp_rm_list list = { .nr = 0 }; 1507 long s_slot = 0, s_num = 0; 1508 struct mptcp_sock *msk; 1509 1510 pr_debug("remove_id=%d", addr->id); 1511 1512 list.ids[list.nr++] = addr->id; 1513 1514 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1515 struct sock *sk = (struct sock *)msk; 1516 bool remove_subflow; 1517 1518 if (mptcp_pm_is_userspace(msk)) 1519 goto next; 1520 1521 if (list_empty(&msk->conn_list)) { 1522 mptcp_pm_remove_anno_addr(msk, addr, false); 1523 goto next; 1524 } 1525 1526 lock_sock(sk); 1527 remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr); 1528 mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && 1529 !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); 1530 1531 if (remove_subflow) { 1532 spin_lock_bh(&msk->pm.lock); 1533 mptcp_pm_nl_rm_subflow_received(msk, &list); 1534 spin_unlock_bh(&msk->pm.lock); 1535 } 1536 1537 if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 1538 spin_lock_bh(&msk->pm.lock); 1539 __mark_subflow_endp_available(msk, list.ids[0]); 1540 spin_unlock_bh(&msk->pm.lock); 1541 } 1542 1543 release_sock(sk); 1544 1545 next: 1546 sock_put(sk); 1547 cond_resched(); 1548 } 1549 1550 return 0; 1551 } 1552 1553 static int mptcp_nl_remove_id_zero_address(struct net *net, 1554 struct mptcp_addr_info *addr) 1555 { 1556 struct mptcp_rm_list list = { .nr = 0 }; 1557 long s_slot = 0, s_num = 0; 1558 struct mptcp_sock *msk; 1559 1560 list.ids[list.nr++] = 0; 1561 1562 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1563 struct sock *sk = (struct sock *)msk; 1564 struct mptcp_addr_info msk_local; 1565 1566 if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) 1567 goto next; 1568 1569 mptcp_local_address((struct sock_common *)msk, &msk_local); 1570 if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) 1571 goto next; 1572 1573 lock_sock(sk); 1574 spin_lock_bh(&msk->pm.lock); 1575 mptcp_pm_remove_addr(msk, &list); 1576 mptcp_pm_nl_rm_subflow_received(msk, &list); 1577 __mark_subflow_endp_available(msk, 0); 1578 spin_unlock_bh(&msk->pm.lock); 1579 release_sock(sk); 1580 1581 next: 1582 sock_put(sk); 1583 cond_resched(); 1584 } 1585 1586 return 0; 1587 } 1588 1589 static int mptcp_nl_cmd_del_addr(struct sk_buff *skb, struct genl_info *info) 1590 { 1591 struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; 1592 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1593 struct mptcp_pm_addr_entry addr, *entry; 1594 unsigned int addr_max; 1595 int ret; 1596 1597 ret = mptcp_pm_parse_entry(attr, info, false, &addr); 1598 if (ret < 0) 1599 return ret; 1600 1601 /* the zero id address is special: the first address used by the msk 1602 * always gets such an id, so different subflows can have different zero 1603 * id addresses. Additionally zero id is not accounted for in id_bitmap. 1604 * Let's use an 'mptcp_rm_list' instead of the common remove code. 1605 */ 1606 if (addr.addr.id == 0) 1607 return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr); 1608 1609 spin_lock_bh(&pernet->lock); 1610 entry = __lookup_addr_by_id(pernet, addr.addr.id); 1611 if (!entry) { 1612 GENL_SET_ERR_MSG(info, "address not found"); 1613 spin_unlock_bh(&pernet->lock); 1614 return -EINVAL; 1615 } 1616 if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { 1617 addr_max = pernet->add_addr_signal_max; 1618 WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1); 1619 } 1620 if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 1621 addr_max = pernet->local_addr_max; 1622 WRITE_ONCE(pernet->local_addr_max, addr_max - 1); 1623 } 1624 1625 pernet->addrs--; 1626 list_del_rcu(&entry->list); 1627 __clear_bit(entry->addr.id, pernet->id_bitmap); 1628 spin_unlock_bh(&pernet->lock); 1629 1630 mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry); 1631 synchronize_rcu(); 1632 __mptcp_pm_release_addr_entry(entry); 1633 1634 return ret; 1635 } 1636 1637 void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) 1638 { 1639 struct mptcp_rm_list alist = { .nr = 0 }; 1640 struct mptcp_pm_addr_entry *entry; 1641 int anno_nr = 0; 1642 1643 list_for_each_entry(entry, rm_list, list) { 1644 if (alist.nr >= MPTCP_RM_IDS_MAX) 1645 break; 1646 1647 /* only delete if either announced or matching a subflow */ 1648 if (remove_anno_list_by_saddr(msk, &entry->addr)) 1649 anno_nr++; 1650 else if (!lookup_subflow_by_saddr(&msk->conn_list, 1651 &entry->addr)) 1652 continue; 1653 1654 alist.ids[alist.nr++] = entry->addr.id; 1655 } 1656 1657 if (alist.nr) { 1658 spin_lock_bh(&msk->pm.lock); 1659 msk->pm.add_addr_signaled -= anno_nr; 1660 mptcp_pm_remove_addr(msk, &alist); 1661 spin_unlock_bh(&msk->pm.lock); 1662 } 1663 } 1664 1665 void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, 1666 struct list_head *rm_list) 1667 { 1668 struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; 1669 struct mptcp_pm_addr_entry *entry; 1670 1671 list_for_each_entry(entry, rm_list, list) { 1672 if (slist.nr < MPTCP_RM_IDS_MAX && 1673 lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) 1674 slist.ids[slist.nr++] = entry->addr.id; 1675 1676 if (alist.nr < MPTCP_RM_IDS_MAX && 1677 remove_anno_list_by_saddr(msk, &entry->addr)) 1678 alist.ids[alist.nr++] = entry->addr.id; 1679 } 1680 1681 spin_lock_bh(&msk->pm.lock); 1682 if (alist.nr) { 1683 msk->pm.add_addr_signaled -= alist.nr; 1684 mptcp_pm_remove_addr(msk, &alist); 1685 } 1686 if (slist.nr) 1687 mptcp_pm_nl_rm_subflow_received(msk, &slist); 1688 /* Reset counters: maybe some subflows have been removed before */ 1689 bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 1690 msk->pm.local_addr_used = 0; 1691 spin_unlock_bh(&msk->pm.lock); 1692 } 1693 1694 static void mptcp_nl_remove_addrs_list(struct net *net, 1695 struct list_head *rm_list) 1696 { 1697 long s_slot = 0, s_num = 0; 1698 struct mptcp_sock *msk; 1699 1700 if (list_empty(rm_list)) 1701 return; 1702 1703 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1704 struct sock *sk = (struct sock *)msk; 1705 1706 if (!mptcp_pm_is_userspace(msk)) { 1707 lock_sock(sk); 1708 mptcp_pm_remove_addrs_and_subflows(msk, rm_list); 1709 release_sock(sk); 1710 } 1711 1712 sock_put(sk); 1713 cond_resched(); 1714 } 1715 } 1716 1717 /* caller must ensure the RCU grace period is already elapsed */ 1718 static void __flush_addrs(struct list_head *list) 1719 { 1720 while (!list_empty(list)) { 1721 struct mptcp_pm_addr_entry *cur; 1722 1723 cur = list_entry(list->next, 1724 struct mptcp_pm_addr_entry, list); 1725 list_del_rcu(&cur->list); 1726 __mptcp_pm_release_addr_entry(cur); 1727 } 1728 } 1729 1730 static void __reset_counters(struct pm_nl_pernet *pernet) 1731 { 1732 WRITE_ONCE(pernet->add_addr_signal_max, 0); 1733 WRITE_ONCE(pernet->add_addr_accept_max, 0); 1734 WRITE_ONCE(pernet->local_addr_max, 0); 1735 pernet->addrs = 0; 1736 } 1737 1738 static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info) 1739 { 1740 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1741 LIST_HEAD(free_list); 1742 1743 spin_lock_bh(&pernet->lock); 1744 list_splice_init(&pernet->local_addr_list, &free_list); 1745 __reset_counters(pernet); 1746 pernet->next_id = 1; 1747 bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 1748 spin_unlock_bh(&pernet->lock); 1749 mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list); 1750 synchronize_rcu(); 1751 __flush_addrs(&free_list); 1752 return 0; 1753 } 1754 1755 static int mptcp_nl_fill_addr(struct sk_buff *skb, 1756 struct mptcp_pm_addr_entry *entry) 1757 { 1758 struct mptcp_addr_info *addr = &entry->addr; 1759 struct nlattr *attr; 1760 1761 attr = nla_nest_start(skb, MPTCP_PM_ATTR_ADDR); 1762 if (!attr) 1763 return -EMSGSIZE; 1764 1765 if (nla_put_u16(skb, MPTCP_PM_ADDR_ATTR_FAMILY, addr->family)) 1766 goto nla_put_failure; 1767 if (nla_put_u16(skb, MPTCP_PM_ADDR_ATTR_PORT, ntohs(addr->port))) 1768 goto nla_put_failure; 1769 if (nla_put_u8(skb, MPTCP_PM_ADDR_ATTR_ID, addr->id)) 1770 goto nla_put_failure; 1771 if (nla_put_u32(skb, MPTCP_PM_ADDR_ATTR_FLAGS, entry->flags)) 1772 goto nla_put_failure; 1773 if (entry->ifindex && 1774 nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->ifindex)) 1775 goto nla_put_failure; 1776 1777 if (addr->family == AF_INET && 1778 nla_put_in_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR4, 1779 addr->addr.s_addr)) 1780 goto nla_put_failure; 1781 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 1782 else if (addr->family == AF_INET6 && 1783 nla_put_in6_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR6, &addr->addr6)) 1784 goto nla_put_failure; 1785 #endif 1786 nla_nest_end(skb, attr); 1787 return 0; 1788 1789 nla_put_failure: 1790 nla_nest_cancel(skb, attr); 1791 return -EMSGSIZE; 1792 } 1793 1794 static int mptcp_nl_cmd_get_addr(struct sk_buff *skb, struct genl_info *info) 1795 { 1796 struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; 1797 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1798 struct mptcp_pm_addr_entry addr, *entry; 1799 struct sk_buff *msg; 1800 void *reply; 1801 int ret; 1802 1803 ret = mptcp_pm_parse_entry(attr, info, false, &addr); 1804 if (ret < 0) 1805 return ret; 1806 1807 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1808 if (!msg) 1809 return -ENOMEM; 1810 1811 reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, 1812 info->genlhdr->cmd); 1813 if (!reply) { 1814 GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); 1815 ret = -EMSGSIZE; 1816 goto fail; 1817 } 1818 1819 spin_lock_bh(&pernet->lock); 1820 entry = __lookup_addr_by_id(pernet, addr.addr.id); 1821 if (!entry) { 1822 GENL_SET_ERR_MSG(info, "address not found"); 1823 ret = -EINVAL; 1824 goto unlock_fail; 1825 } 1826 1827 ret = mptcp_nl_fill_addr(msg, entry); 1828 if (ret) 1829 goto unlock_fail; 1830 1831 genlmsg_end(msg, reply); 1832 ret = genlmsg_reply(msg, info); 1833 spin_unlock_bh(&pernet->lock); 1834 return ret; 1835 1836 unlock_fail: 1837 spin_unlock_bh(&pernet->lock); 1838 1839 fail: 1840 nlmsg_free(msg); 1841 return ret; 1842 } 1843 1844 static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg, 1845 struct netlink_callback *cb) 1846 { 1847 struct net *net = sock_net(msg->sk); 1848 struct mptcp_pm_addr_entry *entry; 1849 struct pm_nl_pernet *pernet; 1850 int id = cb->args[0]; 1851 void *hdr; 1852 int i; 1853 1854 pernet = pm_nl_get_pernet(net); 1855 1856 spin_lock_bh(&pernet->lock); 1857 for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { 1858 if (test_bit(i, pernet->id_bitmap)) { 1859 entry = __lookup_addr_by_id(pernet, i); 1860 if (!entry) 1861 break; 1862 1863 if (entry->addr.id <= id) 1864 continue; 1865 1866 hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, 1867 cb->nlh->nlmsg_seq, &mptcp_genl_family, 1868 NLM_F_MULTI, MPTCP_PM_CMD_GET_ADDR); 1869 if (!hdr) 1870 break; 1871 1872 if (mptcp_nl_fill_addr(msg, entry) < 0) { 1873 genlmsg_cancel(msg, hdr); 1874 break; 1875 } 1876 1877 id = entry->addr.id; 1878 genlmsg_end(msg, hdr); 1879 } 1880 } 1881 spin_unlock_bh(&pernet->lock); 1882 1883 cb->args[0] = id; 1884 return msg->len; 1885 } 1886 1887 static int parse_limit(struct genl_info *info, int id, unsigned int *limit) 1888 { 1889 struct nlattr *attr = info->attrs[id]; 1890 1891 if (!attr) 1892 return 0; 1893 1894 *limit = nla_get_u32(attr); 1895 if (*limit > MPTCP_PM_ADDR_MAX) { 1896 GENL_SET_ERR_MSG(info, "limit greater than maximum"); 1897 return -EINVAL; 1898 } 1899 return 0; 1900 } 1901 1902 static int 1903 mptcp_nl_cmd_set_limits(struct sk_buff *skb, struct genl_info *info) 1904 { 1905 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1906 unsigned int rcv_addrs, subflows; 1907 int ret; 1908 1909 spin_lock_bh(&pernet->lock); 1910 rcv_addrs = pernet->add_addr_accept_max; 1911 ret = parse_limit(info, MPTCP_PM_ATTR_RCV_ADD_ADDRS, &rcv_addrs); 1912 if (ret) 1913 goto unlock; 1914 1915 subflows = pernet->subflows_max; 1916 ret = parse_limit(info, MPTCP_PM_ATTR_SUBFLOWS, &subflows); 1917 if (ret) 1918 goto unlock; 1919 1920 WRITE_ONCE(pernet->add_addr_accept_max, rcv_addrs); 1921 WRITE_ONCE(pernet->subflows_max, subflows); 1922 1923 unlock: 1924 spin_unlock_bh(&pernet->lock); 1925 return ret; 1926 } 1927 1928 static int 1929 mptcp_nl_cmd_get_limits(struct sk_buff *skb, struct genl_info *info) 1930 { 1931 struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1932 struct sk_buff *msg; 1933 void *reply; 1934 1935 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1936 if (!msg) 1937 return -ENOMEM; 1938 1939 reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, 1940 MPTCP_PM_CMD_GET_LIMITS); 1941 if (!reply) 1942 goto fail; 1943 1944 if (nla_put_u32(msg, MPTCP_PM_ATTR_RCV_ADD_ADDRS, 1945 READ_ONCE(pernet->add_addr_accept_max))) 1946 goto fail; 1947 1948 if (nla_put_u32(msg, MPTCP_PM_ATTR_SUBFLOWS, 1949 READ_ONCE(pernet->subflows_max))) 1950 goto fail; 1951 1952 genlmsg_end(msg, reply); 1953 return genlmsg_reply(msg, info); 1954 1955 fail: 1956 GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); 1957 nlmsg_free(msg); 1958 return -EMSGSIZE; 1959 } 1960 1961 static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, 1962 struct mptcp_addr_info *addr) 1963 { 1964 struct mptcp_rm_list list = { .nr = 0 }; 1965 1966 list.ids[list.nr++] = addr->id; 1967 1968 spin_lock_bh(&msk->pm.lock); 1969 mptcp_pm_nl_rm_subflow_received(msk, &list); 1970 __mark_subflow_endp_available(msk, list.ids[0]); 1971 mptcp_pm_create_subflow_or_signal_addr(msk); 1972 spin_unlock_bh(&msk->pm.lock); 1973 } 1974 1975 static int mptcp_nl_set_flags(struct net *net, 1976 struct mptcp_addr_info *addr, 1977 u8 bkup, u8 changed) 1978 { 1979 long s_slot = 0, s_num = 0; 1980 struct mptcp_sock *msk; 1981 int ret = -EINVAL; 1982 1983 while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1984 struct sock *sk = (struct sock *)msk; 1985 1986 if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) 1987 goto next; 1988 1989 lock_sock(sk); 1990 if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) 1991 ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, NULL, bkup); 1992 if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH) 1993 mptcp_pm_nl_fullmesh(msk, addr); 1994 release_sock(sk); 1995 1996 next: 1997 sock_put(sk); 1998 cond_resched(); 1999 } 2000 2001 return ret; 2002 } 2003 2004 int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup) 2005 { 2006 struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); 2007 u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | 2008 MPTCP_PM_ADDR_FLAG_FULLMESH; 2009 struct mptcp_pm_addr_entry *entry; 2010 u8 lookup_by_id = 0; 2011 2012 if (addr->addr.family == AF_UNSPEC) { 2013 lookup_by_id = 1; 2014 if (!addr->addr.id) 2015 return -EOPNOTSUPP; 2016 } 2017 2018 spin_lock_bh(&pernet->lock); 2019 entry = __lookup_addr(pernet, &addr->addr, lookup_by_id); 2020 if (!entry) { 2021 spin_unlock_bh(&pernet->lock); 2022 return -EINVAL; 2023 } 2024 if ((addr->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && 2025 (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { 2026 spin_unlock_bh(&pernet->lock); 2027 return -EINVAL; 2028 } 2029 2030 changed = (addr->flags ^ entry->flags) & mask; 2031 entry->flags = (entry->flags & ~mask) | (addr->flags & mask); 2032 *addr = *entry; 2033 spin_unlock_bh(&pernet->lock); 2034 2035 mptcp_nl_set_flags(net, &addr->addr, bkup, changed); 2036 return 0; 2037 } 2038 2039 static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) 2040 { 2041 struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, }; 2042 struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }; 2043 struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; 2044 struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; 2045 struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; 2046 struct net *net = sock_net(skb->sk); 2047 u8 bkup = 0; 2048 int ret; 2049 2050 ret = mptcp_pm_parse_entry(attr, info, false, &addr); 2051 if (ret < 0) 2052 return ret; 2053 2054 if (attr_rem) { 2055 ret = mptcp_pm_parse_entry(attr_rem, info, false, &remote); 2056 if (ret < 0) 2057 return ret; 2058 } 2059 2060 if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP) 2061 bkup = 1; 2062 2063 return mptcp_pm_set_flags(net, token, &addr, &remote, bkup); 2064 } 2065 2066 static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) 2067 { 2068 genlmsg_multicast_netns(&mptcp_genl_family, net, 2069 nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp); 2070 } 2071 2072 bool mptcp_userspace_pm_active(const struct mptcp_sock *msk) 2073 { 2074 return genl_has_listeners(&mptcp_genl_family, 2075 sock_net((const struct sock *)msk), 2076 MPTCP_PM_EV_GRP_OFFSET); 2077 } 2078 2079 static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) 2080 { 2081 const struct inet_sock *issk = inet_sk(ssk); 2082 const struct mptcp_subflow_context *sf; 2083 2084 if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family)) 2085 return -EMSGSIZE; 2086 2087 switch (ssk->sk_family) { 2088 case AF_INET: 2089 if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr)) 2090 return -EMSGSIZE; 2091 if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, issk->inet_daddr)) 2092 return -EMSGSIZE; 2093 break; 2094 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 2095 case AF_INET6: { 2096 const struct ipv6_pinfo *np = inet6_sk(ssk); 2097 2098 if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr)) 2099 return -EMSGSIZE; 2100 if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &ssk->sk_v6_daddr)) 2101 return -EMSGSIZE; 2102 break; 2103 } 2104 #endif 2105 default: 2106 WARN_ON_ONCE(1); 2107 return -EMSGSIZE; 2108 } 2109 2110 if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport)) 2111 return -EMSGSIZE; 2112 if (nla_put_be16(skb, MPTCP_ATTR_DPORT, issk->inet_dport)) 2113 return -EMSGSIZE; 2114 2115 sf = mptcp_subflow_ctx(ssk); 2116 if (WARN_ON_ONCE(!sf)) 2117 return -EINVAL; 2118 2119 if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf))) 2120 return -EMSGSIZE; 2121 2122 if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id)) 2123 return -EMSGSIZE; 2124 2125 return 0; 2126 } 2127 2128 static int mptcp_event_put_token_and_ssk(struct sk_buff *skb, 2129 const struct mptcp_sock *msk, 2130 const struct sock *ssk) 2131 { 2132 const struct sock *sk = (const struct sock *)msk; 2133 const struct mptcp_subflow_context *sf; 2134 u8 sk_err; 2135 2136 if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) 2137 return -EMSGSIZE; 2138 2139 if (mptcp_event_add_subflow(skb, ssk)) 2140 return -EMSGSIZE; 2141 2142 sf = mptcp_subflow_ctx(ssk); 2143 if (WARN_ON_ONCE(!sf)) 2144 return -EINVAL; 2145 2146 if (nla_put_u8(skb, MPTCP_ATTR_BACKUP, sf->backup)) 2147 return -EMSGSIZE; 2148 2149 if (ssk->sk_bound_dev_if && 2150 nla_put_s32(skb, MPTCP_ATTR_IF_IDX, ssk->sk_bound_dev_if)) 2151 return -EMSGSIZE; 2152 2153 sk_err = READ_ONCE(ssk->sk_err); 2154 if (sk_err && sk->sk_state == TCP_ESTABLISHED && 2155 nla_put_u8(skb, MPTCP_ATTR_ERROR, sk_err)) 2156 return -EMSGSIZE; 2157 2158 return 0; 2159 } 2160 2161 static int mptcp_event_sub_established(struct sk_buff *skb, 2162 const struct mptcp_sock *msk, 2163 const struct sock *ssk) 2164 { 2165 return mptcp_event_put_token_and_ssk(skb, msk, ssk); 2166 } 2167 2168 static int mptcp_event_sub_closed(struct sk_buff *skb, 2169 const struct mptcp_sock *msk, 2170 const struct sock *ssk) 2171 { 2172 const struct mptcp_subflow_context *sf; 2173 2174 if (mptcp_event_put_token_and_ssk(skb, msk, ssk)) 2175 return -EMSGSIZE; 2176 2177 sf = mptcp_subflow_ctx(ssk); 2178 if (!sf->reset_seen) 2179 return 0; 2180 2181 if (nla_put_u32(skb, MPTCP_ATTR_RESET_REASON, sf->reset_reason)) 2182 return -EMSGSIZE; 2183 2184 if (nla_put_u32(skb, MPTCP_ATTR_RESET_FLAGS, sf->reset_transient)) 2185 return -EMSGSIZE; 2186 2187 return 0; 2188 } 2189 2190 static int mptcp_event_created(struct sk_buff *skb, 2191 const struct mptcp_sock *msk, 2192 const struct sock *ssk) 2193 { 2194 int err = nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token); 2195 2196 if (err) 2197 return err; 2198 2199 if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side))) 2200 return -EMSGSIZE; 2201 2202 return mptcp_event_add_subflow(skb, ssk); 2203 } 2204 2205 void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id) 2206 { 2207 struct net *net = sock_net((const struct sock *)msk); 2208 struct nlmsghdr *nlh; 2209 struct sk_buff *skb; 2210 2211 if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) 2212 return; 2213 2214 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 2215 if (!skb) 2216 return; 2217 2218 nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, MPTCP_EVENT_REMOVED); 2219 if (!nlh) 2220 goto nla_put_failure; 2221 2222 if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) 2223 goto nla_put_failure; 2224 2225 if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, id)) 2226 goto nla_put_failure; 2227 2228 genlmsg_end(skb, nlh); 2229 mptcp_nl_mcast_send(net, skb, GFP_ATOMIC); 2230 return; 2231 2232 nla_put_failure: 2233 nlmsg_free(skb); 2234 } 2235 2236 void mptcp_event_addr_announced(const struct sock *ssk, 2237 const struct mptcp_addr_info *info) 2238 { 2239 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 2240 struct mptcp_sock *msk = mptcp_sk(subflow->conn); 2241 struct net *net = sock_net(ssk); 2242 struct nlmsghdr *nlh; 2243 struct sk_buff *skb; 2244 2245 if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) 2246 return; 2247 2248 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 2249 if (!skb) 2250 return; 2251 2252 nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, 2253 MPTCP_EVENT_ANNOUNCED); 2254 if (!nlh) 2255 goto nla_put_failure; 2256 2257 if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token)) 2258 goto nla_put_failure; 2259 2260 if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id)) 2261 goto nla_put_failure; 2262 2263 if (nla_put_be16(skb, MPTCP_ATTR_DPORT, 2264 info->port == 0 ? 2265 inet_sk(ssk)->inet_dport : 2266 info->port)) 2267 goto nla_put_failure; 2268 2269 switch (info->family) { 2270 case AF_INET: 2271 if (nla_put_in_addr(skb, MPTCP_ATTR_DADDR4, info->addr.s_addr)) 2272 goto nla_put_failure; 2273 break; 2274 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 2275 case AF_INET6: 2276 if (nla_put_in6_addr(skb, MPTCP_ATTR_DADDR6, &info->addr6)) 2277 goto nla_put_failure; 2278 break; 2279 #endif 2280 default: 2281 WARN_ON_ONCE(1); 2282 goto nla_put_failure; 2283 } 2284 2285 genlmsg_end(skb, nlh); 2286 mptcp_nl_mcast_send(net, skb, GFP_ATOMIC); 2287 return; 2288 2289 nla_put_failure: 2290 nlmsg_free(skb); 2291 } 2292 2293 void mptcp_event_pm_listener(const struct sock *ssk, 2294 enum mptcp_event_type event) 2295 { 2296 const struct inet_sock *issk = inet_sk(ssk); 2297 struct net *net = sock_net(ssk); 2298 struct nlmsghdr *nlh; 2299 struct sk_buff *skb; 2300 2301 if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) 2302 return; 2303 2304 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2305 if (!skb) 2306 return; 2307 2308 nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, event); 2309 if (!nlh) 2310 goto nla_put_failure; 2311 2312 if (nla_put_u16(skb, MPTCP_ATTR_FAMILY, ssk->sk_family)) 2313 goto nla_put_failure; 2314 2315 if (nla_put_be16(skb, MPTCP_ATTR_SPORT, issk->inet_sport)) 2316 goto nla_put_failure; 2317 2318 switch (ssk->sk_family) { 2319 case AF_INET: 2320 if (nla_put_in_addr(skb, MPTCP_ATTR_SADDR4, issk->inet_saddr)) 2321 goto nla_put_failure; 2322 break; 2323 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 2324 case AF_INET6: { 2325 const struct ipv6_pinfo *np = inet6_sk(ssk); 2326 2327 if (nla_put_in6_addr(skb, MPTCP_ATTR_SADDR6, &np->saddr)) 2328 goto nla_put_failure; 2329 break; 2330 } 2331 #endif 2332 default: 2333 WARN_ON_ONCE(1); 2334 goto nla_put_failure; 2335 } 2336 2337 genlmsg_end(skb, nlh); 2338 mptcp_nl_mcast_send(net, skb, GFP_KERNEL); 2339 return; 2340 2341 nla_put_failure: 2342 nlmsg_free(skb); 2343 } 2344 2345 void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, 2346 const struct sock *ssk, gfp_t gfp) 2347 { 2348 struct net *net = sock_net((const struct sock *)msk); 2349 struct nlmsghdr *nlh; 2350 struct sk_buff *skb; 2351 2352 if (!genl_has_listeners(&mptcp_genl_family, net, MPTCP_PM_EV_GRP_OFFSET)) 2353 return; 2354 2355 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); 2356 if (!skb) 2357 return; 2358 2359 nlh = genlmsg_put(skb, 0, 0, &mptcp_genl_family, 0, type); 2360 if (!nlh) 2361 goto nla_put_failure; 2362 2363 switch (type) { 2364 case MPTCP_EVENT_UNSPEC: 2365 WARN_ON_ONCE(1); 2366 break; 2367 case MPTCP_EVENT_CREATED: 2368 case MPTCP_EVENT_ESTABLISHED: 2369 if (mptcp_event_created(skb, msk, ssk) < 0) 2370 goto nla_put_failure; 2371 break; 2372 case MPTCP_EVENT_CLOSED: 2373 if (nla_put_u32(skb, MPTCP_ATTR_TOKEN, msk->token) < 0) 2374 goto nla_put_failure; 2375 break; 2376 case MPTCP_EVENT_ANNOUNCED: 2377 case MPTCP_EVENT_REMOVED: 2378 /* call mptcp_event_addr_announced()/removed instead */ 2379 WARN_ON_ONCE(1); 2380 break; 2381 case MPTCP_EVENT_SUB_ESTABLISHED: 2382 case MPTCP_EVENT_SUB_PRIORITY: 2383 if (mptcp_event_sub_established(skb, msk, ssk) < 0) 2384 goto nla_put_failure; 2385 break; 2386 case MPTCP_EVENT_SUB_CLOSED: 2387 if (mptcp_event_sub_closed(skb, msk, ssk) < 0) 2388 goto nla_put_failure; 2389 break; 2390 case MPTCP_EVENT_LISTENER_CREATED: 2391 case MPTCP_EVENT_LISTENER_CLOSED: 2392 break; 2393 } 2394 2395 genlmsg_end(skb, nlh); 2396 mptcp_nl_mcast_send(net, skb, gfp); 2397 return; 2398 2399 nla_put_failure: 2400 nlmsg_free(skb); 2401 } 2402 2403 static const struct genl_small_ops mptcp_pm_ops[] = { 2404 { 2405 .cmd = MPTCP_PM_CMD_ADD_ADDR, 2406 .doit = mptcp_nl_cmd_add_addr, 2407 .flags = GENL_UNS_ADMIN_PERM, 2408 }, 2409 { 2410 .cmd = MPTCP_PM_CMD_DEL_ADDR, 2411 .doit = mptcp_nl_cmd_del_addr, 2412 .flags = GENL_UNS_ADMIN_PERM, 2413 }, 2414 { 2415 .cmd = MPTCP_PM_CMD_FLUSH_ADDRS, 2416 .doit = mptcp_nl_cmd_flush_addrs, 2417 .flags = GENL_UNS_ADMIN_PERM, 2418 }, 2419 { 2420 .cmd = MPTCP_PM_CMD_GET_ADDR, 2421 .doit = mptcp_nl_cmd_get_addr, 2422 .dumpit = mptcp_nl_cmd_dump_addrs, 2423 }, 2424 { 2425 .cmd = MPTCP_PM_CMD_SET_LIMITS, 2426 .doit = mptcp_nl_cmd_set_limits, 2427 .flags = GENL_UNS_ADMIN_PERM, 2428 }, 2429 { 2430 .cmd = MPTCP_PM_CMD_GET_LIMITS, 2431 .doit = mptcp_nl_cmd_get_limits, 2432 }, 2433 { 2434 .cmd = MPTCP_PM_CMD_SET_FLAGS, 2435 .doit = mptcp_nl_cmd_set_flags, 2436 .flags = GENL_UNS_ADMIN_PERM, 2437 }, 2438 { 2439 .cmd = MPTCP_PM_CMD_ANNOUNCE, 2440 .doit = mptcp_nl_cmd_announce, 2441 .flags = GENL_UNS_ADMIN_PERM, 2442 }, 2443 { 2444 .cmd = MPTCP_PM_CMD_REMOVE, 2445 .doit = mptcp_nl_cmd_remove, 2446 .flags = GENL_UNS_ADMIN_PERM, 2447 }, 2448 { 2449 .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE, 2450 .doit = mptcp_nl_cmd_sf_create, 2451 .flags = GENL_UNS_ADMIN_PERM, 2452 }, 2453 { 2454 .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY, 2455 .doit = mptcp_nl_cmd_sf_destroy, 2456 .flags = GENL_UNS_ADMIN_PERM, 2457 }, 2458 }; 2459 2460 static struct genl_family mptcp_genl_family __ro_after_init = { 2461 .name = MPTCP_PM_NAME, 2462 .version = MPTCP_PM_VER, 2463 .maxattr = MPTCP_PM_ATTR_MAX, 2464 .policy = mptcp_pm_policy, 2465 .netnsok = true, 2466 .module = THIS_MODULE, 2467 .small_ops = mptcp_pm_ops, 2468 .n_small_ops = ARRAY_SIZE(mptcp_pm_ops), 2469 .resv_start_op = MPTCP_PM_CMD_SUBFLOW_DESTROY + 1, 2470 .mcgrps = mptcp_pm_mcgrps, 2471 .n_mcgrps = ARRAY_SIZE(mptcp_pm_mcgrps), 2472 }; 2473 2474 static int __net_init pm_nl_init_net(struct net *net) 2475 { 2476 struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); 2477 2478 INIT_LIST_HEAD_RCU(&pernet->local_addr_list); 2479 2480 /* Cit. 2 subflows ought to be enough for anybody. */ 2481 pernet->subflows_max = 2; 2482 pernet->next_id = 1; 2483 pernet->stale_loss_cnt = 4; 2484 spin_lock_init(&pernet->lock); 2485 2486 /* No need to initialize other pernet fields, the struct is zeroed at 2487 * allocation time. 2488 */ 2489 2490 return 0; 2491 } 2492 2493 static void __net_exit pm_nl_exit_net(struct list_head *net_list) 2494 { 2495 struct net *net; 2496 2497 list_for_each_entry(net, net_list, exit_list) { 2498 struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); 2499 2500 /* net is removed from namespace list, can't race with 2501 * other modifiers, also netns core already waited for a 2502 * RCU grace period. 2503 */ 2504 __flush_addrs(&pernet->local_addr_list); 2505 } 2506 } 2507 2508 static struct pernet_operations mptcp_pm_pernet_ops = { 2509 .init = pm_nl_init_net, 2510 .exit_batch = pm_nl_exit_net, 2511 .id = &pm_nl_pernet_id, 2512 .size = sizeof(struct pm_nl_pernet), 2513 }; 2514 2515 void __init mptcp_pm_nl_init(void) 2516 { 2517 if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0) 2518 panic("Failed to register MPTCP PM pernet subsystem.\n"); 2519 2520 if (genl_register_family(&mptcp_genl_family)) 2521 panic("Failed to register MPTCP PM netlink family\n"); 2522 } 2523