// SPDX-License-Identifier: GPL-2.0
/* Generic nexthop implementation
 *
 * Copyright (c) 2017-19 Cumulus Networks
 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
 */

#include <linux/nexthop.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
#include <net/lwtunnel.h>
#include <net/ndisc.h>
#include <net/nexthop.h>
#include <net/route.h>
#include <net/sock.h>

static void remove_nexthop(struct net *net, struct nexthop *nh,
			   struct nl_info *nlinfo);

#define NH_DEV_HASHBITS 8
#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)

/* netlink policy for RTM_*NEXTHOP attributes; strict_start_type on
 * NHA_UNSPEC makes every attribute in this family strictly validated
 */
static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
	[NHA_UNSPEC]		= { .strict_start_type = NHA_UNSPEC + 1 },
	[NHA_ID]		= { .type = NLA_U32 },
	[NHA_GROUP]		= { .type = NLA_BINARY },
	[NHA_GROUP_TYPE]	= { .type = NLA_U16 },
	[NHA_BLACKHOLE]		= { .type = NLA_FLAG },
	[NHA_OIF]		= { .type = NLA_U32 },
	[NHA_GATEWAY]		= { .type = NLA_BINARY },
	[NHA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[NHA_ENCAP]		= { .type = NLA_NESTED },
	[NHA_GROUPS]		= { .type = NLA_FLAG },
	[NHA_MASTER]		= { .type = NLA_U32 },
};

/* fold an ifindex into an NH_DEV_HASHBITS-wide bucket index by xoring
 * successive NH_DEV_HASHBITS-sized slices of the value together
 */
static unsigned int nh_dev_hashfn(unsigned int val)
{
	unsigned int mask = NH_DEV_HASHSIZE - 1;

	return (val ^
		(val >> NH_DEV_HASHBITS) ^
		(val >> (NH_DEV_HASHBITS * 2))) & mask;
}

/* link nhi into the per-netns device hash, keyed by its device's
 * ifindex; nhc_dev must already be set (WARN otherwise)
 */
static void nexthop_devhash_add(struct net *net, struct nh_info *nhi)
{
	struct net_device *dev = nhi->fib_nhc.nhc_dev;
	struct hlist_head *head;
	unsigned int hash;

	WARN_ON(!dev);

	hash = nh_dev_hashfn(dev->ifindex);
	head = &net->nexthop.devhash[hash];
	hlist_add_head(&nhi->dev_hash, head);
}

/* free a group nexthop's nh_group; every entry must already have been
 * detached (nh pointer cleared) by the remove path -- WARN if not
 */
static void nexthop_free_mpath(struct nexthop *nh)
{
	struct nh_group *nhg;
	int i;

	nhg = rcu_dereference_raw(nh->nh_grp);
	for (i = 0; i < nhg->num_nh; ++i)
		WARN_ON(nhg->nh_entries[i].nh);

	kfree(nhg);
}

static void
nexthop_free_single(struct nexthop *nh) 74 { 75 struct nh_info *nhi; 76 77 nhi = rcu_dereference_raw(nh->nh_info); 78 switch (nhi->family) { 79 case AF_INET: 80 fib_nh_release(nh->net, &nhi->fib_nh); 81 break; 82 case AF_INET6: 83 ipv6_stub->fib6_nh_release(&nhi->fib6_nh); 84 break; 85 } 86 kfree(nhi); 87 } 88 89 void nexthop_free_rcu(struct rcu_head *head) 90 { 91 struct nexthop *nh = container_of(head, struct nexthop, rcu); 92 93 if (nh->is_group) 94 nexthop_free_mpath(nh); 95 else 96 nexthop_free_single(nh); 97 98 kfree(nh); 99 } 100 EXPORT_SYMBOL_GPL(nexthop_free_rcu); 101 102 static struct nexthop *nexthop_alloc(void) 103 { 104 struct nexthop *nh; 105 106 nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL); 107 if (nh) { 108 INIT_LIST_HEAD(&nh->fi_list); 109 INIT_LIST_HEAD(&nh->f6i_list); 110 INIT_LIST_HEAD(&nh->grp_list); 111 } 112 return nh; 113 } 114 115 static struct nh_group *nexthop_grp_alloc(u16 num_nh) 116 { 117 size_t sz = offsetof(struct nexthop, nh_grp) 118 + sizeof(struct nh_group) 119 + sizeof(struct nh_grp_entry) * num_nh; 120 struct nh_group *nhg; 121 122 nhg = kzalloc(sz, GFP_KERNEL); 123 if (nhg) 124 nhg->num_nh = num_nh; 125 126 return nhg; 127 } 128 129 static void nh_base_seq_inc(struct net *net) 130 { 131 while (++net->nexthop.seq == 0) 132 ; 133 } 134 135 /* no reference taken; rcu lock or rtnl must be held */ 136 struct nexthop *nexthop_find_by_id(struct net *net, u32 id) 137 { 138 struct rb_node **pp, *parent = NULL, *next; 139 140 pp = &net->nexthop.rb_root.rb_node; 141 while (1) { 142 struct nexthop *nh; 143 144 next = rcu_dereference_raw(*pp); 145 if (!next) 146 break; 147 parent = next; 148 149 nh = rb_entry(parent, struct nexthop, rb_node); 150 if (id < nh->id) 151 pp = &next->rb_left; 152 else if (id > nh->id) 153 pp = &next->rb_right; 154 else 155 return nh; 156 } 157 return NULL; 158 } 159 EXPORT_SYMBOL_GPL(nexthop_find_by_id); 160 161 /* used for auto id allocation; called with rtnl held */ 162 static u32 nh_find_unused_id(struct 
net *net) 163 { 164 u32 id_start = net->nexthop.last_id_allocated; 165 166 while (1) { 167 net->nexthop.last_id_allocated++; 168 if (net->nexthop.last_id_allocated == id_start) 169 break; 170 171 if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated)) 172 return net->nexthop.last_id_allocated; 173 } 174 return 0; 175 } 176 177 static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg) 178 { 179 struct nexthop_grp *p; 180 size_t len = nhg->num_nh * sizeof(*p); 181 struct nlattr *nla; 182 u16 group_type = 0; 183 int i; 184 185 if (nhg->mpath) 186 group_type = NEXTHOP_GRP_TYPE_MPATH; 187 188 if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type)) 189 goto nla_put_failure; 190 191 nla = nla_reserve(skb, NHA_GROUP, len); 192 if (!nla) 193 goto nla_put_failure; 194 195 p = nla_data(nla); 196 for (i = 0; i < nhg->num_nh; ++i) { 197 p->id = nhg->nh_entries[i].nh->id; 198 p->weight = nhg->nh_entries[i].weight - 1; 199 p += 1; 200 } 201 202 return 0; 203 204 nla_put_failure: 205 return -EMSGSIZE; 206 } 207 208 static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh, 209 int event, u32 portid, u32 seq, unsigned int nlflags) 210 { 211 struct fib6_nh *fib6_nh; 212 struct fib_nh *fib_nh; 213 struct nlmsghdr *nlh; 214 struct nh_info *nhi; 215 struct nhmsg *nhm; 216 217 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags); 218 if (!nlh) 219 return -EMSGSIZE; 220 221 nhm = nlmsg_data(nlh); 222 nhm->nh_family = AF_UNSPEC; 223 nhm->nh_flags = nh->nh_flags; 224 nhm->nh_protocol = nh->protocol; 225 nhm->nh_scope = 0; 226 nhm->resvd = 0; 227 228 if (nla_put_u32(skb, NHA_ID, nh->id)) 229 goto nla_put_failure; 230 231 if (nh->is_group) { 232 struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 233 234 if (nla_put_nh_group(skb, nhg)) 235 goto nla_put_failure; 236 goto out; 237 } 238 239 nhi = rtnl_dereference(nh->nh_info); 240 nhm->nh_family = nhi->family; 241 if (nhi->reject_nh) { 242 if (nla_put_flag(skb, NHA_BLACKHOLE)) 243 goto nla_put_failure; 244 goto 
out; 245 } else { 246 const struct net_device *dev; 247 248 dev = nhi->fib_nhc.nhc_dev; 249 if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex)) 250 goto nla_put_failure; 251 } 252 253 nhm->nh_scope = nhi->fib_nhc.nhc_scope; 254 switch (nhi->family) { 255 case AF_INET: 256 fib_nh = &nhi->fib_nh; 257 if (fib_nh->fib_nh_gw_family && 258 nla_put_u32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4)) 259 goto nla_put_failure; 260 break; 261 262 case AF_INET6: 263 fib6_nh = &nhi->fib6_nh; 264 if (fib6_nh->fib_nh_gw_family && 265 nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6)) 266 goto nla_put_failure; 267 break; 268 } 269 270 if (nhi->fib_nhc.nhc_lwtstate && 271 lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate, 272 NHA_ENCAP, NHA_ENCAP_TYPE) < 0) 273 goto nla_put_failure; 274 275 out: 276 nlmsg_end(skb, nlh); 277 return 0; 278 279 nla_put_failure: 280 return -EMSGSIZE; 281 } 282 283 static size_t nh_nlmsg_size_grp(struct nexthop *nh) 284 { 285 struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 286 size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh; 287 288 return nla_total_size(sz) + 289 nla_total_size(2); /* NHA_GROUP_TYPE */ 290 } 291 292 static size_t nh_nlmsg_size_single(struct nexthop *nh) 293 { 294 struct nh_info *nhi = rtnl_dereference(nh->nh_info); 295 size_t sz; 296 297 /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE 298 * are mutually exclusive 299 */ 300 sz = nla_total_size(4); /* NHA_OIF */ 301 302 switch (nhi->family) { 303 case AF_INET: 304 if (nhi->fib_nh.fib_nh_gw_family) 305 sz += nla_total_size(4); /* NHA_GATEWAY */ 306 break; 307 308 case AF_INET6: 309 /* NHA_GATEWAY */ 310 if (nhi->fib6_nh.fib_nh_gw_family) 311 sz += nla_total_size(sizeof(const struct in6_addr)); 312 break; 313 } 314 315 if (nhi->fib_nhc.nhc_lwtstate) { 316 sz += lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate); 317 sz += nla_total_size(2); /* NHA_ENCAP_TYPE */ 318 } 319 320 return sz; 321 } 322 323 static size_t nh_nlmsg_size(struct nexthop *nh) 324 { 325 size_t sz = 
nla_total_size(4); /* NHA_ID */ 326 327 if (nh->is_group) 328 sz += nh_nlmsg_size_grp(nh); 329 else 330 sz += nh_nlmsg_size_single(nh); 331 332 return sz; 333 } 334 335 static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info) 336 { 337 unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0; 338 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 339 struct sk_buff *skb; 340 int err = -ENOBUFS; 341 342 skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any()); 343 if (!skb) 344 goto errout; 345 346 err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags); 347 if (err < 0) { 348 /* -EMSGSIZE implies BUG in nh_nlmsg_size() */ 349 WARN_ON(err == -EMSGSIZE); 350 kfree_skb(skb); 351 goto errout; 352 } 353 354 rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP, 355 info->nlh, gfp_any()); 356 return; 357 errout: 358 if (err < 0) 359 rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err); 360 } 361 362 static bool valid_group_nh(struct nexthop *nh, unsigned int npaths, 363 struct netlink_ext_ack *extack) 364 { 365 if (nh->is_group) { 366 struct nh_group *nhg = rtnl_dereference(nh->nh_grp); 367 368 /* nested multipath (group within a group) is not 369 * supported 370 */ 371 if (nhg->mpath) { 372 NL_SET_ERR_MSG(extack, 373 "Multipath group can not be a nexthop within a group"); 374 return false; 375 } 376 } else { 377 struct nh_info *nhi = rtnl_dereference(nh->nh_info); 378 379 if (nhi->reject_nh && npaths > 1) { 380 NL_SET_ERR_MSG(extack, 381 "Blackhole nexthop can not be used in a group with more than 1 path"); 382 return false; 383 } 384 } 385 386 return true; 387 } 388 389 static int nh_check_attr_group(struct net *net, struct nlattr *tb[], 390 struct netlink_ext_ack *extack) 391 { 392 unsigned int len = nla_len(tb[NHA_GROUP]); 393 struct nexthop_grp *nhg; 394 unsigned int i, j; 395 396 if (len & (sizeof(struct nexthop_grp) - 1)) { 397 NL_SET_ERR_MSG(extack, 398 "Invalid length for nexthop group attribute"); 399 return -EINVAL; 400 } 401 402 /* 
convert len to number of nexthop ids */ 403 len /= sizeof(*nhg); 404 405 nhg = nla_data(tb[NHA_GROUP]); 406 for (i = 0; i < len; ++i) { 407 if (nhg[i].resvd1 || nhg[i].resvd2) { 408 NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0"); 409 return -EINVAL; 410 } 411 if (nhg[i].weight > 254) { 412 NL_SET_ERR_MSG(extack, "Invalid value for weight"); 413 return -EINVAL; 414 } 415 for (j = i + 1; j < len; ++j) { 416 if (nhg[i].id == nhg[j].id) { 417 NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group"); 418 return -EINVAL; 419 } 420 } 421 } 422 423 nhg = nla_data(tb[NHA_GROUP]); 424 for (i = 0; i < len; ++i) { 425 struct nexthop *nh; 426 427 nh = nexthop_find_by_id(net, nhg[i].id); 428 if (!nh) { 429 NL_SET_ERR_MSG(extack, "Invalid nexthop id"); 430 return -EINVAL; 431 } 432 if (!valid_group_nh(nh, len, extack)) 433 return -EINVAL; 434 } 435 for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) { 436 if (!tb[i]) 437 continue; 438 439 NL_SET_ERR_MSG(extack, 440 "No other attributes can be set in nexthop groups"); 441 return -EINVAL; 442 } 443 444 return 0; 445 } 446 447 static bool ipv6_good_nh(const struct fib6_nh *nh) 448 { 449 int state = NUD_REACHABLE; 450 struct neighbour *n; 451 452 rcu_read_lock_bh(); 453 454 n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6); 455 if (n) 456 state = n->nud_state; 457 458 rcu_read_unlock_bh(); 459 460 return !!(state & NUD_VALID); 461 } 462 463 static bool ipv4_good_nh(const struct fib_nh *nh) 464 { 465 int state = NUD_REACHABLE; 466 struct neighbour *n; 467 468 rcu_read_lock_bh(); 469 470 n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, 471 (__force u32)nh->fib_nh_gw4); 472 if (n) 473 state = n->nud_state; 474 475 rcu_read_unlock_bh(); 476 477 return !!(state & NUD_VALID); 478 } 479 480 struct nexthop *nexthop_select_path(struct nexthop *nh, int hash) 481 { 482 struct nexthop *rc = NULL; 483 struct nh_group *nhg; 484 int i; 485 486 if (!nh->is_group) 487 return nh; 488 489 nhg = 
rcu_dereference(nh->nh_grp); 490 for (i = 0; i < nhg->num_nh; ++i) { 491 struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 492 struct nh_info *nhi; 493 494 if (hash > atomic_read(&nhge->upper_bound)) 495 continue; 496 497 /* nexthops always check if it is good and does 498 * not rely on a sysctl for this behavior 499 */ 500 nhi = rcu_dereference(nhge->nh->nh_info); 501 switch (nhi->family) { 502 case AF_INET: 503 if (ipv4_good_nh(&nhi->fib_nh)) 504 return nhge->nh; 505 break; 506 case AF_INET6: 507 if (ipv6_good_nh(&nhi->fib6_nh)) 508 return nhge->nh; 509 break; 510 } 511 512 if (!rc) 513 rc = nhge->nh; 514 } 515 516 return rc; 517 } 518 EXPORT_SYMBOL_GPL(nexthop_select_path); 519 520 int nexthop_for_each_fib6_nh(struct nexthop *nh, 521 int (*cb)(struct fib6_nh *nh, void *arg), 522 void *arg) 523 { 524 struct nh_info *nhi; 525 int err; 526 527 if (nh->is_group) { 528 struct nh_group *nhg; 529 int i; 530 531 nhg = rcu_dereference_rtnl(nh->nh_grp); 532 for (i = 0; i < nhg->num_nh; i++) { 533 struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 534 535 nhi = rcu_dereference_rtnl(nhge->nh->nh_info); 536 err = cb(&nhi->fib6_nh, arg); 537 if (err) 538 return err; 539 } 540 } else { 541 nhi = rcu_dereference_rtnl(nh->nh_info); 542 err = cb(&nhi->fib6_nh, arg); 543 if (err) 544 return err; 545 } 546 547 return 0; 548 } 549 EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh); 550 551 static int check_src_addr(const struct in6_addr *saddr, 552 struct netlink_ext_ack *extack) 553 { 554 if (!ipv6_addr_any(saddr)) { 555 NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects"); 556 return -EINVAL; 557 } 558 return 0; 559 } 560 561 int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, 562 struct netlink_ext_ack *extack) 563 { 564 struct nh_info *nhi; 565 566 /* fib6_src is unique to a fib6_info and limits the ability to cache 567 * routes in fib6_nh within a nexthop that is potentially shared 568 * across multiple fib entries. 
If the config wants to use source 569 * routing it can not use nexthop objects. mlxsw also does not allow 570 * fib6_src on routes. 571 */ 572 if (cfg && check_src_addr(&cfg->fc_src, extack) < 0) 573 return -EINVAL; 574 575 if (nh->is_group) { 576 struct nh_group *nhg; 577 578 nhg = rtnl_dereference(nh->nh_grp); 579 if (nhg->has_v4) 580 goto no_v4_nh; 581 } else { 582 nhi = rtnl_dereference(nh->nh_info); 583 if (nhi->family == AF_INET) 584 goto no_v4_nh; 585 } 586 587 return 0; 588 no_v4_nh: 589 NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop"); 590 return -EINVAL; 591 } 592 EXPORT_SYMBOL_GPL(fib6_check_nexthop); 593 594 /* if existing nexthop has ipv6 routes linked to it, need 595 * to verify this new spec works with ipv6 596 */ 597 static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new, 598 struct netlink_ext_ack *extack) 599 { 600 struct fib6_info *f6i; 601 602 if (list_empty(&old->f6i_list)) 603 return 0; 604 605 list_for_each_entry(f6i, &old->f6i_list, nh_list) { 606 if (check_src_addr(&f6i->fib6_src.addr, extack) < 0) 607 return -EINVAL; 608 } 609 610 return fib6_check_nexthop(new, NULL, extack); 611 } 612 613 static int nexthop_check_scope(struct nexthop *nh, u8 scope, 614 struct netlink_ext_ack *extack) 615 { 616 struct nh_info *nhi; 617 618 nhi = rtnl_dereference(nh->nh_info); 619 if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) { 620 NL_SET_ERR_MSG(extack, 621 "Route with host scope can not have a gateway"); 622 return -EINVAL; 623 } 624 625 if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) { 626 NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop"); 627 return -EINVAL; 628 } 629 630 return 0; 631 } 632 633 /* Invoked by fib add code to verify nexthop by id is ok with 634 * config for prefix; parts of fib_check_nh not done when nexthop 635 * object is used. 
636 */ 637 int fib_check_nexthop(struct nexthop *nh, u8 scope, 638 struct netlink_ext_ack *extack) 639 { 640 int err = 0; 641 642 if (nh->is_group) { 643 struct nh_group *nhg; 644 645 if (scope == RT_SCOPE_HOST) { 646 NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops"); 647 err = -EINVAL; 648 goto out; 649 } 650 651 nhg = rtnl_dereference(nh->nh_grp); 652 /* all nexthops in a group have the same scope */ 653 err = nexthop_check_scope(nhg->nh_entries[0].nh, scope, extack); 654 } else { 655 err = nexthop_check_scope(nh, scope, extack); 656 } 657 out: 658 return err; 659 } 660 661 static int fib_check_nh_list(struct nexthop *old, struct nexthop *new, 662 struct netlink_ext_ack *extack) 663 { 664 struct fib_info *fi; 665 666 list_for_each_entry(fi, &old->fi_list, nh_list) { 667 int err; 668 669 err = fib_check_nexthop(new, fi->fib_scope, extack); 670 if (err) 671 return err; 672 } 673 return 0; 674 } 675 676 static void nh_group_rebalance(struct nh_group *nhg) 677 { 678 int total = 0; 679 int w = 0; 680 int i; 681 682 for (i = 0; i < nhg->num_nh; ++i) 683 total += nhg->nh_entries[i].weight; 684 685 for (i = 0; i < nhg->num_nh; ++i) { 686 struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 687 int upper_bound; 688 689 w += nhge->weight; 690 upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1; 691 atomic_set(&nhge->upper_bound, upper_bound); 692 } 693 } 694 695 static void remove_nh_grp_entry(struct nh_grp_entry *nhge, 696 struct nh_group *nhg, 697 struct nl_info *nlinfo) 698 { 699 struct nexthop *nh = nhge->nh; 700 struct nh_grp_entry *nhges; 701 bool found = false; 702 int i; 703 704 WARN_ON(!nh); 705 706 nhges = nhg->nh_entries; 707 for (i = 0; i < nhg->num_nh; ++i) { 708 if (found) { 709 nhges[i-1].nh = nhges[i].nh; 710 nhges[i-1].weight = nhges[i].weight; 711 list_del(&nhges[i].nh_list); 712 list_add(&nhges[i-1].nh_list, &nhges[i-1].nh->grp_list); 713 } else if (nhg->nh_entries[i].nh == nh) { 714 found = true; 715 } 716 } 717 718 if 
(WARN_ON(!found)) 719 return; 720 721 nhg->num_nh--; 722 nhg->nh_entries[nhg->num_nh].nh = NULL; 723 724 nh_group_rebalance(nhg); 725 726 nexthop_put(nh); 727 728 if (nlinfo) 729 nexthop_notify(RTM_NEWNEXTHOP, nhge->nh_parent, nlinfo); 730 } 731 732 static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh, 733 struct nl_info *nlinfo) 734 { 735 struct nh_grp_entry *nhge, *tmp; 736 737 list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) { 738 struct nh_group *nhg; 739 740 list_del(&nhge->nh_list); 741 nhg = rtnl_dereference(nhge->nh_parent->nh_grp); 742 remove_nh_grp_entry(nhge, nhg, nlinfo); 743 744 /* if this group has no more entries then remove it */ 745 if (!nhg->num_nh) 746 remove_nexthop(net, nhge->nh_parent, nlinfo); 747 } 748 } 749 750 static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo) 751 { 752 struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp); 753 int i, num_nh = nhg->num_nh; 754 755 for (i = 0; i < num_nh; ++i) { 756 struct nh_grp_entry *nhge = &nhg->nh_entries[i]; 757 758 if (WARN_ON(!nhge->nh)) 759 continue; 760 761 list_del(&nhge->nh_list); 762 nexthop_put(nhge->nh); 763 nhge->nh = NULL; 764 nhg->num_nh--; 765 } 766 } 767 768 /* not called for nexthop replace */ 769 static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) 770 { 771 struct fib6_info *f6i, *tmp; 772 bool do_flush = false; 773 struct fib_info *fi; 774 775 list_for_each_entry(fi, &nh->fi_list, nh_list) { 776 fi->fib_flags |= RTNH_F_DEAD; 777 do_flush = true; 778 } 779 if (do_flush) 780 fib_flush(net); 781 782 /* ip6_del_rt removes the entry from this list hence the _safe */ 783 list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) { 784 /* __ip6_del_rt does a release, so do a hold here */ 785 fib6_info_hold(f6i); 786 ipv6_stub->ip6_del_rt(net, f6i); 787 } 788 } 789 790 static void __remove_nexthop(struct net *net, struct nexthop *nh, 791 struct nl_info *nlinfo) 792 { 793 __remove_nexthop_fib(net, nh); 794 795 if 
(nh->is_group) { 796 remove_nexthop_group(nh, nlinfo); 797 } else { 798 struct nh_info *nhi; 799 800 nhi = rtnl_dereference(nh->nh_info); 801 if (nhi->fib_nhc.nhc_dev) 802 hlist_del(&nhi->dev_hash); 803 804 remove_nexthop_from_groups(net, nh, nlinfo); 805 } 806 } 807 808 static void remove_nexthop(struct net *net, struct nexthop *nh, 809 struct nl_info *nlinfo) 810 { 811 /* remove from the tree */ 812 rb_erase(&nh->rb_node, &net->nexthop.rb_root); 813 814 if (nlinfo) 815 nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo); 816 817 __remove_nexthop(net, nh, nlinfo); 818 nh_base_seq_inc(net); 819 820 nexthop_put(nh); 821 } 822 823 /* if any FIB entries reference this nexthop, any dst entries 824 * need to be regenerated 825 */ 826 static void nh_rt_cache_flush(struct net *net, struct nexthop *nh) 827 { 828 struct fib6_info *f6i; 829 830 if (!list_empty(&nh->fi_list)) 831 rt_cache_flush(net); 832 833 list_for_each_entry(f6i, &nh->f6i_list, nh_list) 834 ipv6_stub->fib6_update_sernum(net, f6i); 835 } 836 837 static int replace_nexthop_grp(struct net *net, struct nexthop *old, 838 struct nexthop *new, 839 struct netlink_ext_ack *extack) 840 { 841 struct nh_group *oldg, *newg; 842 int i; 843 844 if (!new->is_group) { 845 NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop."); 846 return -EINVAL; 847 } 848 849 oldg = rtnl_dereference(old->nh_grp); 850 newg = rtnl_dereference(new->nh_grp); 851 852 /* update parents - used by nexthop code for cleanup */ 853 for (i = 0; i < newg->num_nh; i++) 854 newg->nh_entries[i].nh_parent = old; 855 856 rcu_assign_pointer(old->nh_grp, newg); 857 858 for (i = 0; i < oldg->num_nh; i++) 859 oldg->nh_entries[i].nh_parent = new; 860 861 rcu_assign_pointer(new->nh_grp, oldg); 862 863 return 0; 864 } 865 866 static int replace_nexthop_single(struct net *net, struct nexthop *old, 867 struct nexthop *new, 868 struct netlink_ext_ack *extack) 869 { 870 struct nh_info *oldi, *newi; 871 872 if (new->is_group) { 873 NL_SET_ERR_MSG(extack, 
"Can not replace a nexthop with a nexthop group."); 874 return -EINVAL; 875 } 876 877 oldi = rtnl_dereference(old->nh_info); 878 newi = rtnl_dereference(new->nh_info); 879 880 newi->nh_parent = old; 881 oldi->nh_parent = new; 882 883 old->protocol = new->protocol; 884 old->nh_flags = new->nh_flags; 885 886 rcu_assign_pointer(old->nh_info, newi); 887 rcu_assign_pointer(new->nh_info, oldi); 888 889 return 0; 890 } 891 892 static void __nexthop_replace_notify(struct net *net, struct nexthop *nh, 893 struct nl_info *info) 894 { 895 struct fib6_info *f6i; 896 897 if (!list_empty(&nh->fi_list)) { 898 struct fib_info *fi; 899 900 /* expectation is a few fib_info per nexthop and then 901 * a lot of routes per fib_info. So mark the fib_info 902 * and then walk the fib tables once 903 */ 904 list_for_each_entry(fi, &nh->fi_list, nh_list) 905 fi->nh_updated = true; 906 907 fib_info_notify_update(net, info); 908 909 list_for_each_entry(fi, &nh->fi_list, nh_list) 910 fi->nh_updated = false; 911 } 912 913 list_for_each_entry(f6i, &nh->f6i_list, nh_list) 914 ipv6_stub->fib6_rt_update(net, f6i, info); 915 } 916 917 /* send RTM_NEWROUTE with REPLACE flag set for all FIB entries 918 * linked to this nexthop and for all groups that the nexthop 919 * is a member of 920 */ 921 static void nexthop_replace_notify(struct net *net, struct nexthop *nh, 922 struct nl_info *info) 923 { 924 struct nh_grp_entry *nhge; 925 926 __nexthop_replace_notify(net, nh, info); 927 928 list_for_each_entry(nhge, &nh->grp_list, nh_list) 929 __nexthop_replace_notify(net, nhge->nh_parent, info); 930 } 931 932 static int replace_nexthop(struct net *net, struct nexthop *old, 933 struct nexthop *new, struct netlink_ext_ack *extack) 934 { 935 bool new_is_reject = false; 936 struct nh_grp_entry *nhge; 937 int err; 938 939 /* check that existing FIB entries are ok with the 940 * new nexthop definition 941 */ 942 err = fib_check_nh_list(old, new, extack); 943 if (err) 944 return err; 945 946 err = 
fib6_check_nh_list(old, new, extack); 947 if (err) 948 return err; 949 950 if (!new->is_group) { 951 struct nh_info *nhi = rtnl_dereference(new->nh_info); 952 953 new_is_reject = nhi->reject_nh; 954 } 955 956 list_for_each_entry(nhge, &old->grp_list, nh_list) { 957 /* if new nexthop is a blackhole, any groups using this 958 * nexthop cannot have more than 1 path 959 */ 960 if (new_is_reject && 961 nexthop_num_path(nhge->nh_parent) > 1) { 962 NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path"); 963 return -EINVAL; 964 } 965 966 err = fib_check_nh_list(nhge->nh_parent, new, extack); 967 if (err) 968 return err; 969 970 err = fib6_check_nh_list(nhge->nh_parent, new, extack); 971 if (err) 972 return err; 973 } 974 975 if (old->is_group) 976 err = replace_nexthop_grp(net, old, new, extack); 977 else 978 err = replace_nexthop_single(net, old, new, extack); 979 980 if (!err) { 981 nh_rt_cache_flush(net, old); 982 983 __remove_nexthop(net, new, NULL); 984 nexthop_put(new); 985 } 986 987 return err; 988 } 989 990 /* called with rtnl_lock held */ 991 static int insert_nexthop(struct net *net, struct nexthop *new_nh, 992 struct nh_config *cfg, struct netlink_ext_ack *extack) 993 { 994 struct rb_node **pp, *parent = NULL, *next; 995 struct rb_root *root = &net->nexthop.rb_root; 996 bool replace = !!(cfg->nlflags & NLM_F_REPLACE); 997 bool create = !!(cfg->nlflags & NLM_F_CREATE); 998 u32 new_id = new_nh->id; 999 int replace_notify = 0; 1000 int rc = -EEXIST; 1001 1002 pp = &root->rb_node; 1003 while (1) { 1004 struct nexthop *nh; 1005 1006 next = rtnl_dereference(*pp); 1007 if (!next) 1008 break; 1009 1010 parent = next; 1011 1012 nh = rb_entry(parent, struct nexthop, rb_node); 1013 if (new_id < nh->id) { 1014 pp = &next->rb_left; 1015 } else if (new_id > nh->id) { 1016 pp = &next->rb_right; 1017 } else if (replace) { 1018 rc = replace_nexthop(net, nh, new_nh, extack); 1019 if (!rc) { 1020 new_nh = nh; /* send notification with 
old nh */ 1021 replace_notify = 1; 1022 } 1023 goto out; 1024 } else { 1025 /* id already exists and not a replace */ 1026 goto out; 1027 } 1028 } 1029 1030 if (replace && !create) { 1031 NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists"); 1032 rc = -ENOENT; 1033 goto out; 1034 } 1035 1036 rb_link_node_rcu(&new_nh->rb_node, parent, pp); 1037 rb_insert_color(&new_nh->rb_node, root); 1038 rc = 0; 1039 out: 1040 if (!rc) { 1041 nh_base_seq_inc(net); 1042 nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo); 1043 if (replace_notify) 1044 nexthop_replace_notify(net, new_nh, &cfg->nlinfo); 1045 } 1046 1047 return rc; 1048 } 1049 1050 /* rtnl */ 1051 /* remove all nexthops tied to a device being deleted */ 1052 static void nexthop_flush_dev(struct net_device *dev) 1053 { 1054 unsigned int hash = nh_dev_hashfn(dev->ifindex); 1055 struct net *net = dev_net(dev); 1056 struct hlist_head *head = &net->nexthop.devhash[hash]; 1057 struct hlist_node *n; 1058 struct nh_info *nhi; 1059 1060 hlist_for_each_entry_safe(nhi, n, head, dev_hash) { 1061 if (nhi->fib_nhc.nhc_dev != dev) 1062 continue; 1063 1064 remove_nexthop(net, nhi->nh_parent, NULL); 1065 } 1066 } 1067 1068 /* rtnl; called when net namespace is deleted */ 1069 static void flush_all_nexthops(struct net *net) 1070 { 1071 struct rb_root *root = &net->nexthop.rb_root; 1072 struct rb_node *node; 1073 struct nexthop *nh; 1074 1075 while ((node = rb_first(root))) { 1076 nh = rb_entry(node, struct nexthop, rb_node); 1077 remove_nexthop(net, nh, NULL); 1078 cond_resched(); 1079 } 1080 } 1081 1082 static struct nexthop *nexthop_create_group(struct net *net, 1083 struct nh_config *cfg) 1084 { 1085 struct nlattr *grps_attr = cfg->nh_grp; 1086 struct nexthop_grp *entry = nla_data(grps_attr); 1087 struct nh_group *nhg; 1088 struct nexthop *nh; 1089 int i; 1090 1091 nh = nexthop_alloc(); 1092 if (!nh) 1093 return ERR_PTR(-ENOMEM); 1094 1095 nh->is_group = 1; 1096 1097 nhg = 
nexthop_grp_alloc(nla_len(grps_attr) / sizeof(*entry)); 1098 if (!nhg) { 1099 kfree(nh); 1100 return ERR_PTR(-ENOMEM); 1101 } 1102 1103 for (i = 0; i < nhg->num_nh; ++i) { 1104 struct nexthop *nhe; 1105 struct nh_info *nhi; 1106 1107 nhe = nexthop_find_by_id(net, entry[i].id); 1108 if (!nexthop_get(nhe)) 1109 goto out_no_nh; 1110 1111 nhi = rtnl_dereference(nhe->nh_info); 1112 if (nhi->family == AF_INET) 1113 nhg->has_v4 = true; 1114 1115 nhg->nh_entries[i].nh = nhe; 1116 nhg->nh_entries[i].weight = entry[i].weight + 1; 1117 list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list); 1118 nhg->nh_entries[i].nh_parent = nh; 1119 } 1120 1121 if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) { 1122 nhg->mpath = 1; 1123 nh_group_rebalance(nhg); 1124 } 1125 1126 rcu_assign_pointer(nh->nh_grp, nhg); 1127 1128 return nh; 1129 1130 out_no_nh: 1131 for (; i >= 0; --i) 1132 nexthop_put(nhg->nh_entries[i].nh); 1133 1134 kfree(nhg); 1135 kfree(nh); 1136 1137 return ERR_PTR(-ENOENT); 1138 } 1139 1140 static int nh_create_ipv4(struct net *net, struct nexthop *nh, 1141 struct nh_info *nhi, struct nh_config *cfg, 1142 struct netlink_ext_ack *extack) 1143 { 1144 struct fib_nh *fib_nh = &nhi->fib_nh; 1145 struct fib_config fib_cfg = { 1146 .fc_oif = cfg->nh_ifindex, 1147 .fc_gw4 = cfg->gw.ipv4, 1148 .fc_gw_family = cfg->gw.ipv4 ? 
/* NOTE(review): continuation of nh_create_ipv4() -- the function head and
 * the start of its struct fib_config initializer lie above this chunk.
 * The first line below completes a `cfg->gw.ipv4 ? AF_INET : 0` ternary.
 */
							     AF_INET : 0,
		.fc_flags = cfg->nh_flags,
		.fc_encap = cfg->nh_encap,
		.fc_encap_type = cfg->nh_encap_type,
	};
	u32 tb_id = l3mdev_fib_table(cfg->dev);
	int err;

	err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
	if (err) {
		fib_nh_release(net, fib_nh);
		goto out;
	}

	/* sets nh_dev if successful */
	err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
	if (!err) {
		nh->nh_flags = fib_nh->fib_nh_flags;
		fib_info_update_nhc_saddr(net, &fib_nh->nh_common,
					  fib_nh->fib_nh_scope);
	} else {
		/* fib_check_nh() failed: drop the half-initialized fib_nh */
		fib_nh_release(net, fib_nh);
	}
out:
	return err;
}

/* Initialize the IPv6 leg (nhi->fib6_nh) of a new nexthop from the parsed
 * netlink config.  On success copies the resulting fib_nh_flags back into
 * the nexthop; on failure the fib6_nh is released before returning.
 */
static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
			  struct nh_info *nhi, struct nh_config *cfg,
			  struct netlink_ext_ack *extack)
{
	struct fib6_nh *fib6_nh = &nhi->fib6_nh;
	struct fib6_config fib6_cfg = {
		.fc_table = l3mdev_fib_table(cfg->dev),
		.fc_ifindex = cfg->nh_ifindex,
		.fc_gateway = cfg->gw.ipv6,
		.fc_flags = cfg->nh_flags,
		.fc_encap = cfg->nh_encap,
		.fc_encap_type = cfg->nh_encap_type,
	};
	int err;

	/* a non-zero gateway address implies a gatewayed route */
	if (!ipv6_addr_any(&cfg->gw.ipv6))
		fib6_cfg.fc_flags |= RTF_GATEWAY;

	/* sets nh_dev if successful */
	err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
				      extack);
	if (err)
		ipv6_stub->fib6_nh_release(fib6_nh);
	else
		nh->nh_flags = fib6_nh->fib_nh_flags;

	return err;
}

/* Allocate and initialize a single (non-group) nexthop from cfg.
 * Returns the new nexthop or an ERR_PTR; on error all partial
 * allocations are freed.  The nh_info pointer is published with
 * rcu_assign_pointer() only after the device-hash insertion.
 */
static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
				      struct netlink_ext_ack *extack)
{
	struct nh_info *nhi;
	struct nexthop *nh;
	int err = 0;

	nh = nexthop_alloc();
	if (!nh)
		return ERR_PTR(-ENOMEM);

	nhi = kzalloc(sizeof(*nhi), GFP_KERNEL);
	if (!nhi) {
		kfree(nh);
		return ERR_PTR(-ENOMEM);
	}

	nh->nh_flags = cfg->nh_flags;
	nh->net = net;

	nhi->nh_parent = nh;
	nhi->family = cfg->nh_family;
	nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;

	if (cfg->nh_blackhole) {
		/* blackhole nexthops are anchored to the loopback device */
		nhi->reject_nh = 1;
		cfg->nh_ifindex = net->loopback_dev->ifindex;
	}

	switch (cfg->nh_family) {
	case AF_INET:
		err = nh_create_ipv4(net, nh, nhi, cfg, extack);
		break;
	case AF_INET6:
		err = nh_create_ipv6(net, nh, nhi, cfg, extack);
		break;
	}

	if (err) {
		kfree(nhi);
		kfree(nh);
		return ERR_PTR(err);
	}

	/* add the entry to the device based hash */
	nexthop_devhash_add(net, nhi);

	rcu_assign_pointer(nh->nh_info, nhi);

	return nh;
}

/* called with rtnl lock held */
static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
				   struct netlink_ext_ack *extack)
{
	struct nexthop *nh;
	int err;

	/* NLM_F_REPLACE without an explicit id is ambiguous */
	if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) {
		NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
		return ERR_PTR(-EINVAL);
	}

	if (!cfg->nh_id) {
		/* auto-allocate an unused id when userspace gave none */
		cfg->nh_id = nh_find_unused_id(net);
		if (!cfg->nh_id) {
			NL_SET_ERR_MSG(extack, "No unused id");
			return ERR_PTR(-EINVAL);
		}
	}

	if (cfg->nh_grp)
		nh = nexthop_create_group(net, cfg);
	else
		nh = nexthop_create(net, cfg, extack);

	if (IS_ERR(nh))
		return nh;

	refcount_set(&nh->refcnt, 1);
	nh->id = cfg->nh_id;
	nh->protocol = cfg->nh_protocol;
	nh->net = net;

	/* insert into the per-netns rbtree; unwind fully on failure */
	err = insert_nexthop(net, nh, cfg, extack);
	if (err) {
		__remove_nexthop(net, nh, NULL);
		nexthop_put(nh);
		nh = ERR_PTR(err);
	}

	return nh;
}

/* Parse and validate a RTM_NEWNEXTHOP request into *cfg.
 * Returns 0 on success or a negative errno with extack set.
 * Group requests (NHA_GROUP) are mutually exclusive with all
 * device/gateway/encap attributes; blackhole likewise.
 */
static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
			    struct nlmsghdr *nlh, struct nh_config *cfg,
			    struct netlink_ext_ack *extack)
{
	struct nhmsg *nhm = nlmsg_data(nlh);
	struct nlattr *tb[NHA_MAX + 1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
			  extack);
	if (err < 0)
		return err;

	err = -EINVAL;
	/* reserved fields and scope must be zero in the ancillary header */
	if (nhm->resvd || nhm->nh_scope) {
		NL_SET_ERR_MSG(extack, "Invalid values in ancillary header");
		goto out;
	}
	if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) {
		NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header");
		goto out;
	}

	switch (nhm->nh_family) {
	case AF_INET:
	case AF_INET6:
		break;
	case AF_UNSPEC:
		/* AF_UNSPEC is only valid for group nexthops */
		if (tb[NHA_GROUP])
			break;
		/* fallthrough */
	default:
		NL_SET_ERR_MSG(extack, "Invalid address family");
		goto out;
	}

	/* NHA_GROUPS / NHA_MASTER are dump-only filters */
	if (tb[NHA_GROUPS] || tb[NHA_MASTER]) {
		NL_SET_ERR_MSG(extack, "Invalid attributes in request");
		goto out;
	}

	memset(cfg, 0, sizeof(*cfg));
	cfg->nlflags = nlh->nlmsg_flags;
	cfg->nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->nlinfo.nlh = nlh;
	cfg->nlinfo.nl_net = net;

	cfg->nh_family = nhm->nh_family;
	cfg->nh_protocol = nhm->nh_protocol;
	cfg->nh_flags = nhm->nh_flags;

	if (tb[NHA_ID])
		cfg->nh_id = nla_get_u32(tb[NHA_ID]);

	if (tb[NHA_GROUP]) {
		if (nhm->nh_family != AF_UNSPEC) {
			NL_SET_ERR_MSG(extack, "Invalid family for group");
			goto out;
		}
		cfg->nh_grp = tb[NHA_GROUP];

		cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH;
		if (tb[NHA_GROUP_TYPE])
			cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]);

		if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) {
			NL_SET_ERR_MSG(extack, "Invalid group type");
			goto out;
		}
		err = nh_check_attr_group(net, tb, extack);

		/* no other attributes should be set */
		goto out;
	}

	if (tb[NHA_BLACKHOLE]) {
		if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
		    tb[NHA_ENCAP]   || tb[NHA_ENCAP_TYPE]) {
			NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway or oif");
			goto out;
		}

		cfg->nh_blackhole = 1;
		err = 0;
		goto out;
	}

	if (!tb[NHA_OIF]) {
		NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole nexthops");
		goto out;
	}

	cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
	if (cfg->nh_ifindex)
		cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);

	/* the device must exist, be up, and have carrier */
	if (!cfg->dev) {
		NL_SET_ERR_MSG(extack, "Invalid device index");
		goto out;
	} else if (!(cfg->dev->flags & IFF_UP)) {
		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
		err = -ENETDOWN;
		goto out;
	} else if (!netif_carrier_ok(cfg->dev)) {
		NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
		err = -ENETDOWN;
		goto out;
	}

	err = -EINVAL;
	if (tb[NHA_GATEWAY]) {
		struct nlattr *gwa = tb[NHA_GATEWAY];

		/* gateway attribute length must match the address family */
		switch (cfg->nh_family) {
		case AF_INET:
			if (nla_len(gwa) != sizeof(u32)) {
				NL_SET_ERR_MSG(extack, "Invalid gateway");
				goto out;
			}
			cfg->gw.ipv4 = nla_get_be32(gwa);
			break;
		case AF_INET6:
			if (nla_len(gwa) != sizeof(struct in6_addr)) {
				NL_SET_ERR_MSG(extack, "Invalid gateway");
				goto out;
			}
			cfg->gw.ipv6 = nla_get_in6_addr(gwa);
			break;
		default:
			NL_SET_ERR_MSG(extack,
				       "Unknown address family for gateway");
			goto out;
		}
	} else {
		/* device only nexthop (no gateway) */
		if (cfg->nh_flags & RTNH_F_ONLINK) {
			NL_SET_ERR_MSG(extack,
				       "ONLINK flag can not be set for nexthop without a gateway");
			goto out;
		}
	}

	if (tb[NHA_ENCAP]) {
		cfg->nh_encap = tb[NHA_ENCAP];

		/* an encap blob is meaningless without its type */
		if (!tb[NHA_ENCAP_TYPE]) {
			NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing");
			goto out;
		}

		cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]);
		err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack);
		if (err < 0)
			goto out;

	} else if (tb[NHA_ENCAP_TYPE]) {
		NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing");
		goto out;
	}


	err = 0;
out:
	return err;
}

/* rtnl */
static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nh_config cfg;
	struct nexthop *nh;
	int err;

	err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
	if (!err) {
		nh = nexthop_add(net, &cfg, extack);
		if (IS_ERR(nh))
			err = PTR_ERR(nh);
	}

	return err;
}

/* Validate a RTM_GETNEXTHOP / RTM_DELNEXTHOP request: only NHA_ID may
 * be present, the header must be all-zero, and the id must be non-zero.
 * On success *id holds the requested nexthop id.
 */
static int nh_valid_get_del_req(struct nlmsghdr *nlh, u32 *id,
				struct netlink_ext_ack *extack)
{
	struct nhmsg *nhm = nlmsg_data(nlh);
	struct nlattr *tb[NHA_MAX + 1];
	int err, i;

	err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
			  extack);
	if (err < 0)
		return err;

	err = -EINVAL;
	for (i = 0; i < __NHA_MAX; ++i) {
		if (!tb[i])
			continue;

		switch (i) {
		case NHA_ID:
			break;
		default:
			NL_SET_ERR_MSG_ATTR(extack, tb[i],
					    "Unexpected attribute in request");
			goto out;
		}
	}
	if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
		NL_SET_ERR_MSG(extack, "Invalid values in header");
		goto out;
	}

	if (!tb[NHA_ID]) {
		NL_SET_ERR_MSG(extack, "Nexthop id is missing");
		goto out;
	}

	*id = nla_get_u32(tb[NHA_ID]);
	if (!(*id))
		NL_SET_ERR_MSG(extack, "Invalid nexthop id");
	else
		err = 0;
out:
	return err;
}

/* rtnl */
static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nl_info nlinfo = {
		.nlh = nlh,
		.nl_net = net,
		.portid = NETLINK_CB(skb).portid,
	};
	struct nexthop *nh;
	int err;
	u32 id;

	err = nh_valid_get_del_req(nlh, &id, extack);
	if (err)
		return err;

	nh = nexthop_find_by_id(net, id);
	if (!nh)
		return -ENOENT;

	/* nlinfo is passed so the removal is notified to listeners */
	remove_nexthop(net, nh, &nlinfo);

	return 0;
}

/* rtnl */
static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct sk_buff *skb = NULL;
	struct nexthop *nh;
	int err;
	u32 id;

	err = nh_valid_get_del_req(nlh, &id, extack);
	if (err)
		return err;

	err = -ENOBUFS;
	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		goto out;

	err = -ENOENT;
	nh = nexthop_find_by_id(net, id);
	if (!nh)
		goto errout_free;

	err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
			   nlh->nlmsg_seq, 0);
	if (err < 0) {
		/* a single nexthop should always fit in NLMSG_GOODSIZE */
		WARN_ON(err == -EMSGSIZE);
		goto errout_free;
	}

	/* rtnl_unicast consumes skb regardless of outcome */
	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
out:
	return err;
errout_free:
	kfree_skb(skb);
	goto out;
}

/* Return true when nh should be SKIPPED by the dump, based on the
 * device / master / group / family filters from the request.
 */
static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx,
			     bool group_filter, u8 family)
{
	const struct net_device *dev;
	const struct nh_info *nhi;

	if (group_filter && !nh->is_group)
		return true;

	/* no filters: dump everything */
	if (!dev_idx && !master_idx && !family)
		return false;

	/* remaining filters only apply to single nexthops */
	if (nh->is_group)
		return true;

	nhi = rtnl_dereference(nh->nh_info);
	if (family && nhi->family != family)
		return true;

	dev = nhi->fib_nhc.nhc_dev;
	if (dev_idx && (!dev || dev->ifindex != dev_idx))
		return true;

	if (master_idx) {
		struct net_device *master;

		if (!dev)
			return true;

		master = netdev_master_upper_dev_get((struct net_device *)dev);
		if (!master || master->ifindex != master_idx)
			return true;
	}

	return false;
}

/* Validate a nexthop dump request and extract the optional
 * NHA_OIF / NHA_MASTER / NHA_GROUPS filters.
 */
static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
			     int *master_idx, bool *group_filter,
			     struct netlink_callback *cb)
{
	struct netlink_ext_ack *extack = cb->extack;
	struct nlattr *tb[NHA_MAX + 1];
	struct nhmsg *nhm;
	int err, i;
	u32 idx;

	err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
			  NULL);
	if (err < 0)
		return err;

	for (i = 0; i <= NHA_MAX; ++i) {
		if (!tb[i])
			continue;

		switch (i) {
		case NHA_OIF:
			idx = nla_get_u32(tb[i]);
			/* ifindex is an int; reject out-of-range values */
			if (idx > INT_MAX) {
				NL_SET_ERR_MSG(extack, "Invalid device index");
				return -EINVAL;
			}
			*dev_idx = idx;
			break;
		case NHA_MASTER:
			idx = nla_get_u32(tb[i]);
			if (idx > INT_MAX) {
				NL_SET_ERR_MSG(extack, "Invalid master device index");
				return -EINVAL;
			}
			*master_idx = idx;
			break;
		case NHA_GROUPS:
			*group_filter = true;
			break;
		default:
			NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
			return -EINVAL;
		}
	}

	nhm = nlmsg_data(nlh);
	if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request");
		return -EINVAL;
	}

	return 0;
}

/* rtnl */
static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nhmsg *nhm = nlmsg_data(cb->nlh);
	int dev_filter_idx = 0, master_idx = 0;
	struct net *net = sock_net(skb->sk);
	struct rb_root *root = &net->nexthop.rb_root;
	bool group_filter = false;
	struct rb_node *node;
	int idx = 0, s_idx;
	int err;

	err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx,
				&group_filter, cb);
	if (err < 0)
		return err;

	/* resume from where the previous dump pass stopped */
	s_idx = cb->args[0];
	for (node = rb_first(root); node; node = rb_next(node)) {
		struct nexthop *nh;

		if (idx < s_idx)
			goto cont;

		nh = rb_entry(node, struct nexthop, rb_node);
		if (nh_dump_filtered(nh, dev_filter_idx, master_idx,
				     group_filter, nhm->nh_family))
			goto cont;

		err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
				   NETLINK_CB(cb->skb).portid,
				   cb->nlh->nlmsg_seq, NLM_F_MULTI);
		if (err < 0) {
			/* skb full: stop here and let the next pass resume */
			if (likely(skb->len))
				goto out;

			goto out_err;
		}
cont:
		idx++;
	}

out:
	err = skb->len;
out_err:
	cb->args[0] = idx;
	cb->seq = net->nexthop.seq;
	nl_dump_check_consistent(cb, nlmsg_hdr(skb));

	return err;
}

/* Propagate a device MTU change to every IPv4 nexthop using that device. */
static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
	unsigned int hash = nh_dev_hashfn(dev->ifindex);
	struct net *net = dev_net(dev);
	struct hlist_head *head = &net->nexthop.devhash[hash];
	struct hlist_node *n;
	struct nh_info *nhi;

	hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
		if (nhi->fib_nhc.nhc_dev == dev) {
			if (nhi->family == AF_INET)
				fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu,
						   orig_mtu);
		}
	}
}

/* rtnl */
static int nh_netdev_event(struct notifier_block *this,
			   unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct netdev_notifier_info_ext *info_ext;

	switch (event) {
	case NETDEV_DOWN:
	case NETDEV_UNREGISTER:
		nexthop_flush_dev(dev);
		break;
	case NETDEV_CHANGE:
		/* flush when the device loses RUNNING/LOWER_UP state */
		if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
			nexthop_flush_dev(dev);
		break;
	case NETDEV_CHANGEMTU:
		info_ext = ptr;
		nexthop_sync_mtu(dev, info_ext->ext.mtu);
		rt_cache_flush(dev_net(dev));
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block nh_netdev_notifier = {
	.notifier_call = nh_netdev_event,
};

/* Per-netns teardown: flush all nexthops under rtnl, then free the
 * device hash table.
 */
static void __net_exit nexthop_net_exit(struct net *net)
{
	rtnl_lock();
	flush_all_nexthops(net);
	rtnl_unlock();
	kfree(net->nexthop.devhash);
}

/* Per-netns setup: empty rbtree plus a zeroed device hash table. */
static int __net_init nexthop_net_init(struct net *net)
{
	size_t sz = sizeof(struct hlist_head) * NH_DEV_HASHSIZE;

	net->nexthop.rb_root = RB_ROOT;
	net->nexthop.devhash = kzalloc(sz, GFP_KERNEL);
	if (!net->nexthop.devhash)
		return -ENOMEM;

	return 0;
}

static struct pernet_operations nexthop_net_ops = {
	.init = nexthop_net_init,
	.exit = nexthop_net_exit,
};

/* Module init: register pernet ops, the netdev notifier, and the
 * RTM_{NEW,DEL,GET}NEXTHOP rtnetlink handlers (PF_UNSPEC plus the
 * PF_INET/PF_INET6 aliases for new/dump).
 */
static int __init nexthop_init(void)
{
	register_pernet_subsys(&nexthop_net_ops);

	register_netdevice_notifier(&nh_netdev_notifier);

	rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop,
		      rtm_dump_nexthop, 0);

	rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
	rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);

	rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
	rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);

	return 0;
}
subsys_initcall(nexthop_init);