// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: semantics.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/netlink.h>

#include <net/arp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/ip6_fib.h>
#include <net/netlink.h>
#include <net/rtnh.h>
#include <net/lwtunnel.h>
#include <net/fib_notifier.h>
#include <net/addrconf.h>

#include "fib_lookup.h"

/*
 * fib_info_lock is taken with BHs disabled around updates of the hash
 * tables below (see fib_release_info() and fib_info_hash_move()) and as a
 * plain spin_lock around the fib_info_devhash walk in
 * ip_fib_check_default().
 *
 * fib_info_hash and fib_info_laddrhash both have fib_info_hash_size
 * buckets; fib_info_cnt is incremented in fib_create_info() and
 * decremented in free_fib_info().
 */
static DEFINE_SPINLOCK(fib_info_lock);
static struct hlist_head *fib_info_hash;
static struct hlist_head *fib_info_laddrhash;
static unsigned int fib_info_hash_size;
static unsigned int fib_info_cnt;

/*
 * fib_info_devhash holds nexthops chained through nh_hash; the bucket is
 * selected with fib_devindex_hashfn() on the device ifindex.
 */
#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];

/*
 * Nexthop iteration helpers.
 *
 * Both macros open a brace scope that must be closed with
 * endfor_nexthops(fi), and both declare the loop counter "nhsel":
 *   for_nexthops(fi)    - read-only walk, current entry is "nh"
 *   change_nexthops(fi) - writable walk, current entry is "nexthop_nh"
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh *nh;				\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel; struct fib_nh *nexthop_nh;				\
	for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	     nhsel < (fi)->fib_nhs;					\
	     nexthop_nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Hope, that gcc will optimize it to get rid of dummy loop */

/* Without multipath the body runs exactly once, on (fi)->fib_nh. */
#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh *nh = (fi)->fib_nh;		\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel;							\
	struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }


/*
 * Per route type (RTN_*) properties: the error code associated with the
 * type (0 for types that carry no error) and the scope that
 * fib_create_info() compares against the requested route scope.
 */
const struct fib_prop fib_props[RTN_MAX + 1] = {
	[RTN_UNSPEC] = {
		.error	= 0,
		.scope	= RT_SCOPE_NOWHERE,
	},
	[RTN_UNICAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_LOCAL] = {
		.error	= 0,
		.scope	= RT_SCOPE_HOST,
	},
	[RTN_BROADCAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},
	[RTN_ANYCAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},
	[RTN_MULTICAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_BLACKHOLE] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_UNREACHABLE] = {
		.error	= -EHOSTUNREACH,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_PROHIBIT] = {
		.error	= -EACCES,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_THROW] = {
		.error	= -EAGAIN,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_NAT] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},
	[RTN_XRESOLVE] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},
};

/*
 * Release the cached route stored in *rtp, if there is one, by calling
 * dst_dev_put() and dst_release_immediate() on its dst.  The slot itself
 * is left untouched (see the comment in the body).
 */
static void rt_fibinfo_free(struct rtable __rcu **rtp)
{
	struct rtable *rt = rcu_dereference_protected(*rtp, 1);

	if (!rt)
		return;

	/* Not even needed : RCU_INIT_POINTER(*rtp, NULL);
	 * because we waited an RCU grace period before calling
	 * free_fib_info_rcu()
	 */

	dst_dev_put(&rt->dst);
	dst_release_immediate(&rt->dst);
}

static
void free_nh_exceptions(struct fib_nh_common *nhc)
{
	/*
	 * Free the nexthop exception table of @nhc: every chained exception
	 * has its cached input/output routes released and is then kfree()d,
	 * and finally the bucket array is freed.  Nothing is done when no
	 * table was ever attached.
	 */
	struct fnhe_hash_bucket *hash;
	int i;

	hash = rcu_dereference_protected(nhc->nhc_exceptions, 1);
	if (!hash)
		return;
	for (i = 0; i < FNHE_HASH_SIZE; i++) {
		struct fib_nh_exception *fnhe;

		fnhe = rcu_dereference_protected(hash[i].chain, 1);
		while (fnhe) {
			struct fib_nh_exception *next;

			/* fetch the successor before fnhe is freed */
			next = rcu_dereference_protected(fnhe->fnhe_next, 1);

			rt_fibinfo_free(&fnhe->fnhe_rth_input);
			rt_fibinfo_free(&fnhe->fnhe_rth_output);

			kfree(fnhe);

			fnhe = next;
		}
	}
	kfree(hash);
}

/*
 * Release the cached route held in each possible CPU's slot of @rtp and
 * then free the per-CPU allocation itself.  A NULL @rtp is a no-op.
 */
static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
{
	int cpu;

	if (!rtp)
		return;

	for_each_possible_cpu(cpu) {
		struct rtable *rt;

		rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
		if (rt) {
			dst_dev_put(&rt->dst);
			dst_release_immediate(&rt->dst);
		}
	}
	free_percpu(rtp);
}

/*
 * Drop everything a fib_nh_common owns: the device reference (if a device
 * is set), the lwtunnel state reference, the per-CPU and input route
 * caches and the exception table.
 */
void fib_nh_common_release(struct fib_nh_common *nhc)
{
	if (nhc->nhc_dev)
		dev_put(nhc->nhc_dev);

	lwtstate_put(nhc->nhc_lwtstate);
	rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
	rt_fibinfo_free(&nhc->nhc_rth_input);
	free_nh_exceptions(nhc);
}
EXPORT_SYMBOL_GPL(fib_nh_common_release);

/*
 * Release an IPv4 nexthop.  With CONFIG_IP_ROUTE_CLASSID, a nexthop that
 * carries a tclassid also gives back its count in
 * net->ipv4.fib_num_tclassid_users (taken in fib_nh_init()).
 */
void fib_nh_release(struct net *net, struct fib_nh *fib_nh)
{
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (fib_nh->nh_tclassid)
		net->ipv4.fib_num_tclassid_users--;
#endif
	fib_nh_common_release(&fib_nh->nh_common);
}

/* Release a nexthop info record */
/*
 * RCU callback queued by free_fib_info(): releases every nexthop, drops
 * the metrics reference and frees the fib_info itself.
 */
static void free_fib_info_rcu(struct rcu_head *head)
{
	struct fib_info *fi = container_of(head, struct fib_info, rcu);

	change_nexthops(fi) {
		fib_nh_release(fi->fib_net, nexthop_nh);
	} endfor_nexthops(fi);

	ip_fib_metrics_put(fi->fib_metrics);

	kfree(fi);
}

/*
 * Schedule @fi for freeing via call_rcu().  An entry that has not been
 * marked dead (fib_dead == 0) is left alone and only a warning is logged.
 */
void free_fib_info(struct fib_info *fi)
{
	if (fi->fib_dead == 0) {
		pr_warn("Freeing alive fib_info %p\n", fi);
		return;
	}
	fib_info_cnt--;

	call_rcu(&fi->rcu, free_fib_info_rcu);
}
EXPORT_SYMBOL_GPL(free_fib_info);

/*
 * Drop one fib_treeref on @fi (NULL is tolerated).  When the count hits
 * zero the entry is unlinked from the info hash, from the local-address
 * hash (only if it has a prefsrc) and every nexthop with a device is
 * unlinked from the device hash; it is then marked dead and put.
 * All of this happens under fib_info_lock with BHs disabled.
 */
void fib_release_info(struct fib_info *fi)
{
	spin_lock_bh(&fib_info_lock);
	if (fi && --fi->fib_treeref == 0) {
		hlist_del(&fi->fib_hash);
		if (fi->fib_prefsrc)
			hlist_del(&fi->fib_lhash);
		change_nexthops(fi) {
			/* nexthops without a device are skipped here */
			if (!nexthop_nh->fib_nh_dev)
				continue;
			hlist_del(&nexthop_nh->nh_hash);
		} endfor_nexthops(fi)
		fi->fib_dead = 1;
		fib_info_put(fi);
	}
	spin_unlock_bh(&fib_info_lock);
}

/*
 * Compare the nexthop arrays of @fi and @ofi pairwise, walking fi->fib_nhs
 * entries (the caller, fib_find_info(), has already checked that both have
 * the same count).  Returns 0 when all nexthops agree, -1 on the first
 * difference.  Flag bits in RTNH_COMPARE_MASK are ignored.
 */
static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
{
	const struct fib_nh *onh = ofi->fib_nh;

	for_nexthops(fi) {
		if (nh->fib_nh_oif != onh->fib_nh_oif ||
		    nh->fib_nh_gw_family != onh->fib_nh_gw_family ||
		    nh->fib_nh_scope != onh->fib_nh_scope ||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		    nh->fib_nh_weight != onh->fib_nh_weight ||
#endif
#ifdef CONFIG_IP_ROUTE_CLASSID
		    nh->nh_tclassid != onh->nh_tclassid ||
#endif
		    lwtunnel_cmp_encap(nh->fib_nh_lws, onh->fib_nh_lws) ||
		    ((nh->fib_nh_flags ^ onh->fib_nh_flags) & ~RTNH_COMPARE_MASK))
			return -1;

		/* gateway families are equal here; compare the address */
		if (nh->fib_nh_gw_family == AF_INET &&
		    nh->fib_nh_gw4 != onh->fib_nh_gw4)
			return -1;

		if (nh->fib_nh_gw_family == AF_INET6 &&
		    ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6))
			return -1;

		onh++;
	} endfor_nexthops(fi);
	return 0;
}

/*
 * Fold @val (an ifindex at the call sites in this file) into a
 * fib_info_devhash bucket index in [0, DEVINDEX_HASHSIZE).
 */
static inline unsigned int fib_devindex_hashfn(unsigned int val)
{
	unsigned int mask = DEVINDEX_HASHSIZE - 1;

	return (val ^
		(val >> DEVINDEX_HASHBITS) ^
		(val >> (DEVINDEX_HASHBITS * 2))) & mask;
}

/*
 * Bucket index of @fi in fib_info_hash.  The mask is derived from the
 * current fib_info_hash_size, so the result changes when the table is
 * resized (see fib_info_hash_move()).
 */
static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
{
	unsigned int mask = (fib_info_hash_size - 1);
	unsigned int val = fi->fib_nhs;

	val ^= (fi->fib_protocol << 8) | fi->fib_scope;
/* Mix in the preferred source, the priority and each nexthop's oif hash. */
	val ^= (__force u32)fi->fib_prefsrc;
	val ^= fi->fib_priority;
	for_nexthops(fi) {
		val ^= fib_devindex_hashfn(nh->fib_nh_oif);
	} endfor_nexthops(fi)

	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
}

/*
 * Look for an already hashed fib_info that is equivalent to @nfi: same
 * netns, nexthop count, protocol, scope, prefsrc, priority, type, metrics
 * (compared bytewise over RTAX_MAX u32 values), flags outside
 * RTNH_COMPARE_MASK, and nexthops as judged by nh_comp().
 * Returns the existing entry or NULL.
 */
static struct fib_info *fib_find_info(const struct fib_info *nfi)
{
	struct hlist_head *head;
	struct fib_info *fi;
	unsigned int hash;

	hash = fib_info_hashfn(nfi);
	head = &fib_info_hash[hash];

	hlist_for_each_entry(fi, head, fib_hash) {
		if (!net_eq(fi->fib_net, nfi->fib_net))
			continue;
		if (fi->fib_nhs != nfi->fib_nhs)
			continue;
		if (nfi->fib_protocol == fi->fib_protocol &&
		    nfi->fib_scope == fi->fib_scope &&
		    nfi->fib_prefsrc == fi->fib_prefsrc &&
		    nfi->fib_priority == fi->fib_priority &&
		    nfi->fib_type == fi->fib_type &&
		    memcmp(nfi->fib_metrics, fi->fib_metrics,
			   sizeof(u32) * RTAX_MAX) == 0 &&
		    !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
			return fi;
	}

	return NULL;
}

/* Check, that the gateway is already configured.
 * Used only by redirect accept routine.
 *
 * Walks the fib_info_devhash bucket of @dev under fib_info_lock and
 * returns 0 if it contains a nexthop on @dev whose IPv4 gateway is @gw
 * and which is not flagged RTNH_F_DEAD; returns -1 otherwise.
 */
int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
	struct hlist_head *head;
	struct fib_nh *nh;
	unsigned int hash;

	spin_lock(&fib_info_lock);

	hash = fib_devindex_hashfn(dev->ifindex);
	head = &fib_info_devhash[hash];
	hlist_for_each_entry(nh, head, nh_hash) {
		if (nh->fib_nh_dev == dev &&
		    nh->fib_nh_gw4 == gw &&
		    !(nh->fib_nh_flags & RTNH_F_DEAD)) {
			spin_unlock(&fib_info_lock);
			return 0;
		}
	}

	spin_unlock(&fib_info_lock);

	return -1;
}

/*
 * Size of the netlink message buffer that rtmsg_fib() allocates for a
 * notification about @fi: the fixed rtmsg header and scalar attributes,
 * room for a nested block of RTAX_MAX metrics and, when the route has
 * nexthops, a nested attribute holding one rtnexthop (plus flow, gateway
 * and encap attributes) per nexthop.
 */
static inline size_t fib_nlmsg_size(struct fib_info *fi)
{
	size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
			 + nla_total_size(4) /* RTA_TABLE */
			 + nla_total_size(4) /* RTA_DST */
			 + nla_total_size(4) /* RTA_PRIORITY */
			 + nla_total_size(4) /* RTA_PREFSRC */
			 + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */

	/* space for nested metrics */
	payload += nla_total_size((RTAX_MAX * nla_total_size(4)));

	if (fi->fib_nhs) {
		size_t nh_encapsize = 0;
		/* Also handles the special case fib_nhs == 1 */

		/* each nexthop is packed in an attribute */
		size_t nhsize = nla_total_size(sizeof(struct rtnexthop));

		/* may contain flow and gateway attribute */
		nhsize += 2 * nla_total_size(4);

		/* grab encap info */
		for_nexthops(fi) {
			if (nh->fib_nh_lws) {
				/* RTA_ENCAP_TYPE */
				nh_encapsize += lwtunnel_get_encap_size(
						nh->fib_nh_lws);
				/* RTA_ENCAP */
				nh_encapsize += nla_total_size(2);
			}
		} endfor_nexthops(fi);

		/* all nexthops are packed in a nested attribute */
		payload += nla_total_size((fi->fib_nhs * nhsize) +
					  nh_encapsize);

	}

	return payload;
}

/*
 * Build a route notification for alias @fa with fib_dump_info() and send
 * it to the RTNLGRP_IPV4_ROUTE group.  If the skb cannot be allocated or
 * filled, the error is reported to the group's listeners with
 * rtnl_set_sk_err() instead.
 */
void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
	       int dst_len, u32 tb_id, const struct nl_info *info,
	       unsigned int nlm_flags)
{
	struct sk_buff *skb;
	/* echo the requester's sequence number when there is a request */
	u32 seq = info->nlh ?
info->nlh->nlmsg_seq : 0; 429 int err = -ENOBUFS; 430 431 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); 432 if (!skb) 433 goto errout; 434 435 err = fib_dump_info(skb, info->portid, seq, event, tb_id, 436 fa->fa_type, key, dst_len, 437 fa->fa_tos, fa->fa_info, nlm_flags); 438 if (err < 0) { 439 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ 440 WARN_ON(err == -EMSGSIZE); 441 kfree_skb(skb); 442 goto errout; 443 } 444 rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE, 445 info->nlh, GFP_KERNEL); 446 return; 447 errout: 448 if (err < 0) 449 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); 450 } 451 452 static int fib_detect_death(struct fib_info *fi, int order, 453 struct fib_info **last_resort, int *last_idx, 454 int dflt) 455 { 456 const struct fib_nh_common *nhc = fib_info_nhc(fi, 0); 457 struct neighbour *n; 458 int state = NUD_NONE; 459 460 if (likely(nhc->nhc_gw_family == AF_INET)) 461 n = neigh_lookup(&arp_tbl, &nhc->nhc_gw.ipv4, nhc->nhc_dev); 462 else if (nhc->nhc_gw_family == AF_INET6) 463 n = neigh_lookup(ipv6_stub->nd_tbl, &nhc->nhc_gw.ipv6, 464 nhc->nhc_dev); 465 else 466 n = NULL; 467 468 if (n) { 469 state = n->nud_state; 470 neigh_release(n); 471 } else { 472 return 0; 473 } 474 if (state == NUD_REACHABLE) 475 return 0; 476 if ((state & NUD_VALID) && order != dflt) 477 return 0; 478 if ((state & NUD_VALID) || 479 (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) { 480 *last_resort = fi; 481 *last_idx = order; 482 } 483 return 1; 484 } 485 486 int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap, 487 u16 encap_type, void *cfg, gfp_t gfp_flags, 488 struct netlink_ext_ack *extack) 489 { 490 int err; 491 492 nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *, 493 gfp_flags); 494 if (!nhc->nhc_pcpu_rth_output) 495 return -ENOMEM; 496 497 if (encap) { 498 struct lwtunnel_state *lwtstate; 499 500 if (encap_type == LWTUNNEL_ENCAP_NONE) { 501 NL_SET_ERR_MSG(extack, "LWT encap type not 
specified"); 502 err = -EINVAL; 503 goto lwt_failure; 504 } 505 err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family, 506 cfg, &lwtstate, extack); 507 if (err) 508 goto lwt_failure; 509 510 nhc->nhc_lwtstate = lwtstate_get(lwtstate); 511 } 512 513 return 0; 514 515 lwt_failure: 516 rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); 517 nhc->nhc_pcpu_rth_output = NULL; 518 return err; 519 } 520 EXPORT_SYMBOL_GPL(fib_nh_common_init); 521 522 int fib_nh_init(struct net *net, struct fib_nh *nh, 523 struct fib_config *cfg, int nh_weight, 524 struct netlink_ext_ack *extack) 525 { 526 int err; 527 528 nh->fib_nh_family = AF_INET; 529 530 err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap, 531 cfg->fc_encap_type, cfg, GFP_KERNEL, extack); 532 if (err) 533 return err; 534 535 nh->fib_nh_oif = cfg->fc_oif; 536 nh->fib_nh_gw_family = cfg->fc_gw_family; 537 if (cfg->fc_gw_family == AF_INET) 538 nh->fib_nh_gw4 = cfg->fc_gw4; 539 else if (cfg->fc_gw_family == AF_INET6) 540 nh->fib_nh_gw6 = cfg->fc_gw6; 541 542 nh->fib_nh_flags = cfg->fc_flags; 543 544 #ifdef CONFIG_IP_ROUTE_CLASSID 545 nh->nh_tclassid = cfg->fc_flow; 546 if (nh->nh_tclassid) 547 net->ipv4.fib_num_tclassid_users++; 548 #endif 549 #ifdef CONFIG_IP_ROUTE_MULTIPATH 550 nh->fib_nh_weight = nh_weight; 551 #endif 552 return 0; 553 } 554 555 #ifdef CONFIG_IP_ROUTE_MULTIPATH 556 557 static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, 558 struct netlink_ext_ack *extack) 559 { 560 int nhs = 0; 561 562 while (rtnh_ok(rtnh, remaining)) { 563 nhs++; 564 rtnh = rtnh_next(rtnh, &remaining); 565 } 566 567 /* leftover implies invalid nexthop configuration, discard it */ 568 if (remaining > 0) { 569 NL_SET_ERR_MSG(extack, 570 "Invalid nexthop configuration - extra data after nexthops"); 571 nhs = 0; 572 } 573 574 return nhs; 575 } 576 577 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 578 int remaining, struct fib_config *cfg, 579 struct netlink_ext_ack *extack) 580 { 581 struct net 
*net = fi->fib_net; 582 struct fib_config fib_cfg; 583 int ret; 584 585 change_nexthops(fi) { 586 int attrlen; 587 588 memset(&fib_cfg, 0, sizeof(fib_cfg)); 589 590 if (!rtnh_ok(rtnh, remaining)) { 591 NL_SET_ERR_MSG(extack, 592 "Invalid nexthop configuration - extra data after nexthop"); 593 return -EINVAL; 594 } 595 596 if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) { 597 NL_SET_ERR_MSG(extack, 598 "Invalid flags for nexthop - can not contain DEAD or LINKDOWN"); 599 return -EINVAL; 600 } 601 602 fib_cfg.fc_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; 603 fib_cfg.fc_oif = rtnh->rtnh_ifindex; 604 605 attrlen = rtnh_attrlen(rtnh); 606 if (attrlen > 0) { 607 struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh); 608 609 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 610 nlav = nla_find(attrs, attrlen, RTA_VIA); 611 if (nla && nlav) { 612 NL_SET_ERR_MSG(extack, 613 "Nexthop configuration can not contain both GATEWAY and VIA"); 614 return -EINVAL; 615 } 616 if (nla) { 617 fib_cfg.fc_gw4 = nla_get_in_addr(nla); 618 if (fib_cfg.fc_gw4) 619 fib_cfg.fc_gw_family = AF_INET; 620 } else if (nlav) { 621 ret = fib_gw_from_via(&fib_cfg, nlav, extack); 622 if (ret) 623 goto errout; 624 } 625 626 nla = nla_find(attrs, attrlen, RTA_FLOW); 627 if (nla) 628 fib_cfg.fc_flow = nla_get_u32(nla); 629 630 fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); 631 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 632 if (nla) 633 fib_cfg.fc_encap_type = nla_get_u16(nla); 634 } 635 636 ret = fib_nh_init(net, nexthop_nh, &fib_cfg, 637 rtnh->rtnh_hops + 1, extack); 638 if (ret) 639 goto errout; 640 641 rtnh = rtnh_next(rtnh, &remaining); 642 } endfor_nexthops(fi); 643 644 ret = -EINVAL; 645 if (cfg->fc_oif && fi->fib_nh->fib_nh_oif != cfg->fc_oif) { 646 NL_SET_ERR_MSG(extack, 647 "Nexthop device index does not match RTA_OIF"); 648 goto errout; 649 } 650 if (cfg->fc_gw_family) { 651 if (cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family || 652 (cfg->fc_gw_family == AF_INET && 653 
fi->fib_nh->fib_nh_gw4 != cfg->fc_gw4) || 654 (cfg->fc_gw_family == AF_INET6 && 655 ipv6_addr_cmp(&fi->fib_nh->fib_nh_gw6, &cfg->fc_gw6))) { 656 NL_SET_ERR_MSG(extack, 657 "Nexthop gateway does not match RTA_GATEWAY or RTA_VIA"); 658 goto errout; 659 } 660 } 661 #ifdef CONFIG_IP_ROUTE_CLASSID 662 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) { 663 NL_SET_ERR_MSG(extack, 664 "Nexthop class id does not match RTA_FLOW"); 665 goto errout; 666 } 667 #endif 668 ret = 0; 669 errout: 670 return ret; 671 } 672 673 static void fib_rebalance(struct fib_info *fi) 674 { 675 int total; 676 int w; 677 678 if (fi->fib_nhs < 2) 679 return; 680 681 total = 0; 682 for_nexthops(fi) { 683 if (nh->fib_nh_flags & RTNH_F_DEAD) 684 continue; 685 686 if (ip_ignore_linkdown(nh->fib_nh_dev) && 687 nh->fib_nh_flags & RTNH_F_LINKDOWN) 688 continue; 689 690 total += nh->fib_nh_weight; 691 } endfor_nexthops(fi); 692 693 w = 0; 694 change_nexthops(fi) { 695 int upper_bound; 696 697 if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) { 698 upper_bound = -1; 699 } else if (ip_ignore_linkdown(nexthop_nh->fib_nh_dev) && 700 nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) { 701 upper_bound = -1; 702 } else { 703 w += nexthop_nh->fib_nh_weight; 704 upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, 705 total) - 1; 706 } 707 708 atomic_set(&nexthop_nh->fib_nh_upper_bound, upper_bound); 709 } endfor_nexthops(fi); 710 } 711 #else /* CONFIG_IP_ROUTE_MULTIPATH */ 712 713 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 714 int remaining, struct fib_config *cfg, 715 struct netlink_ext_ack *extack) 716 { 717 NL_SET_ERR_MSG(extack, "Multipath support not enabled in kernel"); 718 719 return -EINVAL; 720 } 721 722 #define fib_rebalance(fi) do { } while (0) 723 724 #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 725 726 static int fib_encap_match(u16 encap_type, 727 struct nlattr *encap, 728 const struct fib_nh *nh, 729 const struct fib_config *cfg, 730 struct netlink_ext_ack *extack) 731 { 732 struct 
lwtunnel_state *lwtstate; 733 int ret, result = 0; 734 735 if (encap_type == LWTUNNEL_ENCAP_NONE) 736 return 0; 737 738 ret = lwtunnel_build_state(encap_type, encap, AF_INET, 739 cfg, &lwtstate, extack); 740 if (!ret) { 741 result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws); 742 lwtstate_free(lwtstate); 743 } 744 745 return result; 746 } 747 748 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, 749 struct netlink_ext_ack *extack) 750 { 751 #ifdef CONFIG_IP_ROUTE_MULTIPATH 752 struct rtnexthop *rtnh; 753 int remaining; 754 #endif 755 756 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) 757 return 1; 758 759 if (cfg->fc_oif || cfg->fc_gw_family) { 760 if (cfg->fc_encap) { 761 if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap, 762 fi->fib_nh, cfg, extack)) 763 return 1; 764 } 765 #ifdef CONFIG_IP_ROUTE_CLASSID 766 if (cfg->fc_flow && 767 cfg->fc_flow != fi->fib_nh->nh_tclassid) 768 return 1; 769 #endif 770 if ((cfg->fc_oif && cfg->fc_oif != fi->fib_nh->fib_nh_oif) || 771 (cfg->fc_gw_family && 772 cfg->fc_gw_family != fi->fib_nh->fib_nh_gw_family)) 773 return 1; 774 775 if (cfg->fc_gw_family == AF_INET && 776 cfg->fc_gw4 != fi->fib_nh->fib_nh_gw4) 777 return 1; 778 779 if (cfg->fc_gw_family == AF_INET6 && 780 ipv6_addr_cmp(&cfg->fc_gw6, &fi->fib_nh->fib_nh_gw6)) 781 return 1; 782 783 return 0; 784 } 785 786 #ifdef CONFIG_IP_ROUTE_MULTIPATH 787 if (!cfg->fc_mp) 788 return 0; 789 790 rtnh = cfg->fc_mp; 791 remaining = cfg->fc_mp_len; 792 793 for_nexthops(fi) { 794 int attrlen; 795 796 if (!rtnh_ok(rtnh, remaining)) 797 return -EINVAL; 798 799 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->fib_nh_oif) 800 return 1; 801 802 attrlen = rtnh_attrlen(rtnh); 803 if (attrlen > 0) { 804 struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh); 805 806 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 807 nlav = nla_find(attrs, attrlen, RTA_VIA); 808 if (nla && nlav) { 809 NL_SET_ERR_MSG(extack, 810 "Nexthop configuration can not contain both GATEWAY 
and VIA"); 811 return -EINVAL; 812 } 813 814 if (nla) { 815 if (nh->fib_nh_gw_family != AF_INET || 816 nla_get_in_addr(nla) != nh->fib_nh_gw4) 817 return 1; 818 } else if (nlav) { 819 struct fib_config cfg2; 820 int err; 821 822 err = fib_gw_from_via(&cfg2, nlav, extack); 823 if (err) 824 return err; 825 826 switch (nh->fib_nh_gw_family) { 827 case AF_INET: 828 if (cfg2.fc_gw_family != AF_INET || 829 cfg2.fc_gw4 != nh->fib_nh_gw4) 830 return 1; 831 break; 832 case AF_INET6: 833 if (cfg2.fc_gw_family != AF_INET6 || 834 ipv6_addr_cmp(&cfg2.fc_gw6, 835 &nh->fib_nh_gw6)) 836 return 1; 837 break; 838 } 839 } 840 841 #ifdef CONFIG_IP_ROUTE_CLASSID 842 nla = nla_find(attrs, attrlen, RTA_FLOW); 843 if (nla && nla_get_u32(nla) != nh->nh_tclassid) 844 return 1; 845 #endif 846 } 847 848 rtnh = rtnh_next(rtnh, &remaining); 849 } endfor_nexthops(fi); 850 #endif 851 return 0; 852 } 853 854 bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) 855 { 856 struct nlattr *nla; 857 int remaining; 858 859 if (!cfg->fc_mx) 860 return true; 861 862 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 863 int type = nla_type(nla); 864 u32 fi_val, val; 865 866 if (!type) 867 continue; 868 if (type > RTAX_MAX) 869 return false; 870 871 if (type == RTAX_CC_ALGO) { 872 char tmp[TCP_CA_NAME_MAX]; 873 bool ecn_ca = false; 874 875 nla_strlcpy(tmp, nla, sizeof(tmp)); 876 val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); 877 } else { 878 if (nla_len(nla) != sizeof(u32)) 879 return false; 880 val = nla_get_u32(nla); 881 } 882 883 fi_val = fi->fib_metrics->metrics[type - 1]; 884 if (type == RTAX_FEATURES) 885 fi_val &= ~DST_FEATURE_ECN_CA; 886 887 if (fi_val != val) 888 return false; 889 } 890 891 return true; 892 } 893 894 static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh, 895 u32 table, struct netlink_ext_ack *extack) 896 { 897 struct fib6_config cfg = { 898 .fc_table = table, 899 .fc_flags = nh->fib_nh_flags | RTF_GATEWAY, 900 .fc_ifindex = 
nh->fib_nh_oif,
		.fc_gateway = nh->fib_nh_gw6,
	};
	struct fib6_nh fib6_nh = {};
	int err;

	err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack);
	if (!err) {
		/* take our own device reference before the temporary
		 * fib6_nh is released below
		 */
		nh->fib_nh_dev = fib6_nh.fib_nh_dev;
		dev_hold(nh->fib_nh_dev);
		nh->fib_nh_oif = nh->fib_nh_dev->ifindex;
		nh->fib_nh_scope = RT_SCOPE_LINK;

		ipv6_stub->fib6_nh_release(&fib6_nh);
	}

	return err;
}

/*
 * Picture
 * -------
 *
 * Semantics of nexthop is very messy by historical reasons.
 * We have to take into account, that:
 * a) gateway can be actually local interface address,
 *    so that gatewayed route is direct.
 * b) gateway must be on-link address, possibly
 *    described not by an ifaddr, but also by a direct route.
 * c) If both gateway and interface are specified, they should not
 *    contradict.
 * d) If we use tunnel routes, gateway could be not on-link.
 *
 * Attempt to reconcile all of these (alas, self-contradictory) conditions
 * results in pretty ugly and hairy code with obscure logic.
 *
 * I chose to generalize it instead, so that the size
 * of code does not increase practically, but it becomes
 * much more general.
 * Every prefix is assigned a "scope" value: "host" is local address,
 * "link" is direct route,
 * [ ... "site" ... "interior" ... ]
 * and "universe" is true gateway route with global meaning.
 *
 * Every prefix refers to a set of "nexthop"s (gw, oif),
 * where gw must have narrower scope. This recursion stops
 * when gw has LOCAL scope or if "nexthop" is declared ONLINK,
 * which means that gw is forced to be on link.
 *
 * Code is still hairy, but now it is apparently logically
 * consistent and very flexible. F.e. as by-product it allows
 * independent exterior and interior routing processes
 * to co-exist in peace.
 *
 * Normally it looks as following.
 *
 * {universe prefix}  -> (gw, oif) [scope link]
 *		  |
 *		  |-> {link prefix} -> (gw, oif) [scope local]
 *					|
 *					|-> {local prefix} (terminal node)
 */
static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
			      u8 scope, struct netlink_ext_ack *extack)
{
	struct net_device *dev;
	struct fib_result res;
	int err;

	/* ONLINK: no route lookup; the gateway is taken to be reachable
	 * directly on the given device, which must exist and be up.
	 */
	if (nh->fib_nh_flags & RTNH_F_ONLINK) {
		unsigned int addr_type;

		if (scope >= RT_SCOPE_LINK) {
			NL_SET_ERR_MSG(extack, "Nexthop has invalid scope");
			return -EINVAL;
		}
		dev = __dev_get_by_index(net, nh->fib_nh_oif);
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
			return -ENODEV;
		}
		if (!(dev->flags & IFF_UP)) {
			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
			return -ENETDOWN;
		}
		addr_type = inet_addr_type_dev_table(net, dev, nh->fib_nh_gw4);
		if (addr_type != RTN_UNICAST) {
			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
			return -EINVAL;
		}
		if (!netif_carrier_ok(dev))
			nh->fib_nh_flags |= RTNH_F_LINKDOWN;
		nh->fib_nh_dev = dev;
		dev_hold(dev);
		nh->fib_nh_scope = RT_SCOPE_LINK;
		return 0;
	}
	/* Otherwise resolve the gateway with a FIB lookup under RCU. */
	rcu_read_lock();
	{
		struct fib_table *tbl = NULL;
		struct flowi4 fl4 = {
			.daddr = nh->fib_nh_gw4,
			.flowi4_scope = scope + 1,
			.flowi4_oif = nh->fib_nh_oif,
			.flowi4_iif = LOOPBACK_IFINDEX,
		};

		/* It is not necessary, but requires a bit of thinking */
		if (fl4.flowi4_scope < RT_SCOPE_LINK)
			fl4.flowi4_scope = RT_SCOPE_LINK;

		if (table)
			tbl = fib_get_table(net, table);

		/* err is only assigned, and only read below, when tbl is set */
		if (tbl)
			err = fib_table_lookup(tbl, &fl4, &res,
					       FIB_LOOKUP_IGNORE_LINKSTATE |
					       FIB_LOOKUP_NOREF);

		/* on error or if no table given do full lookup. This
		 * is needed for example when nexthops are in the local
		 * table rather than the given table
		 */
		if (!tbl || err) {
			err = fib_lookup(net, &fl4, &res,
					 FIB_LOOKUP_IGNORE_LINKSTATE);
		}

		if (err) {
			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
			goto out;
		}
	}

	err = -EINVAL;
	if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
		NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
		goto out;
	}
	/* adopt scope, oif and device of the route to the gateway */
	nh->fib_nh_scope = res.scope;
	nh->fib_nh_oif = FIB_RES_OIF(res);
	nh->fib_nh_dev = dev = FIB_RES_DEV(res);
	if (!dev) {
		NL_SET_ERR_MSG(extack,
			       "No egress device for nexthop gateway");
		goto out;
	}
	dev_hold(dev);
	if (!netif_carrier_ok(dev))
		nh->fib_nh_flags |= RTNH_F_LINKDOWN;
	/* the device reference is held even when -ENETDOWN is returned */
	err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
out:
	rcu_read_unlock();
	return err;
}

/*
 * Validate a nexthop without gateway.  PERVASIVE and ONLINK flags are
 * rejected.  The device named by the oif must have an in_device
 * (-ENODEV otherwise, without an extack message) and be up (-ENETDOWN).
 * On success the device is held, the nexthop scope is set to
 * RT_SCOPE_HOST and RTNH_F_LINKDOWN is set if the carrier is down.
 */
static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh,
			      struct netlink_ext_ack *extack)
{
	struct in_device *in_dev;
	int err;

	if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
		NL_SET_ERR_MSG(extack,
			       "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
		return -EINVAL;
	}

	rcu_read_lock();

	err = -ENODEV;
	in_dev = inetdev_by_index(net, nh->fib_nh_oif);
	if (!in_dev)
		goto out;
	err = -ENETDOWN;
	if (!(in_dev->dev->flags & IFF_UP)) {
		NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
		goto out;
	}

	nh->fib_nh_dev = in_dev->dev;
	dev_hold(nh->fib_nh_dev);
	nh->fib_nh_scope = RT_SCOPE_HOST;
	if (!netif_carrier_ok(nh->fib_nh_dev))
		nh->fib_nh_flags |= RTNH_F_LINKDOWN;
	err = 0;
out:
	rcu_read_unlock();
	return err;
}

/*
 * Validate nexthop @nh of the route described by @cfg, dispatching on the
 * gateway family: IPv4 gateway, IPv6 gateway, or no gateway at all.
 */
static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
			struct netlink_ext_ack *extack)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	u32 table = cfg->fc_table;
	int err;

	if (nh->fib_nh_gw_family == AF_INET)
		err = fib_check_nh_v4_gw(net, nh, table, cfg->fc_scope, extack);
	else if (nh->fib_nh_gw_family == AF_INET6)
		err = fib_check_nh_v6_gw(net, nh, table, extack);
	else
		err = fib_check_nh_nongw(net, nh, extack);

	return err;
}

/*
 * Bucket index of address @val in fib_info_laddrhash; the mask follows
 * the current fib_info_hash_size.
 */
static inline unsigned int fib_laddr_hashfn(__be32 val)
{
	unsigned int mask = (fib_info_hash_size - 1);

	return ((__force u32)val ^
		((__force u32)val >> 7) ^
		((__force u32)val >> 14)) & mask;
}

/*
 * Allocate a zeroed hash table of @bytes bytes: kzalloc() up to a page,
 * whole zeroed pages above that.  fib_info_hash_free() must be given the
 * same @bytes so that it picks the matching release function.
 */
static struct hlist_head *fib_info_hash_alloc(int bytes)
{
	if (bytes <= PAGE_SIZE)
		return kzalloc(bytes, GFP_KERNEL);
	else
		return (struct hlist_head *)
			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					 get_order(bytes));
}

/* Counterpart of fib_info_hash_alloc(); a NULL @hash is a no-op. */
static void fib_info_hash_free(struct hlist_head *hash, int bytes)
{
	if (!hash)
		return;

	if (bytes <= PAGE_SIZE)
		kfree(hash);
	else
		free_pages((unsigned long) hash, get_order(bytes));
}

/*
 * Switch to new, larger hash tables: under fib_info_lock every entry of
 * the old info hash and the old local-address hash is re-bucketed into
 * the new tables, the globals are pointed at the new tables, and the old
 * tables are freed after the lock is dropped.
 */
static void fib_info_hash_move(struct hlist_head *new_info_hash,
			       struct hlist_head *new_laddrhash,
			       unsigned int new_size)
{
	struct hlist_head *old_info_hash, *old_laddrhash;
	unsigned int old_size = fib_info_hash_size;
	unsigned int i, bytes;

	spin_lock_bh(&fib_info_lock);
	old_info_hash = fib_info_hash;
	old_laddrhash = fib_info_laddrhash;
	/* must be updated first: the hash functions below mask with it */
	fib_info_hash_size = new_size;

	for (i = 0; i < old_size; i++) {
		struct hlist_head *head = &fib_info_hash[i];
		struct hlist_node *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, n, head, fib_hash) {
			struct hlist_head *dest;
			unsigned int new_hash;

			new_hash = fib_info_hashfn(fi);
			dest = &new_info_hash[new_hash];
			hlist_add_head(&fi->fib_hash, dest);
		}
	}
	fib_info_hash = new_info_hash;

/* Second pass: re-bucket the entries of the old local-address hash. */
	for (i = 0; i < old_size; i++) {
		struct hlist_head *lhead = &fib_info_laddrhash[i];
		struct hlist_node *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, n, lhead, fib_lhash) {
			struct hlist_head *ldest;
			unsigned int new_hash;

			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
			ldest = &new_laddrhash[new_hash];
			hlist_add_head(&fi->fib_lhash, ldest);
		}
	}
	fib_info_laddrhash = new_laddrhash;

	spin_unlock_bh(&fib_info_lock);

	/* same size formula as the allocation in fib_create_info(), so
	 * fib_info_hash_free() takes the branch matching the allocation
	 */
	bytes = old_size * sizeof(struct hlist_head *);
	fib_info_hash_free(old_info_hash, bytes);
	fib_info_hash_free(old_laddrhash, bytes);
}

/*
 * Recompute the cached source address of @nh with inet_select_addr() for
 * its device, IPv4 gateway and the scope of the owning fib_info, and
 * stamp it with the netns' current dev_addr_genid.  Returns the address.
 */
__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
{
	nh->nh_saddr = inet_select_addr(nh->fib_nh_dev,
					nh->fib_nh_gw4,
					nh->nh_parent->fib_scope);
	nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);

	return nh->nh_saddr;
}

/*
 * Preferred source address for lookup result @res: the route's explicit
 * prefsrc when set, otherwise the nexthop's cached source address, which
 * is refreshed first if its generation stamp no longer matches the
 * netns' dev_addr_genid.
 */
__be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
{
	struct fib_nh_common *nhc = res->nhc;
	struct fib_nh *nh;

	if (res->fi->fib_prefsrc)
		return res->fi->fib_prefsrc;

	nh = container_of(nhc, struct fib_nh, nh_common);
	if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid))
		return nh->nh_saddr;

	return fib_info_update_nh_saddr(net, nh);
}

/*
 * Decide whether @fib_prefsrc may be used as preferred source for the
 * route described by @cfg.  A local route whose non-zero destination is
 * the prefsrc itself is accepted without lookup.  Otherwise the address
 * must be of type RTN_LOCAL in the route's table (RT_TABLE_LOCAL is used
 * in place of RT_TABLE_MAIN) or, failing that, in RT_TABLE_LOCAL.
 */
static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
{
	if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
	    fib_prefsrc != cfg->fc_dst) {
		u32 tb_id = cfg->fc_table;
		int rc;

		if (tb_id == RT_TABLE_MAIN)
			tb_id = RT_TABLE_LOCAL;

		rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
					  fib_prefsrc, tb_id);

		/* retry in the local table if another table was tried */
		if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) {
			rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
						  fib_prefsrc, RT_TABLE_LOCAL);
		}

		if (rc != RTN_LOCAL)
			return false;
	}
	return true;
1237 } 1238 1239 struct fib_info *fib_create_info(struct fib_config *cfg, 1240 struct netlink_ext_ack *extack) 1241 { 1242 int err; 1243 struct fib_info *fi = NULL; 1244 struct fib_info *ofi; 1245 int nhs = 1; 1246 struct net *net = cfg->fc_nlinfo.nl_net; 1247 1248 if (cfg->fc_type > RTN_MAX) 1249 goto err_inval; 1250 1251 /* Fast check to catch the most weird cases */ 1252 if (fib_props[cfg->fc_type].scope > cfg->fc_scope) { 1253 NL_SET_ERR_MSG(extack, "Invalid scope"); 1254 goto err_inval; 1255 } 1256 1257 if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) { 1258 NL_SET_ERR_MSG(extack, 1259 "Invalid rtm_flags - can not contain DEAD or LINKDOWN"); 1260 goto err_inval; 1261 } 1262 1263 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1264 if (cfg->fc_mp) { 1265 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack); 1266 if (nhs == 0) 1267 goto err_inval; 1268 } 1269 #endif 1270 1271 err = -ENOBUFS; 1272 if (fib_info_cnt >= fib_info_hash_size) { 1273 unsigned int new_size = fib_info_hash_size << 1; 1274 struct hlist_head *new_info_hash; 1275 struct hlist_head *new_laddrhash; 1276 unsigned int bytes; 1277 1278 if (!new_size) 1279 new_size = 16; 1280 bytes = new_size * sizeof(struct hlist_head *); 1281 new_info_hash = fib_info_hash_alloc(bytes); 1282 new_laddrhash = fib_info_hash_alloc(bytes); 1283 if (!new_info_hash || !new_laddrhash) { 1284 fib_info_hash_free(new_info_hash, bytes); 1285 fib_info_hash_free(new_laddrhash, bytes); 1286 } else 1287 fib_info_hash_move(new_info_hash, new_laddrhash, new_size); 1288 1289 if (!fib_info_hash_size) 1290 goto failure; 1291 } 1292 1293 fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL); 1294 if (!fi) 1295 goto failure; 1296 fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx, 1297 cfg->fc_mx_len, extack); 1298 if (unlikely(IS_ERR(fi->fib_metrics))) { 1299 err = PTR_ERR(fi->fib_metrics); 1300 kfree(fi); 1301 return ERR_PTR(err); 1302 } 1303 1304 fib_info_cnt++; 1305 fi->fib_net = net; 1306 fi->fib_protocol = 
cfg->fc_protocol; 1307 fi->fib_scope = cfg->fc_scope; 1308 fi->fib_flags = cfg->fc_flags; 1309 fi->fib_priority = cfg->fc_priority; 1310 fi->fib_prefsrc = cfg->fc_prefsrc; 1311 fi->fib_type = cfg->fc_type; 1312 fi->fib_tb_id = cfg->fc_table; 1313 1314 fi->fib_nhs = nhs; 1315 change_nexthops(fi) { 1316 nexthop_nh->nh_parent = fi; 1317 } endfor_nexthops(fi) 1318 1319 if (cfg->fc_mp) 1320 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack); 1321 else 1322 err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack); 1323 1324 if (err != 0) 1325 goto failure; 1326 1327 if (fib_props[cfg->fc_type].error) { 1328 if (cfg->fc_gw_family || cfg->fc_oif || cfg->fc_mp) { 1329 NL_SET_ERR_MSG(extack, 1330 "Gateway, device and multipath can not be specified for this route type"); 1331 goto err_inval; 1332 } 1333 goto link_it; 1334 } else { 1335 switch (cfg->fc_type) { 1336 case RTN_UNICAST: 1337 case RTN_LOCAL: 1338 case RTN_BROADCAST: 1339 case RTN_ANYCAST: 1340 case RTN_MULTICAST: 1341 break; 1342 default: 1343 NL_SET_ERR_MSG(extack, "Invalid route type"); 1344 goto err_inval; 1345 } 1346 } 1347 1348 if (cfg->fc_scope > RT_SCOPE_HOST) { 1349 NL_SET_ERR_MSG(extack, "Invalid scope"); 1350 goto err_inval; 1351 } 1352 1353 if (cfg->fc_scope == RT_SCOPE_HOST) { 1354 struct fib_nh *nh = fi->fib_nh; 1355 1356 /* Local address is added. 
*/ 1357 if (nhs != 1) { 1358 NL_SET_ERR_MSG(extack, 1359 "Route with host scope can not have multiple nexthops"); 1360 goto err_inval; 1361 } 1362 if (nh->fib_nh_gw_family) { 1363 NL_SET_ERR_MSG(extack, 1364 "Route with host scope can not have a gateway"); 1365 goto err_inval; 1366 } 1367 nh->fib_nh_scope = RT_SCOPE_NOWHERE; 1368 nh->fib_nh_dev = dev_get_by_index(net, fi->fib_nh->fib_nh_oif); 1369 err = -ENODEV; 1370 if (!nh->fib_nh_dev) 1371 goto failure; 1372 } else { 1373 int linkdown = 0; 1374 1375 change_nexthops(fi) { 1376 err = fib_check_nh(cfg, nexthop_nh, extack); 1377 if (err != 0) 1378 goto failure; 1379 if (nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) 1380 linkdown++; 1381 } endfor_nexthops(fi) 1382 if (linkdown == fi->fib_nhs) 1383 fi->fib_flags |= RTNH_F_LINKDOWN; 1384 } 1385 1386 if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) { 1387 NL_SET_ERR_MSG(extack, "Invalid prefsrc address"); 1388 goto err_inval; 1389 } 1390 1391 change_nexthops(fi) { 1392 fib_info_update_nh_saddr(net, nexthop_nh); 1393 if (nexthop_nh->fib_nh_gw_family == AF_INET6) 1394 fi->fib_nh_is_v6 = true; 1395 } endfor_nexthops(fi) 1396 1397 fib_rebalance(fi); 1398 1399 link_it: 1400 ofi = fib_find_info(fi); 1401 if (ofi) { 1402 fi->fib_dead = 1; 1403 free_fib_info(fi); 1404 ofi->fib_treeref++; 1405 return ofi; 1406 } 1407 1408 fi->fib_treeref++; 1409 refcount_set(&fi->fib_clntref, 1); 1410 spin_lock_bh(&fib_info_lock); 1411 hlist_add_head(&fi->fib_hash, 1412 &fib_info_hash[fib_info_hashfn(fi)]); 1413 if (fi->fib_prefsrc) { 1414 struct hlist_head *head; 1415 1416 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; 1417 hlist_add_head(&fi->fib_lhash, head); 1418 } 1419 change_nexthops(fi) { 1420 struct hlist_head *head; 1421 unsigned int hash; 1422 1423 if (!nexthop_nh->fib_nh_dev) 1424 continue; 1425 hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex); 1426 head = &fib_info_devhash[hash]; 1427 hlist_add_head(&nexthop_nh->nh_hash, head); 1428 } 
endfor_nexthops(fi) 1429 spin_unlock_bh(&fib_info_lock); 1430 return fi; 1431 1432 err_inval: 1433 err = -EINVAL; 1434 1435 failure: 1436 if (fi) { 1437 fi->fib_dead = 1; 1438 free_fib_info(fi); 1439 } 1440 1441 return ERR_PTR(err); 1442 } 1443 1444 int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc, 1445 unsigned char *flags, bool skip_oif) 1446 { 1447 if (nhc->nhc_flags & RTNH_F_DEAD) 1448 *flags |= RTNH_F_DEAD; 1449 1450 if (nhc->nhc_flags & RTNH_F_LINKDOWN) { 1451 *flags |= RTNH_F_LINKDOWN; 1452 1453 rcu_read_lock(); 1454 switch (nhc->nhc_family) { 1455 case AF_INET: 1456 if (ip_ignore_linkdown(nhc->nhc_dev)) 1457 *flags |= RTNH_F_DEAD; 1458 break; 1459 case AF_INET6: 1460 if (ip6_ignore_linkdown(nhc->nhc_dev)) 1461 *flags |= RTNH_F_DEAD; 1462 break; 1463 } 1464 rcu_read_unlock(); 1465 } 1466 1467 switch (nhc->nhc_gw_family) { 1468 case AF_INET: 1469 if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4)) 1470 goto nla_put_failure; 1471 break; 1472 case AF_INET6: 1473 /* if gateway family does not match nexthop family 1474 * gateway is encoded as RTA_VIA 1475 */ 1476 if (nhc->nhc_gw_family != nhc->nhc_family) { 1477 int alen = sizeof(struct in6_addr); 1478 struct nlattr *nla; 1479 struct rtvia *via; 1480 1481 nla = nla_reserve(skb, RTA_VIA, alen + 2); 1482 if (!nla) 1483 goto nla_put_failure; 1484 1485 via = nla_data(nla); 1486 via->rtvia_family = AF_INET6; 1487 memcpy(via->rtvia_addr, &nhc->nhc_gw.ipv6, alen); 1488 } else if (nla_put_in6_addr(skb, RTA_GATEWAY, 1489 &nhc->nhc_gw.ipv6) < 0) { 1490 goto nla_put_failure; 1491 } 1492 break; 1493 } 1494 1495 *flags |= (nhc->nhc_flags & RTNH_F_ONLINK); 1496 if (nhc->nhc_flags & RTNH_F_OFFLOAD) 1497 *flags |= RTNH_F_OFFLOAD; 1498 1499 if (!skip_oif && nhc->nhc_dev && 1500 nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex)) 1501 goto nla_put_failure; 1502 1503 if (nhc->nhc_lwtstate && 1504 lwtunnel_fill_encap(skb, nhc->nhc_lwtstate, 1505 RTA_ENCAP, RTA_ENCAP_TYPE) < 0) 1506 goto nla_put_failure; 
1507 1508 return 0; 1509 1510 nla_put_failure: 1511 return -EMSGSIZE; 1512 } 1513 EXPORT_SYMBOL_GPL(fib_nexthop_info); 1514 1515 #if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6) 1516 int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, 1517 int nh_weight) 1518 { 1519 const struct net_device *dev = nhc->nhc_dev; 1520 struct rtnexthop *rtnh; 1521 unsigned char flags = 0; 1522 1523 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 1524 if (!rtnh) 1525 goto nla_put_failure; 1526 1527 rtnh->rtnh_hops = nh_weight - 1; 1528 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0; 1529 1530 if (fib_nexthop_info(skb, nhc, &flags, true) < 0) 1531 goto nla_put_failure; 1532 1533 rtnh->rtnh_flags = flags; 1534 1535 /* length of rtnetlink header + attributes */ 1536 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; 1537 1538 return 0; 1539 1540 nla_put_failure: 1541 return -EMSGSIZE; 1542 } 1543 EXPORT_SYMBOL_GPL(fib_add_nexthop); 1544 #endif 1545 1546 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1547 static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) 1548 { 1549 struct nlattr *mp; 1550 1551 mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); 1552 if (!mp) 1553 goto nla_put_failure; 1554 1555 for_nexthops(fi) { 1556 if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight) < 0) 1557 goto nla_put_failure; 1558 #ifdef CONFIG_IP_ROUTE_CLASSID 1559 if (nh->nh_tclassid && 1560 nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) 1561 goto nla_put_failure; 1562 #endif 1563 } endfor_nexthops(fi); 1564 1565 nla_nest_end(skb, mp); 1566 1567 return 0; 1568 1569 nla_put_failure: 1570 return -EMSGSIZE; 1571 } 1572 #else 1573 static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) 1574 { 1575 return 0; 1576 } 1577 #endif 1578 1579 int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, 1580 u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, 1581 struct fib_info *fi, unsigned int flags) 1582 { 1583 struct nlmsghdr *nlh; 1584 struct 
rtmsg *rtm; 1585 1586 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); 1587 if (!nlh) 1588 return -EMSGSIZE; 1589 1590 rtm = nlmsg_data(nlh); 1591 rtm->rtm_family = AF_INET; 1592 rtm->rtm_dst_len = dst_len; 1593 rtm->rtm_src_len = 0; 1594 rtm->rtm_tos = tos; 1595 if (tb_id < 256) 1596 rtm->rtm_table = tb_id; 1597 else 1598 rtm->rtm_table = RT_TABLE_COMPAT; 1599 if (nla_put_u32(skb, RTA_TABLE, tb_id)) 1600 goto nla_put_failure; 1601 rtm->rtm_type = type; 1602 rtm->rtm_flags = fi->fib_flags; 1603 rtm->rtm_scope = fi->fib_scope; 1604 rtm->rtm_protocol = fi->fib_protocol; 1605 1606 if (rtm->rtm_dst_len && 1607 nla_put_in_addr(skb, RTA_DST, dst)) 1608 goto nla_put_failure; 1609 if (fi->fib_priority && 1610 nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) 1611 goto nla_put_failure; 1612 if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0) 1613 goto nla_put_failure; 1614 1615 if (fi->fib_prefsrc && 1616 nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) 1617 goto nla_put_failure; 1618 if (fi->fib_nhs == 1) { 1619 struct fib_nh *nh = &fi->fib_nh[0]; 1620 unsigned char flags = 0; 1621 1622 if (fib_nexthop_info(skb, &nh->nh_common, &flags, false) < 0) 1623 goto nla_put_failure; 1624 1625 rtm->rtm_flags = flags; 1626 #ifdef CONFIG_IP_ROUTE_CLASSID 1627 if (nh->nh_tclassid && 1628 nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) 1629 goto nla_put_failure; 1630 #endif 1631 } else { 1632 if (fib_add_multipath(skb, fi) < 0) 1633 goto nla_put_failure; 1634 } 1635 1636 nlmsg_end(skb, nlh); 1637 return 0; 1638 1639 nla_put_failure: 1640 nlmsg_cancel(skb, nlh); 1641 return -EMSGSIZE; 1642 } 1643 1644 /* 1645 * Update FIB if: 1646 * - local address disappeared -> we must delete all the entries 1647 * referring to it. 1648 * - device went down -> we must shutdown all nexthops going via it. 
1649 */ 1650 int fib_sync_down_addr(struct net_device *dev, __be32 local) 1651 { 1652 int ret = 0; 1653 unsigned int hash = fib_laddr_hashfn(local); 1654 struct hlist_head *head = &fib_info_laddrhash[hash]; 1655 struct net *net = dev_net(dev); 1656 int tb_id = l3mdev_fib_table(dev); 1657 struct fib_info *fi; 1658 1659 if (!fib_info_laddrhash || local == 0) 1660 return 0; 1661 1662 hlist_for_each_entry(fi, head, fib_lhash) { 1663 if (!net_eq(fi->fib_net, net) || 1664 fi->fib_tb_id != tb_id) 1665 continue; 1666 if (fi->fib_prefsrc == local) { 1667 fi->fib_flags |= RTNH_F_DEAD; 1668 ret++; 1669 } 1670 } 1671 return ret; 1672 } 1673 1674 static int call_fib_nh_notifiers(struct fib_nh *nh, 1675 enum fib_event_type event_type) 1676 { 1677 bool ignore_link_down = ip_ignore_linkdown(nh->fib_nh_dev); 1678 struct fib_nh_notifier_info info = { 1679 .fib_nh = nh, 1680 }; 1681 1682 switch (event_type) { 1683 case FIB_EVENT_NH_ADD: 1684 if (nh->fib_nh_flags & RTNH_F_DEAD) 1685 break; 1686 if (ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) 1687 break; 1688 return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type, 1689 &info.info); 1690 case FIB_EVENT_NH_DEL: 1691 if ((ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) || 1692 (nh->fib_nh_flags & RTNH_F_DEAD)) 1693 return call_fib4_notifiers(dev_net(nh->fib_nh_dev), 1694 event_type, &info.info); 1695 default: 1696 break; 1697 } 1698 1699 return NOTIFY_DONE; 1700 } 1701 1702 /* Update the PMTU of exceptions when: 1703 * - the new MTU of the first hop becomes smaller than the PMTU 1704 * - the old MTU was the same as the PMTU, and it limited discovery of 1705 * larger MTUs on the path. 
With that limit raised, we can now 1706 * discover larger MTUs 1707 * A special case is locked exceptions, for which the PMTU is smaller 1708 * than the minimal accepted PMTU: 1709 * - if the new MTU is greater than the PMTU, don't make any change 1710 * - otherwise, unlock and set PMTU 1711 */ 1712 static void nh_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) 1713 { 1714 struct fnhe_hash_bucket *bucket; 1715 int i; 1716 1717 bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1); 1718 if (!bucket) 1719 return; 1720 1721 for (i = 0; i < FNHE_HASH_SIZE; i++) { 1722 struct fib_nh_exception *fnhe; 1723 1724 for (fnhe = rcu_dereference_protected(bucket[i].chain, 1); 1725 fnhe; 1726 fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) { 1727 if (fnhe->fnhe_mtu_locked) { 1728 if (new <= fnhe->fnhe_pmtu) { 1729 fnhe->fnhe_pmtu = new; 1730 fnhe->fnhe_mtu_locked = false; 1731 } 1732 } else if (new < fnhe->fnhe_pmtu || 1733 orig == fnhe->fnhe_pmtu) { 1734 fnhe->fnhe_pmtu = new; 1735 } 1736 } 1737 } 1738 } 1739 1740 void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) 1741 { 1742 unsigned int hash = fib_devindex_hashfn(dev->ifindex); 1743 struct hlist_head *head = &fib_info_devhash[hash]; 1744 struct fib_nh *nh; 1745 1746 hlist_for_each_entry(nh, head, nh_hash) { 1747 if (nh->fib_nh_dev == dev) 1748 nh_update_mtu(&nh->nh_common, dev->mtu, orig_mtu); 1749 } 1750 } 1751 1752 /* Event force Flags Description 1753 * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host 1754 * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host 1755 * NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed 1756 * NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device removed 1757 */ 1758 int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) 1759 { 1760 int ret = 0; 1761 int scope = RT_SCOPE_NOWHERE; 1762 struct fib_info *prev_fi = NULL; 1763 unsigned int hash = fib_devindex_hashfn(dev->ifindex); 1764 struct hlist_head *head = &fib_info_devhash[hash]; 1765 struct 
fib_nh *nh; 1766 1767 if (force) 1768 scope = -1; 1769 1770 hlist_for_each_entry(nh, head, nh_hash) { 1771 struct fib_info *fi = nh->nh_parent; 1772 int dead; 1773 1774 BUG_ON(!fi->fib_nhs); 1775 if (nh->fib_nh_dev != dev || fi == prev_fi) 1776 continue; 1777 prev_fi = fi; 1778 dead = 0; 1779 change_nexthops(fi) { 1780 if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) 1781 dead++; 1782 else if (nexthop_nh->fib_nh_dev == dev && 1783 nexthop_nh->fib_nh_scope != scope) { 1784 switch (event) { 1785 case NETDEV_DOWN: 1786 case NETDEV_UNREGISTER: 1787 nexthop_nh->fib_nh_flags |= RTNH_F_DEAD; 1788 /* fall through */ 1789 case NETDEV_CHANGE: 1790 nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN; 1791 break; 1792 } 1793 call_fib_nh_notifiers(nexthop_nh, 1794 FIB_EVENT_NH_DEL); 1795 dead++; 1796 } 1797 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1798 if (event == NETDEV_UNREGISTER && 1799 nexthop_nh->fib_nh_dev == dev) { 1800 dead = fi->fib_nhs; 1801 break; 1802 } 1803 #endif 1804 } endfor_nexthops(fi) 1805 if (dead == fi->fib_nhs) { 1806 switch (event) { 1807 case NETDEV_DOWN: 1808 case NETDEV_UNREGISTER: 1809 fi->fib_flags |= RTNH_F_DEAD; 1810 /* fall through */ 1811 case NETDEV_CHANGE: 1812 fi->fib_flags |= RTNH_F_LINKDOWN; 1813 break; 1814 } 1815 ret++; 1816 } 1817 1818 fib_rebalance(fi); 1819 } 1820 1821 return ret; 1822 } 1823 1824 /* Must be invoked inside of an RCU protected region. 
*/
/* Choose among several equivalent default routes.
 *
 * Walks the aliases in res->fa_head that match the result's prefix length,
 * table and (when the alias has one) TOS, considering only live unicast
 * routes of the result's scope whose first nexthop has an IPv4 gateway at
 * link scope.  Each candidate is probed with fib_detect_death(); the first
 * one it accepts is assigned to @res.  If none is accepted, the last-resort
 * entry recorded by fib_detect_death() is used when there is one.  The
 * chosen position is remembered in fa_default of the first matching alias.
 */
static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
{
	struct fib_info *cand = NULL, *fallback = NULL;
	struct hlist_head *aliases = res->fa_head;
	struct fib_table *table = res->table;
	u8 want_slen = 32 - res->prefixlen;
	int rank = -1, fallback_rank = -1;
	struct fib_alias *alias, *anchor = NULL;
	u32 prio = res->fi->fib_priority;
	u8 tos = 0;

	hlist_for_each_entry_rcu(alias, aliases, fa_list) {
		struct fib_info *next_fi = alias->fa_info;

		/* Filter on prefix length, TOS and table. */
		if (alias->fa_slen != want_slen ||
		    (alias->fa_tos && alias->fa_tos != flp->flowi4_tos) ||
		    alias->tb_id != table->tb_id)
			continue;

		/* A higher priority value at the same TOS ends the walk,
		 * except that entries of a non-zero TOS are just skipped.
		 */
		if (next_fi->fib_priority > prio && alias->fa_tos == tos) {
			if (tos)
				continue;
			break;
		}

		if (next_fi->fib_flags & RTNH_F_DEAD)
			continue;

		tos = alias->fa_tos;
		prio = next_fi->fib_priority;

		if (next_fi->fib_scope != res->scope ||
		    alias->fa_type != RTN_UNICAST)
			continue;
		if (!next_fi->fib_nh[0].fib_nh_gw4 ||
		    next_fi->fib_nh[0].fib_nh_scope != RT_SCOPE_LINK)
			continue;

		fib_alias_accessed(alias);

		if (!cand) {
			/* The first usable entry must be the looked-up one. */
			if (next_fi != res->fi)
				break;
			anchor = alias;
		} else if (!fib_detect_death(cand, rank, &fallback,
					     &fallback_rank,
					     anchor->fa_default)) {
			fib_result_assign(res, cand);
			anchor->fa_default = rank;
			return;
		}

		cand = next_fi;
		rank++;
	}

	if (rank <= 0 || !cand) {
		if (anchor)
			anchor->fa_default = -1;
		return;
	}

	/* The last candidate has not been probed by the loop yet. */
	if (!fib_detect_death(cand, rank, &fallback, &fallback_rank,
			      anchor->fa_default)) {
		fib_result_assign(res, cand);
		anchor->fa_default = rank;
		return;
	}

	if (fallback_rank >= 0)
		fib_result_assign(res, fallback);
	anchor->fa_default = fallback_rank;
}

/*
 * Dead device goes up. We wake up dead nexthops.
1901 * It takes sense only on multipath routes. 1902 */ 1903 int fib_sync_up(struct net_device *dev, unsigned char nh_flags) 1904 { 1905 struct fib_info *prev_fi; 1906 unsigned int hash; 1907 struct hlist_head *head; 1908 struct fib_nh *nh; 1909 int ret; 1910 1911 if (!(dev->flags & IFF_UP)) 1912 return 0; 1913 1914 if (nh_flags & RTNH_F_DEAD) { 1915 unsigned int flags = dev_get_flags(dev); 1916 1917 if (flags & (IFF_RUNNING | IFF_LOWER_UP)) 1918 nh_flags |= RTNH_F_LINKDOWN; 1919 } 1920 1921 prev_fi = NULL; 1922 hash = fib_devindex_hashfn(dev->ifindex); 1923 head = &fib_info_devhash[hash]; 1924 ret = 0; 1925 1926 hlist_for_each_entry(nh, head, nh_hash) { 1927 struct fib_info *fi = nh->nh_parent; 1928 int alive; 1929 1930 BUG_ON(!fi->fib_nhs); 1931 if (nh->fib_nh_dev != dev || fi == prev_fi) 1932 continue; 1933 1934 prev_fi = fi; 1935 alive = 0; 1936 change_nexthops(fi) { 1937 if (!(nexthop_nh->fib_nh_flags & nh_flags)) { 1938 alive++; 1939 continue; 1940 } 1941 if (!nexthop_nh->fib_nh_dev || 1942 !(nexthop_nh->fib_nh_dev->flags & IFF_UP)) 1943 continue; 1944 if (nexthop_nh->fib_nh_dev != dev || 1945 !__in_dev_get_rtnl(dev)) 1946 continue; 1947 alive++; 1948 nexthop_nh->fib_nh_flags &= ~nh_flags; 1949 call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD); 1950 } endfor_nexthops(fi) 1951 1952 if (alive > 0) { 1953 fi->fib_flags &= ~nh_flags; 1954 ret++; 1955 } 1956 1957 fib_rebalance(fi); 1958 } 1959 1960 return ret; 1961 } 1962 1963 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1964 static bool fib_good_nh(const struct fib_nh *nh) 1965 { 1966 int state = NUD_REACHABLE; 1967 1968 if (nh->fib_nh_scope == RT_SCOPE_LINK) { 1969 struct neighbour *n; 1970 1971 rcu_read_lock_bh(); 1972 1973 if (likely(nh->fib_nh_gw_family == AF_INET)) 1974 n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, 1975 (__force u32)nh->fib_nh_gw4); 1976 else if (nh->fib_nh_gw_family == AF_INET6) 1977 n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, 1978 &nh->fib_nh_gw6); 1979 else 1980 n = NULL; 1981 if (n) 1982 
state = n->nud_state; 1983 1984 rcu_read_unlock_bh(); 1985 } 1986 1987 return !!(state & NUD_VALID); 1988 } 1989 1990 void fib_select_multipath(struct fib_result *res, int hash) 1991 { 1992 struct fib_info *fi = res->fi; 1993 struct net *net = fi->fib_net; 1994 bool first = false; 1995 1996 change_nexthops(fi) { 1997 if (net->ipv4.sysctl_fib_multipath_use_neigh) { 1998 if (!fib_good_nh(nexthop_nh)) 1999 continue; 2000 if (!first) { 2001 res->nh_sel = nhsel; 2002 res->nhc = &nexthop_nh->nh_common; 2003 first = true; 2004 } 2005 } 2006 2007 if (hash > atomic_read(&nexthop_nh->fib_nh_upper_bound)) 2008 continue; 2009 2010 res->nh_sel = nhsel; 2011 res->nhc = &nexthop_nh->nh_common; 2012 return; 2013 } endfor_nexthops(fi); 2014 } 2015 #endif 2016 2017 void fib_select_path(struct net *net, struct fib_result *res, 2018 struct flowi4 *fl4, const struct sk_buff *skb) 2019 { 2020 if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) 2021 goto check_saddr; 2022 2023 #ifdef CONFIG_IP_ROUTE_MULTIPATH 2024 if (res->fi->fib_nhs > 1) { 2025 int h = fib_multipath_hash(net, fl4, skb, NULL); 2026 2027 fib_select_multipath(res, h); 2028 } 2029 else 2030 #endif 2031 if (!res->prefixlen && 2032 res->table->tb_num_default > 1 && 2033 res->type == RTN_UNICAST) 2034 fib_select_default(fl4, res); 2035 2036 check_saddr: 2037 if (!fl4->saddr) 2038 fl4->saddr = fib_result_prefsrc(net, res); 2039 } 2040