1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * IPv4 Forwarding Information Base: semantics. 7 * 8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 */ 15 16 #include <asm/uaccess.h> 17 #include <asm/system.h> 18 #include <linux/bitops.h> 19 #include <linux/types.h> 20 #include <linux/kernel.h> 21 #include <linux/jiffies.h> 22 #include <linux/mm.h> 23 #include <linux/string.h> 24 #include <linux/socket.h> 25 #include <linux/sockios.h> 26 #include <linux/errno.h> 27 #include <linux/in.h> 28 #include <linux/inet.h> 29 #include <linux/inetdevice.h> 30 #include <linux/netdevice.h> 31 #include <linux/if_arp.h> 32 #include <linux/proc_fs.h> 33 #include <linux/skbuff.h> 34 #include <linux/init.h> 35 #include <linux/slab.h> 36 37 #include <net/arp.h> 38 #include <net/ip.h> 39 #include <net/protocol.h> 40 #include <net/route.h> 41 #include <net/tcp.h> 42 #include <net/sock.h> 43 #include <net/ip_fib.h> 44 #include <net/netlink.h> 45 #include <net/nexthop.h> 46 47 #include "fib_lookup.h" 48 49 static DEFINE_SPINLOCK(fib_info_lock); 50 static struct hlist_head *fib_info_hash; 51 static struct hlist_head *fib_info_laddrhash; 52 static unsigned int fib_info_hash_size; 53 static unsigned int fib_info_cnt; 54 55 #define DEVINDEX_HASHBITS 8 56 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) 57 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; 58 59 #ifdef CONFIG_IP_ROUTE_MULTIPATH 60 61 static DEFINE_SPINLOCK(fib_multipath_lock); 62 63 #define for_nexthops(fi) { \ 64 int nhsel; const struct fib_nh *nh; \ 65 for (nhsel = 0, nh = (fi)->fib_nh; \ 66 nhsel < (fi)->fib_nhs; \ 67 nh++, nhsel++) 68 69 #define change_nexthops(fi) { \ 70 int nhsel; struct fib_nh *nexthop_nh; \ 71 for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 72 nhsel < (fi)->fib_nhs; \ 73 nexthop_nh++, nhsel++) 74 75 #else /* CONFIG_IP_ROUTE_MULTIPATH */ 76 77 /* Hope, that gcc will optimize it to get rid of dummy loop */ 78 79 #define for_nexthops(fi) { \ 80 int nhsel; const struct fib_nh *nh = (fi)->fib_nh; \ 81 for (nhsel = 0; nhsel < 1; nhsel++) 82 83 #define change_nexthops(fi) { \ 84 int nhsel; \ 85 struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 86 for (nhsel = 0; nhsel < 1; nhsel++) 87 88 #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 89 90 #define endfor_nexthops(fi) } 91 92 93 const struct fib_prop fib_props[RTN_MAX + 1] = { 94 [RTN_UNSPEC] = { 95 .error = 0, 96 .scope = RT_SCOPE_NOWHERE, 97 }, 98 [RTN_UNICAST] = { 99 .error = 0, 100 .scope = RT_SCOPE_UNIVERSE, 101 }, 102 [RTN_LOCAL] = { 103 .error = 0, 104 .scope = RT_SCOPE_HOST, 105 }, 106 [RTN_BROADCAST] = { 107 .error = 0, 108 .scope = RT_SCOPE_LINK, 109 }, 110 [RTN_ANYCAST] = { 111 .error = 0, 112 .scope = RT_SCOPE_LINK, 113 }, 114 [RTN_MULTICAST] = { 115 .error = 0, 116 .scope = RT_SCOPE_UNIVERSE, 117 }, 118 [RTN_BLACKHOLE] = { 119 .error = -EINVAL, 120 .scope = RT_SCOPE_UNIVERSE, 121 }, 122 [RTN_UNREACHABLE] = { 123 .error = -EHOSTUNREACH, 124 .scope = RT_SCOPE_UNIVERSE, 125 }, 126 [RTN_PROHIBIT] = { 127 .error = -EACCES, 128 .scope = RT_SCOPE_UNIVERSE, 129 }, 130 [RTN_THROW] = { 131 .error = -EAGAIN, 132 .scope = RT_SCOPE_UNIVERSE, 133 }, 134 [RTN_NAT] = { 135 .error = -EINVAL, 136 .scope = RT_SCOPE_NOWHERE, 137 }, 138 [RTN_XRESOLVE] = { 139 .error = -EINVAL, 140 .scope = RT_SCOPE_NOWHERE, 141 }, 142 }; 143 144 145 /* Release a nexthop info record */ 146 147 static void free_fib_info_rcu(struct rcu_head *head) 148 { 149 struct fib_info *fi = container_of(head, struct fib_info, rcu); 150 151 if (fi->fib_metrics != (u32 *) dst_default_metrics) 152 kfree(fi->fib_metrics); 153 kfree(fi); 154 } 155 156 void free_fib_info(struct fib_info *fi) 157 { 158 if (fi->fib_dead == 0) { 159 pr_warning("Freeing alive fib_info %p\n", fi); 160 return; 161 } 162 change_nexthops(fi) { 163 if (nexthop_nh->nh_dev) 164 dev_put(nexthop_nh->nh_dev); 165 nexthop_nh->nh_dev = NULL; 166 } endfor_nexthops(fi); 167 fib_info_cnt--; 168 release_net(fi->fib_net); 169 call_rcu(&fi->rcu, free_fib_info_rcu); 170 } 171 172 void fib_release_info(struct fib_info *fi) 173 { 174 spin_lock_bh(&fib_info_lock); 175 if (fi && --fi->fib_treeref == 0) { 176 hlist_del(&fi->fib_hash); 177 if (fi->fib_prefsrc) 178 hlist_del(&fi->fib_lhash); 179 change_nexthops(fi) { 180 if (!nexthop_nh->nh_dev) 181 continue; 182 hlist_del(&nexthop_nh->nh_hash); 183 } endfor_nexthops(fi) 184 fi->fib_dead = 1; 185 fib_info_put(fi); 186 } 187 spin_unlock_bh(&fib_info_lock); 188 } 189 190 static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 191 { 192 const struct fib_nh *onh = ofi->fib_nh; 193 194 for_nexthops(fi) { 195 if (nh->nh_oif != onh->nh_oif || 196 nh->nh_gw != onh->nh_gw || 197 nh->nh_scope != onh->nh_scope || 198 #ifdef CONFIG_IP_ROUTE_MULTIPATH 199 nh->nh_weight != onh->nh_weight || 200 #endif 201 #ifdef CONFIG_IP_ROUTE_CLASSID 202 nh->nh_tclassid != onh->nh_tclassid || 203 #endif 204 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) 205 return -1; 206 onh++; 207 } endfor_nexthops(fi); 208 return 0; 209 } 210 211 static inline unsigned int fib_devindex_hashfn(unsigned int val) 212 { 213 unsigned int mask = DEVINDEX_HASHSIZE - 1; 214 215 return (val ^ 216 (val >> DEVINDEX_HASHBITS) ^ 217 (val >> (DEVINDEX_HASHBITS * 2))) & mask; 218 } 219 220 static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 221 { 222 unsigned int mask = (fib_info_hash_size - 1); 223 unsigned int val = fi->fib_nhs; 224 225 val ^= fi->fib_protocol; 226 val ^= (__force u32)fi->fib_prefsrc; 227 val ^= fi->fib_priority; 228 for_nexthops(fi) { 229 val ^= fib_devindex_hashfn(nh->nh_oif); 230 } endfor_nexthops(fi) 231 232 return (val ^ (val >> 7) ^ (val >> 12)) & mask; 233 } 234 235 static struct fib_info *fib_find_info(const struct fib_info *nfi) 236 { 237 struct hlist_head *head; 238 struct hlist_node *node; 239 struct fib_info *fi; 240 unsigned int hash; 241 242 hash = fib_info_hashfn(nfi); 243 head = &fib_info_hash[hash]; 244 245 hlist_for_each_entry(fi, node, head, fib_hash) { 246 if (!net_eq(fi->fib_net, nfi->fib_net)) 247 continue; 248 if (fi->fib_nhs != nfi->fib_nhs) 249 continue; 250 if (nfi->fib_protocol == fi->fib_protocol && 251 nfi->fib_prefsrc == fi->fib_prefsrc && 252 nfi->fib_priority == fi->fib_priority && 253 memcmp(nfi->fib_metrics, fi->fib_metrics, 254 sizeof(fi->fib_metrics)) == 0 && 255 ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 && 256 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 257 return fi; 258 } 259 260 return NULL; 261 } 262 263 /* Check, that the gateway is already configured. 264 * Used only by redirect accept routine. 265 */ 266 int ip_fib_check_default(__be32 gw, struct net_device *dev) 267 { 268 struct hlist_head *head; 269 struct hlist_node *node; 270 struct fib_nh *nh; 271 unsigned int hash; 272 273 spin_lock(&fib_info_lock); 274 275 hash = fib_devindex_hashfn(dev->ifindex); 276 head = &fib_info_devhash[hash]; 277 hlist_for_each_entry(nh, node, head, nh_hash) { 278 if (nh->nh_dev == dev && 279 nh->nh_gw == gw && 280 !(nh->nh_flags & RTNH_F_DEAD)) { 281 spin_unlock(&fib_info_lock); 282 return 0; 283 } 284 } 285 286 spin_unlock(&fib_info_lock); 287 288 return -1; 289 } 290 291 static inline size_t fib_nlmsg_size(struct fib_info *fi) 292 { 293 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) 294 + nla_total_size(4) /* RTA_TABLE */ 295 + nla_total_size(4) /* RTA_DST */ 296 + nla_total_size(4) /* RTA_PRIORITY */ 297 + nla_total_size(4); /* RTA_PREFSRC */ 298 299 /* space for nested metrics */ 300 payload += nla_total_size((RTAX_MAX * nla_total_size(4))); 301 302 if (fi->fib_nhs) { 303 /* Also handles the special case fib_nhs == 1 */ 304 305 /* each nexthop is packed in an attribute */ 306 size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); 307 308 /* may contain flow and gateway attribute */ 309 nhsize += 2 * nla_total_size(4); 310 311 /* all nexthops are packed in a nested attribute */ 312 payload += nla_total_size(fi->fib_nhs * nhsize); 313 } 314 315 return payload; 316 } 317 318 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, 319 int dst_len, u32 tb_id, struct nl_info *info, 320 unsigned int nlm_flags) 321 { 322 struct sk_buff *skb; 323 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 324 int err = -ENOBUFS; 325 326 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); 327 if (skb == NULL) 328 goto errout; 329 330 err = fib_dump_info(skb, info->pid, seq, event, tb_id, 331 fa->fa_type, fa->fa_scope, key, dst_len, 332 fa->fa_tos, fa->fa_info, nlm_flags); 333 if (err < 0) { 334 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ 335 WARN_ON(err == -EMSGSIZE); 336 kfree_skb(skb); 337 goto errout; 338 } 339 rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, 340 info->nlh, GFP_KERNEL); 341 return; 342 errout: 343 if (err < 0) 344 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); 345 } 346 347 /* Return the first fib alias matching TOS with 348 * priority less than or equal to PRIO. 349 */ 350 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) 351 { 352 if (fah) { 353 struct fib_alias *fa; 354 list_for_each_entry(fa, fah, fa_list) { 355 if (fa->fa_tos > tos) 356 continue; 357 if (fa->fa_info->fib_priority >= prio || 358 fa->fa_tos < tos) 359 return fa; 360 } 361 } 362 return NULL; 363 } 364 365 int fib_detect_death(struct fib_info *fi, int order, 366 struct fib_info **last_resort, int *last_idx, int dflt) 367 { 368 struct neighbour *n; 369 int state = NUD_NONE; 370 371 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); 372 if (n) { 373 state = n->nud_state; 374 neigh_release(n); 375 } 376 if (state == NUD_REACHABLE) 377 return 0; 378 if ((state & NUD_VALID) && order != dflt) 379 return 0; 380 if ((state & NUD_VALID) || 381 (*last_idx < 0 && order > dflt)) { 382 *last_resort = fi; 383 *last_idx = order; 384 } 385 return 1; 386 } 387 388 #ifdef CONFIG_IP_ROUTE_MULTIPATH 389 390 static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) 391 { 392 int nhs = 0; 393 394 while (rtnh_ok(rtnh, remaining)) { 395 nhs++; 396 rtnh = rtnh_next(rtnh, &remaining); 397 } 398 399 /* leftover implies invalid nexthop configuration, discard it */ 400 return remaining > 0 ? 0 : nhs; 401 } 402 403 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 404 int remaining, struct fib_config *cfg) 405 { 406 change_nexthops(fi) { 407 int attrlen; 408 409 if (!rtnh_ok(rtnh, remaining)) 410 return -EINVAL; 411 412 nexthop_nh->nh_flags = 413 (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; 414 nexthop_nh->nh_oif = rtnh->rtnh_ifindex; 415 nexthop_nh->nh_weight = rtnh->rtnh_hops + 1; 416 417 attrlen = rtnh_attrlen(rtnh); 418 if (attrlen > 0) { 419 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 420 421 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 422 nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0; 423 #ifdef CONFIG_IP_ROUTE_CLASSID 424 nla = nla_find(attrs, attrlen, RTA_FLOW); 425 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; 426 #endif 427 } 428 429 rtnh = rtnh_next(rtnh, &remaining); 430 } endfor_nexthops(fi); 431 432 return 0; 433 } 434 435 #endif 436 437 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) 438 { 439 #ifdef CONFIG_IP_ROUTE_MULTIPATH 440 struct rtnexthop *rtnh; 441 int remaining; 442 #endif 443 444 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) 445 return 1; 446 447 if (cfg->fc_oif || cfg->fc_gw) { 448 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && 449 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) 450 return 0; 451 return 1; 452 } 453 454 #ifdef CONFIG_IP_ROUTE_MULTIPATH 455 if (cfg->fc_mp == NULL) 456 return 0; 457 458 rtnh = cfg->fc_mp; 459 remaining = cfg->fc_mp_len; 460 461 for_nexthops(fi) { 462 int attrlen; 463 464 if (!rtnh_ok(rtnh, remaining)) 465 return -EINVAL; 466 467 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) 468 return 1; 469 470 attrlen = rtnh_attrlen(rtnh); 471 if (attrlen < 0) { 472 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 473 474 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 475 if (nla && nla_get_be32(nla) != nh->nh_gw) 476 return 1; 477 #ifdef CONFIG_IP_ROUTE_CLASSID 478 nla = nla_find(attrs, attrlen, RTA_FLOW); 479 if (nla && nla_get_u32(nla) != nh->nh_tclassid) 480 return 1; 481 #endif 482 } 483 484 rtnh = rtnh_next(rtnh, &remaining); 485 } endfor_nexthops(fi); 486 #endif 487 return 0; 488 } 489 490 491 /* 492 * Picture 493 * ------- 494 * 495 * Semantics of nexthop is very messy by historical reasons. 496 * We have to take into account, that: 497 * a) gateway can be actually local interface address, 498 * so that gatewayed route is direct. 499 * b) gateway must be on-link address, possibly 500 * described not by an ifaddr, but also by a direct route. 501 * c) If both gateway and interface are specified, they should not 502 * contradict. 503 * d) If we use tunnel routes, gateway could be not on-link. 504 * 505 * Attempt to reconcile all of these (alas, self-contradictory) conditions 506 * results in pretty ugly and hairy code with obscure logic. 507 * 508 * I chose to generalized it instead, so that the size 509 * of code does not increase practically, but it becomes 510 * much more general. 511 * Every prefix is assigned a "scope" value: "host" is local address, 512 * "link" is direct route, 513 * [ ... "site" ... "interior" ... ] 514 * and "universe" is true gateway route with global meaning. 515 * 516 * Every prefix refers to a set of "nexthop"s (gw, oif), 517 * where gw must have narrower scope. This recursion stops 518 * when gw has LOCAL scope or if "nexthop" is declared ONLINK, 519 * which means that gw is forced to be on link. 520 * 521 * Code is still hairy, but now it is apparently logically 522 * consistent and very flexible. F.e. as by-product it allows 523 * to co-exists in peace independent exterior and interior 524 * routing processes. 525 * 526 * Normally it looks as following. 527 * 528 * {universe prefix} -> (gw, oif) [scope link] 529 * | 530 * |-> {link prefix} -> (gw, oif) [scope local] 531 * | 532 * |-> {local prefix} (terminal node) 533 */ 534 static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, 535 struct fib_nh *nh) 536 { 537 int err; 538 struct net *net; 539 struct net_device *dev; 540 541 net = cfg->fc_nlinfo.nl_net; 542 if (nh->nh_gw) { 543 struct fib_result res; 544 545 if (nh->nh_flags & RTNH_F_ONLINK) { 546 547 if (cfg->fc_scope >= RT_SCOPE_LINK) 548 return -EINVAL; 549 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST) 550 return -EINVAL; 551 dev = __dev_get_by_index(net, nh->nh_oif); 552 if (!dev) 553 return -ENODEV; 554 if (!(dev->flags & IFF_UP)) 555 return -ENETDOWN; 556 nh->nh_dev = dev; 557 dev_hold(dev); 558 nh->nh_scope = RT_SCOPE_LINK; 559 return 0; 560 } 561 rcu_read_lock(); 562 { 563 struct flowi4 fl4 = { 564 .daddr = nh->nh_gw, 565 .flowi4_scope = cfg->fc_scope + 1, 566 .flowi4_oif = nh->nh_oif, 567 }; 568 569 /* It is not necessary, but requires a bit of thinking */ 570 if (fl4.flowi4_scope < RT_SCOPE_LINK) 571 fl4.flowi4_scope = RT_SCOPE_LINK; 572 err = fib_lookup(net, &fl4, &res); 573 if (err) { 574 rcu_read_unlock(); 575 return err; 576 } 577 } 578 err = -EINVAL; 579 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) 580 goto out; 581 nh->nh_scope = res.scope; 582 nh->nh_oif = FIB_RES_OIF(res); 583 nh->nh_dev = dev = FIB_RES_DEV(res); 584 if (!dev) 585 goto out; 586 dev_hold(dev); 587 err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN; 588 } else { 589 struct in_device *in_dev; 590 591 if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) 592 return -EINVAL; 593 594 rcu_read_lock(); 595 err = -ENODEV; 596 in_dev = inetdev_by_index(net, nh->nh_oif); 597 if (in_dev == NULL) 598 goto out; 599 err = -ENETDOWN; 600 if (!(in_dev->dev->flags & IFF_UP)) 601 goto out; 602 nh->nh_dev = in_dev->dev; 603 dev_hold(nh->nh_dev); 604 nh->nh_scope = RT_SCOPE_HOST; 605 err = 0; 606 } 607 out: 608 rcu_read_unlock(); 609 return err; 610 } 611 612 static inline unsigned int fib_laddr_hashfn(__be32 val) 613 { 614 unsigned int mask = (fib_info_hash_size - 1); 615 616 return ((__force u32)val ^ 617 ((__force u32)val >> 7) ^ 618 ((__force u32)val >> 14)) & mask; 619 } 620 621 static struct hlist_head *fib_info_hash_alloc(int bytes) 622 { 623 if (bytes <= PAGE_SIZE) 624 return kzalloc(bytes, GFP_KERNEL); 625 else 626 return (struct hlist_head *) 627 __get_free_pages(GFP_KERNEL | __GFP_ZERO, 628 get_order(bytes)); 629 } 630 631 static void fib_info_hash_free(struct hlist_head *hash, int bytes) 632 { 633 if (!hash) 634 return; 635 636 if (bytes <= PAGE_SIZE) 637 kfree(hash); 638 else 639 free_pages((unsigned long) hash, get_order(bytes)); 640 } 641 642 static void fib_info_hash_move(struct hlist_head *new_info_hash, 643 struct hlist_head *new_laddrhash, 644 unsigned int new_size) 645 { 646 struct hlist_head *old_info_hash, *old_laddrhash; 647 unsigned int old_size = fib_info_hash_size; 648 unsigned int i, bytes; 649 650 spin_lock_bh(&fib_info_lock); 651 old_info_hash = fib_info_hash; 652 old_laddrhash = fib_info_laddrhash; 653 fib_info_hash_size = new_size; 654 655 for (i = 0; i < old_size; i++) { 656 struct hlist_head *head = &fib_info_hash[i]; 657 struct hlist_node *node, *n; 658 struct fib_info *fi; 659 660 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) { 661 struct hlist_head *dest; 662 unsigned int new_hash; 663 664 hlist_del(&fi->fib_hash); 665 666 new_hash = fib_info_hashfn(fi); 667 dest = &new_info_hash[new_hash]; 668 hlist_add_head(&fi->fib_hash, dest); 669 } 670 } 671 fib_info_hash = new_info_hash; 672 673 for (i = 0; i < old_size; i++) { 674 struct hlist_head *lhead = &fib_info_laddrhash[i]; 675 struct hlist_node *node, *n; 676 struct fib_info *fi; 677 678 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) { 679 struct hlist_head *ldest; 680 unsigned int new_hash; 681 682 hlist_del(&fi->fib_lhash); 683 684 new_hash = fib_laddr_hashfn(fi->fib_prefsrc); 685 ldest = &new_laddrhash[new_hash]; 686 hlist_add_head(&fi->fib_lhash, ldest); 687 } 688 } 689 fib_info_laddrhash = new_laddrhash; 690 691 spin_unlock_bh(&fib_info_lock); 692 693 bytes = old_size * sizeof(struct hlist_head *); 694 fib_info_hash_free(old_info_hash, bytes); 695 fib_info_hash_free(old_laddrhash, bytes); 696 } 697 698 struct fib_info *fib_create_info(struct fib_config *cfg) 699 { 700 int err; 701 struct fib_info *fi = NULL; 702 struct fib_info *ofi; 703 int nhs = 1; 704 struct net *net = cfg->fc_nlinfo.nl_net; 705 706 if (cfg->fc_type > RTN_MAX) 707 goto err_inval; 708 709 /* Fast check to catch the most weird cases */ 710 if (fib_props[cfg->fc_type].scope > cfg->fc_scope) 711 goto err_inval; 712 713 #ifdef CONFIG_IP_ROUTE_MULTIPATH 714 if (cfg->fc_mp) { 715 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); 716 if (nhs == 0) 717 goto err_inval; 718 } 719 #endif 720 721 err = -ENOBUFS; 722 if (fib_info_cnt >= fib_info_hash_size) { 723 unsigned int new_size = fib_info_hash_size << 1; 724 struct hlist_head *new_info_hash; 725 struct hlist_head *new_laddrhash; 726 unsigned int bytes; 727 728 if (!new_size) 729 new_size = 1; 730 bytes = new_size * sizeof(struct hlist_head *); 731 new_info_hash = fib_info_hash_alloc(bytes); 732 new_laddrhash = fib_info_hash_alloc(bytes); 733 if (!new_info_hash || !new_laddrhash) { 734 fib_info_hash_free(new_info_hash, bytes); 735 fib_info_hash_free(new_laddrhash, bytes); 736 } else 737 fib_info_hash_move(new_info_hash, new_laddrhash, new_size); 738 739 if (!fib_info_hash_size) 740 goto failure; 741 } 742 743 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 744 if (fi == NULL) 745 goto failure; 746 if (cfg->fc_mx) { 747 fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); 748 if (!fi->fib_metrics) 749 goto failure; 750 } else 751 fi->fib_metrics = (u32 *) dst_default_metrics; 752 fib_info_cnt++; 753 754 fi->fib_net = hold_net(net); 755 fi->fib_protocol = cfg->fc_protocol; 756 fi->fib_flags = cfg->fc_flags; 757 fi->fib_priority = cfg->fc_priority; 758 fi->fib_prefsrc = cfg->fc_prefsrc; 759 760 fi->fib_nhs = nhs; 761 change_nexthops(fi) { 762 nexthop_nh->nh_parent = fi; 763 } endfor_nexthops(fi) 764 765 if (cfg->fc_mx) { 766 struct nlattr *nla; 767 int remaining; 768 769 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 770 int type = nla_type(nla); 771 772 if (type) { 773 if (type > RTAX_MAX) 774 goto err_inval; 775 fi->fib_metrics[type - 1] = nla_get_u32(nla); 776 } 777 } 778 } 779 780 if (cfg->fc_mp) { 781 #ifdef CONFIG_IP_ROUTE_MULTIPATH 782 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); 783 if (err != 0) 784 goto failure; 785 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) 786 goto err_inval; 787 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) 788 goto err_inval; 789 #ifdef CONFIG_IP_ROUTE_CLASSID 790 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) 791 goto err_inval; 792 #endif 793 #else 794 goto err_inval; 795 #endif 796 } else { 797 struct fib_nh *nh = fi->fib_nh; 798 799 nh->nh_oif = cfg->fc_oif; 800 nh->nh_gw = cfg->fc_gw; 801 nh->nh_flags = cfg->fc_flags; 802 #ifdef CONFIG_IP_ROUTE_CLASSID 803 nh->nh_tclassid = cfg->fc_flow; 804 #endif 805 #ifdef CONFIG_IP_ROUTE_MULTIPATH 806 nh->nh_weight = 1; 807 #endif 808 } 809 810 if (fib_props[cfg->fc_type].error) { 811 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) 812 goto err_inval; 813 goto link_it; 814 } else { 815 switch (cfg->fc_type) { 816 case RTN_UNICAST: 817 case RTN_LOCAL: 818 case RTN_BROADCAST: 819 case RTN_ANYCAST: 820 case RTN_MULTICAST: 821 break; 822 default: 823 goto err_inval; 824 } 825 } 826 827 if (cfg->fc_scope > RT_SCOPE_HOST) 828 goto err_inval; 829 830 if (cfg->fc_scope == RT_SCOPE_HOST) { 831 struct fib_nh *nh = fi->fib_nh; 832 833 /* Local address is added. */ 834 if (nhs != 1 || nh->nh_gw) 835 goto err_inval; 836 nh->nh_scope = RT_SCOPE_NOWHERE; 837 nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); 838 err = -ENODEV; 839 if (nh->nh_dev == NULL) 840 goto failure; 841 } else { 842 change_nexthops(fi) { 843 err = fib_check_nh(cfg, fi, nexthop_nh); 844 if (err != 0) 845 goto failure; 846 } endfor_nexthops(fi) 847 } 848 849 if (fi->fib_prefsrc) { 850 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 851 fi->fib_prefsrc != cfg->fc_dst) 852 if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL) 853 goto err_inval; 854 } 855 856 change_nexthops(fi) { 857 nexthop_nh->nh_cfg_scope = cfg->fc_scope; 858 nexthop_nh->nh_saddr = inet_select_addr(nexthop_nh->nh_dev, 859 nexthop_nh->nh_gw, 860 nexthop_nh->nh_cfg_scope); 861 } endfor_nexthops(fi) 862 863 link_it: 864 ofi = fib_find_info(fi); 865 if (ofi) { 866 fi->fib_dead = 1; 867 free_fib_info(fi); 868 ofi->fib_treeref++; 869 return ofi; 870 } 871 872 fi->fib_treeref++; 873 atomic_inc(&fi->fib_clntref); 874 spin_lock_bh(&fib_info_lock); 875 hlist_add_head(&fi->fib_hash, 876 &fib_info_hash[fib_info_hashfn(fi)]); 877 if (fi->fib_prefsrc) { 878 struct hlist_head *head; 879 880 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; 881 hlist_add_head(&fi->fib_lhash, head); 882 } 883 change_nexthops(fi) { 884 struct hlist_head *head; 885 unsigned int hash; 886 887 if (!nexthop_nh->nh_dev) 888 continue; 889 hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex); 890 head = &fib_info_devhash[hash]; 891 hlist_add_head(&nexthop_nh->nh_hash, head); 892 } endfor_nexthops(fi) 893 spin_unlock_bh(&fib_info_lock); 894 return fi; 895 896 err_inval: 897 err = -EINVAL; 898 899 failure: 900 if (fi) { 901 fi->fib_dead = 1; 902 free_fib_info(fi); 903 } 904 905 return ERR_PTR(err); 906 } 907 908 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 909 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, 910 struct fib_info *fi, unsigned int flags) 911 { 912 struct nlmsghdr *nlh; 913 struct rtmsg *rtm; 914 915 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); 916 if (nlh == NULL) 917 return -EMSGSIZE; 918 919 rtm = nlmsg_data(nlh); 920 rtm->rtm_family = AF_INET; 921 rtm->rtm_dst_len = dst_len; 922 rtm->rtm_src_len = 0; 923 rtm->rtm_tos = tos; 924 if (tb_id < 256) 925 rtm->rtm_table = tb_id; 926 else 927 rtm->rtm_table = RT_TABLE_COMPAT; 928 NLA_PUT_U32(skb, RTA_TABLE, tb_id); 929 rtm->rtm_type = type; 930 rtm->rtm_flags = fi->fib_flags; 931 rtm->rtm_scope = scope; 932 rtm->rtm_protocol = fi->fib_protocol; 933 934 if (rtm->rtm_dst_len) 935 NLA_PUT_BE32(skb, RTA_DST, dst); 936 937 if (fi->fib_priority) 938 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority); 939 940 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) 941 goto nla_put_failure; 942 943 if (fi->fib_prefsrc) 944 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc); 945 946 if (fi->fib_nhs == 1) { 947 if (fi->fib_nh->nh_gw) 948 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw); 949 950 if (fi->fib_nh->nh_oif) 951 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); 952 #ifdef CONFIG_IP_ROUTE_CLASSID 953 if (fi->fib_nh[0].nh_tclassid) 954 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); 955 #endif 956 } 957 #ifdef CONFIG_IP_ROUTE_MULTIPATH 958 if (fi->fib_nhs > 1) { 959 struct rtnexthop *rtnh; 960 struct nlattr *mp; 961 962 mp = nla_nest_start(skb, RTA_MULTIPATH); 963 if (mp == NULL) 964 goto nla_put_failure; 965 966 for_nexthops(fi) { 967 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 968 if (rtnh == NULL) 969 goto nla_put_failure; 970 971 rtnh->rtnh_flags = nh->nh_flags & 0xFF; 972 rtnh->rtnh_hops = nh->nh_weight - 1; 973 rtnh->rtnh_ifindex = nh->nh_oif; 974 975 if (nh->nh_gw) 976 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); 977 #ifdef CONFIG_IP_ROUTE_CLASSID 978 if (nh->nh_tclassid) 979 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); 980 #endif 981 /* length of rtnetlink header + attributes */ 982 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; 983 } endfor_nexthops(fi); 984 985 nla_nest_end(skb, mp); 986 } 987 #endif 988 return nlmsg_end(skb, nlh); 989 990 nla_put_failure: 991 nlmsg_cancel(skb, nlh); 992 return -EMSGSIZE; 993 } 994 995 /* 996 * Update FIB if: 997 * - local address disappeared -> we must delete all the entries 998 * referring to it. 999 * - device went down -> we must shutdown all nexthops going via it. 1000 */ 1001 int fib_sync_down_addr(struct net *net, __be32 local) 1002 { 1003 int ret = 0; 1004 unsigned int hash = fib_laddr_hashfn(local); 1005 struct hlist_head *head = &fib_info_laddrhash[hash]; 1006 struct hlist_node *node; 1007 struct fib_info *fi; 1008 1009 if (fib_info_laddrhash == NULL || local == 0) 1010 return 0; 1011 1012 hlist_for_each_entry(fi, node, head, fib_lhash) { 1013 if (!net_eq(fi->fib_net, net)) 1014 continue; 1015 if (fi->fib_prefsrc == local) { 1016 fi->fib_flags |= RTNH_F_DEAD; 1017 ret++; 1018 } 1019 } 1020 return ret; 1021 } 1022 1023 int fib_sync_down_dev(struct net_device *dev, int force) 1024 { 1025 int ret = 0; 1026 int scope = RT_SCOPE_NOWHERE; 1027 struct fib_info *prev_fi = NULL; 1028 unsigned int hash = fib_devindex_hashfn(dev->ifindex); 1029 struct hlist_head *head = &fib_info_devhash[hash]; 1030 struct hlist_node *node; 1031 struct fib_nh *nh; 1032 1033 if (force) 1034 scope = -1; 1035 1036 hlist_for_each_entry(nh, node, head, nh_hash) { 1037 struct fib_info *fi = nh->nh_parent; 1038 int dead; 1039 1040 BUG_ON(!fi->fib_nhs); 1041 if (nh->nh_dev != dev || fi == prev_fi) 1042 continue; 1043 prev_fi = fi; 1044 dead = 0; 1045 change_nexthops(fi) { 1046 if (nexthop_nh->nh_flags & RTNH_F_DEAD) 1047 dead++; 1048 else if (nexthop_nh->nh_dev == dev && 1049 nexthop_nh->nh_scope != scope) { 1050 nexthop_nh->nh_flags |= RTNH_F_DEAD; 1051 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1052 spin_lock_bh(&fib_multipath_lock); 1053 fi->fib_power -= nexthop_nh->nh_power; 1054 nexthop_nh->nh_power = 0; 1055 spin_unlock_bh(&fib_multipath_lock); 1056 #endif 1057 dead++; 1058 } 1059 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1060 if (force > 1 && nexthop_nh->nh_dev == dev) { 1061 dead = fi->fib_nhs; 1062 break; 1063 } 1064 #endif 1065 } endfor_nexthops(fi) 1066 if (dead == fi->fib_nhs) { 1067 fi->fib_flags |= RTNH_F_DEAD; 1068 ret++; 1069 } 1070 } 1071 1072 return ret; 1073 } 1074 1075 /* Must be invoked inside of an RCU protected region. */ 1076 void fib_select_default(struct fib_result *res) 1077 { 1078 struct fib_info *fi = NULL, *last_resort = NULL; 1079 struct list_head *fa_head = res->fa_head; 1080 struct fib_table *tb = res->table; 1081 int order = -1, last_idx = -1; 1082 struct fib_alias *fa; 1083 1084 list_for_each_entry_rcu(fa, fa_head, fa_list) { 1085 struct fib_info *next_fi = fa->fa_info; 1086 1087 if (fa->fa_scope != res->scope || 1088 fa->fa_type != RTN_UNICAST) 1089 continue; 1090 1091 if (next_fi->fib_priority > res->fi->fib_priority) 1092 break; 1093 if (!next_fi->fib_nh[0].nh_gw || 1094 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) 1095 continue; 1096 1097 fib_alias_accessed(fa); 1098 1099 if (fi == NULL) { 1100 if (next_fi != res->fi) 1101 break; 1102 } else if (!fib_detect_death(fi, order, &last_resort, 1103 &last_idx, tb->tb_default)) { 1104 fib_result_assign(res, fi); 1105 tb->tb_default = order; 1106 goto out; 1107 } 1108 fi = next_fi; 1109 order++; 1110 } 1111 1112 if (order <= 0 || fi == NULL) { 1113 tb->tb_default = -1; 1114 goto out; 1115 } 1116 1117 if (!fib_detect_death(fi, order, &last_resort, &last_idx, 1118 tb->tb_default)) { 1119 fib_result_assign(res, fi); 1120 tb->tb_default = order; 1121 goto out; 1122 } 1123 1124 if (last_idx >= 0) 1125 fib_result_assign(res, last_resort); 1126 tb->tb_default = last_idx; 1127 out: 1128 return; 1129 } 1130 1131 void fib_update_nh_saddrs(struct net_device *dev) 1132 { 1133 struct hlist_head *head; 1134 struct hlist_node *node; 1135 struct fib_nh *nh; 1136 unsigned int hash; 1137 1138 hash = fib_devindex_hashfn(dev->ifindex); 1139 head = &fib_info_devhash[hash]; 1140 hlist_for_each_entry(nh, node, head, nh_hash) { 1141 if (nh->nh_dev != dev) 1142 continue; 1143 nh->nh_saddr = inet_select_addr(nh->nh_dev, 1144 nh->nh_gw, 1145 nh->nh_cfg_scope); 1146 } 1147 } 1148 1149 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1150 1151 /* 1152 * Dead device goes up. We wake up dead nexthops. 1153 * It takes sense only on multipath routes. 1154 */ 1155 int fib_sync_up(struct net_device *dev) 1156 { 1157 struct fib_info *prev_fi; 1158 unsigned int hash; 1159 struct hlist_head *head; 1160 struct hlist_node *node; 1161 struct fib_nh *nh; 1162 int ret; 1163 1164 if (!(dev->flags & IFF_UP)) 1165 return 0; 1166 1167 prev_fi = NULL; 1168 hash = fib_devindex_hashfn(dev->ifindex); 1169 head = &fib_info_devhash[hash]; 1170 ret = 0; 1171 1172 hlist_for_each_entry(nh, node, head, nh_hash) { 1173 struct fib_info *fi = nh->nh_parent; 1174 int alive; 1175 1176 BUG_ON(!fi->fib_nhs); 1177 if (nh->nh_dev != dev || fi == prev_fi) 1178 continue; 1179 1180 prev_fi = fi; 1181 alive = 0; 1182 change_nexthops(fi) { 1183 if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) { 1184 alive++; 1185 continue; 1186 } 1187 if (nexthop_nh->nh_dev == NULL || 1188 !(nexthop_nh->nh_dev->flags & IFF_UP)) 1189 continue; 1190 if (nexthop_nh->nh_dev != dev || 1191 !__in_dev_get_rtnl(dev)) 1192 continue; 1193 alive++; 1194 spin_lock_bh(&fib_multipath_lock); 1195 nexthop_nh->nh_power = 0; 1196 nexthop_nh->nh_flags &= ~RTNH_F_DEAD; 1197 spin_unlock_bh(&fib_multipath_lock); 1198 } endfor_nexthops(fi) 1199 1200 if (alive > 0) { 1201 fi->fib_flags &= ~RTNH_F_DEAD; 1202 ret++; 1203 } 1204 } 1205 1206 return ret; 1207 } 1208 1209 /* 1210 * The algorithm is suboptimal, but it provides really 1211 * fair weighted route distribution. 1212 */ 1213 void fib_select_multipath(struct fib_result *res) 1214 { 1215 struct fib_info *fi = res->fi; 1216 int w; 1217 1218 spin_lock_bh(&fib_multipath_lock); 1219 if (fi->fib_power <= 0) { 1220 int power = 0; 1221 change_nexthops(fi) { 1222 if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) { 1223 power += nexthop_nh->nh_weight; 1224 nexthop_nh->nh_power = nexthop_nh->nh_weight; 1225 } 1226 } endfor_nexthops(fi); 1227 fi->fib_power = power; 1228 if (power <= 0) { 1229 spin_unlock_bh(&fib_multipath_lock); 1230 /* Race condition: route has just become dead. */ 1231 res->nh_sel = 0; 1232 return; 1233 } 1234 } 1235 1236 1237 /* w should be random number [0..fi->fib_power-1], 1238 * it is pretty bad approximation. 1239 */ 1240 1241 w = jiffies % fi->fib_power; 1242 1243 change_nexthops(fi) { 1244 if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) && 1245 nexthop_nh->nh_power) { 1246 w -= nexthop_nh->nh_power; 1247 if (w <= 0) { 1248 nexthop_nh->nh_power--; 1249 fi->fib_power--; 1250 res->nh_sel = nhsel; 1251 spin_unlock_bh(&fib_multipath_lock); 1252 return; 1253 } 1254 } 1255 } endfor_nexthops(fi); 1256 1257 /* Race condition: route has just become dead. */ 1258 res->nh_sel = 0; 1259 spin_unlock_bh(&fib_multipath_lock); 1260 } 1261 #endif 1262