1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * IPv4 Forwarding Information Base: semantics. 7 * 8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 */ 15 16 #include <asm/uaccess.h> 17 #include <asm/system.h> 18 #include <linux/bitops.h> 19 #include <linux/types.h> 20 #include <linux/kernel.h> 21 #include <linux/jiffies.h> 22 #include <linux/mm.h> 23 #include <linux/string.h> 24 #include <linux/socket.h> 25 #include <linux/sockios.h> 26 #include <linux/errno.h> 27 #include <linux/in.h> 28 #include <linux/inet.h> 29 #include <linux/inetdevice.h> 30 #include <linux/netdevice.h> 31 #include <linux/if_arp.h> 32 #include <linux/proc_fs.h> 33 #include <linux/skbuff.h> 34 #include <linux/init.h> 35 36 #include <net/arp.h> 37 #include <net/ip.h> 38 #include <net/protocol.h> 39 #include <net/route.h> 40 #include <net/tcp.h> 41 #include <net/sock.h> 42 #include <net/ip_fib.h> 43 #include <net/netlink.h> 44 #include <net/nexthop.h> 45 46 #include "fib_lookup.h" 47 48 static DEFINE_SPINLOCK(fib_info_lock); 49 static struct hlist_head *fib_info_hash; 50 static struct hlist_head *fib_info_laddrhash; 51 static unsigned int fib_hash_size; 52 static unsigned int fib_info_cnt; 53 54 #define DEVINDEX_HASHBITS 8 55 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) 56 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; 57 58 #ifdef CONFIG_IP_ROUTE_MULTIPATH 59 60 static DEFINE_SPINLOCK(fib_multipath_lock); 61 62 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ 63 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) 64 65 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ 66 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) 67 68 #else /* CONFIG_IP_ROUTE_MULTIPATH */ 69 70 /* Hope, that gcc will optimize it to get rid of dummy loop */ 71 72 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \ 73 for (nhsel=0; nhsel < 1; nhsel++) 74 75 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \ 76 for (nhsel=0; nhsel < 1; nhsel++) 77 78 #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 79 80 #define endfor_nexthops(fi) } 81 82 83 static const struct 84 { 85 int error; 86 u8 scope; 87 } fib_props[RTN_MAX + 1] = { 88 { 89 .error = 0, 90 .scope = RT_SCOPE_NOWHERE, 91 }, /* RTN_UNSPEC */ 92 { 93 .error = 0, 94 .scope = RT_SCOPE_UNIVERSE, 95 }, /* RTN_UNICAST */ 96 { 97 .error = 0, 98 .scope = RT_SCOPE_HOST, 99 }, /* RTN_LOCAL */ 100 { 101 .error = 0, 102 .scope = RT_SCOPE_LINK, 103 }, /* RTN_BROADCAST */ 104 { 105 .error = 0, 106 .scope = RT_SCOPE_LINK, 107 }, /* RTN_ANYCAST */ 108 { 109 .error = 0, 110 .scope = RT_SCOPE_UNIVERSE, 111 }, /* RTN_MULTICAST */ 112 { 113 .error = -EINVAL, 114 .scope = RT_SCOPE_UNIVERSE, 115 }, /* RTN_BLACKHOLE */ 116 { 117 .error = -EHOSTUNREACH, 118 .scope = RT_SCOPE_UNIVERSE, 119 }, /* RTN_UNREACHABLE */ 120 { 121 .error = -EACCES, 122 .scope = RT_SCOPE_UNIVERSE, 123 }, /* RTN_PROHIBIT */ 124 { 125 .error = -EAGAIN, 126 .scope = RT_SCOPE_UNIVERSE, 127 }, /* RTN_THROW */ 128 { 129 .error = -EINVAL, 130 .scope = RT_SCOPE_NOWHERE, 131 }, /* RTN_NAT */ 132 { 133 .error = -EINVAL, 134 .scope = RT_SCOPE_NOWHERE, 135 }, /* RTN_XRESOLVE */ 136 }; 137 138 139 /* Release a nexthop info record */ 140 141 void free_fib_info(struct fib_info *fi) 142 { 143 if (fi->fib_dead == 0) { 144 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi); 145 return; 146 } 147 change_nexthops(fi) { 148 if (nh->nh_dev) 149 dev_put(nh->nh_dev); 150 nh->nh_dev = NULL; 151 } endfor_nexthops(fi); 152 fib_info_cnt--; 153 release_net(fi->fib_net); 154 kfree(fi); 155 } 156 157 void fib_release_info(struct fib_info *fi) 158 { 159 spin_lock_bh(&fib_info_lock); 160 if (fi && --fi->fib_treeref == 0) { 161 hlist_del(&fi->fib_hash); 162 if (fi->fib_prefsrc) 163 hlist_del(&fi->fib_lhash); 164 change_nexthops(fi) { 165 if (!nh->nh_dev) 166 continue; 167 hlist_del(&nh->nh_hash); 168 } endfor_nexthops(fi) 169 fi->fib_dead = 1; 170 fib_info_put(fi); 171 } 172 spin_unlock_bh(&fib_info_lock); 173 } 174 175 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 176 { 177 const struct fib_nh *onh = ofi->fib_nh; 178 179 for_nexthops(fi) { 180 if (nh->nh_oif != onh->nh_oif || 181 nh->nh_gw != onh->nh_gw || 182 nh->nh_scope != onh->nh_scope || 183 #ifdef CONFIG_IP_ROUTE_MULTIPATH 184 nh->nh_weight != onh->nh_weight || 185 #endif 186 #ifdef CONFIG_NET_CLS_ROUTE 187 nh->nh_tclassid != onh->nh_tclassid || 188 #endif 189 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) 190 return -1; 191 onh++; 192 } endfor_nexthops(fi); 193 return 0; 194 } 195 196 static inline unsigned int fib_devindex_hashfn(unsigned int val) 197 { 198 unsigned int mask = DEVINDEX_HASHSIZE - 1; 199 200 return (val ^ 201 (val >> DEVINDEX_HASHBITS) ^ 202 (val >> (DEVINDEX_HASHBITS * 2))) & mask; 203 } 204 205 static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 206 { 207 unsigned int mask = (fib_hash_size - 1); 208 unsigned int val = fi->fib_nhs; 209 210 val ^= fi->fib_protocol; 211 val ^= (__force u32)fi->fib_prefsrc; 212 val ^= fi->fib_priority; 213 for_nexthops(fi) { 214 val ^= fib_devindex_hashfn(nh->nh_oif); 215 } endfor_nexthops(fi) 216 217 return (val ^ (val >> 7) ^ (val >> 12)) & mask; 218 } 219 220 static struct fib_info *fib_find_info(const struct fib_info *nfi) 221 { 222 struct hlist_head *head; 223 struct hlist_node *node; 224 struct fib_info *fi; 225 unsigned int hash; 226 227 hash = fib_info_hashfn(nfi); 228 head = &fib_info_hash[hash]; 229 230 hlist_for_each_entry(fi, node, head, fib_hash) { 231 if (fi->fib_net != nfi->fib_net) 232 continue; 233 if (fi->fib_nhs != nfi->fib_nhs) 234 continue; 235 if (nfi->fib_protocol == fi->fib_protocol && 236 nfi->fib_prefsrc == fi->fib_prefsrc && 237 nfi->fib_priority == fi->fib_priority && 238 memcmp(nfi->fib_metrics, fi->fib_metrics, 239 sizeof(fi->fib_metrics)) == 0 && 240 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && 241 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 242 return fi; 243 } 244 245 return NULL; 246 } 247 248 /* Check, that the gateway is already configured. 249 Used only by redirect accept routine. 250 */ 251 252 int ip_fib_check_default(__be32 gw, struct net_device *dev) 253 { 254 struct hlist_head *head; 255 struct hlist_node *node; 256 struct fib_nh *nh; 257 unsigned int hash; 258 259 spin_lock(&fib_info_lock); 260 261 hash = fib_devindex_hashfn(dev->ifindex); 262 head = &fib_info_devhash[hash]; 263 hlist_for_each_entry(nh, node, head, nh_hash) { 264 if (nh->nh_dev == dev && 265 nh->nh_gw == gw && 266 !(nh->nh_flags&RTNH_F_DEAD)) { 267 spin_unlock(&fib_info_lock); 268 return 0; 269 } 270 } 271 272 spin_unlock(&fib_info_lock); 273 274 return -1; 275 } 276 277 static inline size_t fib_nlmsg_size(struct fib_info *fi) 278 { 279 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) 280 + nla_total_size(4) /* RTA_TABLE */ 281 + nla_total_size(4) /* RTA_DST */ 282 + nla_total_size(4) /* RTA_PRIORITY */ 283 + nla_total_size(4); /* RTA_PREFSRC */ 284 285 /* space for nested metrics */ 286 payload += nla_total_size((RTAX_MAX * nla_total_size(4))); 287 288 if (fi->fib_nhs) { 289 /* Also handles the special case fib_nhs == 1 */ 290 291 /* each nexthop is packed in an attribute */ 292 size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); 293 294 /* may contain flow and gateway attribute */ 295 nhsize += 2 * nla_total_size(4); 296 297 /* all nexthops are packed in a nested attribute */ 298 payload += nla_total_size(fi->fib_nhs * nhsize); 299 } 300 301 return payload; 302 } 303 304 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, 305 int dst_len, u32 tb_id, struct nl_info *info, 306 unsigned int nlm_flags) 307 { 308 struct sk_buff *skb; 309 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 310 int err = -ENOBUFS; 311 312 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); 313 if (skb == NULL) 314 goto errout; 315 316 err = fib_dump_info(skb, info->pid, seq, event, tb_id, 317 fa->fa_type, fa->fa_scope, key, dst_len, 318 fa->fa_tos, fa->fa_info, nlm_flags); 319 if (err < 0) { 320 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ 321 WARN_ON(err == -EMSGSIZE); 322 kfree_skb(skb); 323 goto errout; 324 } 325 err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, 326 info->nlh, GFP_KERNEL); 327 errout: 328 if (err < 0) 329 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); 330 } 331 332 /* Return the first fib alias matching TOS with 333 * priority less than or equal to PRIO. 334 */ 335 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) 336 { 337 if (fah) { 338 struct fib_alias *fa; 339 list_for_each_entry(fa, fah, fa_list) { 340 if (fa->fa_tos > tos) 341 continue; 342 if (fa->fa_info->fib_priority >= prio || 343 fa->fa_tos < tos) 344 return fa; 345 } 346 } 347 return NULL; 348 } 349 350 int fib_detect_death(struct fib_info *fi, int order, 351 struct fib_info **last_resort, int *last_idx, int dflt) 352 { 353 struct neighbour *n; 354 int state = NUD_NONE; 355 356 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); 357 if (n) { 358 state = n->nud_state; 359 neigh_release(n); 360 } 361 if (state==NUD_REACHABLE) 362 return 0; 363 if ((state&NUD_VALID) && order != dflt) 364 return 0; 365 if ((state&NUD_VALID) || 366 (*last_idx<0 && order > dflt)) { 367 *last_resort = fi; 368 *last_idx = order; 369 } 370 return 1; 371 } 372 373 #ifdef CONFIG_IP_ROUTE_MULTIPATH 374 375 static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) 376 { 377 int nhs = 0; 378 379 while (rtnh_ok(rtnh, remaining)) { 380 nhs++; 381 rtnh = rtnh_next(rtnh, &remaining); 382 } 383 384 /* leftover implies invalid nexthop configuration, discard it */ 385 return remaining > 0 ? 0 : nhs; 386 } 387 388 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 389 int remaining, struct fib_config *cfg) 390 { 391 change_nexthops(fi) { 392 int attrlen; 393 394 if (!rtnh_ok(rtnh, remaining)) 395 return -EINVAL; 396 397 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; 398 nh->nh_oif = rtnh->rtnh_ifindex; 399 nh->nh_weight = rtnh->rtnh_hops + 1; 400 401 attrlen = rtnh_attrlen(rtnh); 402 if (attrlen > 0) { 403 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 404 405 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 406 nh->nh_gw = nla ? nla_get_be32(nla) : 0; 407 #ifdef CONFIG_NET_CLS_ROUTE 408 nla = nla_find(attrs, attrlen, RTA_FLOW); 409 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; 410 #endif 411 } 412 413 rtnh = rtnh_next(rtnh, &remaining); 414 } endfor_nexthops(fi); 415 416 return 0; 417 } 418 419 #endif 420 421 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) 422 { 423 #ifdef CONFIG_IP_ROUTE_MULTIPATH 424 struct rtnexthop *rtnh; 425 int remaining; 426 #endif 427 428 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) 429 return 1; 430 431 if (cfg->fc_oif || cfg->fc_gw) { 432 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && 433 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) 434 return 0; 435 return 1; 436 } 437 438 #ifdef CONFIG_IP_ROUTE_MULTIPATH 439 if (cfg->fc_mp == NULL) 440 return 0; 441 442 rtnh = cfg->fc_mp; 443 remaining = cfg->fc_mp_len; 444 445 for_nexthops(fi) { 446 int attrlen; 447 448 if (!rtnh_ok(rtnh, remaining)) 449 return -EINVAL; 450 451 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) 452 return 1; 453 454 attrlen = rtnh_attrlen(rtnh); 455 if (attrlen < 0) { 456 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 457 458 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 459 if (nla && nla_get_be32(nla) != nh->nh_gw) 460 return 1; 461 #ifdef CONFIG_NET_CLS_ROUTE 462 nla = nla_find(attrs, attrlen, RTA_FLOW); 463 if (nla && nla_get_u32(nla) != nh->nh_tclassid) 464 return 1; 465 #endif 466 } 467 468 rtnh = rtnh_next(rtnh, &remaining); 469 } endfor_nexthops(fi); 470 #endif 471 return 0; 472 } 473 474 475 /* 476 Picture 477 ------- 478 479 Semantics of nexthop is very messy by historical reasons. 480 We have to take into account, that: 481 a) gateway can be actually local interface address, 482 so that gatewayed route is direct. 483 b) gateway must be on-link address, possibly 484 described not by an ifaddr, but also by a direct route. 485 c) If both gateway and interface are specified, they should not 486 contradict. 487 d) If we use tunnel routes, gateway could be not on-link. 488 489 Attempt to reconcile all of these (alas, self-contradictory) conditions 490 results in pretty ugly and hairy code with obscure logic. 491 492 I chose to generalized it instead, so that the size 493 of code does not increase practically, but it becomes 494 much more general. 495 Every prefix is assigned a "scope" value: "host" is local address, 496 "link" is direct route, 497 [ ... "site" ... "interior" ... ] 498 and "universe" is true gateway route with global meaning. 499 500 Every prefix refers to a set of "nexthop"s (gw, oif), 501 where gw must have narrower scope. This recursion stops 502 when gw has LOCAL scope or if "nexthop" is declared ONLINK, 503 which means that gw is forced to be on link. 504 505 Code is still hairy, but now it is apparently logically 506 consistent and very flexible. F.e. as by-product it allows 507 to co-exists in peace independent exterior and interior 508 routing processes. 509 510 Normally it looks as following. 511 512 {universe prefix} -> (gw, oif) [scope link] 513 | 514 |-> {link prefix} -> (gw, oif) [scope local] 515 | 516 |-> {local prefix} (terminal node) 517 */ 518 519 static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, 520 struct fib_nh *nh) 521 { 522 int err; 523 struct net *net; 524 525 net = cfg->fc_nlinfo.nl_net; 526 if (nh->nh_gw) { 527 struct fib_result res; 528 529 #ifdef CONFIG_IP_ROUTE_PERVASIVE 530 if (nh->nh_flags&RTNH_F_PERVASIVE) 531 return 0; 532 #endif 533 if (nh->nh_flags&RTNH_F_ONLINK) { 534 struct net_device *dev; 535 536 if (cfg->fc_scope >= RT_SCOPE_LINK) 537 return -EINVAL; 538 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST) 539 return -EINVAL; 540 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL) 541 return -ENODEV; 542 if (!(dev->flags&IFF_UP)) 543 return -ENETDOWN; 544 nh->nh_dev = dev; 545 dev_hold(dev); 546 nh->nh_scope = RT_SCOPE_LINK; 547 return 0; 548 } 549 { 550 struct flowi fl = { 551 .nl_u = { 552 .ip4_u = { 553 .daddr = nh->nh_gw, 554 .scope = cfg->fc_scope + 1, 555 }, 556 }, 557 .oif = nh->nh_oif, 558 }; 559 560 /* It is not necessary, but requires a bit of thinking */ 561 if (fl.fl4_scope < RT_SCOPE_LINK) 562 fl.fl4_scope = RT_SCOPE_LINK; 563 if ((err = fib_lookup(net, &fl, &res)) != 0) 564 return err; 565 } 566 err = -EINVAL; 567 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) 568 goto out; 569 nh->nh_scope = res.scope; 570 nh->nh_oif = FIB_RES_OIF(res); 571 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) 572 goto out; 573 dev_hold(nh->nh_dev); 574 err = -ENETDOWN; 575 if (!(nh->nh_dev->flags & IFF_UP)) 576 goto out; 577 err = 0; 578 out: 579 fib_res_put(&res); 580 return err; 581 } else { 582 struct in_device *in_dev; 583 584 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) 585 return -EINVAL; 586 587 in_dev = inetdev_by_index(net, nh->nh_oif); 588 if (in_dev == NULL) 589 return -ENODEV; 590 if (!(in_dev->dev->flags&IFF_UP)) { 591 in_dev_put(in_dev); 592 return -ENETDOWN; 593 } 594 nh->nh_dev = in_dev->dev; 595 dev_hold(nh->nh_dev); 596 nh->nh_scope = RT_SCOPE_HOST; 597 in_dev_put(in_dev); 598 } 599 return 0; 600 } 601 602 static inline unsigned int fib_laddr_hashfn(__be32 val) 603 { 604 unsigned int mask = (fib_hash_size - 1); 605 606 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; 607 } 608 609 static struct hlist_head *fib_hash_alloc(int bytes) 610 { 611 if (bytes <= PAGE_SIZE) 612 return kzalloc(bytes, GFP_KERNEL); 613 else 614 return (struct hlist_head *) 615 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes)); 616 } 617 618 static void fib_hash_free(struct hlist_head *hash, int bytes) 619 { 620 if (!hash) 621 return; 622 623 if (bytes <= PAGE_SIZE) 624 kfree(hash); 625 else 626 free_pages((unsigned long) hash, get_order(bytes)); 627 } 628 629 static void fib_hash_move(struct hlist_head *new_info_hash, 630 struct hlist_head *new_laddrhash, 631 unsigned int new_size) 632 { 633 struct hlist_head *old_info_hash, *old_laddrhash; 634 unsigned int old_size = fib_hash_size; 635 unsigned int i, bytes; 636 637 spin_lock_bh(&fib_info_lock); 638 old_info_hash = fib_info_hash; 639 old_laddrhash = fib_info_laddrhash; 640 fib_hash_size = new_size; 641 642 for (i = 0; i < old_size; i++) { 643 struct hlist_head *head = &fib_info_hash[i]; 644 struct hlist_node *node, *n; 645 struct fib_info *fi; 646 647 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) { 648 struct hlist_head *dest; 649 unsigned int new_hash; 650 651 hlist_del(&fi->fib_hash); 652 653 new_hash = fib_info_hashfn(fi); 654 dest = &new_info_hash[new_hash]; 655 hlist_add_head(&fi->fib_hash, dest); 656 } 657 } 658 fib_info_hash = new_info_hash; 659 660 for (i = 0; i < old_size; i++) { 661 struct hlist_head *lhead = &fib_info_laddrhash[i]; 662 struct hlist_node *node, *n; 663 struct fib_info *fi; 664 665 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) { 666 struct hlist_head *ldest; 667 unsigned int new_hash; 668 669 hlist_del(&fi->fib_lhash); 670 671 new_hash = fib_laddr_hashfn(fi->fib_prefsrc); 672 ldest = &new_laddrhash[new_hash]; 673 hlist_add_head(&fi->fib_lhash, ldest); 674 } 675 } 676 fib_info_laddrhash = new_laddrhash; 677 678 spin_unlock_bh(&fib_info_lock); 679 680 bytes = old_size * sizeof(struct hlist_head *); 681 fib_hash_free(old_info_hash, bytes); 682 fib_hash_free(old_laddrhash, bytes); 683 } 684 685 struct fib_info *fib_create_info(struct fib_config *cfg) 686 { 687 int err; 688 struct fib_info *fi = NULL; 689 struct fib_info *ofi; 690 int nhs = 1; 691 struct net *net = cfg->fc_nlinfo.nl_net; 692 693 /* Fast check to catch the most weird cases */ 694 if (fib_props[cfg->fc_type].scope > cfg->fc_scope) 695 goto err_inval; 696 697 #ifdef CONFIG_IP_ROUTE_MULTIPATH 698 if (cfg->fc_mp) { 699 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); 700 if (nhs == 0) 701 goto err_inval; 702 } 703 #endif 704 705 err = -ENOBUFS; 706 if (fib_info_cnt >= fib_hash_size) { 707 unsigned int new_size = fib_hash_size << 1; 708 struct hlist_head *new_info_hash; 709 struct hlist_head *new_laddrhash; 710 unsigned int bytes; 711 712 if (!new_size) 713 new_size = 1; 714 bytes = new_size * sizeof(struct hlist_head *); 715 new_info_hash = fib_hash_alloc(bytes); 716 new_laddrhash = fib_hash_alloc(bytes); 717 if (!new_info_hash || !new_laddrhash) { 718 fib_hash_free(new_info_hash, bytes); 719 fib_hash_free(new_laddrhash, bytes); 720 } else 721 fib_hash_move(new_info_hash, new_laddrhash, new_size); 722 723 if (!fib_hash_size) 724 goto failure; 725 } 726 727 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 728 if (fi == NULL) 729 goto failure; 730 fib_info_cnt++; 731 732 fi->fib_net = hold_net(net); 733 fi->fib_protocol = cfg->fc_protocol; 734 fi->fib_flags = cfg->fc_flags; 735 fi->fib_priority = cfg->fc_priority; 736 fi->fib_prefsrc = cfg->fc_prefsrc; 737 738 fi->fib_nhs = nhs; 739 change_nexthops(fi) { 740 nh->nh_parent = fi; 741 } endfor_nexthops(fi) 742 743 if (cfg->fc_mx) { 744 struct nlattr *nla; 745 int remaining; 746 747 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 748 int type = nla_type(nla); 749 750 if (type) { 751 if (type > RTAX_MAX) 752 goto err_inval; 753 fi->fib_metrics[type - 1] = nla_get_u32(nla); 754 } 755 } 756 } 757 758 if (cfg->fc_mp) { 759 #ifdef CONFIG_IP_ROUTE_MULTIPATH 760 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); 761 if (err != 0) 762 goto failure; 763 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) 764 goto err_inval; 765 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) 766 goto err_inval; 767 #ifdef CONFIG_NET_CLS_ROUTE 768 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) 769 goto err_inval; 770 #endif 771 #else 772 goto err_inval; 773 #endif 774 } else { 775 struct fib_nh *nh = fi->fib_nh; 776 777 nh->nh_oif = cfg->fc_oif; 778 nh->nh_gw = cfg->fc_gw; 779 nh->nh_flags = cfg->fc_flags; 780 #ifdef CONFIG_NET_CLS_ROUTE 781 nh->nh_tclassid = cfg->fc_flow; 782 #endif 783 #ifdef CONFIG_IP_ROUTE_MULTIPATH 784 nh->nh_weight = 1; 785 #endif 786 } 787 788 if (fib_props[cfg->fc_type].error) { 789 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) 790 goto err_inval; 791 goto link_it; 792 } 793 794 if (cfg->fc_scope > RT_SCOPE_HOST) 795 goto err_inval; 796 797 if (cfg->fc_scope == RT_SCOPE_HOST) { 798 struct fib_nh *nh = fi->fib_nh; 799 800 /* Local address is added. */ 801 if (nhs != 1 || nh->nh_gw) 802 goto err_inval; 803 nh->nh_scope = RT_SCOPE_NOWHERE; 804 nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); 805 err = -ENODEV; 806 if (nh->nh_dev == NULL) 807 goto failure; 808 } else { 809 change_nexthops(fi) { 810 if ((err = fib_check_nh(cfg, fi, nh)) != 0) 811 goto failure; 812 } endfor_nexthops(fi) 813 } 814 815 if (fi->fib_prefsrc) { 816 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 817 fi->fib_prefsrc != cfg->fc_dst) 818 if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL) 819 goto err_inval; 820 } 821 822 link_it: 823 if ((ofi = fib_find_info(fi)) != NULL) { 824 fi->fib_dead = 1; 825 free_fib_info(fi); 826 ofi->fib_treeref++; 827 return ofi; 828 } 829 830 fi->fib_treeref++; 831 atomic_inc(&fi->fib_clntref); 832 spin_lock_bh(&fib_info_lock); 833 hlist_add_head(&fi->fib_hash, 834 &fib_info_hash[fib_info_hashfn(fi)]); 835 if (fi->fib_prefsrc) { 836 struct hlist_head *head; 837 838 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; 839 hlist_add_head(&fi->fib_lhash, head); 840 } 841 change_nexthops(fi) { 842 struct hlist_head *head; 843 unsigned int hash; 844 845 if (!nh->nh_dev) 846 continue; 847 hash = fib_devindex_hashfn(nh->nh_dev->ifindex); 848 head = &fib_info_devhash[hash]; 849 hlist_add_head(&nh->nh_hash, head); 850 } endfor_nexthops(fi) 851 spin_unlock_bh(&fib_info_lock); 852 return fi; 853 854 err_inval: 855 err = -EINVAL; 856 857 failure: 858 if (fi) { 859 fi->fib_dead = 1; 860 free_fib_info(fi); 861 } 862 863 return ERR_PTR(err); 864 } 865 866 /* Note! fib_semantic_match intentionally uses RCU list functions. */ 867 int fib_semantic_match(struct list_head *head, const struct flowi *flp, 868 struct fib_result *res, __be32 zone, __be32 mask, 869 int prefixlen) 870 { 871 struct fib_alias *fa; 872 int nh_sel = 0; 873 874 list_for_each_entry_rcu(fa, head, fa_list) { 875 int err; 876 877 if (fa->fa_tos && 878 fa->fa_tos != flp->fl4_tos) 879 continue; 880 881 if (fa->fa_scope < flp->fl4_scope) 882 continue; 883 884 fa->fa_state |= FA_S_ACCESSED; 885 886 err = fib_props[fa->fa_type].error; 887 if (err == 0) { 888 struct fib_info *fi = fa->fa_info; 889 890 if (fi->fib_flags & RTNH_F_DEAD) 891 continue; 892 893 switch (fa->fa_type) { 894 case RTN_UNICAST: 895 case RTN_LOCAL: 896 case RTN_BROADCAST: 897 case RTN_ANYCAST: 898 case RTN_MULTICAST: 899 for_nexthops(fi) { 900 if (nh->nh_flags&RTNH_F_DEAD) 901 continue; 902 if (!flp->oif || flp->oif == nh->nh_oif) 903 break; 904 } 905 #ifdef CONFIG_IP_ROUTE_MULTIPATH 906 if (nhsel < fi->fib_nhs) { 907 nh_sel = nhsel; 908 goto out_fill_res; 909 } 910 #else 911 if (nhsel < 1) { 912 goto out_fill_res; 913 } 914 #endif 915 endfor_nexthops(fi); 916 continue; 917 918 default: 919 printk(KERN_WARNING "fib_semantic_match bad type %#x\n", 920 fa->fa_type); 921 return -EINVAL; 922 } 923 } 924 return err; 925 } 926 return 1; 927 928 out_fill_res: 929 res->prefixlen = prefixlen; 930 res->nh_sel = nh_sel; 931 res->type = fa->fa_type; 932 res->scope = fa->fa_scope; 933 res->fi = fa->fa_info; 934 atomic_inc(&res->fi->fib_clntref); 935 return 0; 936 } 937 938 /* Find appropriate source address to this destination */ 939 940 __be32 __fib_res_prefsrc(struct fib_result *res) 941 { 942 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); 943 } 944 945 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 946 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, 947 struct fib_info *fi, unsigned int flags) 948 { 949 struct nlmsghdr *nlh; 950 struct rtmsg *rtm; 951 952 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); 953 if (nlh == NULL) 954 return -EMSGSIZE; 955 956 rtm = nlmsg_data(nlh); 957 rtm->rtm_family = AF_INET; 958 rtm->rtm_dst_len = dst_len; 959 rtm->rtm_src_len = 0; 960 rtm->rtm_tos = tos; 961 if (tb_id < 256) 962 rtm->rtm_table = tb_id; 963 else 964 rtm->rtm_table = RT_TABLE_COMPAT; 965 NLA_PUT_U32(skb, RTA_TABLE, tb_id); 966 rtm->rtm_type = type; 967 rtm->rtm_flags = fi->fib_flags; 968 rtm->rtm_scope = scope; 969 rtm->rtm_protocol = fi->fib_protocol; 970 971 if (rtm->rtm_dst_len) 972 NLA_PUT_BE32(skb, RTA_DST, dst); 973 974 if (fi->fib_priority) 975 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority); 976 977 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) 978 goto nla_put_failure; 979 980 if (fi->fib_prefsrc) 981 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc); 982 983 if (fi->fib_nhs == 1) { 984 if (fi->fib_nh->nh_gw) 985 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw); 986 987 if (fi->fib_nh->nh_oif) 988 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); 989 #ifdef CONFIG_NET_CLS_ROUTE 990 if (fi->fib_nh[0].nh_tclassid) 991 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); 992 #endif 993 } 994 #ifdef CONFIG_IP_ROUTE_MULTIPATH 995 if (fi->fib_nhs > 1) { 996 struct rtnexthop *rtnh; 997 struct nlattr *mp; 998 999 mp = nla_nest_start(skb, RTA_MULTIPATH); 1000 if (mp == NULL) 1001 goto nla_put_failure; 1002 1003 for_nexthops(fi) { 1004 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 1005 if (rtnh == NULL) 1006 goto nla_put_failure; 1007 1008 rtnh->rtnh_flags = nh->nh_flags & 0xFF; 1009 rtnh->rtnh_hops = nh->nh_weight - 1; 1010 rtnh->rtnh_ifindex = nh->nh_oif; 1011 1012 if (nh->nh_gw) 1013 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); 1014 #ifdef CONFIG_NET_CLS_ROUTE 1015 if (nh->nh_tclassid) 1016 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); 1017 #endif 1018 /* length of rtnetlink header + attributes */ 1019 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; 1020 } endfor_nexthops(fi); 1021 1022 nla_nest_end(skb, mp); 1023 } 1024 #endif 1025 return nlmsg_end(skb, nlh); 1026 1027 nla_put_failure: 1028 nlmsg_cancel(skb, nlh); 1029 return -EMSGSIZE; 1030 } 1031 1032 /* 1033 Update FIB if: 1034 - local address disappeared -> we must delete all the entries 1035 referring to it. 1036 - device went down -> we must shutdown all nexthops going via it. 1037 */ 1038 int fib_sync_down_addr(struct net *net, __be32 local) 1039 { 1040 int ret = 0; 1041 unsigned int hash = fib_laddr_hashfn(local); 1042 struct hlist_head *head = &fib_info_laddrhash[hash]; 1043 struct hlist_node *node; 1044 struct fib_info *fi; 1045 1046 if (fib_info_laddrhash == NULL || local == 0) 1047 return 0; 1048 1049 hlist_for_each_entry(fi, node, head, fib_lhash) { 1050 if (fi->fib_net != net) 1051 continue; 1052 if (fi->fib_prefsrc == local) { 1053 fi->fib_flags |= RTNH_F_DEAD; 1054 ret++; 1055 } 1056 } 1057 return ret; 1058 } 1059 1060 int fib_sync_down_dev(struct net_device *dev, int force) 1061 { 1062 int ret = 0; 1063 int scope = RT_SCOPE_NOWHERE; 1064 struct fib_info *prev_fi = NULL; 1065 unsigned int hash = fib_devindex_hashfn(dev->ifindex); 1066 struct hlist_head *head = &fib_info_devhash[hash]; 1067 struct hlist_node *node; 1068 struct fib_nh *nh; 1069 1070 if (force) 1071 scope = -1; 1072 1073 hlist_for_each_entry(nh, node, head, nh_hash) { 1074 struct fib_info *fi = nh->nh_parent; 1075 int dead; 1076 1077 BUG_ON(!fi->fib_nhs); 1078 if (nh->nh_dev != dev || fi == prev_fi) 1079 continue; 1080 prev_fi = fi; 1081 dead = 0; 1082 change_nexthops(fi) { 1083 if (nh->nh_flags&RTNH_F_DEAD) 1084 dead++; 1085 else if (nh->nh_dev == dev && 1086 nh->nh_scope != scope) { 1087 nh->nh_flags |= RTNH_F_DEAD; 1088 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1089 spin_lock_bh(&fib_multipath_lock); 1090 fi->fib_power -= nh->nh_power; 1091 nh->nh_power = 0; 1092 spin_unlock_bh(&fib_multipath_lock); 1093 #endif 1094 dead++; 1095 } 1096 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1097 if (force > 1 && nh->nh_dev == dev) { 1098 dead = fi->fib_nhs; 1099 break; 1100 } 1101 #endif 1102 } endfor_nexthops(fi) 1103 if (dead == fi->fib_nhs) { 1104 fi->fib_flags |= RTNH_F_DEAD; 1105 ret++; 1106 } 1107 } 1108 1109 return ret; 1110 } 1111 1112 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1113 1114 /* 1115 Dead device goes up. We wake up dead nexthops. 1116 It takes sense only on multipath routes. 1117 */ 1118 1119 int fib_sync_up(struct net_device *dev) 1120 { 1121 struct fib_info *prev_fi; 1122 unsigned int hash; 1123 struct hlist_head *head; 1124 struct hlist_node *node; 1125 struct fib_nh *nh; 1126 int ret; 1127 1128 if (!(dev->flags&IFF_UP)) 1129 return 0; 1130 1131 prev_fi = NULL; 1132 hash = fib_devindex_hashfn(dev->ifindex); 1133 head = &fib_info_devhash[hash]; 1134 ret = 0; 1135 1136 hlist_for_each_entry(nh, node, head, nh_hash) { 1137 struct fib_info *fi = nh->nh_parent; 1138 int alive; 1139 1140 BUG_ON(!fi->fib_nhs); 1141 if (nh->nh_dev != dev || fi == prev_fi) 1142 continue; 1143 1144 prev_fi = fi; 1145 alive = 0; 1146 change_nexthops(fi) { 1147 if (!(nh->nh_flags&RTNH_F_DEAD)) { 1148 alive++; 1149 continue; 1150 } 1151 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) 1152 continue; 1153 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) 1154 continue; 1155 alive++; 1156 spin_lock_bh(&fib_multipath_lock); 1157 nh->nh_power = 0; 1158 nh->nh_flags &= ~RTNH_F_DEAD; 1159 spin_unlock_bh(&fib_multipath_lock); 1160 } endfor_nexthops(fi) 1161 1162 if (alive > 0) { 1163 fi->fib_flags &= ~RTNH_F_DEAD; 1164 ret++; 1165 } 1166 } 1167 1168 return ret; 1169 } 1170 1171 /* 1172 The algorithm is suboptimal, but it provides really 1173 fair weighted route distribution. 1174 */ 1175 1176 void fib_select_multipath(const struct flowi *flp, struct fib_result *res) 1177 { 1178 struct fib_info *fi = res->fi; 1179 int w; 1180 1181 spin_lock_bh(&fib_multipath_lock); 1182 if (fi->fib_power <= 0) { 1183 int power = 0; 1184 change_nexthops(fi) { 1185 if (!(nh->nh_flags&RTNH_F_DEAD)) { 1186 power += nh->nh_weight; 1187 nh->nh_power = nh->nh_weight; 1188 } 1189 } endfor_nexthops(fi); 1190 fi->fib_power = power; 1191 if (power <= 0) { 1192 spin_unlock_bh(&fib_multipath_lock); 1193 /* Race condition: route has just become dead. */ 1194 res->nh_sel = 0; 1195 return; 1196 } 1197 } 1198 1199 1200 /* w should be random number [0..fi->fib_power-1], 1201 it is pretty bad approximation. 1202 */ 1203 1204 w = jiffies % fi->fib_power; 1205 1206 change_nexthops(fi) { 1207 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { 1208 if ((w -= nh->nh_power) <= 0) { 1209 nh->nh_power--; 1210 fi->fib_power--; 1211 res->nh_sel = nhsel; 1212 spin_unlock_bh(&fib_multipath_lock); 1213 return; 1214 } 1215 } 1216 } endfor_nexthops(fi); 1217 1218 /* Race condition: route has just become dead. */ 1219 res->nh_sel = 0; 1220 spin_unlock_bh(&fib_multipath_lock); 1221 } 1222 #endif 1223