1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * IPv4 Forwarding Information Base: semantics. 7 * 8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $ 9 * 10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * as published by the Free Software Foundation; either version 15 * 2 of the License, or (at your option) any later version. 16 */ 17 18 #include <asm/uaccess.h> 19 #include <asm/system.h> 20 #include <linux/bitops.h> 21 #include <linux/types.h> 22 #include <linux/kernel.h> 23 #include <linux/jiffies.h> 24 #include <linux/mm.h> 25 #include <linux/string.h> 26 #include <linux/socket.h> 27 #include <linux/sockios.h> 28 #include <linux/errno.h> 29 #include <linux/in.h> 30 #include <linux/inet.h> 31 #include <linux/inetdevice.h> 32 #include <linux/netdevice.h> 33 #include <linux/if_arp.h> 34 #include <linux/proc_fs.h> 35 #include <linux/skbuff.h> 36 #include <linux/init.h> 37 38 #include <net/arp.h> 39 #include <net/ip.h> 40 #include <net/protocol.h> 41 #include <net/route.h> 42 #include <net/tcp.h> 43 #include <net/sock.h> 44 #include <net/ip_fib.h> 45 #include <net/netlink.h> 46 #include <net/nexthop.h> 47 48 #include "fib_lookup.h" 49 50 static DEFINE_SPINLOCK(fib_info_lock); 51 static struct hlist_head *fib_info_hash; 52 static struct hlist_head *fib_info_laddrhash; 53 static unsigned int fib_hash_size; 54 static unsigned int fib_info_cnt; 55 56 #define DEVINDEX_HASHBITS 8 57 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) 58 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; 59 60 #ifdef CONFIG_IP_ROUTE_MULTIPATH 61 62 static DEFINE_SPINLOCK(fib_multipath_lock); 63 64 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ 65 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) 66 67 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ 68 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) 69 70 #else /* CONFIG_IP_ROUTE_MULTIPATH */ 71 72 /* Hope, that gcc will optimize it to get rid of dummy loop */ 73 74 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \ 75 for (nhsel=0; nhsel < 1; nhsel++) 76 77 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \ 78 for (nhsel=0; nhsel < 1; nhsel++) 79 80 #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 81 82 #define endfor_nexthops(fi) } 83 84 85 static const struct 86 { 87 int error; 88 u8 scope; 89 } fib_props[RTN_MAX + 1] = { 90 { 91 .error = 0, 92 .scope = RT_SCOPE_NOWHERE, 93 }, /* RTN_UNSPEC */ 94 { 95 .error = 0, 96 .scope = RT_SCOPE_UNIVERSE, 97 }, /* RTN_UNICAST */ 98 { 99 .error = 0, 100 .scope = RT_SCOPE_HOST, 101 }, /* RTN_LOCAL */ 102 { 103 .error = 0, 104 .scope = RT_SCOPE_LINK, 105 }, /* RTN_BROADCAST */ 106 { 107 .error = 0, 108 .scope = RT_SCOPE_LINK, 109 }, /* RTN_ANYCAST */ 110 { 111 .error = 0, 112 .scope = RT_SCOPE_UNIVERSE, 113 }, /* RTN_MULTICAST */ 114 { 115 .error = -EINVAL, 116 .scope = RT_SCOPE_UNIVERSE, 117 }, /* RTN_BLACKHOLE */ 118 { 119 .error = -EHOSTUNREACH, 120 .scope = RT_SCOPE_UNIVERSE, 121 }, /* RTN_UNREACHABLE */ 122 { 123 .error = -EACCES, 124 .scope = RT_SCOPE_UNIVERSE, 125 }, /* RTN_PROHIBIT */ 126 { 127 .error = -EAGAIN, 128 .scope = RT_SCOPE_UNIVERSE, 129 }, /* RTN_THROW */ 130 { 131 .error = -EINVAL, 132 .scope = RT_SCOPE_NOWHERE, 133 }, /* RTN_NAT */ 134 { 135 .error = -EINVAL, 136 .scope = RT_SCOPE_NOWHERE, 137 }, /* RTN_XRESOLVE */ 138 }; 139 140 141 /* Release a nexthop info record */ 142 143 void free_fib_info(struct fib_info *fi) 144 { 145 if (fi->fib_dead == 0) { 146 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi); 147 return; 148 } 149 change_nexthops(fi) { 150 if (nh->nh_dev) 151 dev_put(nh->nh_dev); 152 nh->nh_dev = NULL; 153 } endfor_nexthops(fi); 154 fib_info_cnt--; 155 kfree(fi); 156 } 157 158 void fib_release_info(struct fib_info *fi) 159 { 160 spin_lock_bh(&fib_info_lock); 161 if (fi && --fi->fib_treeref == 0) { 162 hlist_del(&fi->fib_hash); 163 if (fi->fib_prefsrc) 164 hlist_del(&fi->fib_lhash); 165 change_nexthops(fi) { 166 if (!nh->nh_dev) 167 continue; 168 hlist_del(&nh->nh_hash); 169 } endfor_nexthops(fi) 170 fi->fib_dead = 1; 171 fib_info_put(fi); 172 } 173 spin_unlock_bh(&fib_info_lock); 174 } 175 176 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 177 { 178 const struct fib_nh *onh = ofi->fib_nh; 179 180 for_nexthops(fi) { 181 if (nh->nh_oif != onh->nh_oif || 182 nh->nh_gw != onh->nh_gw || 183 nh->nh_scope != onh->nh_scope || 184 #ifdef CONFIG_IP_ROUTE_MULTIPATH 185 nh->nh_weight != onh->nh_weight || 186 #endif 187 #ifdef CONFIG_NET_CLS_ROUTE 188 nh->nh_tclassid != onh->nh_tclassid || 189 #endif 190 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) 191 return -1; 192 onh++; 193 } endfor_nexthops(fi); 194 return 0; 195 } 196 197 static inline unsigned int fib_devindex_hashfn(unsigned int val) 198 { 199 unsigned int mask = DEVINDEX_HASHSIZE - 1; 200 201 return (val ^ 202 (val >> DEVINDEX_HASHBITS) ^ 203 (val >> (DEVINDEX_HASHBITS * 2))) & mask; 204 } 205 206 static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 207 { 208 unsigned int mask = (fib_hash_size - 1); 209 unsigned int val = fi->fib_nhs; 210 211 val ^= fi->fib_protocol; 212 val ^= (__force u32)fi->fib_prefsrc; 213 val ^= fi->fib_priority; 214 for_nexthops(fi) { 215 val ^= fib_devindex_hashfn(nh->nh_oif); 216 } endfor_nexthops(fi) 217 218 return (val ^ (val >> 7) ^ (val >> 12)) & mask; 219 } 220 221 static struct fib_info *fib_find_info(const struct fib_info *nfi) 222 { 223 struct hlist_head *head; 224 struct hlist_node *node; 225 struct fib_info *fi; 226 unsigned int hash; 227 228 hash = fib_info_hashfn(nfi); 229 head = &fib_info_hash[hash]; 230 231 hlist_for_each_entry(fi, node, head, fib_hash) { 232 if (fi->fib_net != nfi->fib_net) 233 continue; 234 if (fi->fib_nhs != nfi->fib_nhs) 235 continue; 236 if (nfi->fib_protocol == fi->fib_protocol && 237 nfi->fib_prefsrc == fi->fib_prefsrc && 238 nfi->fib_priority == fi->fib_priority && 239 memcmp(nfi->fib_metrics, fi->fib_metrics, 240 sizeof(fi->fib_metrics)) == 0 && 241 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && 242 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 243 return fi; 244 } 245 246 return NULL; 247 } 248 249 /* Check, that the gateway is already configured. 250 Used only by redirect accept routine. 251 */ 252 253 int ip_fib_check_default(__be32 gw, struct net_device *dev) 254 { 255 struct hlist_head *head; 256 struct hlist_node *node; 257 struct fib_nh *nh; 258 unsigned int hash; 259 260 spin_lock(&fib_info_lock); 261 262 hash = fib_devindex_hashfn(dev->ifindex); 263 head = &fib_info_devhash[hash]; 264 hlist_for_each_entry(nh, node, head, nh_hash) { 265 if (nh->nh_dev == dev && 266 nh->nh_gw == gw && 267 !(nh->nh_flags&RTNH_F_DEAD)) { 268 spin_unlock(&fib_info_lock); 269 return 0; 270 } 271 } 272 273 spin_unlock(&fib_info_lock); 274 275 return -1; 276 } 277 278 static inline size_t fib_nlmsg_size(struct fib_info *fi) 279 { 280 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) 281 + nla_total_size(4) /* RTA_TABLE */ 282 + nla_total_size(4) /* RTA_DST */ 283 + nla_total_size(4) /* RTA_PRIORITY */ 284 + nla_total_size(4); /* RTA_PREFSRC */ 285 286 /* space for nested metrics */ 287 payload += nla_total_size((RTAX_MAX * nla_total_size(4))); 288 289 if (fi->fib_nhs) { 290 /* Also handles the special case fib_nhs == 1 */ 291 292 /* each nexthop is packed in an attribute */ 293 size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); 294 295 /* may contain flow and gateway attribute */ 296 nhsize += 2 * nla_total_size(4); 297 298 /* all nexthops are packed in a nested attribute */ 299 payload += nla_total_size(fi->fib_nhs * nhsize); 300 } 301 302 return payload; 303 } 304 305 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, 306 int dst_len, u32 tb_id, struct nl_info *info, 307 unsigned int nlm_flags) 308 { 309 struct sk_buff *skb; 310 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 311 int err = -ENOBUFS; 312 313 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); 314 if (skb == NULL) 315 goto errout; 316 317 err = fib_dump_info(skb, info->pid, seq, event, tb_id, 318 fa->fa_type, fa->fa_scope, key, dst_len, 319 fa->fa_tos, fa->fa_info, nlm_flags); 320 if (err < 0) { 321 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ 322 WARN_ON(err == -EMSGSIZE); 323 kfree_skb(skb); 324 goto errout; 325 } 326 err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, 327 info->nlh, GFP_KERNEL); 328 errout: 329 if (err < 0) 330 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); 331 } 332 333 /* Return the first fib alias matching TOS with 334 * priority less than or equal to PRIO. 335 */ 336 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) 337 { 338 if (fah) { 339 struct fib_alias *fa; 340 list_for_each_entry(fa, fah, fa_list) { 341 if (fa->fa_tos > tos) 342 continue; 343 if (fa->fa_info->fib_priority >= prio || 344 fa->fa_tos < tos) 345 return fa; 346 } 347 } 348 return NULL; 349 } 350 351 int fib_detect_death(struct fib_info *fi, int order, 352 struct fib_info **last_resort, int *last_idx, int dflt) 353 { 354 struct neighbour *n; 355 int state = NUD_NONE; 356 357 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); 358 if (n) { 359 state = n->nud_state; 360 neigh_release(n); 361 } 362 if (state==NUD_REACHABLE) 363 return 0; 364 if ((state&NUD_VALID) && order != dflt) 365 return 0; 366 if ((state&NUD_VALID) || 367 (*last_idx<0 && order > dflt)) { 368 *last_resort = fi; 369 *last_idx = order; 370 } 371 return 1; 372 } 373 374 #ifdef CONFIG_IP_ROUTE_MULTIPATH 375 376 static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) 377 { 378 int nhs = 0; 379 380 while (rtnh_ok(rtnh, remaining)) { 381 nhs++; 382 rtnh = rtnh_next(rtnh, &remaining); 383 } 384 385 /* leftover implies invalid nexthop configuration, discard it */ 386 return remaining > 0 ? 0 : nhs; 387 } 388 389 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 390 int remaining, struct fib_config *cfg) 391 { 392 change_nexthops(fi) { 393 int attrlen; 394 395 if (!rtnh_ok(rtnh, remaining)) 396 return -EINVAL; 397 398 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; 399 nh->nh_oif = rtnh->rtnh_ifindex; 400 nh->nh_weight = rtnh->rtnh_hops + 1; 401 402 attrlen = rtnh_attrlen(rtnh); 403 if (attrlen > 0) { 404 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 405 406 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 407 nh->nh_gw = nla ? nla_get_be32(nla) : 0; 408 #ifdef CONFIG_NET_CLS_ROUTE 409 nla = nla_find(attrs, attrlen, RTA_FLOW); 410 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; 411 #endif 412 } 413 414 rtnh = rtnh_next(rtnh, &remaining); 415 } endfor_nexthops(fi); 416 417 return 0; 418 } 419 420 #endif 421 422 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) 423 { 424 #ifdef CONFIG_IP_ROUTE_MULTIPATH 425 struct rtnexthop *rtnh; 426 int remaining; 427 #endif 428 429 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) 430 return 1; 431 432 if (cfg->fc_oif || cfg->fc_gw) { 433 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && 434 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) 435 return 0; 436 return 1; 437 } 438 439 #ifdef CONFIG_IP_ROUTE_MULTIPATH 440 if (cfg->fc_mp == NULL) 441 return 0; 442 443 rtnh = cfg->fc_mp; 444 remaining = cfg->fc_mp_len; 445 446 for_nexthops(fi) { 447 int attrlen; 448 449 if (!rtnh_ok(rtnh, remaining)) 450 return -EINVAL; 451 452 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) 453 return 1; 454 455 attrlen = rtnh_attrlen(rtnh); 456 if (attrlen < 0) { 457 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 458 459 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 460 if (nla && nla_get_be32(nla) != nh->nh_gw) 461 return 1; 462 #ifdef CONFIG_NET_CLS_ROUTE 463 nla = nla_find(attrs, attrlen, RTA_FLOW); 464 if (nla && nla_get_u32(nla) != nh->nh_tclassid) 465 return 1; 466 #endif 467 } 468 469 rtnh = rtnh_next(rtnh, &remaining); 470 } endfor_nexthops(fi); 471 #endif 472 return 0; 473 } 474 475 476 /* 477 Picture 478 ------- 479 480 Semantics of nexthop is very messy by historical reasons. 481 We have to take into account, that: 482 a) gateway can be actually local interface address, 483 so that gatewayed route is direct. 484 b) gateway must be on-link address, possibly 485 described not by an ifaddr, but also by a direct route. 486 c) If both gateway and interface are specified, they should not 487 contradict. 488 d) If we use tunnel routes, gateway could be not on-link. 489 490 Attempt to reconcile all of these (alas, self-contradictory) conditions 491 results in pretty ugly and hairy code with obscure logic. 492 493 I chose to generalized it instead, so that the size 494 of code does not increase practically, but it becomes 495 much more general. 496 Every prefix is assigned a "scope" value: "host" is local address, 497 "link" is direct route, 498 [ ... "site" ... "interior" ... ] 499 and "universe" is true gateway route with global meaning. 500 501 Every prefix refers to a set of "nexthop"s (gw, oif), 502 where gw must have narrower scope. This recursion stops 503 when gw has LOCAL scope or if "nexthop" is declared ONLINK, 504 which means that gw is forced to be on link. 505 506 Code is still hairy, but now it is apparently logically 507 consistent and very flexible. F.e. as by-product it allows 508 to co-exists in peace independent exterior and interior 509 routing processes. 510 511 Normally it looks as following. 512 513 {universe prefix} -> (gw, oif) [scope link] 514 | 515 |-> {link prefix} -> (gw, oif) [scope local] 516 | 517 |-> {local prefix} (terminal node) 518 */ 519 520 static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, 521 struct fib_nh *nh) 522 { 523 int err; 524 struct net *net; 525 526 net = cfg->fc_nlinfo.nl_net; 527 if (nh->nh_gw) { 528 struct fib_result res; 529 530 #ifdef CONFIG_IP_ROUTE_PERVASIVE 531 if (nh->nh_flags&RTNH_F_PERVASIVE) 532 return 0; 533 #endif 534 if (nh->nh_flags&RTNH_F_ONLINK) { 535 struct net_device *dev; 536 537 if (cfg->fc_scope >= RT_SCOPE_LINK) 538 return -EINVAL; 539 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST) 540 return -EINVAL; 541 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL) 542 return -ENODEV; 543 if (!(dev->flags&IFF_UP)) 544 return -ENETDOWN; 545 nh->nh_dev = dev; 546 dev_hold(dev); 547 nh->nh_scope = RT_SCOPE_LINK; 548 return 0; 549 } 550 { 551 struct flowi fl = { 552 .nl_u = { 553 .ip4_u = { 554 .daddr = nh->nh_gw, 555 .scope = cfg->fc_scope + 1, 556 }, 557 }, 558 .oif = nh->nh_oif, 559 }; 560 561 /* It is not necessary, but requires a bit of thinking */ 562 if (fl.fl4_scope < RT_SCOPE_LINK) 563 fl.fl4_scope = RT_SCOPE_LINK; 564 if ((err = fib_lookup(net, &fl, &res)) != 0) 565 return err; 566 } 567 err = -EINVAL; 568 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) 569 goto out; 570 nh->nh_scope = res.scope; 571 nh->nh_oif = FIB_RES_OIF(res); 572 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) 573 goto out; 574 dev_hold(nh->nh_dev); 575 err = -ENETDOWN; 576 if (!(nh->nh_dev->flags & IFF_UP)) 577 goto out; 578 err = 0; 579 out: 580 fib_res_put(&res); 581 return err; 582 } else { 583 struct in_device *in_dev; 584 585 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) 586 return -EINVAL; 587 588 in_dev = inetdev_by_index(net, nh->nh_oif); 589 if (in_dev == NULL) 590 return -ENODEV; 591 if (!(in_dev->dev->flags&IFF_UP)) { 592 in_dev_put(in_dev); 593 return -ENETDOWN; 594 } 595 nh->nh_dev = in_dev->dev; 596 dev_hold(nh->nh_dev); 597 nh->nh_scope = RT_SCOPE_HOST; 598 in_dev_put(in_dev); 599 } 600 return 0; 601 } 602 603 static inline unsigned int fib_laddr_hashfn(__be32 val) 604 { 605 unsigned int mask = (fib_hash_size - 1); 606 607 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; 608 } 609 610 static struct hlist_head *fib_hash_alloc(int bytes) 611 { 612 if (bytes <= PAGE_SIZE) 613 return kzalloc(bytes, GFP_KERNEL); 614 else 615 return (struct hlist_head *) 616 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes)); 617 } 618 619 static void fib_hash_free(struct hlist_head *hash, int bytes) 620 { 621 if (!hash) 622 return; 623 624 if (bytes <= PAGE_SIZE) 625 kfree(hash); 626 else 627 free_pages((unsigned long) hash, get_order(bytes)); 628 } 629 630 static void fib_hash_move(struct hlist_head *new_info_hash, 631 struct hlist_head *new_laddrhash, 632 unsigned int new_size) 633 { 634 struct hlist_head *old_info_hash, *old_laddrhash; 635 unsigned int old_size = fib_hash_size; 636 unsigned int i, bytes; 637 638 spin_lock_bh(&fib_info_lock); 639 old_info_hash = fib_info_hash; 640 old_laddrhash = fib_info_laddrhash; 641 fib_hash_size = new_size; 642 643 for (i = 0; i < old_size; i++) { 644 struct hlist_head *head = &fib_info_hash[i]; 645 struct hlist_node *node, *n; 646 struct fib_info *fi; 647 648 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) { 649 struct hlist_head *dest; 650 unsigned int new_hash; 651 652 hlist_del(&fi->fib_hash); 653 654 new_hash = fib_info_hashfn(fi); 655 dest = &new_info_hash[new_hash]; 656 hlist_add_head(&fi->fib_hash, dest); 657 } 658 } 659 fib_info_hash = new_info_hash; 660 661 for (i = 0; i < old_size; i++) { 662 struct hlist_head *lhead = &fib_info_laddrhash[i]; 663 struct hlist_node *node, *n; 664 struct fib_info *fi; 665 666 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) { 667 struct hlist_head *ldest; 668 unsigned int new_hash; 669 670 hlist_del(&fi->fib_lhash); 671 672 new_hash = fib_laddr_hashfn(fi->fib_prefsrc); 673 ldest = &new_laddrhash[new_hash]; 674 hlist_add_head(&fi->fib_lhash, ldest); 675 } 676 } 677 fib_info_laddrhash = new_laddrhash; 678 679 spin_unlock_bh(&fib_info_lock); 680 681 bytes = old_size * sizeof(struct hlist_head *); 682 fib_hash_free(old_info_hash, bytes); 683 fib_hash_free(old_laddrhash, bytes); 684 } 685 686 struct fib_info *fib_create_info(struct fib_config *cfg) 687 { 688 int err; 689 struct fib_info *fi = NULL; 690 struct fib_info *ofi; 691 int nhs = 1; 692 struct net *net = cfg->fc_nlinfo.nl_net; 693 694 /* Fast check to catch the most weird cases */ 695 if (fib_props[cfg->fc_type].scope > cfg->fc_scope) 696 goto err_inval; 697 698 #ifdef CONFIG_IP_ROUTE_MULTIPATH 699 if (cfg->fc_mp) { 700 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); 701 if (nhs == 0) 702 goto err_inval; 703 } 704 #endif 705 706 err = -ENOBUFS; 707 if (fib_info_cnt >= fib_hash_size) { 708 unsigned int new_size = fib_hash_size << 1; 709 struct hlist_head *new_info_hash; 710 struct hlist_head *new_laddrhash; 711 unsigned int bytes; 712 713 if (!new_size) 714 new_size = 1; 715 bytes = new_size * sizeof(struct hlist_head *); 716 new_info_hash = fib_hash_alloc(bytes); 717 new_laddrhash = fib_hash_alloc(bytes); 718 if (!new_info_hash || !new_laddrhash) { 719 fib_hash_free(new_info_hash, bytes); 720 fib_hash_free(new_laddrhash, bytes); 721 } else 722 fib_hash_move(new_info_hash, new_laddrhash, new_size); 723 724 if (!fib_hash_size) 725 goto failure; 726 } 727 728 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 729 if (fi == NULL) 730 goto failure; 731 fib_info_cnt++; 732 733 fi->fib_net = net; 734 fi->fib_protocol = cfg->fc_protocol; 735 fi->fib_flags = cfg->fc_flags; 736 fi->fib_priority = cfg->fc_priority; 737 fi->fib_prefsrc = cfg->fc_prefsrc; 738 739 fi->fib_nhs = nhs; 740 change_nexthops(fi) { 741 nh->nh_parent = fi; 742 } endfor_nexthops(fi) 743 744 if (cfg->fc_mx) { 745 struct nlattr *nla; 746 int remaining; 747 748 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 749 int type = nla_type(nla); 750 751 if (type) { 752 if (type > RTAX_MAX) 753 goto err_inval; 754 fi->fib_metrics[type - 1] = nla_get_u32(nla); 755 } 756 } 757 } 758 759 if (cfg->fc_mp) { 760 #ifdef CONFIG_IP_ROUTE_MULTIPATH 761 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); 762 if (err != 0) 763 goto failure; 764 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) 765 goto err_inval; 766 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) 767 goto err_inval; 768 #ifdef CONFIG_NET_CLS_ROUTE 769 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) 770 goto err_inval; 771 #endif 772 #else 773 goto err_inval; 774 #endif 775 } else { 776 struct fib_nh *nh = fi->fib_nh; 777 778 nh->nh_oif = cfg->fc_oif; 779 nh->nh_gw = cfg->fc_gw; 780 nh->nh_flags = cfg->fc_flags; 781 #ifdef CONFIG_NET_CLS_ROUTE 782 nh->nh_tclassid = cfg->fc_flow; 783 #endif 784 #ifdef CONFIG_IP_ROUTE_MULTIPATH 785 nh->nh_weight = 1; 786 #endif 787 } 788 789 if (fib_props[cfg->fc_type].error) { 790 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) 791 goto err_inval; 792 goto link_it; 793 } 794 795 if (cfg->fc_scope > RT_SCOPE_HOST) 796 goto err_inval; 797 798 if (cfg->fc_scope == RT_SCOPE_HOST) { 799 struct fib_nh *nh = fi->fib_nh; 800 801 /* Local address is added. */ 802 if (nhs != 1 || nh->nh_gw) 803 goto err_inval; 804 nh->nh_scope = RT_SCOPE_NOWHERE; 805 nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); 806 err = -ENODEV; 807 if (nh->nh_dev == NULL) 808 goto failure; 809 } else { 810 change_nexthops(fi) { 811 if ((err = fib_check_nh(cfg, fi, nh)) != 0) 812 goto failure; 813 } endfor_nexthops(fi) 814 } 815 816 if (fi->fib_prefsrc) { 817 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 818 fi->fib_prefsrc != cfg->fc_dst) 819 if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL) 820 goto err_inval; 821 } 822 823 link_it: 824 if ((ofi = fib_find_info(fi)) != NULL) { 825 fi->fib_dead = 1; 826 free_fib_info(fi); 827 ofi->fib_treeref++; 828 return ofi; 829 } 830 831 fi->fib_treeref++; 832 atomic_inc(&fi->fib_clntref); 833 spin_lock_bh(&fib_info_lock); 834 hlist_add_head(&fi->fib_hash, 835 &fib_info_hash[fib_info_hashfn(fi)]); 836 if (fi->fib_prefsrc) { 837 struct hlist_head *head; 838 839 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; 840 hlist_add_head(&fi->fib_lhash, head); 841 } 842 change_nexthops(fi) { 843 struct hlist_head *head; 844 unsigned int hash; 845 846 if (!nh->nh_dev) 847 continue; 848 hash = fib_devindex_hashfn(nh->nh_dev->ifindex); 849 head = &fib_info_devhash[hash]; 850 hlist_add_head(&nh->nh_hash, head); 851 } endfor_nexthops(fi) 852 spin_unlock_bh(&fib_info_lock); 853 return fi; 854 855 err_inval: 856 err = -EINVAL; 857 858 failure: 859 if (fi) { 860 fi->fib_dead = 1; 861 free_fib_info(fi); 862 } 863 864 return ERR_PTR(err); 865 } 866 867 /* Note! fib_semantic_match intentionally uses RCU list functions. */ 868 int fib_semantic_match(struct list_head *head, const struct flowi *flp, 869 struct fib_result *res, __be32 zone, __be32 mask, 870 int prefixlen) 871 { 872 struct fib_alias *fa; 873 int nh_sel = 0; 874 875 list_for_each_entry_rcu(fa, head, fa_list) { 876 int err; 877 878 if (fa->fa_tos && 879 fa->fa_tos != flp->fl4_tos) 880 continue; 881 882 if (fa->fa_scope < flp->fl4_scope) 883 continue; 884 885 fa->fa_state |= FA_S_ACCESSED; 886 887 err = fib_props[fa->fa_type].error; 888 if (err == 0) { 889 struct fib_info *fi = fa->fa_info; 890 891 if (fi->fib_flags & RTNH_F_DEAD) 892 continue; 893 894 switch (fa->fa_type) { 895 case RTN_UNICAST: 896 case RTN_LOCAL: 897 case RTN_BROADCAST: 898 case RTN_ANYCAST: 899 case RTN_MULTICAST: 900 for_nexthops(fi) { 901 if (nh->nh_flags&RTNH_F_DEAD) 902 continue; 903 if (!flp->oif || flp->oif == nh->nh_oif) 904 break; 905 } 906 #ifdef CONFIG_IP_ROUTE_MULTIPATH 907 if (nhsel < fi->fib_nhs) { 908 nh_sel = nhsel; 909 goto out_fill_res; 910 } 911 #else 912 if (nhsel < 1) { 913 goto out_fill_res; 914 } 915 #endif 916 endfor_nexthops(fi); 917 continue; 918 919 default: 920 printk(KERN_WARNING "fib_semantic_match bad type %#x\n", 921 fa->fa_type); 922 return -EINVAL; 923 } 924 } 925 return err; 926 } 927 return 1; 928 929 out_fill_res: 930 res->prefixlen = prefixlen; 931 res->nh_sel = nh_sel; 932 res->type = fa->fa_type; 933 res->scope = fa->fa_scope; 934 res->fi = fa->fa_info; 935 atomic_inc(&res->fi->fib_clntref); 936 return 0; 937 } 938 939 /* Find appropriate source address to this destination */ 940 941 __be32 __fib_res_prefsrc(struct fib_result *res) 942 { 943 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); 944 } 945 946 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 947 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, 948 struct fib_info *fi, unsigned int flags) 949 { 950 struct nlmsghdr *nlh; 951 struct rtmsg *rtm; 952 953 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); 954 if (nlh == NULL) 955 return -EMSGSIZE; 956 957 rtm = nlmsg_data(nlh); 958 rtm->rtm_family = AF_INET; 959 rtm->rtm_dst_len = dst_len; 960 rtm->rtm_src_len = 0; 961 rtm->rtm_tos = tos; 962 rtm->rtm_table = tb_id; 963 NLA_PUT_U32(skb, RTA_TABLE, tb_id); 964 rtm->rtm_type = type; 965 rtm->rtm_flags = fi->fib_flags; 966 rtm->rtm_scope = scope; 967 rtm->rtm_protocol = fi->fib_protocol; 968 969 if (rtm->rtm_dst_len) 970 NLA_PUT_BE32(skb, RTA_DST, dst); 971 972 if (fi->fib_priority) 973 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority); 974 975 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) 976 goto nla_put_failure; 977 978 if (fi->fib_prefsrc) 979 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc); 980 981 if (fi->fib_nhs == 1) { 982 if (fi->fib_nh->nh_gw) 983 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw); 984 985 if (fi->fib_nh->nh_oif) 986 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); 987 #ifdef CONFIG_NET_CLS_ROUTE 988 if (fi->fib_nh[0].nh_tclassid) 989 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); 990 #endif 991 } 992 #ifdef CONFIG_IP_ROUTE_MULTIPATH 993 if (fi->fib_nhs > 1) { 994 struct rtnexthop *rtnh; 995 struct nlattr *mp; 996 997 mp = nla_nest_start(skb, RTA_MULTIPATH); 998 if (mp == NULL) 999 goto nla_put_failure; 1000 1001 for_nexthops(fi) { 1002 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 1003 if (rtnh == NULL) 1004 goto nla_put_failure; 1005 1006 rtnh->rtnh_flags = nh->nh_flags & 0xFF; 1007 rtnh->rtnh_hops = nh->nh_weight - 1; 1008 rtnh->rtnh_ifindex = nh->nh_oif; 1009 1010 if (nh->nh_gw) 1011 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); 1012 #ifdef CONFIG_NET_CLS_ROUTE 1013 if (nh->nh_tclassid) 1014 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); 1015 #endif 1016 /* length of rtnetlink header + attributes */ 1017 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; 1018 } endfor_nexthops(fi); 1019 1020 nla_nest_end(skb, mp); 1021 } 1022 #endif 1023 return nlmsg_end(skb, nlh); 1024 1025 nla_put_failure: 1026 nlmsg_cancel(skb, nlh); 1027 return -EMSGSIZE; 1028 } 1029 1030 /* 1031 Update FIB if: 1032 - local address disappeared -> we must delete all the entries 1033 referring to it. 1034 - device went down -> we must shutdown all nexthops going via it. 1035 */ 1036 int fib_sync_down_addr(struct net *net, __be32 local) 1037 { 1038 int ret = 0; 1039 unsigned int hash = fib_laddr_hashfn(local); 1040 struct hlist_head *head = &fib_info_laddrhash[hash]; 1041 struct hlist_node *node; 1042 struct fib_info *fi; 1043 1044 if (fib_info_laddrhash == NULL || local == 0) 1045 return 0; 1046 1047 hlist_for_each_entry(fi, node, head, fib_lhash) { 1048 if (fi->fib_net != net) 1049 continue; 1050 if (fi->fib_prefsrc == local) { 1051 fi->fib_flags |= RTNH_F_DEAD; 1052 ret++; 1053 } 1054 } 1055 return ret; 1056 } 1057 1058 int fib_sync_down_dev(struct net_device *dev, int force) 1059 { 1060 int ret = 0; 1061 int scope = RT_SCOPE_NOWHERE; 1062 struct fib_info *prev_fi = NULL; 1063 unsigned int hash = fib_devindex_hashfn(dev->ifindex); 1064 struct hlist_head *head = &fib_info_devhash[hash]; 1065 struct hlist_node *node; 1066 struct fib_nh *nh; 1067 1068 if (force) 1069 scope = -1; 1070 1071 hlist_for_each_entry(nh, node, head, nh_hash) { 1072 struct fib_info *fi = nh->nh_parent; 1073 int dead; 1074 1075 BUG_ON(!fi->fib_nhs); 1076 if (nh->nh_dev != dev || fi == prev_fi) 1077 continue; 1078 prev_fi = fi; 1079 dead = 0; 1080 change_nexthops(fi) { 1081 if (nh->nh_flags&RTNH_F_DEAD) 1082 dead++; 1083 else if (nh->nh_dev == dev && 1084 nh->nh_scope != scope) { 1085 nh->nh_flags |= RTNH_F_DEAD; 1086 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1087 spin_lock_bh(&fib_multipath_lock); 1088 fi->fib_power -= nh->nh_power; 1089 nh->nh_power = 0; 1090 spin_unlock_bh(&fib_multipath_lock); 1091 #endif 1092 dead++; 1093 } 1094 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1095 if (force > 1 && nh->nh_dev == dev) { 1096 dead = fi->fib_nhs; 1097 break; 1098 } 1099 #endif 1100 } endfor_nexthops(fi) 1101 if (dead == fi->fib_nhs) { 1102 fi->fib_flags |= RTNH_F_DEAD; 1103 ret++; 1104 } 1105 } 1106 1107 return ret; 1108 } 1109 1110 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1111 1112 /* 1113 Dead device goes up. We wake up dead nexthops. 1114 It takes sense only on multipath routes. 1115 */ 1116 1117 int fib_sync_up(struct net_device *dev) 1118 { 1119 struct fib_info *prev_fi; 1120 unsigned int hash; 1121 struct hlist_head *head; 1122 struct hlist_node *node; 1123 struct fib_nh *nh; 1124 int ret; 1125 1126 if (!(dev->flags&IFF_UP)) 1127 return 0; 1128 1129 prev_fi = NULL; 1130 hash = fib_devindex_hashfn(dev->ifindex); 1131 head = &fib_info_devhash[hash]; 1132 ret = 0; 1133 1134 hlist_for_each_entry(nh, node, head, nh_hash) { 1135 struct fib_info *fi = nh->nh_parent; 1136 int alive; 1137 1138 BUG_ON(!fi->fib_nhs); 1139 if (nh->nh_dev != dev || fi == prev_fi) 1140 continue; 1141 1142 prev_fi = fi; 1143 alive = 0; 1144 change_nexthops(fi) { 1145 if (!(nh->nh_flags&RTNH_F_DEAD)) { 1146 alive++; 1147 continue; 1148 } 1149 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) 1150 continue; 1151 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) 1152 continue; 1153 alive++; 1154 spin_lock_bh(&fib_multipath_lock); 1155 nh->nh_power = 0; 1156 nh->nh_flags &= ~RTNH_F_DEAD; 1157 spin_unlock_bh(&fib_multipath_lock); 1158 } endfor_nexthops(fi) 1159 1160 if (alive > 0) { 1161 fi->fib_flags &= ~RTNH_F_DEAD; 1162 ret++; 1163 } 1164 } 1165 1166 return ret; 1167 } 1168 1169 /* 1170 The algorithm is suboptimal, but it provides really 1171 fair weighted route distribution. 1172 */ 1173 1174 void fib_select_multipath(const struct flowi *flp, struct fib_result *res) 1175 { 1176 struct fib_info *fi = res->fi; 1177 int w; 1178 1179 spin_lock_bh(&fib_multipath_lock); 1180 if (fi->fib_power <= 0) { 1181 int power = 0; 1182 change_nexthops(fi) { 1183 if (!(nh->nh_flags&RTNH_F_DEAD)) { 1184 power += nh->nh_weight; 1185 nh->nh_power = nh->nh_weight; 1186 } 1187 } endfor_nexthops(fi); 1188 fi->fib_power = power; 1189 if (power <= 0) { 1190 spin_unlock_bh(&fib_multipath_lock); 1191 /* Race condition: route has just become dead. */ 1192 res->nh_sel = 0; 1193 return; 1194 } 1195 } 1196 1197 1198 /* w should be random number [0..fi->fib_power-1], 1199 it is pretty bad approximation. 1200 */ 1201 1202 w = jiffies % fi->fib_power; 1203 1204 change_nexthops(fi) { 1205 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { 1206 if ((w -= nh->nh_power) <= 0) { 1207 nh->nh_power--; 1208 fi->fib_power--; 1209 res->nh_sel = nhsel; 1210 spin_unlock_bh(&fib_multipath_lock); 1211 return; 1212 } 1213 } 1214 } endfor_nexthops(fi); 1215 1216 /* Race condition: route has just become dead. */ 1217 res->nh_sel = 0; 1218 spin_unlock_bh(&fib_multipath_lock); 1219 } 1220 #endif 1221