1 /* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * IPv4 Forwarding Information Base: semantics. 7 * 8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $ 9 * 10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * as published by the Free Software Foundation; either version 15 * 2 of the License, or (at your option) any later version. 16 */ 17 18 #include <asm/uaccess.h> 19 #include <asm/system.h> 20 #include <linux/bitops.h> 21 #include <linux/types.h> 22 #include <linux/kernel.h> 23 #include <linux/jiffies.h> 24 #include <linux/mm.h> 25 #include <linux/string.h> 26 #include <linux/socket.h> 27 #include <linux/sockios.h> 28 #include <linux/errno.h> 29 #include <linux/in.h> 30 #include <linux/inet.h> 31 #include <linux/inetdevice.h> 32 #include <linux/netdevice.h> 33 #include <linux/if_arp.h> 34 #include <linux/proc_fs.h> 35 #include <linux/skbuff.h> 36 #include <linux/init.h> 37 38 #include <net/arp.h> 39 #include <net/ip.h> 40 #include <net/protocol.h> 41 #include <net/route.h> 42 #include <net/tcp.h> 43 #include <net/sock.h> 44 #include <net/ip_fib.h> 45 #include <net/netlink.h> 46 #include <net/nexthop.h> 47 48 #include "fib_lookup.h" 49 50 #define FSprintk(a...) 51 52 static DEFINE_SPINLOCK(fib_info_lock); 53 static struct hlist_head *fib_info_hash; 54 static struct hlist_head *fib_info_laddrhash; 55 static unsigned int fib_hash_size; 56 static unsigned int fib_info_cnt; 57 58 #define DEVINDEX_HASHBITS 8 59 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) 60 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; 61 62 #ifdef CONFIG_IP_ROUTE_MULTIPATH 63 64 static DEFINE_SPINLOCK(fib_multipath_lock); 65 66 #define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ 67 for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) 68 69 #define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \ 70 for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++) 71 72 #else /* CONFIG_IP_ROUTE_MULTIPATH */ 73 74 /* Hope, that gcc will optimize it to get rid of dummy loop */ 75 76 #define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \ 77 for (nhsel=0; nhsel < 1; nhsel++) 78 79 #define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \ 80 for (nhsel=0; nhsel < 1; nhsel++) 81 82 #endif /* CONFIG_IP_ROUTE_MULTIPATH */ 83 84 #define endfor_nexthops(fi) } 85 86 87 static const struct 88 { 89 int error; 90 u8 scope; 91 } fib_props[RTN_MAX + 1] = { 92 { 93 .error = 0, 94 .scope = RT_SCOPE_NOWHERE, 95 }, /* RTN_UNSPEC */ 96 { 97 .error = 0, 98 .scope = RT_SCOPE_UNIVERSE, 99 }, /* RTN_UNICAST */ 100 { 101 .error = 0, 102 .scope = RT_SCOPE_HOST, 103 }, /* RTN_LOCAL */ 104 { 105 .error = 0, 106 .scope = RT_SCOPE_LINK, 107 }, /* RTN_BROADCAST */ 108 { 109 .error = 0, 110 .scope = RT_SCOPE_LINK, 111 }, /* RTN_ANYCAST */ 112 { 113 .error = 0, 114 .scope = RT_SCOPE_UNIVERSE, 115 }, /* RTN_MULTICAST */ 116 { 117 .error = -EINVAL, 118 .scope = RT_SCOPE_UNIVERSE, 119 }, /* RTN_BLACKHOLE */ 120 { 121 .error = -EHOSTUNREACH, 122 .scope = RT_SCOPE_UNIVERSE, 123 }, /* RTN_UNREACHABLE */ 124 { 125 .error = -EACCES, 126 .scope = RT_SCOPE_UNIVERSE, 127 }, /* RTN_PROHIBIT */ 128 { 129 .error = -EAGAIN, 130 .scope = RT_SCOPE_UNIVERSE, 131 }, /* RTN_THROW */ 132 { 133 .error = -EINVAL, 134 .scope = RT_SCOPE_NOWHERE, 135 }, /* RTN_NAT */ 136 { 137 .error = -EINVAL, 138 .scope = RT_SCOPE_NOWHERE, 139 }, /* RTN_XRESOLVE */ 140 }; 141 142 143 /* Release a nexthop info record */ 144 145 void free_fib_info(struct fib_info *fi) 146 { 147 if (fi->fib_dead == 0) { 148 printk("Freeing alive fib_info %p\n", fi); 149 return; 150 } 151 change_nexthops(fi) { 152 if (nh->nh_dev) 153 dev_put(nh->nh_dev); 154 nh->nh_dev = NULL; 155 } endfor_nexthops(fi); 156 fib_info_cnt--; 157 kfree(fi); 158 } 159 160 void fib_release_info(struct fib_info *fi) 161 { 162 spin_lock_bh(&fib_info_lock); 163 if (fi && --fi->fib_treeref == 0) { 164 hlist_del(&fi->fib_hash); 165 if (fi->fib_prefsrc) 166 hlist_del(&fi->fib_lhash); 167 change_nexthops(fi) { 168 if (!nh->nh_dev) 169 continue; 170 hlist_del(&nh->nh_hash); 171 } endfor_nexthops(fi) 172 fi->fib_dead = 1; 173 fib_info_put(fi); 174 } 175 spin_unlock_bh(&fib_info_lock); 176 } 177 178 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 179 { 180 const struct fib_nh *onh = ofi->fib_nh; 181 182 for_nexthops(fi) { 183 if (nh->nh_oif != onh->nh_oif || 184 nh->nh_gw != onh->nh_gw || 185 nh->nh_scope != onh->nh_scope || 186 #ifdef CONFIG_IP_ROUTE_MULTIPATH 187 nh->nh_weight != onh->nh_weight || 188 #endif 189 #ifdef CONFIG_NET_CLS_ROUTE 190 nh->nh_tclassid != onh->nh_tclassid || 191 #endif 192 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) 193 return -1; 194 onh++; 195 } endfor_nexthops(fi); 196 return 0; 197 } 198 199 static inline unsigned int fib_info_hashfn(const struct fib_info *fi) 200 { 201 unsigned int mask = (fib_hash_size - 1); 202 unsigned int val = fi->fib_nhs; 203 204 val ^= fi->fib_protocol; 205 val ^= (__force u32)fi->fib_prefsrc; 206 val ^= fi->fib_priority; 207 208 return (val ^ (val >> 7) ^ (val >> 12)) & mask; 209 } 210 211 static struct fib_info *fib_find_info(const struct fib_info *nfi) 212 { 213 struct hlist_head *head; 214 struct hlist_node *node; 215 struct fib_info *fi; 216 unsigned int hash; 217 218 hash = fib_info_hashfn(nfi); 219 head = &fib_info_hash[hash]; 220 221 hlist_for_each_entry(fi, node, head, fib_hash) { 222 if (fi->fib_nhs != nfi->fib_nhs) 223 continue; 224 if (nfi->fib_protocol == fi->fib_protocol && 225 nfi->fib_prefsrc == fi->fib_prefsrc && 226 nfi->fib_priority == fi->fib_priority && 227 memcmp(nfi->fib_metrics, fi->fib_metrics, 228 sizeof(fi->fib_metrics)) == 0 && 229 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && 230 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 231 return fi; 232 } 233 234 return NULL; 235 } 236 237 static inline unsigned int fib_devindex_hashfn(unsigned int val) 238 { 239 unsigned int mask = DEVINDEX_HASHSIZE - 1; 240 241 return (val ^ 242 (val >> DEVINDEX_HASHBITS) ^ 243 (val >> (DEVINDEX_HASHBITS * 2))) & mask; 244 } 245 246 /* Check, that the gateway is already configured. 247 Used only by redirect accept routine. 248 */ 249 250 int ip_fib_check_default(__be32 gw, struct net_device *dev) 251 { 252 struct hlist_head *head; 253 struct hlist_node *node; 254 struct fib_nh *nh; 255 unsigned int hash; 256 257 spin_lock(&fib_info_lock); 258 259 hash = fib_devindex_hashfn(dev->ifindex); 260 head = &fib_info_devhash[hash]; 261 hlist_for_each_entry(nh, node, head, nh_hash) { 262 if (nh->nh_dev == dev && 263 nh->nh_gw == gw && 264 !(nh->nh_flags&RTNH_F_DEAD)) { 265 spin_unlock(&fib_info_lock); 266 return 0; 267 } 268 } 269 270 spin_unlock(&fib_info_lock); 271 272 return -1; 273 } 274 275 static inline size_t fib_nlmsg_size(struct fib_info *fi) 276 { 277 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) 278 + nla_total_size(4) /* RTA_TABLE */ 279 + nla_total_size(4) /* RTA_DST */ 280 + nla_total_size(4) /* RTA_PRIORITY */ 281 + nla_total_size(4); /* RTA_PREFSRC */ 282 283 /* space for nested metrics */ 284 payload += nla_total_size((RTAX_MAX * nla_total_size(4))); 285 286 if (fi->fib_nhs) { 287 /* Also handles the special case fib_nhs == 1 */ 288 289 /* each nexthop is packed in an attribute */ 290 size_t nhsize = nla_total_size(sizeof(struct rtnexthop)); 291 292 /* may contain flow and gateway attribute */ 293 nhsize += 2 * nla_total_size(4); 294 295 /* all nexthops are packed in a nested attribute */ 296 payload += nla_total_size(fi->fib_nhs * nhsize); 297 } 298 299 return payload; 300 } 301 302 void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, 303 int dst_len, u32 tb_id, struct nl_info *info, 304 unsigned int nlm_flags) 305 { 306 struct sk_buff *skb; 307 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 308 int err = -ENOBUFS; 309 310 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL); 311 if (skb == NULL) 312 goto errout; 313 314 err = fib_dump_info(skb, info->pid, seq, event, tb_id, 315 fa->fa_type, fa->fa_scope, key, dst_len, 316 fa->fa_tos, fa->fa_info, nlm_flags); 317 if (err < 0) { 318 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */ 319 WARN_ON(err == -EMSGSIZE); 320 kfree_skb(skb); 321 goto errout; 322 } 323 err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, 324 info->nlh, GFP_KERNEL); 325 errout: 326 if (err < 0) 327 rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err); 328 } 329 330 /* Return the first fib alias matching TOS with 331 * priority less than or equal to PRIO. 332 */ 333 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio) 334 { 335 if (fah) { 336 struct fib_alias *fa; 337 list_for_each_entry(fa, fah, fa_list) { 338 if (fa->fa_tos > tos) 339 continue; 340 if (fa->fa_info->fib_priority >= prio || 341 fa->fa_tos < tos) 342 return fa; 343 } 344 } 345 return NULL; 346 } 347 348 int fib_detect_death(struct fib_info *fi, int order, 349 struct fib_info **last_resort, int *last_idx, int *dflt) 350 { 351 struct neighbour *n; 352 int state = NUD_NONE; 353 354 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); 355 if (n) { 356 state = n->nud_state; 357 neigh_release(n); 358 } 359 if (state==NUD_REACHABLE) 360 return 0; 361 if ((state&NUD_VALID) && order != *dflt) 362 return 0; 363 if ((state&NUD_VALID) || 364 (*last_idx<0 && order > *dflt)) { 365 *last_resort = fi; 366 *last_idx = order; 367 } 368 return 1; 369 } 370 371 #ifdef CONFIG_IP_ROUTE_MULTIPATH 372 373 static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) 374 { 375 int nhs = 0; 376 377 while (rtnh_ok(rtnh, remaining)) { 378 nhs++; 379 rtnh = rtnh_next(rtnh, &remaining); 380 } 381 382 /* leftover implies invalid nexthop configuration, discard it */ 383 return remaining > 0 ? 0 : nhs; 384 } 385 386 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, 387 int remaining, struct fib_config *cfg) 388 { 389 change_nexthops(fi) { 390 int attrlen; 391 392 if (!rtnh_ok(rtnh, remaining)) 393 return -EINVAL; 394 395 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; 396 nh->nh_oif = rtnh->rtnh_ifindex; 397 nh->nh_weight = rtnh->rtnh_hops + 1; 398 399 attrlen = rtnh_attrlen(rtnh); 400 if (attrlen > 0) { 401 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 402 403 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 404 nh->nh_gw = nla ? nla_get_be32(nla) : 0; 405 #ifdef CONFIG_NET_CLS_ROUTE 406 nla = nla_find(attrs, attrlen, RTA_FLOW); 407 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; 408 #endif 409 } 410 411 rtnh = rtnh_next(rtnh, &remaining); 412 } endfor_nexthops(fi); 413 414 return 0; 415 } 416 417 #endif 418 419 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) 420 { 421 #ifdef CONFIG_IP_ROUTE_MULTIPATH 422 struct rtnexthop *rtnh; 423 int remaining; 424 #endif 425 426 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) 427 return 1; 428 429 if (cfg->fc_oif || cfg->fc_gw) { 430 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && 431 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) 432 return 0; 433 return 1; 434 } 435 436 #ifdef CONFIG_IP_ROUTE_MULTIPATH 437 if (cfg->fc_mp == NULL) 438 return 0; 439 440 rtnh = cfg->fc_mp; 441 remaining = cfg->fc_mp_len; 442 443 for_nexthops(fi) { 444 int attrlen; 445 446 if (!rtnh_ok(rtnh, remaining)) 447 return -EINVAL; 448 449 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) 450 return 1; 451 452 attrlen = rtnh_attrlen(rtnh); 453 if (attrlen < 0) { 454 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 455 456 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 457 if (nla && nla_get_be32(nla) != nh->nh_gw) 458 return 1; 459 #ifdef CONFIG_NET_CLS_ROUTE 460 nla = nla_find(attrs, attrlen, RTA_FLOW); 461 if (nla && nla_get_u32(nla) != nh->nh_tclassid) 462 return 1; 463 #endif 464 } 465 466 rtnh = rtnh_next(rtnh, &remaining); 467 } endfor_nexthops(fi); 468 #endif 469 return 0; 470 } 471 472 473 /* 474 Picture 475 ------- 476 477 Semantics of nexthop is very messy by historical reasons. 478 We have to take into account, that: 479 a) gateway can be actually local interface address, 480 so that gatewayed route is direct. 481 b) gateway must be on-link address, possibly 482 described not by an ifaddr, but also by a direct route. 483 c) If both gateway and interface are specified, they should not 484 contradict. 485 d) If we use tunnel routes, gateway could be not on-link. 486 487 Attempt to reconcile all of these (alas, self-contradictory) conditions 488 results in pretty ugly and hairy code with obscure logic. 489 490 I chose to generalized it instead, so that the size 491 of code does not increase practically, but it becomes 492 much more general. 493 Every prefix is assigned a "scope" value: "host" is local address, 494 "link" is direct route, 495 [ ... "site" ... "interior" ... ] 496 and "universe" is true gateway route with global meaning. 497 498 Every prefix refers to a set of "nexthop"s (gw, oif), 499 where gw must have narrower scope. This recursion stops 500 when gw has LOCAL scope or if "nexthop" is declared ONLINK, 501 which means that gw is forced to be on link. 502 503 Code is still hairy, but now it is apparently logically 504 consistent and very flexible. F.e. as by-product it allows 505 to co-exists in peace independent exterior and interior 506 routing processes. 507 508 Normally it looks as following. 509 510 {universe prefix} -> (gw, oif) [scope link] 511 | 512 |-> {link prefix} -> (gw, oif) [scope local] 513 | 514 |-> {local prefix} (terminal node) 515 */ 516 517 static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, 518 struct fib_nh *nh) 519 { 520 int err; 521 522 if (nh->nh_gw) { 523 struct fib_result res; 524 525 #ifdef CONFIG_IP_ROUTE_PERVASIVE 526 if (nh->nh_flags&RTNH_F_PERVASIVE) 527 return 0; 528 #endif 529 if (nh->nh_flags&RTNH_F_ONLINK) { 530 struct net_device *dev; 531 532 if (cfg->fc_scope >= RT_SCOPE_LINK) 533 return -EINVAL; 534 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) 535 return -EINVAL; 536 if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL) 537 return -ENODEV; 538 if (!(dev->flags&IFF_UP)) 539 return -ENETDOWN; 540 nh->nh_dev = dev; 541 dev_hold(dev); 542 nh->nh_scope = RT_SCOPE_LINK; 543 return 0; 544 } 545 { 546 struct flowi fl = { 547 .nl_u = { 548 .ip4_u = { 549 .daddr = nh->nh_gw, 550 .scope = cfg->fc_scope + 1, 551 }, 552 }, 553 .oif = nh->nh_oif, 554 }; 555 556 /* It is not necessary, but requires a bit of thinking */ 557 if (fl.fl4_scope < RT_SCOPE_LINK) 558 fl.fl4_scope = RT_SCOPE_LINK; 559 if ((err = fib_lookup(&fl, &res)) != 0) 560 return err; 561 } 562 err = -EINVAL; 563 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) 564 goto out; 565 nh->nh_scope = res.scope; 566 nh->nh_oif = FIB_RES_OIF(res); 567 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) 568 goto out; 569 dev_hold(nh->nh_dev); 570 err = -ENETDOWN; 571 if (!(nh->nh_dev->flags & IFF_UP)) 572 goto out; 573 err = 0; 574 out: 575 fib_res_put(&res); 576 return err; 577 } else { 578 struct in_device *in_dev; 579 580 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) 581 return -EINVAL; 582 583 in_dev = inetdev_by_index(nh->nh_oif); 584 if (in_dev == NULL) 585 return -ENODEV; 586 if (!(in_dev->dev->flags&IFF_UP)) { 587 in_dev_put(in_dev); 588 return -ENETDOWN; 589 } 590 nh->nh_dev = in_dev->dev; 591 dev_hold(nh->nh_dev); 592 nh->nh_scope = RT_SCOPE_HOST; 593 in_dev_put(in_dev); 594 } 595 return 0; 596 } 597 598 static inline unsigned int fib_laddr_hashfn(__be32 val) 599 { 600 unsigned int mask = (fib_hash_size - 1); 601 602 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; 603 } 604 605 static struct hlist_head *fib_hash_alloc(int bytes) 606 { 607 if (bytes <= PAGE_SIZE) 608 return kmalloc(bytes, GFP_KERNEL); 609 else 610 return (struct hlist_head *) 611 __get_free_pages(GFP_KERNEL, get_order(bytes)); 612 } 613 614 static void fib_hash_free(struct hlist_head *hash, int bytes) 615 { 616 if (!hash) 617 return; 618 619 if (bytes <= PAGE_SIZE) 620 kfree(hash); 621 else 622 free_pages((unsigned long) hash, get_order(bytes)); 623 } 624 625 static void fib_hash_move(struct hlist_head *new_info_hash, 626 struct hlist_head *new_laddrhash, 627 unsigned int new_size) 628 { 629 struct hlist_head *old_info_hash, *old_laddrhash; 630 unsigned int old_size = fib_hash_size; 631 unsigned int i, bytes; 632 633 spin_lock_bh(&fib_info_lock); 634 old_info_hash = fib_info_hash; 635 old_laddrhash = fib_info_laddrhash; 636 fib_hash_size = new_size; 637 638 for (i = 0; i < old_size; i++) { 639 struct hlist_head *head = &fib_info_hash[i]; 640 struct hlist_node *node, *n; 641 struct fib_info *fi; 642 643 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) { 644 struct hlist_head *dest; 645 unsigned int new_hash; 646 647 hlist_del(&fi->fib_hash); 648 649 new_hash = fib_info_hashfn(fi); 650 dest = &new_info_hash[new_hash]; 651 hlist_add_head(&fi->fib_hash, dest); 652 } 653 } 654 fib_info_hash = new_info_hash; 655 656 for (i = 0; i < old_size; i++) { 657 struct hlist_head *lhead = &fib_info_laddrhash[i]; 658 struct hlist_node *node, *n; 659 struct fib_info *fi; 660 661 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) { 662 struct hlist_head *ldest; 663 unsigned int new_hash; 664 665 hlist_del(&fi->fib_lhash); 666 667 new_hash = fib_laddr_hashfn(fi->fib_prefsrc); 668 ldest = &new_laddrhash[new_hash]; 669 hlist_add_head(&fi->fib_lhash, ldest); 670 } 671 } 672 fib_info_laddrhash = new_laddrhash; 673 674 spin_unlock_bh(&fib_info_lock); 675 676 bytes = old_size * sizeof(struct hlist_head *); 677 fib_hash_free(old_info_hash, bytes); 678 fib_hash_free(old_laddrhash, bytes); 679 } 680 681 struct fib_info *fib_create_info(struct fib_config *cfg) 682 { 683 int err; 684 struct fib_info *fi = NULL; 685 struct fib_info *ofi; 686 int nhs = 1; 687 688 /* Fast check to catch the most weird cases */ 689 if (fib_props[cfg->fc_type].scope > cfg->fc_scope) 690 goto err_inval; 691 692 #ifdef CONFIG_IP_ROUTE_MULTIPATH 693 if (cfg->fc_mp) { 694 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); 695 if (nhs == 0) 696 goto err_inval; 697 } 698 #endif 699 700 err = -ENOBUFS; 701 if (fib_info_cnt >= fib_hash_size) { 702 unsigned int new_size = fib_hash_size << 1; 703 struct hlist_head *new_info_hash; 704 struct hlist_head *new_laddrhash; 705 unsigned int bytes; 706 707 if (!new_size) 708 new_size = 1; 709 bytes = new_size * sizeof(struct hlist_head *); 710 new_info_hash = fib_hash_alloc(bytes); 711 new_laddrhash = fib_hash_alloc(bytes); 712 if (!new_info_hash || !new_laddrhash) { 713 fib_hash_free(new_info_hash, bytes); 714 fib_hash_free(new_laddrhash, bytes); 715 } else { 716 memset(new_info_hash, 0, bytes); 717 memset(new_laddrhash, 0, bytes); 718 719 fib_hash_move(new_info_hash, new_laddrhash, new_size); 720 } 721 722 if (!fib_hash_size) 723 goto failure; 724 } 725 726 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); 727 if (fi == NULL) 728 goto failure; 729 fib_info_cnt++; 730 731 fi->fib_protocol = cfg->fc_protocol; 732 fi->fib_flags = cfg->fc_flags; 733 fi->fib_priority = cfg->fc_priority; 734 fi->fib_prefsrc = cfg->fc_prefsrc; 735 736 fi->fib_nhs = nhs; 737 change_nexthops(fi) { 738 nh->nh_parent = fi; 739 } endfor_nexthops(fi) 740 741 if (cfg->fc_mx) { 742 struct nlattr *nla; 743 int remaining; 744 745 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 746 int type = nla_type(nla); 747 748 if (type) { 749 if (type > RTAX_MAX) 750 goto err_inval; 751 fi->fib_metrics[type - 1] = nla_get_u32(nla); 752 } 753 } 754 } 755 756 if (cfg->fc_mp) { 757 #ifdef CONFIG_IP_ROUTE_MULTIPATH 758 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); 759 if (err != 0) 760 goto failure; 761 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) 762 goto err_inval; 763 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) 764 goto err_inval; 765 #ifdef CONFIG_NET_CLS_ROUTE 766 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) 767 goto err_inval; 768 #endif 769 #else 770 goto err_inval; 771 #endif 772 } else { 773 struct fib_nh *nh = fi->fib_nh; 774 775 nh->nh_oif = cfg->fc_oif; 776 nh->nh_gw = cfg->fc_gw; 777 nh->nh_flags = cfg->fc_flags; 778 #ifdef CONFIG_NET_CLS_ROUTE 779 nh->nh_tclassid = cfg->fc_flow; 780 #endif 781 #ifdef CONFIG_IP_ROUTE_MULTIPATH 782 nh->nh_weight = 1; 783 #endif 784 } 785 786 if (fib_props[cfg->fc_type].error) { 787 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) 788 goto err_inval; 789 goto link_it; 790 } 791 792 if (cfg->fc_scope > RT_SCOPE_HOST) 793 goto err_inval; 794 795 if (cfg->fc_scope == RT_SCOPE_HOST) { 796 struct fib_nh *nh = fi->fib_nh; 797 798 /* Local address is added. */ 799 if (nhs != 1 || nh->nh_gw) 800 goto err_inval; 801 nh->nh_scope = RT_SCOPE_NOWHERE; 802 nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif); 803 err = -ENODEV; 804 if (nh->nh_dev == NULL) 805 goto failure; 806 } else { 807 change_nexthops(fi) { 808 if ((err = fib_check_nh(cfg, fi, nh)) != 0) 809 goto failure; 810 } endfor_nexthops(fi) 811 } 812 813 if (fi->fib_prefsrc) { 814 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || 815 fi->fib_prefsrc != cfg->fc_dst) 816 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) 817 goto err_inval; 818 } 819 820 link_it: 821 if ((ofi = fib_find_info(fi)) != NULL) { 822 fi->fib_dead = 1; 823 free_fib_info(fi); 824 ofi->fib_treeref++; 825 return ofi; 826 } 827 828 fi->fib_treeref++; 829 atomic_inc(&fi->fib_clntref); 830 spin_lock_bh(&fib_info_lock); 831 hlist_add_head(&fi->fib_hash, 832 &fib_info_hash[fib_info_hashfn(fi)]); 833 if (fi->fib_prefsrc) { 834 struct hlist_head *head; 835 836 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)]; 837 hlist_add_head(&fi->fib_lhash, head); 838 } 839 change_nexthops(fi) { 840 struct hlist_head *head; 841 unsigned int hash; 842 843 if (!nh->nh_dev) 844 continue; 845 hash = fib_devindex_hashfn(nh->nh_dev->ifindex); 846 head = &fib_info_devhash[hash]; 847 hlist_add_head(&nh->nh_hash, head); 848 } endfor_nexthops(fi) 849 spin_unlock_bh(&fib_info_lock); 850 return fi; 851 852 err_inval: 853 err = -EINVAL; 854 855 failure: 856 if (fi) { 857 fi->fib_dead = 1; 858 free_fib_info(fi); 859 } 860 861 return ERR_PTR(err); 862 } 863 864 /* Note! fib_semantic_match intentionally uses RCU list functions. */ 865 int fib_semantic_match(struct list_head *head, const struct flowi *flp, 866 struct fib_result *res, __be32 zone, __be32 mask, 867 int prefixlen) 868 { 869 struct fib_alias *fa; 870 int nh_sel = 0; 871 872 list_for_each_entry_rcu(fa, head, fa_list) { 873 int err; 874 875 if (fa->fa_tos && 876 fa->fa_tos != flp->fl4_tos) 877 continue; 878 879 if (fa->fa_scope < flp->fl4_scope) 880 continue; 881 882 fa->fa_state |= FA_S_ACCESSED; 883 884 err = fib_props[fa->fa_type].error; 885 if (err == 0) { 886 struct fib_info *fi = fa->fa_info; 887 888 if (fi->fib_flags & RTNH_F_DEAD) 889 continue; 890 891 switch (fa->fa_type) { 892 case RTN_UNICAST: 893 case RTN_LOCAL: 894 case RTN_BROADCAST: 895 case RTN_ANYCAST: 896 case RTN_MULTICAST: 897 for_nexthops(fi) { 898 if (nh->nh_flags&RTNH_F_DEAD) 899 continue; 900 if (!flp->oif || flp->oif == nh->nh_oif) 901 break; 902 } 903 #ifdef CONFIG_IP_ROUTE_MULTIPATH 904 if (nhsel < fi->fib_nhs) { 905 nh_sel = nhsel; 906 goto out_fill_res; 907 } 908 #else 909 if (nhsel < 1) { 910 goto out_fill_res; 911 } 912 #endif 913 endfor_nexthops(fi); 914 continue; 915 916 default: 917 printk(KERN_DEBUG "impossible 102\n"); 918 return -EINVAL; 919 } 920 } 921 return err; 922 } 923 return 1; 924 925 out_fill_res: 926 res->prefixlen = prefixlen; 927 res->nh_sel = nh_sel; 928 res->type = fa->fa_type; 929 res->scope = fa->fa_scope; 930 res->fi = fa->fa_info; 931 atomic_inc(&res->fi->fib_clntref); 932 return 0; 933 } 934 935 /* Find appropriate source address to this destination */ 936 937 __be32 __fib_res_prefsrc(struct fib_result *res) 938 { 939 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); 940 } 941 942 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 943 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, 944 struct fib_info *fi, unsigned int flags) 945 { 946 struct nlmsghdr *nlh; 947 struct rtmsg *rtm; 948 949 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); 950 if (nlh == NULL) 951 return -EMSGSIZE; 952 953 rtm = nlmsg_data(nlh); 954 rtm->rtm_family = AF_INET; 955 rtm->rtm_dst_len = dst_len; 956 rtm->rtm_src_len = 0; 957 rtm->rtm_tos = tos; 958 rtm->rtm_table = tb_id; 959 NLA_PUT_U32(skb, RTA_TABLE, tb_id); 960 rtm->rtm_type = type; 961 rtm->rtm_flags = fi->fib_flags; 962 rtm->rtm_scope = scope; 963 rtm->rtm_protocol = fi->fib_protocol; 964 965 if (rtm->rtm_dst_len) 966 NLA_PUT_BE32(skb, RTA_DST, dst); 967 968 if (fi->fib_priority) 969 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority); 970 971 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) 972 goto nla_put_failure; 973 974 if (fi->fib_prefsrc) 975 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc); 976 977 if (fi->fib_nhs == 1) { 978 if (fi->fib_nh->nh_gw) 979 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw); 980 981 if (fi->fib_nh->nh_oif) 982 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); 983 #ifdef CONFIG_NET_CLS_ROUTE 984 if (fi->fib_nh[0].nh_tclassid) 985 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); 986 #endif 987 } 988 #ifdef CONFIG_IP_ROUTE_MULTIPATH 989 if (fi->fib_nhs > 1) { 990 struct rtnexthop *rtnh; 991 struct nlattr *mp; 992 993 mp = nla_nest_start(skb, RTA_MULTIPATH); 994 if (mp == NULL) 995 goto nla_put_failure; 996 997 for_nexthops(fi) { 998 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 999 if (rtnh == NULL) 1000 goto nla_put_failure; 1001 1002 rtnh->rtnh_flags = nh->nh_flags & 0xFF; 1003 rtnh->rtnh_hops = nh->nh_weight - 1; 1004 rtnh->rtnh_ifindex = nh->nh_oif; 1005 1006 if (nh->nh_gw) 1007 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw); 1008 #ifdef CONFIG_NET_CLS_ROUTE 1009 if (nh->nh_tclassid) 1010 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); 1011 #endif 1012 /* length of rtnetlink header + attributes */ 1013 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; 1014 } endfor_nexthops(fi); 1015 1016 nla_nest_end(skb, mp); 1017 } 1018 #endif 1019 return nlmsg_end(skb, nlh); 1020 1021 nla_put_failure: 1022 nlmsg_cancel(skb, nlh); 1023 return -EMSGSIZE; 1024 } 1025 1026 /* 1027 Update FIB if: 1028 - local address disappeared -> we must delete all the entries 1029 referring to it. 1030 - device went down -> we must shutdown all nexthops going via it. 1031 */ 1032 1033 int fib_sync_down(__be32 local, struct net_device *dev, int force) 1034 { 1035 int ret = 0; 1036 int scope = RT_SCOPE_NOWHERE; 1037 1038 if (force) 1039 scope = -1; 1040 1041 if (local && fib_info_laddrhash) { 1042 unsigned int hash = fib_laddr_hashfn(local); 1043 struct hlist_head *head = &fib_info_laddrhash[hash]; 1044 struct hlist_node *node; 1045 struct fib_info *fi; 1046 1047 hlist_for_each_entry(fi, node, head, fib_lhash) { 1048 if (fi->fib_prefsrc == local) { 1049 fi->fib_flags |= RTNH_F_DEAD; 1050 ret++; 1051 } 1052 } 1053 } 1054 1055 if (dev) { 1056 struct fib_info *prev_fi = NULL; 1057 unsigned int hash = fib_devindex_hashfn(dev->ifindex); 1058 struct hlist_head *head = &fib_info_devhash[hash]; 1059 struct hlist_node *node; 1060 struct fib_nh *nh; 1061 1062 hlist_for_each_entry(nh, node, head, nh_hash) { 1063 struct fib_info *fi = nh->nh_parent; 1064 int dead; 1065 1066 BUG_ON(!fi->fib_nhs); 1067 if (nh->nh_dev != dev || fi == prev_fi) 1068 continue; 1069 prev_fi = fi; 1070 dead = 0; 1071 change_nexthops(fi) { 1072 if (nh->nh_flags&RTNH_F_DEAD) 1073 dead++; 1074 else if (nh->nh_dev == dev && 1075 nh->nh_scope != scope) { 1076 nh->nh_flags |= RTNH_F_DEAD; 1077 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1078 spin_lock_bh(&fib_multipath_lock); 1079 fi->fib_power -= nh->nh_power; 1080 nh->nh_power = 0; 1081 spin_unlock_bh(&fib_multipath_lock); 1082 #endif 1083 dead++; 1084 } 1085 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1086 if (force > 1 && nh->nh_dev == dev) { 1087 dead = fi->fib_nhs; 1088 break; 1089 } 1090 #endif 1091 } endfor_nexthops(fi) 1092 if (dead == fi->fib_nhs) { 1093 fi->fib_flags |= RTNH_F_DEAD; 1094 ret++; 1095 } 1096 } 1097 } 1098 1099 return ret; 1100 } 1101 1102 #ifdef CONFIG_IP_ROUTE_MULTIPATH 1103 1104 /* 1105 Dead device goes up. We wake up dead nexthops. 1106 It takes sense only on multipath routes. 1107 */ 1108 1109 int fib_sync_up(struct net_device *dev) 1110 { 1111 struct fib_info *prev_fi; 1112 unsigned int hash; 1113 struct hlist_head *head; 1114 struct hlist_node *node; 1115 struct fib_nh *nh; 1116 int ret; 1117 1118 if (!(dev->flags&IFF_UP)) 1119 return 0; 1120 1121 prev_fi = NULL; 1122 hash = fib_devindex_hashfn(dev->ifindex); 1123 head = &fib_info_devhash[hash]; 1124 ret = 0; 1125 1126 hlist_for_each_entry(nh, node, head, nh_hash) { 1127 struct fib_info *fi = nh->nh_parent; 1128 int alive; 1129 1130 BUG_ON(!fi->fib_nhs); 1131 if (nh->nh_dev != dev || fi == prev_fi) 1132 continue; 1133 1134 prev_fi = fi; 1135 alive = 0; 1136 change_nexthops(fi) { 1137 if (!(nh->nh_flags&RTNH_F_DEAD)) { 1138 alive++; 1139 continue; 1140 } 1141 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) 1142 continue; 1143 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) 1144 continue; 1145 alive++; 1146 spin_lock_bh(&fib_multipath_lock); 1147 nh->nh_power = 0; 1148 nh->nh_flags &= ~RTNH_F_DEAD; 1149 spin_unlock_bh(&fib_multipath_lock); 1150 } endfor_nexthops(fi) 1151 1152 if (alive > 0) { 1153 fi->fib_flags &= ~RTNH_F_DEAD; 1154 ret++; 1155 } 1156 } 1157 1158 return ret; 1159 } 1160 1161 /* 1162 The algorithm is suboptimal, but it provides really 1163 fair weighted route distribution. 1164 */ 1165 1166 void fib_select_multipath(const struct flowi *flp, struct fib_result *res) 1167 { 1168 struct fib_info *fi = res->fi; 1169 int w; 1170 1171 spin_lock_bh(&fib_multipath_lock); 1172 if (fi->fib_power <= 0) { 1173 int power = 0; 1174 change_nexthops(fi) { 1175 if (!(nh->nh_flags&RTNH_F_DEAD)) { 1176 power += nh->nh_weight; 1177 nh->nh_power = nh->nh_weight; 1178 } 1179 } endfor_nexthops(fi); 1180 fi->fib_power = power; 1181 if (power <= 0) { 1182 spin_unlock_bh(&fib_multipath_lock); 1183 /* Race condition: route has just become dead. */ 1184 res->nh_sel = 0; 1185 return; 1186 } 1187 } 1188 1189 1190 /* w should be random number [0..fi->fib_power-1], 1191 it is pretty bad approximation. 1192 */ 1193 1194 w = jiffies % fi->fib_power; 1195 1196 change_nexthops(fi) { 1197 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { 1198 if ((w -= nh->nh_power) <= 0) { 1199 nh->nh_power--; 1200 fi->fib_power--; 1201 res->nh_sel = nhsel; 1202 spin_unlock_bh(&fib_multipath_lock); 1203 return; 1204 } 1205 } 1206 } endfor_nexthops(fi); 1207 1208 /* Race condition: route has just become dead. */ 1209 res->nh_sel = 0; 1210 spin_unlock_bh(&fib_multipath_lock); 1211 } 1212 #endif 1213