1 /* 2 * Linux INET6 implementation 3 * FIB front-end. 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $ 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 */ 15 16 /* Changes: 17 * 18 * YOSHIFUJI Hideaki @USAGI 19 * reworked default router selection. 20 * - respect outgoing interface 21 * - select from (probably) reachable routers (i.e. 22 * routers in REACHABLE, STALE, DELAY or PROBE states). 23 * - always select the same router if it is (probably) 24 * reachable. otherwise, round-robin the list. 25 */ 26 27 #include <linux/capability.h> 28 #include <linux/errno.h> 29 #include <linux/types.h> 30 #include <linux/times.h> 31 #include <linux/socket.h> 32 #include <linux/sockios.h> 33 #include <linux/net.h> 34 #include <linux/route.h> 35 #include <linux/netdevice.h> 36 #include <linux/in6.h> 37 #include <linux/init.h> 38 #include <linux/if_arp.h> 39 40 #ifdef CONFIG_PROC_FS 41 #include <linux/proc_fs.h> 42 #include <linux/seq_file.h> 43 #endif 44 45 #include <net/snmp.h> 46 #include <net/ipv6.h> 47 #include <net/ip6_fib.h> 48 #include <net/ip6_route.h> 49 #include <net/ndisc.h> 50 #include <net/addrconf.h> 51 #include <net/tcp.h> 52 #include <linux/rtnetlink.h> 53 #include <net/dst.h> 54 #include <net/xfrm.h> 55 #include <net/netevent.h> 56 #include <net/netlink.h> 57 58 #include <asm/uaccess.h> 59 60 #ifdef CONFIG_SYSCTL 61 #include <linux/sysctl.h> 62 #endif 63 64 /* Set to 3 to get tracing. */ 65 #define RT6_DEBUG 2 66 67 #if RT6_DEBUG >= 3 68 #define RDBG(x) printk x 69 #define RT6_TRACE(x...) printk(KERN_DEBUG x) 70 #else 71 #define RDBG(x) 72 #define RT6_TRACE(x...) do { ; } while (0) 73 #endif 74 75 #define CLONE_OFFLINK_ROUTE 0 76 77 #define RT6_SELECT_F_IFACE 0x1 78 #define RT6_SELECT_F_REACHABLE 0x2 79 80 static int ip6_rt_max_size = 4096; 81 static int ip6_rt_gc_min_interval = HZ / 2; 82 static int ip6_rt_gc_timeout = 60*HZ; 83 int ip6_rt_gc_interval = 30*HZ; 84 static int ip6_rt_gc_elasticity = 9; 85 static int ip6_rt_mtu_expires = 10*60*HZ; 86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 87 88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); 89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 90 static struct dst_entry *ip6_negative_advice(struct dst_entry *); 91 static void ip6_dst_destroy(struct dst_entry *); 92 static void ip6_dst_ifdown(struct dst_entry *, 93 struct net_device *dev, int how); 94 static int ip6_dst_gc(void); 95 96 static int ip6_pkt_discard(struct sk_buff *skb); 97 static int ip6_pkt_discard_out(struct sk_buff *skb); 98 static void ip6_link_failure(struct sk_buff *skb); 99 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 100 101 #ifdef CONFIG_IPV6_ROUTE_INFO 102 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen, 103 struct in6_addr *gwaddr, int ifindex, 104 unsigned pref); 105 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen, 106 struct in6_addr *gwaddr, int ifindex); 107 #endif 108 109 static struct dst_ops ip6_dst_ops = { 110 .family = AF_INET6, 111 .protocol = __constant_htons(ETH_P_IPV6), 112 .gc = ip6_dst_gc, 113 .gc_thresh = 1024, 114 .check = ip6_dst_check, 115 .destroy = ip6_dst_destroy, 116 .ifdown = ip6_dst_ifdown, 117 .negative_advice = ip6_negative_advice, 118 .link_failure = ip6_link_failure, 119 .update_pmtu = ip6_rt_update_pmtu, 120 .entry_size = sizeof(struct rt6_info), 121 }; 122 123 struct rt6_info ip6_null_entry = { 124 .u = { 125 .dst = { 126 .__refcnt = ATOMIC_INIT(1), 127 .__use = 1, 128 .dev = &loopback_dev, 129 .obsolete = -1, 130 .error = -ENETUNREACH, 131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 132 .input = ip6_pkt_discard, 133 .output = ip6_pkt_discard_out, 134 .ops = &ip6_dst_ops, 135 .path = (struct dst_entry*)&ip6_null_entry, 136 } 137 }, 138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 139 .rt6i_metric = ~(u32) 0, 140 .rt6i_ref = ATOMIC_INIT(1), 141 }; 142 143 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 144 145 struct rt6_info ip6_prohibit_entry = { 146 .u = { 147 .dst = { 148 .__refcnt = ATOMIC_INIT(1), 149 .__use = 1, 150 .dev = &loopback_dev, 151 .obsolete = -1, 152 .error = -EACCES, 153 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 154 .input = ip6_pkt_discard, 155 .output = ip6_pkt_discard_out, 156 .ops = &ip6_dst_ops, 157 .path = (struct dst_entry*)&ip6_prohibit_entry, 158 } 159 }, 160 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 161 .rt6i_metric = ~(u32) 0, 162 .rt6i_ref = ATOMIC_INIT(1), 163 }; 164 165 struct rt6_info ip6_blk_hole_entry = { 166 .u = { 167 .dst = { 168 .__refcnt = ATOMIC_INIT(1), 169 .__use = 1, 170 .dev = &loopback_dev, 171 .obsolete = -1, 172 .error = -EINVAL, 173 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 174 .input = ip6_pkt_discard, 175 .output = ip6_pkt_discard_out, 176 .ops = &ip6_dst_ops, 177 .path = (struct dst_entry*)&ip6_blk_hole_entry, 178 } 179 }, 180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 181 .rt6i_metric = ~(u32) 0, 182 .rt6i_ref = ATOMIC_INIT(1), 183 }; 184 185 #endif 186 187 /* allocate dst with ip6_dst_ops */ 188 static __inline__ struct rt6_info *ip6_dst_alloc(void) 189 { 190 return (struct rt6_info *)dst_alloc(&ip6_dst_ops); 191 } 192 193 static void ip6_dst_destroy(struct dst_entry *dst) 194 { 195 struct rt6_info *rt = (struct rt6_info *)dst; 196 struct inet6_dev *idev = rt->rt6i_idev; 197 198 if (idev != NULL) { 199 rt->rt6i_idev = NULL; 200 in6_dev_put(idev); 201 } 202 } 203 204 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 205 int how) 206 { 207 struct rt6_info *rt = (struct rt6_info *)dst; 208 struct inet6_dev *idev = rt->rt6i_idev; 209 210 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) { 211 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); 212 if (loopback_idev != NULL) { 213 rt->rt6i_idev = loopback_idev; 214 in6_dev_put(idev); 215 } 216 } 217 } 218 219 static __inline__ int rt6_check_expired(const struct rt6_info *rt) 220 { 221 return (rt->rt6i_flags & RTF_EXPIRES && 222 time_after(jiffies, rt->rt6i_expires)); 223 } 224 225 static inline int rt6_need_strict(struct in6_addr *daddr) 226 { 227 return (ipv6_addr_type(daddr) & 228 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); 229 } 230 231 /* 232 * Route lookup. Any table->tb6_lock is implied. 233 */ 234 235 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, 236 int oif, 237 int strict) 238 { 239 struct rt6_info *local = NULL; 240 struct rt6_info *sprt; 241 242 if (oif) { 243 for (sprt = rt; sprt; sprt = sprt->u.next) { 244 struct net_device *dev = sprt->rt6i_dev; 245 if (dev->ifindex == oif) 246 return sprt; 247 if (dev->flags & IFF_LOOPBACK) { 248 if (sprt->rt6i_idev == NULL || 249 sprt->rt6i_idev->dev->ifindex != oif) { 250 if (strict && oif) 251 continue; 252 if (local && (!oif || 253 local->rt6i_idev->dev->ifindex == oif)) 254 continue; 255 } 256 local = sprt; 257 } 258 } 259 260 if (local) 261 return local; 262 263 if (strict) 264 return &ip6_null_entry; 265 } 266 return rt; 267 } 268 269 #ifdef CONFIG_IPV6_ROUTER_PREF 270 static void rt6_probe(struct rt6_info *rt) 271 { 272 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL; 273 /* 274 * Okay, this does not seem to be appropriate 275 * for now, however, we need to check if it 276 * is really so; aka Router Reachability Probing. 277 * 278 * Router Reachability Probe MUST be rate-limited 279 * to no more than one per minute. 280 */ 281 if (!neigh || (neigh->nud_state & NUD_VALID)) 282 return; 283 read_lock_bh(&neigh->lock); 284 if (!(neigh->nud_state & NUD_VALID) && 285 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { 286 struct in6_addr mcaddr; 287 struct in6_addr *target; 288 289 neigh->updated = jiffies; 290 read_unlock_bh(&neigh->lock); 291 292 target = (struct in6_addr *)&neigh->primary_key; 293 addrconf_addr_solict_mult(target, &mcaddr); 294 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); 295 } else 296 read_unlock_bh(&neigh->lock); 297 } 298 #else 299 static inline void rt6_probe(struct rt6_info *rt) 300 { 301 return; 302 } 303 #endif 304 305 /* 306 * Default Router Selection (RFC 2461 6.3.6) 307 */ 308 static int inline rt6_check_dev(struct rt6_info *rt, int oif) 309 { 310 struct net_device *dev = rt->rt6i_dev; 311 if (!oif || dev->ifindex == oif) 312 return 2; 313 if ((dev->flags & IFF_LOOPBACK) && 314 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) 315 return 1; 316 return 0; 317 } 318 319 static int inline rt6_check_neigh(struct rt6_info *rt) 320 { 321 struct neighbour *neigh = rt->rt6i_nexthop; 322 int m = 0; 323 if (rt->rt6i_flags & RTF_NONEXTHOP || 324 !(rt->rt6i_flags & RTF_GATEWAY)) 325 m = 1; 326 else if (neigh) { 327 read_lock_bh(&neigh->lock); 328 if (neigh->nud_state & NUD_VALID) 329 m = 2; 330 read_unlock_bh(&neigh->lock); 331 } 332 return m; 333 } 334 335 static int rt6_score_route(struct rt6_info *rt, int oif, 336 int strict) 337 { 338 int m, n; 339 340 m = rt6_check_dev(rt, oif); 341 if (!m && (strict & RT6_SELECT_F_IFACE)) 342 return -1; 343 #ifdef CONFIG_IPV6_ROUTER_PREF 344 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; 345 #endif 346 n = rt6_check_neigh(rt); 347 if (n > 1) 348 m |= 16; 349 else if (!n && strict & RT6_SELECT_F_REACHABLE) 350 return -1; 351 return m; 352 } 353 354 static struct rt6_info *rt6_select(struct rt6_info **head, int oif, 355 int strict) 356 { 357 struct rt6_info *match = NULL, *last = NULL; 358 struct rt6_info *rt, *rt0 = *head; 359 u32 metric; 360 int mpri = -1; 361 362 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n", 363 __FUNCTION__, head, head ? *head : NULL, oif); 364 365 for (rt = rt0, metric = rt0->rt6i_metric; 366 rt && rt->rt6i_metric == metric && (!last || rt != rt0); 367 rt = rt->u.next) { 368 int m; 369 370 if (rt6_check_expired(rt)) 371 continue; 372 373 last = rt; 374 375 m = rt6_score_route(rt, oif, strict); 376 if (m < 0) 377 continue; 378 379 if (m > mpri) { 380 rt6_probe(match); 381 match = rt; 382 mpri = m; 383 } else { 384 rt6_probe(rt); 385 } 386 } 387 388 if (!match && 389 (strict & RT6_SELECT_F_REACHABLE) && 390 last && last != rt0) { 391 /* no entries matched; do round-robin */ 392 static DEFINE_SPINLOCK(lock); 393 spin_lock(&lock); 394 *head = rt0->u.next; 395 rt0->u.next = last->u.next; 396 last->u.next = rt0; 397 spin_unlock(&lock); 398 } 399 400 RT6_TRACE("%s() => %p, score=%d\n", 401 __FUNCTION__, match, mpri); 402 403 return (match ? match : &ip6_null_entry); 404 } 405 406 #ifdef CONFIG_IPV6_ROUTE_INFO 407 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 408 struct in6_addr *gwaddr) 409 { 410 struct route_info *rinfo = (struct route_info *) opt; 411 struct in6_addr prefix_buf, *prefix; 412 unsigned int pref; 413 u32 lifetime; 414 struct rt6_info *rt; 415 416 if (len < sizeof(struct route_info)) { 417 return -EINVAL; 418 } 419 420 /* Sanity check for prefix_len and length */ 421 if (rinfo->length > 3) { 422 return -EINVAL; 423 } else if (rinfo->prefix_len > 128) { 424 return -EINVAL; 425 } else if (rinfo->prefix_len > 64) { 426 if (rinfo->length < 2) { 427 return -EINVAL; 428 } 429 } else if (rinfo->prefix_len > 0) { 430 if (rinfo->length < 1) { 431 return -EINVAL; 432 } 433 } 434 435 pref = rinfo->route_pref; 436 if (pref == ICMPV6_ROUTER_PREF_INVALID) 437 pref = ICMPV6_ROUTER_PREF_MEDIUM; 438 439 lifetime = htonl(rinfo->lifetime); 440 if (lifetime == 0xffffffff) { 441 /* infinity */ 442 } else if (lifetime > 0x7fffffff/HZ) { 443 /* Avoid arithmetic overflow */ 444 lifetime = 0x7fffffff/HZ - 1; 445 } 446 447 if (rinfo->length == 3) 448 prefix = (struct in6_addr *)rinfo->prefix; 449 else { 450 /* this function is safe */ 451 ipv6_addr_prefix(&prefix_buf, 452 (struct in6_addr *)rinfo->prefix, 453 rinfo->prefix_len); 454 prefix = &prefix_buf; 455 } 456 457 rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex); 458 459 if (rt && !lifetime) { 460 ip6_del_rt(rt); 461 rt = NULL; 462 } 463 464 if (!rt && lifetime) 465 rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex, 466 pref); 467 else if (rt) 468 rt->rt6i_flags = RTF_ROUTEINFO | 469 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 470 471 if (rt) { 472 if (lifetime == 0xffffffff) { 473 rt->rt6i_flags &= ~RTF_EXPIRES; 474 } else { 475 rt->rt6i_expires = jiffies + HZ * lifetime; 476 rt->rt6i_flags |= RTF_EXPIRES; 477 } 478 dst_release(&rt->u.dst); 479 } 480 return 0; 481 } 482 #endif 483 484 #define BACKTRACK() \ 485 if (rt == &ip6_null_entry && flags & RT6_F_STRICT) { \ 486 while ((fn = fn->parent) != NULL) { \ 487 if (fn->fn_flags & RTN_TL_ROOT) { \ 488 dst_hold(&rt->u.dst); \ 489 goto out; \ 490 } \ 491 if (fn->fn_flags & RTN_RTINFO) \ 492 goto restart; \ 493 } \ 494 } 495 496 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, 497 struct flowi *fl, int flags) 498 { 499 struct fib6_node *fn; 500 struct rt6_info *rt; 501 502 read_lock_bh(&table->tb6_lock); 503 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 504 restart: 505 rt = fn->leaf; 506 rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT); 507 BACKTRACK(); 508 dst_hold(&rt->u.dst); 509 out: 510 read_unlock_bh(&table->tb6_lock); 511 512 rt->u.dst.lastuse = jiffies; 513 rt->u.dst.__use++; 514 515 return rt; 516 517 } 518 519 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, 520 int oif, int strict) 521 { 522 struct flowi fl = { 523 .oif = oif, 524 .nl_u = { 525 .ip6_u = { 526 .daddr = *daddr, 527 /* TODO: saddr */ 528 }, 529 }, 530 }; 531 struct dst_entry *dst; 532 int flags = strict ? RT6_F_STRICT : 0; 533 534 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup); 535 if (dst->error == 0) 536 return (struct rt6_info *) dst; 537 538 dst_release(dst); 539 540 return NULL; 541 } 542 543 /* ip6_ins_rt is called with FREE table->tb6_lock. 544 It takes new route entry, the addition fails by any reason the 545 route is freed. In any case, if caller does not hold it, it may 546 be destroyed. 547 */ 548 549 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) 550 { 551 int err; 552 struct fib6_table *table; 553 554 table = rt->rt6i_table; 555 write_lock_bh(&table->tb6_lock); 556 err = fib6_add(&table->tb6_root, rt, info); 557 write_unlock_bh(&table->tb6_lock); 558 559 return err; 560 } 561 562 int ip6_ins_rt(struct rt6_info *rt) 563 { 564 return __ip6_ins_rt(rt, NULL); 565 } 566 567 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, 568 struct in6_addr *saddr) 569 { 570 struct rt6_info *rt; 571 572 /* 573 * Clone the route. 574 */ 575 576 rt = ip6_rt_copy(ort); 577 578 if (rt) { 579 if (!(rt->rt6i_flags&RTF_GATEWAY)) { 580 if (rt->rt6i_dst.plen != 128 && 581 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) 582 rt->rt6i_flags |= RTF_ANYCAST; 583 ipv6_addr_copy(&rt->rt6i_gateway, daddr); 584 } 585 586 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 587 rt->rt6i_dst.plen = 128; 588 rt->rt6i_flags |= RTF_CACHE; 589 rt->u.dst.flags |= DST_HOST; 590 591 #ifdef CONFIG_IPV6_SUBTREES 592 if (rt->rt6i_src.plen && saddr) { 593 ipv6_addr_copy(&rt->rt6i_src.addr, saddr); 594 rt->rt6i_src.plen = 128; 595 } 596 #endif 597 598 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 599 600 } 601 602 return rt; 603 } 604 605 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr) 606 { 607 struct rt6_info *rt = ip6_rt_copy(ort); 608 if (rt) { 609 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 610 rt->rt6i_dst.plen = 128; 611 rt->rt6i_flags |= RTF_CACHE; 612 if (rt->rt6i_flags & RTF_REJECT) 613 rt->u.dst.error = ort->u.dst.error; 614 rt->u.dst.flags |= DST_HOST; 615 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); 616 } 617 return rt; 618 } 619 620 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, 621 struct flowi *fl, int flags) 622 { 623 struct fib6_node *fn; 624 struct rt6_info *rt, *nrt; 625 int strict = 0; 626 int attempts = 3; 627 int err; 628 int reachable = RT6_SELECT_F_REACHABLE; 629 630 if (flags & RT6_F_STRICT) 631 strict = RT6_SELECT_F_IFACE; 632 633 relookup: 634 read_lock_bh(&table->tb6_lock); 635 636 restart_2: 637 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 638 639 restart: 640 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable); 641 BACKTRACK(); 642 if (rt == &ip6_null_entry || 643 rt->rt6i_flags & RTF_CACHE) 644 goto out; 645 646 dst_hold(&rt->u.dst); 647 read_unlock_bh(&table->tb6_lock); 648 649 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 650 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 651 else { 652 #if CLONE_OFFLINK_ROUTE 653 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 654 #else 655 goto out2; 656 #endif 657 } 658 659 dst_release(&rt->u.dst); 660 rt = nrt ? : &ip6_null_entry; 661 662 dst_hold(&rt->u.dst); 663 if (nrt) { 664 err = ip6_ins_rt(nrt); 665 if (!err) 666 goto out2; 667 } 668 669 if (--attempts <= 0) 670 goto out2; 671 672 /* 673 * Race condition! In the gap, when table->tb6_lock was 674 * released someone could insert this route. Relookup. 675 */ 676 dst_release(&rt->u.dst); 677 goto relookup; 678 679 out: 680 if (reachable) { 681 reachable = 0; 682 goto restart_2; 683 } 684 dst_hold(&rt->u.dst); 685 read_unlock_bh(&table->tb6_lock); 686 out2: 687 rt->u.dst.lastuse = jiffies; 688 rt->u.dst.__use++; 689 690 return rt; 691 } 692 693 void ip6_route_input(struct sk_buff *skb) 694 { 695 struct ipv6hdr *iph = skb->nh.ipv6h; 696 struct flowi fl = { 697 .iif = skb->dev->ifindex, 698 .nl_u = { 699 .ip6_u = { 700 .daddr = iph->daddr, 701 .saddr = iph->saddr, 702 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK, 703 }, 704 }, 705 .proto = iph->nexthdr, 706 }; 707 int flags = 0; 708 709 if (rt6_need_strict(&iph->daddr)) 710 flags |= RT6_F_STRICT; 711 712 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input); 713 } 714 715 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, 716 struct flowi *fl, int flags) 717 { 718 struct fib6_node *fn; 719 struct rt6_info *rt, *nrt; 720 int strict = 0; 721 int attempts = 3; 722 int err; 723 int reachable = RT6_SELECT_F_REACHABLE; 724 725 if (flags & RT6_F_STRICT) 726 strict = RT6_SELECT_F_IFACE; 727 728 relookup: 729 read_lock_bh(&table->tb6_lock); 730 731 restart_2: 732 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 733 734 restart: 735 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable); 736 BACKTRACK(); 737 if (rt == &ip6_null_entry || 738 rt->rt6i_flags & RTF_CACHE) 739 goto out; 740 741 dst_hold(&rt->u.dst); 742 read_unlock_bh(&table->tb6_lock); 743 744 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 745 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 746 else { 747 #if CLONE_OFFLINK_ROUTE 748 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 749 #else 750 goto out2; 751 #endif 752 } 753 754 dst_release(&rt->u.dst); 755 rt = nrt ? : &ip6_null_entry; 756 757 dst_hold(&rt->u.dst); 758 if (nrt) { 759 err = ip6_ins_rt(nrt); 760 if (!err) 761 goto out2; 762 } 763 764 if (--attempts <= 0) 765 goto out2; 766 767 /* 768 * Race condition! In the gap, when table->tb6_lock was 769 * released someone could insert this route. Relookup. 770 */ 771 dst_release(&rt->u.dst); 772 goto relookup; 773 774 out: 775 if (reachable) { 776 reachable = 0; 777 goto restart_2; 778 } 779 dst_hold(&rt->u.dst); 780 read_unlock_bh(&table->tb6_lock); 781 out2: 782 rt->u.dst.lastuse = jiffies; 783 rt->u.dst.__use++; 784 return rt; 785 } 786 787 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) 788 { 789 int flags = 0; 790 791 if (rt6_need_strict(&fl->fl6_dst)) 792 flags |= RT6_F_STRICT; 793 794 return fib6_rule_lookup(fl, flags, ip6_pol_route_output); 795 } 796 797 798 /* 799 * Destination cache support functions 800 */ 801 802 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 803 { 804 struct rt6_info *rt; 805 806 rt = (struct rt6_info *) dst; 807 808 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) 809 return dst; 810 811 return NULL; 812 } 813 814 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) 815 { 816 struct rt6_info *rt = (struct rt6_info *) dst; 817 818 if (rt) { 819 if (rt->rt6i_flags & RTF_CACHE) 820 ip6_del_rt(rt); 821 else 822 dst_release(dst); 823 } 824 return NULL; 825 } 826 827 static void ip6_link_failure(struct sk_buff *skb) 828 { 829 struct rt6_info *rt; 830 831 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev); 832 833 rt = (struct rt6_info *) skb->dst; 834 if (rt) { 835 if (rt->rt6i_flags&RTF_CACHE) { 836 dst_set_expires(&rt->u.dst, 0); 837 rt->rt6i_flags |= RTF_EXPIRES; 838 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) 839 rt->rt6i_node->fn_sernum = -1; 840 } 841 } 842 843 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 844 { 845 struct rt6_info *rt6 = (struct rt6_info*)dst; 846 847 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 848 rt6->rt6i_flags |= RTF_MODIFIED; 849 if (mtu < IPV6_MIN_MTU) { 850 mtu = IPV6_MIN_MTU; 851 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 852 } 853 dst->metrics[RTAX_MTU-1] = mtu; 854 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); 855 } 856 } 857 858 static int ipv6_get_mtu(struct net_device *dev); 859 860 static inline unsigned int ipv6_advmss(unsigned int mtu) 861 { 862 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 863 864 if (mtu < ip6_rt_min_advmss) 865 mtu = ip6_rt_min_advmss; 866 867 /* 868 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 869 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 870 * IPV6_MAXPLEN is also valid and means: "any MSS, 871 * rely only on pmtu discovery" 872 */ 873 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 874 mtu = IPV6_MAXPLEN; 875 return mtu; 876 } 877 878 static struct dst_entry *ndisc_dst_gc_list; 879 static DEFINE_SPINLOCK(ndisc_lock); 880 881 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 882 struct neighbour *neigh, 883 struct in6_addr *addr, 884 int (*output)(struct sk_buff *)) 885 { 886 struct rt6_info *rt; 887 struct inet6_dev *idev = in6_dev_get(dev); 888 889 if (unlikely(idev == NULL)) 890 return NULL; 891 892 rt = ip6_dst_alloc(); 893 if (unlikely(rt == NULL)) { 894 in6_dev_put(idev); 895 goto out; 896 } 897 898 dev_hold(dev); 899 if (neigh) 900 neigh_hold(neigh); 901 else 902 neigh = ndisc_get_neigh(dev, addr); 903 904 rt->rt6i_dev = dev; 905 rt->rt6i_idev = idev; 906 rt->rt6i_nexthop = neigh; 907 atomic_set(&rt->u.dst.__refcnt, 1); 908 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; 909 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 910 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); 911 rt->u.dst.output = output; 912 913 #if 0 /* there's no chance to use these for ndisc */ 914 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 915 ? DST_HOST 916 : 0; 917 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 918 rt->rt6i_dst.plen = 128; 919 #endif 920 921 spin_lock_bh(&ndisc_lock); 922 rt->u.dst.next = ndisc_dst_gc_list; 923 ndisc_dst_gc_list = &rt->u.dst; 924 spin_unlock_bh(&ndisc_lock); 925 926 fib6_force_start_gc(); 927 928 out: 929 return (struct dst_entry *)rt; 930 } 931 932 int ndisc_dst_gc(int *more) 933 { 934 struct dst_entry *dst, *next, **pprev; 935 int freed; 936 937 next = NULL; 938 freed = 0; 939 940 spin_lock_bh(&ndisc_lock); 941 pprev = &ndisc_dst_gc_list; 942 943 while ((dst = *pprev) != NULL) { 944 if (!atomic_read(&dst->__refcnt)) { 945 *pprev = dst->next; 946 dst_free(dst); 947 freed++; 948 } else { 949 pprev = &dst->next; 950 (*more)++; 951 } 952 } 953 954 spin_unlock_bh(&ndisc_lock); 955 956 return freed; 957 } 958 959 static int ip6_dst_gc(void) 960 { 961 static unsigned expire = 30*HZ; 962 static unsigned long last_gc; 963 unsigned long now = jiffies; 964 965 if (time_after(last_gc + ip6_rt_gc_min_interval, now) && 966 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size) 967 goto out; 968 969 expire++; 970 fib6_run_gc(expire); 971 last_gc = now; 972 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh) 973 expire = ip6_rt_gc_timeout>>1; 974 975 out: 976 expire -= expire>>ip6_rt_gc_elasticity; 977 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size); 978 } 979 980 /* Clean host part of a prefix. Not necessary in radix tree, 981 but results in cleaner routing tables. 982 983 Remove it only when all the things will work! 984 */ 985 986 static int ipv6_get_mtu(struct net_device *dev) 987 { 988 int mtu = IPV6_MIN_MTU; 989 struct inet6_dev *idev; 990 991 idev = in6_dev_get(dev); 992 if (idev) { 993 mtu = idev->cnf.mtu6; 994 in6_dev_put(idev); 995 } 996 return mtu; 997 } 998 999 int ipv6_get_hoplimit(struct net_device *dev) 1000 { 1001 int hoplimit = ipv6_devconf.hop_limit; 1002 struct inet6_dev *idev; 1003 1004 idev = in6_dev_get(dev); 1005 if (idev) { 1006 hoplimit = idev->cnf.hop_limit; 1007 in6_dev_put(idev); 1008 } 1009 return hoplimit; 1010 } 1011 1012 /* 1013 * 1014 */ 1015 1016 int ip6_route_add(struct fib6_config *cfg) 1017 { 1018 int err; 1019 struct rt6_info *rt = NULL; 1020 struct net_device *dev = NULL; 1021 struct inet6_dev *idev = NULL; 1022 struct fib6_table *table; 1023 int addr_type; 1024 1025 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1026 return -EINVAL; 1027 #ifndef CONFIG_IPV6_SUBTREES 1028 if (cfg->fc_src_len) 1029 return -EINVAL; 1030 #endif 1031 if (cfg->fc_ifindex) { 1032 err = -ENODEV; 1033 dev = dev_get_by_index(cfg->fc_ifindex); 1034 if (!dev) 1035 goto out; 1036 idev = in6_dev_get(dev); 1037 if (!idev) 1038 goto out; 1039 } 1040 1041 if (cfg->fc_metric == 0) 1042 cfg->fc_metric = IP6_RT_PRIO_USER; 1043 1044 table = fib6_new_table(cfg->fc_table); 1045 if (table == NULL) { 1046 err = -ENOBUFS; 1047 goto out; 1048 } 1049 1050 rt = ip6_dst_alloc(); 1051 1052 if (rt == NULL) { 1053 err = -ENOMEM; 1054 goto out; 1055 } 1056 1057 rt->u.dst.obsolete = -1; 1058 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires); 1059 1060 if (cfg->fc_protocol == RTPROT_UNSPEC) 1061 cfg->fc_protocol = RTPROT_BOOT; 1062 rt->rt6i_protocol = cfg->fc_protocol; 1063 1064 addr_type = ipv6_addr_type(&cfg->fc_dst); 1065 1066 if (addr_type & IPV6_ADDR_MULTICAST) 1067 rt->u.dst.input = ip6_mc_input; 1068 else 1069 rt->u.dst.input = ip6_forward; 1070 1071 rt->u.dst.output = ip6_output; 1072 1073 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1074 rt->rt6i_dst.plen = cfg->fc_dst_len; 1075 if (rt->rt6i_dst.plen == 128) 1076 rt->u.dst.flags = DST_HOST; 1077 1078 #ifdef CONFIG_IPV6_SUBTREES 1079 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1080 rt->rt6i_src.plen = cfg->fc_src_len; 1081 #endif 1082 1083 rt->rt6i_metric = cfg->fc_metric; 1084 1085 /* We cannot add true routes via loopback here, 1086 they would result in kernel looping; promote them to reject routes 1087 */ 1088 if ((cfg->fc_flags & RTF_REJECT) || 1089 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { 1090 /* hold loopback dev/idev if we haven't done so. */ 1091 if (dev != &loopback_dev) { 1092 if (dev) { 1093 dev_put(dev); 1094 in6_dev_put(idev); 1095 } 1096 dev = &loopback_dev; 1097 dev_hold(dev); 1098 idev = in6_dev_get(dev); 1099 if (!idev) { 1100 err = -ENODEV; 1101 goto out; 1102 } 1103 } 1104 rt->u.dst.output = ip6_pkt_discard_out; 1105 rt->u.dst.input = ip6_pkt_discard; 1106 rt->u.dst.error = -ENETUNREACH; 1107 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1108 goto install_route; 1109 } 1110 1111 if (cfg->fc_flags & RTF_GATEWAY) { 1112 struct in6_addr *gw_addr; 1113 int gwa_type; 1114 1115 gw_addr = &cfg->fc_gateway; 1116 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr); 1117 gwa_type = ipv6_addr_type(gw_addr); 1118 1119 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 1120 struct rt6_info *grt; 1121 1122 /* IPv6 strictly inhibits using not link-local 1123 addresses as nexthop address. 1124 Otherwise, router will not able to send redirects. 1125 It is very good, but in some (rare!) circumstances 1126 (SIT, PtP, NBMA NOARP links) it is handy to allow 1127 some exceptions. --ANK 1128 */ 1129 err = -EINVAL; 1130 if (!(gwa_type&IPV6_ADDR_UNICAST)) 1131 goto out; 1132 1133 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1); 1134 1135 err = -EHOSTUNREACH; 1136 if (grt == NULL) 1137 goto out; 1138 if (dev) { 1139 if (dev != grt->rt6i_dev) { 1140 dst_release(&grt->u.dst); 1141 goto out; 1142 } 1143 } else { 1144 dev = grt->rt6i_dev; 1145 idev = grt->rt6i_idev; 1146 dev_hold(dev); 1147 in6_dev_hold(grt->rt6i_idev); 1148 } 1149 if (!(grt->rt6i_flags&RTF_GATEWAY)) 1150 err = 0; 1151 dst_release(&grt->u.dst); 1152 1153 if (err) 1154 goto out; 1155 } 1156 err = -EINVAL; 1157 if (dev == NULL || (dev->flags&IFF_LOOPBACK)) 1158 goto out; 1159 } 1160 1161 err = -ENODEV; 1162 if (dev == NULL) 1163 goto out; 1164 1165 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { 1166 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); 1167 if (IS_ERR(rt->rt6i_nexthop)) { 1168 err = PTR_ERR(rt->rt6i_nexthop); 1169 rt->rt6i_nexthop = NULL; 1170 goto out; 1171 } 1172 } 1173 1174 rt->rt6i_flags = cfg->fc_flags; 1175 1176 install_route: 1177 if (cfg->fc_mx) { 1178 struct nlattr *nla; 1179 int remaining; 1180 1181 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 1182 int type = nla->nla_type; 1183 1184 if (type) { 1185 if (type > RTAX_MAX) { 1186 err = -EINVAL; 1187 goto out; 1188 } 1189 1190 rt->u.dst.metrics[type - 1] = nla_get_u32(nla); 1191 } 1192 } 1193 } 1194 1195 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) 1196 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1197 if (!rt->u.dst.metrics[RTAX_MTU-1]) 1198 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); 1199 if (!rt->u.dst.metrics[RTAX_ADVMSS-1]) 1200 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); 1201 rt->u.dst.dev = dev; 1202 rt->rt6i_idev = idev; 1203 rt->rt6i_table = table; 1204 return __ip6_ins_rt(rt, &cfg->fc_nlinfo); 1205 1206 out: 1207 if (dev) 1208 dev_put(dev); 1209 if (idev) 1210 in6_dev_put(idev); 1211 if (rt) 1212 dst_free((struct dst_entry *) rt); 1213 return err; 1214 } 1215 1216 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) 1217 { 1218 int err; 1219 struct fib6_table *table; 1220 1221 if (rt == &ip6_null_entry) 1222 return -ENOENT; 1223 1224 table = rt->rt6i_table; 1225 write_lock_bh(&table->tb6_lock); 1226 1227 err = fib6_del(rt, info); 1228 dst_release(&rt->u.dst); 1229 1230 write_unlock_bh(&table->tb6_lock); 1231 1232 return err; 1233 } 1234 1235 int ip6_del_rt(struct rt6_info *rt) 1236 { 1237 return __ip6_del_rt(rt, NULL); 1238 } 1239 1240 static int ip6_route_del(struct fib6_config *cfg) 1241 { 1242 struct fib6_table *table; 1243 struct fib6_node *fn; 1244 struct rt6_info *rt; 1245 int err = -ESRCH; 1246 1247 table = fib6_get_table(cfg->fc_table); 1248 if (table == NULL) 1249 return err; 1250 1251 read_lock_bh(&table->tb6_lock); 1252 1253 fn = fib6_locate(&table->tb6_root, 1254 &cfg->fc_dst, cfg->fc_dst_len, 1255 &cfg->fc_src, cfg->fc_src_len); 1256 1257 if (fn) { 1258 for (rt = fn->leaf; rt; rt = rt->u.next) { 1259 if (cfg->fc_ifindex && 1260 (rt->rt6i_dev == NULL || 1261 rt->rt6i_dev->ifindex != cfg->fc_ifindex)) 1262 continue; 1263 if (cfg->fc_flags & RTF_GATEWAY && 1264 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) 1265 continue; 1266 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) 1267 continue; 1268 dst_hold(&rt->u.dst); 1269 read_unlock_bh(&table->tb6_lock); 1270 1271 return __ip6_del_rt(rt, &cfg->fc_nlinfo); 1272 } 1273 } 1274 read_unlock_bh(&table->tb6_lock); 1275 1276 return err; 1277 } 1278 1279 /* 1280 * Handle redirects 1281 */ 1282 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, 1283 struct neighbour *neigh, u8 *lladdr, int on_link) 1284 { 1285 struct rt6_info *rt, *nrt = NULL; 1286 struct fib6_node *fn; 1287 struct fib6_table *table; 1288 struct netevent_redirect netevent; 1289 1290 /* TODO: Very lazy, might need to check all tables */ 1291 table = fib6_get_table(RT6_TABLE_MAIN); 1292 if (table == NULL) 1293 return; 1294 1295 /* 1296 * Get the "current" route for this destination and 1297 * check if the redirect has come from approriate router. 1298 * 1299 * RFC 2461 specifies that redirects should only be 1300 * accepted if they come from the nexthop to the target. 1301 * Due to the way the routes are chosen, this notion 1302 * is a bit fuzzy and one might need to check all possible 1303 * routes. 1304 */ 1305 1306 read_lock_bh(&table->tb6_lock); 1307 fn = fib6_lookup(&table->tb6_root, dest, NULL); 1308 restart: 1309 for (rt = fn->leaf; rt; rt = rt->u.next) { 1310 /* 1311 * Current route is on-link; redirect is always invalid. 1312 * 1313 * Seems, previous statement is not true. It could 1314 * be node, which looks for us as on-link (f.e. proxy ndisc) 1315 * But then router serving it might decide, that we should 1316 * know truth 8)8) --ANK (980726). 1317 */ 1318 if (rt6_check_expired(rt)) 1319 continue; 1320 if (!(rt->rt6i_flags & RTF_GATEWAY)) 1321 continue; 1322 if (neigh->dev != rt->rt6i_dev) 1323 continue; 1324 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) 1325 continue; 1326 break; 1327 } 1328 if (rt) 1329 dst_hold(&rt->u.dst); 1330 else if (rt6_need_strict(dest)) { 1331 while ((fn = fn->parent) != NULL) { 1332 if (fn->fn_flags & RTN_ROOT) 1333 break; 1334 if (fn->fn_flags & RTN_RTINFO) 1335 goto restart; 1336 } 1337 } 1338 read_unlock_bh(&table->tb6_lock); 1339 1340 if (!rt) { 1341 if (net_ratelimit()) 1342 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " 1343 "for redirect target\n"); 1344 return; 1345 } 1346 1347 /* 1348 * We have finally decided to accept it. 1349 */ 1350 1351 neigh_update(neigh, lladdr, NUD_STALE, 1352 NEIGH_UPDATE_F_WEAK_OVERRIDE| 1353 NEIGH_UPDATE_F_OVERRIDE| 1354 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 1355 NEIGH_UPDATE_F_ISROUTER)) 1356 ); 1357 1358 /* 1359 * Redirect received -> path was valid. 1360 * Look, redirects are sent only in response to data packets, 1361 * so that this nexthop apparently is reachable. --ANK 1362 */ 1363 dst_confirm(&rt->u.dst); 1364 1365 /* Duplicate redirect: silently ignore. */ 1366 if (neigh == rt->u.dst.neighbour) 1367 goto out; 1368 1369 nrt = ip6_rt_copy(rt); 1370 if (nrt == NULL) 1371 goto out; 1372 1373 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 1374 if (on_link) 1375 nrt->rt6i_flags &= ~RTF_GATEWAY; 1376 1377 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest); 1378 nrt->rt6i_dst.plen = 128; 1379 nrt->u.dst.flags |= DST_HOST; 1380 1381 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); 1382 nrt->rt6i_nexthop = neigh_clone(neigh); 1383 /* Reset pmtu, it may be better */ 1384 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); 1385 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst)); 1386 1387 if (ip6_ins_rt(nrt)) 1388 goto out; 1389 1390 netevent.old = &rt->u.dst; 1391 netevent.new = &nrt->u.dst; 1392 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 1393 1394 if (rt->rt6i_flags&RTF_CACHE) { 1395 ip6_del_rt(rt); 1396 return; 1397 } 1398 1399 out: 1400 dst_release(&rt->u.dst); 1401 return; 1402 } 1403 1404 /* 1405 * Handle ICMP "packet too big" messages 1406 * i.e. Path MTU discovery 1407 */ 1408 1409 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, 1410 struct net_device *dev, u32 pmtu) 1411 { 1412 struct rt6_info *rt, *nrt; 1413 int allfrag = 0; 1414 1415 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0); 1416 if (rt == NULL) 1417 return; 1418 1419 if (pmtu >= dst_mtu(&rt->u.dst)) 1420 goto out; 1421 1422 if (pmtu < IPV6_MIN_MTU) { 1423 /* 1424 * According to RFC2460, PMTU is set to the IPv6 Minimum Link 1425 * MTU (1280) and a fragment header should always be included 1426 * after a node receiving Too Big message reporting PMTU is 1427 * less than the IPv6 Minimum Link MTU. 1428 */ 1429 pmtu = IPV6_MIN_MTU; 1430 allfrag = 1; 1431 } 1432 1433 /* New mtu received -> path was valid. 1434 They are sent only in response to data packets, 1435 so that this nexthop apparently is reachable. --ANK 1436 */ 1437 dst_confirm(&rt->u.dst); 1438 1439 /* Host route. If it is static, it would be better 1440 not to override it, but add new one, so that 1441 when cache entry will expire old pmtu 1442 would return automatically. 1443 */ 1444 if (rt->rt6i_flags & RTF_CACHE) { 1445 rt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1446 if (allfrag) 1447 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1448 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); 1449 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; 1450 goto out; 1451 } 1452 1453 /* Network route. 1454 Two cases are possible: 1455 1. It is connected route. Action: COW 1456 2. It is gatewayed route or NONEXTHOP route. Action: clone it. 1457 */ 1458 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 1459 nrt = rt6_alloc_cow(rt, daddr, saddr); 1460 else 1461 nrt = rt6_alloc_clone(rt, daddr); 1462 1463 if (nrt) { 1464 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1465 if (allfrag) 1466 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1467 1468 /* According to RFC 1981, detecting PMTU increase shouldn't be 1469 * happened within 5 mins, the recommended timer is 10 mins. 1470 * Here this route expiration time is set to ip6_rt_mtu_expires 1471 * which is 10 mins. After 10 mins the decreased pmtu is expired 1472 * and detecting PMTU increase will be automatically happened. 1473 */ 1474 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); 1475 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; 1476 1477 ip6_ins_rt(nrt); 1478 } 1479 out: 1480 dst_release(&rt->u.dst); 1481 } 1482 1483 /* 1484 * Misc support functions 1485 */ 1486 1487 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) 1488 { 1489 struct rt6_info *rt = ip6_dst_alloc(); 1490 1491 if (rt) { 1492 rt->u.dst.input = ort->u.dst.input; 1493 rt->u.dst.output = ort->u.dst.output; 1494 1495 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 1496 rt->u.dst.dev = ort->u.dst.dev; 1497 if (rt->u.dst.dev) 1498 dev_hold(rt->u.dst.dev); 1499 rt->rt6i_idev = ort->rt6i_idev; 1500 if (rt->rt6i_idev) 1501 in6_dev_hold(rt->rt6i_idev); 1502 rt->u.dst.lastuse = jiffies; 1503 rt->rt6i_expires = 0; 1504 1505 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 1506 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 1507 rt->rt6i_metric = 0; 1508 1509 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 1510 #ifdef CONFIG_IPV6_SUBTREES 1511 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1512 #endif 1513 rt->rt6i_table = ort->rt6i_table; 1514 } 1515 return rt; 1516 } 1517 1518 #ifdef CONFIG_IPV6_ROUTE_INFO 1519 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen, 1520 struct in6_addr *gwaddr, int ifindex) 1521 { 1522 struct fib6_node *fn; 1523 struct rt6_info *rt = NULL; 1524 struct fib6_table *table; 1525 1526 table = fib6_get_table(RT6_TABLE_INFO); 1527 if (table == NULL) 1528 return NULL; 1529 1530 write_lock_bh(&table->tb6_lock); 1531 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); 1532 if (!fn) 1533 goto out; 1534 1535 for (rt = fn->leaf; rt; rt = rt->u.next) { 1536 if (rt->rt6i_dev->ifindex != ifindex) 1537 continue; 1538 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 1539 continue; 1540 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 1541 continue; 1542 dst_hold(&rt->u.dst); 1543 break; 1544 } 1545 out: 1546 write_unlock_bh(&table->tb6_lock); 1547 return rt; 1548 } 1549 1550 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen, 1551 struct in6_addr *gwaddr, int ifindex, 1552 unsigned pref) 1553 { 1554 struct fib6_config cfg = { 1555 .fc_table = RT6_TABLE_INFO, 1556 .fc_metric = 1024, 1557 .fc_ifindex = ifindex, 1558 .fc_dst_len = prefixlen, 1559 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 1560 RTF_UP | RTF_PREF(pref), 1561 }; 1562 1563 ipv6_addr_copy(&cfg.fc_dst, prefix); 1564 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); 1565 1566 /* We should treat it as a default route if prefix length is 0. */ 1567 if (!prefixlen) 1568 cfg.fc_flags |= RTF_DEFAULT; 1569 1570 ip6_route_add(&cfg); 1571 1572 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); 1573 } 1574 #endif 1575 1576 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) 1577 { 1578 struct rt6_info *rt; 1579 struct fib6_table *table; 1580 1581 table = fib6_get_table(RT6_TABLE_DFLT); 1582 if (table == NULL) 1583 return NULL; 1584 1585 write_lock_bh(&table->tb6_lock); 1586 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) { 1587 if (dev == rt->rt6i_dev && 1588 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 1589 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 1590 break; 1591 } 1592 if (rt) 1593 dst_hold(&rt->u.dst); 1594 write_unlock_bh(&table->tb6_lock); 1595 return rt; 1596 } 1597 1598 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, 1599 struct net_device *dev, 1600 unsigned int pref) 1601 { 1602 struct fib6_config cfg = { 1603 .fc_table = RT6_TABLE_DFLT, 1604 .fc_metric = 1024, 1605 .fc_ifindex = dev->ifindex, 1606 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 1607 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 1608 }; 1609 1610 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); 1611 1612 ip6_route_add(&cfg); 1613 1614 return rt6_get_dflt_router(gwaddr, dev); 1615 } 1616 1617 void rt6_purge_dflt_routers(void) 1618 { 1619 struct rt6_info *rt; 1620 struct fib6_table *table; 1621 1622 /* NOTE: Keep consistent with rt6_get_dflt_router */ 1623 table = fib6_get_table(RT6_TABLE_DFLT); 1624 if (table == NULL) 1625 return; 1626 1627 restart: 1628 read_lock_bh(&table->tb6_lock); 1629 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) { 1630 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { 1631 dst_hold(&rt->u.dst); 1632 read_unlock_bh(&table->tb6_lock); 1633 ip6_del_rt(rt); 1634 goto restart; 1635 } 1636 } 1637 read_unlock_bh(&table->tb6_lock); 1638 } 1639 1640 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, 1641 struct fib6_config *cfg) 1642 { 1643 memset(cfg, 0, sizeof(*cfg)); 1644 1645 cfg->fc_table = RT6_TABLE_MAIN; 1646 cfg->fc_ifindex = rtmsg->rtmsg_ifindex; 1647 cfg->fc_metric = rtmsg->rtmsg_metric; 1648 cfg->fc_expires = rtmsg->rtmsg_info; 1649 cfg->fc_dst_len = rtmsg->rtmsg_dst_len; 1650 cfg->fc_src_len = rtmsg->rtmsg_src_len; 1651 cfg->fc_flags = rtmsg->rtmsg_flags; 1652 1653 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); 1654 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); 1655 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); 1656 } 1657 1658 int ipv6_route_ioctl(unsigned int cmd, void __user *arg) 1659 { 1660 struct fib6_config cfg; 1661 struct in6_rtmsg rtmsg; 1662 int err; 1663 1664 switch(cmd) { 1665 case SIOCADDRT: /* Add a route */ 1666 case SIOCDELRT: /* Delete a route */ 1667 if (!capable(CAP_NET_ADMIN)) 1668 return -EPERM; 1669 err = copy_from_user(&rtmsg, arg, 1670 sizeof(struct in6_rtmsg)); 1671 if (err) 1672 return -EFAULT; 1673 1674 rtmsg_to_fib6_config(&rtmsg, &cfg); 1675 1676 rtnl_lock(); 1677 switch (cmd) { 1678 case SIOCADDRT: 1679 err = ip6_route_add(&cfg); 1680 break; 1681 case SIOCDELRT: 1682 err = ip6_route_del(&cfg); 1683 break; 1684 default: 1685 err = -EINVAL; 1686 } 1687 rtnl_unlock(); 1688 1689 return err; 1690 }; 1691 1692 return -EINVAL; 1693 } 1694 1695 /* 1696 * Drop the packet on the floor 1697 */ 1698 1699 static int ip6_pkt_discard(struct sk_buff *skb) 1700 { 1701 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr); 1702 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) 1703 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS); 1704 1705 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); 1706 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev); 1707 kfree_skb(skb); 1708 return 0; 1709 } 1710 1711 static int ip6_pkt_discard_out(struct sk_buff *skb) 1712 { 1713 skb->dev = skb->dst->dev; 1714 return ip6_pkt_discard(skb); 1715 } 1716 1717 /* 1718 * Allocate a dst for local (unicast / anycast) address. 1719 */ 1720 1721 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 1722 const struct in6_addr *addr, 1723 int anycast) 1724 { 1725 struct rt6_info *rt = ip6_dst_alloc(); 1726 1727 if (rt == NULL) 1728 return ERR_PTR(-ENOMEM); 1729 1730 dev_hold(&loopback_dev); 1731 in6_dev_hold(idev); 1732 1733 rt->u.dst.flags = DST_HOST; 1734 rt->u.dst.input = ip6_input; 1735 rt->u.dst.output = ip6_output; 1736 rt->rt6i_dev = &loopback_dev; 1737 rt->rt6i_idev = idev; 1738 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 1739 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); 1740 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1741 rt->u.dst.obsolete = -1; 1742 1743 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 1744 if (anycast) 1745 rt->rt6i_flags |= RTF_ANYCAST; 1746 else 1747 rt->rt6i_flags |= RTF_LOCAL; 1748 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 1749 if (rt->rt6i_nexthop == NULL) { 1750 dst_free((struct dst_entry *) rt); 1751 return ERR_PTR(-ENOMEM); 1752 } 1753 1754 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 1755 rt->rt6i_dst.plen = 128; 1756 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL); 1757 1758 atomic_set(&rt->u.dst.__refcnt, 1); 1759 1760 return rt; 1761 } 1762 1763 static int fib6_ifdown(struct rt6_info *rt, void *arg) 1764 { 1765 if (((void*)rt->rt6i_dev == arg || arg == NULL) && 1766 rt != &ip6_null_entry) { 1767 RT6_TRACE("deleted by ifdown %p\n", rt); 1768 return -1; 1769 } 1770 return 0; 1771 } 1772 1773 void rt6_ifdown(struct net_device *dev) 1774 { 1775 fib6_clean_all(fib6_ifdown, 0, dev); 1776 } 1777 1778 struct rt6_mtu_change_arg 1779 { 1780 struct net_device *dev; 1781 unsigned mtu; 1782 }; 1783 1784 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) 1785 { 1786 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 1787 struct inet6_dev *idev; 1788 1789 /* In IPv6 pmtu discovery is not optional, 1790 so that RTAX_MTU lock cannot disable it. 1791 We still use this lock to block changes 1792 caused by addrconf/ndisc. 1793 */ 1794 1795 idev = __in6_dev_get(arg->dev); 1796 if (idev == NULL) 1797 return 0; 1798 1799 /* For administrative MTU increase, there is no way to discover 1800 IPv6 PMTU increase, so PMTU increase should be updated here. 1801 Since RFC 1981 doesn't include administrative MTU increase 1802 update PMTU increase is a MUST. (i.e. jumbo frame) 1803 */ 1804 /* 1805 If new MTU is less than route PMTU, this new MTU will be the 1806 lowest MTU in the path, update the route PMTU to reflect PMTU 1807 decreases; if new MTU is greater than route PMTU, and the 1808 old MTU is the lowest MTU in the path, update the route PMTU 1809 to reflect the increase. In this case if the other nodes' MTU 1810 also have the lowest MTU, TOO BIG MESSAGE will be lead to 1811 PMTU discouvery. 1812 */ 1813 if (rt->rt6i_dev == arg->dev && 1814 !dst_metric_locked(&rt->u.dst, RTAX_MTU) && 1815 (dst_mtu(&rt->u.dst) > arg->mtu || 1816 (dst_mtu(&rt->u.dst) < arg->mtu && 1817 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) 1818 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; 1819 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu); 1820 return 0; 1821 } 1822 1823 void rt6_mtu_change(struct net_device *dev, unsigned mtu) 1824 { 1825 struct rt6_mtu_change_arg arg = { 1826 .dev = dev, 1827 .mtu = mtu, 1828 }; 1829 1830 fib6_clean_all(rt6_mtu_change_route, 0, &arg); 1831 } 1832 1833 static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = { 1834 [RTA_GATEWAY] = { .minlen = sizeof(struct in6_addr) }, 1835 [RTA_OIF] = { .type = NLA_U32 }, 1836 [RTA_IIF] = { .type = NLA_U32 }, 1837 [RTA_PRIORITY] = { .type = NLA_U32 }, 1838 [RTA_METRICS] = { .type = NLA_NESTED }, 1839 }; 1840 1841 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 1842 struct fib6_config *cfg) 1843 { 1844 struct rtmsg *rtm; 1845 struct nlattr *tb[RTA_MAX+1]; 1846 int err; 1847 1848 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 1849 if (err < 0) 1850 goto errout; 1851 1852 err = -EINVAL; 1853 rtm = nlmsg_data(nlh); 1854 memset(cfg, 0, sizeof(*cfg)); 1855 1856 cfg->fc_table = rtm->rtm_table; 1857 cfg->fc_dst_len = rtm->rtm_dst_len; 1858 cfg->fc_src_len = rtm->rtm_src_len; 1859 cfg->fc_flags = RTF_UP; 1860 cfg->fc_protocol = rtm->rtm_protocol; 1861 1862 if (rtm->rtm_type == RTN_UNREACHABLE) 1863 cfg->fc_flags |= RTF_REJECT; 1864 1865 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 1866 cfg->fc_nlinfo.nlh = nlh; 1867 1868 if (tb[RTA_GATEWAY]) { 1869 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); 1870 cfg->fc_flags |= RTF_GATEWAY; 1871 } 1872 1873 if (tb[RTA_DST]) { 1874 int plen = (rtm->rtm_dst_len + 7) >> 3; 1875 1876 if (nla_len(tb[RTA_DST]) < plen) 1877 goto errout; 1878 1879 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 1880 } 1881 1882 if (tb[RTA_SRC]) { 1883 int plen = (rtm->rtm_src_len + 7) >> 3; 1884 1885 if (nla_len(tb[RTA_SRC]) < plen) 1886 goto errout; 1887 1888 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 1889 } 1890 1891 if (tb[RTA_OIF]) 1892 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 1893 1894 if (tb[RTA_PRIORITY]) 1895 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 1896 1897 if (tb[RTA_METRICS]) { 1898 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 1899 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 1900 } 1901 1902 if (tb[RTA_TABLE]) 1903 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 1904 1905 err = 0; 1906 errout: 1907 return err; 1908 } 1909 1910 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 1911 { 1912 struct fib6_config cfg; 1913 int err; 1914 1915 err = rtm_to_fib6_config(skb, nlh, &cfg); 1916 if (err < 0) 1917 return err; 1918 1919 return ip6_route_del(&cfg); 1920 } 1921 1922 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 1923 { 1924 struct fib6_config cfg; 1925 int err; 1926 1927 err = rtm_to_fib6_config(skb, nlh, &cfg); 1928 if (err < 0) 1929 return err; 1930 1931 return ip6_route_add(&cfg); 1932 } 1933 1934 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, 1935 struct in6_addr *dst, struct in6_addr *src, 1936 int iif, int type, u32 pid, u32 seq, 1937 int prefix, unsigned int flags) 1938 { 1939 struct rtmsg *rtm; 1940 struct nlmsghdr *nlh; 1941 struct rta_cacheinfo ci; 1942 u32 table; 1943 1944 if (prefix) { /* user wants prefix routes only */ 1945 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 1946 /* success since this is not a prefix route */ 1947 return 1; 1948 } 1949 } 1950 1951 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); 1952 if (nlh == NULL) 1953 return -ENOBUFS; 1954 1955 rtm = nlmsg_data(nlh); 1956 rtm->rtm_family = AF_INET6; 1957 rtm->rtm_dst_len = rt->rt6i_dst.plen; 1958 rtm->rtm_src_len = rt->rt6i_src.plen; 1959 rtm->rtm_tos = 0; 1960 if (rt->rt6i_table) 1961 table = rt->rt6i_table->tb6_id; 1962 else 1963 table = RT6_TABLE_UNSPEC; 1964 rtm->rtm_table = table; 1965 NLA_PUT_U32(skb, RTA_TABLE, table); 1966 if (rt->rt6i_flags&RTF_REJECT) 1967 rtm->rtm_type = RTN_UNREACHABLE; 1968 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) 1969 rtm->rtm_type = RTN_LOCAL; 1970 else 1971 rtm->rtm_type = RTN_UNICAST; 1972 rtm->rtm_flags = 0; 1973 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 1974 rtm->rtm_protocol = rt->rt6i_protocol; 1975 if (rt->rt6i_flags&RTF_DYNAMIC) 1976 rtm->rtm_protocol = RTPROT_REDIRECT; 1977 else if (rt->rt6i_flags & RTF_ADDRCONF) 1978 rtm->rtm_protocol = RTPROT_KERNEL; 1979 else if (rt->rt6i_flags&RTF_DEFAULT) 1980 rtm->rtm_protocol = RTPROT_RA; 1981 1982 if (rt->rt6i_flags&RTF_CACHE) 1983 rtm->rtm_flags |= RTM_F_CLONED; 1984 1985 if (dst) { 1986 NLA_PUT(skb, RTA_DST, 16, dst); 1987 rtm->rtm_dst_len = 128; 1988 } else if (rtm->rtm_dst_len) 1989 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); 1990 #ifdef CONFIG_IPV6_SUBTREES 1991 if (src) { 1992 NLA_PUT(skb, RTA_SRC, 16, src); 1993 rtm->rtm_src_len = 128; 1994 } else if (rtm->rtm_src_len) 1995 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); 1996 #endif 1997 if (iif) 1998 NLA_PUT_U32(skb, RTA_IIF, iif); 1999 else if (dst) { 2000 struct in6_addr saddr_buf; 2001 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0) 2002 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); 2003 } 2004 2005 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 2006 goto nla_put_failure; 2007 2008 if (rt->u.dst.neighbour) 2009 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); 2010 2011 if (rt->u.dst.dev) 2012 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); 2013 2014 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); 2015 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); 2016 if (rt->rt6i_expires) 2017 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies); 2018 else 2019 ci.rta_expires = 0; 2020 ci.rta_used = rt->u.dst.__use; 2021 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); 2022 ci.rta_error = rt->u.dst.error; 2023 ci.rta_id = 0; 2024 ci.rta_ts = 0; 2025 ci.rta_tsage = 0; 2026 NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); 2027 2028 return nlmsg_end(skb, nlh); 2029 2030 nla_put_failure: 2031 return nlmsg_cancel(skb, nlh); 2032 } 2033 2034 int rt6_dump_route(struct rt6_info *rt, void *p_arg) 2035 { 2036 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 2037 int prefix; 2038 2039 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 2040 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 2041 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; 2042 } else 2043 prefix = 0; 2044 2045 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 2046 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, 2047 prefix, NLM_F_MULTI); 2048 } 2049 2050 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2051 { 2052 struct nlattr *tb[RTA_MAX+1]; 2053 struct rt6_info *rt; 2054 struct sk_buff *skb; 2055 struct rtmsg *rtm; 2056 struct flowi fl; 2057 int err, iif = 0; 2058 2059 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2060 if (err < 0) 2061 goto errout; 2062 2063 err = -EINVAL; 2064 memset(&fl, 0, sizeof(fl)); 2065 2066 if (tb[RTA_SRC]) { 2067 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 2068 goto errout; 2069 2070 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC])); 2071 } 2072 2073 if (tb[RTA_DST]) { 2074 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 2075 goto errout; 2076 2077 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST])); 2078 } 2079 2080 if (tb[RTA_IIF]) 2081 iif = nla_get_u32(tb[RTA_IIF]); 2082 2083 if (tb[RTA_OIF]) 2084 fl.oif = nla_get_u32(tb[RTA_OIF]); 2085 2086 if (iif) { 2087 struct net_device *dev; 2088 dev = __dev_get_by_index(iif); 2089 if (!dev) { 2090 err = -ENODEV; 2091 goto errout; 2092 } 2093 } 2094 2095 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2096 if (skb == NULL) { 2097 err = -ENOBUFS; 2098 goto errout; 2099 } 2100 2101 /* Reserve room for dummy headers, this skb can pass 2102 through good chunk of routing engine. 2103 */ 2104 skb->mac.raw = skb->data; 2105 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2106 2107 rt = (struct rt6_info*) ip6_route_output(NULL, &fl); 2108 skb->dst = &rt->u.dst; 2109 2110 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, 2111 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 2112 nlh->nlmsg_seq, 0, 0); 2113 if (err < 0) { 2114 kfree_skb(skb); 2115 goto errout; 2116 } 2117 2118 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); 2119 errout: 2120 return err; 2121 } 2122 2123 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) 2124 { 2125 struct sk_buff *skb; 2126 u32 pid = 0, seq = 0; 2127 struct nlmsghdr *nlh = NULL; 2128 int payload = sizeof(struct rtmsg) + 256; 2129 int err = -ENOBUFS; 2130 2131 if (info) { 2132 pid = info->pid; 2133 nlh = info->nlh; 2134 if (nlh) 2135 seq = nlh->nlmsg_seq; 2136 } 2137 2138 skb = nlmsg_new(nlmsg_total_size(payload), gfp_any()); 2139 if (skb == NULL) 2140 goto errout; 2141 2142 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0); 2143 if (err < 0) { 2144 kfree_skb(skb); 2145 goto errout; 2146 } 2147 2148 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); 2149 errout: 2150 if (err < 0) 2151 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err); 2152 } 2153 2154 /* 2155 * /proc 2156 */ 2157 2158 #ifdef CONFIG_PROC_FS 2159 2160 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1) 2161 2162 struct rt6_proc_arg 2163 { 2164 char *buffer; 2165 int offset; 2166 int length; 2167 int skip; 2168 int len; 2169 }; 2170 2171 static int rt6_info_route(struct rt6_info *rt, void *p_arg) 2172 { 2173 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg; 2174 int i; 2175 2176 if (arg->skip < arg->offset / RT6_INFO_LEN) { 2177 arg->skip++; 2178 return 0; 2179 } 2180 2181 if (arg->len >= arg->length) 2182 return 0; 2183 2184 for (i=0; i<16; i++) { 2185 sprintf(arg->buffer + arg->len, "%02x", 2186 rt->rt6i_dst.addr.s6_addr[i]); 2187 arg->len += 2; 2188 } 2189 arg->len += sprintf(arg->buffer + arg->len, " %02x ", 2190 rt->rt6i_dst.plen); 2191 2192 #ifdef CONFIG_IPV6_SUBTREES 2193 for (i=0; i<16; i++) { 2194 sprintf(arg->buffer + arg->len, "%02x", 2195 rt->rt6i_src.addr.s6_addr[i]); 2196 arg->len += 2; 2197 } 2198 arg->len += sprintf(arg->buffer + arg->len, " %02x ", 2199 rt->rt6i_src.plen); 2200 #else 2201 sprintf(arg->buffer + arg->len, 2202 "00000000000000000000000000000000 00 "); 2203 arg->len += 36; 2204 #endif 2205 2206 if (rt->rt6i_nexthop) { 2207 for (i=0; i<16; i++) { 2208 sprintf(arg->buffer + arg->len, "%02x", 2209 rt->rt6i_nexthop->primary_key[i]); 2210 arg->len += 2; 2211 } 2212 } else { 2213 sprintf(arg->buffer + arg->len, 2214 "00000000000000000000000000000000"); 2215 arg->len += 32; 2216 } 2217 arg->len += sprintf(arg->buffer + arg->len, 2218 " %08x %08x %08x %08x %8s\n", 2219 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), 2220 rt->u.dst.__use, rt->rt6i_flags, 2221 rt->rt6i_dev ? rt->rt6i_dev->name : ""); 2222 return 0; 2223 } 2224 2225 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) 2226 { 2227 struct rt6_proc_arg arg = { 2228 .buffer = buffer, 2229 .offset = offset, 2230 .length = length, 2231 }; 2232 2233 fib6_clean_all(rt6_info_route, 0, &arg); 2234 2235 *start = buffer; 2236 if (offset) 2237 *start += offset % RT6_INFO_LEN; 2238 2239 arg.len -= offset % RT6_INFO_LEN; 2240 2241 if (arg.len > length) 2242 arg.len = length; 2243 if (arg.len < 0) 2244 arg.len = 0; 2245 2246 return arg.len; 2247 } 2248 2249 static int rt6_stats_seq_show(struct seq_file *seq, void *v) 2250 { 2251 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 2252 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes, 2253 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries, 2254 rt6_stats.fib_rt_cache, 2255 atomic_read(&ip6_dst_ops.entries), 2256 rt6_stats.fib_discarded_routes); 2257 2258 return 0; 2259 } 2260 2261 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 2262 { 2263 return single_open(file, rt6_stats_seq_show, NULL); 2264 } 2265 2266 static struct file_operations rt6_stats_seq_fops = { 2267 .owner = THIS_MODULE, 2268 .open = rt6_stats_seq_open, 2269 .read = seq_read, 2270 .llseek = seq_lseek, 2271 .release = single_release, 2272 }; 2273 #endif /* CONFIG_PROC_FS */ 2274 2275 #ifdef CONFIG_SYSCTL 2276 2277 static int flush_delay; 2278 2279 static 2280 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, 2281 void __user *buffer, size_t *lenp, loff_t *ppos) 2282 { 2283 if (write) { 2284 proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 2285 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay); 2286 return 0; 2287 } else 2288 return -EINVAL; 2289 } 2290 2291 ctl_table ipv6_route_table[] = { 2292 { 2293 .ctl_name = NET_IPV6_ROUTE_FLUSH, 2294 .procname = "flush", 2295 .data = &flush_delay, 2296 .maxlen = sizeof(int), 2297 .mode = 0200, 2298 .proc_handler = &ipv6_sysctl_rtcache_flush 2299 }, 2300 { 2301 .ctl_name = NET_IPV6_ROUTE_GC_THRESH, 2302 .procname = "gc_thresh", 2303 .data = &ip6_dst_ops.gc_thresh, 2304 .maxlen = sizeof(int), 2305 .mode = 0644, 2306 .proc_handler = &proc_dointvec, 2307 }, 2308 { 2309 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, 2310 .procname = "max_size", 2311 .data = &ip6_rt_max_size, 2312 .maxlen = sizeof(int), 2313 .mode = 0644, 2314 .proc_handler = &proc_dointvec, 2315 }, 2316 { 2317 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, 2318 .procname = "gc_min_interval", 2319 .data = &ip6_rt_gc_min_interval, 2320 .maxlen = sizeof(int), 2321 .mode = 0644, 2322 .proc_handler = &proc_dointvec_jiffies, 2323 .strategy = &sysctl_jiffies, 2324 }, 2325 { 2326 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, 2327 .procname = "gc_timeout", 2328 .data = &ip6_rt_gc_timeout, 2329 .maxlen = sizeof(int), 2330 .mode = 0644, 2331 .proc_handler = &proc_dointvec_jiffies, 2332 .strategy = &sysctl_jiffies, 2333 }, 2334 { 2335 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, 2336 .procname = "gc_interval", 2337 .data = &ip6_rt_gc_interval, 2338 .maxlen = sizeof(int), 2339 .mode = 0644, 2340 .proc_handler = &proc_dointvec_jiffies, 2341 .strategy = &sysctl_jiffies, 2342 }, 2343 { 2344 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, 2345 .procname = "gc_elasticity", 2346 .data = &ip6_rt_gc_elasticity, 2347 .maxlen = sizeof(int), 2348 .mode = 0644, 2349 .proc_handler = &proc_dointvec_jiffies, 2350 .strategy = &sysctl_jiffies, 2351 }, 2352 { 2353 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, 2354 .procname = "mtu_expires", 2355 .data = &ip6_rt_mtu_expires, 2356 .maxlen = sizeof(int), 2357 .mode = 0644, 2358 .proc_handler = &proc_dointvec_jiffies, 2359 .strategy = &sysctl_jiffies, 2360 }, 2361 { 2362 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, 2363 .procname = "min_adv_mss", 2364 .data = &ip6_rt_min_advmss, 2365 .maxlen = sizeof(int), 2366 .mode = 0644, 2367 .proc_handler = &proc_dointvec_jiffies, 2368 .strategy = &sysctl_jiffies, 2369 }, 2370 { 2371 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, 2372 .procname = "gc_min_interval_ms", 2373 .data = &ip6_rt_gc_min_interval, 2374 .maxlen = sizeof(int), 2375 .mode = 0644, 2376 .proc_handler = &proc_dointvec_ms_jiffies, 2377 .strategy = &sysctl_ms_jiffies, 2378 }, 2379 { .ctl_name = 0 } 2380 }; 2381 2382 #endif 2383 2384 void __init ip6_route_init(void) 2385 { 2386 struct proc_dir_entry *p; 2387 2388 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", 2389 sizeof(struct rt6_info), 2390 0, SLAB_HWCACHE_ALIGN, 2391 NULL, NULL); 2392 if (!ip6_dst_ops.kmem_cachep) 2393 panic("cannot create ip6_dst_cache"); 2394 2395 fib6_init(); 2396 #ifdef CONFIG_PROC_FS 2397 p = proc_net_create("ipv6_route", 0, rt6_proc_info); 2398 if (p) 2399 p->owner = THIS_MODULE; 2400 2401 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2402 #endif 2403 #ifdef CONFIG_XFRM 2404 xfrm6_init(); 2405 #endif 2406 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2407 fib6_rules_init(); 2408 #endif 2409 } 2410 2411 void ip6_route_cleanup(void) 2412 { 2413 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2414 fib6_rules_cleanup(); 2415 #endif 2416 #ifdef CONFIG_PROC_FS 2417 proc_net_remove("ipv6_route"); 2418 proc_net_remove("rt6_stats"); 2419 #endif 2420 #ifdef CONFIG_XFRM 2421 xfrm6_fini(); 2422 #endif 2423 rt6_ifdown(NULL); 2424 fib6_gc_cleanup(); 2425 kmem_cache_destroy(ip6_dst_ops.kmem_cachep); 2426 } 2427