1 /* 2 * Linux INET6 implementation 3 * FIB front-end. 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $ 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 */ 15 16 /* Changes: 17 * 18 * YOSHIFUJI Hideaki @USAGI 19 * reworked default router selection. 20 * - respect outgoing interface 21 * - select from (probably) reachable routers (i.e. 22 * routers in REACHABLE, STALE, DELAY or PROBE states). 23 * - always select the same router if it is (probably) 24 * reachable. otherwise, round-robin the list. 25 */ 26 27 #include <linux/config.h> 28 #include <linux/errno.h> 29 #include <linux/types.h> 30 #include <linux/times.h> 31 #include <linux/socket.h> 32 #include <linux/sockios.h> 33 #include <linux/net.h> 34 #include <linux/route.h> 35 #include <linux/netdevice.h> 36 #include <linux/in6.h> 37 #include <linux/init.h> 38 #include <linux/netlink.h> 39 #include <linux/if_arp.h> 40 41 #ifdef CONFIG_PROC_FS 42 #include <linux/proc_fs.h> 43 #include <linux/seq_file.h> 44 #endif 45 46 #include <net/snmp.h> 47 #include <net/ipv6.h> 48 #include <net/ip6_fib.h> 49 #include <net/ip6_route.h> 50 #include <net/ndisc.h> 51 #include <net/addrconf.h> 52 #include <net/tcp.h> 53 #include <linux/rtnetlink.h> 54 #include <net/dst.h> 55 #include <net/xfrm.h> 56 57 #include <asm/uaccess.h> 58 59 #ifdef CONFIG_SYSCTL 60 #include <linux/sysctl.h> 61 #endif 62 63 /* Set to 3 to get tracing. */ 64 #define RT6_DEBUG 2 65 66 #if RT6_DEBUG >= 3 67 #define RDBG(x) printk x 68 #define RT6_TRACE(x...) printk(KERN_DEBUG x) 69 #else 70 #define RDBG(x) 71 #define RT6_TRACE(x...) do { ; } while (0) 72 #endif 73 74 75 static int ip6_rt_max_size = 4096; 76 static int ip6_rt_gc_min_interval = HZ / 2; 77 static int ip6_rt_gc_timeout = 60*HZ; 78 int ip6_rt_gc_interval = 30*HZ; 79 static int ip6_rt_gc_elasticity = 9; 80 static int ip6_rt_mtu_expires = 10*60*HZ; 81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 82 83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); 84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 85 static struct dst_entry *ip6_negative_advice(struct dst_entry *); 86 static void ip6_dst_destroy(struct dst_entry *); 87 static void ip6_dst_ifdown(struct dst_entry *, 88 struct net_device *dev, int how); 89 static int ip6_dst_gc(void); 90 91 static int ip6_pkt_discard(struct sk_buff *skb); 92 static int ip6_pkt_discard_out(struct sk_buff *skb); 93 static void ip6_link_failure(struct sk_buff *skb); 94 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 95 96 static struct dst_ops ip6_dst_ops = { 97 .family = AF_INET6, 98 .protocol = __constant_htons(ETH_P_IPV6), 99 .gc = ip6_dst_gc, 100 .gc_thresh = 1024, 101 .check = ip6_dst_check, 102 .destroy = ip6_dst_destroy, 103 .ifdown = ip6_dst_ifdown, 104 .negative_advice = ip6_negative_advice, 105 .link_failure = ip6_link_failure, 106 .update_pmtu = ip6_rt_update_pmtu, 107 .entry_size = sizeof(struct rt6_info), 108 }; 109 110 struct rt6_info ip6_null_entry = { 111 .u = { 112 .dst = { 113 .__refcnt = ATOMIC_INIT(1), 114 .__use = 1, 115 .dev = &loopback_dev, 116 .obsolete = -1, 117 .error = -ENETUNREACH, 118 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 119 .input = ip6_pkt_discard, 120 .output = ip6_pkt_discard_out, 121 .ops = &ip6_dst_ops, 122 .path = (struct dst_entry*)&ip6_null_entry, 123 } 124 }, 125 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 126 .rt6i_metric = ~(u32) 0, 127 .rt6i_ref = ATOMIC_INIT(1), 128 }; 129 130 struct fib6_node ip6_routing_table = { 131 .leaf = &ip6_null_entry, 132 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, 133 }; 134 135 /* Protects all the ip6 fib */ 136 137 DEFINE_RWLOCK(rt6_lock); 138 139 140 /* allocate dst with ip6_dst_ops */ 141 static __inline__ struct rt6_info *ip6_dst_alloc(void) 142 { 143 return (struct rt6_info *)dst_alloc(&ip6_dst_ops); 144 } 145 146 static void ip6_dst_destroy(struct dst_entry *dst) 147 { 148 struct rt6_info *rt = (struct rt6_info *)dst; 149 struct inet6_dev *idev = rt->rt6i_idev; 150 151 if (idev != NULL) { 152 rt->rt6i_idev = NULL; 153 in6_dev_put(idev); 154 } 155 } 156 157 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 158 int how) 159 { 160 struct rt6_info *rt = (struct rt6_info *)dst; 161 struct inet6_dev *idev = rt->rt6i_idev; 162 163 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) { 164 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev); 165 if (loopback_idev != NULL) { 166 rt->rt6i_idev = loopback_idev; 167 in6_dev_put(idev); 168 } 169 } 170 } 171 172 static __inline__ int rt6_check_expired(const struct rt6_info *rt) 173 { 174 return (rt->rt6i_flags & RTF_EXPIRES && 175 time_after(jiffies, rt->rt6i_expires)); 176 } 177 178 /* 179 * Route lookup. Any rt6_lock is implied. 180 */ 181 182 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, 183 int oif, 184 int strict) 185 { 186 struct rt6_info *local = NULL; 187 struct rt6_info *sprt; 188 189 if (oif) { 190 for (sprt = rt; sprt; sprt = sprt->u.next) { 191 struct net_device *dev = sprt->rt6i_dev; 192 if (dev->ifindex == oif) 193 return sprt; 194 if (dev->flags & IFF_LOOPBACK) { 195 if (sprt->rt6i_idev == NULL || 196 sprt->rt6i_idev->dev->ifindex != oif) { 197 if (strict && oif) 198 continue; 199 if (local && (!oif || 200 local->rt6i_idev->dev->ifindex == oif)) 201 continue; 202 } 203 local = sprt; 204 } 205 } 206 207 if (local) 208 return local; 209 210 if (strict) 211 return &ip6_null_entry; 212 } 213 return rt; 214 } 215 216 /* 217 * pointer to the last default router chosen. BH is disabled locally. 218 */ 219 static struct rt6_info *rt6_dflt_pointer; 220 static DEFINE_SPINLOCK(rt6_dflt_lock); 221 222 void rt6_reset_dflt_pointer(struct rt6_info *rt) 223 { 224 spin_lock_bh(&rt6_dflt_lock); 225 if (rt == NULL || rt == rt6_dflt_pointer) { 226 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer); 227 rt6_dflt_pointer = NULL; 228 } 229 spin_unlock_bh(&rt6_dflt_lock); 230 } 231 232 /* Default Router Selection (RFC 2461 6.3.6) */ 233 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif) 234 { 235 struct rt6_info *match = NULL; 236 struct rt6_info *sprt; 237 int mpri = 0; 238 239 for (sprt = rt; sprt; sprt = sprt->u.next) { 240 struct neighbour *neigh; 241 int m = 0; 242 243 if (!oif || 244 (sprt->rt6i_dev && 245 sprt->rt6i_dev->ifindex == oif)) 246 m += 8; 247 248 if (rt6_check_expired(sprt)) 249 continue; 250 251 if (sprt == rt6_dflt_pointer) 252 m += 4; 253 254 if ((neigh = sprt->rt6i_nexthop) != NULL) { 255 read_lock_bh(&neigh->lock); 256 switch (neigh->nud_state) { 257 case NUD_REACHABLE: 258 m += 3; 259 break; 260 261 case NUD_STALE: 262 case NUD_DELAY: 263 case NUD_PROBE: 264 m += 2; 265 break; 266 267 case NUD_NOARP: 268 case NUD_PERMANENT: 269 m += 1; 270 break; 271 272 case NUD_INCOMPLETE: 273 default: 274 read_unlock_bh(&neigh->lock); 275 continue; 276 } 277 read_unlock_bh(&neigh->lock); 278 } else { 279 continue; 280 } 281 282 if (m > mpri || m >= 12) { 283 match = sprt; 284 mpri = m; 285 if (m >= 12) { 286 /* we choose the last default router if it 287 * is in (probably) reachable state. 288 * If route changed, we should do pmtu 289 * discovery. --yoshfuji 290 */ 291 break; 292 } 293 } 294 } 295 296 spin_lock(&rt6_dflt_lock); 297 if (!match) { 298 /* 299 * No default routers are known to be reachable. 300 * SHOULD round robin 301 */ 302 if (rt6_dflt_pointer) { 303 for (sprt = rt6_dflt_pointer->u.next; 304 sprt; sprt = sprt->u.next) { 305 if (sprt->u.dst.obsolete <= 0 && 306 sprt->u.dst.error == 0 && 307 !rt6_check_expired(sprt)) { 308 match = sprt; 309 break; 310 } 311 } 312 for (sprt = rt; 313 !match && sprt; 314 sprt = sprt->u.next) { 315 if (sprt->u.dst.obsolete <= 0 && 316 sprt->u.dst.error == 0 && 317 !rt6_check_expired(sprt)) { 318 match = sprt; 319 break; 320 } 321 if (sprt == rt6_dflt_pointer) 322 break; 323 } 324 } 325 } 326 327 if (match) { 328 if (rt6_dflt_pointer != match) 329 RT6_TRACE("changed default router: %p->%p\n", 330 rt6_dflt_pointer, match); 331 rt6_dflt_pointer = match; 332 } 333 spin_unlock(&rt6_dflt_lock); 334 335 if (!match) { 336 /* 337 * Last Resort: if no default routers found, 338 * use addrconf default route. 339 * We don't record this route. 340 */ 341 for (sprt = ip6_routing_table.leaf; 342 sprt; sprt = sprt->u.next) { 343 if (!rt6_check_expired(sprt) && 344 (sprt->rt6i_flags & RTF_DEFAULT) && 345 (!oif || 346 (sprt->rt6i_dev && 347 sprt->rt6i_dev->ifindex == oif))) { 348 match = sprt; 349 break; 350 } 351 } 352 if (!match) { 353 /* no default route. give up. */ 354 match = &ip6_null_entry; 355 } 356 } 357 358 return match; 359 } 360 361 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, 362 int oif, int strict) 363 { 364 struct fib6_node *fn; 365 struct rt6_info *rt; 366 367 read_lock_bh(&rt6_lock); 368 fn = fib6_lookup(&ip6_routing_table, daddr, saddr); 369 rt = rt6_device_match(fn->leaf, oif, strict); 370 dst_hold(&rt->u.dst); 371 rt->u.dst.__use++; 372 read_unlock_bh(&rt6_lock); 373 374 rt->u.dst.lastuse = jiffies; 375 if (rt->u.dst.error == 0) 376 return rt; 377 dst_release(&rt->u.dst); 378 return NULL; 379 } 380 381 /* ip6_ins_rt is called with FREE rt6_lock. 382 It takes new route entry, the addition fails by any reason the 383 route is freed. In any case, if caller does not hold it, it may 384 be destroyed. 385 */ 386 387 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, 388 void *_rtattr, struct netlink_skb_parms *req) 389 { 390 int err; 391 392 write_lock_bh(&rt6_lock); 393 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req); 394 write_unlock_bh(&rt6_lock); 395 396 return err; 397 } 398 399 /* No rt6_lock! If COW failed, the function returns dead route entry 400 with dst->error set to errno value. 401 */ 402 403 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr, 404 struct in6_addr *saddr, struct netlink_skb_parms *req) 405 { 406 int err; 407 struct rt6_info *rt; 408 409 /* 410 * Clone the route. 411 */ 412 413 rt = ip6_rt_copy(ort); 414 415 if (rt) { 416 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 417 418 if (!(rt->rt6i_flags&RTF_GATEWAY)) 419 ipv6_addr_copy(&rt->rt6i_gateway, daddr); 420 421 rt->rt6i_dst.plen = 128; 422 rt->rt6i_flags |= RTF_CACHE; 423 rt->u.dst.flags |= DST_HOST; 424 425 #ifdef CONFIG_IPV6_SUBTREES 426 if (rt->rt6i_src.plen && saddr) { 427 ipv6_addr_copy(&rt->rt6i_src.addr, saddr); 428 rt->rt6i_src.plen = 128; 429 } 430 #endif 431 432 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 433 434 dst_hold(&rt->u.dst); 435 436 err = ip6_ins_rt(rt, NULL, NULL, req); 437 if (err == 0) 438 return rt; 439 440 rt->u.dst.error = err; 441 442 return rt; 443 } 444 dst_hold(&ip6_null_entry.u.dst); 445 return &ip6_null_entry; 446 } 447 448 #define BACKTRACK() \ 449 if (rt == &ip6_null_entry && strict) { \ 450 while ((fn = fn->parent) != NULL) { \ 451 if (fn->fn_flags & RTN_ROOT) { \ 452 dst_hold(&rt->u.dst); \ 453 goto out; \ 454 } \ 455 if (fn->fn_flags & RTN_RTINFO) \ 456 goto restart; \ 457 } \ 458 } 459 460 461 void ip6_route_input(struct sk_buff *skb) 462 { 463 struct fib6_node *fn; 464 struct rt6_info *rt; 465 int strict; 466 int attempts = 3; 467 468 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL); 469 470 relookup: 471 read_lock_bh(&rt6_lock); 472 473 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr, 474 &skb->nh.ipv6h->saddr); 475 476 restart: 477 rt = fn->leaf; 478 479 if ((rt->rt6i_flags & RTF_CACHE)) { 480 rt = rt6_device_match(rt, skb->dev->ifindex, strict); 481 BACKTRACK(); 482 dst_hold(&rt->u.dst); 483 goto out; 484 } 485 486 rt = rt6_device_match(rt, skb->dev->ifindex, strict); 487 BACKTRACK(); 488 489 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { 490 struct rt6_info *nrt; 491 dst_hold(&rt->u.dst); 492 read_unlock_bh(&rt6_lock); 493 494 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr, 495 &skb->nh.ipv6h->saddr, 496 &NETLINK_CB(skb)); 497 498 dst_release(&rt->u.dst); 499 rt = nrt; 500 501 if (rt->u.dst.error != -EEXIST || --attempts <= 0) 502 goto out2; 503 504 /* Race condition! In the gap, when rt6_lock was 505 released someone could insert this route. Relookup. 506 */ 507 dst_release(&rt->u.dst); 508 goto relookup; 509 } 510 dst_hold(&rt->u.dst); 511 512 out: 513 read_unlock_bh(&rt6_lock); 514 out2: 515 rt->u.dst.lastuse = jiffies; 516 rt->u.dst.__use++; 517 skb->dst = (struct dst_entry *) rt; 518 } 519 520 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) 521 { 522 struct fib6_node *fn; 523 struct rt6_info *rt; 524 int strict; 525 int attempts = 3; 526 527 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL); 528 529 relookup: 530 read_lock_bh(&rt6_lock); 531 532 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src); 533 534 restart: 535 rt = fn->leaf; 536 537 if ((rt->rt6i_flags & RTF_CACHE)) { 538 rt = rt6_device_match(rt, fl->oif, strict); 539 BACKTRACK(); 540 dst_hold(&rt->u.dst); 541 goto out; 542 } 543 if (rt->rt6i_flags & RTF_DEFAULT) { 544 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF) 545 rt = rt6_best_dflt(rt, fl->oif); 546 } else { 547 rt = rt6_device_match(rt, fl->oif, strict); 548 BACKTRACK(); 549 } 550 551 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { 552 struct rt6_info *nrt; 553 dst_hold(&rt->u.dst); 554 read_unlock_bh(&rt6_lock); 555 556 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL); 557 558 dst_release(&rt->u.dst); 559 rt = nrt; 560 561 if (rt->u.dst.error != -EEXIST || --attempts <= 0) 562 goto out2; 563 564 /* Race condition! In the gap, when rt6_lock was 565 released someone could insert this route. Relookup. 566 */ 567 dst_release(&rt->u.dst); 568 goto relookup; 569 } 570 dst_hold(&rt->u.dst); 571 572 out: 573 read_unlock_bh(&rt6_lock); 574 out2: 575 rt->u.dst.lastuse = jiffies; 576 rt->u.dst.__use++; 577 return &rt->u.dst; 578 } 579 580 581 /* 582 * Destination cache support functions 583 */ 584 585 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 586 { 587 struct rt6_info *rt; 588 589 rt = (struct rt6_info *) dst; 590 591 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) 592 return dst; 593 594 return NULL; 595 } 596 597 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) 598 { 599 struct rt6_info *rt = (struct rt6_info *) dst; 600 601 if (rt) { 602 if (rt->rt6i_flags & RTF_CACHE) 603 ip6_del_rt(rt, NULL, NULL, NULL); 604 else 605 dst_release(dst); 606 } 607 return NULL; 608 } 609 610 static void ip6_link_failure(struct sk_buff *skb) 611 { 612 struct rt6_info *rt; 613 614 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev); 615 616 rt = (struct rt6_info *) skb->dst; 617 if (rt) { 618 if (rt->rt6i_flags&RTF_CACHE) { 619 dst_set_expires(&rt->u.dst, 0); 620 rt->rt6i_flags |= RTF_EXPIRES; 621 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) 622 rt->rt6i_node->fn_sernum = -1; 623 } 624 } 625 626 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 627 { 628 struct rt6_info *rt6 = (struct rt6_info*)dst; 629 630 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 631 rt6->rt6i_flags |= RTF_MODIFIED; 632 if (mtu < IPV6_MIN_MTU) { 633 mtu = IPV6_MIN_MTU; 634 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 635 } 636 dst->metrics[RTAX_MTU-1] = mtu; 637 } 638 } 639 640 /* Protected by rt6_lock. */ 641 static struct dst_entry *ndisc_dst_gc_list; 642 static int ipv6_get_mtu(struct net_device *dev); 643 644 static inline unsigned int ipv6_advmss(unsigned int mtu) 645 { 646 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 647 648 if (mtu < ip6_rt_min_advmss) 649 mtu = ip6_rt_min_advmss; 650 651 /* 652 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 653 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 654 * IPV6_MAXPLEN is also valid and means: "any MSS, 655 * rely only on pmtu discovery" 656 */ 657 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 658 mtu = IPV6_MAXPLEN; 659 return mtu; 660 } 661 662 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 663 struct neighbour *neigh, 664 struct in6_addr *addr, 665 int (*output)(struct sk_buff *)) 666 { 667 struct rt6_info *rt; 668 struct inet6_dev *idev = in6_dev_get(dev); 669 670 if (unlikely(idev == NULL)) 671 return NULL; 672 673 rt = ip6_dst_alloc(); 674 if (unlikely(rt == NULL)) { 675 in6_dev_put(idev); 676 goto out; 677 } 678 679 dev_hold(dev); 680 if (neigh) 681 neigh_hold(neigh); 682 else 683 neigh = ndisc_get_neigh(dev, addr); 684 685 rt->rt6i_dev = dev; 686 rt->rt6i_idev = idev; 687 rt->rt6i_nexthop = neigh; 688 atomic_set(&rt->u.dst.__refcnt, 1); 689 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; 690 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 691 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); 692 rt->u.dst.output = output; 693 694 #if 0 /* there's no chance to use these for ndisc */ 695 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 696 ? DST_HOST 697 : 0; 698 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 699 rt->rt6i_dst.plen = 128; 700 #endif 701 702 write_lock_bh(&rt6_lock); 703 rt->u.dst.next = ndisc_dst_gc_list; 704 ndisc_dst_gc_list = &rt->u.dst; 705 write_unlock_bh(&rt6_lock); 706 707 fib6_force_start_gc(); 708 709 out: 710 return (struct dst_entry *)rt; 711 } 712 713 int ndisc_dst_gc(int *more) 714 { 715 struct dst_entry *dst, *next, **pprev; 716 int freed; 717 718 next = NULL; 719 pprev = &ndisc_dst_gc_list; 720 freed = 0; 721 while ((dst = *pprev) != NULL) { 722 if (!atomic_read(&dst->__refcnt)) { 723 *pprev = dst->next; 724 dst_free(dst); 725 freed++; 726 } else { 727 pprev = &dst->next; 728 (*more)++; 729 } 730 } 731 732 return freed; 733 } 734 735 static int ip6_dst_gc(void) 736 { 737 static unsigned expire = 30*HZ; 738 static unsigned long last_gc; 739 unsigned long now = jiffies; 740 741 if (time_after(last_gc + ip6_rt_gc_min_interval, now) && 742 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size) 743 goto out; 744 745 expire++; 746 fib6_run_gc(expire); 747 last_gc = now; 748 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh) 749 expire = ip6_rt_gc_timeout>>1; 750 751 out: 752 expire -= expire>>ip6_rt_gc_elasticity; 753 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size); 754 } 755 756 /* Clean host part of a prefix. Not necessary in radix tree, 757 but results in cleaner routing tables. 758 759 Remove it only when all the things will work! 760 */ 761 762 static int ipv6_get_mtu(struct net_device *dev) 763 { 764 int mtu = IPV6_MIN_MTU; 765 struct inet6_dev *idev; 766 767 idev = in6_dev_get(dev); 768 if (idev) { 769 mtu = idev->cnf.mtu6; 770 in6_dev_put(idev); 771 } 772 return mtu; 773 } 774 775 int ipv6_get_hoplimit(struct net_device *dev) 776 { 777 int hoplimit = ipv6_devconf.hop_limit; 778 struct inet6_dev *idev; 779 780 idev = in6_dev_get(dev); 781 if (idev) { 782 hoplimit = idev->cnf.hop_limit; 783 in6_dev_put(idev); 784 } 785 return hoplimit; 786 } 787 788 /* 789 * 790 */ 791 792 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 793 void *_rtattr, struct netlink_skb_parms *req) 794 { 795 int err; 796 struct rtmsg *r; 797 struct rtattr **rta; 798 struct rt6_info *rt = NULL; 799 struct net_device *dev = NULL; 800 struct inet6_dev *idev = NULL; 801 int addr_type; 802 803 rta = (struct rtattr **) _rtattr; 804 805 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128) 806 return -EINVAL; 807 #ifndef CONFIG_IPV6_SUBTREES 808 if (rtmsg->rtmsg_src_len) 809 return -EINVAL; 810 #endif 811 if (rtmsg->rtmsg_ifindex) { 812 err = -ENODEV; 813 dev = dev_get_by_index(rtmsg->rtmsg_ifindex); 814 if (!dev) 815 goto out; 816 idev = in6_dev_get(dev); 817 if (!idev) 818 goto out; 819 } 820 821 if (rtmsg->rtmsg_metric == 0) 822 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER; 823 824 rt = ip6_dst_alloc(); 825 826 if (rt == NULL) { 827 err = -ENOMEM; 828 goto out; 829 } 830 831 rt->u.dst.obsolete = -1; 832 rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info); 833 if (nlh && (r = NLMSG_DATA(nlh))) { 834 rt->rt6i_protocol = r->rtm_protocol; 835 } else { 836 rt->rt6i_protocol = RTPROT_BOOT; 837 } 838 839 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst); 840 841 if (addr_type & IPV6_ADDR_MULTICAST) 842 rt->u.dst.input = ip6_mc_input; 843 else 844 rt->u.dst.input = ip6_forward; 845 846 rt->u.dst.output = ip6_output; 847 848 ipv6_addr_prefix(&rt->rt6i_dst.addr, 849 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len); 850 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len; 851 if (rt->rt6i_dst.plen == 128) 852 rt->u.dst.flags = DST_HOST; 853 854 #ifdef CONFIG_IPV6_SUBTREES 855 ipv6_addr_prefix(&rt->rt6i_src.addr, 856 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len); 857 rt->rt6i_src.plen = rtmsg->rtmsg_src_len; 858 #endif 859 860 rt->rt6i_metric = rtmsg->rtmsg_metric; 861 862 /* We cannot add true routes via loopback here, 863 they would result in kernel looping; promote them to reject routes 864 */ 865 if ((rtmsg->rtmsg_flags&RTF_REJECT) || 866 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { 867 /* hold loopback dev/idev if we haven't done so. */ 868 if (dev != &loopback_dev) { 869 if (dev) { 870 dev_put(dev); 871 in6_dev_put(idev); 872 } 873 dev = &loopback_dev; 874 dev_hold(dev); 875 idev = in6_dev_get(dev); 876 if (!idev) { 877 err = -ENODEV; 878 goto out; 879 } 880 } 881 rt->u.dst.output = ip6_pkt_discard_out; 882 rt->u.dst.input = ip6_pkt_discard; 883 rt->u.dst.error = -ENETUNREACH; 884 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 885 goto install_route; 886 } 887 888 if (rtmsg->rtmsg_flags & RTF_GATEWAY) { 889 struct in6_addr *gw_addr; 890 int gwa_type; 891 892 gw_addr = &rtmsg->rtmsg_gateway; 893 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway); 894 gwa_type = ipv6_addr_type(gw_addr); 895 896 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 897 struct rt6_info *grt; 898 899 /* IPv6 strictly inhibits using not link-local 900 addresses as nexthop address. 901 Otherwise, router will not able to send redirects. 902 It is very good, but in some (rare!) circumstances 903 (SIT, PtP, NBMA NOARP links) it is handy to allow 904 some exceptions. --ANK 905 */ 906 err = -EINVAL; 907 if (!(gwa_type&IPV6_ADDR_UNICAST)) 908 goto out; 909 910 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1); 911 912 err = -EHOSTUNREACH; 913 if (grt == NULL) 914 goto out; 915 if (dev) { 916 if (dev != grt->rt6i_dev) { 917 dst_release(&grt->u.dst); 918 goto out; 919 } 920 } else { 921 dev = grt->rt6i_dev; 922 idev = grt->rt6i_idev; 923 dev_hold(dev); 924 in6_dev_hold(grt->rt6i_idev); 925 } 926 if (!(grt->rt6i_flags&RTF_GATEWAY)) 927 err = 0; 928 dst_release(&grt->u.dst); 929 930 if (err) 931 goto out; 932 } 933 err = -EINVAL; 934 if (dev == NULL || (dev->flags&IFF_LOOPBACK)) 935 goto out; 936 } 937 938 err = -ENODEV; 939 if (dev == NULL) 940 goto out; 941 942 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) { 943 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); 944 if (IS_ERR(rt->rt6i_nexthop)) { 945 err = PTR_ERR(rt->rt6i_nexthop); 946 rt->rt6i_nexthop = NULL; 947 goto out; 948 } 949 } 950 951 rt->rt6i_flags = rtmsg->rtmsg_flags; 952 953 install_route: 954 if (rta && rta[RTA_METRICS-1]) { 955 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]); 956 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]); 957 958 while (RTA_OK(attr, attrlen)) { 959 unsigned flavor = attr->rta_type; 960 if (flavor) { 961 if (flavor > RTAX_MAX) { 962 err = -EINVAL; 963 goto out; 964 } 965 rt->u.dst.metrics[flavor-1] = 966 *(u32 *)RTA_DATA(attr); 967 } 968 attr = RTA_NEXT(attr, attrlen); 969 } 970 } 971 972 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) 973 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 974 if (!rt->u.dst.metrics[RTAX_MTU-1]) 975 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); 976 if (!rt->u.dst.metrics[RTAX_ADVMSS-1]) 977 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); 978 rt->u.dst.dev = dev; 979 rt->rt6i_idev = idev; 980 return ip6_ins_rt(rt, nlh, _rtattr, req); 981 982 out: 983 if (dev) 984 dev_put(dev); 985 if (idev) 986 in6_dev_put(idev); 987 if (rt) 988 dst_free((struct dst_entry *) rt); 989 return err; 990 } 991 992 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) 993 { 994 int err; 995 996 write_lock_bh(&rt6_lock); 997 998 rt6_reset_dflt_pointer(NULL); 999 1000 err = fib6_del(rt, nlh, _rtattr, req); 1001 dst_release(&rt->u.dst); 1002 1003 write_unlock_bh(&rt6_lock); 1004 1005 return err; 1006 } 1007 1008 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) 1009 { 1010 struct fib6_node *fn; 1011 struct rt6_info *rt; 1012 int err = -ESRCH; 1013 1014 read_lock_bh(&rt6_lock); 1015 1016 fn = fib6_locate(&ip6_routing_table, 1017 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len, 1018 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len); 1019 1020 if (fn) { 1021 for (rt = fn->leaf; rt; rt = rt->u.next) { 1022 if (rtmsg->rtmsg_ifindex && 1023 (rt->rt6i_dev == NULL || 1024 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex)) 1025 continue; 1026 if (rtmsg->rtmsg_flags&RTF_GATEWAY && 1027 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway)) 1028 continue; 1029 if (rtmsg->rtmsg_metric && 1030 rtmsg->rtmsg_metric != rt->rt6i_metric) 1031 continue; 1032 dst_hold(&rt->u.dst); 1033 read_unlock_bh(&rt6_lock); 1034 1035 return ip6_del_rt(rt, nlh, _rtattr, req); 1036 } 1037 } 1038 read_unlock_bh(&rt6_lock); 1039 1040 return err; 1041 } 1042 1043 /* 1044 * Handle redirects 1045 */ 1046 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, 1047 struct neighbour *neigh, u8 *lladdr, int on_link) 1048 { 1049 struct rt6_info *rt, *nrt; 1050 1051 /* Locate old route to this destination. */ 1052 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1); 1053 1054 if (rt == NULL) 1055 return; 1056 1057 if (neigh->dev != rt->rt6i_dev) 1058 goto out; 1059 1060 /* 1061 * Current route is on-link; redirect is always invalid. 1062 * 1063 * Seems, previous statement is not true. It could 1064 * be node, which looks for us as on-link (f.e. proxy ndisc) 1065 * But then router serving it might decide, that we should 1066 * know truth 8)8) --ANK (980726). 1067 */ 1068 if (!(rt->rt6i_flags&RTF_GATEWAY)) 1069 goto out; 1070 1071 /* 1072 * RFC 2461 specifies that redirects should only be 1073 * accepted if they come from the nexthop to the target. 1074 * Due to the way default routers are chosen, this notion 1075 * is a bit fuzzy and one might need to check all default 1076 * routers. 1077 */ 1078 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) { 1079 if (rt->rt6i_flags & RTF_DEFAULT) { 1080 struct rt6_info *rt1; 1081 1082 read_lock(&rt6_lock); 1083 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) { 1084 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) { 1085 dst_hold(&rt1->u.dst); 1086 dst_release(&rt->u.dst); 1087 read_unlock(&rt6_lock); 1088 rt = rt1; 1089 goto source_ok; 1090 } 1091 } 1092 read_unlock(&rt6_lock); 1093 } 1094 if (net_ratelimit()) 1095 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " 1096 "for redirect target\n"); 1097 goto out; 1098 } 1099 1100 source_ok: 1101 1102 /* 1103 * We have finally decided to accept it. 1104 */ 1105 1106 neigh_update(neigh, lladdr, NUD_STALE, 1107 NEIGH_UPDATE_F_WEAK_OVERRIDE| 1108 NEIGH_UPDATE_F_OVERRIDE| 1109 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 1110 NEIGH_UPDATE_F_ISROUTER)) 1111 ); 1112 1113 /* 1114 * Redirect received -> path was valid. 1115 * Look, redirects are sent only in response to data packets, 1116 * so that this nexthop apparently is reachable. --ANK 1117 */ 1118 dst_confirm(&rt->u.dst); 1119 1120 /* Duplicate redirect: silently ignore. */ 1121 if (neigh == rt->u.dst.neighbour) 1122 goto out; 1123 1124 nrt = ip6_rt_copy(rt); 1125 if (nrt == NULL) 1126 goto out; 1127 1128 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 1129 if (on_link) 1130 nrt->rt6i_flags &= ~RTF_GATEWAY; 1131 1132 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest); 1133 nrt->rt6i_dst.plen = 128; 1134 nrt->u.dst.flags |= DST_HOST; 1135 1136 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); 1137 nrt->rt6i_nexthop = neigh_clone(neigh); 1138 /* Reset pmtu, it may be better */ 1139 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); 1140 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst)); 1141 1142 if (ip6_ins_rt(nrt, NULL, NULL, NULL)) 1143 goto out; 1144 1145 if (rt->rt6i_flags&RTF_CACHE) { 1146 ip6_del_rt(rt, NULL, NULL, NULL); 1147 return; 1148 } 1149 1150 out: 1151 dst_release(&rt->u.dst); 1152 return; 1153 } 1154 1155 /* 1156 * Handle ICMP "packet too big" messages 1157 * i.e. Path MTU discovery 1158 */ 1159 1160 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, 1161 struct net_device *dev, u32 pmtu) 1162 { 1163 struct rt6_info *rt, *nrt; 1164 int allfrag = 0; 1165 1166 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0); 1167 if (rt == NULL) 1168 return; 1169 1170 if (pmtu >= dst_mtu(&rt->u.dst)) 1171 goto out; 1172 1173 if (pmtu < IPV6_MIN_MTU) { 1174 /* 1175 * According to RFC2460, PMTU is set to the IPv6 Minimum Link 1176 * MTU (1280) and a fragment header should always be included 1177 * after a node receiving Too Big message reporting PMTU is 1178 * less than the IPv6 Minimum Link MTU. 1179 */ 1180 pmtu = IPV6_MIN_MTU; 1181 allfrag = 1; 1182 } 1183 1184 /* New mtu received -> path was valid. 1185 They are sent only in response to data packets, 1186 so that this nexthop apparently is reachable. --ANK 1187 */ 1188 dst_confirm(&rt->u.dst); 1189 1190 /* Host route. If it is static, it would be better 1191 not to override it, but add new one, so that 1192 when cache entry will expire old pmtu 1193 would return automatically. 1194 */ 1195 if (rt->rt6i_flags & RTF_CACHE) { 1196 rt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1197 if (allfrag) 1198 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1199 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires); 1200 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; 1201 goto out; 1202 } 1203 1204 /* Network route. 1205 Two cases are possible: 1206 1. It is connected route. Action: COW 1207 2. It is gatewayed route or NONEXTHOP route. Action: clone it. 1208 */ 1209 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) { 1210 nrt = rt6_cow(rt, daddr, saddr, NULL); 1211 if (!nrt->u.dst.error) { 1212 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1213 if (allfrag) 1214 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1215 /* According to RFC 1981, detecting PMTU increase shouldn't be 1216 happened within 5 mins, the recommended timer is 10 mins. 1217 Here this route expiration time is set to ip6_rt_mtu_expires 1218 which is 10 mins. After 10 mins the decreased pmtu is expired 1219 and detecting PMTU increase will be automatically happened. 1220 */ 1221 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); 1222 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; 1223 } 1224 dst_release(&nrt->u.dst); 1225 } else { 1226 nrt = ip6_rt_copy(rt); 1227 if (nrt == NULL) 1228 goto out; 1229 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr); 1230 nrt->rt6i_dst.plen = 128; 1231 nrt->u.dst.flags |= DST_HOST; 1232 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop); 1233 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); 1234 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES; 1235 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1236 if (allfrag) 1237 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1238 ip6_ins_rt(nrt, NULL, NULL, NULL); 1239 } 1240 1241 out: 1242 dst_release(&rt->u.dst); 1243 } 1244 1245 /* 1246 * Misc support functions 1247 */ 1248 1249 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) 1250 { 1251 struct rt6_info *rt = ip6_dst_alloc(); 1252 1253 if (rt) { 1254 rt->u.dst.input = ort->u.dst.input; 1255 rt->u.dst.output = ort->u.dst.output; 1256 1257 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 1258 rt->u.dst.dev = ort->u.dst.dev; 1259 if (rt->u.dst.dev) 1260 dev_hold(rt->u.dst.dev); 1261 rt->rt6i_idev = ort->rt6i_idev; 1262 if (rt->rt6i_idev) 1263 in6_dev_hold(rt->rt6i_idev); 1264 rt->u.dst.lastuse = jiffies; 1265 rt->rt6i_expires = 0; 1266 1267 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 1268 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 1269 rt->rt6i_metric = 0; 1270 1271 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 1272 #ifdef CONFIG_IPV6_SUBTREES 1273 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1274 #endif 1275 } 1276 return rt; 1277 } 1278 1279 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) 1280 { 1281 struct rt6_info *rt; 1282 struct fib6_node *fn; 1283 1284 fn = &ip6_routing_table; 1285 1286 write_lock_bh(&rt6_lock); 1287 for (rt = fn->leaf; rt; rt=rt->u.next) { 1288 if (dev == rt->rt6i_dev && 1289 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 1290 break; 1291 } 1292 if (rt) 1293 dst_hold(&rt->u.dst); 1294 write_unlock_bh(&rt6_lock); 1295 return rt; 1296 } 1297 1298 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, 1299 struct net_device *dev) 1300 { 1301 struct in6_rtmsg rtmsg; 1302 1303 memset(&rtmsg, 0, sizeof(struct in6_rtmsg)); 1304 rtmsg.rtmsg_type = RTMSG_NEWROUTE; 1305 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); 1306 rtmsg.rtmsg_metric = 1024; 1307 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES; 1308 1309 rtmsg.rtmsg_ifindex = dev->ifindex; 1310 1311 ip6_route_add(&rtmsg, NULL, NULL, NULL); 1312 return rt6_get_dflt_router(gwaddr, dev); 1313 } 1314 1315 void rt6_purge_dflt_routers(void) 1316 { 1317 struct rt6_info *rt; 1318 1319 restart: 1320 read_lock_bh(&rt6_lock); 1321 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) { 1322 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { 1323 dst_hold(&rt->u.dst); 1324 1325 rt6_reset_dflt_pointer(NULL); 1326 1327 read_unlock_bh(&rt6_lock); 1328 1329 ip6_del_rt(rt, NULL, NULL, NULL); 1330 1331 goto restart; 1332 } 1333 } 1334 read_unlock_bh(&rt6_lock); 1335 } 1336 1337 int ipv6_route_ioctl(unsigned int cmd, void __user *arg) 1338 { 1339 struct in6_rtmsg rtmsg; 1340 int err; 1341 1342 switch(cmd) { 1343 case SIOCADDRT: /* Add a route */ 1344 case SIOCDELRT: /* Delete a route */ 1345 if (!capable(CAP_NET_ADMIN)) 1346 return -EPERM; 1347 err = copy_from_user(&rtmsg, arg, 1348 sizeof(struct in6_rtmsg)); 1349 if (err) 1350 return -EFAULT; 1351 1352 rtnl_lock(); 1353 switch (cmd) { 1354 case SIOCADDRT: 1355 err = ip6_route_add(&rtmsg, NULL, NULL, NULL); 1356 break; 1357 case SIOCDELRT: 1358 err = ip6_route_del(&rtmsg, NULL, NULL, NULL); 1359 break; 1360 default: 1361 err = -EINVAL; 1362 } 1363 rtnl_unlock(); 1364 1365 return err; 1366 }; 1367 1368 return -EINVAL; 1369 } 1370 1371 /* 1372 * Drop the packet on the floor 1373 */ 1374 1375 static int ip6_pkt_discard(struct sk_buff *skb) 1376 { 1377 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); 1378 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev); 1379 kfree_skb(skb); 1380 return 0; 1381 } 1382 1383 static int ip6_pkt_discard_out(struct sk_buff *skb) 1384 { 1385 skb->dev = skb->dst->dev; 1386 return ip6_pkt_discard(skb); 1387 } 1388 1389 /* 1390 * Allocate a dst for local (unicast / anycast) address. 1391 */ 1392 1393 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 1394 const struct in6_addr *addr, 1395 int anycast) 1396 { 1397 struct rt6_info *rt = ip6_dst_alloc(); 1398 1399 if (rt == NULL) 1400 return ERR_PTR(-ENOMEM); 1401 1402 dev_hold(&loopback_dev); 1403 in6_dev_hold(idev); 1404 1405 rt->u.dst.flags = DST_HOST; 1406 rt->u.dst.input = ip6_input; 1407 rt->u.dst.output = ip6_output; 1408 rt->rt6i_dev = &loopback_dev; 1409 rt->rt6i_idev = idev; 1410 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 1411 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); 1412 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1413 rt->u.dst.obsolete = -1; 1414 1415 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 1416 if (!anycast) 1417 rt->rt6i_flags |= RTF_LOCAL; 1418 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 1419 if (rt->rt6i_nexthop == NULL) { 1420 dst_free((struct dst_entry *) rt); 1421 return ERR_PTR(-ENOMEM); 1422 } 1423 1424 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 1425 rt->rt6i_dst.plen = 128; 1426 1427 atomic_set(&rt->u.dst.__refcnt, 1); 1428 1429 return rt; 1430 } 1431 1432 static int fib6_ifdown(struct rt6_info *rt, void *arg) 1433 { 1434 if (((void*)rt->rt6i_dev == arg || arg == NULL) && 1435 rt != &ip6_null_entry) { 1436 RT6_TRACE("deleted by ifdown %p\n", rt); 1437 return -1; 1438 } 1439 return 0; 1440 } 1441 1442 void rt6_ifdown(struct net_device *dev) 1443 { 1444 write_lock_bh(&rt6_lock); 1445 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev); 1446 write_unlock_bh(&rt6_lock); 1447 } 1448 1449 struct rt6_mtu_change_arg 1450 { 1451 struct net_device *dev; 1452 unsigned mtu; 1453 }; 1454 1455 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) 1456 { 1457 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 1458 struct inet6_dev *idev; 1459 1460 /* In IPv6 pmtu discovery is not optional, 1461 so that RTAX_MTU lock cannot disable it. 1462 We still use this lock to block changes 1463 caused by addrconf/ndisc. 1464 */ 1465 1466 idev = __in6_dev_get(arg->dev); 1467 if (idev == NULL) 1468 return 0; 1469 1470 /* For administrative MTU increase, there is no way to discover 1471 IPv6 PMTU increase, so PMTU increase should be updated here. 1472 Since RFC 1981 doesn't include administrative MTU increase 1473 update PMTU increase is a MUST. (i.e. jumbo frame) 1474 */ 1475 /* 1476 If new MTU is less than route PMTU, this new MTU will be the 1477 lowest MTU in the path, update the route PMTU to reflect PMTU 1478 decreases; if new MTU is greater than route PMTU, and the 1479 old MTU is the lowest MTU in the path, update the route PMTU 1480 to reflect the increase. In this case if the other nodes' MTU 1481 also have the lowest MTU, TOO BIG MESSAGE will be lead to 1482 PMTU discouvery. 1483 */ 1484 if (rt->rt6i_dev == arg->dev && 1485 !dst_metric_locked(&rt->u.dst, RTAX_MTU) && 1486 (dst_mtu(&rt->u.dst) > arg->mtu || 1487 (dst_mtu(&rt->u.dst) < arg->mtu && 1488 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) 1489 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; 1490 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu); 1491 return 0; 1492 } 1493 1494 void rt6_mtu_change(struct net_device *dev, unsigned mtu) 1495 { 1496 struct rt6_mtu_change_arg arg; 1497 1498 arg.dev = dev; 1499 arg.mtu = mtu; 1500 read_lock_bh(&rt6_lock); 1501 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg); 1502 read_unlock_bh(&rt6_lock); 1503 } 1504 1505 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta, 1506 struct in6_rtmsg *rtmsg) 1507 { 1508 memset(rtmsg, 0, sizeof(*rtmsg)); 1509 1510 rtmsg->rtmsg_dst_len = r->rtm_dst_len; 1511 rtmsg->rtmsg_src_len = r->rtm_src_len; 1512 rtmsg->rtmsg_flags = RTF_UP; 1513 if (r->rtm_type == RTN_UNREACHABLE) 1514 rtmsg->rtmsg_flags |= RTF_REJECT; 1515 1516 if (rta[RTA_GATEWAY-1]) { 1517 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16)) 1518 return -EINVAL; 1519 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16); 1520 rtmsg->rtmsg_flags |= RTF_GATEWAY; 1521 } 1522 if (rta[RTA_DST-1]) { 1523 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3)) 1524 return -EINVAL; 1525 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3)); 1526 } 1527 if (rta[RTA_SRC-1]) { 1528 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3)) 1529 return -EINVAL; 1530 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3)); 1531 } 1532 if (rta[RTA_OIF-1]) { 1533 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int))) 1534 return -EINVAL; 1535 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int)); 1536 } 1537 if (rta[RTA_PRIORITY-1]) { 1538 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4)) 1539 return -EINVAL; 1540 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4); 1541 } 1542 return 0; 1543 } 1544 1545 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 1546 { 1547 struct rtmsg *r = NLMSG_DATA(nlh); 1548 struct in6_rtmsg rtmsg; 1549 1550 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) 1551 return -EINVAL; 1552 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb)); 1553 } 1554 1555 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 1556 { 1557 struct rtmsg *r = NLMSG_DATA(nlh); 1558 struct in6_rtmsg rtmsg; 1559 1560 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) 1561 return -EINVAL; 1562 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb)); 1563 } 1564 1565 struct rt6_rtnl_dump_arg 1566 { 1567 struct sk_buff *skb; 1568 struct netlink_callback *cb; 1569 }; 1570 1571 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, 1572 struct in6_addr *dst, struct in6_addr *src, 1573 int iif, int type, u32 pid, u32 seq, 1574 int prefix, unsigned int flags) 1575 { 1576 struct rtmsg *rtm; 1577 struct nlmsghdr *nlh; 1578 unsigned char *b = skb->tail; 1579 struct rta_cacheinfo ci; 1580 1581 if (prefix) { /* user wants prefix routes only */ 1582 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 1583 /* success since this is not a prefix route */ 1584 return 1; 1585 } 1586 } 1587 1588 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags); 1589 rtm = NLMSG_DATA(nlh); 1590 rtm->rtm_family = AF_INET6; 1591 rtm->rtm_dst_len = rt->rt6i_dst.plen; 1592 rtm->rtm_src_len = rt->rt6i_src.plen; 1593 rtm->rtm_tos = 0; 1594 rtm->rtm_table = RT_TABLE_MAIN; 1595 if (rt->rt6i_flags&RTF_REJECT) 1596 rtm->rtm_type = RTN_UNREACHABLE; 1597 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) 1598 rtm->rtm_type = RTN_LOCAL; 1599 else 1600 rtm->rtm_type = RTN_UNICAST; 1601 rtm->rtm_flags = 0; 1602 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 1603 rtm->rtm_protocol = rt->rt6i_protocol; 1604 if (rt->rt6i_flags&RTF_DYNAMIC) 1605 rtm->rtm_protocol = RTPROT_REDIRECT; 1606 else if (rt->rt6i_flags & RTF_ADDRCONF) 1607 rtm->rtm_protocol = RTPROT_KERNEL; 1608 else if (rt->rt6i_flags&RTF_DEFAULT) 1609 rtm->rtm_protocol = RTPROT_RA; 1610 1611 if (rt->rt6i_flags&RTF_CACHE) 1612 rtm->rtm_flags |= RTM_F_CLONED; 1613 1614 if (dst) { 1615 RTA_PUT(skb, RTA_DST, 16, dst); 1616 rtm->rtm_dst_len = 128; 1617 } else if (rtm->rtm_dst_len) 1618 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); 1619 #ifdef CONFIG_IPV6_SUBTREES 1620 if (src) { 1621 RTA_PUT(skb, RTA_SRC, 16, src); 1622 rtm->rtm_src_len = 128; 1623 } else if (rtm->rtm_src_len) 1624 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); 1625 #endif 1626 if (iif) 1627 RTA_PUT(skb, RTA_IIF, 4, &iif); 1628 else if (dst) { 1629 struct in6_addr saddr_buf; 1630 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0) 1631 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); 1632 } 1633 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 1634 goto rtattr_failure; 1635 if (rt->u.dst.neighbour) 1636 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); 1637 if (rt->u.dst.dev) 1638 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex); 1639 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric); 1640 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); 1641 if (rt->rt6i_expires) 1642 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies); 1643 else 1644 ci.rta_expires = 0; 1645 ci.rta_used = rt->u.dst.__use; 1646 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); 1647 ci.rta_error = rt->u.dst.error; 1648 ci.rta_id = 0; 1649 ci.rta_ts = 0; 1650 ci.rta_tsage = 0; 1651 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); 1652 nlh->nlmsg_len = skb->tail - b; 1653 return skb->len; 1654 1655 nlmsg_failure: 1656 rtattr_failure: 1657 skb_trim(skb, b - skb->data); 1658 return -1; 1659 } 1660 1661 static int rt6_dump_route(struct rt6_info *rt, void *p_arg) 1662 { 1663 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 1664 int prefix; 1665 1666 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) { 1667 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh); 1668 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; 1669 } else 1670 prefix = 0; 1671 1672 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 1673 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, 1674 prefix, NLM_F_MULTI); 1675 } 1676 1677 static int fib6_dump_node(struct fib6_walker_t *w) 1678 { 1679 int res; 1680 struct rt6_info *rt; 1681 1682 for (rt = w->leaf; rt; rt = rt->u.next) { 1683 res = rt6_dump_route(rt, w->args); 1684 if (res < 0) { 1685 /* Frame is full, suspend walking */ 1686 w->leaf = rt; 1687 return 1; 1688 } 1689 BUG_TRAP(res!=0); 1690 } 1691 w->leaf = NULL; 1692 return 0; 1693 } 1694 1695 static void fib6_dump_end(struct netlink_callback *cb) 1696 { 1697 struct fib6_walker_t *w = (void*)cb->args[0]; 1698 1699 if (w) { 1700 cb->args[0] = 0; 1701 fib6_walker_unlink(w); 1702 kfree(w); 1703 } 1704 if (cb->args[1]) { 1705 cb->done = (void*)cb->args[1]; 1706 cb->args[1] = 0; 1707 } 1708 } 1709 1710 static int fib6_dump_done(struct netlink_callback *cb) 1711 { 1712 fib6_dump_end(cb); 1713 return cb->done(cb); 1714 } 1715 1716 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 1717 { 1718 struct rt6_rtnl_dump_arg arg; 1719 struct fib6_walker_t *w; 1720 int res; 1721 1722 arg.skb = skb; 1723 arg.cb = cb; 1724 1725 w = (void*)cb->args[0]; 1726 if (w == NULL) { 1727 /* New dump: 1728 * 1729 * 1. hook callback destructor. 1730 */ 1731 cb->args[1] = (long)cb->done; 1732 cb->done = fib6_dump_done; 1733 1734 /* 1735 * 2. allocate and initialize walker. 1736 */ 1737 w = kmalloc(sizeof(*w), GFP_ATOMIC); 1738 if (w == NULL) 1739 return -ENOMEM; 1740 RT6_TRACE("dump<%p", w); 1741 memset(w, 0, sizeof(*w)); 1742 w->root = &ip6_routing_table; 1743 w->func = fib6_dump_node; 1744 w->args = &arg; 1745 cb->args[0] = (long)w; 1746 read_lock_bh(&rt6_lock); 1747 res = fib6_walk(w); 1748 read_unlock_bh(&rt6_lock); 1749 } else { 1750 w->args = &arg; 1751 read_lock_bh(&rt6_lock); 1752 res = fib6_walk_continue(w); 1753 read_unlock_bh(&rt6_lock); 1754 } 1755 #if RT6_DEBUG >= 3 1756 if (res <= 0 && skb->len == 0) 1757 RT6_TRACE("%p>dump end\n", w); 1758 #endif 1759 res = res < 0 ? res : skb->len; 1760 /* res < 0 is an error. (really, impossible) 1761 res == 0 means that dump is complete, but skb still can contain data. 1762 res > 0 dump is not complete, but frame is full. 1763 */ 1764 /* Destroy walker, if dump of this table is complete. */ 1765 if (res <= 0) 1766 fib6_dump_end(cb); 1767 return res; 1768 } 1769 1770 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 1771 { 1772 struct rtattr **rta = arg; 1773 int iif = 0; 1774 int err = -ENOBUFS; 1775 struct sk_buff *skb; 1776 struct flowi fl; 1777 struct rt6_info *rt; 1778 1779 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1780 if (skb == NULL) 1781 goto out; 1782 1783 /* Reserve room for dummy headers, this skb can pass 1784 through good chunk of routing engine. 1785 */ 1786 skb->mac.raw = skb->data; 1787 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 1788 1789 memset(&fl, 0, sizeof(fl)); 1790 if (rta[RTA_SRC-1]) 1791 ipv6_addr_copy(&fl.fl6_src, 1792 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1])); 1793 if (rta[RTA_DST-1]) 1794 ipv6_addr_copy(&fl.fl6_dst, 1795 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1])); 1796 1797 if (rta[RTA_IIF-1]) 1798 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); 1799 1800 if (iif) { 1801 struct net_device *dev; 1802 dev = __dev_get_by_index(iif); 1803 if (!dev) { 1804 err = -ENODEV; 1805 goto out_free; 1806 } 1807 } 1808 1809 fl.oif = 0; 1810 if (rta[RTA_OIF-1]) 1811 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int)); 1812 1813 rt = (struct rt6_info*)ip6_route_output(NULL, &fl); 1814 1815 skb->dst = &rt->u.dst; 1816 1817 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; 1818 err = rt6_fill_node(skb, rt, 1819 &fl.fl6_dst, &fl.fl6_src, 1820 iif, 1821 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 1822 nlh->nlmsg_seq, 0, 0); 1823 if (err < 0) { 1824 err = -EMSGSIZE; 1825 goto out_free; 1826 } 1827 1828 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); 1829 if (err > 0) 1830 err = 0; 1831 out: 1832 return err; 1833 out_free: 1834 kfree_skb(skb); 1835 goto out; 1836 } 1837 1838 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 1839 struct netlink_skb_parms *req) 1840 { 1841 struct sk_buff *skb; 1842 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); 1843 u32 pid = current->pid; 1844 u32 seq = 0; 1845 1846 if (req) 1847 pid = req->pid; 1848 if (nlh) 1849 seq = nlh->nlmsg_seq; 1850 1851 skb = alloc_skb(size, gfp_any()); 1852 if (!skb) { 1853 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS); 1854 return; 1855 } 1856 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) { 1857 kfree_skb(skb); 1858 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL); 1859 return; 1860 } 1861 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE; 1862 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any()); 1863 } 1864 1865 /* 1866 * /proc 1867 */ 1868 1869 #ifdef CONFIG_PROC_FS 1870 1871 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1) 1872 1873 struct rt6_proc_arg 1874 { 1875 char *buffer; 1876 int offset; 1877 int length; 1878 int skip; 1879 int len; 1880 }; 1881 1882 static int rt6_info_route(struct rt6_info *rt, void *p_arg) 1883 { 1884 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg; 1885 int i; 1886 1887 if (arg->skip < arg->offset / RT6_INFO_LEN) { 1888 arg->skip++; 1889 return 0; 1890 } 1891 1892 if (arg->len >= arg->length) 1893 return 0; 1894 1895 for (i=0; i<16; i++) { 1896 sprintf(arg->buffer + arg->len, "%02x", 1897 rt->rt6i_dst.addr.s6_addr[i]); 1898 arg->len += 2; 1899 } 1900 arg->len += sprintf(arg->buffer + arg->len, " %02x ", 1901 rt->rt6i_dst.plen); 1902 1903 #ifdef CONFIG_IPV6_SUBTREES 1904 for (i=0; i<16; i++) { 1905 sprintf(arg->buffer + arg->len, "%02x", 1906 rt->rt6i_src.addr.s6_addr[i]); 1907 arg->len += 2; 1908 } 1909 arg->len += sprintf(arg->buffer + arg->len, " %02x ", 1910 rt->rt6i_src.plen); 1911 #else 1912 sprintf(arg->buffer + arg->len, 1913 "00000000000000000000000000000000 00 "); 1914 arg->len += 36; 1915 #endif 1916 1917 if (rt->rt6i_nexthop) { 1918 for (i=0; i<16; i++) { 1919 sprintf(arg->buffer + arg->len, "%02x", 1920 rt->rt6i_nexthop->primary_key[i]); 1921 arg->len += 2; 1922 } 1923 } else { 1924 sprintf(arg->buffer + arg->len, 1925 "00000000000000000000000000000000"); 1926 arg->len += 32; 1927 } 1928 arg->len += sprintf(arg->buffer + arg->len, 1929 " %08x %08x %08x %08x %8s\n", 1930 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), 1931 rt->u.dst.__use, rt->rt6i_flags, 1932 rt->rt6i_dev ? rt->rt6i_dev->name : ""); 1933 return 0; 1934 } 1935 1936 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) 1937 { 1938 struct rt6_proc_arg arg; 1939 arg.buffer = buffer; 1940 arg.offset = offset; 1941 arg.length = length; 1942 arg.skip = 0; 1943 arg.len = 0; 1944 1945 read_lock_bh(&rt6_lock); 1946 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg); 1947 read_unlock_bh(&rt6_lock); 1948 1949 *start = buffer; 1950 if (offset) 1951 *start += offset % RT6_INFO_LEN; 1952 1953 arg.len -= offset % RT6_INFO_LEN; 1954 1955 if (arg.len > length) 1956 arg.len = length; 1957 if (arg.len < 0) 1958 arg.len = 0; 1959 1960 return arg.len; 1961 } 1962 1963 static int rt6_stats_seq_show(struct seq_file *seq, void *v) 1964 { 1965 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 1966 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes, 1967 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries, 1968 rt6_stats.fib_rt_cache, 1969 atomic_read(&ip6_dst_ops.entries), 1970 rt6_stats.fib_discarded_routes); 1971 1972 return 0; 1973 } 1974 1975 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 1976 { 1977 return single_open(file, rt6_stats_seq_show, NULL); 1978 } 1979 1980 static struct file_operations rt6_stats_seq_fops = { 1981 .owner = THIS_MODULE, 1982 .open = rt6_stats_seq_open, 1983 .read = seq_read, 1984 .llseek = seq_lseek, 1985 .release = single_release, 1986 }; 1987 #endif /* CONFIG_PROC_FS */ 1988 1989 #ifdef CONFIG_SYSCTL 1990 1991 static int flush_delay; 1992 1993 static 1994 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, 1995 void __user *buffer, size_t *lenp, loff_t *ppos) 1996 { 1997 if (write) { 1998 proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 1999 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay); 2000 return 0; 2001 } else 2002 return -EINVAL; 2003 } 2004 2005 ctl_table ipv6_route_table[] = { 2006 { 2007 .ctl_name = NET_IPV6_ROUTE_FLUSH, 2008 .procname = "flush", 2009 .data = &flush_delay, 2010 .maxlen = sizeof(int), 2011 .mode = 0200, 2012 .proc_handler = &ipv6_sysctl_rtcache_flush 2013 }, 2014 { 2015 .ctl_name = NET_IPV6_ROUTE_GC_THRESH, 2016 .procname = "gc_thresh", 2017 .data = &ip6_dst_ops.gc_thresh, 2018 .maxlen = sizeof(int), 2019 .mode = 0644, 2020 .proc_handler = &proc_dointvec, 2021 }, 2022 { 2023 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, 2024 .procname = "max_size", 2025 .data = &ip6_rt_max_size, 2026 .maxlen = sizeof(int), 2027 .mode = 0644, 2028 .proc_handler = &proc_dointvec, 2029 }, 2030 { 2031 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, 2032 .procname = "gc_min_interval", 2033 .data = &ip6_rt_gc_min_interval, 2034 .maxlen = sizeof(int), 2035 .mode = 0644, 2036 .proc_handler = &proc_dointvec_jiffies, 2037 .strategy = &sysctl_jiffies, 2038 }, 2039 { 2040 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, 2041 .procname = "gc_timeout", 2042 .data = &ip6_rt_gc_timeout, 2043 .maxlen = sizeof(int), 2044 .mode = 0644, 2045 .proc_handler = &proc_dointvec_jiffies, 2046 .strategy = &sysctl_jiffies, 2047 }, 2048 { 2049 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, 2050 .procname = "gc_interval", 2051 .data = &ip6_rt_gc_interval, 2052 .maxlen = sizeof(int), 2053 .mode = 0644, 2054 .proc_handler = &proc_dointvec_jiffies, 2055 .strategy = &sysctl_jiffies, 2056 }, 2057 { 2058 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, 2059 .procname = "gc_elasticity", 2060 .data = &ip6_rt_gc_elasticity, 2061 .maxlen = sizeof(int), 2062 .mode = 0644, 2063 .proc_handler = &proc_dointvec_jiffies, 2064 .strategy = &sysctl_jiffies, 2065 }, 2066 { 2067 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, 2068 .procname = "mtu_expires", 2069 .data = &ip6_rt_mtu_expires, 2070 .maxlen = sizeof(int), 2071 .mode = 0644, 2072 .proc_handler = &proc_dointvec_jiffies, 2073 .strategy = &sysctl_jiffies, 2074 }, 2075 { 2076 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, 2077 .procname = "min_adv_mss", 2078 .data = &ip6_rt_min_advmss, 2079 .maxlen = sizeof(int), 2080 .mode = 0644, 2081 .proc_handler = &proc_dointvec_jiffies, 2082 .strategy = &sysctl_jiffies, 2083 }, 2084 { 2085 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, 2086 .procname = "gc_min_interval_ms", 2087 .data = &ip6_rt_gc_min_interval, 2088 .maxlen = sizeof(int), 2089 .mode = 0644, 2090 .proc_handler = &proc_dointvec_ms_jiffies, 2091 .strategy = &sysctl_ms_jiffies, 2092 }, 2093 { .ctl_name = 0 } 2094 }; 2095 2096 #endif 2097 2098 void __init ip6_route_init(void) 2099 { 2100 struct proc_dir_entry *p; 2101 2102 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", 2103 sizeof(struct rt6_info), 2104 0, SLAB_HWCACHE_ALIGN, 2105 NULL, NULL); 2106 if (!ip6_dst_ops.kmem_cachep) 2107 panic("cannot create ip6_dst_cache"); 2108 2109 fib6_init(); 2110 #ifdef CONFIG_PROC_FS 2111 p = proc_net_create("ipv6_route", 0, rt6_proc_info); 2112 if (p) 2113 p->owner = THIS_MODULE; 2114 2115 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2116 #endif 2117 #ifdef CONFIG_XFRM 2118 xfrm6_init(); 2119 #endif 2120 } 2121 2122 void ip6_route_cleanup(void) 2123 { 2124 #ifdef CONFIG_PROC_FS 2125 proc_net_remove("ipv6_route"); 2126 proc_net_remove("rt6_stats"); 2127 #endif 2128 #ifdef CONFIG_XFRM 2129 xfrm6_fini(); 2130 #endif 2131 rt6_ifdown(NULL); 2132 fib6_gc_cleanup(); 2133 kmem_cache_destroy(ip6_dst_ops.kmem_cachep); 2134 } 2135