1 /* 2 * Linux INET6 implementation 3 * FIB front-end. 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 */ 13 14 /* Changes: 15 * 16 * YOSHIFUJI Hideaki @USAGI 17 * reworked default router selection. 18 * - respect outgoing interface 19 * - select from (probably) reachable routers (i.e. 20 * routers in REACHABLE, STALE, DELAY or PROBE states). 21 * - always select the same router if it is (probably) 22 * reachable. otherwise, round-robin the list. 23 * Ville Nuorvala 24 * Fixed routing subtrees. 25 */ 26 27 #include <linux/capability.h> 28 #include <linux/errno.h> 29 #include <linux/types.h> 30 #include <linux/times.h> 31 #include <linux/socket.h> 32 #include <linux/sockios.h> 33 #include <linux/net.h> 34 #include <linux/route.h> 35 #include <linux/netdevice.h> 36 #include <linux/in6.h> 37 #include <linux/mroute6.h> 38 #include <linux/init.h> 39 #include <linux/if_arp.h> 40 #include <linux/proc_fs.h> 41 #include <linux/seq_file.h> 42 #include <linux/nsproxy.h> 43 #include <net/net_namespace.h> 44 #include <net/snmp.h> 45 #include <net/ipv6.h> 46 #include <net/ip6_fib.h> 47 #include <net/ip6_route.h> 48 #include <net/ndisc.h> 49 #include <net/addrconf.h> 50 #include <net/tcp.h> 51 #include <linux/rtnetlink.h> 52 #include <net/dst.h> 53 #include <net/xfrm.h> 54 #include <net/netevent.h> 55 #include <net/netlink.h> 56 57 #include <asm/uaccess.h> 58 59 #ifdef CONFIG_SYSCTL 60 #include <linux/sysctl.h> 61 #endif 62 63 /* Set to 3 to get tracing. */ 64 #define RT6_DEBUG 2 65 66 #if RT6_DEBUG >= 3 67 #define RDBG(x) printk x 68 #define RT6_TRACE(x...) printk(KERN_DEBUG x) 69 #else 70 #define RDBG(x) 71 #define RT6_TRACE(x...) do { ; } while (0) 72 #endif 73 74 #define CLONE_OFFLINK_ROUTE 0 75 76 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); 77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 78 static struct dst_entry *ip6_negative_advice(struct dst_entry *); 79 static void ip6_dst_destroy(struct dst_entry *); 80 static void ip6_dst_ifdown(struct dst_entry *, 81 struct net_device *dev, int how); 82 static int ip6_dst_gc(struct dst_ops *ops); 83 84 static int ip6_pkt_discard(struct sk_buff *skb); 85 static int ip6_pkt_discard_out(struct sk_buff *skb); 86 static void ip6_link_failure(struct sk_buff *skb); 87 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 88 89 #ifdef CONFIG_IPV6_ROUTE_INFO 90 static struct rt6_info *rt6_add_route_info(struct net *net, 91 struct in6_addr *prefix, int prefixlen, 92 struct in6_addr *gwaddr, int ifindex, 93 unsigned pref); 94 static struct rt6_info *rt6_get_route_info(struct net *net, 95 struct in6_addr *prefix, int prefixlen, 96 struct in6_addr *gwaddr, int ifindex); 97 #endif 98 99 static struct dst_ops ip6_dst_ops_template = { 100 .family = AF_INET6, 101 .protocol = cpu_to_be16(ETH_P_IPV6), 102 .gc = ip6_dst_gc, 103 .gc_thresh = 1024, 104 .check = ip6_dst_check, 105 .destroy = ip6_dst_destroy, 106 .ifdown = ip6_dst_ifdown, 107 .negative_advice = ip6_negative_advice, 108 .link_failure = ip6_link_failure, 109 .update_pmtu = ip6_rt_update_pmtu, 110 .local_out = __ip6_local_out, 111 .entries = ATOMIC_INIT(0), 112 }; 113 114 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 115 { 116 } 117 118 static struct dst_ops ip6_dst_blackhole_ops = { 119 .family = AF_INET6, 120 .protocol = cpu_to_be16(ETH_P_IPV6), 121 .destroy = ip6_dst_destroy, 122 .check = ip6_dst_check, 123 .update_pmtu = ip6_rt_blackhole_update_pmtu, 124 .entries = ATOMIC_INIT(0), 125 }; 126 127 static struct rt6_info ip6_null_entry_template = { 128 .u = { 129 .dst = { 130 .__refcnt = ATOMIC_INIT(1), 131 .__use = 1, 132 .obsolete = -1, 133 .error = -ENETUNREACH, 134 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 135 .input = ip6_pkt_discard, 136 .output = ip6_pkt_discard_out, 137 } 138 }, 139 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 140 .rt6i_protocol = RTPROT_KERNEL, 141 .rt6i_metric = ~(u32) 0, 142 .rt6i_ref = ATOMIC_INIT(1), 143 }; 144 145 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 146 147 static int ip6_pkt_prohibit(struct sk_buff *skb); 148 static int ip6_pkt_prohibit_out(struct sk_buff *skb); 149 150 static struct rt6_info ip6_prohibit_entry_template = { 151 .u = { 152 .dst = { 153 .__refcnt = ATOMIC_INIT(1), 154 .__use = 1, 155 .obsolete = -1, 156 .error = -EACCES, 157 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 158 .input = ip6_pkt_prohibit, 159 .output = ip6_pkt_prohibit_out, 160 } 161 }, 162 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 163 .rt6i_protocol = RTPROT_KERNEL, 164 .rt6i_metric = ~(u32) 0, 165 .rt6i_ref = ATOMIC_INIT(1), 166 }; 167 168 static struct rt6_info ip6_blk_hole_entry_template = { 169 .u = { 170 .dst = { 171 .__refcnt = ATOMIC_INIT(1), 172 .__use = 1, 173 .obsolete = -1, 174 .error = -EINVAL, 175 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 176 .input = dst_discard, 177 .output = dst_discard, 178 } 179 }, 180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 181 .rt6i_protocol = RTPROT_KERNEL, 182 .rt6i_metric = ~(u32) 0, 183 .rt6i_ref = ATOMIC_INIT(1), 184 }; 185 186 #endif 187 188 /* allocate dst with ip6_dst_ops */ 189 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops) 190 { 191 return (struct rt6_info *)dst_alloc(ops); 192 } 193 194 static void ip6_dst_destroy(struct dst_entry *dst) 195 { 196 struct rt6_info *rt = (struct rt6_info *)dst; 197 struct inet6_dev *idev = rt->rt6i_idev; 198 199 if (idev != NULL) { 200 rt->rt6i_idev = NULL; 201 in6_dev_put(idev); 202 } 203 } 204 205 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 206 int how) 207 { 208 struct rt6_info *rt = (struct rt6_info *)dst; 209 struct inet6_dev *idev = rt->rt6i_idev; 210 struct net_device *loopback_dev = 211 dev_net(dev)->loopback_dev; 212 213 if (dev != loopback_dev && idev != NULL && idev->dev == dev) { 214 struct inet6_dev *loopback_idev = 215 in6_dev_get(loopback_dev); 216 if (loopback_idev != NULL) { 217 rt->rt6i_idev = loopback_idev; 218 in6_dev_put(idev); 219 } 220 } 221 } 222 223 static __inline__ int rt6_check_expired(const struct rt6_info *rt) 224 { 225 return (rt->rt6i_flags & RTF_EXPIRES && 226 time_after(jiffies, rt->rt6i_expires)); 227 } 228 229 static inline int rt6_need_strict(struct in6_addr *daddr) 230 { 231 return (ipv6_addr_type(daddr) & 232 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)); 233 } 234 235 /* 236 * Route lookup. Any table->tb6_lock is implied. 237 */ 238 239 static inline struct rt6_info *rt6_device_match(struct net *net, 240 struct rt6_info *rt, 241 struct in6_addr *saddr, 242 int oif, 243 int flags) 244 { 245 struct rt6_info *local = NULL; 246 struct rt6_info *sprt; 247 248 if (!oif && ipv6_addr_any(saddr)) 249 goto out; 250 251 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { 252 struct net_device *dev = sprt->rt6i_dev; 253 254 if (oif) { 255 if (dev->ifindex == oif) 256 return sprt; 257 if (dev->flags & IFF_LOOPBACK) { 258 if (sprt->rt6i_idev == NULL || 259 sprt->rt6i_idev->dev->ifindex != oif) { 260 if (flags & RT6_LOOKUP_F_IFACE && oif) 261 continue; 262 if (local && (!oif || 263 local->rt6i_idev->dev->ifindex == oif)) 264 continue; 265 } 266 local = sprt; 267 } 268 } else { 269 if (ipv6_chk_addr(net, saddr, dev, 270 flags & RT6_LOOKUP_F_IFACE)) 271 return sprt; 272 } 273 } 274 275 if (oif) { 276 if (local) 277 return local; 278 279 if (flags & RT6_LOOKUP_F_IFACE) 280 return net->ipv6.ip6_null_entry; 281 } 282 out: 283 return rt; 284 } 285 286 #ifdef CONFIG_IPV6_ROUTER_PREF 287 static void rt6_probe(struct rt6_info *rt) 288 { 289 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL; 290 /* 291 * Okay, this does not seem to be appropriate 292 * for now, however, we need to check if it 293 * is really so; aka Router Reachability Probing. 294 * 295 * Router Reachability Probe MUST be rate-limited 296 * to no more than one per minute. 297 */ 298 if (!neigh || (neigh->nud_state & NUD_VALID)) 299 return; 300 read_lock_bh(&neigh->lock); 301 if (!(neigh->nud_state & NUD_VALID) && 302 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { 303 struct in6_addr mcaddr; 304 struct in6_addr *target; 305 306 neigh->updated = jiffies; 307 read_unlock_bh(&neigh->lock); 308 309 target = (struct in6_addr *)&neigh->primary_key; 310 addrconf_addr_solict_mult(target, &mcaddr); 311 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); 312 } else 313 read_unlock_bh(&neigh->lock); 314 } 315 #else 316 static inline void rt6_probe(struct rt6_info *rt) 317 { 318 return; 319 } 320 #endif 321 322 /* 323 * Default Router Selection (RFC 2461 6.3.6) 324 */ 325 static inline int rt6_check_dev(struct rt6_info *rt, int oif) 326 { 327 struct net_device *dev = rt->rt6i_dev; 328 if (!oif || dev->ifindex == oif) 329 return 2; 330 if ((dev->flags & IFF_LOOPBACK) && 331 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) 332 return 1; 333 return 0; 334 } 335 336 static inline int rt6_check_neigh(struct rt6_info *rt) 337 { 338 struct neighbour *neigh = rt->rt6i_nexthop; 339 int m; 340 if (rt->rt6i_flags & RTF_NONEXTHOP || 341 !(rt->rt6i_flags & RTF_GATEWAY)) 342 m = 1; 343 else if (neigh) { 344 read_lock_bh(&neigh->lock); 345 if (neigh->nud_state & NUD_VALID) 346 m = 2; 347 #ifdef CONFIG_IPV6_ROUTER_PREF 348 else if (neigh->nud_state & NUD_FAILED) 349 m = 0; 350 #endif 351 else 352 m = 1; 353 read_unlock_bh(&neigh->lock); 354 } else 355 m = 0; 356 return m; 357 } 358 359 static int rt6_score_route(struct rt6_info *rt, int oif, 360 int strict) 361 { 362 int m, n; 363 364 m = rt6_check_dev(rt, oif); 365 if (!m && (strict & RT6_LOOKUP_F_IFACE)) 366 return -1; 367 #ifdef CONFIG_IPV6_ROUTER_PREF 368 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; 369 #endif 370 n = rt6_check_neigh(rt); 371 if (!n && (strict & RT6_LOOKUP_F_REACHABLE)) 372 return -1; 373 return m; 374 } 375 376 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, 377 int *mpri, struct rt6_info *match) 378 { 379 int m; 380 381 if (rt6_check_expired(rt)) 382 goto out; 383 384 m = rt6_score_route(rt, oif, strict); 385 if (m < 0) 386 goto out; 387 388 if (m > *mpri) { 389 if (strict & RT6_LOOKUP_F_REACHABLE) 390 rt6_probe(match); 391 *mpri = m; 392 match = rt; 393 } else if (strict & RT6_LOOKUP_F_REACHABLE) { 394 rt6_probe(rt); 395 } 396 397 out: 398 return match; 399 } 400 401 static struct rt6_info *find_rr_leaf(struct fib6_node *fn, 402 struct rt6_info *rr_head, 403 u32 metric, int oif, int strict) 404 { 405 struct rt6_info *rt, *match; 406 int mpri = -1; 407 408 match = NULL; 409 for (rt = rr_head; rt && rt->rt6i_metric == metric; 410 rt = rt->u.dst.rt6_next) 411 match = find_match(rt, oif, strict, &mpri, match); 412 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; 413 rt = rt->u.dst.rt6_next) 414 match = find_match(rt, oif, strict, &mpri, match); 415 416 return match; 417 } 418 419 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) 420 { 421 struct rt6_info *match, *rt0; 422 struct net *net; 423 424 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n", 425 __func__, fn->leaf, oif); 426 427 rt0 = fn->rr_ptr; 428 if (!rt0) 429 fn->rr_ptr = rt0 = fn->leaf; 430 431 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict); 432 433 if (!match && 434 (strict & RT6_LOOKUP_F_REACHABLE)) { 435 struct rt6_info *next = rt0->u.dst.rt6_next; 436 437 /* no entries matched; do round-robin */ 438 if (!next || next->rt6i_metric != rt0->rt6i_metric) 439 next = fn->leaf; 440 441 if (next != rt0) 442 fn->rr_ptr = next; 443 } 444 445 RT6_TRACE("%s() => %p\n", 446 __func__, match); 447 448 net = dev_net(rt0->rt6i_dev); 449 return (match ? match : net->ipv6.ip6_null_entry); 450 } 451 452 #ifdef CONFIG_IPV6_ROUTE_INFO 453 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 454 struct in6_addr *gwaddr) 455 { 456 struct net *net = dev_net(dev); 457 struct route_info *rinfo = (struct route_info *) opt; 458 struct in6_addr prefix_buf, *prefix; 459 unsigned int pref; 460 unsigned long lifetime; 461 struct rt6_info *rt; 462 463 if (len < sizeof(struct route_info)) { 464 return -EINVAL; 465 } 466 467 /* Sanity check for prefix_len and length */ 468 if (rinfo->length > 3) { 469 return -EINVAL; 470 } else if (rinfo->prefix_len > 128) { 471 return -EINVAL; 472 } else if (rinfo->prefix_len > 64) { 473 if (rinfo->length < 2) { 474 return -EINVAL; 475 } 476 } else if (rinfo->prefix_len > 0) { 477 if (rinfo->length < 1) { 478 return -EINVAL; 479 } 480 } 481 482 pref = rinfo->route_pref; 483 if (pref == ICMPV6_ROUTER_PREF_INVALID) 484 pref = ICMPV6_ROUTER_PREF_MEDIUM; 485 486 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); 487 488 if (rinfo->length == 3) 489 prefix = (struct in6_addr *)rinfo->prefix; 490 else { 491 /* this function is safe */ 492 ipv6_addr_prefix(&prefix_buf, 493 (struct in6_addr *)rinfo->prefix, 494 rinfo->prefix_len); 495 prefix = &prefix_buf; 496 } 497 498 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, 499 dev->ifindex); 500 501 if (rt && !lifetime) { 502 ip6_del_rt(rt); 503 rt = NULL; 504 } 505 506 if (!rt && lifetime) 507 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, 508 pref); 509 else if (rt) 510 rt->rt6i_flags = RTF_ROUTEINFO | 511 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 512 513 if (rt) { 514 if (!addrconf_finite_timeout(lifetime)) { 515 rt->rt6i_flags &= ~RTF_EXPIRES; 516 } else { 517 rt->rt6i_expires = jiffies + HZ * lifetime; 518 rt->rt6i_flags |= RTF_EXPIRES; 519 } 520 dst_release(&rt->u.dst); 521 } 522 return 0; 523 } 524 #endif 525 526 #define BACKTRACK(__net, saddr) \ 527 do { \ 528 if (rt == __net->ipv6.ip6_null_entry) { \ 529 struct fib6_node *pn; \ 530 while (1) { \ 531 if (fn->fn_flags & RTN_TL_ROOT) \ 532 goto out; \ 533 pn = fn->parent; \ 534 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ 535 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ 536 else \ 537 fn = pn; \ 538 if (fn->fn_flags & RTN_RTINFO) \ 539 goto restart; \ 540 } \ 541 } \ 542 } while(0) 543 544 static struct rt6_info *ip6_pol_route_lookup(struct net *net, 545 struct fib6_table *table, 546 struct flowi *fl, int flags) 547 { 548 struct fib6_node *fn; 549 struct rt6_info *rt; 550 551 read_lock_bh(&table->tb6_lock); 552 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 553 restart: 554 rt = fn->leaf; 555 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); 556 BACKTRACK(net, &fl->fl6_src); 557 out: 558 dst_use(&rt->u.dst, jiffies); 559 read_unlock_bh(&table->tb6_lock); 560 return rt; 561 562 } 563 564 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, 565 const struct in6_addr *saddr, int oif, int strict) 566 { 567 struct flowi fl = { 568 .oif = oif, 569 .nl_u = { 570 .ip6_u = { 571 .daddr = *daddr, 572 }, 573 }, 574 }; 575 struct dst_entry *dst; 576 int flags = strict ? RT6_LOOKUP_F_IFACE : 0; 577 578 if (saddr) { 579 memcpy(&fl.fl6_src, saddr, sizeof(*saddr)); 580 flags |= RT6_LOOKUP_F_HAS_SADDR; 581 } 582 583 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup); 584 if (dst->error == 0) 585 return (struct rt6_info *) dst; 586 587 dst_release(dst); 588 589 return NULL; 590 } 591 592 EXPORT_SYMBOL(rt6_lookup); 593 594 /* ip6_ins_rt is called with FREE table->tb6_lock. 595 It takes new route entry, the addition fails by any reason the 596 route is freed. In any case, if caller does not hold it, it may 597 be destroyed. 598 */ 599 600 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) 601 { 602 int err; 603 struct fib6_table *table; 604 605 table = rt->rt6i_table; 606 write_lock_bh(&table->tb6_lock); 607 err = fib6_add(&table->tb6_root, rt, info); 608 write_unlock_bh(&table->tb6_lock); 609 610 return err; 611 } 612 613 int ip6_ins_rt(struct rt6_info *rt) 614 { 615 struct nl_info info = { 616 .nl_net = dev_net(rt->rt6i_dev), 617 }; 618 return __ip6_ins_rt(rt, &info); 619 } 620 621 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, 622 struct in6_addr *saddr) 623 { 624 struct rt6_info *rt; 625 626 /* 627 * Clone the route. 628 */ 629 630 rt = ip6_rt_copy(ort); 631 632 if (rt) { 633 struct neighbour *neigh; 634 int attempts = !in_softirq(); 635 636 if (!(rt->rt6i_flags&RTF_GATEWAY)) { 637 if (rt->rt6i_dst.plen != 128 && 638 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) 639 rt->rt6i_flags |= RTF_ANYCAST; 640 ipv6_addr_copy(&rt->rt6i_gateway, daddr); 641 } 642 643 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 644 rt->rt6i_dst.plen = 128; 645 rt->rt6i_flags |= RTF_CACHE; 646 rt->u.dst.flags |= DST_HOST; 647 648 #ifdef CONFIG_IPV6_SUBTREES 649 if (rt->rt6i_src.plen && saddr) { 650 ipv6_addr_copy(&rt->rt6i_src.addr, saddr); 651 rt->rt6i_src.plen = 128; 652 } 653 #endif 654 655 retry: 656 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 657 if (IS_ERR(neigh)) { 658 struct net *net = dev_net(rt->rt6i_dev); 659 int saved_rt_min_interval = 660 net->ipv6.sysctl.ip6_rt_gc_min_interval; 661 int saved_rt_elasticity = 662 net->ipv6.sysctl.ip6_rt_gc_elasticity; 663 664 if (attempts-- > 0) { 665 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; 666 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; 667 668 ip6_dst_gc(net->ipv6.ip6_dst_ops); 669 670 net->ipv6.sysctl.ip6_rt_gc_elasticity = 671 saved_rt_elasticity; 672 net->ipv6.sysctl.ip6_rt_gc_min_interval = 673 saved_rt_min_interval; 674 goto retry; 675 } 676 677 if (net_ratelimit()) 678 printk(KERN_WARNING 679 "Neighbour table overflow.\n"); 680 dst_free(&rt->u.dst); 681 return NULL; 682 } 683 rt->rt6i_nexthop = neigh; 684 685 } 686 687 return rt; 688 } 689 690 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr) 691 { 692 struct rt6_info *rt = ip6_rt_copy(ort); 693 if (rt) { 694 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 695 rt->rt6i_dst.plen = 128; 696 rt->rt6i_flags |= RTF_CACHE; 697 rt->u.dst.flags |= DST_HOST; 698 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); 699 } 700 return rt; 701 } 702 703 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, 704 struct flowi *fl, int flags) 705 { 706 struct fib6_node *fn; 707 struct rt6_info *rt, *nrt; 708 int strict = 0; 709 int attempts = 3; 710 int err; 711 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; 712 713 strict |= flags & RT6_LOOKUP_F_IFACE; 714 715 relookup: 716 read_lock_bh(&table->tb6_lock); 717 718 restart_2: 719 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 720 721 restart: 722 rt = rt6_select(fn, oif, strict | reachable); 723 724 BACKTRACK(net, &fl->fl6_src); 725 if (rt == net->ipv6.ip6_null_entry || 726 rt->rt6i_flags & RTF_CACHE) 727 goto out; 728 729 dst_hold(&rt->u.dst); 730 read_unlock_bh(&table->tb6_lock); 731 732 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 733 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 734 else { 735 #if CLONE_OFFLINK_ROUTE 736 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 737 #else 738 goto out2; 739 #endif 740 } 741 742 dst_release(&rt->u.dst); 743 rt = nrt ? : net->ipv6.ip6_null_entry; 744 745 dst_hold(&rt->u.dst); 746 if (nrt) { 747 err = ip6_ins_rt(nrt); 748 if (!err) 749 goto out2; 750 } 751 752 if (--attempts <= 0) 753 goto out2; 754 755 /* 756 * Race condition! In the gap, when table->tb6_lock was 757 * released someone could insert this route. Relookup. 758 */ 759 dst_release(&rt->u.dst); 760 goto relookup; 761 762 out: 763 if (reachable) { 764 reachable = 0; 765 goto restart_2; 766 } 767 dst_hold(&rt->u.dst); 768 read_unlock_bh(&table->tb6_lock); 769 out2: 770 rt->u.dst.lastuse = jiffies; 771 rt->u.dst.__use++; 772 773 return rt; 774 } 775 776 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, 777 struct flowi *fl, int flags) 778 { 779 return ip6_pol_route(net, table, fl->iif, fl, flags); 780 } 781 782 void ip6_route_input(struct sk_buff *skb) 783 { 784 struct ipv6hdr *iph = ipv6_hdr(skb); 785 struct net *net = dev_net(skb->dev); 786 int flags = RT6_LOOKUP_F_HAS_SADDR; 787 struct flowi fl = { 788 .iif = skb->dev->ifindex, 789 .nl_u = { 790 .ip6_u = { 791 .daddr = iph->daddr, 792 .saddr = iph->saddr, 793 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, 794 }, 795 }, 796 .mark = skb->mark, 797 .proto = iph->nexthdr, 798 }; 799 800 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) 801 flags |= RT6_LOOKUP_F_IFACE; 802 803 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input); 804 } 805 806 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, 807 struct flowi *fl, int flags) 808 { 809 return ip6_pol_route(net, table, fl->oif, fl, flags); 810 } 811 812 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, 813 struct flowi *fl) 814 { 815 int flags = 0; 816 817 if (rt6_need_strict(&fl->fl6_dst)) 818 flags |= RT6_LOOKUP_F_IFACE; 819 820 if (!ipv6_addr_any(&fl->fl6_src)) 821 flags |= RT6_LOOKUP_F_HAS_SADDR; 822 else if (sk) { 823 unsigned int prefs = inet6_sk(sk)->srcprefs; 824 if (prefs & IPV6_PREFER_SRC_TMP) 825 flags |= RT6_LOOKUP_F_SRCPREF_TMP; 826 if (prefs & IPV6_PREFER_SRC_PUBLIC) 827 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC; 828 if (prefs & IPV6_PREFER_SRC_COA) 829 flags |= RT6_LOOKUP_F_SRCPREF_COA; 830 } 831 832 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); 833 } 834 835 EXPORT_SYMBOL(ip6_route_output); 836 837 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) 838 { 839 struct rt6_info *ort = (struct rt6_info *) *dstp; 840 struct rt6_info *rt = (struct rt6_info *) 841 dst_alloc(&ip6_dst_blackhole_ops); 842 struct dst_entry *new = NULL; 843 844 if (rt) { 845 new = &rt->u.dst; 846 847 atomic_set(&new->__refcnt, 1); 848 new->__use = 1; 849 new->input = dst_discard; 850 new->output = dst_discard; 851 852 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 853 new->dev = ort->u.dst.dev; 854 if (new->dev) 855 dev_hold(new->dev); 856 rt->rt6i_idev = ort->rt6i_idev; 857 if (rt->rt6i_idev) 858 in6_dev_hold(rt->rt6i_idev); 859 rt->rt6i_expires = 0; 860 861 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 862 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 863 rt->rt6i_metric = 0; 864 865 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 866 #ifdef CONFIG_IPV6_SUBTREES 867 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 868 #endif 869 870 dst_free(new); 871 } 872 873 dst_release(*dstp); 874 *dstp = new; 875 return (new ? 0 : -ENOMEM); 876 } 877 EXPORT_SYMBOL_GPL(ip6_dst_blackhole); 878 879 /* 880 * Destination cache support functions 881 */ 882 883 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 884 { 885 struct rt6_info *rt; 886 887 rt = (struct rt6_info *) dst; 888 889 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) 890 return dst; 891 892 return NULL; 893 } 894 895 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) 896 { 897 struct rt6_info *rt = (struct rt6_info *) dst; 898 899 if (rt) { 900 if (rt->rt6i_flags & RTF_CACHE) 901 ip6_del_rt(rt); 902 else 903 dst_release(dst); 904 } 905 return NULL; 906 } 907 908 static void ip6_link_failure(struct sk_buff *skb) 909 { 910 struct rt6_info *rt; 911 912 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev); 913 914 rt = (struct rt6_info *) skb->dst; 915 if (rt) { 916 if (rt->rt6i_flags&RTF_CACHE) { 917 dst_set_expires(&rt->u.dst, 0); 918 rt->rt6i_flags |= RTF_EXPIRES; 919 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) 920 rt->rt6i_node->fn_sernum = -1; 921 } 922 } 923 924 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 925 { 926 struct rt6_info *rt6 = (struct rt6_info*)dst; 927 928 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 929 rt6->rt6i_flags |= RTF_MODIFIED; 930 if (mtu < IPV6_MIN_MTU) { 931 mtu = IPV6_MIN_MTU; 932 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 933 } 934 dst->metrics[RTAX_MTU-1] = mtu; 935 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); 936 } 937 } 938 939 static int ipv6_get_mtu(struct net_device *dev); 940 941 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu) 942 { 943 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 944 945 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) 946 mtu = net->ipv6.sysctl.ip6_rt_min_advmss; 947 948 /* 949 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 950 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 951 * IPV6_MAXPLEN is also valid and means: "any MSS, 952 * rely only on pmtu discovery" 953 */ 954 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 955 mtu = IPV6_MAXPLEN; 956 return mtu; 957 } 958 959 static struct dst_entry *icmp6_dst_gc_list; 960 static DEFINE_SPINLOCK(icmp6_dst_lock); 961 962 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 963 struct neighbour *neigh, 964 const struct in6_addr *addr) 965 { 966 struct rt6_info *rt; 967 struct inet6_dev *idev = in6_dev_get(dev); 968 struct net *net = dev_net(dev); 969 970 if (unlikely(idev == NULL)) 971 return NULL; 972 973 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); 974 if (unlikely(rt == NULL)) { 975 in6_dev_put(idev); 976 goto out; 977 } 978 979 dev_hold(dev); 980 if (neigh) 981 neigh_hold(neigh); 982 else { 983 neigh = ndisc_get_neigh(dev, addr); 984 if (IS_ERR(neigh)) 985 neigh = NULL; 986 } 987 988 rt->rt6i_dev = dev; 989 rt->rt6i_idev = idev; 990 rt->rt6i_nexthop = neigh; 991 atomic_set(&rt->u.dst.__refcnt, 1); 992 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; 993 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 994 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 995 rt->u.dst.output = ip6_output; 996 997 #if 0 /* there's no chance to use these for ndisc */ 998 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 999 ? DST_HOST 1000 : 0; 1001 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 1002 rt->rt6i_dst.plen = 128; 1003 #endif 1004 1005 spin_lock_bh(&icmp6_dst_lock); 1006 rt->u.dst.next = icmp6_dst_gc_list; 1007 icmp6_dst_gc_list = &rt->u.dst; 1008 spin_unlock_bh(&icmp6_dst_lock); 1009 1010 fib6_force_start_gc(net); 1011 1012 out: 1013 return &rt->u.dst; 1014 } 1015 1016 int icmp6_dst_gc(void) 1017 { 1018 struct dst_entry *dst, *next, **pprev; 1019 int more = 0; 1020 1021 next = NULL; 1022 1023 spin_lock_bh(&icmp6_dst_lock); 1024 pprev = &icmp6_dst_gc_list; 1025 1026 while ((dst = *pprev) != NULL) { 1027 if (!atomic_read(&dst->__refcnt)) { 1028 *pprev = dst->next; 1029 dst_free(dst); 1030 } else { 1031 pprev = &dst->next; 1032 ++more; 1033 } 1034 } 1035 1036 spin_unlock_bh(&icmp6_dst_lock); 1037 1038 return more; 1039 } 1040 1041 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), 1042 void *arg) 1043 { 1044 struct dst_entry *dst, **pprev; 1045 1046 spin_lock_bh(&icmp6_dst_lock); 1047 pprev = &icmp6_dst_gc_list; 1048 while ((dst = *pprev) != NULL) { 1049 struct rt6_info *rt = (struct rt6_info *) dst; 1050 if (func(rt, arg)) { 1051 *pprev = dst->next; 1052 dst_free(dst); 1053 } else { 1054 pprev = &dst->next; 1055 } 1056 } 1057 spin_unlock_bh(&icmp6_dst_lock); 1058 } 1059 1060 static int ip6_dst_gc(struct dst_ops *ops) 1061 { 1062 unsigned long now = jiffies; 1063 struct net *net = ops->dst_net; 1064 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; 1065 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; 1066 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 1067 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 1068 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 1069 1070 if (time_after(rt_last_gc + rt_min_interval, now) && 1071 atomic_read(&ops->entries) <= rt_max_size) 1072 goto out; 1073 1074 net->ipv6.ip6_rt_gc_expire++; 1075 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); 1076 net->ipv6.ip6_rt_last_gc = now; 1077 if (atomic_read(&ops->entries) < ops->gc_thresh) 1078 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1079 out: 1080 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1081 return (atomic_read(&ops->entries) > rt_max_size); 1082 } 1083 1084 /* Clean host part of a prefix. Not necessary in radix tree, 1085 but results in cleaner routing tables. 1086 1087 Remove it only when all the things will work! 1088 */ 1089 1090 static int ipv6_get_mtu(struct net_device *dev) 1091 { 1092 int mtu = IPV6_MIN_MTU; 1093 struct inet6_dev *idev; 1094 1095 idev = in6_dev_get(dev); 1096 if (idev) { 1097 mtu = idev->cnf.mtu6; 1098 in6_dev_put(idev); 1099 } 1100 return mtu; 1101 } 1102 1103 int ip6_dst_hoplimit(struct dst_entry *dst) 1104 { 1105 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); 1106 if (hoplimit < 0) { 1107 struct net_device *dev = dst->dev; 1108 struct inet6_dev *idev = in6_dev_get(dev); 1109 if (idev) { 1110 hoplimit = idev->cnf.hop_limit; 1111 in6_dev_put(idev); 1112 } else 1113 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; 1114 } 1115 return hoplimit; 1116 } 1117 1118 /* 1119 * 1120 */ 1121 1122 int ip6_route_add(struct fib6_config *cfg) 1123 { 1124 int err; 1125 struct net *net = cfg->fc_nlinfo.nl_net; 1126 struct rt6_info *rt = NULL; 1127 struct net_device *dev = NULL; 1128 struct inet6_dev *idev = NULL; 1129 struct fib6_table *table; 1130 int addr_type; 1131 1132 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1133 return -EINVAL; 1134 #ifndef CONFIG_IPV6_SUBTREES 1135 if (cfg->fc_src_len) 1136 return -EINVAL; 1137 #endif 1138 if (cfg->fc_ifindex) { 1139 err = -ENODEV; 1140 dev = dev_get_by_index(net, cfg->fc_ifindex); 1141 if (!dev) 1142 goto out; 1143 idev = in6_dev_get(dev); 1144 if (!idev) 1145 goto out; 1146 } 1147 1148 if (cfg->fc_metric == 0) 1149 cfg->fc_metric = IP6_RT_PRIO_USER; 1150 1151 table = fib6_new_table(net, cfg->fc_table); 1152 if (table == NULL) { 1153 err = -ENOBUFS; 1154 goto out; 1155 } 1156 1157 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); 1158 1159 if (rt == NULL) { 1160 err = -ENOMEM; 1161 goto out; 1162 } 1163 1164 rt->u.dst.obsolete = -1; 1165 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? 1166 jiffies + clock_t_to_jiffies(cfg->fc_expires) : 1167 0; 1168 1169 if (cfg->fc_protocol == RTPROT_UNSPEC) 1170 cfg->fc_protocol = RTPROT_BOOT; 1171 rt->rt6i_protocol = cfg->fc_protocol; 1172 1173 addr_type = ipv6_addr_type(&cfg->fc_dst); 1174 1175 if (addr_type & IPV6_ADDR_MULTICAST) 1176 rt->u.dst.input = ip6_mc_input; 1177 else 1178 rt->u.dst.input = ip6_forward; 1179 1180 rt->u.dst.output = ip6_output; 1181 1182 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1183 rt->rt6i_dst.plen = cfg->fc_dst_len; 1184 if (rt->rt6i_dst.plen == 128) 1185 rt->u.dst.flags = DST_HOST; 1186 1187 #ifdef CONFIG_IPV6_SUBTREES 1188 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1189 rt->rt6i_src.plen = cfg->fc_src_len; 1190 #endif 1191 1192 rt->rt6i_metric = cfg->fc_metric; 1193 1194 /* We cannot add true routes via loopback here, 1195 they would result in kernel looping; promote them to reject routes 1196 */ 1197 if ((cfg->fc_flags & RTF_REJECT) || 1198 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { 1199 /* hold loopback dev/idev if we haven't done so. */ 1200 if (dev != net->loopback_dev) { 1201 if (dev) { 1202 dev_put(dev); 1203 in6_dev_put(idev); 1204 } 1205 dev = net->loopback_dev; 1206 dev_hold(dev); 1207 idev = in6_dev_get(dev); 1208 if (!idev) { 1209 err = -ENODEV; 1210 goto out; 1211 } 1212 } 1213 rt->u.dst.output = ip6_pkt_discard_out; 1214 rt->u.dst.input = ip6_pkt_discard; 1215 rt->u.dst.error = -ENETUNREACH; 1216 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1217 goto install_route; 1218 } 1219 1220 if (cfg->fc_flags & RTF_GATEWAY) { 1221 struct in6_addr *gw_addr; 1222 int gwa_type; 1223 1224 gw_addr = &cfg->fc_gateway; 1225 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr); 1226 gwa_type = ipv6_addr_type(gw_addr); 1227 1228 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 1229 struct rt6_info *grt; 1230 1231 /* IPv6 strictly inhibits using not link-local 1232 addresses as nexthop address. 1233 Otherwise, router will not able to send redirects. 1234 It is very good, but in some (rare!) circumstances 1235 (SIT, PtP, NBMA NOARP links) it is handy to allow 1236 some exceptions. --ANK 1237 */ 1238 err = -EINVAL; 1239 if (!(gwa_type&IPV6_ADDR_UNICAST)) 1240 goto out; 1241 1242 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); 1243 1244 err = -EHOSTUNREACH; 1245 if (grt == NULL) 1246 goto out; 1247 if (dev) { 1248 if (dev != grt->rt6i_dev) { 1249 dst_release(&grt->u.dst); 1250 goto out; 1251 } 1252 } else { 1253 dev = grt->rt6i_dev; 1254 idev = grt->rt6i_idev; 1255 dev_hold(dev); 1256 in6_dev_hold(grt->rt6i_idev); 1257 } 1258 if (!(grt->rt6i_flags&RTF_GATEWAY)) 1259 err = 0; 1260 dst_release(&grt->u.dst); 1261 1262 if (err) 1263 goto out; 1264 } 1265 err = -EINVAL; 1266 if (dev == NULL || (dev->flags&IFF_LOOPBACK)) 1267 goto out; 1268 } 1269 1270 err = -ENODEV; 1271 if (dev == NULL) 1272 goto out; 1273 1274 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { 1275 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); 1276 if (IS_ERR(rt->rt6i_nexthop)) { 1277 err = PTR_ERR(rt->rt6i_nexthop); 1278 rt->rt6i_nexthop = NULL; 1279 goto out; 1280 } 1281 } 1282 1283 rt->rt6i_flags = cfg->fc_flags; 1284 1285 install_route: 1286 if (cfg->fc_mx) { 1287 struct nlattr *nla; 1288 int remaining; 1289 1290 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 1291 int type = nla_type(nla); 1292 1293 if (type) { 1294 if (type > RTAX_MAX) { 1295 err = -EINVAL; 1296 goto out; 1297 } 1298 1299 rt->u.dst.metrics[type - 1] = nla_get_u32(nla); 1300 } 1301 } 1302 } 1303 1304 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) 1305 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1306 if (!dst_mtu(&rt->u.dst)) 1307 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); 1308 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS)) 1309 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 1310 rt->u.dst.dev = dev; 1311 rt->rt6i_idev = idev; 1312 rt->rt6i_table = table; 1313 1314 cfg->fc_nlinfo.nl_net = dev_net(dev); 1315 1316 return __ip6_ins_rt(rt, &cfg->fc_nlinfo); 1317 1318 out: 1319 if (dev) 1320 dev_put(dev); 1321 if (idev) 1322 in6_dev_put(idev); 1323 if (rt) 1324 dst_free(&rt->u.dst); 1325 return err; 1326 } 1327 1328 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) 1329 { 1330 int err; 1331 struct fib6_table *table; 1332 struct net *net = dev_net(rt->rt6i_dev); 1333 1334 if (rt == net->ipv6.ip6_null_entry) 1335 return -ENOENT; 1336 1337 table = rt->rt6i_table; 1338 write_lock_bh(&table->tb6_lock); 1339 1340 err = fib6_del(rt, info); 1341 dst_release(&rt->u.dst); 1342 1343 write_unlock_bh(&table->tb6_lock); 1344 1345 return err; 1346 } 1347 1348 int ip6_del_rt(struct rt6_info *rt) 1349 { 1350 struct nl_info info = { 1351 .nl_net = dev_net(rt->rt6i_dev), 1352 }; 1353 return __ip6_del_rt(rt, &info); 1354 } 1355 1356 static int ip6_route_del(struct fib6_config *cfg) 1357 { 1358 struct fib6_table *table; 1359 struct fib6_node *fn; 1360 struct rt6_info *rt; 1361 int err = -ESRCH; 1362 1363 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); 1364 if (table == NULL) 1365 return err; 1366 1367 read_lock_bh(&table->tb6_lock); 1368 1369 fn = fib6_locate(&table->tb6_root, 1370 &cfg->fc_dst, cfg->fc_dst_len, 1371 &cfg->fc_src, cfg->fc_src_len); 1372 1373 if (fn) { 1374 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1375 if (cfg->fc_ifindex && 1376 (rt->rt6i_dev == NULL || 1377 rt->rt6i_dev->ifindex != cfg->fc_ifindex)) 1378 continue; 1379 if (cfg->fc_flags & RTF_GATEWAY && 1380 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) 1381 continue; 1382 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) 1383 continue; 1384 dst_hold(&rt->u.dst); 1385 read_unlock_bh(&table->tb6_lock); 1386 1387 return __ip6_del_rt(rt, &cfg->fc_nlinfo); 1388 } 1389 } 1390 read_unlock_bh(&table->tb6_lock); 1391 1392 return err; 1393 } 1394 1395 /* 1396 * Handle redirects 1397 */ 1398 struct ip6rd_flowi { 1399 struct flowi fl; 1400 struct in6_addr gateway; 1401 }; 1402 1403 static struct rt6_info *__ip6_route_redirect(struct net *net, 1404 struct fib6_table *table, 1405 struct flowi *fl, 1406 int flags) 1407 { 1408 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl; 1409 struct rt6_info *rt; 1410 struct fib6_node *fn; 1411 1412 /* 1413 * Get the "current" route for this destination and 1414 * check if the redirect has come from approriate router. 1415 * 1416 * RFC 2461 specifies that redirects should only be 1417 * accepted if they come from the nexthop to the target. 1418 * Due to the way the routes are chosen, this notion 1419 * is a bit fuzzy and one might need to check all possible 1420 * routes. 1421 */ 1422 1423 read_lock_bh(&table->tb6_lock); 1424 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 1425 restart: 1426 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1427 /* 1428 * Current route is on-link; redirect is always invalid. 1429 * 1430 * Seems, previous statement is not true. It could 1431 * be node, which looks for us as on-link (f.e. proxy ndisc) 1432 * But then router serving it might decide, that we should 1433 * know truth 8)8) --ANK (980726). 1434 */ 1435 if (rt6_check_expired(rt)) 1436 continue; 1437 if (!(rt->rt6i_flags & RTF_GATEWAY)) 1438 continue; 1439 if (fl->oif != rt->rt6i_dev->ifindex) 1440 continue; 1441 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) 1442 continue; 1443 break; 1444 } 1445 1446 if (!rt) 1447 rt = net->ipv6.ip6_null_entry; 1448 BACKTRACK(net, &fl->fl6_src); 1449 out: 1450 dst_hold(&rt->u.dst); 1451 1452 read_unlock_bh(&table->tb6_lock); 1453 1454 return rt; 1455 }; 1456 1457 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, 1458 struct in6_addr *src, 1459 struct in6_addr *gateway, 1460 struct net_device *dev) 1461 { 1462 int flags = RT6_LOOKUP_F_HAS_SADDR; 1463 struct net *net = dev_net(dev); 1464 struct ip6rd_flowi rdfl = { 1465 .fl = { 1466 .oif = dev->ifindex, 1467 .nl_u = { 1468 .ip6_u = { 1469 .daddr = *dest, 1470 .saddr = *src, 1471 }, 1472 }, 1473 }, 1474 .gateway = *gateway, 1475 }; 1476 1477 if (rt6_need_strict(dest)) 1478 flags |= RT6_LOOKUP_F_IFACE; 1479 1480 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl, 1481 flags, __ip6_route_redirect); 1482 } 1483 1484 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, 1485 struct in6_addr *saddr, 1486 struct neighbour *neigh, u8 *lladdr, int on_link) 1487 { 1488 struct rt6_info *rt, *nrt = NULL; 1489 struct netevent_redirect netevent; 1490 struct net *net = dev_net(neigh->dev); 1491 1492 rt = ip6_route_redirect(dest, src, saddr, neigh->dev); 1493 1494 if (rt == net->ipv6.ip6_null_entry) { 1495 if (net_ratelimit()) 1496 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " 1497 "for redirect target\n"); 1498 goto out; 1499 } 1500 1501 /* 1502 * We have finally decided to accept it. 1503 */ 1504 1505 neigh_update(neigh, lladdr, NUD_STALE, 1506 NEIGH_UPDATE_F_WEAK_OVERRIDE| 1507 NEIGH_UPDATE_F_OVERRIDE| 1508 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 1509 NEIGH_UPDATE_F_ISROUTER)) 1510 ); 1511 1512 /* 1513 * Redirect received -> path was valid. 1514 * Look, redirects are sent only in response to data packets, 1515 * so that this nexthop apparently is reachable. --ANK 1516 */ 1517 dst_confirm(&rt->u.dst); 1518 1519 /* Duplicate redirect: silently ignore. */ 1520 if (neigh == rt->u.dst.neighbour) 1521 goto out; 1522 1523 nrt = ip6_rt_copy(rt); 1524 if (nrt == NULL) 1525 goto out; 1526 1527 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 1528 if (on_link) 1529 nrt->rt6i_flags &= ~RTF_GATEWAY; 1530 1531 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest); 1532 nrt->rt6i_dst.plen = 128; 1533 nrt->u.dst.flags |= DST_HOST; 1534 1535 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); 1536 nrt->rt6i_nexthop = neigh_clone(neigh); 1537 /* Reset pmtu, it may be better */ 1538 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); 1539 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), 1540 dst_mtu(&nrt->u.dst)); 1541 1542 if (ip6_ins_rt(nrt)) 1543 goto out; 1544 1545 netevent.old = &rt->u.dst; 1546 netevent.new = &nrt->u.dst; 1547 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 1548 1549 if (rt->rt6i_flags&RTF_CACHE) { 1550 ip6_del_rt(rt); 1551 return; 1552 } 1553 1554 out: 1555 dst_release(&rt->u.dst); 1556 return; 1557 } 1558 1559 /* 1560 * Handle ICMP "packet too big" messages 1561 * i.e. Path MTU discovery 1562 */ 1563 1564 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, 1565 struct net_device *dev, u32 pmtu) 1566 { 1567 struct rt6_info *rt, *nrt; 1568 struct net *net = dev_net(dev); 1569 int allfrag = 0; 1570 1571 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0); 1572 if (rt == NULL) 1573 return; 1574 1575 if (pmtu >= dst_mtu(&rt->u.dst)) 1576 goto out; 1577 1578 if (pmtu < IPV6_MIN_MTU) { 1579 /* 1580 * According to RFC2460, PMTU is set to the IPv6 Minimum Link 1581 * MTU (1280) and a fragment header should always be included 1582 * after a node receiving Too Big message reporting PMTU is 1583 * less than the IPv6 Minimum Link MTU. 1584 */ 1585 pmtu = IPV6_MIN_MTU; 1586 allfrag = 1; 1587 } 1588 1589 /* New mtu received -> path was valid. 1590 They are sent only in response to data packets, 1591 so that this nexthop apparently is reachable. --ANK 1592 */ 1593 dst_confirm(&rt->u.dst); 1594 1595 /* Host route. If it is static, it would be better 1596 not to override it, but add new one, so that 1597 when cache entry will expire old pmtu 1598 would return automatically. 1599 */ 1600 if (rt->rt6i_flags & RTF_CACHE) { 1601 rt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1602 if (allfrag) 1603 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1604 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); 1605 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; 1606 goto out; 1607 } 1608 1609 /* Network route. 1610 Two cases are possible: 1611 1. It is connected route. Action: COW 1612 2. It is gatewayed route or NONEXTHOP route. Action: clone it. 1613 */ 1614 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 1615 nrt = rt6_alloc_cow(rt, daddr, saddr); 1616 else 1617 nrt = rt6_alloc_clone(rt, daddr); 1618 1619 if (nrt) { 1620 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1621 if (allfrag) 1622 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1623 1624 /* According to RFC 1981, detecting PMTU increase shouldn't be 1625 * happened within 5 mins, the recommended timer is 10 mins. 1626 * Here this route expiration time is set to ip6_rt_mtu_expires 1627 * which is 10 mins. After 10 mins the decreased pmtu is expired 1628 * and detecting PMTU increase will be automatically happened. 1629 */ 1630 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); 1631 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; 1632 1633 ip6_ins_rt(nrt); 1634 } 1635 out: 1636 dst_release(&rt->u.dst); 1637 } 1638 1639 /* 1640 * Misc support functions 1641 */ 1642 1643 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) 1644 { 1645 struct net *net = dev_net(ort->rt6i_dev); 1646 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); 1647 1648 if (rt) { 1649 rt->u.dst.input = ort->u.dst.input; 1650 rt->u.dst.output = ort->u.dst.output; 1651 1652 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 1653 rt->u.dst.error = ort->u.dst.error; 1654 rt->u.dst.dev = ort->u.dst.dev; 1655 if (rt->u.dst.dev) 1656 dev_hold(rt->u.dst.dev); 1657 rt->rt6i_idev = ort->rt6i_idev; 1658 if (rt->rt6i_idev) 1659 in6_dev_hold(rt->rt6i_idev); 1660 rt->u.dst.lastuse = jiffies; 1661 rt->rt6i_expires = 0; 1662 1663 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 1664 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 1665 rt->rt6i_metric = 0; 1666 1667 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 1668 #ifdef CONFIG_IPV6_SUBTREES 1669 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1670 #endif 1671 rt->rt6i_table = ort->rt6i_table; 1672 } 1673 return rt; 1674 } 1675 1676 #ifdef CONFIG_IPV6_ROUTE_INFO 1677 static struct rt6_info *rt6_get_route_info(struct net *net, 1678 struct in6_addr *prefix, int prefixlen, 1679 struct in6_addr *gwaddr, int ifindex) 1680 { 1681 struct fib6_node *fn; 1682 struct rt6_info *rt = NULL; 1683 struct fib6_table *table; 1684 1685 table = fib6_get_table(net, RT6_TABLE_INFO); 1686 if (table == NULL) 1687 return NULL; 1688 1689 write_lock_bh(&table->tb6_lock); 1690 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); 1691 if (!fn) 1692 goto out; 1693 1694 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1695 if (rt->rt6i_dev->ifindex != ifindex) 1696 continue; 1697 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 1698 continue; 1699 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 1700 continue; 1701 dst_hold(&rt->u.dst); 1702 break; 1703 } 1704 out: 1705 write_unlock_bh(&table->tb6_lock); 1706 return rt; 1707 } 1708 1709 static struct rt6_info *rt6_add_route_info(struct net *net, 1710 struct in6_addr *prefix, int prefixlen, 1711 struct in6_addr *gwaddr, int ifindex, 1712 unsigned pref) 1713 { 1714 struct fib6_config cfg = { 1715 .fc_table = RT6_TABLE_INFO, 1716 .fc_metric = IP6_RT_PRIO_USER, 1717 .fc_ifindex = ifindex, 1718 .fc_dst_len = prefixlen, 1719 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 1720 RTF_UP | RTF_PREF(pref), 1721 .fc_nlinfo.pid = 0, 1722 .fc_nlinfo.nlh = NULL, 1723 .fc_nlinfo.nl_net = net, 1724 }; 1725 1726 ipv6_addr_copy(&cfg.fc_dst, prefix); 1727 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); 1728 1729 /* We should treat it as a default route if prefix length is 0. */ 1730 if (!prefixlen) 1731 cfg.fc_flags |= RTF_DEFAULT; 1732 1733 ip6_route_add(&cfg); 1734 1735 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); 1736 } 1737 #endif 1738 1739 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) 1740 { 1741 struct rt6_info *rt; 1742 struct fib6_table *table; 1743 1744 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); 1745 if (table == NULL) 1746 return NULL; 1747 1748 write_lock_bh(&table->tb6_lock); 1749 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) { 1750 if (dev == rt->rt6i_dev && 1751 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 1752 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 1753 break; 1754 } 1755 if (rt) 1756 dst_hold(&rt->u.dst); 1757 write_unlock_bh(&table->tb6_lock); 1758 return rt; 1759 } 1760 1761 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, 1762 struct net_device *dev, 1763 unsigned int pref) 1764 { 1765 struct fib6_config cfg = { 1766 .fc_table = RT6_TABLE_DFLT, 1767 .fc_metric = IP6_RT_PRIO_USER, 1768 .fc_ifindex = dev->ifindex, 1769 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 1770 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 1771 .fc_nlinfo.pid = 0, 1772 .fc_nlinfo.nlh = NULL, 1773 .fc_nlinfo.nl_net = dev_net(dev), 1774 }; 1775 1776 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); 1777 1778 ip6_route_add(&cfg); 1779 1780 return rt6_get_dflt_router(gwaddr, dev); 1781 } 1782 1783 void rt6_purge_dflt_routers(struct net *net) 1784 { 1785 struct rt6_info *rt; 1786 struct fib6_table *table; 1787 1788 /* NOTE: Keep consistent with rt6_get_dflt_router */ 1789 table = fib6_get_table(net, RT6_TABLE_DFLT); 1790 if (table == NULL) 1791 return; 1792 1793 restart: 1794 read_lock_bh(&table->tb6_lock); 1795 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) { 1796 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { 1797 dst_hold(&rt->u.dst); 1798 read_unlock_bh(&table->tb6_lock); 1799 ip6_del_rt(rt); 1800 goto restart; 1801 } 1802 } 1803 read_unlock_bh(&table->tb6_lock); 1804 } 1805 1806 static void rtmsg_to_fib6_config(struct net *net, 1807 struct in6_rtmsg *rtmsg, 1808 struct fib6_config *cfg) 1809 { 1810 memset(cfg, 0, sizeof(*cfg)); 1811 1812 cfg->fc_table = RT6_TABLE_MAIN; 1813 cfg->fc_ifindex = rtmsg->rtmsg_ifindex; 1814 cfg->fc_metric = rtmsg->rtmsg_metric; 1815 cfg->fc_expires = rtmsg->rtmsg_info; 1816 cfg->fc_dst_len = rtmsg->rtmsg_dst_len; 1817 cfg->fc_src_len = rtmsg->rtmsg_src_len; 1818 cfg->fc_flags = rtmsg->rtmsg_flags; 1819 1820 cfg->fc_nlinfo.nl_net = net; 1821 1822 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); 1823 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); 1824 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); 1825 } 1826 1827 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) 1828 { 1829 struct fib6_config cfg; 1830 struct in6_rtmsg rtmsg; 1831 int err; 1832 1833 switch(cmd) { 1834 case SIOCADDRT: /* Add a route */ 1835 case SIOCDELRT: /* Delete a route */ 1836 if (!capable(CAP_NET_ADMIN)) 1837 return -EPERM; 1838 err = copy_from_user(&rtmsg, arg, 1839 sizeof(struct in6_rtmsg)); 1840 if (err) 1841 return -EFAULT; 1842 1843 rtmsg_to_fib6_config(net, &rtmsg, &cfg); 1844 1845 rtnl_lock(); 1846 switch (cmd) { 1847 case SIOCADDRT: 1848 err = ip6_route_add(&cfg); 1849 break; 1850 case SIOCDELRT: 1851 err = ip6_route_del(&cfg); 1852 break; 1853 default: 1854 err = -EINVAL; 1855 } 1856 rtnl_unlock(); 1857 1858 return err; 1859 } 1860 1861 return -EINVAL; 1862 } 1863 1864 /* 1865 * Drop the packet on the floor 1866 */ 1867 1868 static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes) 1869 { 1870 int type; 1871 struct dst_entry *dst = skb->dst; 1872 switch (ipstats_mib_noroutes) { 1873 case IPSTATS_MIB_INNOROUTES: 1874 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); 1875 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) { 1876 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 1877 IPSTATS_MIB_INADDRERRORS); 1878 break; 1879 } 1880 /* FALLTHROUGH */ 1881 case IPSTATS_MIB_OUTNOROUTES: 1882 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 1883 ipstats_mib_noroutes); 1884 break; 1885 } 1886 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev); 1887 kfree_skb(skb); 1888 return 0; 1889 } 1890 1891 static int ip6_pkt_discard(struct sk_buff *skb) 1892 { 1893 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 1894 } 1895 1896 static int ip6_pkt_discard_out(struct sk_buff *skb) 1897 { 1898 skb->dev = skb->dst->dev; 1899 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 1900 } 1901 1902 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 1903 1904 static int ip6_pkt_prohibit(struct sk_buff *skb) 1905 { 1906 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 1907 } 1908 1909 static int ip6_pkt_prohibit_out(struct sk_buff *skb) 1910 { 1911 skb->dev = skb->dst->dev; 1912 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 1913 } 1914 1915 #endif 1916 1917 /* 1918 * Allocate a dst for local (unicast / anycast) address. 1919 */ 1920 1921 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 1922 const struct in6_addr *addr, 1923 int anycast) 1924 { 1925 struct net *net = dev_net(idev->dev); 1926 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); 1927 struct neighbour *neigh; 1928 1929 if (rt == NULL) 1930 return ERR_PTR(-ENOMEM); 1931 1932 dev_hold(net->loopback_dev); 1933 in6_dev_hold(idev); 1934 1935 rt->u.dst.flags = DST_HOST; 1936 rt->u.dst.input = ip6_input; 1937 rt->u.dst.output = ip6_output; 1938 rt->rt6i_dev = net->loopback_dev; 1939 rt->rt6i_idev = idev; 1940 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 1941 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 1942 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1943 rt->u.dst.obsolete = -1; 1944 1945 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 1946 if (anycast) 1947 rt->rt6i_flags |= RTF_ANYCAST; 1948 else 1949 rt->rt6i_flags |= RTF_LOCAL; 1950 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 1951 if (IS_ERR(neigh)) { 1952 dst_free(&rt->u.dst); 1953 1954 /* We are casting this because that is the return 1955 * value type. But an errno encoded pointer is the 1956 * same regardless of the underlying pointer type, 1957 * and that's what we are returning. So this is OK. 1958 */ 1959 return (struct rt6_info *) neigh; 1960 } 1961 rt->rt6i_nexthop = neigh; 1962 1963 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 1964 rt->rt6i_dst.plen = 128; 1965 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); 1966 1967 atomic_set(&rt->u.dst.__refcnt, 1); 1968 1969 return rt; 1970 } 1971 1972 struct arg_dev_net { 1973 struct net_device *dev; 1974 struct net *net; 1975 }; 1976 1977 static int fib6_ifdown(struct rt6_info *rt, void *arg) 1978 { 1979 struct net_device *dev = ((struct arg_dev_net *)arg)->dev; 1980 struct net *net = ((struct arg_dev_net *)arg)->net; 1981 1982 if (((void *)rt->rt6i_dev == dev || dev == NULL) && 1983 rt != net->ipv6.ip6_null_entry) { 1984 RT6_TRACE("deleted by ifdown %p\n", rt); 1985 return -1; 1986 } 1987 return 0; 1988 } 1989 1990 void rt6_ifdown(struct net *net, struct net_device *dev) 1991 { 1992 struct arg_dev_net adn = { 1993 .dev = dev, 1994 .net = net, 1995 }; 1996 1997 fib6_clean_all(net, fib6_ifdown, 0, &adn); 1998 icmp6_clean_all(fib6_ifdown, &adn); 1999 } 2000 2001 struct rt6_mtu_change_arg 2002 { 2003 struct net_device *dev; 2004 unsigned mtu; 2005 }; 2006 2007 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) 2008 { 2009 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 2010 struct inet6_dev *idev; 2011 struct net *net = dev_net(arg->dev); 2012 2013 /* In IPv6 pmtu discovery is not optional, 2014 so that RTAX_MTU lock cannot disable it. 2015 We still use this lock to block changes 2016 caused by addrconf/ndisc. 2017 */ 2018 2019 idev = __in6_dev_get(arg->dev); 2020 if (idev == NULL) 2021 return 0; 2022 2023 /* For administrative MTU increase, there is no way to discover 2024 IPv6 PMTU increase, so PMTU increase should be updated here. 2025 Since RFC 1981 doesn't include administrative MTU increase 2026 update PMTU increase is a MUST. (i.e. jumbo frame) 2027 */ 2028 /* 2029 If new MTU is less than route PMTU, this new MTU will be the 2030 lowest MTU in the path, update the route PMTU to reflect PMTU 2031 decreases; if new MTU is greater than route PMTU, and the 2032 old MTU is the lowest MTU in the path, update the route PMTU 2033 to reflect the increase. In this case if the other nodes' MTU 2034 also have the lowest MTU, TOO BIG MESSAGE will be lead to 2035 PMTU discouvery. 2036 */ 2037 if (rt->rt6i_dev == arg->dev && 2038 !dst_metric_locked(&rt->u.dst, RTAX_MTU) && 2039 (dst_mtu(&rt->u.dst) >= arg->mtu || 2040 (dst_mtu(&rt->u.dst) < arg->mtu && 2041 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { 2042 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; 2043 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); 2044 } 2045 return 0; 2046 } 2047 2048 void rt6_mtu_change(struct net_device *dev, unsigned mtu) 2049 { 2050 struct rt6_mtu_change_arg arg = { 2051 .dev = dev, 2052 .mtu = mtu, 2053 }; 2054 2055 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); 2056 } 2057 2058 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 2059 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 2060 [RTA_OIF] = { .type = NLA_U32 }, 2061 [RTA_IIF] = { .type = NLA_U32 }, 2062 [RTA_PRIORITY] = { .type = NLA_U32 }, 2063 [RTA_METRICS] = { .type = NLA_NESTED }, 2064 }; 2065 2066 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2067 struct fib6_config *cfg) 2068 { 2069 struct rtmsg *rtm; 2070 struct nlattr *tb[RTA_MAX+1]; 2071 int err; 2072 2073 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2074 if (err < 0) 2075 goto errout; 2076 2077 err = -EINVAL; 2078 rtm = nlmsg_data(nlh); 2079 memset(cfg, 0, sizeof(*cfg)); 2080 2081 cfg->fc_table = rtm->rtm_table; 2082 cfg->fc_dst_len = rtm->rtm_dst_len; 2083 cfg->fc_src_len = rtm->rtm_src_len; 2084 cfg->fc_flags = RTF_UP; 2085 cfg->fc_protocol = rtm->rtm_protocol; 2086 2087 if (rtm->rtm_type == RTN_UNREACHABLE) 2088 cfg->fc_flags |= RTF_REJECT; 2089 2090 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 2091 cfg->fc_nlinfo.nlh = nlh; 2092 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2093 2094 if (tb[RTA_GATEWAY]) { 2095 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); 2096 cfg->fc_flags |= RTF_GATEWAY; 2097 } 2098 2099 if (tb[RTA_DST]) { 2100 int plen = (rtm->rtm_dst_len + 7) >> 3; 2101 2102 if (nla_len(tb[RTA_DST]) < plen) 2103 goto errout; 2104 2105 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 2106 } 2107 2108 if (tb[RTA_SRC]) { 2109 int plen = (rtm->rtm_src_len + 7) >> 3; 2110 2111 if (nla_len(tb[RTA_SRC]) < plen) 2112 goto errout; 2113 2114 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 2115 } 2116 2117 if (tb[RTA_OIF]) 2118 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 2119 2120 if (tb[RTA_PRIORITY]) 2121 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 2122 2123 if (tb[RTA_METRICS]) { 2124 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 2125 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 2126 } 2127 2128 if (tb[RTA_TABLE]) 2129 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2130 2131 err = 0; 2132 errout: 2133 return err; 2134 } 2135 2136 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2137 { 2138 struct fib6_config cfg; 2139 int err; 2140 2141 err = rtm_to_fib6_config(skb, nlh, &cfg); 2142 if (err < 0) 2143 return err; 2144 2145 return ip6_route_del(&cfg); 2146 } 2147 2148 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2149 { 2150 struct fib6_config cfg; 2151 int err; 2152 2153 err = rtm_to_fib6_config(skb, nlh, &cfg); 2154 if (err < 0) 2155 return err; 2156 2157 return ip6_route_add(&cfg); 2158 } 2159 2160 static inline size_t rt6_nlmsg_size(void) 2161 { 2162 return NLMSG_ALIGN(sizeof(struct rtmsg)) 2163 + nla_total_size(16) /* RTA_SRC */ 2164 + nla_total_size(16) /* RTA_DST */ 2165 + nla_total_size(16) /* RTA_GATEWAY */ 2166 + nla_total_size(16) /* RTA_PREFSRC */ 2167 + nla_total_size(4) /* RTA_TABLE */ 2168 + nla_total_size(4) /* RTA_IIF */ 2169 + nla_total_size(4) /* RTA_OIF */ 2170 + nla_total_size(4) /* RTA_PRIORITY */ 2171 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 2172 + nla_total_size(sizeof(struct rta_cacheinfo)); 2173 } 2174 2175 static int rt6_fill_node(struct net *net, 2176 struct sk_buff *skb, struct rt6_info *rt, 2177 struct in6_addr *dst, struct in6_addr *src, 2178 int iif, int type, u32 pid, u32 seq, 2179 int prefix, int nowait, unsigned int flags) 2180 { 2181 struct rtmsg *rtm; 2182 struct nlmsghdr *nlh; 2183 long expires; 2184 u32 table; 2185 2186 if (prefix) { /* user wants prefix routes only */ 2187 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 2188 /* success since this is not a prefix route */ 2189 return 1; 2190 } 2191 } 2192 2193 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); 2194 if (nlh == NULL) 2195 return -EMSGSIZE; 2196 2197 rtm = nlmsg_data(nlh); 2198 rtm->rtm_family = AF_INET6; 2199 rtm->rtm_dst_len = rt->rt6i_dst.plen; 2200 rtm->rtm_src_len = rt->rt6i_src.plen; 2201 rtm->rtm_tos = 0; 2202 if (rt->rt6i_table) 2203 table = rt->rt6i_table->tb6_id; 2204 else 2205 table = RT6_TABLE_UNSPEC; 2206 rtm->rtm_table = table; 2207 NLA_PUT_U32(skb, RTA_TABLE, table); 2208 if (rt->rt6i_flags&RTF_REJECT) 2209 rtm->rtm_type = RTN_UNREACHABLE; 2210 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) 2211 rtm->rtm_type = RTN_LOCAL; 2212 else 2213 rtm->rtm_type = RTN_UNICAST; 2214 rtm->rtm_flags = 0; 2215 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2216 rtm->rtm_protocol = rt->rt6i_protocol; 2217 if (rt->rt6i_flags&RTF_DYNAMIC) 2218 rtm->rtm_protocol = RTPROT_REDIRECT; 2219 else if (rt->rt6i_flags & RTF_ADDRCONF) 2220 rtm->rtm_protocol = RTPROT_KERNEL; 2221 else if (rt->rt6i_flags&RTF_DEFAULT) 2222 rtm->rtm_protocol = RTPROT_RA; 2223 2224 if (rt->rt6i_flags&RTF_CACHE) 2225 rtm->rtm_flags |= RTM_F_CLONED; 2226 2227 if (dst) { 2228 NLA_PUT(skb, RTA_DST, 16, dst); 2229 rtm->rtm_dst_len = 128; 2230 } else if (rtm->rtm_dst_len) 2231 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); 2232 #ifdef CONFIG_IPV6_SUBTREES 2233 if (src) { 2234 NLA_PUT(skb, RTA_SRC, 16, src); 2235 rtm->rtm_src_len = 128; 2236 } else if (rtm->rtm_src_len) 2237 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); 2238 #endif 2239 if (iif) { 2240 #ifdef CONFIG_IPV6_MROUTE 2241 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { 2242 int err = ip6mr_get_route(net, skb, rtm, nowait); 2243 if (err <= 0) { 2244 if (!nowait) { 2245 if (err == 0) 2246 return 0; 2247 goto nla_put_failure; 2248 } else { 2249 if (err == -EMSGSIZE) 2250 goto nla_put_failure; 2251 } 2252 } 2253 } else 2254 #endif 2255 NLA_PUT_U32(skb, RTA_IIF, iif); 2256 } else if (dst) { 2257 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst); 2258 struct in6_addr saddr_buf; 2259 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, 2260 dst, 0, &saddr_buf) == 0) 2261 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); 2262 } 2263 2264 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 2265 goto nla_put_failure; 2266 2267 if (rt->u.dst.neighbour) 2268 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); 2269 2270 if (rt->u.dst.dev) 2271 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); 2272 2273 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); 2274 2275 if (!(rt->rt6i_flags & RTF_EXPIRES)) 2276 expires = 0; 2277 else if (rt->rt6i_expires - jiffies < INT_MAX) 2278 expires = rt->rt6i_expires - jiffies; 2279 else 2280 expires = INT_MAX; 2281 2282 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, 2283 expires, rt->u.dst.error) < 0) 2284 goto nla_put_failure; 2285 2286 return nlmsg_end(skb, nlh); 2287 2288 nla_put_failure: 2289 nlmsg_cancel(skb, nlh); 2290 return -EMSGSIZE; 2291 } 2292 2293 int rt6_dump_route(struct rt6_info *rt, void *p_arg) 2294 { 2295 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 2296 int prefix; 2297 2298 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 2299 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 2300 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; 2301 } else 2302 prefix = 0; 2303 2304 return rt6_fill_node(arg->net, 2305 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 2306 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, 2307 prefix, 0, NLM_F_MULTI); 2308 } 2309 2310 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2311 { 2312 struct net *net = sock_net(in_skb->sk); 2313 struct nlattr *tb[RTA_MAX+1]; 2314 struct rt6_info *rt; 2315 struct sk_buff *skb; 2316 struct rtmsg *rtm; 2317 struct flowi fl; 2318 int err, iif = 0; 2319 2320 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2321 if (err < 0) 2322 goto errout; 2323 2324 err = -EINVAL; 2325 memset(&fl, 0, sizeof(fl)); 2326 2327 if (tb[RTA_SRC]) { 2328 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 2329 goto errout; 2330 2331 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC])); 2332 } 2333 2334 if (tb[RTA_DST]) { 2335 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 2336 goto errout; 2337 2338 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST])); 2339 } 2340 2341 if (tb[RTA_IIF]) 2342 iif = nla_get_u32(tb[RTA_IIF]); 2343 2344 if (tb[RTA_OIF]) 2345 fl.oif = nla_get_u32(tb[RTA_OIF]); 2346 2347 if (iif) { 2348 struct net_device *dev; 2349 dev = __dev_get_by_index(net, iif); 2350 if (!dev) { 2351 err = -ENODEV; 2352 goto errout; 2353 } 2354 } 2355 2356 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2357 if (skb == NULL) { 2358 err = -ENOBUFS; 2359 goto errout; 2360 } 2361 2362 /* Reserve room for dummy headers, this skb can pass 2363 through good chunk of routing engine. 2364 */ 2365 skb_reset_mac_header(skb); 2366 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2367 2368 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); 2369 skb->dst = &rt->u.dst; 2370 2371 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, 2372 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 2373 nlh->nlmsg_seq, 0, 0, 0); 2374 if (err < 0) { 2375 kfree_skb(skb); 2376 goto errout; 2377 } 2378 2379 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); 2380 errout: 2381 return err; 2382 } 2383 2384 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) 2385 { 2386 struct sk_buff *skb; 2387 struct net *net = info->nl_net; 2388 u32 seq; 2389 int err; 2390 2391 err = -ENOBUFS; 2392 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0; 2393 2394 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); 2395 if (skb == NULL) 2396 goto errout; 2397 2398 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 2399 event, info->pid, seq, 0, 0, 0); 2400 if (err < 0) { 2401 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 2402 WARN_ON(err == -EMSGSIZE); 2403 kfree_skb(skb); 2404 goto errout; 2405 } 2406 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, 2407 info->nlh, gfp_any()); 2408 return; 2409 errout: 2410 if (err < 0) 2411 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 2412 } 2413 2414 static int ip6_route_dev_notify(struct notifier_block *this, 2415 unsigned long event, void *data) 2416 { 2417 struct net_device *dev = (struct net_device *)data; 2418 struct net *net = dev_net(dev); 2419 2420 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { 2421 net->ipv6.ip6_null_entry->u.dst.dev = dev; 2422 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 2423 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2424 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev; 2425 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); 2426 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev; 2427 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); 2428 #endif 2429 } 2430 2431 return NOTIFY_OK; 2432 } 2433 2434 /* 2435 * /proc 2436 */ 2437 2438 #ifdef CONFIG_PROC_FS 2439 2440 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1) 2441 2442 struct rt6_proc_arg 2443 { 2444 char *buffer; 2445 int offset; 2446 int length; 2447 int skip; 2448 int len; 2449 }; 2450 2451 static int rt6_info_route(struct rt6_info *rt, void *p_arg) 2452 { 2453 struct seq_file *m = p_arg; 2454 2455 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); 2456 2457 #ifdef CONFIG_IPV6_SUBTREES 2458 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); 2459 #else 2460 seq_puts(m, "00000000000000000000000000000000 00 "); 2461 #endif 2462 2463 if (rt->rt6i_nexthop) { 2464 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key); 2465 } else { 2466 seq_puts(m, "00000000000000000000000000000000"); 2467 } 2468 seq_printf(m, " %08x %08x %08x %08x %8s\n", 2469 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), 2470 rt->u.dst.__use, rt->rt6i_flags, 2471 rt->rt6i_dev ? rt->rt6i_dev->name : ""); 2472 return 0; 2473 } 2474 2475 static int ipv6_route_show(struct seq_file *m, void *v) 2476 { 2477 struct net *net = (struct net *)m->private; 2478 fib6_clean_all(net, rt6_info_route, 0, m); 2479 return 0; 2480 } 2481 2482 static int ipv6_route_open(struct inode *inode, struct file *file) 2483 { 2484 return single_open_net(inode, file, ipv6_route_show); 2485 } 2486 2487 static const struct file_operations ipv6_route_proc_fops = { 2488 .owner = THIS_MODULE, 2489 .open = ipv6_route_open, 2490 .read = seq_read, 2491 .llseek = seq_lseek, 2492 .release = single_release_net, 2493 }; 2494 2495 static int rt6_stats_seq_show(struct seq_file *seq, void *v) 2496 { 2497 struct net *net = (struct net *)seq->private; 2498 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 2499 net->ipv6.rt6_stats->fib_nodes, 2500 net->ipv6.rt6_stats->fib_route_nodes, 2501 net->ipv6.rt6_stats->fib_rt_alloc, 2502 net->ipv6.rt6_stats->fib_rt_entries, 2503 net->ipv6.rt6_stats->fib_rt_cache, 2504 atomic_read(&net->ipv6.ip6_dst_ops->entries), 2505 net->ipv6.rt6_stats->fib_discarded_routes); 2506 2507 return 0; 2508 } 2509 2510 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 2511 { 2512 return single_open_net(inode, file, rt6_stats_seq_show); 2513 } 2514 2515 static const struct file_operations rt6_stats_seq_fops = { 2516 .owner = THIS_MODULE, 2517 .open = rt6_stats_seq_open, 2518 .read = seq_read, 2519 .llseek = seq_lseek, 2520 .release = single_release_net, 2521 }; 2522 #endif /* CONFIG_PROC_FS */ 2523 2524 #ifdef CONFIG_SYSCTL 2525 2526 static 2527 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, 2528 void __user *buffer, size_t *lenp, loff_t *ppos) 2529 { 2530 struct net *net = current->nsproxy->net_ns; 2531 int delay = net->ipv6.sysctl.flush_delay; 2532 if (write) { 2533 proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 2534 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); 2535 return 0; 2536 } else 2537 return -EINVAL; 2538 } 2539 2540 ctl_table ipv6_route_table_template[] = { 2541 { 2542 .procname = "flush", 2543 .data = &init_net.ipv6.sysctl.flush_delay, 2544 .maxlen = sizeof(int), 2545 .mode = 0200, 2546 .proc_handler = ipv6_sysctl_rtcache_flush 2547 }, 2548 { 2549 .ctl_name = NET_IPV6_ROUTE_GC_THRESH, 2550 .procname = "gc_thresh", 2551 .data = &ip6_dst_ops_template.gc_thresh, 2552 .maxlen = sizeof(int), 2553 .mode = 0644, 2554 .proc_handler = proc_dointvec, 2555 }, 2556 { 2557 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, 2558 .procname = "max_size", 2559 .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 2560 .maxlen = sizeof(int), 2561 .mode = 0644, 2562 .proc_handler = proc_dointvec, 2563 }, 2564 { 2565 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, 2566 .procname = "gc_min_interval", 2567 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2568 .maxlen = sizeof(int), 2569 .mode = 0644, 2570 .proc_handler = proc_dointvec_jiffies, 2571 .strategy = sysctl_jiffies, 2572 }, 2573 { 2574 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, 2575 .procname = "gc_timeout", 2576 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 2577 .maxlen = sizeof(int), 2578 .mode = 0644, 2579 .proc_handler = proc_dointvec_jiffies, 2580 .strategy = sysctl_jiffies, 2581 }, 2582 { 2583 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, 2584 .procname = "gc_interval", 2585 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, 2586 .maxlen = sizeof(int), 2587 .mode = 0644, 2588 .proc_handler = proc_dointvec_jiffies, 2589 .strategy = sysctl_jiffies, 2590 }, 2591 { 2592 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, 2593 .procname = "gc_elasticity", 2594 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, 2595 .maxlen = sizeof(int), 2596 .mode = 0644, 2597 .proc_handler = proc_dointvec_jiffies, 2598 .strategy = sysctl_jiffies, 2599 }, 2600 { 2601 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, 2602 .procname = "mtu_expires", 2603 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, 2604 .maxlen = sizeof(int), 2605 .mode = 0644, 2606 .proc_handler = proc_dointvec_jiffies, 2607 .strategy = sysctl_jiffies, 2608 }, 2609 { 2610 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, 2611 .procname = "min_adv_mss", 2612 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, 2613 .maxlen = sizeof(int), 2614 .mode = 0644, 2615 .proc_handler = proc_dointvec_jiffies, 2616 .strategy = sysctl_jiffies, 2617 }, 2618 { 2619 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, 2620 .procname = "gc_min_interval_ms", 2621 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2622 .maxlen = sizeof(int), 2623 .mode = 0644, 2624 .proc_handler = proc_dointvec_ms_jiffies, 2625 .strategy = sysctl_ms_jiffies, 2626 }, 2627 { .ctl_name = 0 } 2628 }; 2629 2630 struct ctl_table *ipv6_route_sysctl_init(struct net *net) 2631 { 2632 struct ctl_table *table; 2633 2634 table = kmemdup(ipv6_route_table_template, 2635 sizeof(ipv6_route_table_template), 2636 GFP_KERNEL); 2637 2638 if (table) { 2639 table[0].data = &net->ipv6.sysctl.flush_delay; 2640 table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh; 2641 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 2642 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2643 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; 2644 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 2645 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; 2646 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 2647 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 2648 } 2649 2650 return table; 2651 } 2652 #endif 2653 2654 static int ip6_route_net_init(struct net *net) 2655 { 2656 int ret = -ENOMEM; 2657 2658 net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template, 2659 sizeof(*net->ipv6.ip6_dst_ops), 2660 GFP_KERNEL); 2661 if (!net->ipv6.ip6_dst_ops) 2662 goto out; 2663 net->ipv6.ip6_dst_ops->dst_net = hold_net(net); 2664 2665 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 2666 sizeof(*net->ipv6.ip6_null_entry), 2667 GFP_KERNEL); 2668 if (!net->ipv6.ip6_null_entry) 2669 goto out_ip6_dst_ops; 2670 net->ipv6.ip6_null_entry->u.dst.path = 2671 (struct dst_entry *)net->ipv6.ip6_null_entry; 2672 net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops; 2673 2674 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2675 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 2676 sizeof(*net->ipv6.ip6_prohibit_entry), 2677 GFP_KERNEL); 2678 if (!net->ipv6.ip6_prohibit_entry) 2679 goto out_ip6_null_entry; 2680 net->ipv6.ip6_prohibit_entry->u.dst.path = 2681 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 2682 net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops; 2683 2684 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 2685 sizeof(*net->ipv6.ip6_blk_hole_entry), 2686 GFP_KERNEL); 2687 if (!net->ipv6.ip6_blk_hole_entry) 2688 goto out_ip6_prohibit_entry; 2689 net->ipv6.ip6_blk_hole_entry->u.dst.path = 2690 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 2691 net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops; 2692 #endif 2693 2694 net->ipv6.sysctl.flush_delay = 0; 2695 net->ipv6.sysctl.ip6_rt_max_size = 4096; 2696 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 2697 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 2698 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 2699 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 2700 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 2701 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 2702 2703 #ifdef CONFIG_PROC_FS 2704 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); 2705 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2706 #endif 2707 net->ipv6.ip6_rt_gc_expire = 30*HZ; 2708 2709 ret = 0; 2710 out: 2711 return ret; 2712 2713 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2714 out_ip6_prohibit_entry: 2715 kfree(net->ipv6.ip6_prohibit_entry); 2716 out_ip6_null_entry: 2717 kfree(net->ipv6.ip6_null_entry); 2718 #endif 2719 out_ip6_dst_ops: 2720 release_net(net->ipv6.ip6_dst_ops->dst_net); 2721 kfree(net->ipv6.ip6_dst_ops); 2722 goto out; 2723 } 2724 2725 static void ip6_route_net_exit(struct net *net) 2726 { 2727 #ifdef CONFIG_PROC_FS 2728 proc_net_remove(net, "ipv6_route"); 2729 proc_net_remove(net, "rt6_stats"); 2730 #endif 2731 kfree(net->ipv6.ip6_null_entry); 2732 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2733 kfree(net->ipv6.ip6_prohibit_entry); 2734 kfree(net->ipv6.ip6_blk_hole_entry); 2735 #endif 2736 release_net(net->ipv6.ip6_dst_ops->dst_net); 2737 kfree(net->ipv6.ip6_dst_ops); 2738 } 2739 2740 static struct pernet_operations ip6_route_net_ops = { 2741 .init = ip6_route_net_init, 2742 .exit = ip6_route_net_exit, 2743 }; 2744 2745 static struct notifier_block ip6_route_dev_notifier = { 2746 .notifier_call = ip6_route_dev_notify, 2747 .priority = 0, 2748 }; 2749 2750 int __init ip6_route_init(void) 2751 { 2752 int ret; 2753 2754 ret = -ENOMEM; 2755 ip6_dst_ops_template.kmem_cachep = 2756 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 2757 SLAB_HWCACHE_ALIGN, NULL); 2758 if (!ip6_dst_ops_template.kmem_cachep) 2759 goto out; 2760 2761 ret = register_pernet_subsys(&ip6_route_net_ops); 2762 if (ret) 2763 goto out_kmem_cache; 2764 2765 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 2766 2767 /* Registering of the loopback is done before this portion of code, 2768 * the loopback reference in rt6_info will not be taken, do it 2769 * manually for init_net */ 2770 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev; 2771 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2772 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2773 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev; 2774 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2775 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev; 2776 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2777 #endif 2778 ret = fib6_init(); 2779 if (ret) 2780 goto out_register_subsys; 2781 2782 ret = xfrm6_init(); 2783 if (ret) 2784 goto out_fib6_init; 2785 2786 ret = fib6_rules_init(); 2787 if (ret) 2788 goto xfrm6_init; 2789 2790 ret = -ENOBUFS; 2791 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) || 2792 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) || 2793 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL)) 2794 goto fib6_rules_init; 2795 2796 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 2797 if (ret) 2798 goto fib6_rules_init; 2799 2800 out: 2801 return ret; 2802 2803 fib6_rules_init: 2804 fib6_rules_cleanup(); 2805 xfrm6_init: 2806 xfrm6_fini(); 2807 out_fib6_init: 2808 fib6_gc_cleanup(); 2809 out_register_subsys: 2810 unregister_pernet_subsys(&ip6_route_net_ops); 2811 out_kmem_cache: 2812 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2813 goto out; 2814 } 2815 2816 void ip6_route_cleanup(void) 2817 { 2818 unregister_netdevice_notifier(&ip6_route_dev_notifier); 2819 fib6_rules_cleanup(); 2820 xfrm6_fini(); 2821 fib6_gc_cleanup(); 2822 unregister_pernet_subsys(&ip6_route_net_ops); 2823 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2824 } 2825