1 /* 2 * Linux INET6 implementation 3 * FIB front-end. 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 */ 13 14 /* Changes: 15 * 16 * YOSHIFUJI Hideaki @USAGI 17 * reworked default router selection. 18 * - respect outgoing interface 19 * - select from (probably) reachable routers (i.e. 20 * routers in REACHABLE, STALE, DELAY or PROBE states). 21 * - always select the same router if it is (probably) 22 * reachable. otherwise, round-robin the list. 23 * Ville Nuorvala 24 * Fixed routing subtrees. 25 */ 26 27 #include <linux/capability.h> 28 #include <linux/errno.h> 29 #include <linux/types.h> 30 #include <linux/times.h> 31 #include <linux/socket.h> 32 #include <linux/sockios.h> 33 #include <linux/net.h> 34 #include <linux/route.h> 35 #include <linux/netdevice.h> 36 #include <linux/in6.h> 37 #include <linux/mroute6.h> 38 #include <linux/init.h> 39 #include <linux/if_arp.h> 40 #include <linux/proc_fs.h> 41 #include <linux/seq_file.h> 42 #include <linux/nsproxy.h> 43 #include <net/net_namespace.h> 44 #include <net/snmp.h> 45 #include <net/ipv6.h> 46 #include <net/ip6_fib.h> 47 #include <net/ip6_route.h> 48 #include <net/ndisc.h> 49 #include <net/addrconf.h> 50 #include <net/tcp.h> 51 #include <linux/rtnetlink.h> 52 #include <net/dst.h> 53 #include <net/xfrm.h> 54 #include <net/netevent.h> 55 #include <net/netlink.h> 56 57 #include <asm/uaccess.h> 58 59 #ifdef CONFIG_SYSCTL 60 #include <linux/sysctl.h> 61 #endif 62 63 /* Set to 3 to get tracing. */ 64 #define RT6_DEBUG 2 65 66 #if RT6_DEBUG >= 3 67 #define RDBG(x) printk x 68 #define RT6_TRACE(x...) printk(KERN_DEBUG x) 69 #else 70 #define RDBG(x) 71 #define RT6_TRACE(x...) 
do { ; } while (0) 72 #endif 73 74 #define CLONE_OFFLINK_ROUTE 0 75 76 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); 77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 78 static struct dst_entry *ip6_negative_advice(struct dst_entry *); 79 static void ip6_dst_destroy(struct dst_entry *); 80 static void ip6_dst_ifdown(struct dst_entry *, 81 struct net_device *dev, int how); 82 static int ip6_dst_gc(struct dst_ops *ops); 83 84 static int ip6_pkt_discard(struct sk_buff *skb); 85 static int ip6_pkt_discard_out(struct sk_buff *skb); 86 static void ip6_link_failure(struct sk_buff *skb); 87 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 88 89 #ifdef CONFIG_IPV6_ROUTE_INFO 90 static struct rt6_info *rt6_add_route_info(struct net *net, 91 struct in6_addr *prefix, int prefixlen, 92 struct in6_addr *gwaddr, int ifindex, 93 unsigned pref); 94 static struct rt6_info *rt6_get_route_info(struct net *net, 95 struct in6_addr *prefix, int prefixlen, 96 struct in6_addr *gwaddr, int ifindex); 97 #endif 98 99 static struct dst_ops ip6_dst_ops_template = { 100 .family = AF_INET6, 101 .protocol = cpu_to_be16(ETH_P_IPV6), 102 .gc = ip6_dst_gc, 103 .gc_thresh = 1024, 104 .check = ip6_dst_check, 105 .destroy = ip6_dst_destroy, 106 .ifdown = ip6_dst_ifdown, 107 .negative_advice = ip6_negative_advice, 108 .link_failure = ip6_link_failure, 109 .update_pmtu = ip6_rt_update_pmtu, 110 .local_out = __ip6_local_out, 111 .entries = ATOMIC_INIT(0), 112 }; 113 114 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 115 { 116 } 117 118 static struct dst_ops ip6_dst_blackhole_ops = { 119 .family = AF_INET6, 120 .protocol = cpu_to_be16(ETH_P_IPV6), 121 .destroy = ip6_dst_destroy, 122 .check = ip6_dst_check, 123 .update_pmtu = ip6_rt_blackhole_update_pmtu, 124 .entries = ATOMIC_INIT(0), 125 }; 126 127 static struct rt6_info ip6_null_entry_template = { 128 .u = { 129 .dst = { 130 .__refcnt = ATOMIC_INIT(1), 131 .__use = 1, 132 
.obsolete = -1, 133 .error = -ENETUNREACH, 134 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 135 .input = ip6_pkt_discard, 136 .output = ip6_pkt_discard_out, 137 } 138 }, 139 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 140 .rt6i_protocol = RTPROT_KERNEL, 141 .rt6i_metric = ~(u32) 0, 142 .rt6i_ref = ATOMIC_INIT(1), 143 }; 144 145 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 146 147 static int ip6_pkt_prohibit(struct sk_buff *skb); 148 static int ip6_pkt_prohibit_out(struct sk_buff *skb); 149 150 static struct rt6_info ip6_prohibit_entry_template = { 151 .u = { 152 .dst = { 153 .__refcnt = ATOMIC_INIT(1), 154 .__use = 1, 155 .obsolete = -1, 156 .error = -EACCES, 157 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 158 .input = ip6_pkt_prohibit, 159 .output = ip6_pkt_prohibit_out, 160 } 161 }, 162 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 163 .rt6i_protocol = RTPROT_KERNEL, 164 .rt6i_metric = ~(u32) 0, 165 .rt6i_ref = ATOMIC_INIT(1), 166 }; 167 168 static struct rt6_info ip6_blk_hole_entry_template = { 169 .u = { 170 .dst = { 171 .__refcnt = ATOMIC_INIT(1), 172 .__use = 1, 173 .obsolete = -1, 174 .error = -EINVAL, 175 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 176 .input = dst_discard, 177 .output = dst_discard, 178 } 179 }, 180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 181 .rt6i_protocol = RTPROT_KERNEL, 182 .rt6i_metric = ~(u32) 0, 183 .rt6i_ref = ATOMIC_INIT(1), 184 }; 185 186 #endif 187 188 /* allocate dst with ip6_dst_ops */ 189 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops) 190 { 191 return (struct rt6_info *)dst_alloc(ops); 192 } 193 194 static void ip6_dst_destroy(struct dst_entry *dst) 195 { 196 struct rt6_info *rt = (struct rt6_info *)dst; 197 struct inet6_dev *idev = rt->rt6i_idev; 198 199 if (idev != NULL) { 200 rt->rt6i_idev = NULL; 201 in6_dev_put(idev); 202 } 203 } 204 205 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 206 int how) 207 { 208 struct rt6_info *rt = (struct rt6_info *)dst; 209 struct inet6_dev *idev = 
rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	/* The route's device is going away: re-point rt6i_idev at the
	 * per-namespace loopback device so the dst no longer pins the
	 * dying device's inet6_dev. */
	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
		struct inet6_dev *loopback_idev =
			in6_dev_get(loopback_dev);
		if (loopback_idev != NULL) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}

/* True iff the route carries RTF_EXPIRES and its expiry time has passed. */
static __inline__ int rt6_check_expired(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & RTF_EXPIRES &&
		time_after(jiffies, rt->rt6i_expires));
}

/* Multicast, link-local and loopback destinations are only meaningful
 * relative to a specific interface, so lookups for them must be strict
 * (interface-bound). */
static inline int rt6_need_strict(struct in6_addr *daddr)
{
	return (ipv6_addr_type(daddr) &
		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
}

/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

/*
 * Walk the sibling chain starting at @rt and return the entry that best
 * matches the requested output interface (@oif) or source address (@saddr).
 *
 * With an @oif: an exact device match wins outright; a loopback route is
 * remembered in @local as a fallback (subject to RT6_LOOKUP_F_IFACE
 * strictness).  Without an @oif: the first route whose device owns @saddr
 * wins.  If an @oif was given, nothing matched, and the lookup is strict,
 * the namespace null entry (reject route) is returned; otherwise the
 * original head @rt is returned unchanged.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* No constraint at all: the head of the chain is as good as any. */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
		struct net_device *dev = sprt->rt6i_dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (sprt->rt6i_idev == NULL ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					/* Loopback route bound to some other
					 * interface: unusable when strict, and
					 * not preferable over an existing
					 * oif-matching local candidate. */
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					if (local && (!oif ||
					    local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		/* Strict lookup with no device match: reject. */
		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
/* Send a unicast-triggered Neighbour Solicitation toward the route's
 * nexthop when its reachability is unknown (Router Reachability Probing). */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* NOTE(review): neigh->updated is written here while holding
		 * only the read side of neigh->lock — presumably a tolerated
		 * benign race used purely for rate limiting; confirm. */
		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		read_unlock_bh(&neigh->lock);
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif

/*
 *	Default Router Selection (RFC 2461 6.3.6)
 */
/* Device score: 2 = exact oif match (or no oif requested), 1 = loopback
 * route whose backing idev matches oif, 0 = no match. */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->rt6i_dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

/* Neighbour score: 2 = nexthop known reachable (NUD_VALID), 1 = no
 * nexthop required or reachability unknown, 0 = no neighbour entry (or,
 * with router-pref, a failed one). */
static inline int rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh = rt->rt6i_nexthop;
	int m;
	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		m = 1;
	else if (neigh) {
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			m = 2;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (neigh->nud_state & NUD_FAILED)
			m = 0;
#endif
		else
			m = 1;
		read_unlock_bh(&neigh->lock);
	} else
		m = 0;
	return m;
}

/* Combined route score for default-router selection; -1 means the route
 * is unusable under the given strictness flags. */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m, n;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict &
RT6_LOOKUP_F_IFACE)) 366 return -1; 367 #ifdef CONFIG_IPV6_ROUTER_PREF 368 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; 369 #endif 370 n = rt6_check_neigh(rt); 371 if (!n && (strict & RT6_LOOKUP_F_REACHABLE)) 372 return -1; 373 return m; 374 } 375 376 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, 377 int *mpri, struct rt6_info *match) 378 { 379 int m; 380 381 if (rt6_check_expired(rt)) 382 goto out; 383 384 m = rt6_score_route(rt, oif, strict); 385 if (m < 0) 386 goto out; 387 388 if (m > *mpri) { 389 if (strict & RT6_LOOKUP_F_REACHABLE) 390 rt6_probe(match); 391 *mpri = m; 392 match = rt; 393 } else if (strict & RT6_LOOKUP_F_REACHABLE) { 394 rt6_probe(rt); 395 } 396 397 out: 398 return match; 399 } 400 401 static struct rt6_info *find_rr_leaf(struct fib6_node *fn, 402 struct rt6_info *rr_head, 403 u32 metric, int oif, int strict) 404 { 405 struct rt6_info *rt, *match; 406 int mpri = -1; 407 408 match = NULL; 409 for (rt = rr_head; rt && rt->rt6i_metric == metric; 410 rt = rt->u.dst.rt6_next) 411 match = find_match(rt, oif, strict, &mpri, match); 412 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; 413 rt = rt->u.dst.rt6_next) 414 match = find_match(rt, oif, strict, &mpri, match); 415 416 return match; 417 } 418 419 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) 420 { 421 struct rt6_info *match, *rt0; 422 struct net *net; 423 424 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n", 425 __func__, fn->leaf, oif); 426 427 rt0 = fn->rr_ptr; 428 if (!rt0) 429 fn->rr_ptr = rt0 = fn->leaf; 430 431 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict); 432 433 if (!match && 434 (strict & RT6_LOOKUP_F_REACHABLE)) { 435 struct rt6_info *next = rt0->u.dst.rt6_next; 436 437 /* no entries matched; do round-robin */ 438 if (!next || next->rt6i_metric != rt0->rt6i_metric) 439 next = fn->leaf; 440 441 if (next != rt0) 442 fn->rr_ptr = next; 443 } 444 445 RT6_TRACE("%s() => %p\n", 446 
__func__, match); 447 448 net = dev_net(rt0->rt6i_dev); 449 return (match ? match : net->ipv6.ip6_null_entry); 450 } 451 452 #ifdef CONFIG_IPV6_ROUTE_INFO 453 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 454 struct in6_addr *gwaddr) 455 { 456 struct net *net = dev_net(dev); 457 struct route_info *rinfo = (struct route_info *) opt; 458 struct in6_addr prefix_buf, *prefix; 459 unsigned int pref; 460 unsigned long lifetime; 461 struct rt6_info *rt; 462 463 if (len < sizeof(struct route_info)) { 464 return -EINVAL; 465 } 466 467 /* Sanity check for prefix_len and length */ 468 if (rinfo->length > 3) { 469 return -EINVAL; 470 } else if (rinfo->prefix_len > 128) { 471 return -EINVAL; 472 } else if (rinfo->prefix_len > 64) { 473 if (rinfo->length < 2) { 474 return -EINVAL; 475 } 476 } else if (rinfo->prefix_len > 0) { 477 if (rinfo->length < 1) { 478 return -EINVAL; 479 } 480 } 481 482 pref = rinfo->route_pref; 483 if (pref == ICMPV6_ROUTER_PREF_INVALID) 484 return -EINVAL; 485 486 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); 487 488 if (rinfo->length == 3) 489 prefix = (struct in6_addr *)rinfo->prefix; 490 else { 491 /* this function is safe */ 492 ipv6_addr_prefix(&prefix_buf, 493 (struct in6_addr *)rinfo->prefix, 494 rinfo->prefix_len); 495 prefix = &prefix_buf; 496 } 497 498 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, 499 dev->ifindex); 500 501 if (rt && !lifetime) { 502 ip6_del_rt(rt); 503 rt = NULL; 504 } 505 506 if (!rt && lifetime) 507 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, 508 pref); 509 else if (rt) 510 rt->rt6i_flags = RTF_ROUTEINFO | 511 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 512 513 if (rt) { 514 if (!addrconf_finite_timeout(lifetime)) { 515 rt->rt6i_flags &= ~RTF_EXPIRES; 516 } else { 517 rt->rt6i_expires = jiffies + HZ * lifetime; 518 rt->rt6i_flags |= RTF_EXPIRES; 519 } 520 dst_release(&rt->u.dst); 521 } 522 return 0; 523 } 524 #endif 525 526 #define 
BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) {		\
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)

/*
 * Per-table policy lookup, called via fib6_rule_lookup().
 *
 * Finds the fib6 node for the flow's destination (and source, with
 * subtrees), picks the best chain entry with rt6_device_match(), and —
 * via the BACKTRACK macro above — walks back up the tree (descending
 * into source-address subtrees where present) whenever the match was the
 * null entry.  BACKTRACK expands non-local jumps: it requires locals
 * `fn' and `rt' and labels `restart:' / `out:' in the calling function.
 *
 * Returns the matched route with its use count/timestamp bumped by
 * dst_use(); never NULL (the null entry is itself a valid rt6_info).
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi *fl, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
	BACKTRACK(net, &fl->fl6_src);
out:
	dst_use(&rt->u.dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}

/*
 * Simple route lookup by destination/source/interface.  @strict maps to
 * RT6_LOOKUP_F_IFACE.  Returns a referenced rt6_info on success (caller
 * must release it) or NULL if the resolved dst carries an error.
 */
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi fl = {
		.oif = oif,
		.nl_u = {
			.ip6_u = {
				.daddr = *daddr,
			},
		},
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	/* Error dst (e.g. the null entry): drop the reference taken by
	 * the lookup and report failure. */
	dst_release(dst);

	return NULL;
}

EXPORT_SYMBOL(rt6_lookup);

/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes a new route entry; if the addition fails for any reason the
   route is freed.  In any case, if the caller does not hold it, it may
   be destroyed.
598 */ 599 600 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) 601 { 602 int err; 603 struct fib6_table *table; 604 605 table = rt->rt6i_table; 606 write_lock_bh(&table->tb6_lock); 607 err = fib6_add(&table->tb6_root, rt, info); 608 write_unlock_bh(&table->tb6_lock); 609 610 return err; 611 } 612 613 int ip6_ins_rt(struct rt6_info *rt) 614 { 615 struct nl_info info = { 616 .nl_net = dev_net(rt->rt6i_dev), 617 }; 618 return __ip6_ins_rt(rt, &info); 619 } 620 621 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, 622 struct in6_addr *saddr) 623 { 624 struct rt6_info *rt; 625 626 /* 627 * Clone the route. 628 */ 629 630 rt = ip6_rt_copy(ort); 631 632 if (rt) { 633 struct neighbour *neigh; 634 int attempts = !in_softirq(); 635 636 if (!(rt->rt6i_flags&RTF_GATEWAY)) { 637 if (rt->rt6i_dst.plen != 128 && 638 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) 639 rt->rt6i_flags |= RTF_ANYCAST; 640 ipv6_addr_copy(&rt->rt6i_gateway, daddr); 641 } 642 643 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 644 rt->rt6i_dst.plen = 128; 645 rt->rt6i_flags |= RTF_CACHE; 646 rt->u.dst.flags |= DST_HOST; 647 648 #ifdef CONFIG_IPV6_SUBTREES 649 if (rt->rt6i_src.plen && saddr) { 650 ipv6_addr_copy(&rt->rt6i_src.addr, saddr); 651 rt->rt6i_src.plen = 128; 652 } 653 #endif 654 655 retry: 656 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 657 if (IS_ERR(neigh)) { 658 struct net *net = dev_net(rt->rt6i_dev); 659 int saved_rt_min_interval = 660 net->ipv6.sysctl.ip6_rt_gc_min_interval; 661 int saved_rt_elasticity = 662 net->ipv6.sysctl.ip6_rt_gc_elasticity; 663 664 if (attempts-- > 0) { 665 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; 666 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; 667 668 ip6_dst_gc(&net->ipv6.ip6_dst_ops); 669 670 net->ipv6.sysctl.ip6_rt_gc_elasticity = 671 saved_rt_elasticity; 672 net->ipv6.sysctl.ip6_rt_gc_min_interval = 673 saved_rt_min_interval; 674 goto retry; 675 } 676 677 if (net_ratelimit()) 678 printk(KERN_WARNING 
679 "Neighbour table overflow.\n"); 680 dst_free(&rt->u.dst); 681 return NULL; 682 } 683 rt->rt6i_nexthop = neigh; 684 685 } 686 687 return rt; 688 } 689 690 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr) 691 { 692 struct rt6_info *rt = ip6_rt_copy(ort); 693 if (rt) { 694 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 695 rt->rt6i_dst.plen = 128; 696 rt->rt6i_flags |= RTF_CACHE; 697 rt->u.dst.flags |= DST_HOST; 698 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); 699 } 700 return rt; 701 } 702 703 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, 704 struct flowi *fl, int flags) 705 { 706 struct fib6_node *fn; 707 struct rt6_info *rt, *nrt; 708 int strict = 0; 709 int attempts = 3; 710 int err; 711 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; 712 713 strict |= flags & RT6_LOOKUP_F_IFACE; 714 715 relookup: 716 read_lock_bh(&table->tb6_lock); 717 718 restart_2: 719 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 720 721 restart: 722 rt = rt6_select(fn, oif, strict | reachable); 723 724 BACKTRACK(net, &fl->fl6_src); 725 if (rt == net->ipv6.ip6_null_entry || 726 rt->rt6i_flags & RTF_CACHE) 727 goto out; 728 729 dst_hold(&rt->u.dst); 730 read_unlock_bh(&table->tb6_lock); 731 732 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 733 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 734 else { 735 #if CLONE_OFFLINK_ROUTE 736 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 737 #else 738 goto out2; 739 #endif 740 } 741 742 dst_release(&rt->u.dst); 743 rt = nrt ? : net->ipv6.ip6_null_entry; 744 745 dst_hold(&rt->u.dst); 746 if (nrt) { 747 err = ip6_ins_rt(nrt); 748 if (!err) 749 goto out2; 750 } 751 752 if (--attempts <= 0) 753 goto out2; 754 755 /* 756 * Race condition! In the gap, when table->tb6_lock was 757 * released someone could insert this route. Relookup. 
758 */ 759 dst_release(&rt->u.dst); 760 goto relookup; 761 762 out: 763 if (reachable) { 764 reachable = 0; 765 goto restart_2; 766 } 767 dst_hold(&rt->u.dst); 768 read_unlock_bh(&table->tb6_lock); 769 out2: 770 rt->u.dst.lastuse = jiffies; 771 rt->u.dst.__use++; 772 773 return rt; 774 } 775 776 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, 777 struct flowi *fl, int flags) 778 { 779 return ip6_pol_route(net, table, fl->iif, fl, flags); 780 } 781 782 void ip6_route_input(struct sk_buff *skb) 783 { 784 struct ipv6hdr *iph = ipv6_hdr(skb); 785 struct net *net = dev_net(skb->dev); 786 int flags = RT6_LOOKUP_F_HAS_SADDR; 787 struct flowi fl = { 788 .iif = skb->dev->ifindex, 789 .nl_u = { 790 .ip6_u = { 791 .daddr = iph->daddr, 792 .saddr = iph->saddr, 793 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, 794 }, 795 }, 796 .mark = skb->mark, 797 .proto = iph->nexthdr, 798 }; 799 800 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) 801 flags |= RT6_LOOKUP_F_IFACE; 802 803 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input)); 804 } 805 806 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, 807 struct flowi *fl, int flags) 808 { 809 return ip6_pol_route(net, table, fl->oif, fl, flags); 810 } 811 812 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, 813 struct flowi *fl) 814 { 815 int flags = 0; 816 817 if (rt6_need_strict(&fl->fl6_dst)) 818 flags |= RT6_LOOKUP_F_IFACE; 819 820 if (!ipv6_addr_any(&fl->fl6_src)) 821 flags |= RT6_LOOKUP_F_HAS_SADDR; 822 else if (sk) { 823 unsigned int prefs = inet6_sk(sk)->srcprefs; 824 if (prefs & IPV6_PREFER_SRC_TMP) 825 flags |= RT6_LOOKUP_F_SRCPREF_TMP; 826 if (prefs & IPV6_PREFER_SRC_PUBLIC) 827 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC; 828 if (prefs & IPV6_PREFER_SRC_COA) 829 flags |= RT6_LOOKUP_F_SRCPREF_COA; 830 } 831 832 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); 833 } 834 835 
EXPORT_SYMBOL(ip6_route_output); 836 837 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) 838 { 839 struct rt6_info *ort = (struct rt6_info *) *dstp; 840 struct rt6_info *rt = (struct rt6_info *) 841 dst_alloc(&ip6_dst_blackhole_ops); 842 struct dst_entry *new = NULL; 843 844 if (rt) { 845 new = &rt->u.dst; 846 847 atomic_set(&new->__refcnt, 1); 848 new->__use = 1; 849 new->input = dst_discard; 850 new->output = dst_discard; 851 852 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 853 new->dev = ort->u.dst.dev; 854 if (new->dev) 855 dev_hold(new->dev); 856 rt->rt6i_idev = ort->rt6i_idev; 857 if (rt->rt6i_idev) 858 in6_dev_hold(rt->rt6i_idev); 859 rt->rt6i_expires = 0; 860 861 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 862 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 863 rt->rt6i_metric = 0; 864 865 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 866 #ifdef CONFIG_IPV6_SUBTREES 867 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 868 #endif 869 870 dst_free(new); 871 } 872 873 dst_release(*dstp); 874 *dstp = new; 875 return (new ? 
0 : -ENOMEM);
}
EXPORT_SYMBOL_GPL(ip6_dst_blackhole);

/*
 *	Destination cache support functions
 */

/* Validate a cached dst: it is still usable only while its fib6 node
 * exists and the node's serial number matches the caller's cookie (i.e.
 * the tree has not changed under it). */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
		return dst;

	return NULL;
}

/* Socket is giving up on this dst: delete cloned cache entries outright,
 * otherwise just drop the reference.  Always returns NULL so the caller
 * forgets the dst. */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE)
			ip6_del_rt(rt);
		else
			dst_release(dst);
	}
	return NULL;
}

/* Transmit failed on this route: tell the sender (ICMPv6 address
 * unreachable), expire cached clones immediately, and invalidate the
 * fib node's serial for default routes so cached dsts get re-checked. */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags&RTF_CACHE) {
			dst_set_expires(&rt->u.dst, 0);
			rt->rt6i_flags |= RTF_EXPIRES;
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
			rt->rt6i_node->fn_sernum = -1;
	}
}

/* Record a smaller path MTU on a host (/128) cache entry; clamp to
 * IPV6_MIN_MTU and flag ALLFRAG below it, then notify netevent
 * listeners of the PMTU change. */
static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info*)dst;

	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU) {
			mtu = IPV6_MIN_MTU;
			dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
		}
		dst->metrics[RTAX_MTU-1] = mtu;
		call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
	}
}

static int ipv6_get_mtu(struct net_device *dev);

/* Derive the advertised MSS metric from a path MTU, bounded below by the
 * ip6_rt_min_advmss sysctl. */
static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
{
	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
951 * IPV6_MAXPLEN is also valid and means: "any MSS, 952 * rely only on pmtu discovery" 953 */ 954 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 955 mtu = IPV6_MAXPLEN; 956 return mtu; 957 } 958 959 static struct dst_entry *icmp6_dst_gc_list; 960 static DEFINE_SPINLOCK(icmp6_dst_lock); 961 962 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 963 struct neighbour *neigh, 964 const struct in6_addr *addr) 965 { 966 struct rt6_info *rt; 967 struct inet6_dev *idev = in6_dev_get(dev); 968 struct net *net = dev_net(dev); 969 970 if (unlikely(idev == NULL)) 971 return NULL; 972 973 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 974 if (unlikely(rt == NULL)) { 975 in6_dev_put(idev); 976 goto out; 977 } 978 979 dev_hold(dev); 980 if (neigh) 981 neigh_hold(neigh); 982 else { 983 neigh = ndisc_get_neigh(dev, addr); 984 if (IS_ERR(neigh)) 985 neigh = NULL; 986 } 987 988 rt->rt6i_dev = dev; 989 rt->rt6i_idev = idev; 990 rt->rt6i_nexthop = neigh; 991 atomic_set(&rt->u.dst.__refcnt, 1); 992 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; 993 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 994 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 995 rt->u.dst.output = ip6_output; 996 997 #if 0 /* there's no chance to use these for ndisc */ 998 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 999 ? 
DST_HOST 1000 : 0; 1001 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 1002 rt->rt6i_dst.plen = 128; 1003 #endif 1004 1005 spin_lock_bh(&icmp6_dst_lock); 1006 rt->u.dst.next = icmp6_dst_gc_list; 1007 icmp6_dst_gc_list = &rt->u.dst; 1008 spin_unlock_bh(&icmp6_dst_lock); 1009 1010 fib6_force_start_gc(net); 1011 1012 out: 1013 return &rt->u.dst; 1014 } 1015 1016 int icmp6_dst_gc(void) 1017 { 1018 struct dst_entry *dst, *next, **pprev; 1019 int more = 0; 1020 1021 next = NULL; 1022 1023 spin_lock_bh(&icmp6_dst_lock); 1024 pprev = &icmp6_dst_gc_list; 1025 1026 while ((dst = *pprev) != NULL) { 1027 if (!atomic_read(&dst->__refcnt)) { 1028 *pprev = dst->next; 1029 dst_free(dst); 1030 } else { 1031 pprev = &dst->next; 1032 ++more; 1033 } 1034 } 1035 1036 spin_unlock_bh(&icmp6_dst_lock); 1037 1038 return more; 1039 } 1040 1041 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), 1042 void *arg) 1043 { 1044 struct dst_entry *dst, **pprev; 1045 1046 spin_lock_bh(&icmp6_dst_lock); 1047 pprev = &icmp6_dst_gc_list; 1048 while ((dst = *pprev) != NULL) { 1049 struct rt6_info *rt = (struct rt6_info *) dst; 1050 if (func(rt, arg)) { 1051 *pprev = dst->next; 1052 dst_free(dst); 1053 } else { 1054 pprev = &dst->next; 1055 } 1056 } 1057 spin_unlock_bh(&icmp6_dst_lock); 1058 } 1059 1060 static int ip6_dst_gc(struct dst_ops *ops) 1061 { 1062 unsigned long now = jiffies; 1063 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); 1064 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; 1065 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; 1066 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 1067 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 1068 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 1069 1070 if (time_after(rt_last_gc + rt_min_interval, now) && 1071 atomic_read(&ops->entries) <= rt_max_size) 1072 goto out; 1073 1074 net->ipv6.ip6_rt_gc_expire++; 1075 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); 1076 
net->ipv6.ip6_rt_last_gc = now; 1077 if (atomic_read(&ops->entries) < ops->gc_thresh) 1078 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1079 out: 1080 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1081 return (atomic_read(&ops->entries) > rt_max_size); 1082 } 1083 1084 /* Clean host part of a prefix. Not necessary in radix tree, 1085 but results in cleaner routing tables. 1086 1087 Remove it only when all the things will work! 1088 */ 1089 1090 static int ipv6_get_mtu(struct net_device *dev) 1091 { 1092 int mtu = IPV6_MIN_MTU; 1093 struct inet6_dev *idev; 1094 1095 idev = in6_dev_get(dev); 1096 if (idev) { 1097 mtu = idev->cnf.mtu6; 1098 in6_dev_put(idev); 1099 } 1100 return mtu; 1101 } 1102 1103 int ip6_dst_hoplimit(struct dst_entry *dst) 1104 { 1105 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); 1106 if (hoplimit < 0) { 1107 struct net_device *dev = dst->dev; 1108 struct inet6_dev *idev = in6_dev_get(dev); 1109 if (idev) { 1110 hoplimit = idev->cnf.hop_limit; 1111 in6_dev_put(idev); 1112 } else 1113 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; 1114 } 1115 return hoplimit; 1116 } 1117 1118 /* 1119 * 1120 */ 1121 1122 int ip6_route_add(struct fib6_config *cfg) 1123 { 1124 int err; 1125 struct net *net = cfg->fc_nlinfo.nl_net; 1126 struct rt6_info *rt = NULL; 1127 struct net_device *dev = NULL; 1128 struct inet6_dev *idev = NULL; 1129 struct fib6_table *table; 1130 int addr_type; 1131 1132 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1133 return -EINVAL; 1134 #ifndef CONFIG_IPV6_SUBTREES 1135 if (cfg->fc_src_len) 1136 return -EINVAL; 1137 #endif 1138 if (cfg->fc_ifindex) { 1139 err = -ENODEV; 1140 dev = dev_get_by_index(net, cfg->fc_ifindex); 1141 if (!dev) 1142 goto out; 1143 idev = in6_dev_get(dev); 1144 if (!idev) 1145 goto out; 1146 } 1147 1148 if (cfg->fc_metric == 0) 1149 cfg->fc_metric = IP6_RT_PRIO_USER; 1150 1151 table = fib6_new_table(net, cfg->fc_table); 1152 if (table == NULL) { 1153 err = -ENOBUFS; 1154 goto 
out; 1155 } 1156 1157 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 1158 1159 if (rt == NULL) { 1160 err = -ENOMEM; 1161 goto out; 1162 } 1163 1164 rt->u.dst.obsolete = -1; 1165 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? 1166 jiffies + clock_t_to_jiffies(cfg->fc_expires) : 1167 0; 1168 1169 if (cfg->fc_protocol == RTPROT_UNSPEC) 1170 cfg->fc_protocol = RTPROT_BOOT; 1171 rt->rt6i_protocol = cfg->fc_protocol; 1172 1173 addr_type = ipv6_addr_type(&cfg->fc_dst); 1174 1175 if (addr_type & IPV6_ADDR_MULTICAST) 1176 rt->u.dst.input = ip6_mc_input; 1177 else 1178 rt->u.dst.input = ip6_forward; 1179 1180 rt->u.dst.output = ip6_output; 1181 1182 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1183 rt->rt6i_dst.plen = cfg->fc_dst_len; 1184 if (rt->rt6i_dst.plen == 128) 1185 rt->u.dst.flags = DST_HOST; 1186 1187 #ifdef CONFIG_IPV6_SUBTREES 1188 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1189 rt->rt6i_src.plen = cfg->fc_src_len; 1190 #endif 1191 1192 rt->rt6i_metric = cfg->fc_metric; 1193 1194 /* We cannot add true routes via loopback here, 1195 they would result in kernel looping; promote them to reject routes 1196 */ 1197 if ((cfg->fc_flags & RTF_REJECT) || 1198 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { 1199 /* hold loopback dev/idev if we haven't done so. 
*/ 1200 if (dev != net->loopback_dev) { 1201 if (dev) { 1202 dev_put(dev); 1203 in6_dev_put(idev); 1204 } 1205 dev = net->loopback_dev; 1206 dev_hold(dev); 1207 idev = in6_dev_get(dev); 1208 if (!idev) { 1209 err = -ENODEV; 1210 goto out; 1211 } 1212 } 1213 rt->u.dst.output = ip6_pkt_discard_out; 1214 rt->u.dst.input = ip6_pkt_discard; 1215 rt->u.dst.error = -ENETUNREACH; 1216 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1217 goto install_route; 1218 } 1219 1220 if (cfg->fc_flags & RTF_GATEWAY) { 1221 struct in6_addr *gw_addr; 1222 int gwa_type; 1223 1224 gw_addr = &cfg->fc_gateway; 1225 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr); 1226 gwa_type = ipv6_addr_type(gw_addr); 1227 1228 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 1229 struct rt6_info *grt; 1230 1231 /* IPv6 strictly inhibits using not link-local 1232 addresses as nexthop address. 1233 Otherwise, router will not able to send redirects. 1234 It is very good, but in some (rare!) circumstances 1235 (SIT, PtP, NBMA NOARP links) it is handy to allow 1236 some exceptions. 
--ANK 1237 */ 1238 err = -EINVAL; 1239 if (!(gwa_type&IPV6_ADDR_UNICAST)) 1240 goto out; 1241 1242 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); 1243 1244 err = -EHOSTUNREACH; 1245 if (grt == NULL) 1246 goto out; 1247 if (dev) { 1248 if (dev != grt->rt6i_dev) { 1249 dst_release(&grt->u.dst); 1250 goto out; 1251 } 1252 } else { 1253 dev = grt->rt6i_dev; 1254 idev = grt->rt6i_idev; 1255 dev_hold(dev); 1256 in6_dev_hold(grt->rt6i_idev); 1257 } 1258 if (!(grt->rt6i_flags&RTF_GATEWAY)) 1259 err = 0; 1260 dst_release(&grt->u.dst); 1261 1262 if (err) 1263 goto out; 1264 } 1265 err = -EINVAL; 1266 if (dev == NULL || (dev->flags&IFF_LOOPBACK)) 1267 goto out; 1268 } 1269 1270 err = -ENODEV; 1271 if (dev == NULL) 1272 goto out; 1273 1274 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { 1275 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); 1276 if (IS_ERR(rt->rt6i_nexthop)) { 1277 err = PTR_ERR(rt->rt6i_nexthop); 1278 rt->rt6i_nexthop = NULL; 1279 goto out; 1280 } 1281 } 1282 1283 rt->rt6i_flags = cfg->fc_flags; 1284 1285 install_route: 1286 if (cfg->fc_mx) { 1287 struct nlattr *nla; 1288 int remaining; 1289 1290 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 1291 int type = nla_type(nla); 1292 1293 if (type) { 1294 if (type > RTAX_MAX) { 1295 err = -EINVAL; 1296 goto out; 1297 } 1298 1299 rt->u.dst.metrics[type - 1] = nla_get_u32(nla); 1300 } 1301 } 1302 } 1303 1304 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) 1305 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1306 if (!dst_mtu(&rt->u.dst)) 1307 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); 1308 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS)) 1309 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 1310 rt->u.dst.dev = dev; 1311 rt->rt6i_idev = idev; 1312 rt->rt6i_table = table; 1313 1314 cfg->fc_nlinfo.nl_net = dev_net(dev); 1315 1316 return __ip6_ins_rt(rt, &cfg->fc_nlinfo); 1317 1318 out: 1319 if (dev) 1320 dev_put(dev); 1321 if (idev) 1322 
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->u.dst);
	return err;
}

/*
 * Remove one route from its FIB table under the table write lock and
 * notify userspace through the rtnetlink info in @info.  Deleting the
 * per-namespace null entry is refused with -ENOENT; otherwise the
 * fib6_del() result is returned.
 */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->rt6i_dev);

	if (rt == net->ipv6.ip6_null_entry)
		return -ENOENT;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);

	err = fib6_del(rt, info);
	/* Drop the reference the caller took on @rt before handing it in. */
	dst_release(&rt->u.dst);

	write_unlock_bh(&table->tb6_lock);

	return err;
}

/* Delete @rt with a minimal netlink info block (kernel-internal deletes). */
int ip6_del_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->rt6i_dev),
	};
	return __ip6_del_rt(rt, &info);
}

/*
 * Delete the first route matching @cfg: same table and prefix, and --
 * when given -- the same ifindex, gateway and metric.  Returns -ESRCH
 * when no route matches.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (table == NULL)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
			if (cfg->fc_ifindex &&
			    (rt->rt6i_dev == NULL ||
			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Hold the entry across the unlock: __ip6_del_rt()
			 * retakes tb6_lock in write mode. */
			dst_hold(&rt->u.dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}

/*
 *	Handle redirects
 */

/* flowi extended with the address of the router that sent the redirect. */
struct ip6rd_flowi {
	struct flowi fl;
	struct in6_addr gateway;
};

/*
 * Table lookup helper for redirect processing: find the route whose
 * nexthop is the redirecting router (same oif, gateway matches, not
 * expired).  Falls back to the null entry when nothing matches.
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi *fl,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/*
	 * Get the "current" route for this destination and
	 * check if the redirect has come from approriate router.
	 *
	 * RFC 2461 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
		/*
		 * Current route is on-link; redirect is always invalid.
		 *
		 * Seems, previous statement is not true. It could
		 * be node, which looks for us as on-link (f.e. proxy ndisc)
		 * But then router serving it might decide, that we should
		 * know truth 8)8) --ANK (980726).
		 */
		if (rt6_check_expired(rt))
			continue;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl->oif != rt->rt6i_dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	/* BACKTRACK() may jump back to the restart: label above and
	 * falls through to out: when the search is exhausted. */
	BACKTRACK(net, &fl->fl6_src);
out:
	dst_hold(&rt->u.dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};

/* Resolve the route a received redirect applies to (strict iif match
 * for link-local / strict destinations). */
static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
					   struct in6_addr *src,
					   struct in6_addr *gateway,
					   struct net_device *dev)
{
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct net *net = dev_net(dev);
	struct ip6rd_flowi rdfl = {
		.fl = {
			.oif = dev->ifindex,
			.nl_u = {
				.ip6_u = {
					.daddr = *dest,
					.saddr = *src,
				},
			},
		},
	};

	ipv6_addr_copy(&rdfl.gateway, gateway);

	if (rt6_need_strict(dest))
		flags |= RT6_LOOKUP_F_IFACE;

	return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
						   flags, __ip6_route_redirect);
}

/*
 * Process an ICMPv6 redirect: validate that the sender is our current
 * nexthop, update the neighbour cache, and install a cached host route
 * through the new gateway.
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
		  struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	struct netevent_redirect netevent;
	struct net *net = dev_net(neigh->dev);

	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

	if (rt == net->ipv6.ip6_null_entry) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ?
		      0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
			   NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == rt->u.dst.neighbour)
		goto out;

	nrt = ip6_rt_copy(rt);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
	nrt->rt6i_dst.plen = 128;
	nrt->u.dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	nrt->rt6i_nexthop = neigh_clone(neigh);
	/* Reset pmtu, it may be better */
	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
							dst_mtu(&nrt->u.dst));

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->u.dst;
	netevent.new = &nrt->u.dst;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags&RTF_CACHE) {
		/* The superseded cache entry is deleted; ip6_del_rt()
		 * consumes our reference, so return without dst_release. */
		ip6_del_rt(rt);
		return;
	}

out:
	dst_release(&rt->u.dst);
	return;
}

/*
 *	Handle ICMP "packet too big" messages
 *	i.e. Path MTU discovery
 */

void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct rt6_info *rt, *nrt;
	struct net *net = dev_net(dev);
	int allfrag = 0;

	rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
	if (rt == NULL)
		return;

	/* Only accept a PMTU that actually shrinks the path MTU. */
	if (pmtu >= dst_mtu(&rt->u.dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receiving Too Big message reporting PMTU is
		 * less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->u.dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
		if (allfrag)
			rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
		dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	if (nrt) {
		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
		if (allfrag)
			nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;

		/* According to RFC 1981, detecting PMTU increase shouldn't be
		 * happened within 5 mins, the recommended timer is 10 mins.
		 * Here this route expiration time is set to ip6_rt_mtu_expires
		 * which is 10 mins. After 10 mins the decreased pmtu is expired
		 * and detecting PMTU increase will be automatically happened.
		 */
		dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;

		ip6_ins_rt(nrt);
	}
out:
	dst_release(&rt->u.dst);
}

/*
 *	Misc support functions
 */

/*
 * Shallow-copy @ort into a freshly allocated rt6_info, taking device
 * and inet6_dev references.  Expiry state is dropped (rt6i_expires = 0,
 * RTF_EXPIRES cleared) and the metric reset to 0 for the clone.
 */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
{
	struct net *net = dev_net(ort->rt6i_dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);

	if (rt) {
		rt->u.dst.input = ort->u.dst.input;
		rt->u.dst.output = ort->u.dst.output;

		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
		rt->u.dst.error = ort->u.dst.error;
		rt->u.dst.dev = ort->u.dst.dev;
		if (rt->u.dst.dev)
			dev_hold(rt->u.dst.dev);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->u.dst.lastuse = jiffies;
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
		rt->rt6i_table = ort->rt6i_table;
	}
	return rt;
}

#ifdef CONFIG_IPV6_ROUTE_INFO
/* Find an existing RA route-information route matching prefix, gateway
 * and interface in the INFO table; returns it held, or NULL. */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
		if (rt->rt6i_dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->u.dst);
		break;
	}
out:
	write_unlock_bh(&table->tb6_lock);
	return rt;
}

/* Install a route learnt from an RA route-information option and
 * return the (held) FIB entry that resulted. */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_INFO,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	ipv6_addr_copy(&cfg.fc_dst, prefix);
	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	/* We should treat it as a default route if prefix length is 0.
	 */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
#endif

/* Find the RA default route via @addr on @dev in the DFLT table;
 * returns it held, or NULL. */
struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
		if (dev == rt->rt6i_dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->u.dst);
	write_unlock_bh(&table->tb6_lock);
	return rt;
}

/* Install a default route learnt from a router advertisement and
 * return the resulting (held) FIB entry. */
struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_DFLT,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= dev->ifindex,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}

/* Remove every RA-learnt (RTF_DEFAULT/RTF_ADDRCONF) route from the
 * DFLT table of @net. */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (table == NULL)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			/* Hold the entry, drop the lock for the delete,
			 * then rescan from the top: the list may have
			 * changed while unlocked. */
			dst_hold(&rt->u.dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}

/* Translate a legacy ioctl in6_rtmsg into a fib6_config targeting the
 * MAIN table. */
static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = RT6_TABLE_MAIN;
	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	cfg->fc_flags = rtmsg->rtmsg_flags;

	cfg->fc_nlinfo.nl_net = net;

	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
}

/* Handler for the legacy SIOCADDRT/SIOCDELRT route ioctls.
 * Requires CAP_NET_ADMIN; other commands return -EINVAL. */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch(cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}

/*
 *	Drop the packet on the floor
 */

/* Count the drop in SNMP stats, send an ICMPv6 destination-unreachable
 * with @code, and free the skb.  Always returns 0. */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
	kfree_skb(skb);
	return 0;
}

/* dst input handler for no-route destinations (input path). */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}

/* dst output handler for no-route destinations (output path). */
static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Like ip6_pkt_discard, but reports "administratively prohibited". */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}

static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}

#endif

/*
 *	Allocate a dst for local (unicast / anycast) address.
 */

struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    int anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
	struct neighbour *neigh;

	if (rt == NULL)
		return ERR_PTR(-ENOMEM);

	/* Local routes are bound to the loopback device. */
	dev_hold(net->loopback_dev);
	in6_dev_hold(idev);

	rt->u.dst.flags = DST_HOST;
	rt->u.dst.input = ip6_input;
	rt->u.dst.output = ip6_output;
	rt->rt6i_dev = net->loopback_dev;
	rt->rt6i_idev = idev;
	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
	rt->u.dst.obsolete = -1;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;
	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
	if (IS_ERR(neigh)) {
		dst_free(&rt->u.dst);

		/* We are casting this because that is the return
		 * value type.  But an errno encoded pointer is the
		 * same regardless of the underlying pointer type,
		 * and that's what we are returning.  So this is OK.
		 */
		return (struct rt6_info *) neigh;
	}
	rt->rt6i_nexthop = neigh;

	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

	atomic_set(&rt->u.dst.__refcnt, 1);

	return rt;
}

/* Argument block for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};

/* fib6_clean_all()/icmp6_clean_all() callback: select routes bound to
 * arg->dev (or every device when dev == NULL) for deletion; the null
 * entry is always spared.  Non-zero return requests removal. */
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
	struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
	struct net *net = ((struct arg_dev_net *)arg)->net;

	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
	    rt != net->ipv6.ip6_null_entry) {
		RT6_TRACE("deleted by ifdown %p\n", rt);
		return -1;
	}
	return 0;
}

/* Flush routes referencing @dev when it goes down (and matching ICMP
 * rate-limit dst entries). */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, 0, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
}

/* Argument block for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;
	unsigned mtu;
};

/* fib6_clean_all() callback: update the MTU/ADVMSS metrics of routes
 * over arg->dev in response to a device MTU change. */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;
	struct net *net = dev_net(arg->dev);

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (idev == NULL)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e.
jumbo frame) 2028 */ 2029 /* 2030 If new MTU is less than route PMTU, this new MTU will be the 2031 lowest MTU in the path, update the route PMTU to reflect PMTU 2032 decreases; if new MTU is greater than route PMTU, and the 2033 old MTU is the lowest MTU in the path, update the route PMTU 2034 to reflect the increase. In this case if the other nodes' MTU 2035 also have the lowest MTU, TOO BIG MESSAGE will be lead to 2036 PMTU discouvery. 2037 */ 2038 if (rt->rt6i_dev == arg->dev && 2039 !dst_metric_locked(&rt->u.dst, RTAX_MTU) && 2040 (dst_mtu(&rt->u.dst) >= arg->mtu || 2041 (dst_mtu(&rt->u.dst) < arg->mtu && 2042 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { 2043 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; 2044 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); 2045 } 2046 return 0; 2047 } 2048 2049 void rt6_mtu_change(struct net_device *dev, unsigned mtu) 2050 { 2051 struct rt6_mtu_change_arg arg = { 2052 .dev = dev, 2053 .mtu = mtu, 2054 }; 2055 2056 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); 2057 } 2058 2059 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 2060 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 2061 [RTA_OIF] = { .type = NLA_U32 }, 2062 [RTA_IIF] = { .type = NLA_U32 }, 2063 [RTA_PRIORITY] = { .type = NLA_U32 }, 2064 [RTA_METRICS] = { .type = NLA_NESTED }, 2065 }; 2066 2067 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2068 struct fib6_config *cfg) 2069 { 2070 struct rtmsg *rtm; 2071 struct nlattr *tb[RTA_MAX+1]; 2072 int err; 2073 2074 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2075 if (err < 0) 2076 goto errout; 2077 2078 err = -EINVAL; 2079 rtm = nlmsg_data(nlh); 2080 memset(cfg, 0, sizeof(*cfg)); 2081 2082 cfg->fc_table = rtm->rtm_table; 2083 cfg->fc_dst_len = rtm->rtm_dst_len; 2084 cfg->fc_src_len = rtm->rtm_src_len; 2085 cfg->fc_flags = RTF_UP; 2086 cfg->fc_protocol = rtm->rtm_protocol; 2087 2088 if (rtm->rtm_type == RTN_UNREACHABLE) 2089 
cfg->fc_flags |= RTF_REJECT; 2090 2091 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 2092 cfg->fc_nlinfo.nlh = nlh; 2093 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2094 2095 if (tb[RTA_GATEWAY]) { 2096 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); 2097 cfg->fc_flags |= RTF_GATEWAY; 2098 } 2099 2100 if (tb[RTA_DST]) { 2101 int plen = (rtm->rtm_dst_len + 7) >> 3; 2102 2103 if (nla_len(tb[RTA_DST]) < plen) 2104 goto errout; 2105 2106 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 2107 } 2108 2109 if (tb[RTA_SRC]) { 2110 int plen = (rtm->rtm_src_len + 7) >> 3; 2111 2112 if (nla_len(tb[RTA_SRC]) < plen) 2113 goto errout; 2114 2115 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 2116 } 2117 2118 if (tb[RTA_OIF]) 2119 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 2120 2121 if (tb[RTA_PRIORITY]) 2122 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 2123 2124 if (tb[RTA_METRICS]) { 2125 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 2126 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 2127 } 2128 2129 if (tb[RTA_TABLE]) 2130 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2131 2132 err = 0; 2133 errout: 2134 return err; 2135 } 2136 2137 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2138 { 2139 struct fib6_config cfg; 2140 int err; 2141 2142 err = rtm_to_fib6_config(skb, nlh, &cfg); 2143 if (err < 0) 2144 return err; 2145 2146 return ip6_route_del(&cfg); 2147 } 2148 2149 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2150 { 2151 struct fib6_config cfg; 2152 int err; 2153 2154 err = rtm_to_fib6_config(skb, nlh, &cfg); 2155 if (err < 0) 2156 return err; 2157 2158 return ip6_route_add(&cfg); 2159 } 2160 2161 static inline size_t rt6_nlmsg_size(void) 2162 { 2163 return NLMSG_ALIGN(sizeof(struct rtmsg)) 2164 + nla_total_size(16) /* RTA_SRC */ 2165 + nla_total_size(16) /* RTA_DST */ 2166 + nla_total_size(16) /* RTA_GATEWAY */ 2167 + nla_total_size(16) /* RTA_PREFSRC */ 2168 + nla_total_size(4) /* RTA_TABLE */ 2169 + 
	       nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo));
}

/*
 * Build one RTM_NEWROUTE/RTM_DELROUTE message for @rt into @skb.
 * Returns the message length from nlmsg_end(), 1 when the route was
 * skipped (prefix-only dump and @rt is not a prefix route), 0 when an
 * ip6mr lookup answered the request itself, or -EMSGSIZE on overflow.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	NLA_PUT_U32(skb, RTA_TABLE, table);
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		NLA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		NLA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/* Multicast destinations are answered by the multicast
		 * routing code when possible. */
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			NLA_PUT_U32(skb, RTA_IIF, iif);
	} else if (dst) {
		struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
		struct in6_addr saddr_buf;
		if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
				       dst, 0, &saddr_buf) == 0)
			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
		goto nla_put_failure;

	if (rt->u.dst.neighbour)
		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);

	if (rt->u.dst.dev)
		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);

	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);

	/* Clamp the remaining lifetime into the cacheinfo field. */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->rt6i_expires - jiffies < INT_MAX)
		expires = rt->rt6i_expires - jiffies;
	else
		expires = INT_MAX;

	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
			       expires, rt->u.dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* fib6 walker callback used by RTM_GETROUTE dumps: emit one route,
 * honouring an RTM_F_PREFIX filter if the request carried one. */
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	int prefix;

	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	} else
		prefix = 0;

	return rt6_fill_node(arg->net,
		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
		     prefix, 0, NLM_F_MULTI);
}

/* RTM_GETROUTE handler: resolve a single route for the given src/dst
 * and unicast the answer back to the requesting socket. */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi fl;
	int err, iif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl, 0, sizeof(fl));

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		fl.oif = nla_get_u32(tb[RTA_OIF]);

	if (iif) {
		struct net_device *dev;
		/* NOTE(review): the device is only validated here; fl.iif is
		 * never set, so the lookup below is always an output-path
		 * lookup — confirm this is intentional. */
		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
	/* skb now owns the dst reference returned by the lookup. */
	skb_dst_set(skb, &rt->u.dst);

	err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout:
	return err;
}

/* Broadcast a route change (RTM_NEWROUTE/RTM_DELROUTE) to the
 * RTNLGRP_IPV6_ROUTE multicast group. */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;

	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
	if (skb == NULL)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
			    event, info->pid, seq, 0, 0, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}

/* Netdev notifier: bind the namespace's special route entries
 * (null and, with multiple tables, prohibit/blackhole) to the loopback
 * device when it registers. */
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *data)
{
	struct net_device *dev = (struct net_device *)data;
	struct net *net = dev_net(dev);

	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
		net->ipv6.ip6_null_entry->u.dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	}

	return NOTIFY_OK;
}

/*
 *	/proc
 */

#ifdef CONFIG_PROC_FS

#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};

/* fib6 walker callback: print one /proc/net/ipv6_route line for @rt. */
static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
	struct seq_file *m = p_arg;

	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
#else
	seq_puts(m, "00000000000000000000000000000000 00 ");
#endif

	if (rt->rt6i_nexthop) {
		seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
	} else {
		seq_puts(m, "00000000000000000000000000000000");
	}
	seq_printf(m, " %08x %08x %08x %08x %8s\n",
		   rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
		   rt->u.dst.__use, rt->rt6i_flags,
		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
	return 0;
}

/* seq_file show handler for /proc/net/ipv6_route. */
static int ipv6_route_show(struct seq_file *m, void *v)
{
	struct net *net = (struct net *)m->private;
	fib6_clean_all(net, rt6_info_route, 0, m);
	return 0;
}

static int ipv6_route_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ipv6_route_show);
}

static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release_net,
};

/* seq_file show handler for /proc/net/rt6_stats: per-namespace FIB
 * statistics, all fields printed in hex. */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   atomic_read(&net->ipv6.ip6_dst_ops.entries),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}

static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}

static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
#endif	/* CONFIG_PROC_FS */

#ifdef CONFIG_SYSCTL

static
int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
			      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = current->nsproxy->net_ns;
	int delay = net->ipv6.sysctl.flush_delay;
	if (write) {
		proc_dointvec(ctl, write, buffer, lenp, ppos);
		fib6_run_gc(delay <= 0 ?
~0UL : (unsigned long)delay, net); 2536 return 0; 2537 } else 2538 return -EINVAL; 2539 } 2540 2541 ctl_table ipv6_route_table_template[] = { 2542 { 2543 .procname = "flush", 2544 .data = &init_net.ipv6.sysctl.flush_delay, 2545 .maxlen = sizeof(int), 2546 .mode = 0200, 2547 .proc_handler = ipv6_sysctl_rtcache_flush 2548 }, 2549 { 2550 .procname = "gc_thresh", 2551 .data = &ip6_dst_ops_template.gc_thresh, 2552 .maxlen = sizeof(int), 2553 .mode = 0644, 2554 .proc_handler = proc_dointvec, 2555 }, 2556 { 2557 .procname = "max_size", 2558 .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 2559 .maxlen = sizeof(int), 2560 .mode = 0644, 2561 .proc_handler = proc_dointvec, 2562 }, 2563 { 2564 .procname = "gc_min_interval", 2565 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2566 .maxlen = sizeof(int), 2567 .mode = 0644, 2568 .proc_handler = proc_dointvec_jiffies, 2569 }, 2570 { 2571 .procname = "gc_timeout", 2572 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 2573 .maxlen = sizeof(int), 2574 .mode = 0644, 2575 .proc_handler = proc_dointvec_jiffies, 2576 }, 2577 { 2578 .procname = "gc_interval", 2579 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, 2580 .maxlen = sizeof(int), 2581 .mode = 0644, 2582 .proc_handler = proc_dointvec_jiffies, 2583 }, 2584 { 2585 .procname = "gc_elasticity", 2586 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, 2587 .maxlen = sizeof(int), 2588 .mode = 0644, 2589 .proc_handler = proc_dointvec_jiffies, 2590 }, 2591 { 2592 .procname = "mtu_expires", 2593 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, 2594 .maxlen = sizeof(int), 2595 .mode = 0644, 2596 .proc_handler = proc_dointvec_jiffies, 2597 }, 2598 { 2599 .procname = "min_adv_mss", 2600 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, 2601 .maxlen = sizeof(int), 2602 .mode = 0644, 2603 .proc_handler = proc_dointvec_jiffies, 2604 }, 2605 { 2606 .procname = "gc_min_interval_ms", 2607 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2608 .maxlen = sizeof(int), 2609 .mode = 0644, 
2610 .proc_handler = proc_dointvec_ms_jiffies, 2611 }, 2612 { } 2613 }; 2614 2615 struct ctl_table *ipv6_route_sysctl_init(struct net *net) 2616 { 2617 struct ctl_table *table; 2618 2619 table = kmemdup(ipv6_route_table_template, 2620 sizeof(ipv6_route_table_template), 2621 GFP_KERNEL); 2622 2623 if (table) { 2624 table[0].data = &net->ipv6.sysctl.flush_delay; 2625 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 2626 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 2627 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2628 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; 2629 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 2630 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; 2631 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 2632 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 2633 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2634 } 2635 2636 return table; 2637 } 2638 #endif 2639 2640 static int ip6_route_net_init(struct net *net) 2641 { 2642 int ret = -ENOMEM; 2643 2644 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 2645 sizeof(net->ipv6.ip6_dst_ops)); 2646 2647 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 2648 sizeof(*net->ipv6.ip6_null_entry), 2649 GFP_KERNEL); 2650 if (!net->ipv6.ip6_null_entry) 2651 goto out_ip6_dst_ops; 2652 net->ipv6.ip6_null_entry->u.dst.path = 2653 (struct dst_entry *)net->ipv6.ip6_null_entry; 2654 net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; 2655 2656 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2657 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 2658 sizeof(*net->ipv6.ip6_prohibit_entry), 2659 GFP_KERNEL); 2660 if (!net->ipv6.ip6_prohibit_entry) 2661 goto out_ip6_null_entry; 2662 net->ipv6.ip6_prohibit_entry->u.dst.path = 2663 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 2664 net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; 2665 2666 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 2667 
sizeof(*net->ipv6.ip6_blk_hole_entry), 2668 GFP_KERNEL); 2669 if (!net->ipv6.ip6_blk_hole_entry) 2670 goto out_ip6_prohibit_entry; 2671 net->ipv6.ip6_blk_hole_entry->u.dst.path = 2672 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 2673 net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; 2674 #endif 2675 2676 net->ipv6.sysctl.flush_delay = 0; 2677 net->ipv6.sysctl.ip6_rt_max_size = 4096; 2678 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 2679 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 2680 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 2681 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 2682 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 2683 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 2684 2685 #ifdef CONFIG_PROC_FS 2686 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); 2687 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2688 #endif 2689 net->ipv6.ip6_rt_gc_expire = 30*HZ; 2690 2691 ret = 0; 2692 out: 2693 return ret; 2694 2695 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2696 out_ip6_prohibit_entry: 2697 kfree(net->ipv6.ip6_prohibit_entry); 2698 out_ip6_null_entry: 2699 kfree(net->ipv6.ip6_null_entry); 2700 #endif 2701 out_ip6_dst_ops: 2702 goto out; 2703 } 2704 2705 static void ip6_route_net_exit(struct net *net) 2706 { 2707 #ifdef CONFIG_PROC_FS 2708 proc_net_remove(net, "ipv6_route"); 2709 proc_net_remove(net, "rt6_stats"); 2710 #endif 2711 kfree(net->ipv6.ip6_null_entry); 2712 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2713 kfree(net->ipv6.ip6_prohibit_entry); 2714 kfree(net->ipv6.ip6_blk_hole_entry); 2715 #endif 2716 } 2717 2718 static struct pernet_operations ip6_route_net_ops = { 2719 .init = ip6_route_net_init, 2720 .exit = ip6_route_net_exit, 2721 }; 2722 2723 static struct notifier_block ip6_route_dev_notifier = { 2724 .notifier_call = ip6_route_dev_notify, 2725 .priority = 0, 2726 }; 2727 2728 int __init ip6_route_init(void) 2729 { 2730 int ret; 2731 2732 ret = -ENOMEM; 2733 
ip6_dst_ops_template.kmem_cachep = 2734 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 2735 SLAB_HWCACHE_ALIGN, NULL); 2736 if (!ip6_dst_ops_template.kmem_cachep) 2737 goto out; 2738 2739 ret = register_pernet_subsys(&ip6_route_net_ops); 2740 if (ret) 2741 goto out_kmem_cache; 2742 2743 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 2744 2745 /* Registering of the loopback is done before this portion of code, 2746 * the loopback reference in rt6_info will not be taken, do it 2747 * manually for init_net */ 2748 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev; 2749 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2750 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2751 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev; 2752 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2753 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev; 2754 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2755 #endif 2756 ret = fib6_init(); 2757 if (ret) 2758 goto out_register_subsys; 2759 2760 ret = xfrm6_init(); 2761 if (ret) 2762 goto out_fib6_init; 2763 2764 ret = fib6_rules_init(); 2765 if (ret) 2766 goto xfrm6_init; 2767 2768 ret = -ENOBUFS; 2769 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) || 2770 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) || 2771 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL)) 2772 goto fib6_rules_init; 2773 2774 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 2775 if (ret) 2776 goto fib6_rules_init; 2777 2778 out: 2779 return ret; 2780 2781 fib6_rules_init: 2782 fib6_rules_cleanup(); 2783 xfrm6_init: 2784 xfrm6_fini(); 2785 out_fib6_init: 2786 fib6_gc_cleanup(); 2787 out_register_subsys: 2788 unregister_pernet_subsys(&ip6_route_net_ops); 2789 out_kmem_cache: 2790 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 
2791 goto out; 2792 } 2793 2794 void ip6_route_cleanup(void) 2795 { 2796 unregister_netdevice_notifier(&ip6_route_dev_notifier); 2797 fib6_rules_cleanup(); 2798 xfrm6_fini(); 2799 fib6_gc_cleanup(); 2800 unregister_pernet_subsys(&ip6_route_net_ops); 2801 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2802 } 2803