/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

/* Set to 3 to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif

static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
static unsigned int	 ip6_default_mtu(const struct dst_entry *dst);
static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void		ip6_dst_destroy(struct dst_entry *);
static void		ip6_dst_ifdown(struct dst_entry *,
				       struct net_device *dev, int how);
static int		 ip6_dst_gc(struct dst_ops *ops);

static int		ip6_pkt_discard(struct sk_buff *skb);
static int		ip6_pkt_discard_out(struct sk_buff *skb);
static void		ip6_link_failure(struct sk_buff *skb);
static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex);
#endif

/* dst_ops for ordinary IPv6 routes in the per-net ip6_dst_ops copy. */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.default_mtu		=	ip6_default_mtu,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.local_out		=	__ip6_local_out,
};

static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	return 0;
}

static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
{
	/* Black-hole dsts ignore PMTU updates. */
}

/* dst_ops for black-hole copies made by ip6_dst_blackhole(); no gc. */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.protocol		=	cpu_to_be16(ETH_P_IPV6),
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.default_mtu		=	ip6_blackhole_default_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
};

/* Template for the per-net "null" route: drops with -ENETUNREACH. */
static struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template for the per-net "prohibit" route: drops with -EACCES. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Template for the per-net "blackhole" route: silent discard, -EINVAL. */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif

/* allocate dst with ip6_dst_ops */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	return (struct rt6_info *)dst_alloc(ops);
}

/*
 * dst_ops->destroy callback: drop the references a cached route holds
 * on its inet6_dev and inet_peer, if any.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct inet_peer *peer = rt->rt6i_peer;

	if (idev != NULL) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}
	if (peer) {
		rt->rt6i_peer = NULL;
		inet_putpeer(peer);
	}
}

/*
 * Attach an inet_peer for the route's destination.  Lock-free: if a
 * concurrent caller won the cmpxchg race, drop our reference instead.
 */
void rt6_bind_peer(struct rt6_info *rt, int create)
{
	struct inet_peer *peer;

	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
		inet_putpeer(peer);
}

/*
 * dst_ops->ifdown callback: the underlying device is going away, so
 * re-point rt6i_idev at the namespace's loopback device to keep the
 * route's idev reference valid.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
		struct inet6_dev *loopback_idev =
			in6_dev_get(loopback_dev);
		if (loopback_idev != NULL) {
			rt->rt6i_idev = loopback_idev;
			in6_dev_put(idev);
		}
	}
}

/* Nonzero if the route carries RTF_EXPIRES and its deadline has passed. */
static __inline__ int rt6_check_expired(const struct rt6_info *rt)
{
	return (rt->rt6i_flags & RTF_EXPIRES) &&
		time_after(jiffies, rt->rt6i_expires);
}

/*
 * Nonzero for destinations whose lookup must honour the output
 * interface strictly (multicast, link-local, loopback scopes).
 */
static inline int rt6_need_strict(struct in6_addr *daddr)
{
	return ipv6_addr_type(daddr) &
		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
}

/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

/*
 * Walk the rt6_info chain starting at @rt and pick the entry matching
 * the source address / output interface constraints.  Falls back to a
 * local (loopback) route, the null entry (strict mode), or @rt itself.
 * Caller holds table->tb6_lock.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->rt6i_dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (sprt->rt6i_idev == NULL ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE && oif)
						continue;
					if (local && (!oif ||
						      local->rt6i_idev->dev->ifindex == oif))
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}

#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Kick off a neighbour-solicitation probe towards the route's next hop
 * when its reachability is unknown (Router Reachability Probing).
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;
	read_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		read_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		read_unlock_bh(&neigh->lock);
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif

/*
 *	Default Router Selection (RFC 2461 6.3.6)
 */
/* Device score: 2 = exact oif match (or no oif), 1 = loopback alias, 0 = miss. */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->rt6i_dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

/* Next-hop reachability score: 2 = NUD_VALID, 1 = unknown, 0 = failed/none. */
static inline int rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh = rt->rt6i_nexthop;
	int m;
	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		m = 1;
	else if (neigh) {
		read_lock_bh(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			m = 2;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (neigh->nud_state & NUD_FAILED)
			m = 0;
#endif
		else
			m = 1;
		read_unlock_bh(&neigh->lock);
	} else
		m = 0;
	return m;
}

/*
 * Combined score for router selection; -1 means the route is unusable
 * under the given strictness flags.
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m, n;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return -1;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	n = rt6_check_neigh(rt);
	if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
		return -1;
	return m;
}

/*
 * Compare @rt against the current best @match / *@mpri; return the new
 * best.  Probes losing candidates when reachability is required.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match)
{
	int m;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m < 0)
		goto out;

	if (m > *mpri) {
		if (strict & RT6_LOOKUP_F_REACHABLE)
			rt6_probe(match);
		*mpri = m;
		match = rt;
	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
		rt6_probe(rt);
	}

out:
	return match;
}

/*
 * Scan all siblings with the given metric, starting at the round-robin
 * head and wrapping through the leaf list, for the best-scoring route.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict)
{
	struct rt6_info *rt, *match;
	int mpri = -1;

	match = NULL;
	for (rt = rr_head; rt && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);
	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);

	return match;
}

/*
 * Default router selection for a fib6 node: pick the best route and
 * advance the round-robin pointer when nothing matched.  Returns the
 * null entry (never NULL) if no route qualifies.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;

	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
		  __func__, fn->leaf, oif);

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);

	if (!match &&
	    (strict & RT6_LOOKUP_F_REACHABLE)) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	RT6_TRACE("%s() => %p\n",
		  __func__, match);

	net = dev_net(rt0->rt6i_dev);
	return match ? match : net->ipv6.ip6_null_entry;
}

#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option from a Router Advertisement
 * (RFC 4191): validate it, then add, refresh or delete the
 * corresponding RTF_ROUTEINFO route.  Returns 0 or -EINVAL.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->dst);
	}
	return 0;
}
#endif

/*
 * When a lookup landed on the null entry, climb towards the tree root
 * (descending into source subtrees where present) until a node with
 * route info is found, then jump back to the caller's "restart" label.
 * Expects locals "rt" and "fn" and labels "out"/"restart" in scope.
 */
#define BACKTRACK(__net, saddr)			\
do { \
	if (rt == __net->ipv6.ip6_null_entry) {	\
		struct fib6_node *pn; \
		while (1) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while(0)

/*
 * Simple (non-caching) policy lookup in one table; takes a use
 * reference on the returned route.
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi *fl, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
	BACKTRACK(net, &fl->fl6_src);
out:
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);
	return rt;

}

/*
 * Public route lookup helper.  Returns a referenced rt6_info, or NULL
 * when the lookup resolved to an error dst (which is released here).
 */
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
			    const struct in6_addr *saddr, int oif, int strict)
{
	struct flowi fl = {
		.oif = oif,
		.fl6_dst = *daddr,
	};
	struct dst_entry *dst;
	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}

EXPORT_SYMBOL(rt6_lookup);

/* ip6_ins_rt is called with FREE table->tb6_lock.
   It takes new route entry, the addition fails by any reason the
   route is freed. In any case, if caller does not hold it, it may
   be destroyed.
606 */ 607 608 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) 609 { 610 int err; 611 struct fib6_table *table; 612 613 table = rt->rt6i_table; 614 write_lock_bh(&table->tb6_lock); 615 err = fib6_add(&table->tb6_root, rt, info); 616 write_unlock_bh(&table->tb6_lock); 617 618 return err; 619 } 620 621 int ip6_ins_rt(struct rt6_info *rt) 622 { 623 struct nl_info info = { 624 .nl_net = dev_net(rt->rt6i_dev), 625 }; 626 return __ip6_ins_rt(rt, &info); 627 } 628 629 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, 630 struct in6_addr *saddr) 631 { 632 struct rt6_info *rt; 633 634 /* 635 * Clone the route. 636 */ 637 638 rt = ip6_rt_copy(ort); 639 640 if (rt) { 641 struct neighbour *neigh; 642 int attempts = !in_softirq(); 643 644 if (!(rt->rt6i_flags&RTF_GATEWAY)) { 645 if (rt->rt6i_dst.plen != 128 && 646 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) 647 rt->rt6i_flags |= RTF_ANYCAST; 648 ipv6_addr_copy(&rt->rt6i_gateway, daddr); 649 } 650 651 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 652 rt->rt6i_dst.plen = 128; 653 rt->rt6i_flags |= RTF_CACHE; 654 rt->dst.flags |= DST_HOST; 655 656 #ifdef CONFIG_IPV6_SUBTREES 657 if (rt->rt6i_src.plen && saddr) { 658 ipv6_addr_copy(&rt->rt6i_src.addr, saddr); 659 rt->rt6i_src.plen = 128; 660 } 661 #endif 662 663 retry: 664 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 665 if (IS_ERR(neigh)) { 666 struct net *net = dev_net(rt->rt6i_dev); 667 int saved_rt_min_interval = 668 net->ipv6.sysctl.ip6_rt_gc_min_interval; 669 int saved_rt_elasticity = 670 net->ipv6.sysctl.ip6_rt_gc_elasticity; 671 672 if (attempts-- > 0) { 673 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; 674 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; 675 676 ip6_dst_gc(&net->ipv6.ip6_dst_ops); 677 678 net->ipv6.sysctl.ip6_rt_gc_elasticity = 679 saved_rt_elasticity; 680 net->ipv6.sysctl.ip6_rt_gc_min_interval = 681 saved_rt_min_interval; 682 goto retry; 683 } 684 685 if (net_ratelimit()) 686 printk(KERN_WARNING 687 
"ipv6: Neighbour table overflow.\n"); 688 dst_free(&rt->dst); 689 return NULL; 690 } 691 rt->rt6i_nexthop = neigh; 692 693 } 694 695 return rt; 696 } 697 698 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr) 699 { 700 struct rt6_info *rt = ip6_rt_copy(ort); 701 if (rt) { 702 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 703 rt->rt6i_dst.plen = 128; 704 rt->rt6i_flags |= RTF_CACHE; 705 rt->dst.flags |= DST_HOST; 706 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); 707 } 708 return rt; 709 } 710 711 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, 712 struct flowi *fl, int flags) 713 { 714 struct fib6_node *fn; 715 struct rt6_info *rt, *nrt; 716 int strict = 0; 717 int attempts = 3; 718 int err; 719 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; 720 721 strict |= flags & RT6_LOOKUP_F_IFACE; 722 723 relookup: 724 read_lock_bh(&table->tb6_lock); 725 726 restart_2: 727 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 728 729 restart: 730 rt = rt6_select(fn, oif, strict | reachable); 731 732 BACKTRACK(net, &fl->fl6_src); 733 if (rt == net->ipv6.ip6_null_entry || 734 rt->rt6i_flags & RTF_CACHE) 735 goto out; 736 737 dst_hold(&rt->dst); 738 read_unlock_bh(&table->tb6_lock); 739 740 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 741 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 742 else 743 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 744 745 dst_release(&rt->dst); 746 rt = nrt ? : net->ipv6.ip6_null_entry; 747 748 dst_hold(&rt->dst); 749 if (nrt) { 750 err = ip6_ins_rt(nrt); 751 if (!err) 752 goto out2; 753 } 754 755 if (--attempts <= 0) 756 goto out2; 757 758 /* 759 * Race condition! In the gap, when table->tb6_lock was 760 * released someone could insert this route. Relookup. 
761 */ 762 dst_release(&rt->dst); 763 goto relookup; 764 765 out: 766 if (reachable) { 767 reachable = 0; 768 goto restart_2; 769 } 770 dst_hold(&rt->dst); 771 read_unlock_bh(&table->tb6_lock); 772 out2: 773 rt->dst.lastuse = jiffies; 774 rt->dst.__use++; 775 776 return rt; 777 } 778 779 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, 780 struct flowi *fl, int flags) 781 { 782 return ip6_pol_route(net, table, fl->iif, fl, flags); 783 } 784 785 void ip6_route_input(struct sk_buff *skb) 786 { 787 struct ipv6hdr *iph = ipv6_hdr(skb); 788 struct net *net = dev_net(skb->dev); 789 int flags = RT6_LOOKUP_F_HAS_SADDR; 790 struct flowi fl = { 791 .iif = skb->dev->ifindex, 792 .fl6_dst = iph->daddr, 793 .fl6_src = iph->saddr, 794 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, 795 .mark = skb->mark, 796 .proto = iph->nexthdr, 797 }; 798 799 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) 800 flags |= RT6_LOOKUP_F_IFACE; 801 802 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input)); 803 } 804 805 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, 806 struct flowi *fl, int flags) 807 { 808 return ip6_pol_route(net, table, fl->oif, fl, flags); 809 } 810 811 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, 812 struct flowi *fl) 813 { 814 int flags = 0; 815 816 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst)) 817 flags |= RT6_LOOKUP_F_IFACE; 818 819 if (!ipv6_addr_any(&fl->fl6_src)) 820 flags |= RT6_LOOKUP_F_HAS_SADDR; 821 else if (sk) 822 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); 823 824 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); 825 } 826 827 EXPORT_SYMBOL(ip6_route_output); 828 829 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) 830 { 831 struct rt6_info *ort = (struct rt6_info *) *dstp; 832 struct rt6_info *rt = (struct rt6_info *) 833 
dst_alloc(&ip6_dst_blackhole_ops); 834 struct dst_entry *new = NULL; 835 836 if (rt) { 837 new = &rt->dst; 838 839 atomic_set(&new->__refcnt, 1); 840 new->__use = 1; 841 new->input = dst_discard; 842 new->output = dst_discard; 843 844 dst_copy_metrics(new, &ort->dst); 845 new->dev = ort->dst.dev; 846 if (new->dev) 847 dev_hold(new->dev); 848 rt->rt6i_idev = ort->rt6i_idev; 849 if (rt->rt6i_idev) 850 in6_dev_hold(rt->rt6i_idev); 851 rt->rt6i_expires = 0; 852 853 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 854 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 855 rt->rt6i_metric = 0; 856 857 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 858 #ifdef CONFIG_IPV6_SUBTREES 859 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 860 #endif 861 862 dst_free(new); 863 } 864 865 dst_release(*dstp); 866 *dstp = new; 867 return new ? 0 : -ENOMEM; 868 } 869 EXPORT_SYMBOL_GPL(ip6_dst_blackhole); 870 871 /* 872 * Destination cache support functions 873 */ 874 875 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 876 { 877 struct rt6_info *rt; 878 879 rt = (struct rt6_info *) dst; 880 881 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) 882 return dst; 883 884 return NULL; 885 } 886 887 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) 888 { 889 struct rt6_info *rt = (struct rt6_info *) dst; 890 891 if (rt) { 892 if (rt->rt6i_flags & RTF_CACHE) { 893 if (rt6_check_expired(rt)) { 894 ip6_del_rt(rt); 895 dst = NULL; 896 } 897 } else { 898 dst_release(dst); 899 dst = NULL; 900 } 901 } 902 return dst; 903 } 904 905 static void ip6_link_failure(struct sk_buff *skb) 906 { 907 struct rt6_info *rt; 908 909 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); 910 911 rt = (struct rt6_info *) skb_dst(skb); 912 if (rt) { 913 if (rt->rt6i_flags&RTF_CACHE) { 914 dst_set_expires(&rt->dst, 0); 915 rt->rt6i_flags |= RTF_EXPIRES; 916 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) 917 
rt->rt6i_node->fn_sernum = -1; 918 } 919 } 920 921 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 922 { 923 struct rt6_info *rt6 = (struct rt6_info*)dst; 924 925 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 926 rt6->rt6i_flags |= RTF_MODIFIED; 927 if (mtu < IPV6_MIN_MTU) { 928 u32 features = dst_metric(dst, RTAX_FEATURES); 929 mtu = IPV6_MIN_MTU; 930 features |= RTAX_FEATURE_ALLFRAG; 931 dst_metric_set(dst, RTAX_FEATURES, features); 932 } 933 dst_metric_set(dst, RTAX_MTU, mtu); 934 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); 935 } 936 } 937 938 static unsigned int ip6_default_advmss(const struct dst_entry *dst) 939 { 940 struct net_device *dev = dst->dev; 941 unsigned int mtu = dst_mtu(dst); 942 struct net *net = dev_net(dev); 943 944 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 945 946 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) 947 mtu = net->ipv6.sysctl.ip6_rt_min_advmss; 948 949 /* 950 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 951 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
952 * IPV6_MAXPLEN is also valid and means: "any MSS, 953 * rely only on pmtu discovery" 954 */ 955 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 956 mtu = IPV6_MAXPLEN; 957 return mtu; 958 } 959 960 static unsigned int ip6_default_mtu(const struct dst_entry *dst) 961 { 962 unsigned int mtu = IPV6_MIN_MTU; 963 struct inet6_dev *idev; 964 965 rcu_read_lock(); 966 idev = __in6_dev_get(dst->dev); 967 if (idev) 968 mtu = idev->cnf.mtu6; 969 rcu_read_unlock(); 970 971 return mtu; 972 } 973 974 static struct dst_entry *icmp6_dst_gc_list; 975 static DEFINE_SPINLOCK(icmp6_dst_lock); 976 977 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 978 struct neighbour *neigh, 979 const struct in6_addr *addr) 980 { 981 struct rt6_info *rt; 982 struct inet6_dev *idev = in6_dev_get(dev); 983 struct net *net = dev_net(dev); 984 985 if (unlikely(idev == NULL)) 986 return NULL; 987 988 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 989 if (unlikely(rt == NULL)) { 990 in6_dev_put(idev); 991 goto out; 992 } 993 994 dev_hold(dev); 995 if (neigh) 996 neigh_hold(neigh); 997 else { 998 neigh = ndisc_get_neigh(dev, addr); 999 if (IS_ERR(neigh)) 1000 neigh = NULL; 1001 } 1002 1003 rt->rt6i_dev = dev; 1004 rt->rt6i_idev = idev; 1005 rt->rt6i_nexthop = neigh; 1006 atomic_set(&rt->dst.__refcnt, 1); 1007 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); 1008 rt->dst.output = ip6_output; 1009 1010 #if 0 /* there's no chance to use these for ndisc */ 1011 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 1012 ? 
DST_HOST 1013 : 0; 1014 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 1015 rt->rt6i_dst.plen = 128; 1016 #endif 1017 1018 spin_lock_bh(&icmp6_dst_lock); 1019 rt->dst.next = icmp6_dst_gc_list; 1020 icmp6_dst_gc_list = &rt->dst; 1021 spin_unlock_bh(&icmp6_dst_lock); 1022 1023 fib6_force_start_gc(net); 1024 1025 out: 1026 return &rt->dst; 1027 } 1028 1029 int icmp6_dst_gc(void) 1030 { 1031 struct dst_entry *dst, *next, **pprev; 1032 int more = 0; 1033 1034 next = NULL; 1035 1036 spin_lock_bh(&icmp6_dst_lock); 1037 pprev = &icmp6_dst_gc_list; 1038 1039 while ((dst = *pprev) != NULL) { 1040 if (!atomic_read(&dst->__refcnt)) { 1041 *pprev = dst->next; 1042 dst_free(dst); 1043 } else { 1044 pprev = &dst->next; 1045 ++more; 1046 } 1047 } 1048 1049 spin_unlock_bh(&icmp6_dst_lock); 1050 1051 return more; 1052 } 1053 1054 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), 1055 void *arg) 1056 { 1057 struct dst_entry *dst, **pprev; 1058 1059 spin_lock_bh(&icmp6_dst_lock); 1060 pprev = &icmp6_dst_gc_list; 1061 while ((dst = *pprev) != NULL) { 1062 struct rt6_info *rt = (struct rt6_info *) dst; 1063 if (func(rt, arg)) { 1064 *pprev = dst->next; 1065 dst_free(dst); 1066 } else { 1067 pprev = &dst->next; 1068 } 1069 } 1070 spin_unlock_bh(&icmp6_dst_lock); 1071 } 1072 1073 static int ip6_dst_gc(struct dst_ops *ops) 1074 { 1075 unsigned long now = jiffies; 1076 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); 1077 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; 1078 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; 1079 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 1080 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 1081 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 1082 int entries; 1083 1084 entries = dst_entries_get_fast(ops); 1085 if (time_after(rt_last_gc + rt_min_interval, now) && 1086 entries <= rt_max_size) 1087 goto out; 1088 1089 net->ipv6.ip6_rt_gc_expire++; 1090 
fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); 1091 net->ipv6.ip6_rt_last_gc = now; 1092 entries = dst_entries_get_slow(ops); 1093 if (entries < ops->gc_thresh) 1094 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1095 out: 1096 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1097 return entries > rt_max_size; 1098 } 1099 1100 /* Clean host part of a prefix. Not necessary in radix tree, 1101 but results in cleaner routing tables. 1102 1103 Remove it only when all the things will work! 1104 */ 1105 1106 int ip6_dst_hoplimit(struct dst_entry *dst) 1107 { 1108 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); 1109 if (hoplimit == 0) { 1110 struct net_device *dev = dst->dev; 1111 struct inet6_dev *idev; 1112 1113 rcu_read_lock(); 1114 idev = __in6_dev_get(dev); 1115 if (idev) 1116 hoplimit = idev->cnf.hop_limit; 1117 else 1118 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; 1119 rcu_read_unlock(); 1120 } 1121 return hoplimit; 1122 } 1123 EXPORT_SYMBOL(ip6_dst_hoplimit); 1124 1125 /* 1126 * 1127 */ 1128 1129 int ip6_route_add(struct fib6_config *cfg) 1130 { 1131 int err; 1132 struct net *net = cfg->fc_nlinfo.nl_net; 1133 struct rt6_info *rt = NULL; 1134 struct net_device *dev = NULL; 1135 struct inet6_dev *idev = NULL; 1136 struct fib6_table *table; 1137 int addr_type; 1138 1139 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1140 return -EINVAL; 1141 #ifndef CONFIG_IPV6_SUBTREES 1142 if (cfg->fc_src_len) 1143 return -EINVAL; 1144 #endif 1145 if (cfg->fc_ifindex) { 1146 err = -ENODEV; 1147 dev = dev_get_by_index(net, cfg->fc_ifindex); 1148 if (!dev) 1149 goto out; 1150 idev = in6_dev_get(dev); 1151 if (!idev) 1152 goto out; 1153 } 1154 1155 if (cfg->fc_metric == 0) 1156 cfg->fc_metric = IP6_RT_PRIO_USER; 1157 1158 table = fib6_new_table(net, cfg->fc_table); 1159 if (table == NULL) { 1160 err = -ENOBUFS; 1161 goto out; 1162 } 1163 1164 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); 1165 1166 if (rt == NULL) { 1167 err = -ENOMEM; 1168 goto 
out; 1169 } 1170 1171 rt->dst.obsolete = -1; 1172 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? 1173 jiffies + clock_t_to_jiffies(cfg->fc_expires) : 1174 0; 1175 1176 if (cfg->fc_protocol == RTPROT_UNSPEC) 1177 cfg->fc_protocol = RTPROT_BOOT; 1178 rt->rt6i_protocol = cfg->fc_protocol; 1179 1180 addr_type = ipv6_addr_type(&cfg->fc_dst); 1181 1182 if (addr_type & IPV6_ADDR_MULTICAST) 1183 rt->dst.input = ip6_mc_input; 1184 else if (cfg->fc_flags & RTF_LOCAL) 1185 rt->dst.input = ip6_input; 1186 else 1187 rt->dst.input = ip6_forward; 1188 1189 rt->dst.output = ip6_output; 1190 1191 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1192 rt->rt6i_dst.plen = cfg->fc_dst_len; 1193 if (rt->rt6i_dst.plen == 128) 1194 rt->dst.flags = DST_HOST; 1195 1196 #ifdef CONFIG_IPV6_SUBTREES 1197 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1198 rt->rt6i_src.plen = cfg->fc_src_len; 1199 #endif 1200 1201 rt->rt6i_metric = cfg->fc_metric; 1202 1203 /* We cannot add true routes via loopback here, 1204 they would result in kernel looping; promote them to reject routes 1205 */ 1206 if ((cfg->fc_flags & RTF_REJECT) || 1207 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK) 1208 && !(cfg->fc_flags&RTF_LOCAL))) { 1209 /* hold loopback dev/idev if we haven't done so. 
*/ 1210 if (dev != net->loopback_dev) { 1211 if (dev) { 1212 dev_put(dev); 1213 in6_dev_put(idev); 1214 } 1215 dev = net->loopback_dev; 1216 dev_hold(dev); 1217 idev = in6_dev_get(dev); 1218 if (!idev) { 1219 err = -ENODEV; 1220 goto out; 1221 } 1222 } 1223 rt->dst.output = ip6_pkt_discard_out; 1224 rt->dst.input = ip6_pkt_discard; 1225 rt->dst.error = -ENETUNREACH; 1226 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1227 goto install_route; 1228 } 1229 1230 if (cfg->fc_flags & RTF_GATEWAY) { 1231 struct in6_addr *gw_addr; 1232 int gwa_type; 1233 1234 gw_addr = &cfg->fc_gateway; 1235 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr); 1236 gwa_type = ipv6_addr_type(gw_addr); 1237 1238 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 1239 struct rt6_info *grt; 1240 1241 /* IPv6 strictly inhibits using not link-local 1242 addresses as nexthop address. 1243 Otherwise, router will not able to send redirects. 1244 It is very good, but in some (rare!) circumstances 1245 (SIT, PtP, NBMA NOARP links) it is handy to allow 1246 some exceptions. 
			   --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			/* Resolve the nexthop to make sure it is reachable
			 * via some existing route on the given interface. */
			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->dst);
					goto out;
				}
			} else {
				/* Inherit device and inet6_dev from the route
				 * that reaches the gateway; take references. */
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* The route to the gateway must itself be non-gatewayed,
			 * i.e. the gateway must be on-link. */
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(rt->rt6i_nexthop)) {
			err = PTR_ERR(rt->rt6i_nexthop);
			rt->rt6i_nexthop = NULL;
			goto out;
		}
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				/* Reject metric types beyond the known RTAX_* range. */
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
			}
		}
	}

	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	/* Error path: drop whatever references were acquired above. */
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}

/*
 * Remove @rt from its FIB table under tb6_lock and drop the reference
 * the caller holds on it.  The per-netns null entry is never deleted.
 */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->rt6i_dev);

	if (rt == net->ipv6.ip6_null_entry)
		return -ENOENT;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);

	err = fib6_del(rt, info);
	dst_release(&rt->dst);

	write_unlock_bh(&table->tb6_lock);

	return err;
}

/* Convenience wrapper: delete @rt with only the owning netns as context. */
int ip6_del_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->rt6i_dev),
	};
	return __ip6_del_rt(rt, &info);
}

/*
 * Delete the route described by @cfg: locate the matching fib6 node and
 * scan its leaf chain for an entry passing the optional ifindex, gateway
 * and metric filters.  Returns -ESRCH if nothing matches.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (table == NULL)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if (cfg->fc_ifindex &&
			    (rt->rt6i_dev == NULL ||
			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Hold the entry so it stays valid after the
			 * read lock is dropped; __ip6_del_rt() consumes
			 * this reference. */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}

/*
 *	Handle redirects
 */
struct ip6rd_flowi {
	struct flowi fl;
	struct in6_addr gateway;	/* address of the redirecting router */
};

static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi *fl,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/*
	 * Get the "current" route for this destination and
	 * check if the redirect has come from approriate router.
	 *
	 * RFC 2461 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		/*
		 * Current route is on-link; redirect is always invalid.
		 *
		 * Seems, previous statement is not true. It could
		 * be node, which looks for us as on-link (f.e. proxy ndisc)
		 * But then router serving it might decide, that we should
		 * know truth 8)8) --ANK (980726).
		 */
		if (rt6_check_expired(rt))
			continue;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl->oif != rt->rt6i_dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	/* BACKTRACK may jump back to restart: for a less specific node. */
	BACKTRACK(net, &fl->fl6_src);
out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	return rt;
};

/*
 * Find the route a redirect arriving on @dev for @dest/@src applies to,
 * i.e. a gatewayed route whose gateway is the redirecting router.
 */
static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
					   struct in6_addr *src,
					   struct in6_addr *gateway,
					   struct net_device *dev)
{
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct net *net = dev_net(dev);
	struct ip6rd_flowi rdfl = {
		.fl = {
			.oif = dev->ifindex,
			.fl6_dst = *dest,
			.fl6_src = *src,
		},
	};

	ipv6_addr_copy(&rdfl.gateway, gateway);

	if (rt6_need_strict(dest))
		flags |= RT6_LOOKUP_F_IFACE;

	return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
						   flags, __ip6_route_redirect);
}

/*
 * Process a received (and already authenticated by the caller) ICMPv6
 * redirect: update the neighbour cache and install a cached host route
 * to @dest via the new first hop @neigh.
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
		  struct in6_addr *saddr,
		  struct neighbour *neigh, u8 *lladdr, int on_link)
{
	struct rt6_info *rt, *nrt = NULL;
	struct netevent_redirect netevent;
	struct net *net = dev_net(neigh->dev);

	rt = ip6_route_redirect(dest, src, saddr, neigh->dev);

	if (rt == net->ipv6.ip6_null_entry) {
		if (net_ratelimit())
			printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
			       "for redirect target\n");
		goto out;
	}

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	/*
	 * Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Duplicate redirect: silently ignore. */
	if (neigh == rt->dst.neighbour)
		goto out;

	nrt = ip6_rt_copy(rt);
	if (nrt == NULL)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
	nrt->rt6i_dst.plen = 128;
	nrt->dst.flags |= DST_HOST;

	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
	nrt->rt6i_nexthop = neigh_clone(neigh);

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags&RTF_CACHE) {
		/* ip6_del_rt() consumes the reference we hold on rt,
		 * so do not fall through to dst_release(). */
		ip6_del_rt(rt);
		return;
	}

out:
	dst_release(&rt->dst);
}

/*
 *	Handle ICMP "packet too big" messages
 *	i.e. Path MTU discovery
 */

/*
 * Apply a Packet Too Big report for daddr/saddr (optionally scoped to
 * @ifindex): clamp the cached path MTU to @pmtu (never below
 * IPV6_MIN_MTU) and install or refresh an expiring host route that
 * carries the reduced MTU.
 */
static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
			     struct net *net, u32 pmtu, int ifindex)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;
again:
	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
	if (rt == NULL)
		return;

	if (rt6_check_expired(rt)) {
		ip6_del_rt(rt);
		goto again;
	}

	/* Only ever shrink the path MTU here. */
	if (pmtu >= dst_mtu(&rt->dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
		 * MTU (1280) and a fragment header should always be included
		 * after a node receiving Too Big message reporting PMTU is
		 * less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Host route. If it is static, it would be better
	   not to override it, but add new one, so that
	   when cache entry will expire old pmtu
	   would return automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
		}
		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is connected route. Action: COW
	   2. It is gatewayed route or NONEXTHOP route. Action: clone it.
	 */
	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	if (nrt) {
		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
		}

		/* According to RFC 1981, detecting PMTU increase shouldn't be
		 * happened within 5 mins, the recommended timer is 10 mins.
		 * Here this route expiration time is set to ip6_rt_mtu_expires
		 * which is 10 mins. After 10 mins the decreased pmtu is expired
		 * and detecting PMTU increase will be automatically happened.
		 */
		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;

		ip6_ins_rt(nrt);
	}
out:
	dst_release(&rt->dst);
}

/*
 * Entry point for ICMPv6 Packet Too Big handling: apply the new path
 * MTU both globally and for the interface the report arrived on.
 */
void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct net *net = dev_net(dev);

	/*
	 * RFC 1981 states that a node "MUST reduce the size of the packets it
	 * is sending along the path" that caused the Packet Too Big message.
	 * Since it's not possible in the general case to determine which
	 * interface was used to send the original packet, we update the MTU
	 * on the interface that will be used to send future packets. We also
	 * update the MTU on the interface that received the Packet Too Big in
	 * case the original packet was forced out that interface with
	 * SO_BINDTODEVICE or similar. This is the next best thing to the
	 * correct behaviour, which would be to update the MTU on all
	 * interfaces.
	 */
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
	rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
}

/*
 *	Misc support functions
 */

/*
 * Allocate a new rt6_info mirroring @ort: copies metrics, flags (with
 * RTF_EXPIRES cleared), destination/source keys and table, and takes
 * fresh references on the device and inet6_dev.
 */
static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
{
	struct net *net = dev_net(ort->rt6i_dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);

	if (rt) {
		rt->dst.input = ort->dst.input;
		rt->dst.output = ort->dst.output;

		dst_copy_metrics(&rt->dst, &ort->dst);
		rt->dst.error = ort->dst.error;
		rt->dst.dev = ort->dst.dev;
		if (rt->dst.dev)
			dev_hold(rt->dst.dev);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);
		rt->dst.lastuse = jiffies;
		rt->rt6i_expires = 0;

		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif
		rt->rt6i_table = ort->rt6i_table;
	}
	return rt;
}

#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Look up an RA route-information route (RTF_ROUTEINFO|RTF_GATEWAY) for
 * prefix/gateway/ifindex in RT6_TABLE_INFO.  Returns a held entry or NULL.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	write_unlock_bh(&table->tb6_lock);
	return rt;
}

/*
 * Install a route learnt from an RA Route Information option and return
 * the (held) resulting entry, or NULL if the add failed.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref)
{
	struct fib6_config cfg = {
		.fc_table = RT6_TABLE_INFO,
		.fc_metric = IP6_RT_PRIO_USER,
		.fc_ifindex = ifindex,
		.fc_dst_len = prefixlen,
		.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
			    RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	ipv6_addr_copy(&cfg.fc_dst, prefix);
	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
#endif

/*
 * Look up the RA-learnt default route via @addr on @dev in
 * RT6_TABLE_DFLT.  Returns a held entry or NULL.
 */
struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
		if (dev == rt->rt6i_dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	write_unlock_bh(&table->tb6_lock);
	return rt;
}

/*
 * Install a default route via @gwaddr learnt from a Router
 * Advertisement and return the (held) resulting entry.
 */
struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table = RT6_TABLE_DFLT,
		.fc_metric = IP6_RT_PRIO_USER,
		.fc_ifindex = dev->ifindex,
		.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
			    RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}

/*
 * Delete all RA-learnt default/addrconf routes.  The lock is dropped
 * for each deletion, so the scan restarts from the head afterwards.
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (table == NULL)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
			/* ip6_del_rt() consumes this reference. */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}

/* Translate a legacy ioctl in6_rtmsg into a fib6_config (main table). */
static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = RT6_TABLE_MAIN;
	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	cfg->fc_flags = rtmsg->rtmsg_flags;

	cfg->fc_nlinfo.nl_net = net;

	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
}

/*
 * Legacy SIOCADDRT/SIOCDELRT ioctl handler.  Requires CAP_NET_ADMIN;
 * route table modifications are done under the RTNL.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch(cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!capable(CAP_NET_ADMIN))
			return -EPERM;
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return
				-EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}

/*
 *	Drop the packet on the floor
 */

/*
 * Common helper for the discard/prohibit dst ops: bump the appropriate
 * SNMP counter, send an ICMPv6 destination-unreachable with @code and
 * free the skb.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}

static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}

static int ip6_pkt_discard_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}

#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}

static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}

#endif

/*
 *	Allocate a dst for local (unicast / anycast) address.
 */

struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    int anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
	struct neighbour *neigh;

	if (rt == NULL) {
		if (net_ratelimit())
			pr_warning("IPv6: Maximum number of routes reached,"
				   " consider increasing route/max_size.\n");
		return ERR_PTR(-ENOMEM);
	}

	/* Local routes are bound to the loopback device. */
	dev_hold(net->loopback_dev);
	in6_dev_hold(idev);

	rt->dst.flags = DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_dev = net->loopback_dev;
	rt->rt6i_idev = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
	rt->dst.obsolete = -1;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;
	neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
	if (IS_ERR(neigh)) {
		dst_free(&rt->dst);

		/* We are casting this because that is the return
		 * value type.  But an errno encoded pointer is the
		 * same regardless of the underlying pointer type,
		 * and that's what we are returning.  So this is OK.
		 */
		return (struct rt6_info *) neigh;
	}
	rt->rt6i_nexthop = neigh;

	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
	rt->rt6i_dst.plen = 128;
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}

struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};

/*
 * fib6_clean_all/icmp6_clean_all callback: returning -1 requests
 * deletion of routes on @dev (or all devices when dev == NULL),
 * sparing the per-netns null entry.
 */
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
	const struct arg_dev_net *adn = arg;
	const struct net_device *dev = adn->dev;

	if ((rt->rt6i_dev == dev || dev == NULL) &&
	    rt != adn->net->ipv6.ip6_null_entry) {
		RT6_TRACE("deleted by ifdown %p\n", rt);
		return -1;
	}
	return 0;
}

/* Flush routes referencing @dev when the device goes down. */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, 0, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
}

struct rt6_mtu_change_arg
{
	struct net_device *dev;
	unsigned mtu;
};

/*
 * fib6_clean_all callback for device MTU changes: updates RTAX_MTU on
 * routes over arg->dev where the change is relevant (see comments below).
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (idev == NULL)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e.
jumbo frame) 2055 */ 2056 /* 2057 If new MTU is less than route PMTU, this new MTU will be the 2058 lowest MTU in the path, update the route PMTU to reflect PMTU 2059 decreases; if new MTU is greater than route PMTU, and the 2060 old MTU is the lowest MTU in the path, update the route PMTU 2061 to reflect the increase. In this case if the other nodes' MTU 2062 also have the lowest MTU, TOO BIG MESSAGE will be lead to 2063 PMTU discouvery. 2064 */ 2065 if (rt->rt6i_dev == arg->dev && 2066 !dst_metric_locked(&rt->dst, RTAX_MTU) && 2067 (dst_mtu(&rt->dst) >= arg->mtu || 2068 (dst_mtu(&rt->dst) < arg->mtu && 2069 dst_mtu(&rt->dst) == idev->cnf.mtu6))) { 2070 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); 2071 } 2072 return 0; 2073 } 2074 2075 void rt6_mtu_change(struct net_device *dev, unsigned mtu) 2076 { 2077 struct rt6_mtu_change_arg arg = { 2078 .dev = dev, 2079 .mtu = mtu, 2080 }; 2081 2082 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); 2083 } 2084 2085 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 2086 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 2087 [RTA_OIF] = { .type = NLA_U32 }, 2088 [RTA_IIF] = { .type = NLA_U32 }, 2089 [RTA_PRIORITY] = { .type = NLA_U32 }, 2090 [RTA_METRICS] = { .type = NLA_NESTED }, 2091 }; 2092 2093 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2094 struct fib6_config *cfg) 2095 { 2096 struct rtmsg *rtm; 2097 struct nlattr *tb[RTA_MAX+1]; 2098 int err; 2099 2100 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2101 if (err < 0) 2102 goto errout; 2103 2104 err = -EINVAL; 2105 rtm = nlmsg_data(nlh); 2106 memset(cfg, 0, sizeof(*cfg)); 2107 2108 cfg->fc_table = rtm->rtm_table; 2109 cfg->fc_dst_len = rtm->rtm_dst_len; 2110 cfg->fc_src_len = rtm->rtm_src_len; 2111 cfg->fc_flags = RTF_UP; 2112 cfg->fc_protocol = rtm->rtm_protocol; 2113 2114 if (rtm->rtm_type == RTN_UNREACHABLE) 2115 cfg->fc_flags |= RTF_REJECT; 2116 2117 if (rtm->rtm_type == RTN_LOCAL) 2118 
cfg->fc_flags |= RTF_LOCAL; 2119 2120 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 2121 cfg->fc_nlinfo.nlh = nlh; 2122 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2123 2124 if (tb[RTA_GATEWAY]) { 2125 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); 2126 cfg->fc_flags |= RTF_GATEWAY; 2127 } 2128 2129 if (tb[RTA_DST]) { 2130 int plen = (rtm->rtm_dst_len + 7) >> 3; 2131 2132 if (nla_len(tb[RTA_DST]) < plen) 2133 goto errout; 2134 2135 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 2136 } 2137 2138 if (tb[RTA_SRC]) { 2139 int plen = (rtm->rtm_src_len + 7) >> 3; 2140 2141 if (nla_len(tb[RTA_SRC]) < plen) 2142 goto errout; 2143 2144 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 2145 } 2146 2147 if (tb[RTA_OIF]) 2148 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 2149 2150 if (tb[RTA_PRIORITY]) 2151 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 2152 2153 if (tb[RTA_METRICS]) { 2154 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 2155 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 2156 } 2157 2158 if (tb[RTA_TABLE]) 2159 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2160 2161 err = 0; 2162 errout: 2163 return err; 2164 } 2165 2166 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2167 { 2168 struct fib6_config cfg; 2169 int err; 2170 2171 err = rtm_to_fib6_config(skb, nlh, &cfg); 2172 if (err < 0) 2173 return err; 2174 2175 return ip6_route_del(&cfg); 2176 } 2177 2178 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2179 { 2180 struct fib6_config cfg; 2181 int err; 2182 2183 err = rtm_to_fib6_config(skb, nlh, &cfg); 2184 if (err < 0) 2185 return err; 2186 2187 return ip6_route_add(&cfg); 2188 } 2189 2190 static inline size_t rt6_nlmsg_size(void) 2191 { 2192 return NLMSG_ALIGN(sizeof(struct rtmsg)) 2193 + nla_total_size(16) /* RTA_SRC */ 2194 + nla_total_size(16) /* RTA_DST */ 2195 + nla_total_size(16) /* RTA_GATEWAY */ 2196 + nla_total_size(16) /* RTA_PREFSRC */ 2197 + nla_total_size(4) /* RTA_TABLE */ 2198 + 
nla_total_size(4) /* RTA_IIF */ 2199 + nla_total_size(4) /* RTA_OIF */ 2200 + nla_total_size(4) /* RTA_PRIORITY */ 2201 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 2202 + nla_total_size(sizeof(struct rta_cacheinfo)); 2203 } 2204 2205 static int rt6_fill_node(struct net *net, 2206 struct sk_buff *skb, struct rt6_info *rt, 2207 struct in6_addr *dst, struct in6_addr *src, 2208 int iif, int type, u32 pid, u32 seq, 2209 int prefix, int nowait, unsigned int flags) 2210 { 2211 struct rtmsg *rtm; 2212 struct nlmsghdr *nlh; 2213 long expires; 2214 u32 table; 2215 2216 if (prefix) { /* user wants prefix routes only */ 2217 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 2218 /* success since this is not a prefix route */ 2219 return 1; 2220 } 2221 } 2222 2223 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); 2224 if (nlh == NULL) 2225 return -EMSGSIZE; 2226 2227 rtm = nlmsg_data(nlh); 2228 rtm->rtm_family = AF_INET6; 2229 rtm->rtm_dst_len = rt->rt6i_dst.plen; 2230 rtm->rtm_src_len = rt->rt6i_src.plen; 2231 rtm->rtm_tos = 0; 2232 if (rt->rt6i_table) 2233 table = rt->rt6i_table->tb6_id; 2234 else 2235 table = RT6_TABLE_UNSPEC; 2236 rtm->rtm_table = table; 2237 NLA_PUT_U32(skb, RTA_TABLE, table); 2238 if (rt->rt6i_flags&RTF_REJECT) 2239 rtm->rtm_type = RTN_UNREACHABLE; 2240 else if (rt->rt6i_flags&RTF_LOCAL) 2241 rtm->rtm_type = RTN_LOCAL; 2242 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) 2243 rtm->rtm_type = RTN_LOCAL; 2244 else 2245 rtm->rtm_type = RTN_UNICAST; 2246 rtm->rtm_flags = 0; 2247 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2248 rtm->rtm_protocol = rt->rt6i_protocol; 2249 if (rt->rt6i_flags&RTF_DYNAMIC) 2250 rtm->rtm_protocol = RTPROT_REDIRECT; 2251 else if (rt->rt6i_flags & RTF_ADDRCONF) 2252 rtm->rtm_protocol = RTPROT_KERNEL; 2253 else if (rt->rt6i_flags&RTF_DEFAULT) 2254 rtm->rtm_protocol = RTPROT_RA; 2255 2256 if (rt->rt6i_flags&RTF_CACHE) 2257 rtm->rtm_flags |= RTM_F_CLONED; 2258 2259 if (dst) { 2260 NLA_PUT(skb, RTA_DST, 16, dst); 2261 
rtm->rtm_dst_len = 128; 2262 } else if (rtm->rtm_dst_len) 2263 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); 2264 #ifdef CONFIG_IPV6_SUBTREES 2265 if (src) { 2266 NLA_PUT(skb, RTA_SRC, 16, src); 2267 rtm->rtm_src_len = 128; 2268 } else if (rtm->rtm_src_len) 2269 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); 2270 #endif 2271 if (iif) { 2272 #ifdef CONFIG_IPV6_MROUTE 2273 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { 2274 int err = ip6mr_get_route(net, skb, rtm, nowait); 2275 if (err <= 0) { 2276 if (!nowait) { 2277 if (err == 0) 2278 return 0; 2279 goto nla_put_failure; 2280 } else { 2281 if (err == -EMSGSIZE) 2282 goto nla_put_failure; 2283 } 2284 } 2285 } else 2286 #endif 2287 NLA_PUT_U32(skb, RTA_IIF, iif); 2288 } else if (dst) { 2289 struct inet6_dev *idev = ip6_dst_idev(&rt->dst); 2290 struct in6_addr saddr_buf; 2291 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, 2292 dst, 0, &saddr_buf) == 0) 2293 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); 2294 } 2295 2296 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) 2297 goto nla_put_failure; 2298 2299 if (rt->dst.neighbour) 2300 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key); 2301 2302 if (rt->dst.dev) 2303 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); 2304 2305 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); 2306 2307 if (!(rt->rt6i_flags & RTF_EXPIRES)) 2308 expires = 0; 2309 else if (rt->rt6i_expires - jiffies < INT_MAX) 2310 expires = rt->rt6i_expires - jiffies; 2311 else 2312 expires = INT_MAX; 2313 2314 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0, 2315 expires, rt->dst.error) < 0) 2316 goto nla_put_failure; 2317 2318 return nlmsg_end(skb, nlh); 2319 2320 nla_put_failure: 2321 nlmsg_cancel(skb, nlh); 2322 return -EMSGSIZE; 2323 } 2324 2325 int rt6_dump_route(struct rt6_info *rt, void *p_arg) 2326 { 2327 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 2328 int prefix; 2329 2330 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 2331 struct rtmsg 
*rtm = nlmsg_data(arg->cb->nlh); 2332 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; 2333 } else 2334 prefix = 0; 2335 2336 return rt6_fill_node(arg->net, 2337 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 2338 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, 2339 prefix, 0, NLM_F_MULTI); 2340 } 2341 2342 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2343 { 2344 struct net *net = sock_net(in_skb->sk); 2345 struct nlattr *tb[RTA_MAX+1]; 2346 struct rt6_info *rt; 2347 struct sk_buff *skb; 2348 struct rtmsg *rtm; 2349 struct flowi fl; 2350 int err, iif = 0; 2351 2352 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2353 if (err < 0) 2354 goto errout; 2355 2356 err = -EINVAL; 2357 memset(&fl, 0, sizeof(fl)); 2358 2359 if (tb[RTA_SRC]) { 2360 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 2361 goto errout; 2362 2363 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC])); 2364 } 2365 2366 if (tb[RTA_DST]) { 2367 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 2368 goto errout; 2369 2370 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST])); 2371 } 2372 2373 if (tb[RTA_IIF]) 2374 iif = nla_get_u32(tb[RTA_IIF]); 2375 2376 if (tb[RTA_OIF]) 2377 fl.oif = nla_get_u32(tb[RTA_OIF]); 2378 2379 if (iif) { 2380 struct net_device *dev; 2381 dev = __dev_get_by_index(net, iif); 2382 if (!dev) { 2383 err = -ENODEV; 2384 goto errout; 2385 } 2386 } 2387 2388 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2389 if (skb == NULL) { 2390 err = -ENOBUFS; 2391 goto errout; 2392 } 2393 2394 /* Reserve room for dummy headers, this skb can pass 2395 through good chunk of routing engine. 
2396 */ 2397 skb_reset_mac_header(skb); 2398 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2399 2400 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); 2401 skb_dst_set(skb, &rt->dst); 2402 2403 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, 2404 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 2405 nlh->nlmsg_seq, 0, 0, 0); 2406 if (err < 0) { 2407 kfree_skb(skb); 2408 goto errout; 2409 } 2410 2411 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); 2412 errout: 2413 return err; 2414 } 2415 2416 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) 2417 { 2418 struct sk_buff *skb; 2419 struct net *net = info->nl_net; 2420 u32 seq; 2421 int err; 2422 2423 err = -ENOBUFS; 2424 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0; 2425 2426 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); 2427 if (skb == NULL) 2428 goto errout; 2429 2430 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 2431 event, info->pid, seq, 0, 0, 0); 2432 if (err < 0) { 2433 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 2434 WARN_ON(err == -EMSGSIZE); 2435 kfree_skb(skb); 2436 goto errout; 2437 } 2438 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, 2439 info->nlh, gfp_any()); 2440 return; 2441 errout: 2442 if (err < 0) 2443 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 2444 } 2445 2446 static int ip6_route_dev_notify(struct notifier_block *this, 2447 unsigned long event, void *data) 2448 { 2449 struct net_device *dev = (struct net_device *)data; 2450 struct net *net = dev_net(dev); 2451 2452 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { 2453 net->ipv6.ip6_null_entry->dst.dev = dev; 2454 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 2455 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2456 net->ipv6.ip6_prohibit_entry->dst.dev = dev; 2457 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); 2458 net->ipv6.ip6_blk_hole_entry->dst.dev = dev; 2459 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); 2460 #endif 2461 } 2462 
	return NOTIFY_OK;
}

/*
 *	/proc
 */

#ifdef CONFIG_PROC_FS

/* NOTE(review): legacy cursor for the pre-seq_file /proc interface;
 * nothing in this chunk references it — looks like dead code, confirm
 * before removing. */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};

/*
 * Emit one line of /proc/net/ipv6_route for @rt.  Called for every
 * route via fib6_clean_all() from ipv6_route_show(); @p_arg is the
 * seq_file.  Always returns 0 so the walk continues.
 */
static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
	struct seq_file *m = p_arg;

	/* destination prefix and length */
	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
	/* source prefix and length (only meaningful with subtrees) */
	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
#else
	seq_puts(m, "00000000000000000000000000000000 00 ");
#endif

	/* next hop, or all-zeros when the route has no neighbour entry */
	if (rt->rt6i_nexthop) {
		seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
	} else {
		seq_puts(m, "00000000000000000000000000000000");
	}
	/* metric, refcount, use count, flags, device name */
	seq_printf(m, " %08x %08x %08x %08x %8s\n",
		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
		   rt->dst.__use, rt->rt6i_flags,
		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
	return 0;
}

/* seq_file show: dump every route of the namespace's FIB tables. */
static int ipv6_route_show(struct seq_file *m, void *v)
{
	struct net *net = (struct net *)m->private;
	fib6_clean_all(net, rt6_info_route, 0, m);
	return 0;
}

static int ipv6_route_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, ipv6_route_show);
}

static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release_net,
};

/* /proc/net/rt6_stats: per-namespace FIB node/route counters. */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
net->ipv6.rt6_stats->fib_discarded_routes); 2536 2537 return 0; 2538 } 2539 2540 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 2541 { 2542 return single_open_net(inode, file, rt6_stats_seq_show); 2543 } 2544 2545 static const struct file_operations rt6_stats_seq_fops = { 2546 .owner = THIS_MODULE, 2547 .open = rt6_stats_seq_open, 2548 .read = seq_read, 2549 .llseek = seq_lseek, 2550 .release = single_release_net, 2551 }; 2552 #endif /* CONFIG_PROC_FS */ 2553 2554 #ifdef CONFIG_SYSCTL 2555 2556 static 2557 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, 2558 void __user *buffer, size_t *lenp, loff_t *ppos) 2559 { 2560 struct net *net = current->nsproxy->net_ns; 2561 int delay = net->ipv6.sysctl.flush_delay; 2562 if (write) { 2563 proc_dointvec(ctl, write, buffer, lenp, ppos); 2564 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); 2565 return 0; 2566 } else 2567 return -EINVAL; 2568 } 2569 2570 ctl_table ipv6_route_table_template[] = { 2571 { 2572 .procname = "flush", 2573 .data = &init_net.ipv6.sysctl.flush_delay, 2574 .maxlen = sizeof(int), 2575 .mode = 0200, 2576 .proc_handler = ipv6_sysctl_rtcache_flush 2577 }, 2578 { 2579 .procname = "gc_thresh", 2580 .data = &ip6_dst_ops_template.gc_thresh, 2581 .maxlen = sizeof(int), 2582 .mode = 0644, 2583 .proc_handler = proc_dointvec, 2584 }, 2585 { 2586 .procname = "max_size", 2587 .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 2588 .maxlen = sizeof(int), 2589 .mode = 0644, 2590 .proc_handler = proc_dointvec, 2591 }, 2592 { 2593 .procname = "gc_min_interval", 2594 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2595 .maxlen = sizeof(int), 2596 .mode = 0644, 2597 .proc_handler = proc_dointvec_jiffies, 2598 }, 2599 { 2600 .procname = "gc_timeout", 2601 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 2602 .maxlen = sizeof(int), 2603 .mode = 0644, 2604 .proc_handler = proc_dointvec_jiffies, 2605 }, 2606 { 2607 .procname = "gc_interval", 2608 .data = 
					&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* millisecond view of the same gc_min_interval value */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};

/*
 * Duplicate the sysctl template for @net and repoint each entry's .data
 * at the namespace's own storage.  The indices below must stay in sync
 * with the order of ipv6_route_table_template.  Returns NULL on
 * allocation failure (caller handles it).  The caller owns the returned
 * table and frees it on namespace teardown.
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		/* gc_min_interval_ms aliases the same field as entry 3 */
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
	}

	return table;
}
#endif

/*
 * Per-namespace init: clone the dst_ops template, allocate the special
 * route entries and set sysctl defaults.  Uses goto-based unwind on
 * failure (labels at the end of the function).
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* Per-namespace copy of the null (unreachable) route; .path must
	 * point back at the entry itself and .ops at this net's dst_ops. */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	/* Same treatment for the prohibit and blackhole entries used by
	 * policy routing rules. */
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
#endif

	/* Default values for the net.ipv6.route.* sysctls. */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	/* IPv6 minimum MTU minus TCP (20) and IPv6 (40) header sizes */
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

#ifdef CONFIG_PROC_FS
	proc_net_fops_create(net,
"ipv6_route", 0, &ipv6_route_proc_fops); 2722 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2723 #endif 2724 net->ipv6.ip6_rt_gc_expire = 30*HZ; 2725 2726 ret = 0; 2727 out: 2728 return ret; 2729 2730 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2731 out_ip6_prohibit_entry: 2732 kfree(net->ipv6.ip6_prohibit_entry); 2733 out_ip6_null_entry: 2734 kfree(net->ipv6.ip6_null_entry); 2735 #endif 2736 out_ip6_dst_entries: 2737 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 2738 out_ip6_dst_ops: 2739 goto out; 2740 } 2741 2742 static void __net_exit ip6_route_net_exit(struct net *net) 2743 { 2744 #ifdef CONFIG_PROC_FS 2745 proc_net_remove(net, "ipv6_route"); 2746 proc_net_remove(net, "rt6_stats"); 2747 #endif 2748 kfree(net->ipv6.ip6_null_entry); 2749 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2750 kfree(net->ipv6.ip6_prohibit_entry); 2751 kfree(net->ipv6.ip6_blk_hole_entry); 2752 #endif 2753 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 2754 } 2755 2756 static struct pernet_operations ip6_route_net_ops = { 2757 .init = ip6_route_net_init, 2758 .exit = ip6_route_net_exit, 2759 }; 2760 2761 static struct notifier_block ip6_route_dev_notifier = { 2762 .notifier_call = ip6_route_dev_notify, 2763 .priority = 0, 2764 }; 2765 2766 int __init ip6_route_init(void) 2767 { 2768 int ret; 2769 2770 ret = -ENOMEM; 2771 ip6_dst_ops_template.kmem_cachep = 2772 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 2773 SLAB_HWCACHE_ALIGN, NULL); 2774 if (!ip6_dst_ops_template.kmem_cachep) 2775 goto out; 2776 2777 ret = dst_entries_init(&ip6_dst_blackhole_ops); 2778 if (ret) 2779 goto out_kmem_cache; 2780 2781 ret = register_pernet_subsys(&ip6_route_net_ops); 2782 if (ret) 2783 goto out_dst_entries; 2784 2785 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 2786 2787 /* Registering of the loopback is done before this portion of code, 2788 * the loopback reference in rt6_info will not be taken, do it 2789 * manually for init_net */ 2790 
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
		goto fib6_rules_init;

	/* NOTE(review): if this fails, the three rtnl handlers registered
	 * just above are not unregistered before unwinding — confirm
	 * whether that leak matters for this kernel's rtnl lifetime. */
	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto fib6_rules_init;

out:
	return ret;

	/* Unwind, reverse order of initialization.  (The label names reuse
	 * the names of the functions whose effects they undo; labels live
	 * in a separate C namespace, so e.g. `xfrm6_init:` is legal.) */
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}

/* Module teardown; mirrors ip6_route_init() in reverse order. */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}