1 /* 2 * Linux INET6 implementation 3 * FIB front-end. 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 */ 13 14 /* Changes: 15 * 16 * YOSHIFUJI Hideaki @USAGI 17 * reworked default router selection. 18 * - respect outgoing interface 19 * - select from (probably) reachable routers (i.e. 20 * routers in REACHABLE, STALE, DELAY or PROBE states). 21 * - always select the same router if it is (probably) 22 * reachable. otherwise, round-robin the list. 23 * Ville Nuorvala 24 * Fixed routing subtrees. 25 */ 26 27 #include <linux/capability.h> 28 #include <linux/errno.h> 29 #include <linux/types.h> 30 #include <linux/times.h> 31 #include <linux/socket.h> 32 #include <linux/sockios.h> 33 #include <linux/net.h> 34 #include <linux/route.h> 35 #include <linux/netdevice.h> 36 #include <linux/in6.h> 37 #include <linux/mroute6.h> 38 #include <linux/init.h> 39 #include <linux/if_arp.h> 40 #include <linux/proc_fs.h> 41 #include <linux/seq_file.h> 42 #include <linux/nsproxy.h> 43 #include <net/net_namespace.h> 44 #include <net/snmp.h> 45 #include <net/ipv6.h> 46 #include <net/ip6_fib.h> 47 #include <net/ip6_route.h> 48 #include <net/ndisc.h> 49 #include <net/addrconf.h> 50 #include <net/tcp.h> 51 #include <linux/rtnetlink.h> 52 #include <net/dst.h> 53 #include <net/xfrm.h> 54 #include <net/netevent.h> 55 #include <net/netlink.h> 56 57 #include <asm/uaccess.h> 58 59 #ifdef CONFIG_SYSCTL 60 #include <linux/sysctl.h> 61 #endif 62 63 /* Set to 3 to get tracing. */ 64 #define RT6_DEBUG 2 65 66 #if RT6_DEBUG >= 3 67 #define RDBG(x) printk x 68 #define RT6_TRACE(x...) printk(KERN_DEBUG x) 69 #else 70 #define RDBG(x) 71 #define RT6_TRACE(x...) do { ; } while (0) 72 #endif 73 74 #define CLONE_OFFLINK_ROUTE 0 75 76 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); 77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 78 static struct dst_entry *ip6_negative_advice(struct dst_entry *); 79 static void ip6_dst_destroy(struct dst_entry *); 80 static void ip6_dst_ifdown(struct dst_entry *, 81 struct net_device *dev, int how); 82 static int ip6_dst_gc(struct dst_ops *ops); 83 84 static int ip6_pkt_discard(struct sk_buff *skb); 85 static int ip6_pkt_discard_out(struct sk_buff *skb); 86 static void ip6_link_failure(struct sk_buff *skb); 87 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 88 89 #ifdef CONFIG_IPV6_ROUTE_INFO 90 static struct rt6_info *rt6_add_route_info(struct net *net, 91 struct in6_addr *prefix, int prefixlen, 92 struct in6_addr *gwaddr, int ifindex, 93 unsigned pref); 94 static struct rt6_info *rt6_get_route_info(struct net *net, 95 struct in6_addr *prefix, int prefixlen, 96 struct in6_addr *gwaddr, int ifindex); 97 #endif 98 99 static struct dst_ops ip6_dst_ops_template = { 100 .family = AF_INET6, 101 .protocol = cpu_to_be16(ETH_P_IPV6), 102 .gc = ip6_dst_gc, 103 .gc_thresh = 1024, 104 .check = ip6_dst_check, 105 .destroy = ip6_dst_destroy, 106 .ifdown = ip6_dst_ifdown, 107 .negative_advice = ip6_negative_advice, 108 .link_failure = ip6_link_failure, 109 .update_pmtu = ip6_rt_update_pmtu, 110 .local_out = __ip6_local_out, 111 .entries = ATOMIC_INIT(0), 112 }; 113 114 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 115 { 116 } 117 118 static struct dst_ops ip6_dst_blackhole_ops = { 119 .family = AF_INET6, 120 .protocol = cpu_to_be16(ETH_P_IPV6), 121 .destroy = ip6_dst_destroy, 122 .check = ip6_dst_check, 123 .update_pmtu = ip6_rt_blackhole_update_pmtu, 124 .entries = ATOMIC_INIT(0), 125 }; 126 127 static struct rt6_info ip6_null_entry_template = { 128 .u = { 129 .dst = { 130 .__refcnt = ATOMIC_INIT(1), 131 .__use = 1, 132 .obsolete = -1, 133 .error = -ENETUNREACH, 134 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 135 .input = ip6_pkt_discard, 136 .output = ip6_pkt_discard_out, 137 } 138 }, 139 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 140 .rt6i_metric = ~(u32) 0, 141 .rt6i_ref = ATOMIC_INIT(1), 142 }; 143 144 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 145 146 static int ip6_pkt_prohibit(struct sk_buff *skb); 147 static int ip6_pkt_prohibit_out(struct sk_buff *skb); 148 149 static struct rt6_info ip6_prohibit_entry_template = { 150 .u = { 151 .dst = { 152 .__refcnt = ATOMIC_INIT(1), 153 .__use = 1, 154 .obsolete = -1, 155 .error = -EACCES, 156 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 157 .input = ip6_pkt_prohibit, 158 .output = ip6_pkt_prohibit_out, 159 } 160 }, 161 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 162 .rt6i_metric = ~(u32) 0, 163 .rt6i_ref = ATOMIC_INIT(1), 164 }; 165 166 static struct rt6_info ip6_blk_hole_entry_template = { 167 .u = { 168 .dst = { 169 .__refcnt = ATOMIC_INIT(1), 170 .__use = 1, 171 .obsolete = -1, 172 .error = -EINVAL, 173 .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, 174 .input = dst_discard, 175 .output = dst_discard, 176 } 177 }, 178 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 179 .rt6i_metric = ~(u32) 0, 180 .rt6i_ref = ATOMIC_INIT(1), 181 }; 182 183 #endif 184 185 /* allocate dst with ip6_dst_ops */ 186 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops) 187 { 188 return (struct rt6_info *)dst_alloc(ops); 189 } 190 191 static void ip6_dst_destroy(struct dst_entry *dst) 192 { 193 struct rt6_info *rt = (struct rt6_info *)dst; 194 struct inet6_dev *idev = rt->rt6i_idev; 195 196 if (idev != NULL) { 197 rt->rt6i_idev = NULL; 198 in6_dev_put(idev); 199 } 200 } 201 202 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 203 int how) 204 { 205 struct rt6_info *rt = (struct rt6_info *)dst; 206 struct inet6_dev *idev = rt->rt6i_idev; 207 struct net_device *loopback_dev = 208 dev_net(dev)->loopback_dev; 209 210 if (dev != loopback_dev && idev != NULL && idev->dev == dev) { 211 struct inet6_dev *loopback_idev = 212 in6_dev_get(loopback_dev); 213 if (loopback_idev != NULL) { 214 rt->rt6i_idev = loopback_idev; 215 in6_dev_put(idev); 216 } 217 } 218 } 219 220 static __inline__ int rt6_check_expired(const struct rt6_info *rt) 221 { 222 return (rt->rt6i_flags & RTF_EXPIRES && 223 time_after(jiffies, rt->rt6i_expires)); 224 } 225 226 static inline int rt6_need_strict(struct in6_addr *daddr) 227 { 228 return (ipv6_addr_type(daddr) & 229 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)); 230 } 231 232 /* 233 * Route lookup. Any table->tb6_lock is implied. 234 */ 235 236 static inline struct rt6_info *rt6_device_match(struct net *net, 237 struct rt6_info *rt, 238 struct in6_addr *saddr, 239 int oif, 240 int flags) 241 { 242 struct rt6_info *local = NULL; 243 struct rt6_info *sprt; 244 245 if (!oif && ipv6_addr_any(saddr)) 246 goto out; 247 248 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) { 249 struct net_device *dev = sprt->rt6i_dev; 250 251 if (oif) { 252 if (dev->ifindex == oif) 253 return sprt; 254 if (dev->flags & IFF_LOOPBACK) { 255 if (sprt->rt6i_idev == NULL || 256 sprt->rt6i_idev->dev->ifindex != oif) { 257 if (flags & RT6_LOOKUP_F_IFACE && oif) 258 continue; 259 if (local && (!oif || 260 local->rt6i_idev->dev->ifindex == oif)) 261 continue; 262 } 263 local = sprt; 264 } 265 } else { 266 if (ipv6_chk_addr(net, saddr, dev, 267 flags & RT6_LOOKUP_F_IFACE)) 268 return sprt; 269 } 270 } 271 272 if (oif) { 273 if (local) 274 return local; 275 276 if (flags & RT6_LOOKUP_F_IFACE) 277 return net->ipv6.ip6_null_entry; 278 } 279 out: 280 return rt; 281 } 282 283 #ifdef CONFIG_IPV6_ROUTER_PREF 284 static void rt6_probe(struct rt6_info *rt) 285 { 286 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL; 287 /* 288 * Okay, this does not seem to be appropriate 289 * for now, however, we need to check if it 290 * is really so; aka Router Reachability Probing. 291 * 292 * Router Reachability Probe MUST be rate-limited 293 * to no more than one per minute. 294 */ 295 if (!neigh || (neigh->nud_state & NUD_VALID)) 296 return; 297 read_lock_bh(&neigh->lock); 298 if (!(neigh->nud_state & NUD_VALID) && 299 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { 300 struct in6_addr mcaddr; 301 struct in6_addr *target; 302 303 neigh->updated = jiffies; 304 read_unlock_bh(&neigh->lock); 305 306 target = (struct in6_addr *)&neigh->primary_key; 307 addrconf_addr_solict_mult(target, &mcaddr); 308 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); 309 } else 310 read_unlock_bh(&neigh->lock); 311 } 312 #else 313 static inline void rt6_probe(struct rt6_info *rt) 314 { 315 return; 316 } 317 #endif 318 319 /* 320 * Default Router Selection (RFC 2461 6.3.6) 321 */ 322 static inline int rt6_check_dev(struct rt6_info *rt, int oif) 323 { 324 struct net_device *dev = rt->rt6i_dev; 325 if (!oif || dev->ifindex == oif) 326 return 2; 327 if ((dev->flags & IFF_LOOPBACK) && 328 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) 329 return 1; 330 return 0; 331 } 332 333 static inline int rt6_check_neigh(struct rt6_info *rt) 334 { 335 struct neighbour *neigh = rt->rt6i_nexthop; 336 int m; 337 if (rt->rt6i_flags & RTF_NONEXTHOP || 338 !(rt->rt6i_flags & RTF_GATEWAY)) 339 m = 1; 340 else if (neigh) { 341 read_lock_bh(&neigh->lock); 342 if (neigh->nud_state & NUD_VALID) 343 m = 2; 344 #ifdef CONFIG_IPV6_ROUTER_PREF 345 else if (neigh->nud_state & NUD_FAILED) 346 m = 0; 347 #endif 348 else 349 m = 1; 350 read_unlock_bh(&neigh->lock); 351 } else 352 m = 0; 353 return m; 354 } 355 356 static int rt6_score_route(struct rt6_info *rt, int oif, 357 int strict) 358 { 359 int m, n; 360 361 m = rt6_check_dev(rt, oif); 362 if (!m && (strict & RT6_LOOKUP_F_IFACE)) 363 return -1; 364 #ifdef CONFIG_IPV6_ROUTER_PREF 365 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; 366 #endif 367 n = rt6_check_neigh(rt); 368 if (!n && (strict & RT6_LOOKUP_F_REACHABLE)) 369 return -1; 370 return m; 371 } 372 373 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, 374 int *mpri, struct rt6_info *match) 375 { 376 int m; 377 378 if (rt6_check_expired(rt)) 379 goto out; 380 381 m = rt6_score_route(rt, oif, strict); 382 if (m < 0) 383 goto out; 384 385 if (m > *mpri) { 386 if (strict & RT6_LOOKUP_F_REACHABLE) 387 rt6_probe(match); 388 *mpri = m; 389 match = rt; 390 } else if (strict & RT6_LOOKUP_F_REACHABLE) { 391 rt6_probe(rt); 392 } 393 394 out: 395 return match; 396 } 397 398 static struct rt6_info *find_rr_leaf(struct fib6_node *fn, 399 struct rt6_info *rr_head, 400 u32 metric, int oif, int strict) 401 { 402 struct rt6_info *rt, *match; 403 int mpri = -1; 404 405 match = NULL; 406 for (rt = rr_head; rt && rt->rt6i_metric == metric; 407 rt = rt->u.dst.rt6_next) 408 match = find_match(rt, oif, strict, &mpri, match); 409 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; 410 rt = rt->u.dst.rt6_next) 411 match = find_match(rt, oif, strict, &mpri, match); 412 413 return match; 414 } 415 416 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) 417 { 418 struct rt6_info *match, *rt0; 419 struct net *net; 420 421 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n", 422 __func__, fn->leaf, oif); 423 424 rt0 = fn->rr_ptr; 425 if (!rt0) 426 fn->rr_ptr = rt0 = fn->leaf; 427 428 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict); 429 430 if (!match && 431 (strict & RT6_LOOKUP_F_REACHABLE)) { 432 struct rt6_info *next = rt0->u.dst.rt6_next; 433 434 /* no entries matched; do round-robin */ 435 if (!next || next->rt6i_metric != rt0->rt6i_metric) 436 next = fn->leaf; 437 438 if (next != rt0) 439 fn->rr_ptr = next; 440 } 441 442 RT6_TRACE("%s() => %p\n", 443 __func__, match); 444 445 net = dev_net(rt0->rt6i_dev); 446 return (match ? match : net->ipv6.ip6_null_entry); 447 } 448 449 #ifdef CONFIG_IPV6_ROUTE_INFO 450 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 451 struct in6_addr *gwaddr) 452 { 453 struct net *net = dev_net(dev); 454 struct route_info *rinfo = (struct route_info *) opt; 455 struct in6_addr prefix_buf, *prefix; 456 unsigned int pref; 457 unsigned long lifetime; 458 struct rt6_info *rt; 459 460 if (len < sizeof(struct route_info)) { 461 return -EINVAL; 462 } 463 464 /* Sanity check for prefix_len and length */ 465 if (rinfo->length > 3) { 466 return -EINVAL; 467 } else if (rinfo->prefix_len > 128) { 468 return -EINVAL; 469 } else if (rinfo->prefix_len > 64) { 470 if (rinfo->length < 2) { 471 return -EINVAL; 472 } 473 } else if (rinfo->prefix_len > 0) { 474 if (rinfo->length < 1) { 475 return -EINVAL; 476 } 477 } 478 479 pref = rinfo->route_pref; 480 if (pref == ICMPV6_ROUTER_PREF_INVALID) 481 pref = ICMPV6_ROUTER_PREF_MEDIUM; 482 483 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); 484 485 if (rinfo->length == 3) 486 prefix = (struct in6_addr *)rinfo->prefix; 487 else { 488 /* this function is safe */ 489 ipv6_addr_prefix(&prefix_buf, 490 (struct in6_addr *)rinfo->prefix, 491 rinfo->prefix_len); 492 prefix = &prefix_buf; 493 } 494 495 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, 496 dev->ifindex); 497 498 if (rt && !lifetime) { 499 ip6_del_rt(rt); 500 rt = NULL; 501 } 502 503 if (!rt && lifetime) 504 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, 505 pref); 506 else if (rt) 507 rt->rt6i_flags = RTF_ROUTEINFO | 508 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 509 510 if (rt) { 511 if (!addrconf_finite_timeout(lifetime)) { 512 rt->rt6i_flags &= ~RTF_EXPIRES; 513 } else { 514 rt->rt6i_expires = jiffies + HZ * lifetime; 515 rt->rt6i_flags |= RTF_EXPIRES; 516 } 517 dst_release(&rt->u.dst); 518 } 519 return 0; 520 } 521 #endif 522 523 #define BACKTRACK(__net, saddr) \ 524 do { \ 525 if (rt == __net->ipv6.ip6_null_entry) { \ 526 struct fib6_node *pn; \ 527 while (1) { \ 528 if (fn->fn_flags & RTN_TL_ROOT) \ 529 goto out; \ 530 pn = fn->parent; \ 531 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ 532 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ 533 else \ 534 fn = pn; \ 535 if (fn->fn_flags & RTN_RTINFO) \ 536 goto restart; \ 537 } \ 538 } \ 539 } while(0) 540 541 static struct rt6_info *ip6_pol_route_lookup(struct net *net, 542 struct fib6_table *table, 543 struct flowi *fl, int flags) 544 { 545 struct fib6_node *fn; 546 struct rt6_info *rt; 547 548 read_lock_bh(&table->tb6_lock); 549 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 550 restart: 551 rt = fn->leaf; 552 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags); 553 BACKTRACK(net, &fl->fl6_src); 554 out: 555 dst_use(&rt->u.dst, jiffies); 556 read_unlock_bh(&table->tb6_lock); 557 return rt; 558 559 } 560 561 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, 562 const struct in6_addr *saddr, int oif, int strict) 563 { 564 struct flowi fl = { 565 .oif = oif, 566 .nl_u = { 567 .ip6_u = { 568 .daddr = *daddr, 569 }, 570 }, 571 }; 572 struct dst_entry *dst; 573 int flags = strict ? RT6_LOOKUP_F_IFACE : 0; 574 575 if (saddr) { 576 memcpy(&fl.fl6_src, saddr, sizeof(*saddr)); 577 flags |= RT6_LOOKUP_F_HAS_SADDR; 578 } 579 580 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup); 581 if (dst->error == 0) 582 return (struct rt6_info *) dst; 583 584 dst_release(dst); 585 586 return NULL; 587 } 588 589 EXPORT_SYMBOL(rt6_lookup); 590 591 /* ip6_ins_rt is called with FREE table->tb6_lock. 592 It takes new route entry, the addition fails by any reason the 593 route is freed. In any case, if caller does not hold it, it may 594 be destroyed. 595 */ 596 597 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) 598 { 599 int err; 600 struct fib6_table *table; 601 602 table = rt->rt6i_table; 603 write_lock_bh(&table->tb6_lock); 604 err = fib6_add(&table->tb6_root, rt, info); 605 write_unlock_bh(&table->tb6_lock); 606 607 return err; 608 } 609 610 int ip6_ins_rt(struct rt6_info *rt) 611 { 612 struct nl_info info = { 613 .nl_net = dev_net(rt->rt6i_dev), 614 }; 615 return __ip6_ins_rt(rt, &info); 616 } 617 618 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, 619 struct in6_addr *saddr) 620 { 621 struct rt6_info *rt; 622 623 /* 624 * Clone the route. 625 */ 626 627 rt = ip6_rt_copy(ort); 628 629 if (rt) { 630 struct neighbour *neigh; 631 int attempts = !in_softirq(); 632 633 if (!(rt->rt6i_flags&RTF_GATEWAY)) { 634 if (rt->rt6i_dst.plen != 128 && 635 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr)) 636 rt->rt6i_flags |= RTF_ANYCAST; 637 ipv6_addr_copy(&rt->rt6i_gateway, daddr); 638 } 639 640 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 641 rt->rt6i_dst.plen = 128; 642 rt->rt6i_flags |= RTF_CACHE; 643 rt->u.dst.flags |= DST_HOST; 644 645 #ifdef CONFIG_IPV6_SUBTREES 646 if (rt->rt6i_src.plen && saddr) { 647 ipv6_addr_copy(&rt->rt6i_src.addr, saddr); 648 rt->rt6i_src.plen = 128; 649 } 650 #endif 651 652 retry: 653 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 654 if (IS_ERR(neigh)) { 655 struct net *net = dev_net(rt->rt6i_dev); 656 int saved_rt_min_interval = 657 net->ipv6.sysctl.ip6_rt_gc_min_interval; 658 int saved_rt_elasticity = 659 net->ipv6.sysctl.ip6_rt_gc_elasticity; 660 661 if (attempts-- > 0) { 662 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; 663 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; 664 665 ip6_dst_gc(net->ipv6.ip6_dst_ops); 666 667 net->ipv6.sysctl.ip6_rt_gc_elasticity = 668 saved_rt_elasticity; 669 net->ipv6.sysctl.ip6_rt_gc_min_interval = 670 saved_rt_min_interval; 671 goto retry; 672 } 673 674 if (net_ratelimit()) 675 printk(KERN_WARNING 676 "Neighbour table overflow.\n"); 677 dst_free(&rt->u.dst); 678 return NULL; 679 } 680 rt->rt6i_nexthop = neigh; 681 682 } 683 684 return rt; 685 } 686 687 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr) 688 { 689 struct rt6_info *rt = ip6_rt_copy(ort); 690 if (rt) { 691 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr); 692 rt->rt6i_dst.plen = 128; 693 rt->rt6i_flags |= RTF_CACHE; 694 rt->u.dst.flags |= DST_HOST; 695 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop); 696 } 697 return rt; 698 } 699 700 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, 701 struct flowi *fl, int flags) 702 { 703 struct fib6_node *fn; 704 struct rt6_info *rt, *nrt; 705 int strict = 0; 706 int attempts = 3; 707 int err; 708 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; 709 710 strict |= flags & RT6_LOOKUP_F_IFACE; 711 712 relookup: 713 read_lock_bh(&table->tb6_lock); 714 715 restart_2: 716 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 717 718 restart: 719 rt = rt6_select(fn, oif, strict | reachable); 720 721 BACKTRACK(net, &fl->fl6_src); 722 if (rt == net->ipv6.ip6_null_entry || 723 rt->rt6i_flags & RTF_CACHE) 724 goto out; 725 726 dst_hold(&rt->u.dst); 727 read_unlock_bh(&table->tb6_lock); 728 729 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 730 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); 731 else { 732 #if CLONE_OFFLINK_ROUTE 733 nrt = rt6_alloc_clone(rt, &fl->fl6_dst); 734 #else 735 goto out2; 736 #endif 737 } 738 739 dst_release(&rt->u.dst); 740 rt = nrt ? : net->ipv6.ip6_null_entry; 741 742 dst_hold(&rt->u.dst); 743 if (nrt) { 744 err = ip6_ins_rt(nrt); 745 if (!err) 746 goto out2; 747 } 748 749 if (--attempts <= 0) 750 goto out2; 751 752 /* 753 * Race condition! In the gap, when table->tb6_lock was 754 * released someone could insert this route. Relookup. 755 */ 756 dst_release(&rt->u.dst); 757 goto relookup; 758 759 out: 760 if (reachable) { 761 reachable = 0; 762 goto restart_2; 763 } 764 dst_hold(&rt->u.dst); 765 read_unlock_bh(&table->tb6_lock); 766 out2: 767 rt->u.dst.lastuse = jiffies; 768 rt->u.dst.__use++; 769 770 return rt; 771 } 772 773 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, 774 struct flowi *fl, int flags) 775 { 776 return ip6_pol_route(net, table, fl->iif, fl, flags); 777 } 778 779 void ip6_route_input(struct sk_buff *skb) 780 { 781 struct ipv6hdr *iph = ipv6_hdr(skb); 782 struct net *net = dev_net(skb->dev); 783 int flags = RT6_LOOKUP_F_HAS_SADDR; 784 struct flowi fl = { 785 .iif = skb->dev->ifindex, 786 .nl_u = { 787 .ip6_u = { 788 .daddr = iph->daddr, 789 .saddr = iph->saddr, 790 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK, 791 }, 792 }, 793 .mark = skb->mark, 794 .proto = iph->nexthdr, 795 }; 796 797 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG) 798 flags |= RT6_LOOKUP_F_IFACE; 799 800 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input); 801 } 802 803 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, 804 struct flowi *fl, int flags) 805 { 806 return ip6_pol_route(net, table, fl->oif, fl, flags); 807 } 808 809 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk, 810 struct flowi *fl) 811 { 812 int flags = 0; 813 814 if (rt6_need_strict(&fl->fl6_dst)) 815 flags |= RT6_LOOKUP_F_IFACE; 816 817 if (!ipv6_addr_any(&fl->fl6_src)) 818 flags |= RT6_LOOKUP_F_HAS_SADDR; 819 else if (sk) { 820 unsigned int prefs = inet6_sk(sk)->srcprefs; 821 if (prefs & IPV6_PREFER_SRC_TMP) 822 flags |= RT6_LOOKUP_F_SRCPREF_TMP; 823 if (prefs & IPV6_PREFER_SRC_PUBLIC) 824 flags |= RT6_LOOKUP_F_SRCPREF_PUBLIC; 825 if (prefs & IPV6_PREFER_SRC_COA) 826 flags |= RT6_LOOKUP_F_SRCPREF_COA; 827 } 828 829 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output); 830 } 831 832 EXPORT_SYMBOL(ip6_route_output); 833 834 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl) 835 { 836 struct rt6_info *ort = (struct rt6_info *) *dstp; 837 struct rt6_info *rt = (struct rt6_info *) 838 dst_alloc(&ip6_dst_blackhole_ops); 839 struct dst_entry *new = NULL; 840 841 if (rt) { 842 new = &rt->u.dst; 843 844 atomic_set(&new->__refcnt, 1); 845 new->__use = 1; 846 new->input = dst_discard; 847 new->output = dst_discard; 848 849 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 850 new->dev = ort->u.dst.dev; 851 if (new->dev) 852 dev_hold(new->dev); 853 rt->rt6i_idev = ort->rt6i_idev; 854 if (rt->rt6i_idev) 855 in6_dev_hold(rt->rt6i_idev); 856 rt->rt6i_expires = 0; 857 858 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 859 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 860 rt->rt6i_metric = 0; 861 862 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 863 #ifdef CONFIG_IPV6_SUBTREES 864 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 865 #endif 866 867 dst_free(new); 868 } 869 870 dst_release(*dstp); 871 *dstp = new; 872 return (new ? 0 : -ENOMEM); 873 } 874 EXPORT_SYMBOL_GPL(ip6_dst_blackhole); 875 876 /* 877 * Destination cache support functions 878 */ 879 880 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 881 { 882 struct rt6_info *rt; 883 884 rt = (struct rt6_info *) dst; 885 886 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) 887 return dst; 888 889 return NULL; 890 } 891 892 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) 893 { 894 struct rt6_info *rt = (struct rt6_info *) dst; 895 896 if (rt) { 897 if (rt->rt6i_flags & RTF_CACHE) 898 ip6_del_rt(rt); 899 else 900 dst_release(dst); 901 } 902 return NULL; 903 } 904 905 static void ip6_link_failure(struct sk_buff *skb) 906 { 907 struct rt6_info *rt; 908 909 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev); 910 911 rt = (struct rt6_info *) skb->dst; 912 if (rt) { 913 if (rt->rt6i_flags&RTF_CACHE) { 914 dst_set_expires(&rt->u.dst, 0); 915 rt->rt6i_flags |= RTF_EXPIRES; 916 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) 917 rt->rt6i_node->fn_sernum = -1; 918 } 919 } 920 921 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 922 { 923 struct rt6_info *rt6 = (struct rt6_info*)dst; 924 925 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 926 rt6->rt6i_flags |= RTF_MODIFIED; 927 if (mtu < IPV6_MIN_MTU) { 928 mtu = IPV6_MIN_MTU; 929 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 930 } 931 dst->metrics[RTAX_MTU-1] = mtu; 932 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst); 933 } 934 } 935 936 static int ipv6_get_mtu(struct net_device *dev); 937 938 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu) 939 { 940 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 941 942 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) 943 mtu = net->ipv6.sysctl.ip6_rt_min_advmss; 944 945 /* 946 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 947 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 948 * IPV6_MAXPLEN is also valid and means: "any MSS, 949 * rely only on pmtu discovery" 950 */ 951 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 952 mtu = IPV6_MAXPLEN; 953 return mtu; 954 } 955 956 static struct dst_entry *icmp6_dst_gc_list; 957 static DEFINE_SPINLOCK(icmp6_dst_lock); 958 959 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 960 struct neighbour *neigh, 961 const struct in6_addr *addr) 962 { 963 struct rt6_info *rt; 964 struct inet6_dev *idev = in6_dev_get(dev); 965 struct net *net = dev_net(dev); 966 967 if (unlikely(idev == NULL)) 968 return NULL; 969 970 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); 971 if (unlikely(rt == NULL)) { 972 in6_dev_put(idev); 973 goto out; 974 } 975 976 dev_hold(dev); 977 if (neigh) 978 neigh_hold(neigh); 979 else { 980 neigh = ndisc_get_neigh(dev, addr); 981 if (IS_ERR(neigh)) 982 neigh = NULL; 983 } 984 985 rt->rt6i_dev = dev; 986 rt->rt6i_idev = idev; 987 rt->rt6i_nexthop = neigh; 988 atomic_set(&rt->u.dst.__refcnt, 1); 989 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; 990 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 991 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 992 rt->u.dst.output = ip6_output; 993 994 #if 0 /* there's no chance to use these for ndisc */ 995 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 996 ? DST_HOST 997 : 0; 998 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 999 rt->rt6i_dst.plen = 128; 1000 #endif 1001 1002 spin_lock_bh(&icmp6_dst_lock); 1003 rt->u.dst.next = icmp6_dst_gc_list; 1004 icmp6_dst_gc_list = &rt->u.dst; 1005 spin_unlock_bh(&icmp6_dst_lock); 1006 1007 fib6_force_start_gc(net); 1008 1009 out: 1010 return &rt->u.dst; 1011 } 1012 1013 int icmp6_dst_gc(void) 1014 { 1015 struct dst_entry *dst, *next, **pprev; 1016 int more = 0; 1017 1018 next = NULL; 1019 1020 spin_lock_bh(&icmp6_dst_lock); 1021 pprev = &icmp6_dst_gc_list; 1022 1023 while ((dst = *pprev) != NULL) { 1024 if (!atomic_read(&dst->__refcnt)) { 1025 *pprev = dst->next; 1026 dst_free(dst); 1027 } else { 1028 pprev = &dst->next; 1029 ++more; 1030 } 1031 } 1032 1033 spin_unlock_bh(&icmp6_dst_lock); 1034 1035 return more; 1036 } 1037 1038 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), 1039 void *arg) 1040 { 1041 struct dst_entry *dst, **pprev; 1042 1043 spin_lock_bh(&icmp6_dst_lock); 1044 pprev = &icmp6_dst_gc_list; 1045 while ((dst = *pprev) != NULL) { 1046 struct rt6_info *rt = (struct rt6_info *) dst; 1047 if (func(rt, arg)) { 1048 *pprev = dst->next; 1049 dst_free(dst); 1050 } else { 1051 pprev = &dst->next; 1052 } 1053 } 1054 spin_unlock_bh(&icmp6_dst_lock); 1055 } 1056 1057 static int ip6_dst_gc(struct dst_ops *ops) 1058 { 1059 unsigned long now = jiffies; 1060 struct net *net = ops->dst_net; 1061 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; 1062 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; 1063 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 1064 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 1065 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 1066 1067 if (time_after(rt_last_gc + rt_min_interval, now) && 1068 atomic_read(&ops->entries) <= rt_max_size) 1069 goto out; 1070 1071 net->ipv6.ip6_rt_gc_expire++; 1072 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); 1073 net->ipv6.ip6_rt_last_gc = now; 1074 if (atomic_read(&ops->entries) < ops->gc_thresh) 1075 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1076 out: 1077 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1078 return (atomic_read(&ops->entries) > rt_max_size); 1079 } 1080 1081 /* Clean host part of a prefix. Not necessary in radix tree, 1082 but results in cleaner routing tables. 1083 1084 Remove it only when all the things will work! 1085 */ 1086 1087 static int ipv6_get_mtu(struct net_device *dev) 1088 { 1089 int mtu = IPV6_MIN_MTU; 1090 struct inet6_dev *idev; 1091 1092 idev = in6_dev_get(dev); 1093 if (idev) { 1094 mtu = idev->cnf.mtu6; 1095 in6_dev_put(idev); 1096 } 1097 return mtu; 1098 } 1099 1100 int ip6_dst_hoplimit(struct dst_entry *dst) 1101 { 1102 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT); 1103 if (hoplimit < 0) { 1104 struct net_device *dev = dst->dev; 1105 struct inet6_dev *idev = in6_dev_get(dev); 1106 if (idev) { 1107 hoplimit = idev->cnf.hop_limit; 1108 in6_dev_put(idev); 1109 } else 1110 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; 1111 } 1112 return hoplimit; 1113 } 1114 1115 /* 1116 * 1117 */ 1118 1119 int ip6_route_add(struct fib6_config *cfg) 1120 { 1121 int err; 1122 struct net *net = cfg->fc_nlinfo.nl_net; 1123 struct rt6_info *rt = NULL; 1124 struct net_device *dev = NULL; 1125 struct inet6_dev *idev = NULL; 1126 struct fib6_table *table; 1127 int addr_type; 1128 1129 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1130 return -EINVAL; 1131 #ifndef CONFIG_IPV6_SUBTREES 1132 if (cfg->fc_src_len) 1133 return -EINVAL; 1134 #endif 1135 if (cfg->fc_ifindex) { 1136 err = -ENODEV; 1137 dev = dev_get_by_index(net, cfg->fc_ifindex); 1138 if (!dev) 1139 goto out; 1140 idev = in6_dev_get(dev); 1141 if (!idev) 1142 goto out; 1143 } 1144 1145 if (cfg->fc_metric == 0) 1146 cfg->fc_metric = IP6_RT_PRIO_USER; 1147 1148 table = fib6_new_table(net, cfg->fc_table); 1149 if (table == NULL) { 1150 err = -ENOBUFS; 1151 goto out; 1152 } 1153 1154 rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); 1155 1156 if (rt == NULL) { 1157 err = -ENOMEM; 1158 goto out; 1159 } 1160 1161 rt->u.dst.obsolete = -1; 1162 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? 1163 jiffies + clock_t_to_jiffies(cfg->fc_expires) : 1164 0; 1165 1166 if (cfg->fc_protocol == RTPROT_UNSPEC) 1167 cfg->fc_protocol = RTPROT_BOOT; 1168 rt->rt6i_protocol = cfg->fc_protocol; 1169 1170 addr_type = ipv6_addr_type(&cfg->fc_dst); 1171 1172 if (addr_type & IPV6_ADDR_MULTICAST) 1173 rt->u.dst.input = ip6_mc_input; 1174 else 1175 rt->u.dst.input = ip6_forward; 1176 1177 rt->u.dst.output = ip6_output; 1178 1179 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1180 rt->rt6i_dst.plen = cfg->fc_dst_len; 1181 if (rt->rt6i_dst.plen == 128) 1182 rt->u.dst.flags = DST_HOST; 1183 1184 #ifdef CONFIG_IPV6_SUBTREES 1185 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1186 rt->rt6i_src.plen = cfg->fc_src_len; 1187 #endif 1188 1189 rt->rt6i_metric = cfg->fc_metric; 1190 1191 /* We cannot add true routes via loopback here, 1192 they would result in kernel looping; promote them to reject routes 1193 */ 1194 if ((cfg->fc_flags & RTF_REJECT) || 1195 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { 1196 /* hold loopback dev/idev if we haven't done so. */ 1197 if (dev != net->loopback_dev) { 1198 if (dev) { 1199 dev_put(dev); 1200 in6_dev_put(idev); 1201 } 1202 dev = net->loopback_dev; 1203 dev_hold(dev); 1204 idev = in6_dev_get(dev); 1205 if (!idev) { 1206 err = -ENODEV; 1207 goto out; 1208 } 1209 } 1210 rt->u.dst.output = ip6_pkt_discard_out; 1211 rt->u.dst.input = ip6_pkt_discard; 1212 rt->u.dst.error = -ENETUNREACH; 1213 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1214 goto install_route; 1215 } 1216 1217 if (cfg->fc_flags & RTF_GATEWAY) { 1218 struct in6_addr *gw_addr; 1219 int gwa_type; 1220 1221 gw_addr = &cfg->fc_gateway; 1222 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr); 1223 gwa_type = ipv6_addr_type(gw_addr); 1224 1225 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 1226 struct rt6_info *grt; 1227 1228 /* IPv6 strictly inhibits using not link-local 1229 addresses as nexthop address. 1230 Otherwise, router will not able to send redirects. 1231 It is very good, but in some (rare!) circumstances 1232 (SIT, PtP, NBMA NOARP links) it is handy to allow 1233 some exceptions. --ANK 1234 */ 1235 err = -EINVAL; 1236 if (!(gwa_type&IPV6_ADDR_UNICAST)) 1237 goto out; 1238 1239 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); 1240 1241 err = -EHOSTUNREACH; 1242 if (grt == NULL) 1243 goto out; 1244 if (dev) { 1245 if (dev != grt->rt6i_dev) { 1246 dst_release(&grt->u.dst); 1247 goto out; 1248 } 1249 } else { 1250 dev = grt->rt6i_dev; 1251 idev = grt->rt6i_idev; 1252 dev_hold(dev); 1253 in6_dev_hold(grt->rt6i_idev); 1254 } 1255 if (!(grt->rt6i_flags&RTF_GATEWAY)) 1256 err = 0; 1257 dst_release(&grt->u.dst); 1258 1259 if (err) 1260 goto out; 1261 } 1262 err = -EINVAL; 1263 if (dev == NULL || (dev->flags&IFF_LOOPBACK)) 1264 goto out; 1265 } 1266 1267 err = -ENODEV; 1268 if (dev == NULL) 1269 goto out; 1270 1271 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { 1272 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); 1273 if (IS_ERR(rt->rt6i_nexthop)) { 1274 err = PTR_ERR(rt->rt6i_nexthop); 1275 rt->rt6i_nexthop = NULL; 1276 goto out; 1277 } 1278 } 1279 1280 rt->rt6i_flags = cfg->fc_flags; 1281 1282 install_route: 1283 if (cfg->fc_mx) { 1284 struct nlattr *nla; 1285 int remaining; 1286 1287 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 1288 int type = nla_type(nla); 1289 1290 if (type) { 1291 if (type > RTAX_MAX) { 1292 err = -EINVAL; 1293 goto out; 1294 } 1295 1296 rt->u.dst.metrics[type - 1] = nla_get_u32(nla); 1297 } 1298 } 1299 } 1300 1301 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0) 1302 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1303 if (!dst_mtu(&rt->u.dst)) 1304 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); 1305 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS)) 1306 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 1307 rt->u.dst.dev = dev; 1308 rt->rt6i_idev = idev; 1309 rt->rt6i_table = table; 1310 1311 cfg->fc_nlinfo.nl_net = dev_net(dev); 1312 1313 return __ip6_ins_rt(rt, &cfg->fc_nlinfo); 1314 1315 out: 1316 if (dev) 1317 dev_put(dev); 1318 if (idev) 1319 in6_dev_put(idev); 1320 if (rt) 1321 dst_free(&rt->u.dst); 1322 return err; 1323 } 1324 1325 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) 1326 { 1327 int err; 1328 struct fib6_table *table; 1329 struct net *net = dev_net(rt->rt6i_dev); 1330 1331 if (rt == net->ipv6.ip6_null_entry) 1332 return -ENOENT; 1333 1334 table = rt->rt6i_table; 1335 write_lock_bh(&table->tb6_lock); 1336 1337 err = fib6_del(rt, info); 1338 dst_release(&rt->u.dst); 1339 1340 write_unlock_bh(&table->tb6_lock); 1341 1342 return err; 1343 } 1344 1345 int ip6_del_rt(struct rt6_info *rt) 1346 { 1347 struct nl_info info = { 1348 .nl_net = dev_net(rt->rt6i_dev), 1349 }; 1350 return __ip6_del_rt(rt, &info); 1351 } 1352 1353 static int ip6_route_del(struct fib6_config *cfg) 1354 { 1355 struct fib6_table *table; 1356 struct fib6_node *fn; 1357 struct rt6_info *rt; 1358 int err = -ESRCH; 1359 1360 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); 1361 if (table == NULL) 1362 return err; 1363 1364 read_lock_bh(&table->tb6_lock); 1365 1366 fn = fib6_locate(&table->tb6_root, 1367 &cfg->fc_dst, cfg->fc_dst_len, 1368 &cfg->fc_src, cfg->fc_src_len); 1369 1370 if (fn) { 1371 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1372 if (cfg->fc_ifindex && 1373 (rt->rt6i_dev == NULL || 1374 rt->rt6i_dev->ifindex != cfg->fc_ifindex)) 1375 continue; 1376 if (cfg->fc_flags & RTF_GATEWAY && 1377 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) 1378 continue; 1379 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) 1380 continue; 1381 dst_hold(&rt->u.dst); 1382 read_unlock_bh(&table->tb6_lock); 1383 1384 return __ip6_del_rt(rt, &cfg->fc_nlinfo); 1385 } 1386 } 1387 read_unlock_bh(&table->tb6_lock); 1388 1389 return err; 1390 } 1391 1392 /* 1393 * Handle redirects 1394 */ 1395 struct ip6rd_flowi { 1396 struct flowi fl; 1397 struct in6_addr gateway; 1398 }; 1399 1400 static struct rt6_info *__ip6_route_redirect(struct net *net, 1401 struct fib6_table *table, 1402 struct flowi *fl, 1403 int flags) 1404 { 1405 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl; 1406 struct rt6_info *rt; 1407 struct fib6_node *fn; 1408 1409 /* 1410 * Get the "current" route for this destination and 1411 * check if the redirect has come from approriate router. 1412 * 1413 * RFC 2461 specifies that redirects should only be 1414 * accepted if they come from the nexthop to the target. 1415 * Due to the way the routes are chosen, this notion 1416 * is a bit fuzzy and one might need to check all possible 1417 * routes. 1418 */ 1419 1420 read_lock_bh(&table->tb6_lock); 1421 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); 1422 restart: 1423 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1424 /* 1425 * Current route is on-link; redirect is always invalid. 1426 * 1427 * Seems, previous statement is not true. It could 1428 * be node, which looks for us as on-link (f.e. proxy ndisc) 1429 * But then router serving it might decide, that we should 1430 * know truth 8)8) --ANK (980726). 1431 */ 1432 if (rt6_check_expired(rt)) 1433 continue; 1434 if (!(rt->rt6i_flags & RTF_GATEWAY)) 1435 continue; 1436 if (fl->oif != rt->rt6i_dev->ifindex) 1437 continue; 1438 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) 1439 continue; 1440 break; 1441 } 1442 1443 if (!rt) 1444 rt = net->ipv6.ip6_null_entry; 1445 BACKTRACK(net, &fl->fl6_src); 1446 out: 1447 dst_hold(&rt->u.dst); 1448 1449 read_unlock_bh(&table->tb6_lock); 1450 1451 return rt; 1452 }; 1453 1454 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, 1455 struct in6_addr *src, 1456 struct in6_addr *gateway, 1457 struct net_device *dev) 1458 { 1459 int flags = RT6_LOOKUP_F_HAS_SADDR; 1460 struct net *net = dev_net(dev); 1461 struct ip6rd_flowi rdfl = { 1462 .fl = { 1463 .oif = dev->ifindex, 1464 .nl_u = { 1465 .ip6_u = { 1466 .daddr = *dest, 1467 .saddr = *src, 1468 }, 1469 }, 1470 }, 1471 .gateway = *gateway, 1472 }; 1473 1474 if (rt6_need_strict(dest)) 1475 flags |= RT6_LOOKUP_F_IFACE; 1476 1477 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl, 1478 flags, __ip6_route_redirect); 1479 } 1480 1481 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, 1482 struct in6_addr *saddr, 1483 struct neighbour *neigh, u8 *lladdr, int on_link) 1484 { 1485 struct rt6_info *rt, *nrt = NULL; 1486 struct netevent_redirect netevent; 1487 struct net *net = dev_net(neigh->dev); 1488 1489 rt = ip6_route_redirect(dest, src, saddr, neigh->dev); 1490 1491 if (rt == net->ipv6.ip6_null_entry) { 1492 if (net_ratelimit()) 1493 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " 1494 "for redirect target\n"); 1495 goto out; 1496 } 1497 1498 /* 1499 * We have finally decided to accept it. 1500 */ 1501 1502 neigh_update(neigh, lladdr, NUD_STALE, 1503 NEIGH_UPDATE_F_WEAK_OVERRIDE| 1504 NEIGH_UPDATE_F_OVERRIDE| 1505 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 1506 NEIGH_UPDATE_F_ISROUTER)) 1507 ); 1508 1509 /* 1510 * Redirect received -> path was valid. 1511 * Look, redirects are sent only in response to data packets, 1512 * so that this nexthop apparently is reachable. --ANK 1513 */ 1514 dst_confirm(&rt->u.dst); 1515 1516 /* Duplicate redirect: silently ignore. */ 1517 if (neigh == rt->u.dst.neighbour) 1518 goto out; 1519 1520 nrt = ip6_rt_copy(rt); 1521 if (nrt == NULL) 1522 goto out; 1523 1524 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 1525 if (on_link) 1526 nrt->rt6i_flags &= ~RTF_GATEWAY; 1527 1528 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest); 1529 nrt->rt6i_dst.plen = 128; 1530 nrt->u.dst.flags |= DST_HOST; 1531 1532 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); 1533 nrt->rt6i_nexthop = neigh_clone(neigh); 1534 /* Reset pmtu, it may be better */ 1535 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); 1536 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev), 1537 dst_mtu(&nrt->u.dst)); 1538 1539 if (ip6_ins_rt(nrt)) 1540 goto out; 1541 1542 netevent.old = &rt->u.dst; 1543 netevent.new = &nrt->u.dst; 1544 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 1545 1546 if (rt->rt6i_flags&RTF_CACHE) { 1547 ip6_del_rt(rt); 1548 return; 1549 } 1550 1551 out: 1552 dst_release(&rt->u.dst); 1553 return; 1554 } 1555 1556 /* 1557 * Handle ICMP "packet too big" messages 1558 * i.e. Path MTU discovery 1559 */ 1560 1561 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, 1562 struct net_device *dev, u32 pmtu) 1563 { 1564 struct rt6_info *rt, *nrt; 1565 struct net *net = dev_net(dev); 1566 int allfrag = 0; 1567 1568 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0); 1569 if (rt == NULL) 1570 return; 1571 1572 if (pmtu >= dst_mtu(&rt->u.dst)) 1573 goto out; 1574 1575 if (pmtu < IPV6_MIN_MTU) { 1576 /* 1577 * According to RFC2460, PMTU is set to the IPv6 Minimum Link 1578 * MTU (1280) and a fragment header should always be included 1579 * after a node receiving Too Big message reporting PMTU is 1580 * less than the IPv6 Minimum Link MTU. 1581 */ 1582 pmtu = IPV6_MIN_MTU; 1583 allfrag = 1; 1584 } 1585 1586 /* New mtu received -> path was valid. 1587 They are sent only in response to data packets, 1588 so that this nexthop apparently is reachable. --ANK 1589 */ 1590 dst_confirm(&rt->u.dst); 1591 1592 /* Host route. If it is static, it would be better 1593 not to override it, but add new one, so that 1594 when cache entry will expire old pmtu 1595 would return automatically. 1596 */ 1597 if (rt->rt6i_flags & RTF_CACHE) { 1598 rt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1599 if (allfrag) 1600 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1601 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); 1602 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; 1603 goto out; 1604 } 1605 1606 /* Network route. 1607 Two cases are possible: 1608 1. It is connected route. Action: COW 1609 2. It is gatewayed route or NONEXTHOP route. Action: clone it. 1610 */ 1611 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) 1612 nrt = rt6_alloc_cow(rt, daddr, saddr); 1613 else 1614 nrt = rt6_alloc_clone(rt, daddr); 1615 1616 if (nrt) { 1617 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1618 if (allfrag) 1619 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1620 1621 /* According to RFC 1981, detecting PMTU increase shouldn't be 1622 * happened within 5 mins, the recommended timer is 10 mins. 1623 * Here this route expiration time is set to ip6_rt_mtu_expires 1624 * which is 10 mins. After 10 mins the decreased pmtu is expired 1625 * and detecting PMTU increase will be automatically happened. 1626 */ 1627 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); 1628 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; 1629 1630 ip6_ins_rt(nrt); 1631 } 1632 out: 1633 dst_release(&rt->u.dst); 1634 } 1635 1636 /* 1637 * Misc support functions 1638 */ 1639 1640 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) 1641 { 1642 struct net *net = dev_net(ort->rt6i_dev); 1643 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); 1644 1645 if (rt) { 1646 rt->u.dst.input = ort->u.dst.input; 1647 rt->u.dst.output = ort->u.dst.output; 1648 1649 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32)); 1650 rt->u.dst.error = ort->u.dst.error; 1651 rt->u.dst.dev = ort->u.dst.dev; 1652 if (rt->u.dst.dev) 1653 dev_hold(rt->u.dst.dev); 1654 rt->rt6i_idev = ort->rt6i_idev; 1655 if (rt->rt6i_idev) 1656 in6_dev_hold(rt->rt6i_idev); 1657 rt->u.dst.lastuse = jiffies; 1658 rt->rt6i_expires = 0; 1659 1660 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 1661 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 1662 rt->rt6i_metric = 0; 1663 1664 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 1665 #ifdef CONFIG_IPV6_SUBTREES 1666 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1667 #endif 1668 rt->rt6i_table = ort->rt6i_table; 1669 } 1670 return rt; 1671 } 1672 1673 #ifdef CONFIG_IPV6_ROUTE_INFO 1674 static struct rt6_info *rt6_get_route_info(struct net *net, 1675 struct in6_addr *prefix, int prefixlen, 1676 struct in6_addr *gwaddr, int ifindex) 1677 { 1678 struct fib6_node *fn; 1679 struct rt6_info *rt = NULL; 1680 struct fib6_table *table; 1681 1682 table = fib6_get_table(net, RT6_TABLE_INFO); 1683 if (table == NULL) 1684 return NULL; 1685 1686 write_lock_bh(&table->tb6_lock); 1687 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); 1688 if (!fn) 1689 goto out; 1690 1691 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) { 1692 if (rt->rt6i_dev->ifindex != ifindex) 1693 continue; 1694 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 1695 continue; 1696 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 1697 continue; 1698 dst_hold(&rt->u.dst); 1699 break; 1700 } 1701 out: 1702 write_unlock_bh(&table->tb6_lock); 1703 return rt; 1704 } 1705 1706 static struct rt6_info *rt6_add_route_info(struct net *net, 1707 struct in6_addr *prefix, int prefixlen, 1708 struct in6_addr *gwaddr, int ifindex, 1709 unsigned pref) 1710 { 1711 struct fib6_config cfg = { 1712 .fc_table = RT6_TABLE_INFO, 1713 .fc_metric = IP6_RT_PRIO_USER, 1714 .fc_ifindex = ifindex, 1715 .fc_dst_len = prefixlen, 1716 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 1717 RTF_UP | RTF_PREF(pref), 1718 .fc_nlinfo.pid = 0, 1719 .fc_nlinfo.nlh = NULL, 1720 .fc_nlinfo.nl_net = net, 1721 }; 1722 1723 ipv6_addr_copy(&cfg.fc_dst, prefix); 1724 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); 1725 1726 /* We should treat it as a default route if prefix length is 0. */ 1727 if (!prefixlen) 1728 cfg.fc_flags |= RTF_DEFAULT; 1729 1730 ip6_route_add(&cfg); 1731 1732 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); 1733 } 1734 #endif 1735 1736 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) 1737 { 1738 struct rt6_info *rt; 1739 struct fib6_table *table; 1740 1741 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); 1742 if (table == NULL) 1743 return NULL; 1744 1745 write_lock_bh(&table->tb6_lock); 1746 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) { 1747 if (dev == rt->rt6i_dev && 1748 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 1749 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 1750 break; 1751 } 1752 if (rt) 1753 dst_hold(&rt->u.dst); 1754 write_unlock_bh(&table->tb6_lock); 1755 return rt; 1756 } 1757 1758 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, 1759 struct net_device *dev, 1760 unsigned int pref) 1761 { 1762 struct fib6_config cfg = { 1763 .fc_table = RT6_TABLE_DFLT, 1764 .fc_metric = IP6_RT_PRIO_USER, 1765 .fc_ifindex = dev->ifindex, 1766 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 1767 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 1768 .fc_nlinfo.pid = 0, 1769 .fc_nlinfo.nlh = NULL, 1770 .fc_nlinfo.nl_net = dev_net(dev), 1771 }; 1772 1773 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); 1774 1775 ip6_route_add(&cfg); 1776 1777 return rt6_get_dflt_router(gwaddr, dev); 1778 } 1779 1780 void rt6_purge_dflt_routers(struct net *net) 1781 { 1782 struct rt6_info *rt; 1783 struct fib6_table *table; 1784 1785 /* NOTE: Keep consistent with rt6_get_dflt_router */ 1786 table = fib6_get_table(net, RT6_TABLE_DFLT); 1787 if (table == NULL) 1788 return; 1789 1790 restart: 1791 read_lock_bh(&table->tb6_lock); 1792 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) { 1793 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { 1794 dst_hold(&rt->u.dst); 1795 read_unlock_bh(&table->tb6_lock); 1796 ip6_del_rt(rt); 1797 goto restart; 1798 } 1799 } 1800 read_unlock_bh(&table->tb6_lock); 1801 } 1802 1803 static void rtmsg_to_fib6_config(struct net *net, 1804 struct in6_rtmsg *rtmsg, 1805 struct fib6_config *cfg) 1806 { 1807 memset(cfg, 0, sizeof(*cfg)); 1808 1809 cfg->fc_table = RT6_TABLE_MAIN; 1810 cfg->fc_ifindex = rtmsg->rtmsg_ifindex; 1811 cfg->fc_metric = rtmsg->rtmsg_metric; 1812 cfg->fc_expires = rtmsg->rtmsg_info; 1813 cfg->fc_dst_len = rtmsg->rtmsg_dst_len; 1814 cfg->fc_src_len = rtmsg->rtmsg_src_len; 1815 cfg->fc_flags = rtmsg->rtmsg_flags; 1816 1817 cfg->fc_nlinfo.nl_net = net; 1818 1819 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); 1820 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); 1821 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); 1822 } 1823 1824 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) 1825 { 1826 struct fib6_config cfg; 1827 struct in6_rtmsg rtmsg; 1828 int err; 1829 1830 switch(cmd) { 1831 case SIOCADDRT: /* Add a route */ 1832 case SIOCDELRT: /* Delete a route */ 1833 if (!capable(CAP_NET_ADMIN)) 1834 return -EPERM; 1835 err = copy_from_user(&rtmsg, arg, 1836 sizeof(struct in6_rtmsg)); 1837 if (err) 1838 return -EFAULT; 1839 1840 rtmsg_to_fib6_config(net, &rtmsg, &cfg); 1841 1842 rtnl_lock(); 1843 switch (cmd) { 1844 case SIOCADDRT: 1845 err = ip6_route_add(&cfg); 1846 break; 1847 case SIOCDELRT: 1848 err = ip6_route_del(&cfg); 1849 break; 1850 default: 1851 err = -EINVAL; 1852 } 1853 rtnl_unlock(); 1854 1855 return err; 1856 } 1857 1858 return -EINVAL; 1859 } 1860 1861 /* 1862 * Drop the packet on the floor 1863 */ 1864 1865 static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes) 1866 { 1867 int type; 1868 struct dst_entry *dst = skb->dst; 1869 switch (ipstats_mib_noroutes) { 1870 case IPSTATS_MIB_INNOROUTES: 1871 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); 1872 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) { 1873 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 1874 IPSTATS_MIB_INADDRERRORS); 1875 break; 1876 } 1877 /* FALLTHROUGH */ 1878 case IPSTATS_MIB_OUTNOROUTES: 1879 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 1880 ipstats_mib_noroutes); 1881 break; 1882 } 1883 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev); 1884 kfree_skb(skb); 1885 return 0; 1886 } 1887 1888 static int ip6_pkt_discard(struct sk_buff *skb) 1889 { 1890 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 1891 } 1892 1893 static int ip6_pkt_discard_out(struct sk_buff *skb) 1894 { 1895 skb->dev = skb->dst->dev; 1896 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 1897 } 1898 1899 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 1900 1901 static int ip6_pkt_prohibit(struct sk_buff *skb) 1902 { 1903 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 1904 } 1905 1906 static int ip6_pkt_prohibit_out(struct sk_buff *skb) 1907 { 1908 skb->dev = skb->dst->dev; 1909 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 1910 } 1911 1912 #endif 1913 1914 /* 1915 * Allocate a dst for local (unicast / anycast) address. 1916 */ 1917 1918 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 1919 const struct in6_addr *addr, 1920 int anycast) 1921 { 1922 struct net *net = dev_net(idev->dev); 1923 struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); 1924 struct neighbour *neigh; 1925 1926 if (rt == NULL) 1927 return ERR_PTR(-ENOMEM); 1928 1929 dev_hold(net->loopback_dev); 1930 in6_dev_hold(idev); 1931 1932 rt->u.dst.flags = DST_HOST; 1933 rt->u.dst.input = ip6_input; 1934 rt->u.dst.output = ip6_output; 1935 rt->rt6i_dev = net->loopback_dev; 1936 rt->rt6i_idev = idev; 1937 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 1938 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 1939 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1940 rt->u.dst.obsolete = -1; 1941 1942 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 1943 if (anycast) 1944 rt->rt6i_flags |= RTF_ANYCAST; 1945 else 1946 rt->rt6i_flags |= RTF_LOCAL; 1947 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 1948 if (IS_ERR(neigh)) { 1949 dst_free(&rt->u.dst); 1950 1951 /* We are casting this because that is the return 1952 * value type. But an errno encoded pointer is the 1953 * same regardless of the underlying pointer type, 1954 * and that's what we are returning. So this is OK. 1955 */ 1956 return (struct rt6_info *) neigh; 1957 } 1958 rt->rt6i_nexthop = neigh; 1959 1960 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 1961 rt->rt6i_dst.plen = 128; 1962 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); 1963 1964 atomic_set(&rt->u.dst.__refcnt, 1); 1965 1966 return rt; 1967 } 1968 1969 struct arg_dev_net { 1970 struct net_device *dev; 1971 struct net *net; 1972 }; 1973 1974 static int fib6_ifdown(struct rt6_info *rt, void *arg) 1975 { 1976 struct net_device *dev = ((struct arg_dev_net *)arg)->dev; 1977 struct net *net = ((struct arg_dev_net *)arg)->net; 1978 1979 if (((void *)rt->rt6i_dev == dev || dev == NULL) && 1980 rt != net->ipv6.ip6_null_entry) { 1981 RT6_TRACE("deleted by ifdown %p\n", rt); 1982 return -1; 1983 } 1984 return 0; 1985 } 1986 1987 void rt6_ifdown(struct net *net, struct net_device *dev) 1988 { 1989 struct arg_dev_net adn = { 1990 .dev = dev, 1991 .net = net, 1992 }; 1993 1994 fib6_clean_all(net, fib6_ifdown, 0, &adn); 1995 icmp6_clean_all(fib6_ifdown, &adn); 1996 } 1997 1998 struct rt6_mtu_change_arg 1999 { 2000 struct net_device *dev; 2001 unsigned mtu; 2002 }; 2003 2004 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) 2005 { 2006 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 2007 struct inet6_dev *idev; 2008 struct net *net = dev_net(arg->dev); 2009 2010 /* In IPv6 pmtu discovery is not optional, 2011 so that RTAX_MTU lock cannot disable it. 2012 We still use this lock to block changes 2013 caused by addrconf/ndisc. 2014 */ 2015 2016 idev = __in6_dev_get(arg->dev); 2017 if (idev == NULL) 2018 return 0; 2019 2020 /* For administrative MTU increase, there is no way to discover 2021 IPv6 PMTU increase, so PMTU increase should be updated here. 2022 Since RFC 1981 doesn't include administrative MTU increase 2023 update PMTU increase is a MUST. (i.e. jumbo frame) 2024 */ 2025 /* 2026 If new MTU is less than route PMTU, this new MTU will be the 2027 lowest MTU in the path, update the route PMTU to reflect PMTU 2028 decreases; if new MTU is greater than route PMTU, and the 2029 old MTU is the lowest MTU in the path, update the route PMTU 2030 to reflect the increase. In this case if the other nodes' MTU 2031 also have the lowest MTU, TOO BIG MESSAGE will be lead to 2032 PMTU discouvery. 2033 */ 2034 if (rt->rt6i_dev == arg->dev && 2035 !dst_metric_locked(&rt->u.dst, RTAX_MTU) && 2036 (dst_mtu(&rt->u.dst) >= arg->mtu || 2037 (dst_mtu(&rt->u.dst) < arg->mtu && 2038 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { 2039 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; 2040 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); 2041 } 2042 return 0; 2043 } 2044 2045 void rt6_mtu_change(struct net_device *dev, unsigned mtu) 2046 { 2047 struct rt6_mtu_change_arg arg = { 2048 .dev = dev, 2049 .mtu = mtu, 2050 }; 2051 2052 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); 2053 } 2054 2055 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 2056 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 2057 [RTA_OIF] = { .type = NLA_U32 }, 2058 [RTA_IIF] = { .type = NLA_U32 }, 2059 [RTA_PRIORITY] = { .type = NLA_U32 }, 2060 [RTA_METRICS] = { .type = NLA_NESTED }, 2061 }; 2062 2063 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2064 struct fib6_config *cfg) 2065 { 2066 struct rtmsg *rtm; 2067 struct nlattr *tb[RTA_MAX+1]; 2068 int err; 2069 2070 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2071 if (err < 0) 2072 goto errout; 2073 2074 err = -EINVAL; 2075 rtm = nlmsg_data(nlh); 2076 memset(cfg, 0, sizeof(*cfg)); 2077 2078 cfg->fc_table = rtm->rtm_table; 2079 cfg->fc_dst_len = rtm->rtm_dst_len; 2080 cfg->fc_src_len = rtm->rtm_src_len; 2081 cfg->fc_flags = RTF_UP; 2082 cfg->fc_protocol = rtm->rtm_protocol; 2083 2084 if (rtm->rtm_type == RTN_UNREACHABLE) 2085 cfg->fc_flags |= RTF_REJECT; 2086 2087 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 2088 cfg->fc_nlinfo.nlh = nlh; 2089 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2090 2091 if (tb[RTA_GATEWAY]) { 2092 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); 2093 cfg->fc_flags |= RTF_GATEWAY; 2094 } 2095 2096 if (tb[RTA_DST]) { 2097 int plen = (rtm->rtm_dst_len + 7) >> 3; 2098 2099 if (nla_len(tb[RTA_DST]) < plen) 2100 goto errout; 2101 2102 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 2103 } 2104 2105 if (tb[RTA_SRC]) { 2106 int plen = (rtm->rtm_src_len + 7) >> 3; 2107 2108 if (nla_len(tb[RTA_SRC]) < plen) 2109 goto errout; 2110 2111 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 2112 } 2113 2114 if (tb[RTA_OIF]) 2115 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 2116 2117 if (tb[RTA_PRIORITY]) 2118 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 2119 2120 if (tb[RTA_METRICS]) { 2121 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 2122 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 2123 } 2124 2125 if (tb[RTA_TABLE]) 2126 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2127 2128 err = 0; 2129 errout: 2130 return err; 2131 } 2132 2133 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2134 { 2135 struct fib6_config cfg; 2136 int err; 2137 2138 err = rtm_to_fib6_config(skb, nlh, &cfg); 2139 if (err < 0) 2140 return err; 2141 2142 return ip6_route_del(&cfg); 2143 } 2144 2145 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2146 { 2147 struct fib6_config cfg; 2148 int err; 2149 2150 err = rtm_to_fib6_config(skb, nlh, &cfg); 2151 if (err < 0) 2152 return err; 2153 2154 return ip6_route_add(&cfg); 2155 } 2156 2157 static inline size_t rt6_nlmsg_size(void) 2158 { 2159 return NLMSG_ALIGN(sizeof(struct rtmsg)) 2160 + nla_total_size(16) /* RTA_SRC */ 2161 + nla_total_size(16) /* RTA_DST */ 2162 + nla_total_size(16) /* RTA_GATEWAY */ 2163 + nla_total_size(16) /* RTA_PREFSRC */ 2164 + nla_total_size(4) /* RTA_TABLE */ 2165 + nla_total_size(4) /* RTA_IIF */ 2166 + nla_total_size(4) /* RTA_OIF */ 2167 + nla_total_size(4) /* RTA_PRIORITY */ 2168 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 2169 + nla_total_size(sizeof(struct rta_cacheinfo)); 2170 } 2171 2172 static int rt6_fill_node(struct net *net, 2173 struct sk_buff *skb, struct rt6_info *rt, 2174 struct in6_addr *dst, struct in6_addr *src, 2175 int iif, int type, u32 pid, u32 seq, 2176 int prefix, int nowait, unsigned int flags) 2177 { 2178 struct rtmsg *rtm; 2179 struct nlmsghdr *nlh; 2180 long expires; 2181 u32 table; 2182 2183 if (prefix) { /* user wants prefix routes only */ 2184 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 2185 /* success since this is not a prefix route */ 2186 return 1; 2187 } 2188 } 2189 2190 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); 2191 if (nlh == NULL) 2192 return -EMSGSIZE; 2193 2194 rtm = nlmsg_data(nlh); 2195 rtm->rtm_family = AF_INET6; 2196 rtm->rtm_dst_len = rt->rt6i_dst.plen; 2197 rtm->rtm_src_len = rt->rt6i_src.plen; 2198 rtm->rtm_tos = 0; 2199 if (rt->rt6i_table) 2200 table = rt->rt6i_table->tb6_id; 2201 else 2202 table = RT6_TABLE_UNSPEC; 2203 rtm->rtm_table = table; 2204 NLA_PUT_U32(skb, RTA_TABLE, table); 2205 if (rt->rt6i_flags&RTF_REJECT) 2206 rtm->rtm_type = RTN_UNREACHABLE; 2207 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) 2208 rtm->rtm_type = RTN_LOCAL; 2209 else 2210 rtm->rtm_type = RTN_UNICAST; 2211 rtm->rtm_flags = 0; 2212 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2213 rtm->rtm_protocol = rt->rt6i_protocol; 2214 if (rt->rt6i_flags&RTF_DYNAMIC) 2215 rtm->rtm_protocol = RTPROT_REDIRECT; 2216 else if (rt->rt6i_flags & RTF_ADDRCONF) 2217 rtm->rtm_protocol = RTPROT_KERNEL; 2218 else if (rt->rt6i_flags&RTF_DEFAULT) 2219 rtm->rtm_protocol = RTPROT_RA; 2220 2221 if (rt->rt6i_flags&RTF_CACHE) 2222 rtm->rtm_flags |= RTM_F_CLONED; 2223 2224 if (dst) { 2225 NLA_PUT(skb, RTA_DST, 16, dst); 2226 rtm->rtm_dst_len = 128; 2227 } else if (rtm->rtm_dst_len) 2228 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); 2229 #ifdef CONFIG_IPV6_SUBTREES 2230 if (src) { 2231 NLA_PUT(skb, RTA_SRC, 16, src); 2232 rtm->rtm_src_len = 128; 2233 } else if (rtm->rtm_src_len) 2234 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); 2235 #endif 2236 if (iif) { 2237 #ifdef CONFIG_IPV6_MROUTE 2238 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { 2239 int err = ip6mr_get_route(net, skb, rtm, nowait); 2240 if (err <= 0) { 2241 if (!nowait) { 2242 if (err == 0) 2243 return 0; 2244 goto nla_put_failure; 2245 } else { 2246 if (err == -EMSGSIZE) 2247 goto nla_put_failure; 2248 } 2249 } 2250 } else 2251 #endif 2252 NLA_PUT_U32(skb, RTA_IIF, iif); 2253 } else if (dst) { 2254 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst); 2255 struct in6_addr saddr_buf; 2256 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, 2257 dst, 0, &saddr_buf) == 0) 2258 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); 2259 } 2260 2261 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 2262 goto nla_put_failure; 2263 2264 if (rt->u.dst.neighbour) 2265 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); 2266 2267 if (rt->u.dst.dev) 2268 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); 2269 2270 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); 2271 2272 if (!(rt->rt6i_flags & RTF_EXPIRES)) 2273 expires = 0; 2274 else if (rt->rt6i_expires - jiffies < INT_MAX) 2275 expires = rt->rt6i_expires - jiffies; 2276 else 2277 expires = INT_MAX; 2278 2279 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, 2280 expires, rt->u.dst.error) < 0) 2281 goto nla_put_failure; 2282 2283 return nlmsg_end(skb, nlh); 2284 2285 nla_put_failure: 2286 nlmsg_cancel(skb, nlh); 2287 return -EMSGSIZE; 2288 } 2289 2290 int rt6_dump_route(struct rt6_info *rt, void *p_arg) 2291 { 2292 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 2293 int prefix; 2294 2295 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 2296 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 2297 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; 2298 } else 2299 prefix = 0; 2300 2301 return rt6_fill_node(arg->net, 2302 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 2303 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, 2304 prefix, 0, NLM_F_MULTI); 2305 } 2306 2307 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2308 { 2309 struct net *net = sock_net(in_skb->sk); 2310 struct nlattr *tb[RTA_MAX+1]; 2311 struct rt6_info *rt; 2312 struct sk_buff *skb; 2313 struct rtmsg *rtm; 2314 struct flowi fl; 2315 int err, iif = 0; 2316 2317 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2318 if (err < 0) 2319 goto errout; 2320 2321 err = -EINVAL; 2322 memset(&fl, 0, sizeof(fl)); 2323 2324 if (tb[RTA_SRC]) { 2325 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 2326 goto errout; 2327 2328 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC])); 2329 } 2330 2331 if (tb[RTA_DST]) { 2332 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 2333 goto errout; 2334 2335 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST])); 2336 } 2337 2338 if (tb[RTA_IIF]) 2339 iif = nla_get_u32(tb[RTA_IIF]); 2340 2341 if (tb[RTA_OIF]) 2342 fl.oif = nla_get_u32(tb[RTA_OIF]); 2343 2344 if (iif) { 2345 struct net_device *dev; 2346 dev = __dev_get_by_index(net, iif); 2347 if (!dev) { 2348 err = -ENODEV; 2349 goto errout; 2350 } 2351 } 2352 2353 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2354 if (skb == NULL) { 2355 err = -ENOBUFS; 2356 goto errout; 2357 } 2358 2359 /* Reserve room for dummy headers, this skb can pass 2360 through good chunk of routing engine. 2361 */ 2362 skb_reset_mac_header(skb); 2363 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2364 2365 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl); 2366 skb->dst = &rt->u.dst; 2367 2368 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, 2369 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 2370 nlh->nlmsg_seq, 0, 0, 0); 2371 if (err < 0) { 2372 kfree_skb(skb); 2373 goto errout; 2374 } 2375 2376 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); 2377 errout: 2378 return err; 2379 } 2380 2381 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) 2382 { 2383 struct sk_buff *skb; 2384 struct net *net = info->nl_net; 2385 u32 seq; 2386 int err; 2387 2388 err = -ENOBUFS; 2389 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0; 2390 2391 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); 2392 if (skb == NULL) 2393 goto errout; 2394 2395 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 2396 event, info->pid, seq, 0, 0, 0); 2397 if (err < 0) { 2398 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 2399 WARN_ON(err == -EMSGSIZE); 2400 kfree_skb(skb); 2401 goto errout; 2402 } 2403 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, 2404 info->nlh, gfp_any()); 2405 return; 2406 errout: 2407 if (err < 0) 2408 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 2409 } 2410 2411 static int ip6_route_dev_notify(struct notifier_block *this, 2412 unsigned long event, void *data) 2413 { 2414 struct net_device *dev = (struct net_device *)data; 2415 struct net *net = dev_net(dev); 2416 2417 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { 2418 net->ipv6.ip6_null_entry->u.dst.dev = dev; 2419 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 2420 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2421 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev; 2422 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); 2423 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev; 2424 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); 2425 #endif 2426 } 2427 2428 return NOTIFY_OK; 2429 } 2430 2431 /* 2432 * /proc 2433 */ 2434 2435 #ifdef CONFIG_PROC_FS 2436 2437 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1) 2438 2439 struct rt6_proc_arg 2440 { 2441 char *buffer; 2442 int offset; 2443 int length; 2444 int skip; 2445 int len; 2446 }; 2447 2448 static int rt6_info_route(struct rt6_info *rt, void *p_arg) 2449 { 2450 struct seq_file *m = p_arg; 2451 2452 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); 2453 2454 #ifdef CONFIG_IPV6_SUBTREES 2455 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); 2456 #else 2457 seq_puts(m, "00000000000000000000000000000000 00 "); 2458 #endif 2459 2460 if (rt->rt6i_nexthop) { 2461 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key); 2462 } else { 2463 seq_puts(m, "00000000000000000000000000000000"); 2464 } 2465 seq_printf(m, " %08x %08x %08x %08x %8s\n", 2466 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), 2467 rt->u.dst.__use, rt->rt6i_flags, 2468 rt->rt6i_dev ? rt->rt6i_dev->name : ""); 2469 return 0; 2470 } 2471 2472 static int ipv6_route_show(struct seq_file *m, void *v) 2473 { 2474 struct net *net = (struct net *)m->private; 2475 fib6_clean_all(net, rt6_info_route, 0, m); 2476 return 0; 2477 } 2478 2479 static int ipv6_route_open(struct inode *inode, struct file *file) 2480 { 2481 return single_open_net(inode, file, ipv6_route_show); 2482 } 2483 2484 static const struct file_operations ipv6_route_proc_fops = { 2485 .owner = THIS_MODULE, 2486 .open = ipv6_route_open, 2487 .read = seq_read, 2488 .llseek = seq_lseek, 2489 .release = single_release_net, 2490 }; 2491 2492 static int rt6_stats_seq_show(struct seq_file *seq, void *v) 2493 { 2494 struct net *net = (struct net *)seq->private; 2495 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 2496 net->ipv6.rt6_stats->fib_nodes, 2497 net->ipv6.rt6_stats->fib_route_nodes, 2498 net->ipv6.rt6_stats->fib_rt_alloc, 2499 net->ipv6.rt6_stats->fib_rt_entries, 2500 net->ipv6.rt6_stats->fib_rt_cache, 2501 atomic_read(&net->ipv6.ip6_dst_ops->entries), 2502 net->ipv6.rt6_stats->fib_discarded_routes); 2503 2504 return 0; 2505 } 2506 2507 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 2508 { 2509 return single_open_net(inode, file, rt6_stats_seq_show); 2510 } 2511 2512 static const struct file_operations rt6_stats_seq_fops = { 2513 .owner = THIS_MODULE, 2514 .open = rt6_stats_seq_open, 2515 .read = seq_read, 2516 .llseek = seq_lseek, 2517 .release = single_release_net, 2518 }; 2519 #endif /* CONFIG_PROC_FS */ 2520 2521 #ifdef CONFIG_SYSCTL 2522 2523 static 2524 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp, 2525 void __user *buffer, size_t *lenp, loff_t *ppos) 2526 { 2527 struct net *net = current->nsproxy->net_ns; 2528 int delay = net->ipv6.sysctl.flush_delay; 2529 if (write) { 2530 proc_dointvec(ctl, write, filp, buffer, lenp, ppos); 2531 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); 2532 return 0; 2533 } else 2534 return -EINVAL; 2535 } 2536 2537 ctl_table ipv6_route_table_template[] = { 2538 { 2539 .procname = "flush", 2540 .data = &init_net.ipv6.sysctl.flush_delay, 2541 .maxlen = sizeof(int), 2542 .mode = 0200, 2543 .proc_handler = ipv6_sysctl_rtcache_flush 2544 }, 2545 { 2546 .ctl_name = NET_IPV6_ROUTE_GC_THRESH, 2547 .procname = "gc_thresh", 2548 .data = &ip6_dst_ops_template.gc_thresh, 2549 .maxlen = sizeof(int), 2550 .mode = 0644, 2551 .proc_handler = proc_dointvec, 2552 }, 2553 { 2554 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE, 2555 .procname = "max_size", 2556 .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 2557 .maxlen = sizeof(int), 2558 .mode = 0644, 2559 .proc_handler = proc_dointvec, 2560 }, 2561 { 2562 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL, 2563 .procname = "gc_min_interval", 2564 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2565 .maxlen = sizeof(int), 2566 .mode = 0644, 2567 .proc_handler = proc_dointvec_jiffies, 2568 .strategy = sysctl_jiffies, 2569 }, 2570 { 2571 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT, 2572 .procname = "gc_timeout", 2573 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 2574 .maxlen = sizeof(int), 2575 .mode = 0644, 2576 .proc_handler = proc_dointvec_jiffies, 2577 .strategy = sysctl_jiffies, 2578 }, 2579 { 2580 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL, 2581 .procname = "gc_interval", 2582 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, 2583 .maxlen = sizeof(int), 2584 .mode = 0644, 2585 .proc_handler = proc_dointvec_jiffies, 2586 .strategy = sysctl_jiffies, 2587 }, 2588 { 2589 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY, 2590 .procname = "gc_elasticity", 2591 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, 2592 .maxlen = sizeof(int), 2593 .mode = 0644, 2594 .proc_handler = proc_dointvec_jiffies, 2595 .strategy = sysctl_jiffies, 2596 }, 2597 { 2598 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES, 2599 .procname = "mtu_expires", 2600 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, 2601 .maxlen = sizeof(int), 2602 .mode = 0644, 2603 .proc_handler = proc_dointvec_jiffies, 2604 .strategy = sysctl_jiffies, 2605 }, 2606 { 2607 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS, 2608 .procname = "min_adv_mss", 2609 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, 2610 .maxlen = sizeof(int), 2611 .mode = 0644, 2612 .proc_handler = proc_dointvec_jiffies, 2613 .strategy = sysctl_jiffies, 2614 }, 2615 { 2616 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS, 2617 .procname = "gc_min_interval_ms", 2618 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2619 .maxlen = sizeof(int), 2620 .mode = 0644, 2621 .proc_handler = proc_dointvec_ms_jiffies, 2622 .strategy = sysctl_ms_jiffies, 2623 }, 2624 { .ctl_name = 0 } 2625 }; 2626 2627 struct ctl_table *ipv6_route_sysctl_init(struct net *net) 2628 { 2629 struct ctl_table *table; 2630 2631 table = kmemdup(ipv6_route_table_template, 2632 sizeof(ipv6_route_table_template), 2633 GFP_KERNEL); 2634 2635 if (table) { 2636 table[0].data = &net->ipv6.sysctl.flush_delay; 2637 table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh; 2638 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 2639 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2640 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; 2641 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 2642 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; 2643 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 2644 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 2645 } 2646 2647 return table; 2648 } 2649 #endif 2650 2651 static int ip6_route_net_init(struct net *net) 2652 { 2653 int ret = -ENOMEM; 2654 2655 net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template, 2656 sizeof(*net->ipv6.ip6_dst_ops), 2657 GFP_KERNEL); 2658 if (!net->ipv6.ip6_dst_ops) 2659 goto out; 2660 net->ipv6.ip6_dst_ops->dst_net = hold_net(net); 2661 2662 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 2663 sizeof(*net->ipv6.ip6_null_entry), 2664 GFP_KERNEL); 2665 if (!net->ipv6.ip6_null_entry) 2666 goto out_ip6_dst_ops; 2667 net->ipv6.ip6_null_entry->u.dst.path = 2668 (struct dst_entry *)net->ipv6.ip6_null_entry; 2669 net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops; 2670 2671 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2672 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 2673 sizeof(*net->ipv6.ip6_prohibit_entry), 2674 GFP_KERNEL); 2675 if (!net->ipv6.ip6_prohibit_entry) 2676 goto out_ip6_null_entry; 2677 net->ipv6.ip6_prohibit_entry->u.dst.path = 2678 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 2679 net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops; 2680 2681 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 2682 sizeof(*net->ipv6.ip6_blk_hole_entry), 2683 GFP_KERNEL); 2684 if (!net->ipv6.ip6_blk_hole_entry) 2685 goto out_ip6_prohibit_entry; 2686 net->ipv6.ip6_blk_hole_entry->u.dst.path = 2687 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 2688 net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops; 2689 #endif 2690 2691 net->ipv6.sysctl.flush_delay = 0; 2692 net->ipv6.sysctl.ip6_rt_max_size = 4096; 2693 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 2694 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 2695 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 2696 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 2697 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 2698 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 2699 2700 #ifdef CONFIG_PROC_FS 2701 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); 2702 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2703 #endif 2704 net->ipv6.ip6_rt_gc_expire = 30*HZ; 2705 2706 ret = 0; 2707 out: 2708 return ret; 2709 2710 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2711 out_ip6_prohibit_entry: 2712 kfree(net->ipv6.ip6_prohibit_entry); 2713 out_ip6_null_entry: 2714 kfree(net->ipv6.ip6_null_entry); 2715 #endif 2716 out_ip6_dst_ops: 2717 release_net(net->ipv6.ip6_dst_ops->dst_net); 2718 kfree(net->ipv6.ip6_dst_ops); 2719 goto out; 2720 } 2721 2722 static void ip6_route_net_exit(struct net *net) 2723 { 2724 #ifdef CONFIG_PROC_FS 2725 proc_net_remove(net, "ipv6_route"); 2726 proc_net_remove(net, "rt6_stats"); 2727 #endif 2728 kfree(net->ipv6.ip6_null_entry); 2729 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2730 kfree(net->ipv6.ip6_prohibit_entry); 2731 kfree(net->ipv6.ip6_blk_hole_entry); 2732 #endif 2733 release_net(net->ipv6.ip6_dst_ops->dst_net); 2734 kfree(net->ipv6.ip6_dst_ops); 2735 } 2736 2737 static struct pernet_operations ip6_route_net_ops = { 2738 .init = ip6_route_net_init, 2739 .exit = ip6_route_net_exit, 2740 }; 2741 2742 static struct notifier_block ip6_route_dev_notifier = { 2743 .notifier_call = ip6_route_dev_notify, 2744 .priority = 0, 2745 }; 2746 2747 int __init ip6_route_init(void) 2748 { 2749 int ret; 2750 2751 ret = -ENOMEM; 2752 ip6_dst_ops_template.kmem_cachep = 2753 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 2754 SLAB_HWCACHE_ALIGN, NULL); 2755 if (!ip6_dst_ops_template.kmem_cachep) 2756 goto out; 2757 2758 ret = register_pernet_subsys(&ip6_route_net_ops); 2759 if (ret) 2760 goto out_kmem_cache; 2761 2762 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 2763 2764 /* Registering of the loopback is done before this portion of code, 2765 * the loopback reference in rt6_info will not be taken, do it 2766 * manually for init_net */ 2767 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev; 2768 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2769 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2770 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev; 2771 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2772 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev; 2773 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 2774 #endif 2775 ret = fib6_init(); 2776 if (ret) 2777 goto out_register_subsys; 2778 2779 ret = xfrm6_init(); 2780 if (ret) 2781 goto out_fib6_init; 2782 2783 ret = fib6_rules_init(); 2784 if (ret) 2785 goto xfrm6_init; 2786 2787 ret = -ENOBUFS; 2788 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) || 2789 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) || 2790 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL)) 2791 goto fib6_rules_init; 2792 2793 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 2794 if (ret) 2795 goto fib6_rules_init; 2796 2797 out: 2798 return ret; 2799 2800 fib6_rules_init: 2801 fib6_rules_cleanup(); 2802 xfrm6_init: 2803 xfrm6_fini(); 2804 out_fib6_init: 2805 fib6_gc_cleanup(); 2806 out_register_subsys: 2807 unregister_pernet_subsys(&ip6_route_net_ops); 2808 out_kmem_cache: 2809 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2810 goto out; 2811 } 2812 2813 void ip6_route_cleanup(void) 2814 { 2815 unregister_netdevice_notifier(&ip6_route_dev_notifier); 2816 fib6_rules_cleanup(); 2817 xfrm6_fini(); 2818 fib6_gc_cleanup(); 2819 unregister_pernet_subsys(&ip6_route_net_ops); 2820 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2821 } 2822