1 /* 2 * Linux INET6 implementation 3 * FIB front-end. 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 */ 13 14 /* Changes: 15 * 16 * YOSHIFUJI Hideaki @USAGI 17 * reworked default router selection. 18 * - respect outgoing interface 19 * - select from (probably) reachable routers (i.e. 20 * routers in REACHABLE, STALE, DELAY or PROBE states). 21 * - always select the same router if it is (probably) 22 * reachable. otherwise, round-robin the list. 23 * Ville Nuorvala 24 * Fixed routing subtrees. 25 */ 26 27 #include <linux/capability.h> 28 #include <linux/errno.h> 29 #include <linux/types.h> 30 #include <linux/times.h> 31 #include <linux/socket.h> 32 #include <linux/sockios.h> 33 #include <linux/net.h> 34 #include <linux/route.h> 35 #include <linux/netdevice.h> 36 #include <linux/in6.h> 37 #include <linux/mroute6.h> 38 #include <linux/init.h> 39 #include <linux/if_arp.h> 40 #include <linux/proc_fs.h> 41 #include <linux/seq_file.h> 42 #include <linux/nsproxy.h> 43 #include <linux/slab.h> 44 #include <net/net_namespace.h> 45 #include <net/snmp.h> 46 #include <net/ipv6.h> 47 #include <net/ip6_fib.h> 48 #include <net/ip6_route.h> 49 #include <net/ndisc.h> 50 #include <net/addrconf.h> 51 #include <net/tcp.h> 52 #include <linux/rtnetlink.h> 53 #include <net/dst.h> 54 #include <net/xfrm.h> 55 #include <net/netevent.h> 56 #include <net/netlink.h> 57 58 #include <asm/uaccess.h> 59 60 #ifdef CONFIG_SYSCTL 61 #include <linux/sysctl.h> 62 #endif 63 64 /* Set to 3 to get tracing. */ 65 #define RT6_DEBUG 2 66 67 #if RT6_DEBUG >= 3 68 #define RDBG(x) printk x 69 #define RT6_TRACE(x...) printk(KERN_DEBUG x) 70 #else 71 #define RDBG(x) 72 #define RT6_TRACE(x...) 
do { ; } while (0) 73 #endif 74 75 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, 76 const struct in6_addr *dest); 77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 78 static unsigned int ip6_default_advmss(const struct dst_entry *dst); 79 static unsigned int ip6_default_mtu(const struct dst_entry *dst); 80 static struct dst_entry *ip6_negative_advice(struct dst_entry *); 81 static void ip6_dst_destroy(struct dst_entry *); 82 static void ip6_dst_ifdown(struct dst_entry *, 83 struct net_device *dev, int how); 84 static int ip6_dst_gc(struct dst_ops *ops); 85 86 static int ip6_pkt_discard(struct sk_buff *skb); 87 static int ip6_pkt_discard_out(struct sk_buff *skb); 88 static void ip6_link_failure(struct sk_buff *skb); 89 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 90 91 #ifdef CONFIG_IPV6_ROUTE_INFO 92 static struct rt6_info *rt6_add_route_info(struct net *net, 93 const struct in6_addr *prefix, int prefixlen, 94 const struct in6_addr *gwaddr, int ifindex, 95 unsigned pref); 96 static struct rt6_info *rt6_get_route_info(struct net *net, 97 const struct in6_addr *prefix, int prefixlen, 98 const struct in6_addr *gwaddr, int ifindex); 99 #endif 100 101 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) 102 { 103 struct rt6_info *rt = (struct rt6_info *) dst; 104 struct inet_peer *peer; 105 u32 *p = NULL; 106 107 if (!(rt->dst.flags & DST_HOST)) 108 return NULL; 109 110 if (!rt->rt6i_peer) 111 rt6_bind_peer(rt, 1); 112 113 peer = rt->rt6i_peer; 114 if (peer) { 115 u32 *old_p = __DST_METRICS_PTR(old); 116 unsigned long prev, new; 117 118 p = peer->metrics; 119 if (inet_metrics_new(peer)) 120 memcpy(p, old_p, sizeof(u32) * RTAX_MAX); 121 122 new = (unsigned long) p; 123 prev = cmpxchg(&dst->_metrics, old, new); 124 125 if (prev != old) { 126 p = __DST_METRICS_PTR(prev); 127 if (prev & DST_METRICS_READ_ONLY) 128 p = NULL; 129 } 130 } 131 return p; 132 } 133 134 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr) 135 { 136 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev); 137 } 138 139 static struct dst_ops ip6_dst_ops_template = { 140 .family = AF_INET6, 141 .protocol = cpu_to_be16(ETH_P_IPV6), 142 .gc = ip6_dst_gc, 143 .gc_thresh = 1024, 144 .check = ip6_dst_check, 145 .default_advmss = ip6_default_advmss, 146 .default_mtu = ip6_default_mtu, 147 .cow_metrics = ipv6_cow_metrics, 148 .destroy = ip6_dst_destroy, 149 .ifdown = ip6_dst_ifdown, 150 .negative_advice = ip6_negative_advice, 151 .link_failure = ip6_link_failure, 152 .update_pmtu = ip6_rt_update_pmtu, 153 .local_out = __ip6_local_out, 154 .neigh_lookup = ip6_neigh_lookup, 155 }; 156 157 static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst) 158 { 159 return 0; 160 } 161 162 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 163 { 164 } 165 166 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst, 167 unsigned long old) 168 { 169 return NULL; 170 } 171 172 static struct dst_ops ip6_dst_blackhole_ops = { 173 .family = AF_INET6, 174 .protocol = cpu_to_be16(ETH_P_IPV6), 175 .destroy = ip6_dst_destroy, 176 .check = ip6_dst_check, 177 .default_mtu = ip6_blackhole_default_mtu, 178 .default_advmss = ip6_default_advmss, 179 .update_pmtu = ip6_rt_blackhole_update_pmtu, 180 .cow_metrics = ip6_rt_blackhole_cow_metrics, 181 .neigh_lookup = ip6_neigh_lookup, 182 }; 183 184 static const u32 ip6_template_metrics[RTAX_MAX] = { 185 [RTAX_HOPLIMIT - 1] = 255, 186 }; 187 188 static 
struct rt6_info ip6_null_entry_template = { 189 .dst = { 190 .__refcnt = ATOMIC_INIT(1), 191 .__use = 1, 192 .obsolete = -1, 193 .error = -ENETUNREACH, 194 .input = ip6_pkt_discard, 195 .output = ip6_pkt_discard_out, 196 }, 197 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 198 .rt6i_protocol = RTPROT_KERNEL, 199 .rt6i_metric = ~(u32) 0, 200 .rt6i_ref = ATOMIC_INIT(1), 201 }; 202 203 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 204 205 static int ip6_pkt_prohibit(struct sk_buff *skb); 206 static int ip6_pkt_prohibit_out(struct sk_buff *skb); 207 208 static struct rt6_info ip6_prohibit_entry_template = { 209 .dst = { 210 .__refcnt = ATOMIC_INIT(1), 211 .__use = 1, 212 .obsolete = -1, 213 .error = -EACCES, 214 .input = ip6_pkt_prohibit, 215 .output = ip6_pkt_prohibit_out, 216 }, 217 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 218 .rt6i_protocol = RTPROT_KERNEL, 219 .rt6i_metric = ~(u32) 0, 220 .rt6i_ref = ATOMIC_INIT(1), 221 }; 222 223 static struct rt6_info ip6_blk_hole_entry_template = { 224 .dst = { 225 .__refcnt = ATOMIC_INIT(1), 226 .__use = 1, 227 .obsolete = -1, 228 .error = -EINVAL, 229 .input = dst_discard, 230 .output = dst_discard, 231 }, 232 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 233 .rt6i_protocol = RTPROT_KERNEL, 234 .rt6i_metric = ~(u32) 0, 235 .rt6i_ref = ATOMIC_INIT(1), 236 }; 237 238 #endif 239 240 /* allocate dst with ip6_dst_ops */ 241 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops, 242 struct net_device *dev, 243 int flags) 244 { 245 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags); 246 247 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry)); 248 249 return rt; 250 } 251 252 static void ip6_dst_destroy(struct dst_entry *dst) 253 { 254 struct rt6_info *rt = (struct rt6_info *)dst; 255 struct inet6_dev *idev = rt->rt6i_idev; 256 struct inet_peer *peer = rt->rt6i_peer; 257 258 if (!(rt->dst.flags & DST_HOST)) 259 dst_destroy_metrics_generic(dst); 260 261 if (idev != NULL) { 262 rt->rt6i_idev = NULL; 263 in6_dev_put(idev); 264 } 265 if (peer) { 266 rt->rt6i_peer = NULL; 267 inet_putpeer(peer); 268 } 269 } 270 271 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0); 272 273 static u32 rt6_peer_genid(void) 274 { 275 return atomic_read(&__rt6_peer_genid); 276 } 277 278 void rt6_bind_peer(struct rt6_info *rt, int create) 279 { 280 struct inet_peer *peer; 281 282 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create); 283 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL) 284 inet_putpeer(peer); 285 else 286 rt->rt6i_peer_genid = rt6_peer_genid(); 287 } 288 289 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 290 int how) 291 { 292 struct rt6_info *rt = (struct rt6_info *)dst; 293 struct inet6_dev *idev = rt->rt6i_idev; 294 struct net_device *loopback_dev = 295 dev_net(dev)->loopback_dev; 296 297 if (dev != loopback_dev && idev != NULL && idev->dev == dev) { 298 struct inet6_dev *loopback_idev = 299 in6_dev_get(loopback_dev); 300 if (loopback_idev != NULL) { 301 rt->rt6i_idev = loopback_idev; 302 in6_dev_put(idev); 303 } 304 } 305 } 306 307 static __inline__ int rt6_check_expired(const struct rt6_info *rt) 308 { 309 return (rt->rt6i_flags & RTF_EXPIRES) && 310 time_after(jiffies, rt->rt6i_expires); 311 } 312 313 static inline int rt6_need_strict(const struct in6_addr *daddr) 314 { 315 return ipv6_addr_type(daddr) & 316 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); 317 } 318 319 /* 320 * Route lookup. Any table->tb6_lock is implied. 
321 */ 322 323 static inline struct rt6_info *rt6_device_match(struct net *net, 324 struct rt6_info *rt, 325 const struct in6_addr *saddr, 326 int oif, 327 int flags) 328 { 329 struct rt6_info *local = NULL; 330 struct rt6_info *sprt; 331 332 if (!oif && ipv6_addr_any(saddr)) 333 goto out; 334 335 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { 336 struct net_device *dev = sprt->rt6i_dev; 337 338 if (oif) { 339 if (dev->ifindex == oif) 340 return sprt; 341 if (dev->flags & IFF_LOOPBACK) { 342 if (sprt->rt6i_idev == NULL || 343 sprt->rt6i_idev->dev->ifindex != oif) { 344 if (flags & RT6_LOOKUP_F_IFACE && oif) 345 continue; 346 if (local && (!oif || 347 local->rt6i_idev->dev->ifindex == oif)) 348 continue; 349 } 350 local = sprt; 351 } 352 } else { 353 if (ipv6_chk_addr(net, saddr, dev, 354 flags & RT6_LOOKUP_F_IFACE)) 355 return sprt; 356 } 357 } 358 359 if (oif) { 360 if (local) 361 return local; 362 363 if (flags & RT6_LOOKUP_F_IFACE) 364 return net->ipv6.ip6_null_entry; 365 } 366 out: 367 return rt; 368 } 369 370 #ifdef CONFIG_IPV6_ROUTER_PREF 371 static void rt6_probe(struct rt6_info *rt) 372 { 373 struct neighbour *neigh; 374 /* 375 * Okay, this does not seem to be appropriate 376 * for now, however, we need to check if it 377 * is really so; aka Router Reachability Probing. 378 * 379 * Router Reachability Probe MUST be rate-limited 380 * to no more than one per minute. 381 */ 382 rcu_read_lock(); 383 neigh = rt ? dst_get_neighbour(&rt->dst) : NULL; 384 if (!neigh || (neigh->nud_state & NUD_VALID)) 385 goto out; 386 read_lock_bh(&neigh->lock); 387 if (!(neigh->nud_state & NUD_VALID) && 388 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { 389 struct in6_addr mcaddr; 390 struct in6_addr *target; 391 392 neigh->updated = jiffies; 393 read_unlock_bh(&neigh->lock); 394 395 target = (struct in6_addr *)&neigh->primary_key; 396 addrconf_addr_solict_mult(target, &mcaddr); 397 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL); 398 } else { 399 read_unlock_bh(&neigh->lock); 400 } 401 out: 402 rcu_read_unlock(); 403 } 404 #else 405 static inline void rt6_probe(struct rt6_info *rt) 406 { 407 } 408 #endif 409 410 /* 411 * Default Router Selection (RFC 2461 6.3.6) 412 */ 413 static inline int rt6_check_dev(struct rt6_info *rt, int oif) 414 { 415 struct net_device *dev = rt->rt6i_dev; 416 if (!oif || dev->ifindex == oif) 417 return 2; 418 if ((dev->flags & IFF_LOOPBACK) && 419 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) 420 return 1; 421 return 0; 422 } 423 424 static inline int rt6_check_neigh(struct rt6_info *rt) 425 { 426 struct neighbour *neigh; 427 int m; 428 429 rcu_read_lock(); 430 neigh = dst_get_neighbour(&rt->dst); 431 if (rt->rt6i_flags & RTF_NONEXTHOP || 432 !(rt->rt6i_flags & RTF_GATEWAY)) 433 m = 1; 434 else if (neigh) { 435 read_lock_bh(&neigh->lock); 436 if (neigh->nud_state & NUD_VALID) 437 m = 2; 438 #ifdef CONFIG_IPV6_ROUTER_PREF 439 else if (neigh->nud_state & NUD_FAILED) 440 m = 0; 441 #endif 442 else 443 m = 1; 444 read_unlock_bh(&neigh->lock); 445 } else 446 m = 0; 447 rcu_read_unlock(); 448 return m; 449 } 450 451 static int rt6_score_route(struct rt6_info *rt, int oif, 452 int strict) 453 { 454 int m, n; 455 456 m = rt6_check_dev(rt, oif); 457 if (!m && (strict & RT6_LOOKUP_F_IFACE)) 458 return -1; 459 #ifdef CONFIG_IPV6_ROUTER_PREF 460 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; 461 #endif 462 n = rt6_check_neigh(rt); 463 if (!n && (strict & RT6_LOOKUP_F_REACHABLE)) 464 return -1; 465 return m; 466 } 467 468 
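/*
 * Selection sketch (a reading of the helpers below): find_rr_leaf() walks the
 * rt6_info entries of a node that share the current metric, scores each one
 * with rt6_score_route() and keeps the best-scoring match; when
 * RT6_LOOKUP_F_REACHABLE is set, the losing candidates are probed via
 * rt6_probe() so that an unreachable "best" router can be replaced later.
 * rt6_select() then advances fn->rr_ptr in round-robin fashion whenever no
 * (probably) reachable router matched.
 */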
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match)
{
	int m;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m < 0)
		goto out;

	if (m > *mpri) {
		if (strict & RT6_LOOKUP_F_REACHABLE)
			rt6_probe(match);
		*mpri = m;
		match = rt;
	} else if (strict & RT6_LOOKUP_F_REACHABLE) {
		rt6_probe(rt);
	}

out:
	return match;
}

static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict)
{
	struct rt6_info *rt, *match;
	int mpri = -1;

	match = NULL;
	for (rt = rr_head; rt && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);
	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match);

	return match;
}

static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;

	RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
		  __func__, fn->leaf, oif);

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);

	if (!match &&
	    (strict & RT6_LOOKUP_F_REACHABLE)) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	RT6_TRACE("%s() => %p\n",
		  __func__, match);

	net = dev_net(rt0->rt6i_dev);
	return match ?
match : net->ipv6.ip6_null_entry; 542 } 543 544 #ifdef CONFIG_IPV6_ROUTE_INFO 545 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 546 const struct in6_addr *gwaddr) 547 { 548 struct net *net = dev_net(dev); 549 struct route_info *rinfo = (struct route_info *) opt; 550 struct in6_addr prefix_buf, *prefix; 551 unsigned int pref; 552 unsigned long lifetime; 553 struct rt6_info *rt; 554 555 if (len < sizeof(struct route_info)) { 556 return -EINVAL; 557 } 558 559 /* Sanity check for prefix_len and length */ 560 if (rinfo->length > 3) { 561 return -EINVAL; 562 } else if (rinfo->prefix_len > 128) { 563 return -EINVAL; 564 } else if (rinfo->prefix_len > 64) { 565 if (rinfo->length < 2) { 566 return -EINVAL; 567 } 568 } else if (rinfo->prefix_len > 0) { 569 if (rinfo->length < 1) { 570 return -EINVAL; 571 } 572 } 573 574 pref = rinfo->route_pref; 575 if (pref == ICMPV6_ROUTER_PREF_INVALID) 576 return -EINVAL; 577 578 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); 579 580 if (rinfo->length == 3) 581 prefix = (struct in6_addr *)rinfo->prefix; 582 else { 583 /* this function is safe */ 584 ipv6_addr_prefix(&prefix_buf, 585 (struct in6_addr *)rinfo->prefix, 586 rinfo->prefix_len); 587 prefix = &prefix_buf; 588 } 589 590 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, 591 dev->ifindex); 592 593 if (rt && !lifetime) { 594 ip6_del_rt(rt); 595 rt = NULL; 596 } 597 598 if (!rt && lifetime) 599 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, 600 pref); 601 else if (rt) 602 rt->rt6i_flags = RTF_ROUTEINFO | 603 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 604 605 if (rt) { 606 if (!addrconf_finite_timeout(lifetime)) { 607 rt->rt6i_flags &= ~RTF_EXPIRES; 608 } else { 609 rt->rt6i_expires = jiffies + HZ * lifetime; 610 rt->rt6i_flags |= RTF_EXPIRES; 611 } 612 dst_release(&rt->dst); 613 } 614 return 0; 615 } 616 #endif 617 618 #define BACKTRACK(__net, saddr) \ 619 do { \ 620 if (rt == __net->ipv6.ip6_null_entry) { \ 621 struct fib6_node *pn; \ 622 while (1) { \ 623 if (fn->fn_flags & RTN_TL_ROOT) \ 624 goto out; \ 625 pn = fn->parent; \ 626 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ 627 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ 628 else \ 629 fn = pn; \ 630 if (fn->fn_flags & RTN_RTINFO) \ 631 goto restart; \ 632 } \ 633 } \ 634 } while(0) 635 636 static struct rt6_info *ip6_pol_route_lookup(struct net *net, 637 struct fib6_table *table, 638 struct flowi6 *fl6, int flags) 639 { 640 struct fib6_node *fn; 641 struct rt6_info *rt; 642 643 read_lock_bh(&table->tb6_lock); 644 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 645 restart: 646 rt = fn->leaf; 647 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); 648 BACKTRACK(net, &fl6->saddr); 649 out: 650 dst_use(&rt->dst, jiffies); 651 read_unlock_bh(&table->tb6_lock); 652 return rt; 653 654 } 655 656 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, 657 const struct in6_addr *saddr, int oif, int strict) 658 { 659 struct flowi6 fl6 = { 660 .flowi6_oif = oif, 661 .daddr = *daddr, 662 }; 663 struct dst_entry *dst; 664 int flags = strict ? 
			RT6_LOOKUP_F_IFACE : 0;

	if (saddr) {
		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	}

	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
	if (dst->error == 0)
		return (struct rt6_info *) dst;

	dst_release(dst);

	return NULL;
}

EXPORT_SYMBOL(rt6_lookup);

/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is freed. In any case, if the caller does not hold a
 * reference, the route may be destroyed.
 */

static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info);
	write_unlock_bh(&table->tb6_lock);

	return err;
}

int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->rt6i_dev),
	};
	return __ip6_ins_rt(rt, &info);
}

static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
				      const struct in6_addr *daddr,
				      const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		struct neighbour *neigh;
		int attempts = !in_softirq();

		if (!(rt->rt6i_flags & RTF_GATEWAY)) {
			if (rt->rt6i_dst.plen != 128 &&
			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
				rt->rt6i_flags |= RTF_ANYCAST;
			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
		}

		rt->rt6i_flags |= RTF_CACHE;

#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
			rt->rt6i_src.plen = 128;
		}
#endif

	retry:
		neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
		if (IS_ERR(neigh)) {
			struct net *net = dev_net(rt->rt6i_dev);
			int saved_rt_min_interval =
				net->ipv6.sysctl.ip6_rt_gc_min_interval;
			int saved_rt_elasticity =
				net->ipv6.sysctl.ip6_rt_gc_elasticity;

			if (attempts-- > 0) {
				net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
				net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;

				ip6_dst_gc(&net->ipv6.ip6_dst_ops);

				net->ipv6.sysctl.ip6_rt_gc_elasticity =
					saved_rt_elasticity;
				net->ipv6.sysctl.ip6_rt_gc_min_interval =
					saved_rt_min_interval;
				goto retry;
			}

			if (net_ratelimit())
				printk(KERN_WARNING
				       "ipv6: Neighbour table overflow.\n");
			dst_free(&rt->dst);
			return NULL;
		}
		dst_set_neighbour(&rt->dst, neigh);

	}

	return rt;
}

static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
					const struct in6_addr *daddr)
{
	struct rt6_info *rt = ip6_rt_copy(ort, daddr);

	if (rt) {
		rt->rt6i_flags |= RTF_CACHE;
		dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
	}
	return rt;
}
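/*
 * Note on the two cloning helpers above, as used by ip6_pol_route() below:
 * when the selected route has no neighbour bound and is not RTF_NONEXTHOP,
 * rt6_alloc_cow() makes a destination-specific RTF_CACHE copy and resolves
 * its own neighbour entry (retrying after a forced dst GC if the neighbour
 * table is full); for non-host routes that already have a next hop,
 * rt6_alloc_clone() just copies the route and reuses the original neighbour.
 */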
static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt, *nrt;
	int strict = 0;
	int attempts = 3;
	int err;
	int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;

	strict |= flags & RT6_LOOKUP_F_IFACE;

relookup:
	read_lock_bh(&table->tb6_lock);

restart_2:
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);

restart:
	rt = rt6_select(fn, oif, strict | reachable);

	BACKTRACK(net, &fl6->saddr);
	if (rt == net->ipv6.ip6_null_entry ||
	    rt->rt6i_flags & RTF_CACHE)
		goto out;

	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);

	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
	else if (!(rt->dst.flags & DST_HOST))
		nrt = rt6_alloc_clone(rt, &fl6->daddr);
	else
		goto out2;

	dst_release(&rt->dst);
	rt = nrt ? : net->ipv6.ip6_null_entry;

	dst_hold(&rt->dst);
	if (nrt) {
		err = ip6_ins_rt(nrt);
		if (!err)
			goto out2;
	}

	if (--attempts <= 0)
		goto out2;

	/*
	 * Race condition! In the gap, when table->tb6_lock was
	 * released someone could insert this route.  Relookup.
	 */
	dst_release(&rt->dst);
	goto relookup;

out:
	if (reachable) {
		reachable = 0;
		goto restart_2;
	}
	dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
out2:
	rt->dst.lastuse = jiffies;
	rt->dst.__use++;

	return rt;
}

static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}

void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
}

static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}

struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
				    struct flowi6 *fl6)
{
	int flags = 0;

	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!ipv6_addr_any(&fl6->saddr))
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}

EXPORT_SYMBOL(ip6_route_output);

struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
	if (rt) {
		memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));

		new = &rt->dst;

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard;

		if (dst_metrics_read_only(&ort->dst))
			new->_metrics = ort->dst._metrics;
		else
			dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
929 in6_dev_hold(rt->rt6i_idev); 930 rt->rt6i_expires = 0; 931 932 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 933 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 934 rt->rt6i_metric = 0; 935 936 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 937 #ifdef CONFIG_IPV6_SUBTREES 938 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 939 #endif 940 941 dst_free(new); 942 } 943 944 dst_release(dst_orig); 945 return new ? new : ERR_PTR(-ENOMEM); 946 } 947 948 /* 949 * Destination cache support functions 950 */ 951 952 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 953 { 954 struct rt6_info *rt; 955 956 rt = (struct rt6_info *) dst; 957 958 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { 959 if (rt->rt6i_peer_genid != rt6_peer_genid()) { 960 if (!rt->rt6i_peer) 961 rt6_bind_peer(rt, 0); 962 rt->rt6i_peer_genid = rt6_peer_genid(); 963 } 964 return dst; 965 } 966 return NULL; 967 } 968 969 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) 970 { 971 struct rt6_info *rt = (struct rt6_info *) dst; 972 973 if (rt) { 974 if (rt->rt6i_flags & RTF_CACHE) { 975 if (rt6_check_expired(rt)) { 976 ip6_del_rt(rt); 977 dst = NULL; 978 } 979 } else { 980 dst_release(dst); 981 dst = NULL; 982 } 983 } 984 return dst; 985 } 986 987 static void ip6_link_failure(struct sk_buff *skb) 988 { 989 struct rt6_info *rt; 990 991 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); 992 993 rt = (struct rt6_info *) skb_dst(skb); 994 if (rt) { 995 if (rt->rt6i_flags&RTF_CACHE) { 996 dst_set_expires(&rt->dst, 0); 997 rt->rt6i_flags |= RTF_EXPIRES; 998 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) 999 rt->rt6i_node->fn_sernum = -1; 1000 } 1001 } 1002 1003 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 1004 { 1005 struct rt6_info *rt6 = (struct rt6_info*)dst; 1006 1007 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 1008 rt6->rt6i_flags |= RTF_MODIFIED; 1009 if (mtu < IPV6_MIN_MTU) { 1010 u32 features = dst_metric(dst, RTAX_FEATURES); 1011 mtu = IPV6_MIN_MTU; 1012 features |= RTAX_FEATURE_ALLFRAG; 1013 dst_metric_set(dst, RTAX_FEATURES, features); 1014 } 1015 dst_metric_set(dst, RTAX_MTU, mtu); 1016 } 1017 } 1018 1019 static unsigned int ip6_default_advmss(const struct dst_entry *dst) 1020 { 1021 struct net_device *dev = dst->dev; 1022 unsigned int mtu = dst_mtu(dst); 1023 struct net *net = dev_net(dev); 1024 1025 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 1026 1027 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) 1028 mtu = net->ipv6.sysctl.ip6_rt_min_advmss; 1029 1030 /* 1031 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 1032 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
1033 * IPV6_MAXPLEN is also valid and means: "any MSS, 1034 * rely only on pmtu discovery" 1035 */ 1036 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 1037 mtu = IPV6_MAXPLEN; 1038 return mtu; 1039 } 1040 1041 static unsigned int ip6_default_mtu(const struct dst_entry *dst) 1042 { 1043 unsigned int mtu = IPV6_MIN_MTU; 1044 struct inet6_dev *idev; 1045 1046 rcu_read_lock(); 1047 idev = __in6_dev_get(dst->dev); 1048 if (idev) 1049 mtu = idev->cnf.mtu6; 1050 rcu_read_unlock(); 1051 1052 return mtu; 1053 } 1054 1055 static struct dst_entry *icmp6_dst_gc_list; 1056 static DEFINE_SPINLOCK(icmp6_dst_lock); 1057 1058 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 1059 struct neighbour *neigh, 1060 const struct in6_addr *addr) 1061 { 1062 struct rt6_info *rt; 1063 struct inet6_dev *idev = in6_dev_get(dev); 1064 struct net *net = dev_net(dev); 1065 1066 if (unlikely(idev == NULL)) 1067 return NULL; 1068 1069 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0); 1070 if (unlikely(rt == NULL)) { 1071 in6_dev_put(idev); 1072 goto out; 1073 } 1074 1075 if (neigh) 1076 neigh_hold(neigh); 1077 else { 1078 neigh = ndisc_get_neigh(dev, addr); 1079 if (IS_ERR(neigh)) 1080 neigh = NULL; 1081 } 1082 1083 rt->dst.flags |= DST_HOST; 1084 rt->dst.output = ip6_output; 1085 dst_set_neighbour(&rt->dst, neigh); 1086 atomic_set(&rt->dst.__refcnt, 1); 1087 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); 1088 1089 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 1090 rt->rt6i_dst.plen = 128; 1091 rt->rt6i_idev = idev; 1092 1093 spin_lock_bh(&icmp6_dst_lock); 1094 rt->dst.next = icmp6_dst_gc_list; 1095 icmp6_dst_gc_list = &rt->dst; 1096 spin_unlock_bh(&icmp6_dst_lock); 1097 1098 fib6_force_start_gc(net); 1099 1100 out: 1101 return &rt->dst; 1102 } 1103 1104 int icmp6_dst_gc(void) 1105 { 1106 struct dst_entry *dst, **pprev; 1107 int more = 0; 1108 1109 spin_lock_bh(&icmp6_dst_lock); 1110 pprev = &icmp6_dst_gc_list; 1111 1112 while ((dst = *pprev) != NULL) { 1113 if (!atomic_read(&dst->__refcnt)) { 1114 *pprev = dst->next; 1115 dst_free(dst); 1116 } else { 1117 pprev = &dst->next; 1118 ++more; 1119 } 1120 } 1121 1122 spin_unlock_bh(&icmp6_dst_lock); 1123 1124 return more; 1125 } 1126 1127 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), 1128 void *arg) 1129 { 1130 struct dst_entry *dst, **pprev; 1131 1132 spin_lock_bh(&icmp6_dst_lock); 1133 pprev = &icmp6_dst_gc_list; 1134 while ((dst = *pprev) != NULL) { 1135 struct rt6_info *rt = (struct rt6_info *) dst; 1136 if (func(rt, arg)) { 1137 *pprev = dst->next; 1138 dst_free(dst); 1139 } else { 1140 pprev = &dst->next; 1141 } 1142 } 1143 spin_unlock_bh(&icmp6_dst_lock); 1144 } 1145 1146 static int ip6_dst_gc(struct dst_ops *ops) 1147 { 1148 unsigned long now = jiffies; 1149 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); 1150 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; 1151 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; 1152 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 1153 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 1154 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 1155 int entries; 1156 1157 entries = dst_entries_get_fast(ops); 1158 if (time_after(rt_last_gc + rt_min_interval, now) && 1159 entries <= rt_max_size) 1160 goto out; 1161 1162 net->ipv6.ip6_rt_gc_expire++; 1163 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); 1164 net->ipv6.ip6_rt_last_gc = now; 1165 entries = dst_entries_get_slow(ops); 1166 if (entries < ops->gc_thresh) 1167 net->ipv6.ip6_rt_gc_expire = 
rt_gc_timeout>>1; 1168 out: 1169 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1170 return entries > rt_max_size; 1171 } 1172 1173 /* Clean host part of a prefix. Not necessary in radix tree, 1174 but results in cleaner routing tables. 1175 1176 Remove it only when all the things will work! 1177 */ 1178 1179 int ip6_dst_hoplimit(struct dst_entry *dst) 1180 { 1181 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); 1182 if (hoplimit == 0) { 1183 struct net_device *dev = dst->dev; 1184 struct inet6_dev *idev; 1185 1186 rcu_read_lock(); 1187 idev = __in6_dev_get(dev); 1188 if (idev) 1189 hoplimit = idev->cnf.hop_limit; 1190 else 1191 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; 1192 rcu_read_unlock(); 1193 } 1194 return hoplimit; 1195 } 1196 EXPORT_SYMBOL(ip6_dst_hoplimit); 1197 1198 /* 1199 * 1200 */ 1201 1202 int ip6_route_add(struct fib6_config *cfg) 1203 { 1204 int err; 1205 struct net *net = cfg->fc_nlinfo.nl_net; 1206 struct rt6_info *rt = NULL; 1207 struct net_device *dev = NULL; 1208 struct inet6_dev *idev = NULL; 1209 struct fib6_table *table; 1210 int addr_type; 1211 1212 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1213 return -EINVAL; 1214 #ifndef CONFIG_IPV6_SUBTREES 1215 if (cfg->fc_src_len) 1216 return -EINVAL; 1217 #endif 1218 if (cfg->fc_ifindex) { 1219 err = -ENODEV; 1220 dev = dev_get_by_index(net, cfg->fc_ifindex); 1221 if (!dev) 1222 goto out; 1223 idev = in6_dev_get(dev); 1224 if (!idev) 1225 goto out; 1226 } 1227 1228 if (cfg->fc_metric == 0) 1229 cfg->fc_metric = IP6_RT_PRIO_USER; 1230 1231 table = fib6_new_table(net, cfg->fc_table); 1232 if (table == NULL) { 1233 err = -ENOBUFS; 1234 goto out; 1235 } 1236 1237 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT); 1238 1239 if (rt == NULL) { 1240 err = -ENOMEM; 1241 goto out; 1242 } 1243 1244 rt->dst.obsolete = -1; 1245 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? 1246 jiffies + clock_t_to_jiffies(cfg->fc_expires) : 1247 0; 1248 1249 if (cfg->fc_protocol == RTPROT_UNSPEC) 1250 cfg->fc_protocol = RTPROT_BOOT; 1251 rt->rt6i_protocol = cfg->fc_protocol; 1252 1253 addr_type = ipv6_addr_type(&cfg->fc_dst); 1254 1255 if (addr_type & IPV6_ADDR_MULTICAST) 1256 rt->dst.input = ip6_mc_input; 1257 else if (cfg->fc_flags & RTF_LOCAL) 1258 rt->dst.input = ip6_input; 1259 else 1260 rt->dst.input = ip6_forward; 1261 1262 rt->dst.output = ip6_output; 1263 1264 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1265 rt->rt6i_dst.plen = cfg->fc_dst_len; 1266 if (rt->rt6i_dst.plen == 128) 1267 rt->dst.flags |= DST_HOST; 1268 1269 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) { 1270 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); 1271 if (!metrics) { 1272 err = -ENOMEM; 1273 goto out; 1274 } 1275 dst_init_metrics(&rt->dst, metrics, 0); 1276 } 1277 #ifdef CONFIG_IPV6_SUBTREES 1278 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1279 rt->rt6i_src.plen = cfg->fc_src_len; 1280 #endif 1281 1282 rt->rt6i_metric = cfg->fc_metric; 1283 1284 /* We cannot add true routes via loopback here, 1285 they would result in kernel looping; promote them to reject routes 1286 */ 1287 if ((cfg->fc_flags & RTF_REJECT) || 1288 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK) 1289 && !(cfg->fc_flags&RTF_LOCAL))) { 1290 /* hold loopback dev/idev if we haven't done so. 
	 */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->dst.output = ip6_pkt_discard_out;
		rt->dst.input = ip6_pkt_discard;
		rt->dst.error = -ENETUNREACH;
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
		gwa_type = ipv6_addr_type(gw_addr);

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt;

			/* IPv6 strictly inhibits using non-link-local
			   addresses as nexthop addresses.
			   Otherwise, the router will not be able to send
			   redirects. That is very good, but in some (rare!)
			   circumstances (SIT, PtP, NBMA NOARP links) it is
			   handy to allow some exceptions. --ANK
			 */
			err = -EINVAL;
			if (!(gwa_type&IPV6_ADDR_UNICAST))
				goto out;

			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (grt == NULL)
				goto out;
			if (dev) {
				if (dev != grt->rt6i_dev) {
					dst_release(&grt->dst);
					goto out;
				}
			} else {
				dev = grt->rt6i_dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags&RTF_GATEWAY))
				err = 0;
			dst_release(&grt->dst);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (dev == NULL || (dev->flags&IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
		struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
		if (IS_ERR(n)) {
			err = PTR_ERR(n);
			goto out;
		}
		dst_set_neighbour(&rt->dst, n);
	}

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	if (cfg->fc_mx) {
		struct nlattr *nla;
		int remaining;

		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
			int type = nla_type(nla);

			if (type) {
				if (type > RTAX_MAX) {
					err = -EINVAL;
					goto out;
				}

				dst_metric_set(&rt->dst, type, nla_get_u32(nla));
			}
		}
	}

	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return __ip6_ins_rt(rt, &cfg->fc_nlinfo);

out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);
	return err;
}

static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->rt6i_dev);

	if (rt == net->ipv6.ip6_null_entry)
		return -ENOENT;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);

	err = fib6_del(rt, info);
	dst_release(&rt->dst);

	write_unlock_bh(&table->tb6_lock);

	return err;
}
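/*
 * ip6_del_rt() below is the plain wrapper around __ip6_del_rt(): deletion is
 * refused for the null entry, otherwise the route is unlinked under
 * tb6_lock via fib6_del() and the caller's reference is dropped.
 */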
int
ip6_del_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->rt6i_dev),
	};
	return __ip6_del_rt(rt, &info);
}

static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (table == NULL)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			if (cfg->fc_ifindex &&
			    (rt->rt6i_dev == NULL ||
			     rt->rt6i_dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}

/*
 *	Handle redirects
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};

static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/*
	 * Get the "current" route for this destination and
	 * check if the redirect has come from the appropriate router.
	 *
	 * RFC 2461 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		/*
		 * Current route is on-link; redirect is always invalid.
		 *
		 * It seems the previous statement is not quite true: there
		 * could be a node which regards us as on-link (e.g. proxy
		 * ndisc), but then the router serving it might decide that
		 * we should know the truth 8)8) --ANK (980726).
1529 */ 1530 if (rt6_check_expired(rt)) 1531 continue; 1532 if (!(rt->rt6i_flags & RTF_GATEWAY)) 1533 continue; 1534 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex) 1535 continue; 1536 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) 1537 continue; 1538 break; 1539 } 1540 1541 if (!rt) 1542 rt = net->ipv6.ip6_null_entry; 1543 BACKTRACK(net, &fl6->saddr); 1544 out: 1545 dst_hold(&rt->dst); 1546 1547 read_unlock_bh(&table->tb6_lock); 1548 1549 return rt; 1550 }; 1551 1552 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest, 1553 const struct in6_addr *src, 1554 const struct in6_addr *gateway, 1555 struct net_device *dev) 1556 { 1557 int flags = RT6_LOOKUP_F_HAS_SADDR; 1558 struct net *net = dev_net(dev); 1559 struct ip6rd_flowi rdfl = { 1560 .fl6 = { 1561 .flowi6_oif = dev->ifindex, 1562 .daddr = *dest, 1563 .saddr = *src, 1564 }, 1565 }; 1566 1567 ipv6_addr_copy(&rdfl.gateway, gateway); 1568 1569 if (rt6_need_strict(dest)) 1570 flags |= RT6_LOOKUP_F_IFACE; 1571 1572 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6, 1573 flags, __ip6_route_redirect); 1574 } 1575 1576 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src, 1577 const struct in6_addr *saddr, 1578 struct neighbour *neigh, u8 *lladdr, int on_link) 1579 { 1580 struct rt6_info *rt, *nrt = NULL; 1581 struct netevent_redirect netevent; 1582 struct net *net = dev_net(neigh->dev); 1583 1584 rt = ip6_route_redirect(dest, src, saddr, neigh->dev); 1585 1586 if (rt == net->ipv6.ip6_null_entry) { 1587 if (net_ratelimit()) 1588 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " 1589 "for redirect target\n"); 1590 goto out; 1591 } 1592 1593 /* 1594 * We have finally decided to accept it. 1595 */ 1596 1597 neigh_update(neigh, lladdr, NUD_STALE, 1598 NEIGH_UPDATE_F_WEAK_OVERRIDE| 1599 NEIGH_UPDATE_F_OVERRIDE| 1600 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 1601 NEIGH_UPDATE_F_ISROUTER)) 1602 ); 1603 1604 /* 1605 * Redirect received -> path was valid. 1606 * Look, redirects are sent only in response to data packets, 1607 * so that this nexthop apparently is reachable. --ANK 1608 */ 1609 dst_confirm(&rt->dst); 1610 1611 /* Duplicate redirect: silently ignore. */ 1612 if (neigh == dst_get_neighbour_raw(&rt->dst)) 1613 goto out; 1614 1615 nrt = ip6_rt_copy(rt, dest); 1616 if (nrt == NULL) 1617 goto out; 1618 1619 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 1620 if (on_link) 1621 nrt->rt6i_flags &= ~RTF_GATEWAY; 1622 1623 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key); 1624 dst_set_neighbour(&nrt->dst, neigh_clone(neigh)); 1625 1626 if (ip6_ins_rt(nrt)) 1627 goto out; 1628 1629 netevent.old = &rt->dst; 1630 netevent.new = &nrt->dst; 1631 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 1632 1633 if (rt->rt6i_flags&RTF_CACHE) { 1634 ip6_del_rt(rt); 1635 return; 1636 } 1637 1638 out: 1639 dst_release(&rt->dst); 1640 } 1641 1642 /* 1643 * Handle ICMP "packet too big" messages 1644 * i.e. 
Path MTU discovery
 */

static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
			     struct net *net, u32 pmtu, int ifindex)
{
	struct rt6_info *rt, *nrt;
	int allfrag = 0;
again:
	rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
	if (rt == NULL)
		return;

	if (rt6_check_expired(rt)) {
		ip6_del_rt(rt);
		goto again;
	}

	if (pmtu >= dst_mtu(&rt->dst))
		goto out;

	if (pmtu < IPV6_MIN_MTU) {
		/*
		 * According to RFC 2460, the PMTU is set to the IPv6 Minimum
		 * Link MTU (1280) and a fragment header should always be
		 * included after a node receives a Packet Too Big message
		 * reporting a PMTU less than the IPv6 Minimum Link MTU.
		 */
		pmtu = IPV6_MIN_MTU;
		allfrag = 1;
	}

	/* New mtu received -> path was valid.
	   They are sent only in response to data packets,
	   so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	/* Host route. If it is static, it would be better
	   not to override it but to add a new one, so that
	   when the cache entry expires the old pmtu
	   returns automatically.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&rt->dst, RTAX_FEATURES, features);
		}
		dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
		goto out;
	}

	/* Network route.
	   Two cases are possible:
	   1. It is a connected route. Action: COW
	   2. It is a gatewayed or NONEXTHOP route. Action: clone it.
	 */
	if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
		nrt = rt6_alloc_cow(rt, daddr, saddr);
	else
		nrt = rt6_alloc_clone(rt, daddr);

	if (nrt) {
		dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
		if (allfrag) {
			u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
		}

		/* According to RFC 1981, detection of a PMTU increase shouldn't
		 * happen within 5 minutes; the recommended timer is 10 minutes.
		 * Here the route expiration time is set to ip6_rt_mtu_expires,
		 * which is 10 minutes. After that the decreased pmtu expires
		 * and a PMTU increase can be detected again automatically.
		 */
		dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;

		ip6_ins_rt(nrt);
	}
out:
	dst_release(&rt->dst);
}

void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
			struct net_device *dev, u32 pmtu)
{
	struct net *net = dev_net(dev);

	/*
	 * RFC 1981 states that a node "MUST reduce the size of the packets it
	 * is sending along the path" that caused the Packet Too Big message.
	 * Since it's not possible in the general case to determine which
	 * interface was used to send the original packet, we update the MTU
	 * on the interface that will be used to send future packets. We also
	 * update the MTU on the interface that received the Packet Too Big in
	 * case the original packet was forced out that interface with
	 * SO_BINDTODEVICE or similar.
This is the next best thing to the 1746 * correct behaviour, which would be to update the MTU on all 1747 * interfaces. 1748 */ 1749 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0); 1750 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex); 1751 } 1752 1753 /* 1754 * Misc support functions 1755 */ 1756 1757 static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort, 1758 const struct in6_addr *dest) 1759 { 1760 struct net *net = dev_net(ort->rt6i_dev); 1761 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, 1762 ort->dst.dev, 0); 1763 1764 if (rt) { 1765 rt->dst.input = ort->dst.input; 1766 rt->dst.output = ort->dst.output; 1767 rt->dst.flags |= DST_HOST; 1768 1769 ipv6_addr_copy(&rt->rt6i_dst.addr, dest); 1770 rt->rt6i_dst.plen = 128; 1771 dst_copy_metrics(&rt->dst, &ort->dst); 1772 rt->dst.error = ort->dst.error; 1773 rt->rt6i_idev = ort->rt6i_idev; 1774 if (rt->rt6i_idev) 1775 in6_dev_hold(rt->rt6i_idev); 1776 rt->dst.lastuse = jiffies; 1777 rt->rt6i_expires = 0; 1778 1779 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway); 1780 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES; 1781 rt->rt6i_metric = 0; 1782 1783 #ifdef CONFIG_IPV6_SUBTREES 1784 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1785 #endif 1786 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key)); 1787 rt->rt6i_table = ort->rt6i_table; 1788 } 1789 return rt; 1790 } 1791 1792 #ifdef CONFIG_IPV6_ROUTE_INFO 1793 static struct rt6_info *rt6_get_route_info(struct net *net, 1794 const struct in6_addr *prefix, int prefixlen, 1795 const struct in6_addr *gwaddr, int ifindex) 1796 { 1797 struct fib6_node *fn; 1798 struct rt6_info *rt = NULL; 1799 struct fib6_table *table; 1800 1801 table = fib6_get_table(net, RT6_TABLE_INFO); 1802 if (table == NULL) 1803 return NULL; 1804 1805 write_lock_bh(&table->tb6_lock); 1806 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); 1807 if (!fn) 1808 goto out; 1809 1810 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1811 if (rt->rt6i_dev->ifindex != ifindex) 1812 continue; 1813 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 1814 continue; 1815 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 1816 continue; 1817 dst_hold(&rt->dst); 1818 break; 1819 } 1820 out: 1821 write_unlock_bh(&table->tb6_lock); 1822 return rt; 1823 } 1824 1825 static struct rt6_info *rt6_add_route_info(struct net *net, 1826 const struct in6_addr *prefix, int prefixlen, 1827 const struct in6_addr *gwaddr, int ifindex, 1828 unsigned pref) 1829 { 1830 struct fib6_config cfg = { 1831 .fc_table = RT6_TABLE_INFO, 1832 .fc_metric = IP6_RT_PRIO_USER, 1833 .fc_ifindex = ifindex, 1834 .fc_dst_len = prefixlen, 1835 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 1836 RTF_UP | RTF_PREF(pref), 1837 .fc_nlinfo.pid = 0, 1838 .fc_nlinfo.nlh = NULL, 1839 .fc_nlinfo.nl_net = net, 1840 }; 1841 1842 ipv6_addr_copy(&cfg.fc_dst, prefix); 1843 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); 1844 1845 /* We should treat it as a default route if prefix length is 0. 
*/ 1846 if (!prefixlen) 1847 cfg.fc_flags |= RTF_DEFAULT; 1848 1849 ip6_route_add(&cfg); 1850 1851 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); 1852 } 1853 #endif 1854 1855 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) 1856 { 1857 struct rt6_info *rt; 1858 struct fib6_table *table; 1859 1860 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); 1861 if (table == NULL) 1862 return NULL; 1863 1864 write_lock_bh(&table->tb6_lock); 1865 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { 1866 if (dev == rt->rt6i_dev && 1867 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 1868 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 1869 break; 1870 } 1871 if (rt) 1872 dst_hold(&rt->dst); 1873 write_unlock_bh(&table->tb6_lock); 1874 return rt; 1875 } 1876 1877 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, 1878 struct net_device *dev, 1879 unsigned int pref) 1880 { 1881 struct fib6_config cfg = { 1882 .fc_table = RT6_TABLE_DFLT, 1883 .fc_metric = IP6_RT_PRIO_USER, 1884 .fc_ifindex = dev->ifindex, 1885 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 1886 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 1887 .fc_nlinfo.pid = 0, 1888 .fc_nlinfo.nlh = NULL, 1889 .fc_nlinfo.nl_net = dev_net(dev), 1890 }; 1891 1892 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); 1893 1894 ip6_route_add(&cfg); 1895 1896 return rt6_get_dflt_router(gwaddr, dev); 1897 } 1898 1899 void rt6_purge_dflt_routers(struct net *net) 1900 { 1901 struct rt6_info *rt; 1902 struct fib6_table *table; 1903 1904 /* NOTE: Keep consistent with rt6_get_dflt_router */ 1905 table = fib6_get_table(net, RT6_TABLE_DFLT); 1906 if (table == NULL) 1907 return; 1908 1909 restart: 1910 read_lock_bh(&table->tb6_lock); 1911 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { 1912 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { 1913 dst_hold(&rt->dst); 1914 read_unlock_bh(&table->tb6_lock); 1915 ip6_del_rt(rt); 1916 goto restart; 1917 } 1918 } 1919 read_unlock_bh(&table->tb6_lock); 1920 } 1921 1922 static void rtmsg_to_fib6_config(struct net *net, 1923 struct in6_rtmsg *rtmsg, 1924 struct fib6_config *cfg) 1925 { 1926 memset(cfg, 0, sizeof(*cfg)); 1927 1928 cfg->fc_table = RT6_TABLE_MAIN; 1929 cfg->fc_ifindex = rtmsg->rtmsg_ifindex; 1930 cfg->fc_metric = rtmsg->rtmsg_metric; 1931 cfg->fc_expires = rtmsg->rtmsg_info; 1932 cfg->fc_dst_len = rtmsg->rtmsg_dst_len; 1933 cfg->fc_src_len = rtmsg->rtmsg_src_len; 1934 cfg->fc_flags = rtmsg->rtmsg_flags; 1935 1936 cfg->fc_nlinfo.nl_net = net; 1937 1938 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); 1939 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); 1940 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); 1941 } 1942 1943 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) 1944 { 1945 struct fib6_config cfg; 1946 struct in6_rtmsg rtmsg; 1947 int err; 1948 1949 switch(cmd) { 1950 case SIOCADDRT: /* Add a route */ 1951 case SIOCDELRT: /* Delete a route */ 1952 if (!capable(CAP_NET_ADMIN)) 1953 return -EPERM; 1954 err = copy_from_user(&rtmsg, arg, 1955 sizeof(struct in6_rtmsg)); 1956 if (err) 1957 return -EFAULT; 1958 1959 rtmsg_to_fib6_config(net, &rtmsg, &cfg); 1960 1961 rtnl_lock(); 1962 switch (cmd) { 1963 case SIOCADDRT: 1964 err = ip6_route_add(&cfg); 1965 break; 1966 case SIOCDELRT: 1967 err = ip6_route_del(&cfg); 1968 break; 1969 default: 1970 err = -EINVAL; 1971 } 1972 rtnl_unlock(); 1973 1974 return err; 1975 } 1976 1977 return -EINVAL; 1978 } 1979 1980 /* 
1981 * Drop the packet on the floor 1982 */ 1983 1984 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) 1985 { 1986 int type; 1987 struct dst_entry *dst = skb_dst(skb); 1988 switch (ipstats_mib_noroutes) { 1989 case IPSTATS_MIB_INNOROUTES: 1990 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); 1991 if (type == IPV6_ADDR_ANY) { 1992 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 1993 IPSTATS_MIB_INADDRERRORS); 1994 break; 1995 } 1996 /* FALLTHROUGH */ 1997 case IPSTATS_MIB_OUTNOROUTES: 1998 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 1999 ipstats_mib_noroutes); 2000 break; 2001 } 2002 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); 2003 kfree_skb(skb); 2004 return 0; 2005 } 2006 2007 static int ip6_pkt_discard(struct sk_buff *skb) 2008 { 2009 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 2010 } 2011 2012 static int ip6_pkt_discard_out(struct sk_buff *skb) 2013 { 2014 skb->dev = skb_dst(skb)->dev; 2015 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 2016 } 2017 2018 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2019 2020 static int ip6_pkt_prohibit(struct sk_buff *skb) 2021 { 2022 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 2023 } 2024 2025 static int ip6_pkt_prohibit_out(struct sk_buff *skb) 2026 { 2027 skb->dev = skb_dst(skb)->dev; 2028 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 2029 } 2030 2031 #endif 2032 2033 /* 2034 * Allocate a dst for local (unicast / anycast) address. 2035 */ 2036 2037 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 2038 const struct in6_addr *addr, 2039 int anycast) 2040 { 2041 struct net *net = dev_net(idev->dev); 2042 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, 2043 net->loopback_dev, 0); 2044 struct neighbour *neigh; 2045 2046 if (rt == NULL) { 2047 if (net_ratelimit()) 2048 pr_warning("IPv6: Maximum number of routes reached," 2049 " consider increasing route/max_size.\n"); 2050 return ERR_PTR(-ENOMEM); 2051 } 2052 2053 in6_dev_hold(idev); 2054 2055 rt->dst.flags |= DST_HOST; 2056 rt->dst.input = ip6_input; 2057 rt->dst.output = ip6_output; 2058 rt->rt6i_idev = idev; 2059 rt->dst.obsolete = -1; 2060 2061 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 2062 if (anycast) 2063 rt->rt6i_flags |= RTF_ANYCAST; 2064 else 2065 rt->rt6i_flags |= RTF_LOCAL; 2066 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway); 2067 if (IS_ERR(neigh)) { 2068 dst_free(&rt->dst); 2069 2070 return ERR_CAST(neigh); 2071 } 2072 dst_set_neighbour(&rt->dst, neigh); 2073 2074 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 2075 rt->rt6i_dst.plen = 128; 2076 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); 2077 2078 atomic_set(&rt->dst.__refcnt, 1); 2079 2080 return rt; 2081 } 2082 2083 int ip6_route_get_saddr(struct net *net, 2084 struct rt6_info *rt, 2085 const struct in6_addr *daddr, 2086 unsigned int prefs, 2087 struct in6_addr *saddr) 2088 { 2089 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt); 2090 int err = 0; 2091 if (rt->rt6i_prefsrc.plen) 2092 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr); 2093 else 2094 err = ipv6_dev_get_saddr(net, idev ? 
					 idev->dev : NULL,
					 daddr, prefs, saddr);
	return err;
}

/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;
	struct net *net;
	struct in6_addr *addr;
};

static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
{
	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;

	if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
	    rt != net->ipv6.ip6_null_entry &&
	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
		/* remove prefsrc entry */
		rt->rt6i_prefsrc.plen = 0;
	}
	return 0;
}

void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
}

struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};

static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
	const struct arg_dev_net *adn = arg;
	const struct net_device *dev = adn->dev;

	if ((rt->rt6i_dev == dev || dev == NULL) &&
	    rt != adn->net->ipv6.ip6_null_entry) {
		RT6_TRACE("deleted by ifdown %p\n", rt);
		return -1;
	}
	return 0;
}

void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, 0, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
}

struct rt6_mtu_change_arg
{
	struct net_device *dev;
	unsigned mtu;
};

static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	 */

	idev = __in6_dev_get(arg->dev);
	if (idev == NULL)
		return 0;

	/* For an administrative MTU increase, there is no way to discover
	   an IPv6 PMTU increase, so the PMTU must be updated here.
	   Since RFC 1981 doesn't cover administrative MTU increases,
	   updating the PMTU on increase is a MUST (e.g. for jumbo frames).
	 */
	/*
	   If the new MTU is less than the route PMTU, the new MTU will be the
	   lowest MTU in the path; update the route PMTU to reflect the PMTU
	   decrease. If the new MTU is greater than the route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU to
	   reflect the increase. In this case, if the other nodes' MTU is also
	   the lowest MTU in the path, a Packet Too Big message will lead to
	   PMTU discovery.
void rt6_mtu_change(struct net_device *dev, unsigned mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
}

static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]	= { .len = sizeof(struct in6_addr) },
	[RTA_OIF]	= { .type = NLA_U32 },
	[RTA_IIF]	= { .type = NLA_U32 },
	[RTA_PRIORITY]	= { .type = NLA_U32 },
	[RTA_METRICS]	= { .type = NLA_NESTED },
};

static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;

	if (rtm->rtm_type == RTN_UNREACHABLE)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	err = 0;
errout:
	return err;
}

static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct fib6_config cfg;
	int err;

	err = rtm_to_fib6_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

	return ip6_route_del(&cfg);
}

static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct fib6_config cfg;
	int err;

	err = rtm_to_fib6_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

	return ip6_route_add(&cfg);
}

static inline size_t rt6_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo));
}
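
/* Illustrative mapping (the prefix, gateway and device below are examples
 * only): a userspace request such as
 *
 *	ip -6 route add 2001:db8::/64 via fe80::1 dev eth0 metric 1024
 *
 * arrives as an RTM_NEWROUTE message, which rtm_to_fib6_config() above turns
 * into: rtm_dst_len = 64 with RTA_DST -> fc_dst, RTA_GATEWAY -> fc_gateway
 * (also setting RTF_GATEWAY), RTA_OIF -> fc_ifindex and
 * RTA_PRIORITY -> fc_metric.
 */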
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;
	struct neighbour *n;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	NLA_PUT_U32(skb, RTA_TABLE, table);
	if (rt->rt6i_flags & RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags & RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		NLA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		NLA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			NLA_PUT_U32(skb, RTA_IIF, iif);
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	rcu_read_lock();
	n = dst_get_neighbour(&rt->dst);
	if (n) {
		if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
			/* drop the RCU read-side lock before taking the
			 * error path; NLA_PUT would jump out with it held.
			 */
			rcu_read_unlock();
			goto nla_put_failure;
		}
	}
	rcu_read_unlock();

	if (rt->dst.dev)
		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);

	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);

	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->rt6i_expires - jiffies < INT_MAX)
		expires = rt->rt6i_expires - jiffies;
	else
		expires = INT_MAX;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	int prefix;

	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	} else
		prefix = 0;

	return rt6_fill_node(arg->net,
		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
		     prefix, 0, NLM_F_MULTI);
}

static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);

	if (iif) {
		struct net_device *dev;
		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers; this skb can pass
	   through a good chunk of the routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	rt = (struct rt6_info *) ip6_route_output(net, NULL, &fl6);
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
errout:
	return err;
}

void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;

	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
	if (skb == NULL)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
			    event, info->pid, seq, 0, 0, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
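
/* Side note (illustrative): inet6_rt_notify() above multicasts the message
 * to the RTNLGRP_IPV6_ROUTE netlink group, so any listener on that group
 * sees route additions and deletions as they happen.  For example,
 * iproute2's "ip -6 monitor route" subscribes to this group and prints one
 * line per notification.
 */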
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *data)
{
	struct net_device *dev = (struct net_device *)data;
	struct net *net = dev_net(dev);

	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	}

	return NOTIFY_OK;
}

/*
 *	/proc
 */

#ifdef CONFIG_PROC_FS

struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};

static int rt6_info_route(struct rt6_info *rt, void *p_arg)
{
	struct seq_file *m = p_arg;
	struct neighbour *n;

	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);

#ifdef CONFIG_IPV6_SUBTREES
	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
#else
	seq_puts(m, "00000000000000000000000000000000 00 ");
#endif
	rcu_read_lock();
	n = dst_get_neighbour(&rt->dst);
	if (n) {
		seq_printf(m, "%pi6", n->primary_key);
	} else {
		seq_puts(m, "00000000000000000000000000000000");
	}
	rcu_read_unlock();
	seq_printf(m, " %08x %08x %08x %08x %8s\n",
		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
		   rt->dst.__use, rt->rt6i_flags,
		   rt->rt6i_dev ? rt->rt6i_dev->name : "");
	return 0;
}
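
/* Illustrative output: rt6_info_route() above emits one line per route in
 * /proc/net/ipv6_route, in the order printed: destination/plen, source/plen,
 * next hop, metric, refcnt, use count, flags and device.  A connected
 * fe80::/64 prefix on a hypothetical eth0 might look roughly like (example
 * values only, shown wrapped here although the file prints a single line):
 *
 * fe800000000000000000000000000000 40 00000000000000000000000000000000 00
 * 00000000000000000000000000000000 00000100 00000001 00000000 00000001 eth0
 */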
rt->rt6i_dev->name : ""); 2647 return 0; 2648 } 2649 2650 static int ipv6_route_show(struct seq_file *m, void *v) 2651 { 2652 struct net *net = (struct net *)m->private; 2653 fib6_clean_all(net, rt6_info_route, 0, m); 2654 return 0; 2655 } 2656 2657 static int ipv6_route_open(struct inode *inode, struct file *file) 2658 { 2659 return single_open_net(inode, file, ipv6_route_show); 2660 } 2661 2662 static const struct file_operations ipv6_route_proc_fops = { 2663 .owner = THIS_MODULE, 2664 .open = ipv6_route_open, 2665 .read = seq_read, 2666 .llseek = seq_lseek, 2667 .release = single_release_net, 2668 }; 2669 2670 static int rt6_stats_seq_show(struct seq_file *seq, void *v) 2671 { 2672 struct net *net = (struct net *)seq->private; 2673 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 2674 net->ipv6.rt6_stats->fib_nodes, 2675 net->ipv6.rt6_stats->fib_route_nodes, 2676 net->ipv6.rt6_stats->fib_rt_alloc, 2677 net->ipv6.rt6_stats->fib_rt_entries, 2678 net->ipv6.rt6_stats->fib_rt_cache, 2679 dst_entries_get_slow(&net->ipv6.ip6_dst_ops), 2680 net->ipv6.rt6_stats->fib_discarded_routes); 2681 2682 return 0; 2683 } 2684 2685 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 2686 { 2687 return single_open_net(inode, file, rt6_stats_seq_show); 2688 } 2689 2690 static const struct file_operations rt6_stats_seq_fops = { 2691 .owner = THIS_MODULE, 2692 .open = rt6_stats_seq_open, 2693 .read = seq_read, 2694 .llseek = seq_lseek, 2695 .release = single_release_net, 2696 }; 2697 #endif /* CONFIG_PROC_FS */ 2698 2699 #ifdef CONFIG_SYSCTL 2700 2701 static 2702 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, 2703 void __user *buffer, size_t *lenp, loff_t *ppos) 2704 { 2705 struct net *net; 2706 int delay; 2707 if (!write) 2708 return -EINVAL; 2709 2710 net = (struct net *)ctl->extra1; 2711 delay = net->ipv6.sysctl.flush_delay; 2712 proc_dointvec(ctl, write, buffer, lenp, ppos); 2713 fib6_run_gc(delay <= 0 ? 
ctl_table ipv6_route_table_template[] = {
	{
		.procname	= "flush",
		.data		= &init_net.ipv6.sysctl.flush_delay,
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv6_sysctl_rtcache_flush
	},
	{
		.procname	= "gc_thresh",
		.data		= &ip6_dst_ops_template.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_min_interval",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_min_interval_ms",
		.data		= &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{ }
};

struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
	}

	return table;
}
#endif
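
/* Tuning sketch (illustrative; the sysctl names follow the procnames above
 * and the values are arbitrary examples): because ipv6_route_sysctl_init()
 * clones the template and repoints each entry at the per-namespace fields,
 * these knobs can be set independently in each netns, e.g.:
 *
 *	sysctl -w net.ipv6.route.gc_thresh=2048
 *	sysctl -w net.ipv6.route.max_size=16384
 */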
&net->ipv6.ip6_dst_ops; 2835 dst_init_metrics(&net->ipv6.ip6_null_entry->dst, 2836 ip6_template_metrics, true); 2837 2838 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2839 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 2840 sizeof(*net->ipv6.ip6_prohibit_entry), 2841 GFP_KERNEL); 2842 if (!net->ipv6.ip6_prohibit_entry) 2843 goto out_ip6_null_entry; 2844 net->ipv6.ip6_prohibit_entry->dst.path = 2845 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 2846 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2847 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, 2848 ip6_template_metrics, true); 2849 2850 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 2851 sizeof(*net->ipv6.ip6_blk_hole_entry), 2852 GFP_KERNEL); 2853 if (!net->ipv6.ip6_blk_hole_entry) 2854 goto out_ip6_prohibit_entry; 2855 net->ipv6.ip6_blk_hole_entry->dst.path = 2856 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 2857 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2858 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, 2859 ip6_template_metrics, true); 2860 #endif 2861 2862 net->ipv6.sysctl.flush_delay = 0; 2863 net->ipv6.sysctl.ip6_rt_max_size = 4096; 2864 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 2865 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 2866 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 2867 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 2868 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 2869 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 2870 2871 #ifdef CONFIG_PROC_FS 2872 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); 2873 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2874 #endif 2875 net->ipv6.ip6_rt_gc_expire = 30*HZ; 2876 2877 ret = 0; 2878 out: 2879 return ret; 2880 2881 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2882 out_ip6_prohibit_entry: 2883 kfree(net->ipv6.ip6_prohibit_entry); 2884 out_ip6_null_entry: 2885 kfree(net->ipv6.ip6_null_entry); 2886 #endif 2887 out_ip6_dst_entries: 2888 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 2889 out_ip6_dst_ops: 2890 goto out; 2891 } 2892 2893 static void __net_exit ip6_route_net_exit(struct net *net) 2894 { 2895 #ifdef CONFIG_PROC_FS 2896 proc_net_remove(net, "ipv6_route"); 2897 proc_net_remove(net, "rt6_stats"); 2898 #endif 2899 kfree(net->ipv6.ip6_null_entry); 2900 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2901 kfree(net->ipv6.ip6_prohibit_entry); 2902 kfree(net->ipv6.ip6_blk_hole_entry); 2903 #endif 2904 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 2905 } 2906 2907 static struct pernet_operations ip6_route_net_ops = { 2908 .init = ip6_route_net_init, 2909 .exit = ip6_route_net_exit, 2910 }; 2911 2912 static struct notifier_block ip6_route_dev_notifier = { 2913 .notifier_call = ip6_route_dev_notify, 2914 .priority = 0, 2915 }; 2916 2917 int __init ip6_route_init(void) 2918 { 2919 int ret; 2920 2921 ret = -ENOMEM; 2922 ip6_dst_ops_template.kmem_cachep = 2923 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 2924 SLAB_HWCACHE_ALIGN, NULL); 2925 if (!ip6_dst_ops_template.kmem_cachep) 2926 goto out; 2927 2928 ret = dst_entries_init(&ip6_dst_blackhole_ops); 2929 if (ret) 2930 goto out_kmem_cache; 2931 2932 ret = register_pernet_subsys(&ip6_route_net_ops); 2933 if (ret) 2934 goto out_dst_entries; 2935 2936 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 2937 2938 /* Registering of the loopback is done before this portion of code, 2939 * the loopback reference in rt6_info will not be taken, do it 2940 * manually for 
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_dst_entries;

	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* The loopback device is registered before this code runs, so the
	 * loopback references in the rt6_info templates have not been taken
	 * yet; take them manually for init_net.
	 */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto fib6_rules_init;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto fib6_rules_init;

out:
	return ret;

fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}

void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}