1 /* 2 * Linux INET6 implementation 3 * FIB front-end. 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 */ 13 14 /* Changes: 15 * 16 * YOSHIFUJI Hideaki @USAGI 17 * reworked default router selection. 18 * - respect outgoing interface 19 * - select from (probably) reachable routers (i.e. 20 * routers in REACHABLE, STALE, DELAY or PROBE states). 21 * - always select the same router if it is (probably) 22 * reachable. otherwise, round-robin the list. 23 * Ville Nuorvala 24 * Fixed routing subtrees. 25 */ 26 27 #define pr_fmt(fmt) "IPv6: " fmt 28 29 #include <linux/capability.h> 30 #include <linux/errno.h> 31 #include <linux/export.h> 32 #include <linux/types.h> 33 #include <linux/times.h> 34 #include <linux/socket.h> 35 #include <linux/sockios.h> 36 #include <linux/net.h> 37 #include <linux/route.h> 38 #include <linux/netdevice.h> 39 #include <linux/in6.h> 40 #include <linux/mroute6.h> 41 #include <linux/init.h> 42 #include <linux/if_arp.h> 43 #include <linux/proc_fs.h> 44 #include <linux/seq_file.h> 45 #include <linux/nsproxy.h> 46 #include <linux/slab.h> 47 #include <net/net_namespace.h> 48 #include <net/snmp.h> 49 #include <net/ipv6.h> 50 #include <net/ip6_fib.h> 51 #include <net/ip6_route.h> 52 #include <net/ndisc.h> 53 #include <net/addrconf.h> 54 #include <net/tcp.h> 55 #include <linux/rtnetlink.h> 56 #include <net/dst.h> 57 #include <net/xfrm.h> 58 #include <net/netevent.h> 59 #include <net/netlink.h> 60 61 #include <asm/uaccess.h> 62 63 #ifdef CONFIG_SYSCTL 64 #include <linux/sysctl.h> 65 #endif 66 67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, 68 const struct in6_addr *dest); 69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 70 static unsigned int ip6_default_advmss(const struct dst_entry *dst); 71 static unsigned int ip6_mtu(const struct dst_entry *dst); 72 static struct dst_entry *ip6_negative_advice(struct dst_entry *); 73 static void ip6_dst_destroy(struct dst_entry *); 74 static void ip6_dst_ifdown(struct dst_entry *, 75 struct net_device *dev, int how); 76 static int ip6_dst_gc(struct dst_ops *ops); 77 78 static int ip6_pkt_discard(struct sk_buff *skb); 79 static int ip6_pkt_discard_out(struct sk_buff *skb); 80 static void ip6_link_failure(struct sk_buff *skb); 81 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu); 82 static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb); 83 84 #ifdef CONFIG_IPV6_ROUTE_INFO 85 static struct rt6_info *rt6_add_route_info(struct net *net, 86 const struct in6_addr *prefix, int prefixlen, 87 const struct in6_addr *gwaddr, int ifindex, 88 unsigned int pref); 89 static struct rt6_info *rt6_get_route_info(struct net *net, 90 const struct in6_addr *prefix, int prefixlen, 91 const struct in6_addr *gwaddr, int ifindex); 92 #endif 93 94 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) 95 { 96 struct rt6_info *rt = (struct rt6_info *) dst; 97 struct inet_peer *peer; 98 u32 *p = NULL; 99 100 if (!(rt->dst.flags & DST_HOST)) 101 return NULL; 102 103 peer = rt6_get_peer_create(rt); 104 if (peer) { 105 u32 *old_p = __DST_METRICS_PTR(old); 106 unsigned long prev, new; 107 108 p = peer->metrics; 109 if (inet_metrics_new(peer)) 110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX); 111 112 new = (unsigned long) p; 113 prev = cmpxchg(&dst->_metrics, old, new); 114 115 if (prev != old) { 116 p = __DST_METRICS_PTR(prev); 117 if (prev & DST_METRICS_READ_ONLY) 118 p = NULL; 119 } 120 } 121 return p; 122 } 123 124 static inline const void *choose_neigh_daddr(struct rt6_info *rt, 125 struct sk_buff *skb, 126 const void *daddr) 127 { 128 struct in6_addr *p = &rt->rt6i_gateway; 129 130 if (!ipv6_addr_any(p)) 131 return (const void *) p; 132 else if (skb) 133 return &ipv6_hdr(skb)->daddr; 134 return daddr; 135 } 136 137 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, 138 struct sk_buff *skb, 139 const void *daddr) 140 { 141 struct rt6_info *rt = (struct rt6_info *) dst; 142 struct neighbour *n; 143 144 daddr = choose_neigh_daddr(rt, skb, daddr); 145 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr); 146 if (n) 147 return n; 148 return neigh_create(&nd_tbl, daddr, dst->dev); 149 } 150 151 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev) 152 { 153 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway); 154 if (!n) { 155 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev); 156 if (IS_ERR(n)) 157 return PTR_ERR(n); 158 } 159 rt->n = n; 160 161 return 0; 162 } 163 164 static struct dst_ops ip6_dst_ops_template = { 165 .family = AF_INET6, 166 .protocol = cpu_to_be16(ETH_P_IPV6), 167 .gc = ip6_dst_gc, 168 .gc_thresh = 1024, 169 .check = ip6_dst_check, 170 .default_advmss = ip6_default_advmss, 171 .mtu = ip6_mtu, 172 .cow_metrics = ipv6_cow_metrics, 173 .destroy = ip6_dst_destroy, 174 .ifdown = ip6_dst_ifdown, 175 .negative_advice = ip6_negative_advice, 176 .link_failure = ip6_link_failure, 177 .update_pmtu = ip6_rt_update_pmtu, 178 .redirect = rt6_do_redirect, 179 .local_out = __ip6_local_out, 180 .neigh_lookup = ip6_neigh_lookup, 181 }; 182 183 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) 184 { 185 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 186 187 return mtu ? : dst->dev->mtu; 188 } 189 190 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 191 { 192 } 193 194 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sk_buff *skb) 195 { 196 } 197 198 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst, 199 unsigned long old) 200 { 201 return NULL; 202 } 203 204 static struct dst_ops ip6_dst_blackhole_ops = { 205 .family = AF_INET6, 206 .protocol = cpu_to_be16(ETH_P_IPV6), 207 .destroy = ip6_dst_destroy, 208 .check = ip6_dst_check, 209 .mtu = ip6_blackhole_mtu, 210 .default_advmss = ip6_default_advmss, 211 .update_pmtu = ip6_rt_blackhole_update_pmtu, 212 .redirect = ip6_rt_blackhole_redirect, 213 .cow_metrics = ip6_rt_blackhole_cow_metrics, 214 .neigh_lookup = ip6_neigh_lookup, 215 }; 216 217 static const u32 ip6_template_metrics[RTAX_MAX] = { 218 [RTAX_HOPLIMIT - 1] = 255, 219 }; 220 221 static struct rt6_info ip6_null_entry_template = { 222 .dst = { 223 .__refcnt = ATOMIC_INIT(1), 224 .__use = 1, 225 .obsolete = -1, 226 .error = -ENETUNREACH, 227 .input = ip6_pkt_discard, 228 .output = ip6_pkt_discard_out, 229 }, 230 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 231 .rt6i_protocol = RTPROT_KERNEL, 232 .rt6i_metric = ~(u32) 0, 233 .rt6i_ref = ATOMIC_INIT(1), 234 }; 235 236 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 237 238 static int ip6_pkt_prohibit(struct sk_buff *skb); 239 static int ip6_pkt_prohibit_out(struct sk_buff *skb); 240 241 static struct rt6_info ip6_prohibit_entry_template = { 242 .dst = { 243 .__refcnt = ATOMIC_INIT(1), 244 .__use = 1, 245 .obsolete = -1, 246 .error = -EACCES, 247 .input = ip6_pkt_prohibit, 248 .output = ip6_pkt_prohibit_out, 249 }, 250 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 251 .rt6i_protocol = RTPROT_KERNEL, 252 .rt6i_metric = ~(u32) 0, 253 .rt6i_ref = ATOMIC_INIT(1), 254 }; 255 256 static struct rt6_info ip6_blk_hole_entry_template = { 257 .dst = { 258 .__refcnt = ATOMIC_INIT(1), 259 .__use = 1, 260 .obsolete = -1, 261 .error = -EINVAL, 262 .input = dst_discard, 263 .output = dst_discard, 264 }, 265 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 266 .rt6i_protocol = RTPROT_KERNEL, 267 .rt6i_metric = ~(u32) 0, 268 .rt6i_ref = ATOMIC_INIT(1), 269 }; 270 271 #endif 272 273 /* allocate dst with ip6_dst_ops */ 274 static inline struct rt6_info *ip6_dst_alloc(struct net *net, 275 struct net_device *dev, 276 int flags, 277 struct fib6_table *table) 278 { 279 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 280 0, 0, flags); 281 282 if (rt) { 283 struct dst_entry *dst = &rt->dst; 284 285 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); 286 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); 287 } 288 return rt; 289 } 290 291 static void ip6_dst_destroy(struct dst_entry *dst) 292 { 293 struct rt6_info *rt = (struct rt6_info *)dst; 294 struct inet6_dev *idev = rt->rt6i_idev; 295 296 if (rt->n) 297 neigh_release(rt->n); 298 299 if (!(rt->dst.flags & DST_HOST)) 300 dst_destroy_metrics_generic(dst); 301 302 if (idev) { 303 rt->rt6i_idev = NULL; 304 in6_dev_put(idev); 305 } 306 307 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from) 308 dst_release(dst->from); 309 310 if (rt6_has_peer(rt)) { 311 struct inet_peer *peer = rt6_peer_ptr(rt); 312 inet_putpeer(peer); 313 } 314 } 315 316 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0); 317 318 static u32 rt6_peer_genid(void) 319 { 320 return atomic_read(&__rt6_peer_genid); 321 } 322 323 void rt6_bind_peer(struct rt6_info *rt, int create) 324 { 325 struct inet_peer_base *base; 326 struct inet_peer *peer; 327 328 base = inetpeer_base_ptr(rt->_rt6i_peer); 329 if (!base) 330 return; 331 332 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); 333 if (peer) { 334 if (!rt6_set_peer(rt, peer)) 335 inet_putpeer(peer); 336 else 337 rt->rt6i_peer_genid = rt6_peer_genid(); 338 } 339 } 340 341 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 342 int how) 343 { 344 struct rt6_info *rt = (struct rt6_info *)dst; 345 struct inet6_dev *idev = rt->rt6i_idev; 346 struct net_device *loopback_dev = 347 dev_net(dev)->loopback_dev; 348 349 if (dev != loopback_dev) { 350 if (idev && idev->dev == dev) { 351 struct inet6_dev *loopback_idev = 352 in6_dev_get(loopback_dev); 353 if (loopback_idev) { 354 rt->rt6i_idev = loopback_idev; 355 in6_dev_put(idev); 356 } 357 } 358 if (rt->n && rt->n->dev == dev) { 359 rt->n->dev = loopback_dev; 360 dev_hold(loopback_dev); 361 dev_put(dev); 362 } 363 } 364 } 365 366 static bool rt6_check_expired(const struct rt6_info *rt) 367 { 368 struct rt6_info *ort = NULL; 369 370 if (rt->rt6i_flags & RTF_EXPIRES) { 371 if (time_after(jiffies, rt->dst.expires)) 372 return true; 373 } else if (rt->dst.from) { 374 ort = (struct rt6_info *) rt->dst.from; 375 return (ort->rt6i_flags & RTF_EXPIRES) && 376 time_after(jiffies, ort->dst.expires); 377 } 378 return false; 379 } 380 381 static bool rt6_need_strict(const struct in6_addr *daddr) 382 { 383 return ipv6_addr_type(daddr) & 384 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); 385 } 386 387 /* 388 * Route lookup. Any table->tb6_lock is implied. 389 */ 390 391 static inline struct rt6_info *rt6_device_match(struct net *net, 392 struct rt6_info *rt, 393 const struct in6_addr *saddr, 394 int oif, 395 int flags) 396 { 397 struct rt6_info *local = NULL; 398 struct rt6_info *sprt; 399 400 if (!oif && ipv6_addr_any(saddr)) 401 goto out; 402 403 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { 404 struct net_device *dev = sprt->dst.dev; 405 406 if (oif) { 407 if (dev->ifindex == oif) 408 return sprt; 409 if (dev->flags & IFF_LOOPBACK) { 410 if (!sprt->rt6i_idev || 411 sprt->rt6i_idev->dev->ifindex != oif) { 412 if (flags & RT6_LOOKUP_F_IFACE && oif) 413 continue; 414 if (local && (!oif || 415 local->rt6i_idev->dev->ifindex == oif)) 416 continue; 417 } 418 local = sprt; 419 } 420 } else { 421 if (ipv6_chk_addr(net, saddr, dev, 422 flags & RT6_LOOKUP_F_IFACE)) 423 return sprt; 424 } 425 } 426 427 if (oif) { 428 if (local) 429 return local; 430 431 if (flags & RT6_LOOKUP_F_IFACE) 432 return net->ipv6.ip6_null_entry; 433 } 434 out: 435 return rt; 436 } 437 438 #ifdef CONFIG_IPV6_ROUTER_PREF 439 static void rt6_probe(struct rt6_info *rt) 440 { 441 struct neighbour *neigh; 442 /* 443 * Okay, this does not seem to be appropriate 444 * for now, however, we need to check if it 445 * is really so; aka Router Reachability Probing. 446 * 447 * Router Reachability Probe MUST be rate-limited 448 * to no more than one per minute. 449 */ 450 rcu_read_lock(); 451 neigh = rt ? rt->n : NULL; 452 if (!neigh || (neigh->nud_state & NUD_VALID)) 453 goto out; 454 read_lock_bh(&neigh->lock); 455 if (!(neigh->nud_state & NUD_VALID) && 456 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { 457 struct in6_addr mcaddr; 458 struct in6_addr *target; 459 460 neigh->updated = jiffies; 461 read_unlock_bh(&neigh->lock); 462 463 target = (struct in6_addr *)&neigh->primary_key; 464 addrconf_addr_solict_mult(target, &mcaddr); 465 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); 466 } else { 467 read_unlock_bh(&neigh->lock); 468 } 469 out: 470 rcu_read_unlock(); 471 } 472 #else 473 static inline void rt6_probe(struct rt6_info *rt) 474 { 475 } 476 #endif 477 478 /* 479 * Default Router Selection (RFC 2461 6.3.6) 480 */ 481 static inline int rt6_check_dev(struct rt6_info *rt, int oif) 482 { 483 struct net_device *dev = rt->dst.dev; 484 if (!oif || dev->ifindex == oif) 485 return 2; 486 if ((dev->flags & IFF_LOOPBACK) && 487 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) 488 return 1; 489 return 0; 490 } 491 492 static inline int rt6_check_neigh(struct rt6_info *rt) 493 { 494 struct neighbour *neigh; 495 int m; 496 497 rcu_read_lock(); 498 neigh = rt->n; 499 if (rt->rt6i_flags & RTF_NONEXTHOP || 500 !(rt->rt6i_flags & RTF_GATEWAY)) 501 m = 1; 502 else if (neigh) { 503 read_lock_bh(&neigh->lock); 504 if (neigh->nud_state & NUD_VALID) 505 m = 2; 506 #ifdef CONFIG_IPV6_ROUTER_PREF 507 else if (neigh->nud_state & NUD_FAILED) 508 m = 0; 509 #endif 510 else 511 m = 1; 512 read_unlock_bh(&neigh->lock); 513 } else 514 m = 0; 515 rcu_read_unlock(); 516 return m; 517 } 518 519 static int rt6_score_route(struct rt6_info *rt, int oif, 520 int strict) 521 { 522 int m, n; 523 524 m = rt6_check_dev(rt, oif); 525 if (!m && (strict & RT6_LOOKUP_F_IFACE)) 526 return -1; 527 #ifdef CONFIG_IPV6_ROUTER_PREF 528 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; 529 #endif 530 n = rt6_check_neigh(rt); 531 if (!n && (strict & RT6_LOOKUP_F_REACHABLE)) 532 return -1; 533 return m; 534 } 535 536 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, 537 int *mpri, struct rt6_info *match) 538 { 539 int m; 540 541 if (rt6_check_expired(rt)) 542 goto out; 543 544 m = rt6_score_route(rt, oif, strict); 545 if (m < 0) 546 goto out; 547 548 if (m > *mpri) { 549 if (strict & RT6_LOOKUP_F_REACHABLE) 550 rt6_probe(match); 551 *mpri = m; 552 match = rt; 553 } else if (strict & RT6_LOOKUP_F_REACHABLE) { 554 rt6_probe(rt); 555 } 556 557 out: 558 return match; 559 } 560 561 static struct rt6_info *find_rr_leaf(struct fib6_node *fn, 562 struct rt6_info *rr_head, 563 u32 metric, int oif, int strict) 564 { 565 struct rt6_info *rt, *match; 566 int mpri = -1; 567 568 match = NULL; 569 for (rt = rr_head; rt && rt->rt6i_metric == metric; 570 rt = rt->dst.rt6_next) 571 match = find_match(rt, oif, strict, &mpri, match); 572 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; 573 rt = rt->dst.rt6_next) 574 match = find_match(rt, oif, strict, &mpri, match); 575 576 return match; 577 } 578 579 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) 580 { 581 struct rt6_info *match, *rt0; 582 struct net *net; 583 584 rt0 = fn->rr_ptr; 585 if (!rt0) 586 fn->rr_ptr = rt0 = fn->leaf; 587 588 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict); 589 590 if (!match && 591 (strict & RT6_LOOKUP_F_REACHABLE)) { 592 struct rt6_info *next = rt0->dst.rt6_next; 593 594 /* no entries matched; do round-robin */ 595 if (!next || next->rt6i_metric != rt0->rt6i_metric) 596 next = fn->leaf; 597 598 if (next != rt0) 599 fn->rr_ptr = next; 600 } 601 602 net = dev_net(rt0->dst.dev); 603 return match ? match : net->ipv6.ip6_null_entry; 604 } 605 606 #ifdef CONFIG_IPV6_ROUTE_INFO 607 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 608 const struct in6_addr *gwaddr) 609 { 610 struct net *net = dev_net(dev); 611 struct route_info *rinfo = (struct route_info *) opt; 612 struct in6_addr prefix_buf, *prefix; 613 unsigned int pref; 614 unsigned long lifetime; 615 struct rt6_info *rt; 616 617 if (len < sizeof(struct route_info)) { 618 return -EINVAL; 619 } 620 621 /* Sanity check for prefix_len and length */ 622 if (rinfo->length > 3) { 623 return -EINVAL; 624 } else if (rinfo->prefix_len > 128) { 625 return -EINVAL; 626 } else if (rinfo->prefix_len > 64) { 627 if (rinfo->length < 2) { 628 return -EINVAL; 629 } 630 } else if (rinfo->prefix_len > 0) { 631 if (rinfo->length < 1) { 632 return -EINVAL; 633 } 634 } 635 636 pref = rinfo->route_pref; 637 if (pref == ICMPV6_ROUTER_PREF_INVALID) 638 return -EINVAL; 639 640 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); 641 642 if (rinfo->length == 3) 643 prefix = (struct in6_addr *)rinfo->prefix; 644 else { 645 /* this function is safe */ 646 ipv6_addr_prefix(&prefix_buf, 647 (struct in6_addr *)rinfo->prefix, 648 rinfo->prefix_len); 649 prefix = &prefix_buf; 650 } 651 652 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, 653 dev->ifindex); 654 655 if (rt && !lifetime) { 656 ip6_del_rt(rt); 657 rt = NULL; 658 } 659 660 if (!rt && lifetime) 661 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, 662 pref); 663 else if (rt) 664 rt->rt6i_flags = RTF_ROUTEINFO | 665 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 666 667 if (rt) { 668 if (!addrconf_finite_timeout(lifetime)) 669 rt6_clean_expires(rt); 670 else 671 rt6_set_expires(rt, jiffies + HZ * lifetime); 672 673 dst_release(&rt->dst); 674 } 675 return 0; 676 } 677 #endif 678 679 #define BACKTRACK(__net, saddr) \ 680 do { \ 681 if (rt == __net->ipv6.ip6_null_entry) { \ 682 struct fib6_node *pn; \ 683 while (1) { \ 684 if (fn->fn_flags & RTN_TL_ROOT) \ 685 goto out; \ 686 pn = fn->parent; \ 687 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ 688 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ 689 else \ 690 fn = pn; \ 691 if (fn->fn_flags & RTN_RTINFO) \ 692 goto restart; \ 693 } \ 694 } \ 695 } while (0) 696 697 static struct rt6_info *ip6_pol_route_lookup(struct net *net, 698 struct fib6_table *table, 699 struct flowi6 *fl6, int flags) 700 { 701 struct fib6_node *fn; 702 struct rt6_info *rt; 703 704 read_lock_bh(&table->tb6_lock); 705 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 706 restart: 707 rt = fn->leaf; 708 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); 709 BACKTRACK(net, &fl6->saddr); 710 out: 711 dst_use(&rt->dst, jiffies); 712 read_unlock_bh(&table->tb6_lock); 713 return rt; 714 715 } 716 717 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6, 718 int flags) 719 { 720 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup); 721 } 722 EXPORT_SYMBOL_GPL(ip6_route_lookup); 723 724 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, 725 const struct in6_addr *saddr, int oif, int strict) 726 { 727 struct flowi6 fl6 = { 728 .flowi6_oif = oif, 729 .daddr = *daddr, 730 }; 731 struct dst_entry *dst; 732 int flags = strict ? RT6_LOOKUP_F_IFACE : 0; 733 734 if (saddr) { 735 memcpy(&fl6.saddr, saddr, sizeof(*saddr)); 736 flags |= RT6_LOOKUP_F_HAS_SADDR; 737 } 738 739 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup); 740 if (dst->error == 0) 741 return (struct rt6_info *) dst; 742 743 dst_release(dst); 744 745 return NULL; 746 } 747 748 EXPORT_SYMBOL(rt6_lookup); 749 750 /* ip6_ins_rt is called with FREE table->tb6_lock. 751 It takes new route entry, the addition fails by any reason the 752 route is freed. In any case, if caller does not hold it, it may 753 be destroyed. 754 */ 755 756 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) 757 { 758 int err; 759 struct fib6_table *table; 760 761 table = rt->rt6i_table; 762 write_lock_bh(&table->tb6_lock); 763 err = fib6_add(&table->tb6_root, rt, info); 764 write_unlock_bh(&table->tb6_lock); 765 766 return err; 767 } 768 769 int ip6_ins_rt(struct rt6_info *rt) 770 { 771 struct nl_info info = { 772 .nl_net = dev_net(rt->dst.dev), 773 }; 774 return __ip6_ins_rt(rt, &info); 775 } 776 777 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, 778 const struct in6_addr *daddr, 779 const struct in6_addr *saddr) 780 { 781 struct rt6_info *rt; 782 783 /* 784 * Clone the route. 785 */ 786 787 rt = ip6_rt_copy(ort, daddr); 788 789 if (rt) { 790 int attempts = !in_softirq(); 791 792 if (!(rt->rt6i_flags & RTF_GATEWAY)) { 793 if (ort->rt6i_dst.plen != 128 && 794 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) 795 rt->rt6i_flags |= RTF_ANYCAST; 796 rt->rt6i_gateway = *daddr; 797 } 798 799 rt->rt6i_flags |= RTF_CACHE; 800 801 #ifdef CONFIG_IPV6_SUBTREES 802 if (rt->rt6i_src.plen && saddr) { 803 rt->rt6i_src.addr = *saddr; 804 rt->rt6i_src.plen = 128; 805 } 806 #endif 807 808 retry: 809 if (rt6_bind_neighbour(rt, rt->dst.dev)) { 810 struct net *net = dev_net(rt->dst.dev); 811 int saved_rt_min_interval = 812 net->ipv6.sysctl.ip6_rt_gc_min_interval; 813 int saved_rt_elasticity = 814 net->ipv6.sysctl.ip6_rt_gc_elasticity; 815 816 if (attempts-- > 0) { 817 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; 818 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; 819 820 ip6_dst_gc(&net->ipv6.ip6_dst_ops); 821 822 net->ipv6.sysctl.ip6_rt_gc_elasticity = 823 saved_rt_elasticity; 824 net->ipv6.sysctl.ip6_rt_gc_min_interval = 825 saved_rt_min_interval; 826 goto retry; 827 } 828 829 net_warn_ratelimited("Neighbour table overflow\n"); 830 dst_free(&rt->dst); 831 return NULL; 832 } 833 } 834 835 return rt; 836 } 837 838 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, 839 const struct in6_addr *daddr) 840 { 841 struct rt6_info *rt = ip6_rt_copy(ort, daddr); 842 843 if (rt) { 844 rt->rt6i_flags |= RTF_CACHE; 845 rt->n = neigh_clone(ort->n); 846 } 847 return rt; 848 } 849 850 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, 851 struct flowi6 *fl6, int flags) 852 { 853 struct fib6_node *fn; 854 struct rt6_info *rt, *nrt; 855 int strict = 0; 856 int attempts = 3; 857 int err; 858 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; 859 860 strict |= flags & RT6_LOOKUP_F_IFACE; 861 862 relookup: 863 read_lock_bh(&table->tb6_lock); 864 865 restart_2: 866 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 867 868 restart: 869 rt = rt6_select(fn, oif, strict | reachable); 870 871 BACKTRACK(net, &fl6->saddr); 872 if (rt == net->ipv6.ip6_null_entry || 873 rt->rt6i_flags & RTF_CACHE) 874 goto out; 875 876 dst_hold(&rt->dst); 877 read_unlock_bh(&table->tb6_lock); 878 879 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP)) 880 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); 881 else if (!(rt->dst.flags & DST_HOST)) 882 nrt = rt6_alloc_clone(rt, &fl6->daddr); 883 else 884 goto out2; 885 886 dst_release(&rt->dst); 887 rt = nrt ? : net->ipv6.ip6_null_entry; 888 889 dst_hold(&rt->dst); 890 if (nrt) { 891 err = ip6_ins_rt(nrt); 892 if (!err) 893 goto out2; 894 } 895 896 if (--attempts <= 0) 897 goto out2; 898 899 /* 900 * Race condition! In the gap, when table->tb6_lock was 901 * released someone could insert this route. Relookup. 902 */ 903 dst_release(&rt->dst); 904 goto relookup; 905 906 out: 907 if (reachable) { 908 reachable = 0; 909 goto restart_2; 910 } 911 dst_hold(&rt->dst); 912 read_unlock_bh(&table->tb6_lock); 913 out2: 914 rt->dst.lastuse = jiffies; 915 rt->dst.__use++; 916 917 return rt; 918 } 919 920 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, 921 struct flowi6 *fl6, int flags) 922 { 923 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); 924 } 925 926 static struct dst_entry *ip6_route_input_lookup(struct net *net, 927 struct net_device *dev, 928 struct flowi6 *fl6, int flags) 929 { 930 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG) 931 flags |= RT6_LOOKUP_F_IFACE; 932 933 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input); 934 } 935 936 void ip6_route_input(struct sk_buff *skb) 937 { 938 const struct ipv6hdr *iph = ipv6_hdr(skb); 939 struct net *net = dev_net(skb->dev); 940 int flags = RT6_LOOKUP_F_HAS_SADDR; 941 struct flowi6 fl6 = { 942 .flowi6_iif = skb->dev->ifindex, 943 .daddr = iph->daddr, 944 .saddr = iph->saddr, 945 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, 946 .flowi6_mark = skb->mark, 947 .flowi6_proto = iph->nexthdr, 948 }; 949 950 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags)); 951 } 952 953 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, 954 struct flowi6 *fl6, int flags) 955 { 956 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); 957 } 958 959 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk, 960 struct flowi6 *fl6) 961 { 962 int flags = 0; 963 964 fl6->flowi6_iif = net->loopback_dev->ifindex; 965 966 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) 967 flags |= RT6_LOOKUP_F_IFACE; 968 969 if (!ipv6_addr_any(&fl6->saddr)) 970 flags |= RT6_LOOKUP_F_HAS_SADDR; 971 else if (sk) 972 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); 973 974 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); 975 } 976 977 EXPORT_SYMBOL(ip6_route_output); 978 979 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) 980 { 981 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; 982 struct dst_entry *new = NULL; 983 984 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0); 985 if (rt) { 986 new = &rt->dst; 987 988 memset(new + 1, 0, sizeof(*rt) - sizeof(*new)); 989 rt6_init_peer(rt, net->ipv6.peers); 990 991 new->__use = 1; 992 new->input = dst_discard; 993 new->output = dst_discard; 994 995 if (dst_metrics_read_only(&ort->dst)) 996 new->_metrics = ort->dst._metrics; 997 else 998 dst_copy_metrics(new, &ort->dst); 999 rt->rt6i_idev = ort->rt6i_idev; 1000 if (rt->rt6i_idev) 1001 in6_dev_hold(rt->rt6i_idev); 1002 1003 rt->rt6i_gateway = ort->rt6i_gateway; 1004 rt->rt6i_flags = ort->rt6i_flags; 1005 rt6_clean_expires(rt); 1006 rt->rt6i_metric = 0; 1007 1008 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 1009 #ifdef CONFIG_IPV6_SUBTREES 1010 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1011 #endif 1012 1013 dst_free(new); 1014 } 1015 1016 dst_release(dst_orig); 1017 return new ? new : ERR_PTR(-ENOMEM); 1018 } 1019 1020 /* 1021 * Destination cache support functions 1022 */ 1023 1024 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 1025 { 1026 struct rt6_info *rt; 1027 1028 rt = (struct rt6_info *) dst; 1029 1030 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { 1031 if (rt->rt6i_peer_genid != rt6_peer_genid()) { 1032 if (!rt6_has_peer(rt)) 1033 rt6_bind_peer(rt, 0); 1034 rt->rt6i_peer_genid = rt6_peer_genid(); 1035 } 1036 return dst; 1037 } 1038 return NULL; 1039 } 1040 1041 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) 1042 { 1043 struct rt6_info *rt = (struct rt6_info *) dst; 1044 1045 if (rt) { 1046 if (rt->rt6i_flags & RTF_CACHE) { 1047 if (rt6_check_expired(rt)) { 1048 ip6_del_rt(rt); 1049 dst = NULL; 1050 } 1051 } else { 1052 dst_release(dst); 1053 dst = NULL; 1054 } 1055 } 1056 return dst; 1057 } 1058 1059 static void ip6_link_failure(struct sk_buff *skb) 1060 { 1061 struct rt6_info *rt; 1062 1063 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); 1064 1065 rt = (struct rt6_info *) skb_dst(skb); 1066 if (rt) { 1067 if (rt->rt6i_flags & RTF_CACHE) 1068 rt6_update_expires(rt, 0); 1069 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) 1070 rt->rt6i_node->fn_sernum = -1; 1071 } 1072 } 1073 1074 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 1075 { 1076 struct rt6_info *rt6 = (struct rt6_info*)dst; 1077 1078 dst_confirm(dst); 1079 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { 1080 struct net *net = dev_net(dst->dev); 1081 1082 rt6->rt6i_flags |= RTF_MODIFIED; 1083 if (mtu < IPV6_MIN_MTU) { 1084 u32 features = dst_metric(dst, RTAX_FEATURES); 1085 mtu = IPV6_MIN_MTU; 1086 features |= RTAX_FEATURE_ALLFRAG; 1087 dst_metric_set(dst, RTAX_FEATURES, features); 1088 } 1089 dst_metric_set(dst, RTAX_MTU, mtu); 1090 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); 1091 } 1092 } 1093 1094 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, 1095 int oif, u32 mark) 1096 { 1097 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 1098 struct dst_entry *dst; 1099 struct flowi6 fl6; 1100 1101 memset(&fl6, 0, sizeof(fl6)); 1102 fl6.flowi6_oif = oif; 1103 fl6.flowi6_mark = mark; 1104 fl6.flowi6_flags = 0; 1105 fl6.daddr = iph->daddr; 1106 fl6.saddr = iph->saddr; 1107 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; 1108 1109 dst = ip6_route_output(net, NULL, &fl6); 1110 if (!dst->error) 1111 ip6_rt_update_pmtu(dst, ntohl(mtu)); 1112 dst_release(dst); 1113 } 1114 EXPORT_SYMBOL_GPL(ip6_update_pmtu); 1115 1116 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) 1117 { 1118 ip6_update_pmtu(skb, sock_net(sk), mtu, 1119 sk->sk_bound_dev_if, sk->sk_mark); 1120 } 1121 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); 1122 1123 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) 1124 { 1125 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 1126 struct dst_entry *dst; 1127 struct flowi6 fl6; 1128 1129 memset(&fl6, 0, sizeof(fl6)); 1130 fl6.flowi6_oif = oif; 1131 fl6.flowi6_mark = mark; 1132 fl6.flowi6_flags = 0; 1133 fl6.daddr = iph->daddr; 1134 fl6.saddr = iph->saddr; 1135 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; 1136 1137 dst = ip6_route_output(net, NULL, &fl6); 1138 if (!dst->error) 1139 rt6_do_redirect(dst, skb); 1140 dst_release(dst); 1141 } 1142 EXPORT_SYMBOL_GPL(ip6_redirect); 1143 1144 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) 1145 { 1146 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); 1147 } 1148 EXPORT_SYMBOL_GPL(ip6_sk_redirect); 1149 1150 static unsigned int ip6_default_advmss(const struct dst_entry *dst) 1151 { 1152 struct net_device *dev = dst->dev; 1153 unsigned int mtu = dst_mtu(dst); 1154 struct net *net = dev_net(dev); 1155 1156 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 1157 1158 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) 1159 mtu = net->ipv6.sysctl.ip6_rt_min_advmss; 1160 1161 /* 1162 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 1163 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 1164 * IPV6_MAXPLEN is also valid and means: "any MSS, 1165 * rely only on pmtu discovery" 1166 */ 1167 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 1168 mtu = IPV6_MAXPLEN; 1169 return mtu; 1170 } 1171 1172 static unsigned int ip6_mtu(const struct dst_entry *dst) 1173 { 1174 struct inet6_dev *idev; 1175 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 1176 1177 if (mtu) 1178 return mtu; 1179 1180 mtu = IPV6_MIN_MTU; 1181 1182 rcu_read_lock(); 1183 idev = __in6_dev_get(dst->dev); 1184 if (idev) 1185 mtu = idev->cnf.mtu6; 1186 rcu_read_unlock(); 1187 1188 return mtu; 1189 } 1190 1191 static struct dst_entry *icmp6_dst_gc_list; 1192 static DEFINE_SPINLOCK(icmp6_dst_lock); 1193 1194 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 1195 struct neighbour *neigh, 1196 struct flowi6 *fl6) 1197 { 1198 struct dst_entry *dst; 1199 struct rt6_info *rt; 1200 struct inet6_dev *idev = in6_dev_get(dev); 1201 struct net *net = dev_net(dev); 1202 1203 if (unlikely(!idev)) 1204 return ERR_PTR(-ENODEV); 1205 1206 rt = ip6_dst_alloc(net, dev, 0, NULL); 1207 if (unlikely(!rt)) { 1208 in6_dev_put(idev); 1209 dst = ERR_PTR(-ENOMEM); 1210 goto out; 1211 } 1212 1213 if (neigh) 1214 neigh_hold(neigh); 1215 else { 1216 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr); 1217 if (IS_ERR(neigh)) { 1218 in6_dev_put(idev); 1219 dst_free(&rt->dst); 1220 return ERR_CAST(neigh); 1221 } 1222 } 1223 1224 rt->dst.flags |= DST_HOST; 1225 rt->dst.output = ip6_output; 1226 rt->n = neigh; 1227 atomic_set(&rt->dst.__refcnt, 1); 1228 rt->rt6i_dst.addr = fl6->daddr; 1229 rt->rt6i_dst.plen = 128; 1230 rt->rt6i_idev = idev; 1231 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); 1232 1233 spin_lock_bh(&icmp6_dst_lock); 1234 rt->dst.next = icmp6_dst_gc_list; 1235 icmp6_dst_gc_list = &rt->dst; 1236 spin_unlock_bh(&icmp6_dst_lock); 1237 1238 fib6_force_start_gc(net); 1239 1240 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); 1241 1242 out: 1243 return dst; 1244 } 1245 1246 int icmp6_dst_gc(void) 1247 { 1248 struct dst_entry *dst, **pprev; 1249 int more = 0; 1250 1251 spin_lock_bh(&icmp6_dst_lock); 1252 pprev = &icmp6_dst_gc_list; 1253 1254 while ((dst = *pprev) != NULL) { 1255 if (!atomic_read(&dst->__refcnt)) { 1256 *pprev = dst->next; 1257 dst_free(dst); 1258 } else { 1259 pprev = &dst->next; 1260 ++more; 1261 } 1262 } 1263 1264 spin_unlock_bh(&icmp6_dst_lock); 1265 1266 return more; 1267 } 1268 1269 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), 1270 void *arg) 1271 { 1272 struct dst_entry *dst, **pprev; 1273 1274 spin_lock_bh(&icmp6_dst_lock); 1275 pprev = &icmp6_dst_gc_list; 1276 while ((dst = *pprev) != NULL) { 1277 struct rt6_info *rt = (struct rt6_info *) dst; 1278 if (func(rt, arg)) { 1279 *pprev = dst->next; 1280 dst_free(dst); 1281 } else { 1282 pprev = &dst->next; 1283 } 1284 } 1285 spin_unlock_bh(&icmp6_dst_lock); 1286 } 1287 1288 static int ip6_dst_gc(struct dst_ops *ops) 1289 { 1290 unsigned long now = jiffies; 1291 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); 1292 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; 1293 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; 1294 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 1295 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 1296 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 1297 int entries; 1298 1299 entries = dst_entries_get_fast(ops); 1300 if (time_after(rt_last_gc + rt_min_interval, now) && 1301 entries <= rt_max_size) 1302 goto out; 1303 1304 net->ipv6.ip6_rt_gc_expire++; 1305 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); 1306 net->ipv6.ip6_rt_last_gc = now; 1307 entries = dst_entries_get_slow(ops); 1308 if (entries < ops->gc_thresh) 1309 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1310 out: 1311 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1312 return entries > rt_max_size; 1313 } 1314 1315 /* Clean host part of a prefix. Not necessary in radix tree, 1316 but results in cleaner routing tables. 1317 1318 Remove it only when all the things will work! 1319 */ 1320 1321 int ip6_dst_hoplimit(struct dst_entry *dst) 1322 { 1323 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); 1324 if (hoplimit == 0) { 1325 struct net_device *dev = dst->dev; 1326 struct inet6_dev *idev; 1327 1328 rcu_read_lock(); 1329 idev = __in6_dev_get(dev); 1330 if (idev) 1331 hoplimit = idev->cnf.hop_limit; 1332 else 1333 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit; 1334 rcu_read_unlock(); 1335 } 1336 return hoplimit; 1337 } 1338 EXPORT_SYMBOL(ip6_dst_hoplimit); 1339 1340 /* 1341 * 1342 */ 1343 1344 int ip6_route_add(struct fib6_config *cfg) 1345 { 1346 int err; 1347 struct net *net = cfg->fc_nlinfo.nl_net; 1348 struct rt6_info *rt = NULL; 1349 struct net_device *dev = NULL; 1350 struct inet6_dev *idev = NULL; 1351 struct fib6_table *table; 1352 int addr_type; 1353 1354 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1355 return -EINVAL; 1356 #ifndef CONFIG_IPV6_SUBTREES 1357 if (cfg->fc_src_len) 1358 return -EINVAL; 1359 #endif 1360 if (cfg->fc_ifindex) { 1361 err = -ENODEV; 1362 dev = dev_get_by_index(net, cfg->fc_ifindex); 1363 if (!dev) 1364 goto out; 1365 idev = in6_dev_get(dev); 1366 if (!idev) 1367 goto out; 1368 } 1369 1370 if (cfg->fc_metric == 0) 1371 cfg->fc_metric = IP6_RT_PRIO_USER; 1372 1373 err = -ENOBUFS; 1374 if (cfg->fc_nlinfo.nlh && 1375 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { 1376 table = fib6_get_table(net, cfg->fc_table); 1377 if (!table) { 1378 pr_warn("NLM_F_CREATE should be specified when creating new route\n"); 1379 table = fib6_new_table(net, cfg->fc_table); 1380 } 1381 } else { 1382 table = fib6_new_table(net, cfg->fc_table); 1383 } 1384 1385 if (!table) 1386 goto out; 1387 1388 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table); 1389 1390 if (!rt) { 1391 err = -ENOMEM; 1392 goto out; 1393 } 1394 1395 rt->dst.obsolete = -1; 1396 1397 if (cfg->fc_flags & RTF_EXPIRES) 1398 rt6_set_expires(rt, jiffies + 1399 clock_t_to_jiffies(cfg->fc_expires)); 1400 else 1401 rt6_clean_expires(rt); 1402 1403 if (cfg->fc_protocol == RTPROT_UNSPEC) 1404 cfg->fc_protocol = RTPROT_BOOT; 1405 rt->rt6i_protocol = cfg->fc_protocol; 1406 1407 addr_type = ipv6_addr_type(&cfg->fc_dst); 1408 1409 if (addr_type & IPV6_ADDR_MULTICAST) 1410 rt->dst.input = ip6_mc_input; 1411 else if (cfg->fc_flags & RTF_LOCAL) 1412 rt->dst.input = ip6_input; 1413 else 1414 rt->dst.input = ip6_forward; 1415 1416 rt->dst.output = ip6_output; 1417 1418 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1419 rt->rt6i_dst.plen = cfg->fc_dst_len; 1420 if (rt->rt6i_dst.plen == 128) 1421 rt->dst.flags |= DST_HOST; 1422 1423 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) { 1424 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); 1425 if (!metrics) { 1426 err = -ENOMEM; 1427 goto out; 1428 } 1429 dst_init_metrics(&rt->dst, metrics, 0); 1430 } 1431 #ifdef CONFIG_IPV6_SUBTREES 1432 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1433 rt->rt6i_src.plen = cfg->fc_src_len; 1434 #endif 1435 1436 rt->rt6i_metric = cfg->fc_metric; 1437 1438 /* We cannot add true routes via loopback here, 1439 they would result in kernel looping; promote them to reject routes 1440 */ 1441 if ((cfg->fc_flags & RTF_REJECT) || 1442 (dev && (dev->flags & IFF_LOOPBACK) && 1443 !(addr_type & IPV6_ADDR_LOOPBACK) && 1444 !(cfg->fc_flags & RTF_LOCAL))) { 1445 /* hold loopback dev/idev if we haven't done so. */ 1446 if (dev != net->loopback_dev) { 1447 if (dev) { 1448 dev_put(dev); 1449 in6_dev_put(idev); 1450 } 1451 dev = net->loopback_dev; 1452 dev_hold(dev); 1453 idev = in6_dev_get(dev); 1454 if (!idev) { 1455 err = -ENODEV; 1456 goto out; 1457 } 1458 } 1459 rt->dst.output = ip6_pkt_discard_out; 1460 rt->dst.input = ip6_pkt_discard; 1461 rt->dst.error = -ENETUNREACH; 1462 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1463 goto install_route; 1464 } 1465 1466 if (cfg->fc_flags & RTF_GATEWAY) { 1467 const struct in6_addr *gw_addr; 1468 int gwa_type; 1469 1470 gw_addr = &cfg->fc_gateway; 1471 rt->rt6i_gateway = *gw_addr; 1472 gwa_type = ipv6_addr_type(gw_addr); 1473 1474 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 1475 struct rt6_info *grt; 1476 1477 /* IPv6 strictly inhibits using not link-local 1478 addresses as nexthop address. 1479 Otherwise, router will not able to send redirects. 1480 It is very good, but in some (rare!) circumstances 1481 (SIT, PtP, NBMA NOARP links) it is handy to allow 1482 some exceptions. --ANK 1483 */ 1484 err = -EINVAL; 1485 if (!(gwa_type & IPV6_ADDR_UNICAST)) 1486 goto out; 1487 1488 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); 1489 1490 err = -EHOSTUNREACH; 1491 if (!grt) 1492 goto out; 1493 if (dev) { 1494 if (dev != grt->dst.dev) { 1495 dst_release(&grt->dst); 1496 goto out; 1497 } 1498 } else { 1499 dev = grt->dst.dev; 1500 idev = grt->rt6i_idev; 1501 dev_hold(dev); 1502 in6_dev_hold(grt->rt6i_idev); 1503 } 1504 if (!(grt->rt6i_flags & RTF_GATEWAY)) 1505 err = 0; 1506 dst_release(&grt->dst); 1507 1508 if (err) 1509 goto out; 1510 } 1511 err = -EINVAL; 1512 if (!dev || (dev->flags & IFF_LOOPBACK)) 1513 goto out; 1514 } 1515 1516 err = -ENODEV; 1517 if (!dev) 1518 goto out; 1519 1520 if (!ipv6_addr_any(&cfg->fc_prefsrc)) { 1521 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { 1522 err = -EINVAL; 1523 goto out; 1524 } 1525 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc; 1526 rt->rt6i_prefsrc.plen = 128; 1527 } else 1528 rt->rt6i_prefsrc.plen = 0; 1529 1530 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { 1531 err = rt6_bind_neighbour(rt, dev); 1532 if (err) 1533 goto out; 1534 } 1535 1536 rt->rt6i_flags = cfg->fc_flags; 1537 1538 install_route: 1539 if (cfg->fc_mx) { 1540 struct nlattr *nla; 1541 int remaining; 1542 1543 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 1544 int type = nla_type(nla); 1545 1546 if (type) { 1547 if (type > RTAX_MAX) { 1548 err = -EINVAL; 1549 goto out; 1550 } 1551 1552 dst_metric_set(&rt->dst, type, nla_get_u32(nla)); 1553 } 1554 } 1555 } 1556 1557 rt->dst.dev = dev; 1558 rt->rt6i_idev = idev; 1559 rt->rt6i_table = table; 1560 1561 cfg->fc_nlinfo.nl_net = dev_net(dev); 1562 1563 return __ip6_ins_rt(rt, &cfg->fc_nlinfo); 1564 1565 out: 1566 if (dev) 1567 dev_put(dev); 1568 if (idev) 1569 in6_dev_put(idev); 1570 if (rt) 1571 dst_free(&rt->dst); 1572 return err; 1573 } 1574 1575 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) 1576 { 1577 int err; 1578 struct fib6_table *table; 1579 struct net *net = dev_net(rt->dst.dev); 1580 1581 if (rt == net->ipv6.ip6_null_entry) 1582 return -ENOENT; 1583 1584 table = rt->rt6i_table; 1585 write_lock_bh(&table->tb6_lock); 1586 1587 err = fib6_del(rt, info); 1588 dst_release(&rt->dst); 1589 1590 write_unlock_bh(&table->tb6_lock); 1591 1592 return err; 1593 } 1594 1595 int ip6_del_rt(struct rt6_info *rt) 1596 { 1597 struct nl_info info = { 1598 .nl_net = dev_net(rt->dst.dev), 1599 }; 1600 return __ip6_del_rt(rt, &info); 1601 } 1602 1603 static int ip6_route_del(struct fib6_config *cfg) 1604 { 1605 struct fib6_table *table; 1606 struct fib6_node *fn; 1607 struct rt6_info *rt; 1608 int err = -ESRCH; 1609 1610 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); 1611 if (!table) 1612 return err; 1613 1614 read_lock_bh(&table->tb6_lock); 1615 1616 fn = fib6_locate(&table->tb6_root, 1617 &cfg->fc_dst, cfg->fc_dst_len, 1618 &cfg->fc_src, cfg->fc_src_len); 1619 1620 if (fn) { 1621 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1622 if (cfg->fc_ifindex && 1623 (!rt->dst.dev || 1624 rt->dst.dev->ifindex != cfg->fc_ifindex)) 1625 continue; 1626 if (cfg->fc_flags & RTF_GATEWAY && 1627 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) 1628 continue; 1629 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) 1630 continue; 1631 dst_hold(&rt->dst); 1632 read_unlock_bh(&table->tb6_lock); 1633 1634 return __ip6_del_rt(rt, &cfg->fc_nlinfo); 1635 } 1636 } 1637 read_unlock_bh(&table->tb6_lock); 1638 1639 return err; 1640 } 1641 1642 static void rt6_do_redirect(struct dst_entry *dst, struct sk_buff *skb) 1643 { 1644 struct net *net = dev_net(skb->dev); 1645 struct netevent_redirect netevent; 1646 struct rt6_info *rt, *nrt = NULL; 1647 const struct in6_addr *target; 1648 struct ndisc_options ndopts; 1649 const struct in6_addr *dest; 1650 struct neighbour *old_neigh; 1651 struct inet6_dev *in6_dev; 1652 struct neighbour *neigh; 1653 struct icmp6hdr *icmph; 1654 int optlen, on_link; 1655 u8 *lladdr; 1656 1657 optlen = skb->tail - skb->transport_header; 1658 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr); 1659 1660 if (optlen < 0) { 1661 net_dbg_ratelimited("rt6_do_redirect: packet too short\n"); 1662 return; 1663 } 1664 1665 icmph = icmp6_hdr(skb); 1666 target = (const struct in6_addr *) (icmph + 1); 1667 dest = target + 1; 1668 1669 if (ipv6_addr_is_multicast(dest)) { 1670 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n"); 1671 return; 1672 } 1673 1674 on_link = 0; 1675 if (ipv6_addr_equal(dest, target)) { 1676 on_link = 1; 1677 } else if (ipv6_addr_type(target) != 1678 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { 1679 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n"); 1680 return; 1681 } 1682 1683 in6_dev = __in6_dev_get(skb->dev); 1684 if (!in6_dev) 1685 return; 1686 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) 1687 return; 1688 1689 /* RFC2461 8.1: 1690 * The IP source address of the Redirect MUST be the same as the current 1691 * first-hop router for the specified ICMP Destination Address. 1692 */ 1693 1694 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) { 1695 net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); 1696 return; 1697 } 1698 1699 lladdr = NULL; 1700 if (ndopts.nd_opts_tgt_lladdr) { 1701 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, 1702 skb->dev); 1703 if (!lladdr) { 1704 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n"); 1705 return; 1706 } 1707 } 1708 1709 rt = (struct rt6_info *) dst; 1710 if (rt == net->ipv6.ip6_null_entry) { 1711 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); 1712 return; 1713 } 1714 1715 /* Redirect received -> path was valid. 1716 * Look, redirects are sent only in response to data packets, 1717 * so that this nexthop apparently is reachable. --ANK 1718 */ 1719 dst_confirm(&rt->dst); 1720 1721 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); 1722 if (!neigh) 1723 return; 1724 1725 /* Duplicate redirect: silently ignore. */ 1726 old_neigh = rt->n; 1727 if (neigh == old_neigh) 1728 goto out; 1729 1730 /* 1731 * We have finally decided to accept it. 1732 */ 1733 1734 neigh_update(neigh, lladdr, NUD_STALE, 1735 NEIGH_UPDATE_F_WEAK_OVERRIDE| 1736 NEIGH_UPDATE_F_OVERRIDE| 1737 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 1738 NEIGH_UPDATE_F_ISROUTER)) 1739 ); 1740 1741 nrt = ip6_rt_copy(rt, dest); 1742 if (!nrt) 1743 goto out; 1744 1745 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 1746 if (on_link) 1747 nrt->rt6i_flags &= ~RTF_GATEWAY; 1748 1749 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; 1750 nrt->n = neigh_clone(neigh); 1751 1752 if (ip6_ins_rt(nrt)) 1753 goto out; 1754 1755 netevent.old = &rt->dst; 1756 netevent.old_neigh = old_neigh; 1757 netevent.new = &nrt->dst; 1758 netevent.new_neigh = neigh; 1759 netevent.daddr = dest; 1760 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 1761 1762 if (rt->rt6i_flags & RTF_CACHE) { 1763 rt = (struct rt6_info *) dst_clone(&rt->dst); 1764 ip6_del_rt(rt); 1765 } 1766 1767 out: 1768 neigh_release(neigh); 1769 } 1770 1771 /* 1772 * Misc support functions 1773 */ 1774 1775 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, 1776 const struct in6_addr *dest) 1777 { 1778 struct net *net = dev_net(ort->dst.dev); 1779 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0, 1780 ort->rt6i_table); 1781 1782 if (rt) { 1783 rt->dst.input = ort->dst.input; 1784 rt->dst.output = ort->dst.output; 1785 rt->dst.flags |= DST_HOST; 1786 1787 rt->rt6i_dst.addr = *dest; 1788 rt->rt6i_dst.plen = 128; 1789 dst_copy_metrics(&rt->dst, &ort->dst); 1790 rt->dst.error = ort->dst.error; 1791 rt->rt6i_idev = ort->rt6i_idev; 1792 if (rt->rt6i_idev) 1793 in6_dev_hold(rt->rt6i_idev); 1794 rt->dst.lastuse = jiffies; 1795 1796 rt->rt6i_gateway = ort->rt6i_gateway; 1797 rt->rt6i_flags = ort->rt6i_flags; 1798 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == 1799 (RTF_DEFAULT | RTF_ADDRCONF)) 1800 rt6_set_from(rt, ort); 1801 else 1802 rt6_clean_expires(rt); 1803 rt->rt6i_metric = 0; 1804 1805 #ifdef CONFIG_IPV6_SUBTREES 1806 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1807 #endif 1808 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key)); 1809 rt->rt6i_table = ort->rt6i_table; 1810 } 1811 return rt; 1812 } 1813 1814 #ifdef CONFIG_IPV6_ROUTE_INFO 1815 static struct rt6_info *rt6_get_route_info(struct net *net, 1816 const struct in6_addr *prefix, int prefixlen, 1817 const struct in6_addr *gwaddr, int ifindex) 1818 { 1819 struct fib6_node *fn; 1820 struct rt6_info *rt = NULL; 1821 struct fib6_table *table; 1822 1823 table = fib6_get_table(net, RT6_TABLE_INFO); 1824 if (!table) 1825 return NULL; 1826 1827 write_lock_bh(&table->tb6_lock); 1828 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); 1829 if (!fn) 1830 goto out; 1831 1832 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1833 if (rt->dst.dev->ifindex != ifindex) 1834 continue; 1835 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 1836 continue; 1837 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 1838 continue; 1839 dst_hold(&rt->dst); 1840 break; 1841 } 1842 out: 1843 write_unlock_bh(&table->tb6_lock); 1844 return rt; 1845 } 1846 1847 static struct rt6_info *rt6_add_route_info(struct net *net, 1848 const struct in6_addr *prefix, int prefixlen, 1849 const struct in6_addr *gwaddr, int ifindex, 1850 unsigned int pref) 1851 { 1852 struct fib6_config cfg = { 1853 .fc_table = RT6_TABLE_INFO, 1854 .fc_metric = IP6_RT_PRIO_USER, 1855 .fc_ifindex = ifindex, 1856 .fc_dst_len = prefixlen, 1857 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 1858 RTF_UP | RTF_PREF(pref), 1859 .fc_nlinfo.pid = 0, 1860 .fc_nlinfo.nlh = NULL, 1861 .fc_nlinfo.nl_net = net, 1862 }; 1863 1864 cfg.fc_dst = *prefix; 1865 cfg.fc_gateway = *gwaddr; 1866 1867 /* We should treat it as a default route if prefix length is 0. */ 1868 if (!prefixlen) 1869 cfg.fc_flags |= RTF_DEFAULT; 1870 1871 ip6_route_add(&cfg); 1872 1873 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); 1874 } 1875 #endif 1876 1877 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) 1878 { 1879 struct rt6_info *rt; 1880 struct fib6_table *table; 1881 1882 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); 1883 if (!table) 1884 return NULL; 1885 1886 write_lock_bh(&table->tb6_lock); 1887 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { 1888 if (dev == rt->dst.dev && 1889 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 1890 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 1891 break; 1892 } 1893 if (rt) 1894 dst_hold(&rt->dst); 1895 write_unlock_bh(&table->tb6_lock); 1896 return rt; 1897 } 1898 1899 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, 1900 struct net_device *dev, 1901 unsigned int pref) 1902 { 1903 struct fib6_config cfg = { 1904 .fc_table = RT6_TABLE_DFLT, 1905 .fc_metric = IP6_RT_PRIO_USER, 1906 .fc_ifindex = dev->ifindex, 1907 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 1908 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 1909 .fc_nlinfo.pid = 0, 1910 .fc_nlinfo.nlh = NULL, 1911 .fc_nlinfo.nl_net = dev_net(dev), 1912 }; 1913 1914 cfg.fc_gateway = *gwaddr; 1915 1916 ip6_route_add(&cfg); 1917 1918 return rt6_get_dflt_router(gwaddr, dev); 1919 } 1920 1921 void rt6_purge_dflt_routers(struct net *net) 1922 { 1923 struct rt6_info *rt; 1924 struct fib6_table *table; 1925 1926 /* NOTE: Keep consistent with rt6_get_dflt_router */ 1927 table = fib6_get_table(net, RT6_TABLE_DFLT); 1928 if (!table) 1929 return; 1930 1931 restart: 1932 read_lock_bh(&table->tb6_lock); 1933 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { 1934 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { 1935 dst_hold(&rt->dst); 1936 read_unlock_bh(&table->tb6_lock); 1937 ip6_del_rt(rt); 1938 goto restart; 1939 } 1940 } 1941 read_unlock_bh(&table->tb6_lock); 1942 } 1943 1944 static void rtmsg_to_fib6_config(struct net *net, 1945 struct in6_rtmsg *rtmsg, 1946 struct fib6_config *cfg) 1947 { 1948 memset(cfg, 0, sizeof(*cfg)); 1949 1950 cfg->fc_table = RT6_TABLE_MAIN; 1951 cfg->fc_ifindex = rtmsg->rtmsg_ifindex; 1952 cfg->fc_metric = rtmsg->rtmsg_metric; 1953 cfg->fc_expires = rtmsg->rtmsg_info; 1954 cfg->fc_dst_len = rtmsg->rtmsg_dst_len; 1955 cfg->fc_src_len = rtmsg->rtmsg_src_len; 1956 cfg->fc_flags = rtmsg->rtmsg_flags; 1957 1958 cfg->fc_nlinfo.nl_net = net; 1959 1960 cfg->fc_dst = rtmsg->rtmsg_dst; 1961 cfg->fc_src = rtmsg->rtmsg_src; 1962 cfg->fc_gateway = rtmsg->rtmsg_gateway; 1963 } 1964 1965 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) 1966 { 1967 struct fib6_config cfg; 1968 struct in6_rtmsg rtmsg; 1969 int err; 1970 1971 switch(cmd) { 1972 case SIOCADDRT: /* Add a route */ 1973 case SIOCDELRT: /* Delete a route */ 1974 if (!capable(CAP_NET_ADMIN)) 1975 return -EPERM; 1976 err = copy_from_user(&rtmsg, arg, 1977 sizeof(struct in6_rtmsg)); 1978 if (err) 1979 return -EFAULT; 1980 1981 rtmsg_to_fib6_config(net, &rtmsg, &cfg); 1982 1983 rtnl_lock(); 1984 switch (cmd) { 1985 case SIOCADDRT: 1986 err = ip6_route_add(&cfg); 1987 break; 1988 case SIOCDELRT: 1989 err = ip6_route_del(&cfg); 1990 break; 1991 default: 1992 err = -EINVAL; 1993 } 1994 rtnl_unlock(); 1995 1996 return err; 1997 } 1998 1999 return -EINVAL; 2000 } 2001 2002 /* 2003 * Drop the packet on the floor 2004 */ 2005 2006 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) 2007 { 2008 int type; 2009 struct dst_entry *dst = skb_dst(skb); 2010 switch (ipstats_mib_noroutes) { 2011 case IPSTATS_MIB_INNOROUTES: 2012 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); 2013 if (type == IPV6_ADDR_ANY) { 2014 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 2015 IPSTATS_MIB_INADDRERRORS); 2016 break; 2017 } 2018 /* FALLTHROUGH */ 2019 case IPSTATS_MIB_OUTNOROUTES: 2020 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 2021 ipstats_mib_noroutes); 2022 break; 2023 } 2024 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); 2025 kfree_skb(skb); 2026 return 0; 2027 } 2028 2029 static int ip6_pkt_discard(struct sk_buff *skb) 2030 { 2031 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 2032 } 2033 2034 static int ip6_pkt_discard_out(struct sk_buff *skb) 2035 { 2036 skb->dev = skb_dst(skb)->dev; 2037 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 2038 } 2039 2040 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2041 2042 static int ip6_pkt_prohibit(struct sk_buff *skb) 2043 { 2044 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 2045 } 2046 2047 static int ip6_pkt_prohibit_out(struct sk_buff *skb) 2048 { 2049 skb->dev = skb_dst(skb)->dev; 2050 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 2051 } 2052 2053 #endif 2054 2055 /* 2056 * Allocate a dst for local (unicast / anycast) address. 2057 */ 2058 2059 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 2060 const struct in6_addr *addr, 2061 bool anycast) 2062 { 2063 struct net *net = dev_net(idev->dev); 2064 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); 2065 int err; 2066 2067 if (!rt) { 2068 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); 2069 return ERR_PTR(-ENOMEM); 2070 } 2071 2072 in6_dev_hold(idev); 2073 2074 rt->dst.flags |= DST_HOST; 2075 rt->dst.input = ip6_input; 2076 rt->dst.output = ip6_output; 2077 rt->rt6i_idev = idev; 2078 rt->dst.obsolete = -1; 2079 2080 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 2081 if (anycast) 2082 rt->rt6i_flags |= RTF_ANYCAST; 2083 else 2084 rt->rt6i_flags |= RTF_LOCAL; 2085 err = rt6_bind_neighbour(rt, rt->dst.dev); 2086 if (err) { 2087 dst_free(&rt->dst); 2088 return ERR_PTR(err); 2089 } 2090 2091 rt->rt6i_dst.addr = *addr; 2092 rt->rt6i_dst.plen = 128; 2093 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); 2094 2095 atomic_set(&rt->dst.__refcnt, 1); 2096 2097 return rt; 2098 } 2099 2100 int ip6_route_get_saddr(struct net *net, 2101 struct rt6_info *rt, 2102 const struct in6_addr *daddr, 2103 unsigned int prefs, 2104 struct in6_addr *saddr) 2105 { 2106 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt); 2107 int err = 0; 2108 if (rt->rt6i_prefsrc.plen) 2109 *saddr = rt->rt6i_prefsrc.addr; 2110 else 2111 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, 2112 daddr, prefs, saddr); 2113 return err; 2114 } 2115 2116 /* remove deleted ip from prefsrc entries */ 2117 struct arg_dev_net_ip { 2118 struct net_device *dev; 2119 struct net *net; 2120 struct in6_addr *addr; 2121 }; 2122 2123 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) 2124 { 2125 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev; 2126 struct net *net = ((struct arg_dev_net_ip *)arg)->net; 2127 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; 2128 2129 if (((void *)rt->dst.dev == dev || !dev) && 2130 rt != net->ipv6.ip6_null_entry && 2131 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { 2132 /* remove prefsrc entry */ 2133 rt->rt6i_prefsrc.plen = 0; 2134 } 2135 return 0; 2136 } 2137 2138 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) 2139 { 2140 struct net *net = dev_net(ifp->idev->dev); 2141 struct arg_dev_net_ip adni = { 2142 .dev = ifp->idev->dev, 2143 .net = net, 2144 .addr = &ifp->addr, 2145 }; 2146 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni); 2147 } 2148 2149 struct arg_dev_net { 2150 struct net_device *dev; 2151 struct net *net; 2152 }; 2153 2154 static int fib6_ifdown(struct rt6_info *rt, void *arg) 2155 { 2156 const struct arg_dev_net *adn = arg; 2157 const struct net_device *dev = adn->dev; 2158 2159 if ((rt->dst.dev == dev || !dev) && 2160 rt != adn->net->ipv6.ip6_null_entry) 2161 return -1; 2162 2163 return 0; 2164 } 2165 2166 void rt6_ifdown(struct net *net, struct net_device *dev) 2167 { 2168 struct arg_dev_net adn = { 2169 .dev = dev, 2170 .net = net, 2171 }; 2172 2173 fib6_clean_all(net, fib6_ifdown, 0, &adn); 2174 icmp6_clean_all(fib6_ifdown, &adn); 2175 } 2176 2177 struct rt6_mtu_change_arg { 2178 struct net_device *dev; 2179 unsigned int mtu; 2180 }; 2181 2182 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) 2183 { 2184 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 2185 struct inet6_dev *idev; 2186 2187 /* In IPv6 pmtu discovery is not optional, 2188 so that RTAX_MTU lock cannot disable it. 2189 We still use this lock to block changes 2190 caused by addrconf/ndisc. 2191 */ 2192 2193 idev = __in6_dev_get(arg->dev); 2194 if (!idev) 2195 return 0; 2196 2197 /* For administrative MTU increase, there is no way to discover 2198 IPv6 PMTU increase, so PMTU increase should be updated here. 2199 Since RFC 1981 doesn't include administrative MTU increase 2200 update PMTU increase is a MUST. (i.e. jumbo frame) 2201 */ 2202 /* 2203 If new MTU is less than route PMTU, this new MTU will be the 2204 lowest MTU in the path, update the route PMTU to reflect PMTU 2205 decreases; if new MTU is greater than route PMTU, and the 2206 old MTU is the lowest MTU in the path, update the route PMTU 2207 to reflect the increase. In this case if the other nodes' MTU 2208 also have the lowest MTU, TOO BIG MESSAGE will be lead to 2209 PMTU discouvery. 2210 */ 2211 if (rt->dst.dev == arg->dev && 2212 !dst_metric_locked(&rt->dst, RTAX_MTU) && 2213 (dst_mtu(&rt->dst) >= arg->mtu || 2214 (dst_mtu(&rt->dst) < arg->mtu && 2215 dst_mtu(&rt->dst) == idev->cnf.mtu6))) { 2216 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); 2217 } 2218 return 0; 2219 } 2220 2221 void rt6_mtu_change(struct net_device *dev, unsigned int mtu) 2222 { 2223 struct rt6_mtu_change_arg arg = { 2224 .dev = dev, 2225 .mtu = mtu, 2226 }; 2227 2228 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); 2229 } 2230 2231 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 2232 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 2233 [RTA_OIF] = { .type = NLA_U32 }, 2234 [RTA_IIF] = { .type = NLA_U32 }, 2235 [RTA_PRIORITY] = { .type = NLA_U32 }, 2236 [RTA_METRICS] = { .type = NLA_NESTED }, 2237 }; 2238 2239 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2240 struct fib6_config *cfg) 2241 { 2242 struct rtmsg *rtm; 2243 struct nlattr *tb[RTA_MAX+1]; 2244 int err; 2245 2246 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2247 if (err < 0) 2248 goto errout; 2249 2250 err = -EINVAL; 2251 rtm = nlmsg_data(nlh); 2252 memset(cfg, 0, sizeof(*cfg)); 2253 2254 cfg->fc_table = rtm->rtm_table; 2255 cfg->fc_dst_len = rtm->rtm_dst_len; 2256 cfg->fc_src_len = rtm->rtm_src_len; 2257 cfg->fc_flags = RTF_UP; 2258 cfg->fc_protocol = rtm->rtm_protocol; 2259 2260 if (rtm->rtm_type == RTN_UNREACHABLE) 2261 cfg->fc_flags |= RTF_REJECT; 2262 2263 if (rtm->rtm_type == RTN_LOCAL) 2264 cfg->fc_flags |= RTF_LOCAL; 2265 2266 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 2267 cfg->fc_nlinfo.nlh = nlh; 2268 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2269 2270 if (tb[RTA_GATEWAY]) { 2271 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); 2272 cfg->fc_flags |= RTF_GATEWAY; 2273 } 2274 2275 if (tb[RTA_DST]) { 2276 int plen = (rtm->rtm_dst_len + 7) >> 3; 2277 2278 if (nla_len(tb[RTA_DST]) < plen) 2279 goto errout; 2280 2281 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 2282 } 2283 2284 if (tb[RTA_SRC]) { 2285 int plen = (rtm->rtm_src_len + 7) >> 3; 2286 2287 if (nla_len(tb[RTA_SRC]) < plen) 2288 goto errout; 2289 2290 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 2291 } 2292 2293 if (tb[RTA_PREFSRC]) 2294 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16); 2295 2296 if (tb[RTA_OIF]) 2297 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 2298 2299 if (tb[RTA_PRIORITY]) 2300 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 2301 2302 if (tb[RTA_METRICS]) { 2303 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 2304 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 2305 } 2306 2307 if (tb[RTA_TABLE]) 2308 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2309 2310 err = 0; 2311 errout: 2312 return err; 2313 } 2314 2315 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2316 { 2317 struct fib6_config cfg; 2318 int err; 2319 2320 err = rtm_to_fib6_config(skb, nlh, &cfg); 2321 if (err < 0) 2322 return err; 2323 2324 return ip6_route_del(&cfg); 2325 } 2326 2327 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2328 { 2329 struct fib6_config cfg; 2330 int err; 2331 2332 err = rtm_to_fib6_config(skb, nlh, &cfg); 2333 if (err < 0) 2334 return err; 2335 2336 return ip6_route_add(&cfg); 2337 } 2338 2339 static inline size_t rt6_nlmsg_size(void) 2340 { 2341 return NLMSG_ALIGN(sizeof(struct rtmsg)) 2342 + nla_total_size(16) /* RTA_SRC */ 2343 + nla_total_size(16) /* RTA_DST */ 2344 + nla_total_size(16) /* RTA_GATEWAY */ 2345 + nla_total_size(16) /* RTA_PREFSRC */ 2346 + nla_total_size(4) /* RTA_TABLE */ 2347 + nla_total_size(4) /* RTA_IIF */ 2348 + nla_total_size(4) /* RTA_OIF */ 2349 + nla_total_size(4) /* RTA_PRIORITY */ 2350 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 2351 + nla_total_size(sizeof(struct rta_cacheinfo)); 2352 } 2353 2354 static int rt6_fill_node(struct net *net, 2355 struct sk_buff *skb, struct rt6_info *rt, 2356 struct in6_addr *dst, struct in6_addr *src, 2357 int iif, int type, u32 pid, u32 seq, 2358 int prefix, int nowait, unsigned int flags) 2359 { 2360 struct rtmsg *rtm; 2361 struct nlmsghdr *nlh; 2362 long expires; 2363 u32 table; 2364 struct neighbour *n; 2365 2366 if (prefix) { /* user wants prefix routes only */ 2367 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { 2368 /* success since this is not a prefix route */ 2369 return 1; 2370 } 2371 } 2372 2373 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); 2374 if (!nlh) 2375 return -EMSGSIZE; 2376 2377 rtm = nlmsg_data(nlh); 2378 rtm->rtm_family = AF_INET6; 2379 rtm->rtm_dst_len = rt->rt6i_dst.plen; 2380 rtm->rtm_src_len = rt->rt6i_src.plen; 2381 rtm->rtm_tos = 0; 2382 if (rt->rt6i_table) 2383 table = rt->rt6i_table->tb6_id; 2384 else 2385 table = RT6_TABLE_UNSPEC; 2386 rtm->rtm_table = table; 2387 if (nla_put_u32(skb, RTA_TABLE, table)) 2388 goto nla_put_failure; 2389 if (rt->rt6i_flags & RTF_REJECT) 2390 rtm->rtm_type = RTN_UNREACHABLE; 2391 else if (rt->rt6i_flags & RTF_LOCAL) 2392 rtm->rtm_type = RTN_LOCAL; 2393 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) 2394 rtm->rtm_type = RTN_LOCAL; 2395 else 2396 rtm->rtm_type = RTN_UNICAST; 2397 rtm->rtm_flags = 0; 2398 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 2399 rtm->rtm_protocol = rt->rt6i_protocol; 2400 if (rt->rt6i_flags & RTF_DYNAMIC) 2401 rtm->rtm_protocol = RTPROT_REDIRECT; 2402 else if (rt->rt6i_flags & RTF_ADDRCONF) 2403 rtm->rtm_protocol = RTPROT_KERNEL; 2404 else if (rt->rt6i_flags & RTF_DEFAULT) 2405 rtm->rtm_protocol = RTPROT_RA; 2406 2407 if (rt->rt6i_flags & RTF_CACHE) 2408 rtm->rtm_flags |= RTM_F_CLONED; 2409 2410 if (dst) { 2411 if (nla_put(skb, RTA_DST, 16, dst)) 2412 goto nla_put_failure; 2413 rtm->rtm_dst_len = 128; 2414 } else if (rtm->rtm_dst_len) 2415 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr)) 2416 goto nla_put_failure; 2417 #ifdef CONFIG_IPV6_SUBTREES 2418 if (src) { 2419 if (nla_put(skb, RTA_SRC, 16, src)) 2420 goto nla_put_failure; 2421 rtm->rtm_src_len = 128; 2422 } else if (rtm->rtm_src_len && 2423 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr)) 2424 goto nla_put_failure; 2425 #endif 2426 if (iif) { 2427 #ifdef CONFIG_IPV6_MROUTE 2428 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { 2429 int err = ip6mr_get_route(net, skb, rtm, nowait); 2430 if (err <= 0) { 2431 if (!nowait) { 2432 if (err == 0) 2433 return 0; 2434 goto nla_put_failure; 2435 } else { 2436 if (err == -EMSGSIZE) 2437 goto nla_put_failure; 2438 } 2439 } 2440 } else 2441 #endif 2442 if (nla_put_u32(skb, RTA_IIF, iif)) 2443 goto nla_put_failure; 2444 } else if (dst) { 2445 struct in6_addr saddr_buf; 2446 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && 2447 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) 2448 goto nla_put_failure; 2449 } 2450 2451 if (rt->rt6i_prefsrc.plen) { 2452 struct in6_addr saddr_buf; 2453 saddr_buf = rt->rt6i_prefsrc.addr; 2454 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) 2455 goto nla_put_failure; 2456 } 2457 2458 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) 2459 goto nla_put_failure; 2460 2461 rcu_read_lock(); 2462 n = rt->n; 2463 if (n) { 2464 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) { 2465 rcu_read_unlock(); 2466 goto nla_put_failure; 2467 } 2468 } 2469 rcu_read_unlock(); 2470 2471 if (rt->dst.dev && 2472 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) 2473 goto nla_put_failure; 2474 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) 2475 goto nla_put_failure; 2476 if (!(rt->rt6i_flags & RTF_EXPIRES)) 2477 expires = 0; 2478 else if (rt->dst.expires - jiffies < INT_MAX) 2479 expires = rt->dst.expires - jiffies; 2480 else 2481 expires = INT_MAX; 2482 2483 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) 2484 goto nla_put_failure; 2485 2486 return nlmsg_end(skb, nlh); 2487 2488 nla_put_failure: 2489 nlmsg_cancel(skb, nlh); 2490 return -EMSGSIZE; 2491 } 2492 2493 int rt6_dump_route(struct rt6_info *rt, void *p_arg) 2494 { 2495 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 2496 int prefix; 2497 2498 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 2499 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 2500 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; 2501 } else 2502 prefix = 0; 2503 2504 return rt6_fill_node(arg->net, 2505 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 2506 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, 2507 prefix, 0, NLM_F_MULTI); 2508 } 2509 2510 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) 2511 { 2512 struct net *net = sock_net(in_skb->sk); 2513 struct nlattr *tb[RTA_MAX+1]; 2514 struct rt6_info *rt; 2515 struct sk_buff *skb; 2516 struct rtmsg *rtm; 2517 struct flowi6 fl6; 2518 int err, iif = 0, oif = 0; 2519 2520 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2521 if (err < 0) 2522 goto errout; 2523 2524 err = -EINVAL; 2525 memset(&fl6, 0, sizeof(fl6)); 2526 2527 if (tb[RTA_SRC]) { 2528 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 2529 goto errout; 2530 2531 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]); 2532 } 2533 2534 if (tb[RTA_DST]) { 2535 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 2536 goto errout; 2537 2538 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]); 2539 } 2540 2541 if (tb[RTA_IIF]) 2542 iif = nla_get_u32(tb[RTA_IIF]); 2543 2544 if (tb[RTA_OIF]) 2545 oif = nla_get_u32(tb[RTA_OIF]); 2546 2547 if (iif) { 2548 struct net_device *dev; 2549 int flags = 0; 2550 2551 dev = __dev_get_by_index(net, iif); 2552 if (!dev) { 2553 err = -ENODEV; 2554 goto errout; 2555 } 2556 2557 fl6.flowi6_iif = iif; 2558 2559 if (!ipv6_addr_any(&fl6.saddr)) 2560 flags |= RT6_LOOKUP_F_HAS_SADDR; 2561 2562 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6, 2563 flags); 2564 } else { 2565 fl6.flowi6_oif = oif; 2566 2567 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); 2568 } 2569 2570 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 2571 if (!skb) { 2572 dst_release(&rt->dst); 2573 err = -ENOBUFS; 2574 goto errout; 2575 } 2576 2577 /* Reserve room for dummy headers, this skb can pass 2578 through good chunk of routing engine. 2579 */ 2580 skb_reset_mac_header(skb); 2581 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); 2582 2583 skb_dst_set(skb, &rt->dst); 2584 2585 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, 2586 RTM_NEWROUTE, NETLINK_CB(in_skb).pid, 2587 nlh->nlmsg_seq, 0, 0, 0); 2588 if (err < 0) { 2589 kfree_skb(skb); 2590 goto errout; 2591 } 2592 2593 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); 2594 errout: 2595 return err; 2596 } 2597 2598 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) 2599 { 2600 struct sk_buff *skb; 2601 struct net *net = info->nl_net; 2602 u32 seq; 2603 int err; 2604 2605 err = -ENOBUFS; 2606 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 2607 2608 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any()); 2609 if (!skb) 2610 goto errout; 2611 2612 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 2613 event, info->pid, seq, 0, 0, 0); 2614 if (err < 0) { 2615 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 2616 WARN_ON(err == -EMSGSIZE); 2617 kfree_skb(skb); 2618 goto errout; 2619 } 2620 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, 2621 info->nlh, gfp_any()); 2622 return; 2623 errout: 2624 if (err < 0) 2625 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 2626 } 2627 2628 static int ip6_route_dev_notify(struct notifier_block *this, 2629 unsigned long event, void *data) 2630 { 2631 struct net_device *dev = (struct net_device *)data; 2632 struct net *net = dev_net(dev); 2633 2634 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) { 2635 net->ipv6.ip6_null_entry->dst.dev = dev; 2636 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 2637 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2638 net->ipv6.ip6_prohibit_entry->dst.dev = dev; 2639 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); 2640 net->ipv6.ip6_blk_hole_entry->dst.dev = dev; 2641 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); 2642 #endif 2643 } 2644 2645 return NOTIFY_OK; 2646 } 2647 2648 /* 2649 * /proc 2650 */ 2651 2652 #ifdef CONFIG_PROC_FS 2653 2654 struct rt6_proc_arg 2655 { 2656 char *buffer; 2657 int offset; 2658 int length; 2659 int skip; 2660 int len; 2661 }; 2662 2663 static int rt6_info_route(struct rt6_info *rt, void *p_arg) 2664 { 2665 struct seq_file *m = p_arg; 2666 struct neighbour *n; 2667 2668 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); 2669 2670 #ifdef CONFIG_IPV6_SUBTREES 2671 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); 2672 #else 2673 seq_puts(m, "00000000000000000000000000000000 00 "); 2674 #endif 2675 rcu_read_lock(); 2676 n = rt->n; 2677 if (n) { 2678 seq_printf(m, "%pi6", n->primary_key); 2679 } else { 2680 seq_puts(m, "00000000000000000000000000000000"); 2681 } 2682 rcu_read_unlock(); 2683 seq_printf(m, " %08x %08x %08x %08x %8s\n", 2684 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), 2685 rt->dst.__use, rt->rt6i_flags, 2686 rt->dst.dev ? rt->dst.dev->name : ""); 2687 return 0; 2688 } 2689 2690 static int ipv6_route_show(struct seq_file *m, void *v) 2691 { 2692 struct net *net = (struct net *)m->private; 2693 fib6_clean_all_ro(net, rt6_info_route, 0, m); 2694 return 0; 2695 } 2696 2697 static int ipv6_route_open(struct inode *inode, struct file *file) 2698 { 2699 return single_open_net(inode, file, ipv6_route_show); 2700 } 2701 2702 static const struct file_operations ipv6_route_proc_fops = { 2703 .owner = THIS_MODULE, 2704 .open = ipv6_route_open, 2705 .read = seq_read, 2706 .llseek = seq_lseek, 2707 .release = single_release_net, 2708 }; 2709 2710 static int rt6_stats_seq_show(struct seq_file *seq, void *v) 2711 { 2712 struct net *net = (struct net *)seq->private; 2713 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 2714 net->ipv6.rt6_stats->fib_nodes, 2715 net->ipv6.rt6_stats->fib_route_nodes, 2716 net->ipv6.rt6_stats->fib_rt_alloc, 2717 net->ipv6.rt6_stats->fib_rt_entries, 2718 net->ipv6.rt6_stats->fib_rt_cache, 2719 dst_entries_get_slow(&net->ipv6.ip6_dst_ops), 2720 net->ipv6.rt6_stats->fib_discarded_routes); 2721 2722 return 0; 2723 } 2724 2725 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 2726 { 2727 return single_open_net(inode, file, rt6_stats_seq_show); 2728 } 2729 2730 static const struct file_operations rt6_stats_seq_fops = { 2731 .owner = THIS_MODULE, 2732 .open = rt6_stats_seq_open, 2733 .read = seq_read, 2734 .llseek = seq_lseek, 2735 .release = single_release_net, 2736 }; 2737 #endif /* CONFIG_PROC_FS */ 2738 2739 #ifdef CONFIG_SYSCTL 2740 2741 static 2742 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, 2743 void __user *buffer, size_t *lenp, loff_t *ppos) 2744 { 2745 struct net *net; 2746 int delay; 2747 if (!write) 2748 return -EINVAL; 2749 2750 net = (struct net *)ctl->extra1; 2751 delay = net->ipv6.sysctl.flush_delay; 2752 proc_dointvec(ctl, write, buffer, lenp, ppos); 2753 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); 2754 return 0; 2755 } 2756 2757 ctl_table ipv6_route_table_template[] = { 2758 { 2759 .procname = "flush", 2760 .data = &init_net.ipv6.sysctl.flush_delay, 2761 .maxlen = sizeof(int), 2762 .mode = 0200, 2763 .proc_handler = ipv6_sysctl_rtcache_flush 2764 }, 2765 { 2766 .procname = "gc_thresh", 2767 .data = &ip6_dst_ops_template.gc_thresh, 2768 .maxlen = sizeof(int), 2769 .mode = 0644, 2770 .proc_handler = proc_dointvec, 2771 }, 2772 { 2773 .procname = "max_size", 2774 .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 2775 .maxlen = sizeof(int), 2776 .mode = 0644, 2777 .proc_handler = proc_dointvec, 2778 }, 2779 { 2780 .procname = "gc_min_interval", 2781 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2782 .maxlen = sizeof(int), 2783 .mode = 0644, 2784 .proc_handler = proc_dointvec_jiffies, 2785 }, 2786 { 2787 .procname = "gc_timeout", 2788 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 2789 .maxlen = sizeof(int), 2790 .mode = 0644, 2791 .proc_handler = proc_dointvec_jiffies, 2792 }, 2793 { 2794 .procname = "gc_interval", 2795 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, 2796 .maxlen = sizeof(int), 2797 .mode = 0644, 2798 .proc_handler = proc_dointvec_jiffies, 2799 }, 2800 { 2801 .procname = "gc_elasticity", 2802 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, 2803 .maxlen = sizeof(int), 2804 .mode = 0644, 2805 .proc_handler = proc_dointvec, 2806 }, 2807 { 2808 .procname = "mtu_expires", 2809 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, 2810 .maxlen = sizeof(int), 2811 .mode = 0644, 2812 .proc_handler = proc_dointvec_jiffies, 2813 }, 2814 { 2815 .procname = "min_adv_mss", 2816 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, 2817 .maxlen = sizeof(int), 2818 .mode = 0644, 2819 .proc_handler = proc_dointvec, 2820 }, 2821 { 2822 .procname = "gc_min_interval_ms", 2823 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 2824 .maxlen = sizeof(int), 2825 .mode = 0644, 2826 .proc_handler = proc_dointvec_ms_jiffies, 2827 }, 2828 { } 2829 }; 2830 2831 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) 2832 { 2833 struct ctl_table *table; 2834 2835 table = kmemdup(ipv6_route_table_template, 2836 sizeof(ipv6_route_table_template), 2837 GFP_KERNEL); 2838 2839 if (table) { 2840 table[0].data = &net->ipv6.sysctl.flush_delay; 2841 table[0].extra1 = net; 2842 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 2843 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 2844 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2845 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; 2846 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 2847 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; 2848 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 2849 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 2850 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 2851 } 2852 2853 return table; 2854 } 2855 #endif 2856 2857 static int __net_init ip6_route_net_init(struct net *net) 2858 { 2859 int ret = -ENOMEM; 2860 2861 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 2862 sizeof(net->ipv6.ip6_dst_ops)); 2863 2864 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) 2865 goto out_ip6_dst_ops; 2866 2867 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 2868 sizeof(*net->ipv6.ip6_null_entry), 2869 GFP_KERNEL); 2870 if (!net->ipv6.ip6_null_entry) 2871 goto out_ip6_dst_entries; 2872 net->ipv6.ip6_null_entry->dst.path = 2873 (struct dst_entry *)net->ipv6.ip6_null_entry; 2874 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2875 dst_init_metrics(&net->ipv6.ip6_null_entry->dst, 2876 ip6_template_metrics, true); 2877 2878 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2879 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 2880 sizeof(*net->ipv6.ip6_prohibit_entry), 2881 GFP_KERNEL); 2882 if (!net->ipv6.ip6_prohibit_entry) 2883 goto out_ip6_null_entry; 2884 net->ipv6.ip6_prohibit_entry->dst.path = 2885 (struct dst_entry *)net->ipv6.ip6_prohibit_entry; 2886 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2887 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, 2888 ip6_template_metrics, true); 2889 2890 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 2891 sizeof(*net->ipv6.ip6_blk_hole_entry), 2892 GFP_KERNEL); 2893 if (!net->ipv6.ip6_blk_hole_entry) 2894 goto out_ip6_prohibit_entry; 2895 net->ipv6.ip6_blk_hole_entry->dst.path = 2896 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; 2897 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2898 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, 2899 ip6_template_metrics, true); 2900 #endif 2901 2902 net->ipv6.sysctl.flush_delay = 0; 2903 net->ipv6.sysctl.ip6_rt_max_size = 4096; 2904 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 2905 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 2906 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 2907 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 2908 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 2909 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 2910 2911 net->ipv6.ip6_rt_gc_expire = 30*HZ; 2912 2913 ret = 0; 2914 out: 2915 return ret; 2916 2917 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2918 out_ip6_prohibit_entry: 2919 kfree(net->ipv6.ip6_prohibit_entry); 2920 out_ip6_null_entry: 2921 kfree(net->ipv6.ip6_null_entry); 2922 #endif 2923 out_ip6_dst_entries: 2924 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 2925 out_ip6_dst_ops: 2926 goto out; 2927 } 2928 2929 static void __net_exit ip6_route_net_exit(struct net *net) 2930 { 2931 kfree(net->ipv6.ip6_null_entry); 2932 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 2933 kfree(net->ipv6.ip6_prohibit_entry); 2934 kfree(net->ipv6.ip6_blk_hole_entry); 2935 #endif 2936 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 2937 } 2938 2939 static int __net_init ip6_route_net_init_late(struct net *net) 2940 { 2941 #ifdef CONFIG_PROC_FS 2942 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); 2943 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); 2944 #endif 2945 return 0; 2946 } 2947 2948 static void __net_exit ip6_route_net_exit_late(struct net *net) 2949 { 2950 #ifdef CONFIG_PROC_FS 2951 proc_net_remove(net, "ipv6_route"); 2952 proc_net_remove(net, "rt6_stats"); 2953 #endif 2954 } 2955 2956 static struct pernet_operations ip6_route_net_ops = { 2957 .init = ip6_route_net_init, 2958 .exit = ip6_route_net_exit, 2959 }; 2960 2961 static int __net_init ipv6_inetpeer_init(struct net *net) 2962 { 2963 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); 2964 2965 if (!bp) 2966 return -ENOMEM; 2967 inet_peer_base_init(bp); 2968 net->ipv6.peers = bp; 2969 return 0; 2970 } 2971 2972 static void __net_exit ipv6_inetpeer_exit(struct net *net) 2973 { 2974 struct inet_peer_base *bp = net->ipv6.peers; 2975 2976 net->ipv6.peers = NULL; 2977 inetpeer_invalidate_tree(bp); 2978 kfree(bp); 2979 } 2980 2981 static struct pernet_operations ipv6_inetpeer_ops = { 2982 .init = ipv6_inetpeer_init, 2983 .exit = ipv6_inetpeer_exit, 2984 }; 2985 2986 static struct pernet_operations ip6_route_net_late_ops = { 2987 .init = ip6_route_net_init_late, 2988 .exit = ip6_route_net_exit_late, 2989 }; 2990 2991 static struct notifier_block ip6_route_dev_notifier = { 2992 .notifier_call = ip6_route_dev_notify, 2993 .priority = 0, 2994 }; 2995 2996 int __init ip6_route_init(void) 2997 { 2998 int ret; 2999 3000 ret = -ENOMEM; 3001 ip6_dst_ops_template.kmem_cachep = 3002 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 3003 SLAB_HWCACHE_ALIGN, NULL); 3004 if (!ip6_dst_ops_template.kmem_cachep) 3005 goto out; 3006 3007 ret = dst_entries_init(&ip6_dst_blackhole_ops); 3008 if (ret) 3009 goto out_kmem_cache; 3010 3011 ret = register_pernet_subsys(&ipv6_inetpeer_ops); 3012 if (ret) 3013 goto out_dst_entries; 3014 3015 ret = register_pernet_subsys(&ip6_route_net_ops); 3016 if (ret) 3017 goto out_register_inetpeer; 3018 3019 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 3020 3021 /* Registering of the loopback is done before this portion of code, 3022 * the loopback reference in rt6_info will not be taken, do it 3023 * manually for init_net */ 3024 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; 3025 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3026 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 3027 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; 3028 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3029 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; 3030 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 3031 #endif 3032 ret = fib6_init(); 3033 if (ret) 3034 goto out_register_subsys; 3035 3036 ret = xfrm6_init(); 3037 if (ret) 3038 goto out_fib6_init; 3039 3040 ret = fib6_rules_init(); 3041 if (ret) 3042 goto xfrm6_init; 3043 3044 ret = register_pernet_subsys(&ip6_route_net_late_ops); 3045 if (ret) 3046 goto fib6_rules_init; 3047 3048 ret = -ENOBUFS; 3049 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || 3050 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || 3051 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) 3052 goto out_register_late_subsys; 3053 3054 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 3055 if (ret) 3056 goto out_register_late_subsys; 3057 3058 out: 3059 return ret; 3060 3061 out_register_late_subsys: 3062 unregister_pernet_subsys(&ip6_route_net_late_ops); 3063 fib6_rules_init: 3064 fib6_rules_cleanup(); 3065 xfrm6_init: 3066 xfrm6_fini(); 3067 out_fib6_init: 3068 fib6_gc_cleanup(); 3069 out_register_subsys: 3070 unregister_pernet_subsys(&ip6_route_net_ops); 3071 out_register_inetpeer: 3072 unregister_pernet_subsys(&ipv6_inetpeer_ops); 3073 out_dst_entries: 3074 dst_entries_destroy(&ip6_dst_blackhole_ops); 3075 out_kmem_cache: 3076 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 3077 goto out; 3078 } 3079 3080 void ip6_route_cleanup(void) 3081 { 3082 unregister_netdevice_notifier(&ip6_route_dev_notifier); 3083 unregister_pernet_subsys(&ip6_route_net_late_ops); 3084 fib6_rules_cleanup(); 3085 xfrm6_fini(); 3086 fib6_gc_cleanup(); 3087 unregister_pernet_subsys(&ipv6_inetpeer_ops); 3088 unregister_pernet_subsys(&ip6_route_net_ops); 3089 dst_entries_destroy(&ip6_dst_blackhole_ops); 3090 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 3091 } 3092