1 /* 2 * Linux INET6 implementation 3 * FIB front-end. 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 */ 13 14 /* Changes: 15 * 16 * YOSHIFUJI Hideaki @USAGI 17 * reworked default router selection. 18 * - respect outgoing interface 19 * - select from (probably) reachable routers (i.e. 20 * routers in REACHABLE, STALE, DELAY or PROBE states). 21 * - always select the same router if it is (probably) 22 * reachable. otherwise, round-robin the list. 23 * Ville Nuorvala 24 * Fixed routing subtrees. 25 */ 26 27 #define pr_fmt(fmt) "IPv6: " fmt 28 29 #include <linux/capability.h> 30 #include <linux/errno.h> 31 #include <linux/export.h> 32 #include <linux/types.h> 33 #include <linux/times.h> 34 #include <linux/socket.h> 35 #include <linux/sockios.h> 36 #include <linux/net.h> 37 #include <linux/route.h> 38 #include <linux/netdevice.h> 39 #include <linux/in6.h> 40 #include <linux/mroute6.h> 41 #include <linux/init.h> 42 #include <linux/if_arp.h> 43 #include <linux/proc_fs.h> 44 #include <linux/seq_file.h> 45 #include <linux/nsproxy.h> 46 #include <linux/slab.h> 47 #include <net/net_namespace.h> 48 #include <net/snmp.h> 49 #include <net/ipv6.h> 50 #include <net/ip6_fib.h> 51 #include <net/ip6_route.h> 52 #include <net/ndisc.h> 53 #include <net/addrconf.h> 54 #include <net/tcp.h> 55 #include <linux/rtnetlink.h> 56 #include <net/dst.h> 57 #include <net/xfrm.h> 58 #include <net/netevent.h> 59 #include <net/netlink.h> 60 #include <net/nexthop.h> 61 62 #include <asm/uaccess.h> 63 64 #ifdef CONFIG_SYSCTL 65 #include <linux/sysctl.h> 66 #endif 67 68 enum rt6_nud_state { 69 RT6_NUD_FAIL_HARD = -3, 70 RT6_NUD_FAIL_PROBE = -2, 71 RT6_NUD_FAIL_DO_RR = -1, 72 RT6_NUD_SUCCEED = 1 73 }; 74 75 static struct rt6_info 
*ip6_rt_copy(struct rt6_info *ort, 76 const struct in6_addr *dest); 77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 78 static unsigned int ip6_default_advmss(const struct dst_entry *dst); 79 static unsigned int ip6_mtu(const struct dst_entry *dst); 80 static struct dst_entry *ip6_negative_advice(struct dst_entry *); 81 static void ip6_dst_destroy(struct dst_entry *); 82 static void ip6_dst_ifdown(struct dst_entry *, 83 struct net_device *dev, int how); 84 static int ip6_dst_gc(struct dst_ops *ops); 85 86 static int ip6_pkt_discard(struct sk_buff *skb); 87 static int ip6_pkt_discard_out(struct sk_buff *skb); 88 static int ip6_pkt_prohibit(struct sk_buff *skb); 89 static int ip6_pkt_prohibit_out(struct sk_buff *skb); 90 static void ip6_link_failure(struct sk_buff *skb); 91 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 92 struct sk_buff *skb, u32 mtu); 93 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, 94 struct sk_buff *skb); 95 static int rt6_score_route(struct rt6_info *rt, int oif, int strict); 96 97 #ifdef CONFIG_IPV6_ROUTE_INFO 98 static struct rt6_info *rt6_add_route_info(struct net *net, 99 const struct in6_addr *prefix, int prefixlen, 100 const struct in6_addr *gwaddr, int ifindex, 101 unsigned int pref); 102 static struct rt6_info *rt6_get_route_info(struct net *net, 103 const struct in6_addr *prefix, int prefixlen, 104 const struct in6_addr *gwaddr, int ifindex); 105 #endif 106 107 static void rt6_bind_peer(struct rt6_info *rt, int create) 108 { 109 struct inet_peer_base *base; 110 struct inet_peer *peer; 111 112 base = inetpeer_base_ptr(rt->_rt6i_peer); 113 if (!base) 114 return; 115 116 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); 117 if (peer) { 118 if (!rt6_set_peer(rt, peer)) 119 inet_putpeer(peer); 120 } 121 } 122 123 static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) 124 { 125 if (rt6_has_peer(rt)) 126 return rt6_peer_ptr(rt); 127 128 
rt6_bind_peer(rt, create); 129 return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL); 130 } 131 132 static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) 133 { 134 return __rt6_get_peer(rt, 1); 135 } 136 137 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) 138 { 139 struct rt6_info *rt = (struct rt6_info *) dst; 140 struct inet_peer *peer; 141 u32 *p = NULL; 142 143 if (!(rt->dst.flags & DST_HOST)) 144 return NULL; 145 146 peer = rt6_get_peer_create(rt); 147 if (peer) { 148 u32 *old_p = __DST_METRICS_PTR(old); 149 unsigned long prev, new; 150 151 p = peer->metrics; 152 if (inet_metrics_new(peer) || 153 (old & DST_METRICS_FORCE_OVERWRITE)) 154 memcpy(p, old_p, sizeof(u32) * RTAX_MAX); 155 156 new = (unsigned long) p; 157 prev = cmpxchg(&dst->_metrics, old, new); 158 159 if (prev != old) { 160 p = __DST_METRICS_PTR(prev); 161 if (prev & DST_METRICS_READ_ONLY) 162 p = NULL; 163 } 164 } 165 return p; 166 } 167 168 static inline const void *choose_neigh_daddr(struct rt6_info *rt, 169 struct sk_buff *skb, 170 const void *daddr) 171 { 172 struct in6_addr *p = &rt->rt6i_gateway; 173 174 if (!ipv6_addr_any(p)) 175 return (const void *) p; 176 else if (skb) 177 return &ipv6_hdr(skb)->daddr; 178 return daddr; 179 } 180 181 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, 182 struct sk_buff *skb, 183 const void *daddr) 184 { 185 struct rt6_info *rt = (struct rt6_info *) dst; 186 struct neighbour *n; 187 188 daddr = choose_neigh_daddr(rt, skb, daddr); 189 n = __ipv6_neigh_lookup(dst->dev, daddr); 190 if (n) 191 return n; 192 return neigh_create(&nd_tbl, daddr, dst->dev); 193 } 194 195 static struct dst_ops ip6_dst_ops_template = { 196 .family = AF_INET6, 197 .protocol = cpu_to_be16(ETH_P_IPV6), 198 .gc = ip6_dst_gc, 199 .gc_thresh = 1024, 200 .check = ip6_dst_check, 201 .default_advmss = ip6_default_advmss, 202 .mtu = ip6_mtu, 203 .cow_metrics = ipv6_cow_metrics, 204 .destroy = ip6_dst_destroy, 205 .ifdown = ip6_dst_ifdown, 
206 .negative_advice = ip6_negative_advice, 207 .link_failure = ip6_link_failure, 208 .update_pmtu = ip6_rt_update_pmtu, 209 .redirect = rt6_do_redirect, 210 .local_out = __ip6_local_out, 211 .neigh_lookup = ip6_neigh_lookup, 212 }; 213 214 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) 215 { 216 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 217 218 return mtu ? : dst->dev->mtu; 219 } 220 221 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, 222 struct sk_buff *skb, u32 mtu) 223 { 224 } 225 226 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, 227 struct sk_buff *skb) 228 { 229 } 230 231 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst, 232 unsigned long old) 233 { 234 return NULL; 235 } 236 237 static struct dst_ops ip6_dst_blackhole_ops = { 238 .family = AF_INET6, 239 .protocol = cpu_to_be16(ETH_P_IPV6), 240 .destroy = ip6_dst_destroy, 241 .check = ip6_dst_check, 242 .mtu = ip6_blackhole_mtu, 243 .default_advmss = ip6_default_advmss, 244 .update_pmtu = ip6_rt_blackhole_update_pmtu, 245 .redirect = ip6_rt_blackhole_redirect, 246 .cow_metrics = ip6_rt_blackhole_cow_metrics, 247 .neigh_lookup = ip6_neigh_lookup, 248 }; 249 250 static const u32 ip6_template_metrics[RTAX_MAX] = { 251 [RTAX_HOPLIMIT - 1] = 0, 252 }; 253 254 static const struct rt6_info ip6_null_entry_template = { 255 .dst = { 256 .__refcnt = ATOMIC_INIT(1), 257 .__use = 1, 258 .obsolete = DST_OBSOLETE_FORCE_CHK, 259 .error = -ENETUNREACH, 260 .input = ip6_pkt_discard, 261 .output = ip6_pkt_discard_out, 262 }, 263 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 264 .rt6i_protocol = RTPROT_KERNEL, 265 .rt6i_metric = ~(u32) 0, 266 .rt6i_ref = ATOMIC_INIT(1), 267 }; 268 269 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 270 271 static const struct rt6_info ip6_prohibit_entry_template = { 272 .dst = { 273 .__refcnt = ATOMIC_INIT(1), 274 .__use = 1, 275 .obsolete = DST_OBSOLETE_FORCE_CHK, 276 .error = -EACCES, 277 .input = 
ip6_pkt_prohibit, 278 .output = ip6_pkt_prohibit_out, 279 }, 280 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 281 .rt6i_protocol = RTPROT_KERNEL, 282 .rt6i_metric = ~(u32) 0, 283 .rt6i_ref = ATOMIC_INIT(1), 284 }; 285 286 static const struct rt6_info ip6_blk_hole_entry_template = { 287 .dst = { 288 .__refcnt = ATOMIC_INIT(1), 289 .__use = 1, 290 .obsolete = DST_OBSOLETE_FORCE_CHK, 291 .error = -EINVAL, 292 .input = dst_discard, 293 .output = dst_discard, 294 }, 295 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 296 .rt6i_protocol = RTPROT_KERNEL, 297 .rt6i_metric = ~(u32) 0, 298 .rt6i_ref = ATOMIC_INIT(1), 299 }; 300 301 #endif 302 303 /* allocate dst with ip6_dst_ops */ 304 static inline struct rt6_info *ip6_dst_alloc(struct net *net, 305 struct net_device *dev, 306 int flags, 307 struct fib6_table *table) 308 { 309 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 310 0, DST_OBSOLETE_FORCE_CHK, flags); 311 312 if (rt) { 313 struct dst_entry *dst = &rt->dst; 314 315 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); 316 rt6_init_peer(rt, table ? 
				 &table->tb6_peers : net->ipv6.peers);
		rt->rt6i_genid = rt_genid_ipv6(net);
		INIT_LIST_HEAD(&rt->rt6i_siblings);
	}
	return rt;
}

/* Tear down a rt6_info: drop the generic metrics (DST_HOST routes keep
 * their metrics in the bound inet_peer instead -- see ipv6_cow_metrics()),
 * the inet6_dev reference, the dst->from link used for inherited expiry
 * (see rt6_check_expired()), and any bound inet_peer.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct dst_entry *from = dst->from;

	if (!(rt->dst.flags & DST_HOST))
		dst_destroy_metrics_generic(dst);

	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	dst->from = NULL;
	dst_release(from);

	if (rt6_has_peer(rt)) {
		struct inet_peer *peer = rt6_peer_ptr(rt);
		inet_putpeer(peer);
	}
}

/* A device is going away: re-point the route's inet6_dev reference at
 * the per-namespace loopback device so the rt6_info remains safe to
 * use until it is released.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
	}
}

/* True if the route itself carries RTF_EXPIRES and has passed
 * dst.expires, or -- for a clone -- if the route it was copied from
 * (chained via dst.from) has expired.
 */
static bool rt6_check_expired(const struct rt6_info *rt)
{
	if (rt->rt6i_flags & RTF_EXPIRES) {
		if (time_after(jiffies, rt->dst.expires))
			return true;
	} else if (rt->dst.from) {
		return rt6_check_expired((struct rt6_info *) rt->dst.from);
	}
	return false;
}

/* Multipath route selection:
 * Hash based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int val = fl6->flowi6_proto;

	val ^= ipv6_addr_hash(&fl6->daddr);
	val ^= ipv6_addr_hash(&fl6->saddr);

	/* Work only if this not encapsulated */
	switch (fl6->flowi6_proto) {
	case IPPROTO_UDP:
	case IPPROTO_TCP:
	case IPPROTO_SCTP:
		val ^= (__force u16)fl6->fl6_sport;
		val ^= (__force u16)fl6->fl6_dport;
		break;

	case IPPROTO_ICMPV6:
		val ^= (__force u16)fl6->fl6_icmp_type;
		val ^= (__force u16)fl6->fl6_icmp_code;
		break;
	}
	/* RFC6438 recommends to use flowlabel */
	val ^= (__force u32)fl6->flowlabel;

	/* Perhaps, we need to tune, this function? */
	val = val ^ (val >> 7) ^ (val >> 12);
	return val % candidate_count;
}

/* Pick one nexthop among @match and its ECMP siblings, keyed by the
 * flow hash above.  If the hashed sibling scores negative (unusable),
 * fall back to @match itself rather than scanning further.
 */
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
					     struct flowi6 *fl6, int oif,
					     int strict)
{
	struct rt6_info *sibling, *next_sibling;
	int route_choosen;

	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
	/* Don't change the route, if route_choosen == 0
	 * (siblings does not include ourself)
	 */
	if (route_choosen)
		list_for_each_entry_safe(sibling, next_sibling,
				&match->rt6i_siblings, rt6i_siblings) {
			route_choosen--;
			if (route_choosen == 0) {
				if (rt6_score_route(sibling, oif, strict) < 0)
					break;
				match = sibling;
				break;
			}
		}
	return match;
}

/*
 *	Route lookup. Any table->tb6_lock is implied.
438 */ 439 440 static inline struct rt6_info *rt6_device_match(struct net *net, 441 struct rt6_info *rt, 442 const struct in6_addr *saddr, 443 int oif, 444 int flags) 445 { 446 struct rt6_info *local = NULL; 447 struct rt6_info *sprt; 448 449 if (!oif && ipv6_addr_any(saddr)) 450 goto out; 451 452 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { 453 struct net_device *dev = sprt->dst.dev; 454 455 if (oif) { 456 if (dev->ifindex == oif) 457 return sprt; 458 if (dev->flags & IFF_LOOPBACK) { 459 if (!sprt->rt6i_idev || 460 sprt->rt6i_idev->dev->ifindex != oif) { 461 if (flags & RT6_LOOKUP_F_IFACE && oif) 462 continue; 463 if (local && (!oif || 464 local->rt6i_idev->dev->ifindex == oif)) 465 continue; 466 } 467 local = sprt; 468 } 469 } else { 470 if (ipv6_chk_addr(net, saddr, dev, 471 flags & RT6_LOOKUP_F_IFACE)) 472 return sprt; 473 } 474 } 475 476 if (oif) { 477 if (local) 478 return local; 479 480 if (flags & RT6_LOOKUP_F_IFACE) 481 return net->ipv6.ip6_null_entry; 482 } 483 out: 484 return rt; 485 } 486 487 #ifdef CONFIG_IPV6_ROUTER_PREF 488 struct __rt6_probe_work { 489 struct work_struct work; 490 struct in6_addr target; 491 struct net_device *dev; 492 }; 493 494 static void rt6_probe_deferred(struct work_struct *w) 495 { 496 struct in6_addr mcaddr; 497 struct __rt6_probe_work *work = 498 container_of(w, struct __rt6_probe_work, work); 499 500 addrconf_addr_solict_mult(&work->target, &mcaddr); 501 ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL); 502 dev_put(work->dev); 503 kfree(w); 504 } 505 506 static void rt6_probe(struct rt6_info *rt) 507 { 508 struct neighbour *neigh; 509 /* 510 * Okay, this does not seem to be appropriate 511 * for now, however, we need to check if it 512 * is really so; aka Router Reachability Probing. 513 * 514 * Router Reachability Probe MUST be rate-limited 515 * to no more than one per minute. 
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		write_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			goto out;
	}

	/* Probe when there is no neighbour entry yet, or when the last
	 * confirmation is older than the per-device probe interval.
	 * The NS itself is sent from a workqueue (rt6_probe_deferred)
	 * because we are under RCU-bh and the neighbour write lock here.
	 */
	if (!neigh ||
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct __rt6_probe_work *work;

		work = kmalloc(sizeof(*work), GFP_ATOMIC);

		if (neigh && work)
			__neigh_set_probe_once(neigh);

		if (neigh)
			write_unlock(&neigh->lock);

		if (work) {
			INIT_WORK(&work->work, rt6_probe_deferred);
			work->target = rt->rt6i_gateway;
			dev_hold(rt->dst.dev);	/* dropped in rt6_probe_deferred() */
			work->dev = rt->dst.dev;
			schedule_work(&work->work);
		}
	} else {
out:
		write_unlock(&neigh->lock);
	}
	rcu_read_unlock_bh();
}
#else
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif

/*
 *	Default Router Selection (RFC 2461 6.3.6)
 */
/* Interface-match score: 2 = exact oif match (or no oif requested),
 * 1 = loopback route whose idev matches oif, 0 = no match.
 */
static inline int rt6_check_dev(struct rt6_info *rt, int oif)
{
	struct net_device *dev = rt->dst.dev;
	if (!oif || dev->ifindex == oif)
		return 2;
	if ((dev->flags & IFF_LOOPBACK) &&
	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
		return 1;
	return 0;
}

/* Classify the nexthop's neighbour reachability for route scoring.
 * Gateway-less routes always succeed; otherwise the result depends on
 * the neighbour's NUD state (see the rt6_nud_state enum).
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}

/* Score a route for default-router selection: low bits encode the
 * interface match (rt6_check_dev()), bits above that the decoded
 * router preference when CONFIG_IPV6_ROUTE_PREF is set.  Returns a
 * negative rt6_nud_state value when the route must be skipped or
 * round-robined (see rt6_check_neigh()).
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}

/* Compare @rt against the best candidate so far (@match, scored
 * *@mpri) and return whichever wins.  Expired routes and hard
 * failures are skipped; RT6_NUD_FAIL_DO_RR is mapped to the lowest
 * valid score and flagged through *@do_rr so the caller can advance
 * the round-robin pointer.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}

/* Scan all routes of @fn that share @metric -- first from the
 * round-robin head @rr_head to the end of the metric run, then
 * wrapping from fn->leaf back up to @rr_head -- and return the best
 * candidate according to find_match().
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match;
	int mpri = -1;

	match = NULL;
	for (rt = rr_head; rt && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
	     rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}

static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
676 bool do_rr = false; 677 678 rt0 = fn->rr_ptr; 679 if (!rt0) 680 fn->rr_ptr = rt0 = fn->leaf; 681 682 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict, 683 &do_rr); 684 685 if (do_rr) { 686 struct rt6_info *next = rt0->dst.rt6_next; 687 688 /* no entries matched; do round-robin */ 689 if (!next || next->rt6i_metric != rt0->rt6i_metric) 690 next = fn->leaf; 691 692 if (next != rt0) 693 fn->rr_ptr = next; 694 } 695 696 net = dev_net(rt0->dst.dev); 697 return match ? match : net->ipv6.ip6_null_entry; 698 } 699 700 #ifdef CONFIG_IPV6_ROUTE_INFO 701 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 702 const struct in6_addr *gwaddr) 703 { 704 struct net *net = dev_net(dev); 705 struct route_info *rinfo = (struct route_info *) opt; 706 struct in6_addr prefix_buf, *prefix; 707 unsigned int pref; 708 unsigned long lifetime; 709 struct rt6_info *rt; 710 711 if (len < sizeof(struct route_info)) { 712 return -EINVAL; 713 } 714 715 /* Sanity check for prefix_len and length */ 716 if (rinfo->length > 3) { 717 return -EINVAL; 718 } else if (rinfo->prefix_len > 128) { 719 return -EINVAL; 720 } else if (rinfo->prefix_len > 64) { 721 if (rinfo->length < 2) { 722 return -EINVAL; 723 } 724 } else if (rinfo->prefix_len > 0) { 725 if (rinfo->length < 1) { 726 return -EINVAL; 727 } 728 } 729 730 pref = rinfo->route_pref; 731 if (pref == ICMPV6_ROUTER_PREF_INVALID) 732 return -EINVAL; 733 734 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); 735 736 if (rinfo->length == 3) 737 prefix = (struct in6_addr *)rinfo->prefix; 738 else { 739 /* this function is safe */ 740 ipv6_addr_prefix(&prefix_buf, 741 (struct in6_addr *)rinfo->prefix, 742 rinfo->prefix_len); 743 prefix = &prefix_buf; 744 } 745 746 if (rinfo->prefix_len == 0) 747 rt = rt6_get_dflt_router(gwaddr, dev); 748 else 749 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, 750 gwaddr, dev->ifindex); 751 752 if (rt && !lifetime) { 753 ip6_del_rt(rt); 754 rt = NULL; 755 } 756 757 if (!rt && 
lifetime) 758 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, 759 pref); 760 else if (rt) 761 rt->rt6i_flags = RTF_ROUTEINFO | 762 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 763 764 if (rt) { 765 if (!addrconf_finite_timeout(lifetime)) 766 rt6_clean_expires(rt); 767 else 768 rt6_set_expires(rt, jiffies + HZ * lifetime); 769 770 ip6_rt_put(rt); 771 } 772 return 0; 773 } 774 #endif 775 776 #define BACKTRACK(__net, saddr) \ 777 do { \ 778 if (rt == __net->ipv6.ip6_null_entry) { \ 779 struct fib6_node *pn; \ 780 while (1) { \ 781 if (fn->fn_flags & RTN_TL_ROOT) \ 782 goto out; \ 783 pn = fn->parent; \ 784 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ 785 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \ 786 else \ 787 fn = pn; \ 788 if (fn->fn_flags & RTN_RTINFO) \ 789 goto restart; \ 790 } \ 791 } \ 792 } while (0) 793 794 static struct rt6_info *ip6_pol_route_lookup(struct net *net, 795 struct fib6_table *table, 796 struct flowi6 *fl6, int flags) 797 { 798 struct fib6_node *fn; 799 struct rt6_info *rt; 800 801 read_lock_bh(&table->tb6_lock); 802 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 803 restart: 804 rt = fn->leaf; 805 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); 806 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) 807 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags); 808 BACKTRACK(net, &fl6->saddr); 809 out: 810 dst_use(&rt->dst, jiffies); 811 read_unlock_bh(&table->tb6_lock); 812 return rt; 813 814 } 815 816 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6, 817 int flags) 818 { 819 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup); 820 } 821 EXPORT_SYMBOL_GPL(ip6_route_lookup); 822 823 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, 824 const struct in6_addr *saddr, int oif, int strict) 825 { 826 struct flowi6 fl6 = { 827 .flowi6_oif = oif, 828 .daddr = *daddr, 829 }; 830 struct dst_entry *dst; 831 int 
flags = strict ? RT6_LOOKUP_F_IFACE : 0; 832 833 if (saddr) { 834 memcpy(&fl6.saddr, saddr, sizeof(*saddr)); 835 flags |= RT6_LOOKUP_F_HAS_SADDR; 836 } 837 838 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup); 839 if (dst->error == 0) 840 return (struct rt6_info *) dst; 841 842 dst_release(dst); 843 844 return NULL; 845 } 846 847 EXPORT_SYMBOL(rt6_lookup); 848 849 /* ip6_ins_rt is called with FREE table->tb6_lock. 850 It takes new route entry, the addition fails by any reason the 851 route is freed. In any case, if caller does not hold it, it may 852 be destroyed. 853 */ 854 855 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, 856 struct nlattr *mx, int mx_len) 857 { 858 int err; 859 struct fib6_table *table; 860 861 table = rt->rt6i_table; 862 write_lock_bh(&table->tb6_lock); 863 err = fib6_add(&table->tb6_root, rt, info, mx, mx_len); 864 write_unlock_bh(&table->tb6_lock); 865 866 return err; 867 } 868 869 int ip6_ins_rt(struct rt6_info *rt) 870 { 871 struct nl_info info = { 872 .nl_net = dev_net(rt->dst.dev), 873 }; 874 return __ip6_ins_rt(rt, &info, NULL, 0); 875 } 876 877 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, 878 const struct in6_addr *daddr, 879 const struct in6_addr *saddr) 880 { 881 struct rt6_info *rt; 882 883 /* 884 * Clone the route. 
885 */ 886 887 rt = ip6_rt_copy(ort, daddr); 888 889 if (rt) { 890 if (ort->rt6i_dst.plen != 128 && 891 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) 892 rt->rt6i_flags |= RTF_ANYCAST; 893 894 rt->rt6i_flags |= RTF_CACHE; 895 896 #ifdef CONFIG_IPV6_SUBTREES 897 if (rt->rt6i_src.plen && saddr) { 898 rt->rt6i_src.addr = *saddr; 899 rt->rt6i_src.plen = 128; 900 } 901 #endif 902 } 903 904 return rt; 905 } 906 907 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, 908 const struct in6_addr *daddr) 909 { 910 struct rt6_info *rt = ip6_rt_copy(ort, daddr); 911 912 if (rt) 913 rt->rt6i_flags |= RTF_CACHE; 914 return rt; 915 } 916 917 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, 918 struct flowi6 *fl6, int flags) 919 { 920 struct fib6_node *fn; 921 struct rt6_info *rt, *nrt; 922 int strict = 0; 923 int attempts = 3; 924 int err; 925 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE; 926 927 strict |= flags & RT6_LOOKUP_F_IFACE; 928 929 relookup: 930 read_lock_bh(&table->tb6_lock); 931 932 restart_2: 933 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 934 935 restart: 936 rt = rt6_select(fn, oif, strict | reachable); 937 if (rt->rt6i_nsiblings) 938 rt = rt6_multipath_select(rt, fl6, oif, strict | reachable); 939 BACKTRACK(net, &fl6->saddr); 940 if (rt == net->ipv6.ip6_null_entry || 941 rt->rt6i_flags & RTF_CACHE) 942 goto out; 943 944 dst_hold(&rt->dst); 945 read_unlock_bh(&table->tb6_lock); 946 947 if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY))) 948 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); 949 else if (!(rt->dst.flags & DST_HOST)) 950 nrt = rt6_alloc_clone(rt, &fl6->daddr); 951 else 952 goto out2; 953 954 ip6_rt_put(rt); 955 rt = nrt ? : net->ipv6.ip6_null_entry; 956 957 dst_hold(&rt->dst); 958 if (nrt) { 959 err = ip6_ins_rt(nrt); 960 if (!err) 961 goto out2; 962 } 963 964 if (--attempts <= 0) 965 goto out2; 966 967 /* 968 * Race condition! 
In the gap, when table->tb6_lock was 969 * released someone could insert this route. Relookup. 970 */ 971 ip6_rt_put(rt); 972 goto relookup; 973 974 out: 975 if (reachable) { 976 reachable = 0; 977 goto restart_2; 978 } 979 dst_hold(&rt->dst); 980 read_unlock_bh(&table->tb6_lock); 981 out2: 982 rt->dst.lastuse = jiffies; 983 rt->dst.__use++; 984 985 return rt; 986 } 987 988 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, 989 struct flowi6 *fl6, int flags) 990 { 991 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); 992 } 993 994 static struct dst_entry *ip6_route_input_lookup(struct net *net, 995 struct net_device *dev, 996 struct flowi6 *fl6, int flags) 997 { 998 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG) 999 flags |= RT6_LOOKUP_F_IFACE; 1000 1001 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input); 1002 } 1003 1004 void ip6_route_input(struct sk_buff *skb) 1005 { 1006 const struct ipv6hdr *iph = ipv6_hdr(skb); 1007 struct net *net = dev_net(skb->dev); 1008 int flags = RT6_LOOKUP_F_HAS_SADDR; 1009 struct flowi6 fl6 = { 1010 .flowi6_iif = skb->dev->ifindex, 1011 .daddr = iph->daddr, 1012 .saddr = iph->saddr, 1013 .flowlabel = ip6_flowinfo(iph), 1014 .flowi6_mark = skb->mark, 1015 .flowi6_proto = iph->nexthdr, 1016 }; 1017 1018 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags)); 1019 } 1020 1021 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, 1022 struct flowi6 *fl6, int flags) 1023 { 1024 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); 1025 } 1026 1027 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk, 1028 struct flowi6 *fl6) 1029 { 1030 int flags = 0; 1031 1032 fl6->flowi6_iif = LOOPBACK_IFINDEX; 1033 1034 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) 1035 flags |= RT6_LOOKUP_F_IFACE; 1036 1037 if (!ipv6_addr_any(&fl6->saddr)) 1038 flags |= RT6_LOOKUP_F_HAS_SADDR; 
1039 else if (sk) 1040 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); 1041 1042 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); 1043 } 1044 1045 EXPORT_SYMBOL(ip6_route_output); 1046 1047 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) 1048 { 1049 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; 1050 struct dst_entry *new = NULL; 1051 1052 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0); 1053 if (rt) { 1054 new = &rt->dst; 1055 1056 memset(new + 1, 0, sizeof(*rt) - sizeof(*new)); 1057 rt6_init_peer(rt, net->ipv6.peers); 1058 1059 new->__use = 1; 1060 new->input = dst_discard; 1061 new->output = dst_discard; 1062 1063 if (dst_metrics_read_only(&ort->dst)) 1064 new->_metrics = ort->dst._metrics; 1065 else 1066 dst_copy_metrics(new, &ort->dst); 1067 rt->rt6i_idev = ort->rt6i_idev; 1068 if (rt->rt6i_idev) 1069 in6_dev_hold(rt->rt6i_idev); 1070 1071 rt->rt6i_gateway = ort->rt6i_gateway; 1072 rt->rt6i_flags = ort->rt6i_flags; 1073 rt->rt6i_metric = 0; 1074 1075 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 1076 #ifdef CONFIG_IPV6_SUBTREES 1077 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1078 #endif 1079 1080 dst_free(new); 1081 } 1082 1083 dst_release(dst_orig); 1084 return new ? new : ERR_PTR(-ENOMEM); 1085 } 1086 1087 /* 1088 * Destination cache support functions 1089 */ 1090 1091 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 1092 { 1093 struct rt6_info *rt; 1094 1095 rt = (struct rt6_info *) dst; 1096 1097 /* All IPV6 dsts are created with ->obsolete set to the value 1098 * DST_OBSOLETE_FORCE_CHK which forces validation calls down 1099 * into this function always. 
 */
	if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
		return NULL;

	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
		return NULL;

	if (rt6_check_expired(rt))
		return NULL;

	return dst;
}

/* ->negative_advice: the caller is unhappy with this dst.  Expired
 * RTF_CACHE clones are deleted from the FIB; non-cached routes are
 * simply released so the caller performs a fresh lookup.
 */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

/* ->link_failure: report unreachability to the sender and invalidate
 * the route: cached clones are deleted outright, while default routes
 * get their fib6_node serial poisoned so ip6_dst_check()'s cookie
 * comparison fails and cached dsts are revalidated.
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			dst_hold(&rt->dst);
			if (ip6_del_rt(rt))
				dst_free(&rt->dst);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}

/* ->update_pmtu: record a smaller path MTU on a host route (plen 128).
 * MTUs below IPV6_MIN_MTU are clamped to the minimum with
 * RTAX_FEATURE_ALLFRAG set (presumably so outgoing packets carry a
 * fragment header -- confirm against the output path).  The learned
 * MTU expires after ip6_rt_mtu_expires.
 */
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info*)dst;

	dst_confirm(dst);
	if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
		struct net *net = dev_net(dst->dev);

		rt6->rt6i_flags |= RTF_MODIFIED;
		if (mtu < IPV6_MIN_MTU) {
			u32 features = dst_metric(dst, RTAX_FEATURES);
			mtu = IPV6_MIN_MTU;
			features |= RTAX_FEATURE_ALLFRAG;
			dst_metric_set(dst, RTAX_FEATURES, features);
		}
		dst_metric_set(dst, RTAX_MTU, mtu);
		rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
	}
}

/* Resolve the route for the skb's addresses and apply a PMTU update;
 * @mtu arrives in network byte order (converted with ntohl below).
 */
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;
1176 1177 memset(&fl6, 0, sizeof(fl6)); 1178 fl6.flowi6_oif = oif; 1179 fl6.flowi6_mark = mark; 1180 fl6.daddr = iph->daddr; 1181 fl6.saddr = iph->saddr; 1182 fl6.flowlabel = ip6_flowinfo(iph); 1183 1184 dst = ip6_route_output(net, NULL, &fl6); 1185 if (!dst->error) 1186 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); 1187 dst_release(dst); 1188 } 1189 EXPORT_SYMBOL_GPL(ip6_update_pmtu); 1190 1191 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) 1192 { 1193 ip6_update_pmtu(skb, sock_net(sk), mtu, 1194 sk->sk_bound_dev_if, sk->sk_mark); 1195 } 1196 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); 1197 1198 /* Handle redirects */ 1199 struct ip6rd_flowi { 1200 struct flowi6 fl6; 1201 struct in6_addr gateway; 1202 }; 1203 1204 static struct rt6_info *__ip6_route_redirect(struct net *net, 1205 struct fib6_table *table, 1206 struct flowi6 *fl6, 1207 int flags) 1208 { 1209 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; 1210 struct rt6_info *rt; 1211 struct fib6_node *fn; 1212 1213 /* Get the "current" route for this destination and 1214 * check if the redirect has come from approriate router. 1215 * 1216 * RFC 4861 specifies that redirects should only be 1217 * accepted if they come from the nexthop to the target. 1218 * Due to the way the routes are chosen, this notion 1219 * is a bit fuzzy and one might need to check all possible 1220 * routes. 
1221 */ 1222 1223 read_lock_bh(&table->tb6_lock); 1224 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 1225 restart: 1226 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1227 if (rt6_check_expired(rt)) 1228 continue; 1229 if (rt->dst.error) 1230 break; 1231 if (!(rt->rt6i_flags & RTF_GATEWAY)) 1232 continue; 1233 if (fl6->flowi6_oif != rt->dst.dev->ifindex) 1234 continue; 1235 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) 1236 continue; 1237 break; 1238 } 1239 1240 if (!rt) 1241 rt = net->ipv6.ip6_null_entry; 1242 else if (rt->dst.error) { 1243 rt = net->ipv6.ip6_null_entry; 1244 goto out; 1245 } 1246 BACKTRACK(net, &fl6->saddr); 1247 out: 1248 dst_hold(&rt->dst); 1249 1250 read_unlock_bh(&table->tb6_lock); 1251 1252 return rt; 1253 }; 1254 1255 static struct dst_entry *ip6_route_redirect(struct net *net, 1256 const struct flowi6 *fl6, 1257 const struct in6_addr *gateway) 1258 { 1259 int flags = RT6_LOOKUP_F_HAS_SADDR; 1260 struct ip6rd_flowi rdfl; 1261 1262 rdfl.fl6 = *fl6; 1263 rdfl.gateway = *gateway; 1264 1265 return fib6_rule_lookup(net, &rdfl.fl6, 1266 flags, __ip6_route_redirect); 1267 } 1268 1269 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) 1270 { 1271 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 1272 struct dst_entry *dst; 1273 struct flowi6 fl6; 1274 1275 memset(&fl6, 0, sizeof(fl6)); 1276 fl6.flowi6_oif = oif; 1277 fl6.flowi6_mark = mark; 1278 fl6.daddr = iph->daddr; 1279 fl6.saddr = iph->saddr; 1280 fl6.flowlabel = ip6_flowinfo(iph); 1281 1282 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr); 1283 rt6_do_redirect(dst, NULL, skb); 1284 dst_release(dst); 1285 } 1286 EXPORT_SYMBOL_GPL(ip6_redirect); 1287 1288 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, 1289 u32 mark) 1290 { 1291 const struct ipv6hdr *iph = ipv6_hdr(skb); 1292 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb); 1293 struct dst_entry *dst; 1294 struct flowi6 fl6; 1295 1296 
memset(&fl6, 0, sizeof(fl6)); 1297 fl6.flowi6_oif = oif; 1298 fl6.flowi6_mark = mark; 1299 fl6.daddr = msg->dest; 1300 fl6.saddr = iph->daddr; 1301 1302 dst = ip6_route_redirect(net, &fl6, &iph->saddr); 1303 rt6_do_redirect(dst, NULL, skb); 1304 dst_release(dst); 1305 } 1306 1307 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) 1308 { 1309 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); 1310 } 1311 EXPORT_SYMBOL_GPL(ip6_sk_redirect); 1312 1313 static unsigned int ip6_default_advmss(const struct dst_entry *dst) 1314 { 1315 struct net_device *dev = dst->dev; 1316 unsigned int mtu = dst_mtu(dst); 1317 struct net *net = dev_net(dev); 1318 1319 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 1320 1321 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) 1322 mtu = net->ipv6.sysctl.ip6_rt_min_advmss; 1323 1324 /* 1325 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 1326 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 1327 * IPV6_MAXPLEN is also valid and means: "any MSS, 1328 * rely only on pmtu discovery" 1329 */ 1330 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 1331 mtu = IPV6_MAXPLEN; 1332 return mtu; 1333 } 1334 1335 static unsigned int ip6_mtu(const struct dst_entry *dst) 1336 { 1337 struct inet6_dev *idev; 1338 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 1339 1340 if (mtu) 1341 return mtu; 1342 1343 mtu = IPV6_MIN_MTU; 1344 1345 rcu_read_lock(); 1346 idev = __in6_dev_get(dst->dev); 1347 if (idev) 1348 mtu = idev->cnf.mtu6; 1349 rcu_read_unlock(); 1350 1351 return mtu; 1352 } 1353 1354 static struct dst_entry *icmp6_dst_gc_list; 1355 static DEFINE_SPINLOCK(icmp6_dst_lock); 1356 1357 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 1358 struct flowi6 *fl6) 1359 { 1360 struct dst_entry *dst; 1361 struct rt6_info *rt; 1362 struct inet6_dev *idev = in6_dev_get(dev); 1363 struct net *net = dev_net(dev); 1364 1365 if (unlikely(!idev)) 1366 return ERR_PTR(-ENODEV); 1367 1368 rt = ip6_dst_alloc(net, dev, 0, NULL); 
1369 if (unlikely(!rt)) { 1370 in6_dev_put(idev); 1371 dst = ERR_PTR(-ENOMEM); 1372 goto out; 1373 } 1374 1375 rt->dst.flags |= DST_HOST; 1376 rt->dst.output = ip6_output; 1377 atomic_set(&rt->dst.__refcnt, 1); 1378 rt->rt6i_gateway = fl6->daddr; 1379 rt->rt6i_dst.addr = fl6->daddr; 1380 rt->rt6i_dst.plen = 128; 1381 rt->rt6i_idev = idev; 1382 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0); 1383 1384 spin_lock_bh(&icmp6_dst_lock); 1385 rt->dst.next = icmp6_dst_gc_list; 1386 icmp6_dst_gc_list = &rt->dst; 1387 spin_unlock_bh(&icmp6_dst_lock); 1388 1389 fib6_force_start_gc(net); 1390 1391 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); 1392 1393 out: 1394 return dst; 1395 } 1396 1397 int icmp6_dst_gc(void) 1398 { 1399 struct dst_entry *dst, **pprev; 1400 int more = 0; 1401 1402 spin_lock_bh(&icmp6_dst_lock); 1403 pprev = &icmp6_dst_gc_list; 1404 1405 while ((dst = *pprev) != NULL) { 1406 if (!atomic_read(&dst->__refcnt)) { 1407 *pprev = dst->next; 1408 dst_free(dst); 1409 } else { 1410 pprev = &dst->next; 1411 ++more; 1412 } 1413 } 1414 1415 spin_unlock_bh(&icmp6_dst_lock); 1416 1417 return more; 1418 } 1419 1420 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), 1421 void *arg) 1422 { 1423 struct dst_entry *dst, **pprev; 1424 1425 spin_lock_bh(&icmp6_dst_lock); 1426 pprev = &icmp6_dst_gc_list; 1427 while ((dst = *pprev) != NULL) { 1428 struct rt6_info *rt = (struct rt6_info *) dst; 1429 if (func(rt, arg)) { 1430 *pprev = dst->next; 1431 dst_free(dst); 1432 } else { 1433 pprev = &dst->next; 1434 } 1435 } 1436 spin_unlock_bh(&icmp6_dst_lock); 1437 } 1438 1439 static int ip6_dst_gc(struct dst_ops *ops) 1440 { 1441 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); 1442 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; 1443 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; 1444 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 1445 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 1446 
unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 1447 int entries; 1448 1449 entries = dst_entries_get_fast(ops); 1450 if (time_after(rt_last_gc + rt_min_interval, jiffies) && 1451 entries <= rt_max_size) 1452 goto out; 1453 1454 net->ipv6.ip6_rt_gc_expire++; 1455 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); 1456 entries = dst_entries_get_slow(ops); 1457 if (entries < ops->gc_thresh) 1458 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1459 out: 1460 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1461 return entries > rt_max_size; 1462 } 1463 1464 /* 1465 * 1466 */ 1467 1468 int ip6_route_add(struct fib6_config *cfg) 1469 { 1470 int err; 1471 struct net *net = cfg->fc_nlinfo.nl_net; 1472 struct rt6_info *rt = NULL; 1473 struct net_device *dev = NULL; 1474 struct inet6_dev *idev = NULL; 1475 struct fib6_table *table; 1476 int addr_type; 1477 1478 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1479 return -EINVAL; 1480 #ifndef CONFIG_IPV6_SUBTREES 1481 if (cfg->fc_src_len) 1482 return -EINVAL; 1483 #endif 1484 if (cfg->fc_ifindex) { 1485 err = -ENODEV; 1486 dev = dev_get_by_index(net, cfg->fc_ifindex); 1487 if (!dev) 1488 goto out; 1489 idev = in6_dev_get(dev); 1490 if (!idev) 1491 goto out; 1492 } 1493 1494 if (cfg->fc_metric == 0) 1495 cfg->fc_metric = IP6_RT_PRIO_USER; 1496 1497 err = -ENOBUFS; 1498 if (cfg->fc_nlinfo.nlh && 1499 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { 1500 table = fib6_get_table(net, cfg->fc_table); 1501 if (!table) { 1502 pr_warn("NLM_F_CREATE should be specified when creating new route\n"); 1503 table = fib6_new_table(net, cfg->fc_table); 1504 } 1505 } else { 1506 table = fib6_new_table(net, cfg->fc_table); 1507 } 1508 1509 if (!table) 1510 goto out; 1511 1512 rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 
0 : DST_NOCOUNT, table); 1513 1514 if (!rt) { 1515 err = -ENOMEM; 1516 goto out; 1517 } 1518 1519 if (cfg->fc_flags & RTF_EXPIRES) 1520 rt6_set_expires(rt, jiffies + 1521 clock_t_to_jiffies(cfg->fc_expires)); 1522 else 1523 rt6_clean_expires(rt); 1524 1525 if (cfg->fc_protocol == RTPROT_UNSPEC) 1526 cfg->fc_protocol = RTPROT_BOOT; 1527 rt->rt6i_protocol = cfg->fc_protocol; 1528 1529 addr_type = ipv6_addr_type(&cfg->fc_dst); 1530 1531 if (addr_type & IPV6_ADDR_MULTICAST) 1532 rt->dst.input = ip6_mc_input; 1533 else if (cfg->fc_flags & RTF_LOCAL) 1534 rt->dst.input = ip6_input; 1535 else 1536 rt->dst.input = ip6_forward; 1537 1538 rt->dst.output = ip6_output; 1539 1540 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1541 rt->rt6i_dst.plen = cfg->fc_dst_len; 1542 if (rt->rt6i_dst.plen == 128) { 1543 rt->dst.flags |= DST_HOST; 1544 dst_metrics_set_force_overwrite(&rt->dst); 1545 } 1546 1547 #ifdef CONFIG_IPV6_SUBTREES 1548 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1549 rt->rt6i_src.plen = cfg->fc_src_len; 1550 #endif 1551 1552 rt->rt6i_metric = cfg->fc_metric; 1553 1554 /* We cannot add true routes via loopback here, 1555 they would result in kernel looping; promote them to reject routes 1556 */ 1557 if ((cfg->fc_flags & RTF_REJECT) || 1558 (dev && (dev->flags & IFF_LOOPBACK) && 1559 !(addr_type & IPV6_ADDR_LOOPBACK) && 1560 !(cfg->fc_flags & RTF_LOCAL))) { 1561 /* hold loopback dev/idev if we haven't done so. 
*/ 1562 if (dev != net->loopback_dev) { 1563 if (dev) { 1564 dev_put(dev); 1565 in6_dev_put(idev); 1566 } 1567 dev = net->loopback_dev; 1568 dev_hold(dev); 1569 idev = in6_dev_get(dev); 1570 if (!idev) { 1571 err = -ENODEV; 1572 goto out; 1573 } 1574 } 1575 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1576 switch (cfg->fc_type) { 1577 case RTN_BLACKHOLE: 1578 rt->dst.error = -EINVAL; 1579 rt->dst.output = dst_discard; 1580 rt->dst.input = dst_discard; 1581 break; 1582 case RTN_PROHIBIT: 1583 rt->dst.error = -EACCES; 1584 rt->dst.output = ip6_pkt_prohibit_out; 1585 rt->dst.input = ip6_pkt_prohibit; 1586 break; 1587 case RTN_THROW: 1588 default: 1589 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN 1590 : -ENETUNREACH; 1591 rt->dst.output = ip6_pkt_discard_out; 1592 rt->dst.input = ip6_pkt_discard; 1593 break; 1594 } 1595 goto install_route; 1596 } 1597 1598 if (cfg->fc_flags & RTF_GATEWAY) { 1599 const struct in6_addr *gw_addr; 1600 int gwa_type; 1601 1602 gw_addr = &cfg->fc_gateway; 1603 rt->rt6i_gateway = *gw_addr; 1604 gwa_type = ipv6_addr_type(gw_addr); 1605 1606 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 1607 struct rt6_info *grt; 1608 1609 /* IPv6 strictly inhibits using not link-local 1610 addresses as nexthop address. 1611 Otherwise, router will not able to send redirects. 1612 It is very good, but in some (rare!) circumstances 1613 (SIT, PtP, NBMA NOARP links) it is handy to allow 1614 some exceptions. 
--ANK 1615 */ 1616 err = -EINVAL; 1617 if (!(gwa_type & IPV6_ADDR_UNICAST)) 1618 goto out; 1619 1620 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); 1621 1622 err = -EHOSTUNREACH; 1623 if (!grt) 1624 goto out; 1625 if (dev) { 1626 if (dev != grt->dst.dev) { 1627 ip6_rt_put(grt); 1628 goto out; 1629 } 1630 } else { 1631 dev = grt->dst.dev; 1632 idev = grt->rt6i_idev; 1633 dev_hold(dev); 1634 in6_dev_hold(grt->rt6i_idev); 1635 } 1636 if (!(grt->rt6i_flags & RTF_GATEWAY)) 1637 err = 0; 1638 ip6_rt_put(grt); 1639 1640 if (err) 1641 goto out; 1642 } 1643 err = -EINVAL; 1644 if (!dev || (dev->flags & IFF_LOOPBACK)) 1645 goto out; 1646 } 1647 1648 err = -ENODEV; 1649 if (!dev) 1650 goto out; 1651 1652 if (!ipv6_addr_any(&cfg->fc_prefsrc)) { 1653 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { 1654 err = -EINVAL; 1655 goto out; 1656 } 1657 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc; 1658 rt->rt6i_prefsrc.plen = 128; 1659 } else 1660 rt->rt6i_prefsrc.plen = 0; 1661 1662 rt->rt6i_flags = cfg->fc_flags; 1663 1664 install_route: 1665 rt->dst.dev = dev; 1666 rt->rt6i_idev = idev; 1667 rt->rt6i_table = table; 1668 1669 cfg->fc_nlinfo.nl_net = dev_net(dev); 1670 1671 return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len); 1672 1673 out: 1674 if (dev) 1675 dev_put(dev); 1676 if (idev) 1677 in6_dev_put(idev); 1678 if (rt) 1679 dst_free(&rt->dst); 1680 return err; 1681 } 1682 1683 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) 1684 { 1685 int err; 1686 struct fib6_table *table; 1687 struct net *net = dev_net(rt->dst.dev); 1688 1689 if (rt == net->ipv6.ip6_null_entry) { 1690 err = -ENOENT; 1691 goto out; 1692 } 1693 1694 table = rt->rt6i_table; 1695 write_lock_bh(&table->tb6_lock); 1696 err = fib6_del(rt, info); 1697 write_unlock_bh(&table->tb6_lock); 1698 1699 out: 1700 ip6_rt_put(rt); 1701 return err; 1702 } 1703 1704 int ip6_del_rt(struct rt6_info *rt) 1705 { 1706 struct nl_info info = { 1707 .nl_net = dev_net(rt->dst.dev), 1708 }; 
1709 return __ip6_del_rt(rt, &info); 1710 } 1711 1712 static int ip6_route_del(struct fib6_config *cfg) 1713 { 1714 struct fib6_table *table; 1715 struct fib6_node *fn; 1716 struct rt6_info *rt; 1717 int err = -ESRCH; 1718 1719 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); 1720 if (!table) 1721 return err; 1722 1723 read_lock_bh(&table->tb6_lock); 1724 1725 fn = fib6_locate(&table->tb6_root, 1726 &cfg->fc_dst, cfg->fc_dst_len, 1727 &cfg->fc_src, cfg->fc_src_len); 1728 1729 if (fn) { 1730 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1731 if (cfg->fc_ifindex && 1732 (!rt->dst.dev || 1733 rt->dst.dev->ifindex != cfg->fc_ifindex)) 1734 continue; 1735 if (cfg->fc_flags & RTF_GATEWAY && 1736 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) 1737 continue; 1738 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) 1739 continue; 1740 dst_hold(&rt->dst); 1741 read_unlock_bh(&table->tb6_lock); 1742 1743 return __ip6_del_rt(rt, &cfg->fc_nlinfo); 1744 } 1745 } 1746 read_unlock_bh(&table->tb6_lock); 1747 1748 return err; 1749 } 1750 1751 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) 1752 { 1753 struct net *net = dev_net(skb->dev); 1754 struct netevent_redirect netevent; 1755 struct rt6_info *rt, *nrt = NULL; 1756 struct ndisc_options ndopts; 1757 struct inet6_dev *in6_dev; 1758 struct neighbour *neigh; 1759 struct rd_msg *msg; 1760 int optlen, on_link; 1761 u8 *lladdr; 1762 1763 optlen = skb_tail_pointer(skb) - skb_transport_header(skb); 1764 optlen -= sizeof(*msg); 1765 1766 if (optlen < 0) { 1767 net_dbg_ratelimited("rt6_do_redirect: packet too short\n"); 1768 return; 1769 } 1770 1771 msg = (struct rd_msg *)icmp6_hdr(skb); 1772 1773 if (ipv6_addr_is_multicast(&msg->dest)) { 1774 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n"); 1775 return; 1776 } 1777 1778 on_link = 0; 1779 if (ipv6_addr_equal(&msg->dest, &msg->target)) { 1780 on_link = 1; 1781 } else if 
(ipv6_addr_type(&msg->target) != 1782 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { 1783 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n"); 1784 return; 1785 } 1786 1787 in6_dev = __in6_dev_get(skb->dev); 1788 if (!in6_dev) 1789 return; 1790 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) 1791 return; 1792 1793 /* RFC2461 8.1: 1794 * The IP source address of the Redirect MUST be the same as the current 1795 * first-hop router for the specified ICMP Destination Address. 1796 */ 1797 1798 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) { 1799 net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); 1800 return; 1801 } 1802 1803 lladdr = NULL; 1804 if (ndopts.nd_opts_tgt_lladdr) { 1805 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, 1806 skb->dev); 1807 if (!lladdr) { 1808 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n"); 1809 return; 1810 } 1811 } 1812 1813 rt = (struct rt6_info *) dst; 1814 if (rt == net->ipv6.ip6_null_entry) { 1815 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); 1816 return; 1817 } 1818 1819 /* Redirect received -> path was valid. 1820 * Look, redirects are sent only in response to data packets, 1821 * so that this nexthop apparently is reachable. --ANK 1822 */ 1823 dst_confirm(&rt->dst); 1824 1825 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1); 1826 if (!neigh) 1827 return; 1828 1829 /* 1830 * We have finally decided to accept it. 1831 */ 1832 1833 neigh_update(neigh, lladdr, NUD_STALE, 1834 NEIGH_UPDATE_F_WEAK_OVERRIDE| 1835 NEIGH_UPDATE_F_OVERRIDE| 1836 (on_link ? 
0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 1837 NEIGH_UPDATE_F_ISROUTER)) 1838 ); 1839 1840 nrt = ip6_rt_copy(rt, &msg->dest); 1841 if (!nrt) 1842 goto out; 1843 1844 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 1845 if (on_link) 1846 nrt->rt6i_flags &= ~RTF_GATEWAY; 1847 1848 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; 1849 1850 if (ip6_ins_rt(nrt)) 1851 goto out; 1852 1853 netevent.old = &rt->dst; 1854 netevent.new = &nrt->dst; 1855 netevent.daddr = &msg->dest; 1856 netevent.neigh = neigh; 1857 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 1858 1859 if (rt->rt6i_flags & RTF_CACHE) { 1860 rt = (struct rt6_info *) dst_clone(&rt->dst); 1861 ip6_del_rt(rt); 1862 } 1863 1864 out: 1865 neigh_release(neigh); 1866 } 1867 1868 /* 1869 * Misc support functions 1870 */ 1871 1872 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, 1873 const struct in6_addr *dest) 1874 { 1875 struct net *net = dev_net(ort->dst.dev); 1876 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0, 1877 ort->rt6i_table); 1878 1879 if (rt) { 1880 rt->dst.input = ort->dst.input; 1881 rt->dst.output = ort->dst.output; 1882 rt->dst.flags |= DST_HOST; 1883 1884 rt->rt6i_dst.addr = *dest; 1885 rt->rt6i_dst.plen = 128; 1886 dst_copy_metrics(&rt->dst, &ort->dst); 1887 rt->dst.error = ort->dst.error; 1888 rt->rt6i_idev = ort->rt6i_idev; 1889 if (rt->rt6i_idev) 1890 in6_dev_hold(rt->rt6i_idev); 1891 rt->dst.lastuse = jiffies; 1892 1893 if (ort->rt6i_flags & RTF_GATEWAY) 1894 rt->rt6i_gateway = ort->rt6i_gateway; 1895 else 1896 rt->rt6i_gateway = *dest; 1897 rt->rt6i_flags = ort->rt6i_flags; 1898 rt6_set_from(rt, ort); 1899 rt->rt6i_metric = 0; 1900 1901 #ifdef CONFIG_IPV6_SUBTREES 1902 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1903 #endif 1904 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key)); 1905 rt->rt6i_table = ort->rt6i_table; 1906 } 1907 return rt; 1908 } 1909 1910 #ifdef CONFIG_IPV6_ROUTE_INFO 1911 static struct 
rt6_info *rt6_get_route_info(struct net *net, 1912 const struct in6_addr *prefix, int prefixlen, 1913 const struct in6_addr *gwaddr, int ifindex) 1914 { 1915 struct fib6_node *fn; 1916 struct rt6_info *rt = NULL; 1917 struct fib6_table *table; 1918 1919 table = fib6_get_table(net, RT6_TABLE_INFO); 1920 if (!table) 1921 return NULL; 1922 1923 read_lock_bh(&table->tb6_lock); 1924 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); 1925 if (!fn) 1926 goto out; 1927 1928 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1929 if (rt->dst.dev->ifindex != ifindex) 1930 continue; 1931 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 1932 continue; 1933 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 1934 continue; 1935 dst_hold(&rt->dst); 1936 break; 1937 } 1938 out: 1939 read_unlock_bh(&table->tb6_lock); 1940 return rt; 1941 } 1942 1943 static struct rt6_info *rt6_add_route_info(struct net *net, 1944 const struct in6_addr *prefix, int prefixlen, 1945 const struct in6_addr *gwaddr, int ifindex, 1946 unsigned int pref) 1947 { 1948 struct fib6_config cfg = { 1949 .fc_table = RT6_TABLE_INFO, 1950 .fc_metric = IP6_RT_PRIO_USER, 1951 .fc_ifindex = ifindex, 1952 .fc_dst_len = prefixlen, 1953 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 1954 RTF_UP | RTF_PREF(pref), 1955 .fc_nlinfo.portid = 0, 1956 .fc_nlinfo.nlh = NULL, 1957 .fc_nlinfo.nl_net = net, 1958 }; 1959 1960 cfg.fc_dst = *prefix; 1961 cfg.fc_gateway = *gwaddr; 1962 1963 /* We should treat it as a default route if prefix length is 0. 
*/ 1964 if (!prefixlen) 1965 cfg.fc_flags |= RTF_DEFAULT; 1966 1967 ip6_route_add(&cfg); 1968 1969 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex); 1970 } 1971 #endif 1972 1973 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) 1974 { 1975 struct rt6_info *rt; 1976 struct fib6_table *table; 1977 1978 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT); 1979 if (!table) 1980 return NULL; 1981 1982 read_lock_bh(&table->tb6_lock); 1983 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { 1984 if (dev == rt->dst.dev && 1985 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 1986 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 1987 break; 1988 } 1989 if (rt) 1990 dst_hold(&rt->dst); 1991 read_unlock_bh(&table->tb6_lock); 1992 return rt; 1993 } 1994 1995 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, 1996 struct net_device *dev, 1997 unsigned int pref) 1998 { 1999 struct fib6_config cfg = { 2000 .fc_table = RT6_TABLE_DFLT, 2001 .fc_metric = IP6_RT_PRIO_USER, 2002 .fc_ifindex = dev->ifindex, 2003 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 2004 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 2005 .fc_nlinfo.portid = 0, 2006 .fc_nlinfo.nlh = NULL, 2007 .fc_nlinfo.nl_net = dev_net(dev), 2008 }; 2009 2010 cfg.fc_gateway = *gwaddr; 2011 2012 ip6_route_add(&cfg); 2013 2014 return rt6_get_dflt_router(gwaddr, dev); 2015 } 2016 2017 void rt6_purge_dflt_routers(struct net *net) 2018 { 2019 struct rt6_info *rt; 2020 struct fib6_table *table; 2021 2022 /* NOTE: Keep consistent with rt6_get_dflt_router */ 2023 table = fib6_get_table(net, RT6_TABLE_DFLT); 2024 if (!table) 2025 return; 2026 2027 restart: 2028 read_lock_bh(&table->tb6_lock); 2029 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { 2030 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && 2031 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { 2032 dst_hold(&rt->dst); 2033 
read_unlock_bh(&table->tb6_lock); 2034 ip6_del_rt(rt); 2035 goto restart; 2036 } 2037 } 2038 read_unlock_bh(&table->tb6_lock); 2039 } 2040 2041 static void rtmsg_to_fib6_config(struct net *net, 2042 struct in6_rtmsg *rtmsg, 2043 struct fib6_config *cfg) 2044 { 2045 memset(cfg, 0, sizeof(*cfg)); 2046 2047 cfg->fc_table = RT6_TABLE_MAIN; 2048 cfg->fc_ifindex = rtmsg->rtmsg_ifindex; 2049 cfg->fc_metric = rtmsg->rtmsg_metric; 2050 cfg->fc_expires = rtmsg->rtmsg_info; 2051 cfg->fc_dst_len = rtmsg->rtmsg_dst_len; 2052 cfg->fc_src_len = rtmsg->rtmsg_src_len; 2053 cfg->fc_flags = rtmsg->rtmsg_flags; 2054 2055 cfg->fc_nlinfo.nl_net = net; 2056 2057 cfg->fc_dst = rtmsg->rtmsg_dst; 2058 cfg->fc_src = rtmsg->rtmsg_src; 2059 cfg->fc_gateway = rtmsg->rtmsg_gateway; 2060 } 2061 2062 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) 2063 { 2064 struct fib6_config cfg; 2065 struct in6_rtmsg rtmsg; 2066 int err; 2067 2068 switch(cmd) { 2069 case SIOCADDRT: /* Add a route */ 2070 case SIOCDELRT: /* Delete a route */ 2071 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 2072 return -EPERM; 2073 err = copy_from_user(&rtmsg, arg, 2074 sizeof(struct in6_rtmsg)); 2075 if (err) 2076 return -EFAULT; 2077 2078 rtmsg_to_fib6_config(net, &rtmsg, &cfg); 2079 2080 rtnl_lock(); 2081 switch (cmd) { 2082 case SIOCADDRT: 2083 err = ip6_route_add(&cfg); 2084 break; 2085 case SIOCDELRT: 2086 err = ip6_route_del(&cfg); 2087 break; 2088 default: 2089 err = -EINVAL; 2090 } 2091 rtnl_unlock(); 2092 2093 return err; 2094 } 2095 2096 return -EINVAL; 2097 } 2098 2099 /* 2100 * Drop the packet on the floor 2101 */ 2102 2103 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) 2104 { 2105 int type; 2106 struct dst_entry *dst = skb_dst(skb); 2107 switch (ipstats_mib_noroutes) { 2108 case IPSTATS_MIB_INNOROUTES: 2109 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); 2110 if (type == IPV6_ADDR_ANY) { 2111 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 2112 
IPSTATS_MIB_INADDRERRORS); 2113 break; 2114 } 2115 /* FALLTHROUGH */ 2116 case IPSTATS_MIB_OUTNOROUTES: 2117 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 2118 ipstats_mib_noroutes); 2119 break; 2120 } 2121 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); 2122 kfree_skb(skb); 2123 return 0; 2124 } 2125 2126 static int ip6_pkt_discard(struct sk_buff *skb) 2127 { 2128 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 2129 } 2130 2131 static int ip6_pkt_discard_out(struct sk_buff *skb) 2132 { 2133 skb->dev = skb_dst(skb)->dev; 2134 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 2135 } 2136 2137 static int ip6_pkt_prohibit(struct sk_buff *skb) 2138 { 2139 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 2140 } 2141 2142 static int ip6_pkt_prohibit_out(struct sk_buff *skb) 2143 { 2144 skb->dev = skb_dst(skb)->dev; 2145 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 2146 } 2147 2148 /* 2149 * Allocate a dst for local (unicast / anycast) address. 
2150 */ 2151 2152 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 2153 const struct in6_addr *addr, 2154 bool anycast) 2155 { 2156 struct net *net = dev_net(idev->dev); 2157 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 2158 DST_NOCOUNT, NULL); 2159 if (!rt) 2160 return ERR_PTR(-ENOMEM); 2161 2162 in6_dev_hold(idev); 2163 2164 rt->dst.flags |= DST_HOST; 2165 rt->dst.input = ip6_input; 2166 rt->dst.output = ip6_output; 2167 rt->rt6i_idev = idev; 2168 2169 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 2170 if (anycast) 2171 rt->rt6i_flags |= RTF_ANYCAST; 2172 else 2173 rt->rt6i_flags |= RTF_LOCAL; 2174 2175 rt->rt6i_gateway = *addr; 2176 rt->rt6i_dst.addr = *addr; 2177 rt->rt6i_dst.plen = 128; 2178 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); 2179 2180 atomic_set(&rt->dst.__refcnt, 1); 2181 2182 return rt; 2183 } 2184 2185 int ip6_route_get_saddr(struct net *net, 2186 struct rt6_info *rt, 2187 const struct in6_addr *daddr, 2188 unsigned int prefs, 2189 struct in6_addr *saddr) 2190 { 2191 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt); 2192 int err = 0; 2193 if (rt->rt6i_prefsrc.plen) 2194 *saddr = rt->rt6i_prefsrc.addr; 2195 else 2196 err = ipv6_dev_get_saddr(net, idev ? 
idev->dev : NULL, 2197 daddr, prefs, saddr); 2198 return err; 2199 } 2200 2201 /* remove deleted ip from prefsrc entries */ 2202 struct arg_dev_net_ip { 2203 struct net_device *dev; 2204 struct net *net; 2205 struct in6_addr *addr; 2206 }; 2207 2208 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) 2209 { 2210 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev; 2211 struct net *net = ((struct arg_dev_net_ip *)arg)->net; 2212 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; 2213 2214 if (((void *)rt->dst.dev == dev || !dev) && 2215 rt != net->ipv6.ip6_null_entry && 2216 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { 2217 /* remove prefsrc entry */ 2218 rt->rt6i_prefsrc.plen = 0; 2219 } 2220 return 0; 2221 } 2222 2223 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) 2224 { 2225 struct net *net = dev_net(ifp->idev->dev); 2226 struct arg_dev_net_ip adni = { 2227 .dev = ifp->idev->dev, 2228 .net = net, 2229 .addr = &ifp->addr, 2230 }; 2231 fib6_clean_all(net, fib6_remove_prefsrc, &adni); 2232 } 2233 2234 struct arg_dev_net { 2235 struct net_device *dev; 2236 struct net *net; 2237 }; 2238 2239 static int fib6_ifdown(struct rt6_info *rt, void *arg) 2240 { 2241 const struct arg_dev_net *adn = arg; 2242 const struct net_device *dev = adn->dev; 2243 2244 if ((rt->dst.dev == dev || !dev) && 2245 rt != adn->net->ipv6.ip6_null_entry) 2246 return -1; 2247 2248 return 0; 2249 } 2250 2251 void rt6_ifdown(struct net *net, struct net_device *dev) 2252 { 2253 struct arg_dev_net adn = { 2254 .dev = dev, 2255 .net = net, 2256 }; 2257 2258 fib6_clean_all(net, fib6_ifdown, &adn); 2259 icmp6_clean_all(fib6_ifdown, &adn); 2260 } 2261 2262 struct rt6_mtu_change_arg { 2263 struct net_device *dev; 2264 unsigned int mtu; 2265 }; 2266 2267 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) 2268 { 2269 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 2270 struct inet6_dev *idev; 2271 2272 /* In IPv6 pmtu discovery is not 
optional, 2273 so that RTAX_MTU lock cannot disable it. 2274 We still use this lock to block changes 2275 caused by addrconf/ndisc. 2276 */ 2277 2278 idev = __in6_dev_get(arg->dev); 2279 if (!idev) 2280 return 0; 2281 2282 /* For administrative MTU increase, there is no way to discover 2283 IPv6 PMTU increase, so PMTU increase should be updated here. 2284 Since RFC 1981 doesn't include administrative MTU increase 2285 update PMTU increase is a MUST. (i.e. jumbo frame) 2286 */ 2287 /* 2288 If new MTU is less than route PMTU, this new MTU will be the 2289 lowest MTU in the path, update the route PMTU to reflect PMTU 2290 decreases; if new MTU is greater than route PMTU, and the 2291 old MTU is the lowest MTU in the path, update the route PMTU 2292 to reflect the increase. In this case if the other nodes' MTU 2293 also have the lowest MTU, TOO BIG MESSAGE will be lead to 2294 PMTU discouvery. 2295 */ 2296 if (rt->dst.dev == arg->dev && 2297 !dst_metric_locked(&rt->dst, RTAX_MTU) && 2298 (dst_mtu(&rt->dst) >= arg->mtu || 2299 (dst_mtu(&rt->dst) < arg->mtu && 2300 dst_mtu(&rt->dst) == idev->cnf.mtu6))) { 2301 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); 2302 } 2303 return 0; 2304 } 2305 2306 void rt6_mtu_change(struct net_device *dev, unsigned int mtu) 2307 { 2308 struct rt6_mtu_change_arg arg = { 2309 .dev = dev, 2310 .mtu = mtu, 2311 }; 2312 2313 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg); 2314 } 2315 2316 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 2317 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 2318 [RTA_OIF] = { .type = NLA_U32 }, 2319 [RTA_IIF] = { .type = NLA_U32 }, 2320 [RTA_PRIORITY] = { .type = NLA_U32 }, 2321 [RTA_METRICS] = { .type = NLA_NESTED }, 2322 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 2323 }; 2324 2325 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 2326 struct fib6_config *cfg) 2327 { 2328 struct rtmsg *rtm; 2329 struct nlattr *tb[RTA_MAX+1]; 2330 int err; 2331 2332 err 
= nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2333 if (err < 0) 2334 goto errout; 2335 2336 err = -EINVAL; 2337 rtm = nlmsg_data(nlh); 2338 memset(cfg, 0, sizeof(*cfg)); 2339 2340 cfg->fc_table = rtm->rtm_table; 2341 cfg->fc_dst_len = rtm->rtm_dst_len; 2342 cfg->fc_src_len = rtm->rtm_src_len; 2343 cfg->fc_flags = RTF_UP; 2344 cfg->fc_protocol = rtm->rtm_protocol; 2345 cfg->fc_type = rtm->rtm_type; 2346 2347 if (rtm->rtm_type == RTN_UNREACHABLE || 2348 rtm->rtm_type == RTN_BLACKHOLE || 2349 rtm->rtm_type == RTN_PROHIBIT || 2350 rtm->rtm_type == RTN_THROW) 2351 cfg->fc_flags |= RTF_REJECT; 2352 2353 if (rtm->rtm_type == RTN_LOCAL) 2354 cfg->fc_flags |= RTF_LOCAL; 2355 2356 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; 2357 cfg->fc_nlinfo.nlh = nlh; 2358 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2359 2360 if (tb[RTA_GATEWAY]) { 2361 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); 2362 cfg->fc_flags |= RTF_GATEWAY; 2363 } 2364 2365 if (tb[RTA_DST]) { 2366 int plen = (rtm->rtm_dst_len + 7) >> 3; 2367 2368 if (nla_len(tb[RTA_DST]) < plen) 2369 goto errout; 2370 2371 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 2372 } 2373 2374 if (tb[RTA_SRC]) { 2375 int plen = (rtm->rtm_src_len + 7) >> 3; 2376 2377 if (nla_len(tb[RTA_SRC]) < plen) 2378 goto errout; 2379 2380 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 2381 } 2382 2383 if (tb[RTA_PREFSRC]) 2384 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16); 2385 2386 if (tb[RTA_OIF]) 2387 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 2388 2389 if (tb[RTA_PRIORITY]) 2390 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 2391 2392 if (tb[RTA_METRICS]) { 2393 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 2394 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 2395 } 2396 2397 if (tb[RTA_TABLE]) 2398 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 2399 2400 if (tb[RTA_MULTIPATH]) { 2401 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); 2402 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); 2403 } 2404 2405 err = 0; 2406 errout: 2407 return err; 
}

/* Add (@add != 0) or delete (@add == 0) every nexthop of an
 * RTA_MULTIPATH route, one ip6_route_add()/ip6_route_del() call per
 * rtnexthop entry in cfg->fc_mp.
 *
 * Error policy: on delete, failures are recorded but the loop continues
 * (a nexthop may legitimately already be gone).  On add, the first
 * failure flips the function into delete mode and restarts from the
 * first nexthop, rolling back what was added.
 *
 * NOTE(review): the rollback pass re-walks the ENTIRE list, so it also
 * issues deletes for nexthops that were never added (those simply fail
 * and pollute last_err) — presumably considered harmless; confirm.
 * NOTE(review): NLM_F_EXCL is cleared on the caller's nlmsghdr in place
 * and never restored — TODO confirm no caller relies on it afterwards.
 */
static int ip6_route_multipath(struct fib6_config *cfg, int add)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 0, last_err = 0;

beginning:
	rtnh = (struct rtnexthop *)cfg->fc_mp;
	remaining = cfg->fc_mp_len;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		/* Start from a copy of the shared config, then apply the
		 * per-nexthop ifindex/gateway overrides.
		 */
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
		}
		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
		if (err) {
			last_err = err;
			/* If we are trying to remove a route, do not stop the
			 * loop when ip6_route_del() fails (because next hop is
			 * already gone), we should try to remove all next hops.
			 */
			if (add) {
				/* If add fails, we should try to delete all
				 * next hops that have been already added.
				 */
				add = 0;
				goto beginning;
			}
		}
		/* Because each route is added like a single route we remove
		 * this flag after the first nexthop (if there is a collision,
		 * we have already fail to add the first nexthop:
		 * fib6_add_rt2node() has reject it).
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}

/* RTM_DELROUTE handler: parse the request and delete either a
 * multipath set or a single route.
 */
static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
{
	struct fib6_config cfg;
	int err;

	err = rtm_to_fib6_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

	if (cfg.fc_mp)
		return ip6_route_multipath(&cfg, 0);
	else
		return ip6_route_del(&cfg);
}

/* RTM_NEWROUTE handler: parse the request and add either a multipath
 * set or a single route.
 */
static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
{
	struct fib6_config cfg;
	int err;

	err = rtm_to_fib6_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

	if (cfg.fc_mp)
		return ip6_route_multipath(&cfg, 1);
	else
		return ip6_route_add(&cfg);
}

/* Worst-case payload size of one RTM_NEWROUTE notification, used to
 * size the skb in inet6_rt_notify().  Must be kept in sync with what
 * rt6_fill_node() can emit, or the WARN_ON there fires.
 */
static inline size_t rt6_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo));
}

/* Serialize one rt6_info into an RTM_* netlink message on @skb.
 *
 * @dst/@src: if non-NULL, report these specific addresses (a /128
 *            answer to a getroute query) instead of the route prefix.
 * @iif:      input interface of the query (0 for dumps/notifications).
 * @prefix:   caller only wants RTF_PREFIX_RT routes; others return 1
 *            ("success, skipped").
 * @nowait:   passed through to ip6mr_get_route() for multicast.
 *
 * Returns the nlmsg_end() result on success, 0/1 for skips, or
 * -EMSGSIZE if @skb ran out of tailroom (message is cancelled).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;
	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	/* Recover the user-visible route type from the error code the
	 * reject route was installed with (see ip6_route_add()).
	 */
	if (rt->rt6i_flags & RTF_REJECT) {
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		/* Answering a specific query: report the exact address. */
		if (nla_put(skb, RTA_DST, 16, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put(skb, RTA_SRC, 16, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		/* Multicast destinations are resolved via the multicast
		 * routing code; err==0 with !nowait means "will answer
		 * later", so the partial message is dropped silently.
		 */
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
			goto nla_put_failure;
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	/* NOTE(review): for an already-expired route this difference is
	 * negative; rtnl_put_cacheinfo() receives it as-is — presumably
	 * intended to show a negative remaining lifetime, confirm.
	 */
	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* fib6 walker callback for RTM_GETROUTE dumps: emit one route into the
 * dump skb.  Honors the RTM_F_PREFIX filter if the request carried a
 * full rtmsg header.
 */
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	int prefix;

	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	} else
		prefix = 0;

	return rt6_fill_node(arg->net,
		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
		     prefix, 0, NLM_F_MULTI);
}

/* RTM_GETROUTE handler: perform a route lookup for the src/dst given in
 * the request and unicast the resulting route back to the requester.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0, oif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (iif) {
		/* Input-side query: simulate reception on @iif. */
		struct net_device *dev;
		int flags = 0;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if
(!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		/* Output-side query. */
		fl6.flowi6_oif = oif;

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
	}

	/* NOTE(review): both lookups return a dst (possibly the null/error
	 * entry with dst.error set) rather than NULL; an errored result is
	 * still serialized below, with the error reported via cacheinfo —
	 * confirm this is the intended reporting behavior.
	 */
	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* Hand the route reference to the skb; freed with the skb. */
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}

/* Broadcast an RTM_NEWROUTE/RTM_DELROUTE notification for @rt to the
 * RTNLGRP_IPV6_ROUTE multicast group.  On allocation or fill failure
 * the error is recorded on the group via rtnl_set_sk_err() so that
 * listeners see ENOBUFS.
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
			    event, info->portid, seq, 0, 0, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}

/* Netdevice notifier: when the per-netns loopback device registers,
 * point the special null/prohibit/blackhole template routes at it.
 */
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	}

	return NOTIFY_OK;
}

/*
 *	/proc
 */

#ifdef CONFIG_PROC_FS

/* /proc/net/ipv6_route: per-netns route table dump. */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

/* /proc/net/rt6_stats: one line of per-netns fib6 counters (hex). */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}

static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}

static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
#endif	/* CONFIG_PROC_FS */

#ifdef CONFIG_SYSCTL

/* Handler for net.ipv6.route.flush: write-only sysctl that triggers a
 * garbage-collection pass over the routing tables.  The netns comes in
 * via ctl->extra1 (set up in ipv6_route_sysctl_init()).
 *
 * NOTE(review): the return value of proc_dointvec() is ignored, so a
 * malformed write still runs the GC with the previous flush_delay and
 * reports success — TODO confirm whether parse errors should propagate.
 */
static
int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
			      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net;
	int delay;
	if (!write)
		return -EINVAL;

	net = (struct net *)ctl->extra1;
	delay = net->ipv6.sysctl.flush_delay;
	proc_dointvec(ctl, write, buffer, lenp, ppos);
	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
	return 0;
}

/* Template for the per-netns net.ipv6.route.* sysctl table; the .data
 * pointers are rewired per namespace in ipv6_route_sysctl_init() below,
 * which indexes this array BY POSITION — keep the order in sync.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};

/* Duplicate the sysctl template for namespace @net and point each
 * entry's .data at the namespace's own fields.  Returns NULL on
 * allocation failure (caller treats that as "no sysctls").
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		/* Indices must match ipv6_route_table_template order. */
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}
	return table;
}
#endif

/* Per-netns setup: clone the dst_ops template, allocate the special
 * null (and, with multiple tables, prohibit/blackhole) route entries,
 * and seed the routing sysctl defaults.  Unwinds allocations in
 * reverse order on failure.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* sysctl defaults; overridable via net.ipv6.route.* */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}

/* Per-netns teardown: mirror image of ip6_route_net_init(). */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}

/* Late per-netns init: register the /proc entries once everything else
 * they depend on exists.
 */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
	return 0;
}

static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}

static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};

/* Per-netns inetpeer base for IPv6 peer metrics. */
static int __net_init ipv6_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv6.peers = bp;
	return 0;
}

static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers =
NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};

static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};

static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};

/* Subsystem init: dst cache, pernet ops, fib6, xfrm6, policy rules,
 * rtnetlink handlers and the device notifier — in dependency order,
 * with a full goto-unwind chain (labels are named after the step that
 * failed / the first step to undo).
 */
int __init ip6_route_init(void)
{
	int ret;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

out:
	return ret;

out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}

/* Module teardown: exact reverse of ip6_route_init(). */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}