/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		  routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		  reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */

#define pr_fmt(fmt) "IPv6: " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/times.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/mroute6.h>
#include <linux/init.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
#include <net/lwtunnel.h>

#include <asm/uaccess.h>

#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

/*
 * Result of the neighbour reachability check used during route scoring
 * (see rt6_check_neigh()/rt6_score_route()).  Negative values are failure
 * modes of increasing severity; RT6_NUD_SUCCEED means the nexthop is
 * (probably) reachable.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* route must not be used */
	RT6_NUD_FAIL_PROBE = -2,	/* unusable now, but worth probing */
	RT6_NUD_FAIL_DO_RR = -1,	/* unusable; trigger round-robin to a sibling */
	RT6_NUD_SUCCEED = 1		/* nexthop considered reachable */
};
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort); 77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 78 static unsigned int ip6_default_advmss(const struct dst_entry *dst); 79 static unsigned int ip6_mtu(const struct dst_entry *dst); 80 static struct dst_entry *ip6_negative_advice(struct dst_entry *); 81 static void ip6_dst_destroy(struct dst_entry *); 82 static void ip6_dst_ifdown(struct dst_entry *, 83 struct net_device *dev, int how); 84 static int ip6_dst_gc(struct dst_ops *ops); 85 86 static int ip6_pkt_discard(struct sk_buff *skb); 87 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb); 88 static int ip6_pkt_prohibit(struct sk_buff *skb); 89 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb); 90 static void ip6_link_failure(struct sk_buff *skb); 91 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 92 struct sk_buff *skb, u32 mtu); 93 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, 94 struct sk_buff *skb); 95 static void rt6_dst_from_metrics_check(struct rt6_info *rt); 96 static int rt6_score_route(struct rt6_info *rt, int oif, int strict); 97 98 #ifdef CONFIG_IPV6_ROUTE_INFO 99 static struct rt6_info *rt6_add_route_info(struct net *net, 100 const struct in6_addr *prefix, int prefixlen, 101 const struct in6_addr *gwaddr, int ifindex, 102 unsigned int pref); 103 static struct rt6_info *rt6_get_route_info(struct net *net, 104 const struct in6_addr *prefix, int prefixlen, 105 const struct in6_addr *gwaddr, int ifindex); 106 #endif 107 108 struct uncached_list { 109 spinlock_t lock; 110 struct list_head head; 111 }; 112 113 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list); 114 115 static void rt6_uncached_list_add(struct rt6_info *rt) 116 { 117 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list); 118 119 rt->dst.flags |= DST_NOCACHE; 120 rt->rt6i_uncached_list = ul; 121 122 spin_lock_bh(&ul->lock); 123 
list_add_tail(&rt->rt6i_uncached, &ul->head); 124 spin_unlock_bh(&ul->lock); 125 } 126 127 static void rt6_uncached_list_del(struct rt6_info *rt) 128 { 129 if (!list_empty(&rt->rt6i_uncached)) { 130 struct uncached_list *ul = rt->rt6i_uncached_list; 131 132 spin_lock_bh(&ul->lock); 133 list_del(&rt->rt6i_uncached); 134 spin_unlock_bh(&ul->lock); 135 } 136 } 137 138 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) 139 { 140 struct net_device *loopback_dev = net->loopback_dev; 141 int cpu; 142 143 for_each_possible_cpu(cpu) { 144 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); 145 struct rt6_info *rt; 146 147 spin_lock_bh(&ul->lock); 148 list_for_each_entry(rt, &ul->head, rt6i_uncached) { 149 struct inet6_dev *rt_idev = rt->rt6i_idev; 150 struct net_device *rt_dev = rt->dst.dev; 151 152 if (rt_idev && (rt_idev->dev == dev || !dev) && 153 rt_idev->dev != loopback_dev) { 154 rt->rt6i_idev = in6_dev_get(loopback_dev); 155 in6_dev_put(rt_idev); 156 } 157 158 if (rt_dev && (rt_dev == dev || !dev) && 159 rt_dev != loopback_dev) { 160 rt->dst.dev = loopback_dev; 161 dev_hold(rt->dst.dev); 162 dev_put(rt_dev); 163 } 164 } 165 spin_unlock_bh(&ul->lock); 166 } 167 } 168 169 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt) 170 { 171 return dst_metrics_write_ptr(rt->dst.from); 172 } 173 174 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) 175 { 176 struct rt6_info *rt = (struct rt6_info *)dst; 177 178 if (rt->rt6i_flags & RTF_PCPU) 179 return rt6_pcpu_cow_metrics(rt); 180 else if (rt->rt6i_flags & RTF_CACHE) 181 return NULL; 182 else 183 return dst_cow_metrics_generic(dst, old); 184 } 185 186 static inline const void *choose_neigh_daddr(struct rt6_info *rt, 187 struct sk_buff *skb, 188 const void *daddr) 189 { 190 struct in6_addr *p = &rt->rt6i_gateway; 191 192 if (!ipv6_addr_any(p)) 193 return (const void *) p; 194 else if (skb) 195 return &ipv6_hdr(skb)->daddr; 196 return daddr; 197 } 198 199 static 
struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, 200 struct sk_buff *skb, 201 const void *daddr) 202 { 203 struct rt6_info *rt = (struct rt6_info *) dst; 204 struct neighbour *n; 205 206 daddr = choose_neigh_daddr(rt, skb, daddr); 207 n = __ipv6_neigh_lookup(dst->dev, daddr); 208 if (n) 209 return n; 210 return neigh_create(&nd_tbl, daddr, dst->dev); 211 } 212 213 static struct dst_ops ip6_dst_ops_template = { 214 .family = AF_INET6, 215 .gc = ip6_dst_gc, 216 .gc_thresh = 1024, 217 .check = ip6_dst_check, 218 .default_advmss = ip6_default_advmss, 219 .mtu = ip6_mtu, 220 .cow_metrics = ipv6_cow_metrics, 221 .destroy = ip6_dst_destroy, 222 .ifdown = ip6_dst_ifdown, 223 .negative_advice = ip6_negative_advice, 224 .link_failure = ip6_link_failure, 225 .update_pmtu = ip6_rt_update_pmtu, 226 .redirect = rt6_do_redirect, 227 .local_out = __ip6_local_out, 228 .neigh_lookup = ip6_neigh_lookup, 229 }; 230 231 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) 232 { 233 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 234 235 return mtu ? 
: dst->dev->mtu; 236 } 237 238 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, 239 struct sk_buff *skb, u32 mtu) 240 { 241 } 242 243 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, 244 struct sk_buff *skb) 245 { 246 } 247 248 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst, 249 unsigned long old) 250 { 251 return NULL; 252 } 253 254 static struct dst_ops ip6_dst_blackhole_ops = { 255 .family = AF_INET6, 256 .destroy = ip6_dst_destroy, 257 .check = ip6_dst_check, 258 .mtu = ip6_blackhole_mtu, 259 .default_advmss = ip6_default_advmss, 260 .update_pmtu = ip6_rt_blackhole_update_pmtu, 261 .redirect = ip6_rt_blackhole_redirect, 262 .cow_metrics = ip6_rt_blackhole_cow_metrics, 263 .neigh_lookup = ip6_neigh_lookup, 264 }; 265 266 static const u32 ip6_template_metrics[RTAX_MAX] = { 267 [RTAX_HOPLIMIT - 1] = 0, 268 }; 269 270 static const struct rt6_info ip6_null_entry_template = { 271 .dst = { 272 .__refcnt = ATOMIC_INIT(1), 273 .__use = 1, 274 .obsolete = DST_OBSOLETE_FORCE_CHK, 275 .error = -ENETUNREACH, 276 .input = ip6_pkt_discard, 277 .output = ip6_pkt_discard_out, 278 }, 279 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 280 .rt6i_protocol = RTPROT_KERNEL, 281 .rt6i_metric = ~(u32) 0, 282 .rt6i_ref = ATOMIC_INIT(1), 283 }; 284 285 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 286 287 static const struct rt6_info ip6_prohibit_entry_template = { 288 .dst = { 289 .__refcnt = ATOMIC_INIT(1), 290 .__use = 1, 291 .obsolete = DST_OBSOLETE_FORCE_CHK, 292 .error = -EACCES, 293 .input = ip6_pkt_prohibit, 294 .output = ip6_pkt_prohibit_out, 295 }, 296 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 297 .rt6i_protocol = RTPROT_KERNEL, 298 .rt6i_metric = ~(u32) 0, 299 .rt6i_ref = ATOMIC_INIT(1), 300 }; 301 302 static const struct rt6_info ip6_blk_hole_entry_template = { 303 .dst = { 304 .__refcnt = ATOMIC_INIT(1), 305 .__use = 1, 306 .obsolete = DST_OBSOLETE_FORCE_CHK, 307 .error = -EINVAL, 308 .input = dst_discard, 309 
.output = dst_discard_sk, 310 }, 311 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 312 .rt6i_protocol = RTPROT_KERNEL, 313 .rt6i_metric = ~(u32) 0, 314 .rt6i_ref = ATOMIC_INIT(1), 315 }; 316 317 #endif 318 319 /* allocate dst with ip6_dst_ops */ 320 static struct rt6_info *__ip6_dst_alloc(struct net *net, 321 struct net_device *dev, 322 int flags, 323 struct fib6_table *table) 324 { 325 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 326 0, DST_OBSOLETE_FORCE_CHK, flags); 327 328 if (rt) { 329 struct dst_entry *dst = &rt->dst; 330 331 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); 332 INIT_LIST_HEAD(&rt->rt6i_siblings); 333 INIT_LIST_HEAD(&rt->rt6i_uncached); 334 } 335 return rt; 336 } 337 338 static struct rt6_info *ip6_dst_alloc(struct net *net, 339 struct net_device *dev, 340 int flags, 341 struct fib6_table *table) 342 { 343 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table); 344 345 if (rt) { 346 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); 347 if (rt->rt6i_pcpu) { 348 int cpu; 349 350 for_each_possible_cpu(cpu) { 351 struct rt6_info **p; 352 353 p = per_cpu_ptr(rt->rt6i_pcpu, cpu); 354 /* no one shares rt */ 355 *p = NULL; 356 } 357 } else { 358 dst_destroy((struct dst_entry *)rt); 359 return NULL; 360 } 361 } 362 363 return rt; 364 } 365 366 static void ip6_dst_destroy(struct dst_entry *dst) 367 { 368 struct rt6_info *rt = (struct rt6_info *)dst; 369 struct dst_entry *from = dst->from; 370 struct inet6_dev *idev; 371 372 dst_destroy_metrics_generic(dst); 373 free_percpu(rt->rt6i_pcpu); 374 rt6_uncached_list_del(rt); 375 376 idev = rt->rt6i_idev; 377 if (idev) { 378 rt->rt6i_idev = NULL; 379 in6_dev_put(idev); 380 } 381 382 dst->from = NULL; 383 dst_release(from); 384 } 385 386 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 387 int how) 388 { 389 struct rt6_info *rt = (struct rt6_info *)dst; 390 struct inet6_dev *idev = rt->rt6i_idev; 391 struct net_device *loopback_dev = 392 
dev_net(dev)->loopback_dev; 393 394 if (dev != loopback_dev) { 395 if (idev && idev->dev == dev) { 396 struct inet6_dev *loopback_idev = 397 in6_dev_get(loopback_dev); 398 if (loopback_idev) { 399 rt->rt6i_idev = loopback_idev; 400 in6_dev_put(idev); 401 } 402 } 403 } 404 } 405 406 static bool rt6_check_expired(const struct rt6_info *rt) 407 { 408 if (rt->rt6i_flags & RTF_EXPIRES) { 409 if (time_after(jiffies, rt->dst.expires)) 410 return true; 411 } else if (rt->dst.from) { 412 return rt6_check_expired((struct rt6_info *) rt->dst.from); 413 } 414 return false; 415 } 416 417 /* Multipath route selection: 418 * Hash based function using packet header and flowlabel. 419 * Adapted from fib_info_hashfn() 420 */ 421 static int rt6_info_hash_nhsfn(unsigned int candidate_count, 422 const struct flowi6 *fl6) 423 { 424 unsigned int val = fl6->flowi6_proto; 425 426 val ^= ipv6_addr_hash(&fl6->daddr); 427 val ^= ipv6_addr_hash(&fl6->saddr); 428 429 /* Work only if this not encapsulated */ 430 switch (fl6->flowi6_proto) { 431 case IPPROTO_UDP: 432 case IPPROTO_TCP: 433 case IPPROTO_SCTP: 434 val ^= (__force u16)fl6->fl6_sport; 435 val ^= (__force u16)fl6->fl6_dport; 436 break; 437 438 case IPPROTO_ICMPV6: 439 val ^= (__force u16)fl6->fl6_icmp_type; 440 val ^= (__force u16)fl6->fl6_icmp_code; 441 break; 442 } 443 /* RFC6438 recommands to use flowlabel */ 444 val ^= (__force u32)fl6->flowlabel; 445 446 /* Perhaps, we need to tune, this function? 
*/ 447 val = val ^ (val >> 7) ^ (val >> 12); 448 return val % candidate_count; 449 } 450 451 static struct rt6_info *rt6_multipath_select(struct rt6_info *match, 452 struct flowi6 *fl6, int oif, 453 int strict) 454 { 455 struct rt6_info *sibling, *next_sibling; 456 int route_choosen; 457 458 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6); 459 /* Don't change the route, if route_choosen == 0 460 * (siblings does not include ourself) 461 */ 462 if (route_choosen) 463 list_for_each_entry_safe(sibling, next_sibling, 464 &match->rt6i_siblings, rt6i_siblings) { 465 route_choosen--; 466 if (route_choosen == 0) { 467 if (rt6_score_route(sibling, oif, strict) < 0) 468 break; 469 match = sibling; 470 break; 471 } 472 } 473 return match; 474 } 475 476 /* 477 * Route lookup. Any table->tb6_lock is implied. 478 */ 479 480 static inline struct rt6_info *rt6_device_match(struct net *net, 481 struct rt6_info *rt, 482 const struct in6_addr *saddr, 483 int oif, 484 int flags) 485 { 486 struct rt6_info *local = NULL; 487 struct rt6_info *sprt; 488 489 if (!oif && ipv6_addr_any(saddr)) 490 goto out; 491 492 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { 493 struct net_device *dev = sprt->dst.dev; 494 495 if (oif) { 496 if (dev->ifindex == oif) 497 return sprt; 498 if (dev->flags & IFF_LOOPBACK) { 499 if (!sprt->rt6i_idev || 500 sprt->rt6i_idev->dev->ifindex != oif) { 501 if (flags & RT6_LOOKUP_F_IFACE && oif) 502 continue; 503 if (local && (!oif || 504 local->rt6i_idev->dev->ifindex == oif)) 505 continue; 506 } 507 local = sprt; 508 } 509 } else { 510 if (ipv6_chk_addr(net, saddr, dev, 511 flags & RT6_LOOKUP_F_IFACE)) 512 return sprt; 513 } 514 } 515 516 if (oif) { 517 if (local) 518 return local; 519 520 if (flags & RT6_LOOKUP_F_IFACE) 521 return net->ipv6.ip6_null_entry; 522 } 523 out: 524 return rt; 525 } 526 527 #ifdef CONFIG_IPV6_ROUTER_PREF 528 struct __rt6_probe_work { 529 struct work_struct work; 530 struct in6_addr target; 531 struct net_device *dev; 
532 }; 533 534 static void rt6_probe_deferred(struct work_struct *w) 535 { 536 struct in6_addr mcaddr; 537 struct __rt6_probe_work *work = 538 container_of(w, struct __rt6_probe_work, work); 539 540 addrconf_addr_solict_mult(&work->target, &mcaddr); 541 ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL); 542 dev_put(work->dev); 543 kfree(work); 544 } 545 546 static void rt6_probe(struct rt6_info *rt) 547 { 548 struct __rt6_probe_work *work; 549 struct neighbour *neigh; 550 /* 551 * Okay, this does not seem to be appropriate 552 * for now, however, we need to check if it 553 * is really so; aka Router Reachability Probing. 554 * 555 * Router Reachability Probe MUST be rate-limited 556 * to no more than one per minute. 557 */ 558 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY)) 559 return; 560 rcu_read_lock_bh(); 561 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); 562 if (neigh) { 563 if (neigh->nud_state & NUD_VALID) 564 goto out; 565 566 work = NULL; 567 write_lock(&neigh->lock); 568 if (!(neigh->nud_state & NUD_VALID) && 569 time_after(jiffies, 570 neigh->updated + 571 rt->rt6i_idev->cnf.rtr_probe_interval)) { 572 work = kmalloc(sizeof(*work), GFP_ATOMIC); 573 if (work) 574 __neigh_set_probe_once(neigh); 575 } 576 write_unlock(&neigh->lock); 577 } else { 578 work = kmalloc(sizeof(*work), GFP_ATOMIC); 579 } 580 581 if (work) { 582 INIT_WORK(&work->work, rt6_probe_deferred); 583 work->target = rt->rt6i_gateway; 584 dev_hold(rt->dst.dev); 585 work->dev = rt->dst.dev; 586 schedule_work(&work->work); 587 } 588 589 out: 590 rcu_read_unlock_bh(); 591 } 592 #else 593 static inline void rt6_probe(struct rt6_info *rt) 594 { 595 } 596 #endif 597 598 /* 599 * Default Router Selection (RFC 2461 6.3.6) 600 */ 601 static inline int rt6_check_dev(struct rt6_info *rt, int oif) 602 { 603 struct net_device *dev = rt->dst.dev; 604 if (!oif || dev->ifindex == oif) 605 return 2; 606 if ((dev->flags & IFF_LOOPBACK) && 607 rt->rt6i_idev && 
rt->rt6i_idev->dev->ifindex == oif) 608 return 1; 609 return 0; 610 } 611 612 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) 613 { 614 struct neighbour *neigh; 615 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; 616 617 if (rt->rt6i_flags & RTF_NONEXTHOP || 618 !(rt->rt6i_flags & RTF_GATEWAY)) 619 return RT6_NUD_SUCCEED; 620 621 rcu_read_lock_bh(); 622 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); 623 if (neigh) { 624 read_lock(&neigh->lock); 625 if (neigh->nud_state & NUD_VALID) 626 ret = RT6_NUD_SUCCEED; 627 #ifdef CONFIG_IPV6_ROUTER_PREF 628 else if (!(neigh->nud_state & NUD_FAILED)) 629 ret = RT6_NUD_SUCCEED; 630 else 631 ret = RT6_NUD_FAIL_PROBE; 632 #endif 633 read_unlock(&neigh->lock); 634 } else { 635 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? 636 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR; 637 } 638 rcu_read_unlock_bh(); 639 640 return ret; 641 } 642 643 static int rt6_score_route(struct rt6_info *rt, int oif, 644 int strict) 645 { 646 int m; 647 648 m = rt6_check_dev(rt, oif); 649 if (!m && (strict & RT6_LOOKUP_F_IFACE)) 650 return RT6_NUD_FAIL_HARD; 651 #ifdef CONFIG_IPV6_ROUTER_PREF 652 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; 653 #endif 654 if (strict & RT6_LOOKUP_F_REACHABLE) { 655 int n = rt6_check_neigh(rt); 656 if (n < 0) 657 return n; 658 } 659 return m; 660 } 661 662 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, 663 int *mpri, struct rt6_info *match, 664 bool *do_rr) 665 { 666 int m; 667 bool match_do_rr = false; 668 struct inet6_dev *idev = rt->rt6i_idev; 669 struct net_device *dev = rt->dst.dev; 670 671 if (dev && !netif_carrier_ok(dev) && 672 idev->cnf.ignore_routes_with_linkdown) 673 goto out; 674 675 if (rt6_check_expired(rt)) 676 goto out; 677 678 m = rt6_score_route(rt, oif, strict); 679 if (m == RT6_NUD_FAIL_DO_RR) { 680 match_do_rr = true; 681 m = 0; /* lowest valid score */ 682 } else if (m == RT6_NUD_FAIL_HARD) { 683 goto out; 684 } 685 686 if 
(strict & RT6_LOOKUP_F_REACHABLE) 687 rt6_probe(rt); 688 689 /* note that m can be RT6_NUD_FAIL_PROBE at this point */ 690 if (m > *mpri) { 691 *do_rr = match_do_rr; 692 *mpri = m; 693 match = rt; 694 } 695 out: 696 return match; 697 } 698 699 static struct rt6_info *find_rr_leaf(struct fib6_node *fn, 700 struct rt6_info *rr_head, 701 u32 metric, int oif, int strict, 702 bool *do_rr) 703 { 704 struct rt6_info *rt, *match, *cont; 705 int mpri = -1; 706 707 match = NULL; 708 cont = NULL; 709 for (rt = rr_head; rt; rt = rt->dst.rt6_next) { 710 if (rt->rt6i_metric != metric) { 711 cont = rt; 712 break; 713 } 714 715 match = find_match(rt, oif, strict, &mpri, match, do_rr); 716 } 717 718 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) { 719 if (rt->rt6i_metric != metric) { 720 cont = rt; 721 break; 722 } 723 724 match = find_match(rt, oif, strict, &mpri, match, do_rr); 725 } 726 727 if (match || !cont) 728 return match; 729 730 for (rt = cont; rt; rt = rt->dst.rt6_next) 731 match = find_match(rt, oif, strict, &mpri, match, do_rr); 732 733 return match; 734 } 735 736 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) 737 { 738 struct rt6_info *match, *rt0; 739 struct net *net; 740 bool do_rr = false; 741 742 rt0 = fn->rr_ptr; 743 if (!rt0) 744 fn->rr_ptr = rt0 = fn->leaf; 745 746 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict, 747 &do_rr); 748 749 if (do_rr) { 750 struct rt6_info *next = rt0->dst.rt6_next; 751 752 /* no entries matched; do round-robin */ 753 if (!next || next->rt6i_metric != rt0->rt6i_metric) 754 next = fn->leaf; 755 756 if (next != rt0) 757 fn->rr_ptr = next; 758 } 759 760 net = dev_net(rt0->dst.dev); 761 return match ? 
match : net->ipv6.ip6_null_entry; 762 } 763 764 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt) 765 { 766 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)); 767 } 768 769 #ifdef CONFIG_IPV6_ROUTE_INFO 770 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 771 const struct in6_addr *gwaddr) 772 { 773 struct net *net = dev_net(dev); 774 struct route_info *rinfo = (struct route_info *) opt; 775 struct in6_addr prefix_buf, *prefix; 776 unsigned int pref; 777 unsigned long lifetime; 778 struct rt6_info *rt; 779 780 if (len < sizeof(struct route_info)) { 781 return -EINVAL; 782 } 783 784 /* Sanity check for prefix_len and length */ 785 if (rinfo->length > 3) { 786 return -EINVAL; 787 } else if (rinfo->prefix_len > 128) { 788 return -EINVAL; 789 } else if (rinfo->prefix_len > 64) { 790 if (rinfo->length < 2) { 791 return -EINVAL; 792 } 793 } else if (rinfo->prefix_len > 0) { 794 if (rinfo->length < 1) { 795 return -EINVAL; 796 } 797 } 798 799 pref = rinfo->route_pref; 800 if (pref == ICMPV6_ROUTER_PREF_INVALID) 801 return -EINVAL; 802 803 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); 804 805 if (rinfo->length == 3) 806 prefix = (struct in6_addr *)rinfo->prefix; 807 else { 808 /* this function is safe */ 809 ipv6_addr_prefix(&prefix_buf, 810 (struct in6_addr *)rinfo->prefix, 811 rinfo->prefix_len); 812 prefix = &prefix_buf; 813 } 814 815 if (rinfo->prefix_len == 0) 816 rt = rt6_get_dflt_router(gwaddr, dev); 817 else 818 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, 819 gwaddr, dev->ifindex); 820 821 if (rt && !lifetime) { 822 ip6_del_rt(rt); 823 rt = NULL; 824 } 825 826 if (!rt && lifetime) 827 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex, 828 pref); 829 else if (rt) 830 rt->rt6i_flags = RTF_ROUTEINFO | 831 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 832 833 if (rt) { 834 if (!addrconf_finite_timeout(lifetime)) 835 rt6_clean_expires(rt); 836 else 837 rt6_set_expires(rt, jiffies 
+ HZ * lifetime); 838 839 ip6_rt_put(rt); 840 } 841 return 0; 842 } 843 #endif 844 845 static struct fib6_node* fib6_backtrack(struct fib6_node *fn, 846 struct in6_addr *saddr) 847 { 848 struct fib6_node *pn; 849 while (1) { 850 if (fn->fn_flags & RTN_TL_ROOT) 851 return NULL; 852 pn = fn->parent; 853 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) 854 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); 855 else 856 fn = pn; 857 if (fn->fn_flags & RTN_RTINFO) 858 return fn; 859 } 860 } 861 862 static struct rt6_info *ip6_pol_route_lookup(struct net *net, 863 struct fib6_table *table, 864 struct flowi6 *fl6, int flags) 865 { 866 struct fib6_node *fn; 867 struct rt6_info *rt; 868 869 read_lock_bh(&table->tb6_lock); 870 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 871 restart: 872 rt = fn->leaf; 873 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); 874 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) 875 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags); 876 if (rt == net->ipv6.ip6_null_entry) { 877 fn = fib6_backtrack(fn, &fl6->saddr); 878 if (fn) 879 goto restart; 880 } 881 dst_use(&rt->dst, jiffies); 882 read_unlock_bh(&table->tb6_lock); 883 return rt; 884 885 } 886 887 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, 888 int flags) 889 { 890 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup); 891 } 892 EXPORT_SYMBOL_GPL(ip6_route_lookup); 893 894 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, 895 const struct in6_addr *saddr, int oif, int strict) 896 { 897 struct flowi6 fl6 = { 898 .flowi6_oif = oif, 899 .daddr = *daddr, 900 }; 901 struct dst_entry *dst; 902 int flags = strict ? 
RT6_LOOKUP_F_IFACE : 0; 903 904 if (saddr) { 905 memcpy(&fl6.saddr, saddr, sizeof(*saddr)); 906 flags |= RT6_LOOKUP_F_HAS_SADDR; 907 } 908 909 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup); 910 if (dst->error == 0) 911 return (struct rt6_info *) dst; 912 913 dst_release(dst); 914 915 return NULL; 916 } 917 EXPORT_SYMBOL(rt6_lookup); 918 919 /* ip6_ins_rt is called with FREE table->tb6_lock. 920 It takes new route entry, the addition fails by any reason the 921 route is freed. In any case, if caller does not hold it, it may 922 be destroyed. 923 */ 924 925 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, 926 struct mx6_config *mxc) 927 { 928 int err; 929 struct fib6_table *table; 930 931 table = rt->rt6i_table; 932 write_lock_bh(&table->tb6_lock); 933 err = fib6_add(&table->tb6_root, rt, info, mxc); 934 write_unlock_bh(&table->tb6_lock); 935 936 return err; 937 } 938 939 int ip6_ins_rt(struct rt6_info *rt) 940 { 941 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; 942 struct mx6_config mxc = { .mx = NULL, }; 943 944 return __ip6_ins_rt(rt, &info, &mxc); 945 } 946 947 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, 948 const struct in6_addr *daddr, 949 const struct in6_addr *saddr) 950 { 951 struct rt6_info *rt; 952 953 /* 954 * Clone the route. 
955 */ 956 957 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) 958 ort = (struct rt6_info *)ort->dst.from; 959 960 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 961 0, ort->rt6i_table); 962 963 if (!rt) 964 return NULL; 965 966 ip6_rt_copy_init(rt, ort); 967 rt->rt6i_flags |= RTF_CACHE; 968 rt->rt6i_metric = 0; 969 rt->dst.flags |= DST_HOST; 970 rt->rt6i_dst.addr = *daddr; 971 rt->rt6i_dst.plen = 128; 972 973 if (!rt6_is_gw_or_nonexthop(ort)) { 974 if (ort->rt6i_dst.plen != 128 && 975 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) 976 rt->rt6i_flags |= RTF_ANYCAST; 977 #ifdef CONFIG_IPV6_SUBTREES 978 if (rt->rt6i_src.plen && saddr) { 979 rt->rt6i_src.addr = *saddr; 980 rt->rt6i_src.plen = 128; 981 } 982 #endif 983 } 984 985 return rt; 986 } 987 988 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) 989 { 990 struct rt6_info *pcpu_rt; 991 992 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev), 993 rt->dst.dev, rt->dst.flags, 994 rt->rt6i_table); 995 996 if (!pcpu_rt) 997 return NULL; 998 ip6_rt_copy_init(pcpu_rt, rt); 999 pcpu_rt->rt6i_protocol = rt->rt6i_protocol; 1000 pcpu_rt->rt6i_flags |= RTF_PCPU; 1001 return pcpu_rt; 1002 } 1003 1004 /* It should be called with read_lock_bh(&tb6_lock) acquired */ 1005 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) 1006 { 1007 struct rt6_info *pcpu_rt, *prev, **p; 1008 1009 p = this_cpu_ptr(rt->rt6i_pcpu); 1010 pcpu_rt = *p; 1011 1012 if (pcpu_rt) 1013 goto done; 1014 1015 pcpu_rt = ip6_rt_pcpu_alloc(rt); 1016 if (!pcpu_rt) { 1017 struct net *net = dev_net(rt->dst.dev); 1018 1019 pcpu_rt = net->ipv6.ip6_null_entry; 1020 goto done; 1021 } 1022 1023 prev = cmpxchg(p, NULL, pcpu_rt); 1024 if (prev) { 1025 /* If someone did it before us, return prev instead */ 1026 dst_destroy(&pcpu_rt->dst); 1027 pcpu_rt = prev; 1028 } 1029 1030 done: 1031 dst_hold(&pcpu_rt->dst); 1032 rt6_dst_from_metrics_check(pcpu_rt); 1033 return pcpu_rt; 1034 } 1035 1036 static struct rt6_info *ip6_pol_route(struct net *net, 
struct fib6_table *table, int oif,
				      struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	/* On a pure host (forwarding off), prefer (probably) reachable
	 * routers first; this is relaxed below if nothing matches.
	 */
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		/* Nothing at this node: walk back up the tree, then retry
		 * once more from the original node without REACHABLE.
		 */
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		/* Null entry or existing cache clone: return it directly. */
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		/* rt6_get_pcpu_route() must run under tb6_lock (read). */
		pcpu_rt = rt6_get_pcpu_route(rt);
		read_unlock_bh(&table->tb6_lock);

		return pcpu_rt;
	}
}

static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}

static struct dst_entry *ip6_route_input_lookup(struct net *net,
						struct net_device *dev,
						struct flowi6 *fl6, int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}

/* Route an incoming packet: build a flow from the IPv6 header and
 * attach the resulting dst to @skb.
 */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	skb_dst_drop(skb);
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}

static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
}

/* Output-path route lookup; @sk (may be NULL) supplies bound device
 * and source-address preferences.
 */
struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
				   struct flowi6 *fl6)
{
	int flags = 0;

	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!ipv6_addr_any(&fl6->saddr))
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL(ip6_route_output);

/* Clone @dst_orig into a blackhole dst that discards all traffic
 * (used by xfrm when a policy forbids a flow).  Consumes the caller's
 * reference on @dst_orig.
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		new = &rt->dst;

		/* Clear everything past the generic dst_entry header. */
		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));

		new->__use = 1;
		new->input = dst_discard;
		new->output = dst_discard_sk;

		if (dst_metrics_read_only(&ort->dst))
			new->_metrics = ort->dst._metrics;
		else
			dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		rt->rt6i_flags = ort->rt6i_flags;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* NOTE(review): new is passed to dst_free() yet still
		 * returned holding refcnt 1 from dst_alloc() — presumably
		 * relies on dst GC semantics for uncached entries; confirm
		 * against dst_alloc()/dst_free() in net/core/dst.c.
		 */
		dst_free(new);
	}

	dst_release(dst_orig);
	return new ?
new : ERR_PTR(-ENOMEM);
}

/*
 *	Destination cache support functions
 */

/* A clone shares metrics with its origin (dst.from); re-point our
 * metrics if the origin's metrics pointer changed underneath us.
 */
static void rt6_dst_from_metrics_check(struct rt6_info *rt)
{
	if (rt->dst.from &&
	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
}

/* Validate a tree-owned route against the fib6 node serial-number
 * cookie.  Returns the dst if still valid, NULL if it must be
 * re-looked up.
 */
static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
		return NULL;

	if (rt6_check_expired(rt))
		return NULL;

	return &rt->dst;
}

/* Validate a clone by checking the route it was copied from. */
static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
		return &rt->dst;
	else
		return NULL;
}

static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */

	rt6_dst_from_metrics_check(rt);

	/* Per-cpu and uncached clones are validated via their origin. */
	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
		return rt6_dst_from_check(rt, cookie);
	else
		return rt6_check(rt, cookie);
}

static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* Cache clones are only unlinked once expired. */
			if (rt6_check_expired(rt)) {
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}

static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* Hold a ref so we can dst_free() if the delete
			 * did not consume the entry.
			 */
			dst_hold(&rt->dst);
			if (ip6_del_rt(rt))
				dst_free(&rt->dst);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			/* Bump the node out of date so cached sockets
			 * fail rt6_check() and re-select a router.
			 */
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}

/* Record a new path MTU on a cache clone and arm its expiry timer. */
static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);

	rt->rt6i_flags |= RTF_MODIFIED;
	rt->rt6i_pmtu = mtu;
	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}

static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	dst_confirm(dst);
	/* Never go below the IPv6 minimum MTU; ignore non-shrinking
	 * updates.
	 */
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	if (mtu >= dst_mtu(dst))
		return;

	if (rt6->rt6i_flags & RTF_CACHE) {
		rt6_do_update_pmtu(rt6, mtu);
	} else {
		/* Not a cache clone: create one to carry the new MTU.
		 * Addresses come from the packet header if available,
		 * otherwise from the socket.
		 */
		const struct in6_addr *daddr, *saddr;
		struct rt6_info *nrt6;

		if (iph) {
			daddr = &iph->daddr;
			saddr = &iph->saddr;
		} else if (sk) {
			daddr = &sk->sk_v6_daddr;
			saddr = &inet6_sk(sk)->saddr;
		} else {
			return;
		}
		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
		}
	}
}

static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			       struct sk_buff *skb, u32 mtu)
{
	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
}

/* Update the path MTU for the flow described by the IPv6 header at
 * skb->data (callers without socket context, e.g. tunnels).
 */
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
		     int oif, u32 mark)
{
	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
	struct dst_entry *dst;
	struct flowi6 fl6;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
	fl6.daddr = iph->daddr;
	fl6.saddr = iph->saddr;
	fl6.flowlabel = ip6_flowinfo(iph);

	dst = ip6_route_output(net, NULL, &fl6);
	if (!dst->error)
		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
	dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);

void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);

/* Handle redirects */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;	/* router the Redirect came from */
};

static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from appropriate router.
1399 * 1400 * RFC 4861 specifies that redirects should only be 1401 * accepted if they come from the nexthop to the target. 1402 * Due to the way the routes are chosen, this notion 1403 * is a bit fuzzy and one might need to check all possible 1404 * routes. 1405 */ 1406 1407 read_lock_bh(&table->tb6_lock); 1408 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 1409 restart: 1410 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { 1411 if (rt6_check_expired(rt)) 1412 continue; 1413 if (rt->dst.error) 1414 break; 1415 if (!(rt->rt6i_flags & RTF_GATEWAY)) 1416 continue; 1417 if (fl6->flowi6_oif != rt->dst.dev->ifindex) 1418 continue; 1419 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) 1420 continue; 1421 break; 1422 } 1423 1424 if (!rt) 1425 rt = net->ipv6.ip6_null_entry; 1426 else if (rt->dst.error) { 1427 rt = net->ipv6.ip6_null_entry; 1428 goto out; 1429 } 1430 1431 if (rt == net->ipv6.ip6_null_entry) { 1432 fn = fib6_backtrack(fn, &fl6->saddr); 1433 if (fn) 1434 goto restart; 1435 } 1436 1437 out: 1438 dst_hold(&rt->dst); 1439 1440 read_unlock_bh(&table->tb6_lock); 1441 1442 return rt; 1443 }; 1444 1445 static struct dst_entry *ip6_route_redirect(struct net *net, 1446 const struct flowi6 *fl6, 1447 const struct in6_addr *gateway) 1448 { 1449 int flags = RT6_LOOKUP_F_HAS_SADDR; 1450 struct ip6rd_flowi rdfl; 1451 1452 rdfl.fl6 = *fl6; 1453 rdfl.gateway = *gateway; 1454 1455 return fib6_rule_lookup(net, &rdfl.fl6, 1456 flags, __ip6_route_redirect); 1457 } 1458 1459 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) 1460 { 1461 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 1462 struct dst_entry *dst; 1463 struct flowi6 fl6; 1464 1465 memset(&fl6, 0, sizeof(fl6)); 1466 fl6.flowi6_iif = LOOPBACK_IFINDEX; 1467 fl6.flowi6_oif = oif; 1468 fl6.flowi6_mark = mark; 1469 fl6.daddr = iph->daddr; 1470 fl6.saddr = iph->saddr; 1471 fl6.flowlabel = ip6_flowinfo(iph); 1472 1473 dst = ip6_route_redirect(net, &fl6, 
&ipv6_hdr(skb)->saddr); 1474 rt6_do_redirect(dst, NULL, skb); 1475 dst_release(dst); 1476 } 1477 EXPORT_SYMBOL_GPL(ip6_redirect); 1478 1479 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, 1480 u32 mark) 1481 { 1482 const struct ipv6hdr *iph = ipv6_hdr(skb); 1483 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb); 1484 struct dst_entry *dst; 1485 struct flowi6 fl6; 1486 1487 memset(&fl6, 0, sizeof(fl6)); 1488 fl6.flowi6_iif = LOOPBACK_IFINDEX; 1489 fl6.flowi6_oif = oif; 1490 fl6.flowi6_mark = mark; 1491 fl6.daddr = msg->dest; 1492 fl6.saddr = iph->daddr; 1493 1494 dst = ip6_route_redirect(net, &fl6, &iph->saddr); 1495 rt6_do_redirect(dst, NULL, skb); 1496 dst_release(dst); 1497 } 1498 1499 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) 1500 { 1501 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark); 1502 } 1503 EXPORT_SYMBOL_GPL(ip6_sk_redirect); 1504 1505 static unsigned int ip6_default_advmss(const struct dst_entry *dst) 1506 { 1507 struct net_device *dev = dst->dev; 1508 unsigned int mtu = dst_mtu(dst); 1509 struct net *net = dev_net(dev); 1510 1511 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 1512 1513 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) 1514 mtu = net->ipv6.sysctl.ip6_rt_min_advmss; 1515 1516 /* 1517 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 1518 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
1519 * IPV6_MAXPLEN is also valid and means: "any MSS, 1520 * rely only on pmtu discovery" 1521 */ 1522 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 1523 mtu = IPV6_MAXPLEN; 1524 return mtu; 1525 } 1526 1527 static unsigned int ip6_mtu(const struct dst_entry *dst) 1528 { 1529 const struct rt6_info *rt = (const struct rt6_info *)dst; 1530 unsigned int mtu = rt->rt6i_pmtu; 1531 struct inet6_dev *idev; 1532 1533 if (mtu) 1534 goto out; 1535 1536 mtu = dst_metric_raw(dst, RTAX_MTU); 1537 if (mtu) 1538 goto out; 1539 1540 mtu = IPV6_MIN_MTU; 1541 1542 rcu_read_lock(); 1543 idev = __in6_dev_get(dst->dev); 1544 if (idev) 1545 mtu = idev->cnf.mtu6; 1546 rcu_read_unlock(); 1547 1548 out: 1549 return min_t(unsigned int, mtu, IP6_MAX_MTU); 1550 } 1551 1552 static struct dst_entry *icmp6_dst_gc_list; 1553 static DEFINE_SPINLOCK(icmp6_dst_lock); 1554 1555 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 1556 struct flowi6 *fl6) 1557 { 1558 struct dst_entry *dst; 1559 struct rt6_info *rt; 1560 struct inet6_dev *idev = in6_dev_get(dev); 1561 struct net *net = dev_net(dev); 1562 1563 if (unlikely(!idev)) 1564 return ERR_PTR(-ENODEV); 1565 1566 rt = ip6_dst_alloc(net, dev, 0, NULL); 1567 if (unlikely(!rt)) { 1568 in6_dev_put(idev); 1569 dst = ERR_PTR(-ENOMEM); 1570 goto out; 1571 } 1572 1573 rt->dst.flags |= DST_HOST; 1574 rt->dst.output = ip6_output; 1575 atomic_set(&rt->dst.__refcnt, 1); 1576 rt->rt6i_gateway = fl6->daddr; 1577 rt->rt6i_dst.addr = fl6->daddr; 1578 rt->rt6i_dst.plen = 128; 1579 rt->rt6i_idev = idev; 1580 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0); 1581 1582 spin_lock_bh(&icmp6_dst_lock); 1583 rt->dst.next = icmp6_dst_gc_list; 1584 icmp6_dst_gc_list = &rt->dst; 1585 spin_unlock_bh(&icmp6_dst_lock); 1586 1587 fib6_force_start_gc(net); 1588 1589 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); 1590 1591 out: 1592 return dst; 1593 } 1594 1595 int icmp6_dst_gc(void) 1596 { 1597 struct dst_entry *dst, **pprev; 1598 int more = 0; 1599 1600 
spin_lock_bh(&icmp6_dst_lock); 1601 pprev = &icmp6_dst_gc_list; 1602 1603 while ((dst = *pprev) != NULL) { 1604 if (!atomic_read(&dst->__refcnt)) { 1605 *pprev = dst->next; 1606 dst_free(dst); 1607 } else { 1608 pprev = &dst->next; 1609 ++more; 1610 } 1611 } 1612 1613 spin_unlock_bh(&icmp6_dst_lock); 1614 1615 return more; 1616 } 1617 1618 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), 1619 void *arg) 1620 { 1621 struct dst_entry *dst, **pprev; 1622 1623 spin_lock_bh(&icmp6_dst_lock); 1624 pprev = &icmp6_dst_gc_list; 1625 while ((dst = *pprev) != NULL) { 1626 struct rt6_info *rt = (struct rt6_info *) dst; 1627 if (func(rt, arg)) { 1628 *pprev = dst->next; 1629 dst_free(dst); 1630 } else { 1631 pprev = &dst->next; 1632 } 1633 } 1634 spin_unlock_bh(&icmp6_dst_lock); 1635 } 1636 1637 static int ip6_dst_gc(struct dst_ops *ops) 1638 { 1639 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); 1640 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; 1641 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; 1642 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 1643 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 1644 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 1645 int entries; 1646 1647 entries = dst_entries_get_fast(ops); 1648 if (time_after(rt_last_gc + rt_min_interval, jiffies) && 1649 entries <= rt_max_size) 1650 goto out; 1651 1652 net->ipv6.ip6_rt_gc_expire++; 1653 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true); 1654 entries = dst_entries_get_slow(ops); 1655 if (entries < ops->gc_thresh) 1656 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1657 out: 1658 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1659 return entries > rt_max_size; 1660 } 1661 1662 static int ip6_convert_metrics(struct mx6_config *mxc, 1663 const struct fib6_config *cfg) 1664 { 1665 struct nlattr *nla; 1666 int remaining; 1667 u32 *mp; 1668 1669 if (!cfg->fc_mx) 1670 return 0; 1671 1672 mp = 
kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); 1673 if (unlikely(!mp)) 1674 return -ENOMEM; 1675 1676 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 1677 int type = nla_type(nla); 1678 1679 if (type) { 1680 u32 val; 1681 1682 if (unlikely(type > RTAX_MAX)) 1683 goto err; 1684 if (type == RTAX_CC_ALGO) { 1685 char tmp[TCP_CA_NAME_MAX]; 1686 1687 nla_strlcpy(tmp, nla, sizeof(tmp)); 1688 val = tcp_ca_get_key_by_name(tmp); 1689 if (val == TCP_CA_UNSPEC) 1690 goto err; 1691 } else { 1692 val = nla_get_u32(nla); 1693 } 1694 1695 mp[type - 1] = val; 1696 __set_bit(type - 1, mxc->mx_valid); 1697 } 1698 } 1699 1700 mxc->mx = mp; 1701 1702 return 0; 1703 err: 1704 kfree(mp); 1705 return -EINVAL; 1706 } 1707 1708 int ip6_route_add(struct fib6_config *cfg) 1709 { 1710 int err; 1711 struct net *net = cfg->fc_nlinfo.nl_net; 1712 struct rt6_info *rt = NULL; 1713 struct net_device *dev = NULL; 1714 struct inet6_dev *idev = NULL; 1715 struct fib6_table *table; 1716 struct mx6_config mxc = { .mx = NULL, }; 1717 int addr_type; 1718 1719 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) 1720 return -EINVAL; 1721 #ifndef CONFIG_IPV6_SUBTREES 1722 if (cfg->fc_src_len) 1723 return -EINVAL; 1724 #endif 1725 if (cfg->fc_ifindex) { 1726 err = -ENODEV; 1727 dev = dev_get_by_index(net, cfg->fc_ifindex); 1728 if (!dev) 1729 goto out; 1730 idev = in6_dev_get(dev); 1731 if (!idev) 1732 goto out; 1733 } 1734 1735 if (cfg->fc_metric == 0) 1736 cfg->fc_metric = IP6_RT_PRIO_USER; 1737 1738 err = -ENOBUFS; 1739 if (cfg->fc_nlinfo.nlh && 1740 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { 1741 table = fib6_get_table(net, cfg->fc_table); 1742 if (!table) { 1743 pr_warn("NLM_F_CREATE should be specified when creating new route\n"); 1744 table = fib6_new_table(net, cfg->fc_table); 1745 } 1746 } else { 1747 table = fib6_new_table(net, cfg->fc_table); 1748 } 1749 1750 if (!table) 1751 goto out; 1752 1753 rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 
0 : DST_NOCOUNT, table); 1754 1755 if (!rt) { 1756 err = -ENOMEM; 1757 goto out; 1758 } 1759 1760 if (cfg->fc_flags & RTF_EXPIRES) 1761 rt6_set_expires(rt, jiffies + 1762 clock_t_to_jiffies(cfg->fc_expires)); 1763 else 1764 rt6_clean_expires(rt); 1765 1766 if (cfg->fc_protocol == RTPROT_UNSPEC) 1767 cfg->fc_protocol = RTPROT_BOOT; 1768 rt->rt6i_protocol = cfg->fc_protocol; 1769 1770 addr_type = ipv6_addr_type(&cfg->fc_dst); 1771 1772 if (addr_type & IPV6_ADDR_MULTICAST) 1773 rt->dst.input = ip6_mc_input; 1774 else if (cfg->fc_flags & RTF_LOCAL) 1775 rt->dst.input = ip6_input; 1776 else 1777 rt->dst.input = ip6_forward; 1778 1779 rt->dst.output = ip6_output; 1780 1781 if (cfg->fc_encap) { 1782 struct lwtunnel_state *lwtstate; 1783 1784 err = lwtunnel_build_state(dev, cfg->fc_encap_type, 1785 cfg->fc_encap, &lwtstate); 1786 if (err) 1787 goto out; 1788 rt->dst.lwtstate = lwtstate_get(lwtstate); 1789 if (lwtunnel_output_redirect(rt->dst.lwtstate)) { 1790 rt->dst.lwtstate->orig_output = rt->dst.output; 1791 rt->dst.output = lwtunnel_output; 1792 } 1793 if (lwtunnel_input_redirect(rt->dst.lwtstate)) { 1794 rt->dst.lwtstate->orig_input = rt->dst.input; 1795 rt->dst.input = lwtunnel_input; 1796 } 1797 } 1798 1799 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 1800 rt->rt6i_dst.plen = cfg->fc_dst_len; 1801 if (rt->rt6i_dst.plen == 128) 1802 rt->dst.flags |= DST_HOST; 1803 1804 #ifdef CONFIG_IPV6_SUBTREES 1805 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 1806 rt->rt6i_src.plen = cfg->fc_src_len; 1807 #endif 1808 1809 rt->rt6i_metric = cfg->fc_metric; 1810 1811 /* We cannot add true routes via loopback here, 1812 they would result in kernel looping; promote them to reject routes 1813 */ 1814 if ((cfg->fc_flags & RTF_REJECT) || 1815 (dev && (dev->flags & IFF_LOOPBACK) && 1816 !(addr_type & IPV6_ADDR_LOOPBACK) && 1817 !(cfg->fc_flags & RTF_LOCAL))) { 1818 /* hold loopback dev/idev if we haven't done so. 
*/ 1819 if (dev != net->loopback_dev) { 1820 if (dev) { 1821 dev_put(dev); 1822 in6_dev_put(idev); 1823 } 1824 dev = net->loopback_dev; 1825 dev_hold(dev); 1826 idev = in6_dev_get(dev); 1827 if (!idev) { 1828 err = -ENODEV; 1829 goto out; 1830 } 1831 } 1832 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 1833 switch (cfg->fc_type) { 1834 case RTN_BLACKHOLE: 1835 rt->dst.error = -EINVAL; 1836 rt->dst.output = dst_discard_sk; 1837 rt->dst.input = dst_discard; 1838 break; 1839 case RTN_PROHIBIT: 1840 rt->dst.error = -EACCES; 1841 rt->dst.output = ip6_pkt_prohibit_out; 1842 rt->dst.input = ip6_pkt_prohibit; 1843 break; 1844 case RTN_THROW: 1845 default: 1846 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN 1847 : -ENETUNREACH; 1848 rt->dst.output = ip6_pkt_discard_out; 1849 rt->dst.input = ip6_pkt_discard; 1850 break; 1851 } 1852 goto install_route; 1853 } 1854 1855 if (cfg->fc_flags & RTF_GATEWAY) { 1856 const struct in6_addr *gw_addr; 1857 int gwa_type; 1858 1859 gw_addr = &cfg->fc_gateway; 1860 gwa_type = ipv6_addr_type(gw_addr); 1861 1862 /* if gw_addr is local we will fail to detect this in case 1863 * address is still TENTATIVE (DAD in progress). rt6_lookup() 1864 * will return already-added prefix route via interface that 1865 * prefix route was assigned to, which might be non-loopback. 1866 */ 1867 err = -EINVAL; 1868 if (ipv6_chk_addr_and_flags(net, gw_addr, 1869 gwa_type & IPV6_ADDR_LINKLOCAL ? 1870 dev : NULL, 0, 0)) 1871 goto out; 1872 1873 rt->rt6i_gateway = *gw_addr; 1874 1875 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 1876 struct rt6_info *grt; 1877 1878 /* IPv6 strictly inhibits using not link-local 1879 addresses as nexthop address. 1880 Otherwise, router will not able to send redirects. 1881 It is very good, but in some (rare!) circumstances 1882 (SIT, PtP, NBMA NOARP links) it is handy to allow 1883 some exceptions. 
--ANK 1884 */ 1885 if (!(gwa_type & IPV6_ADDR_UNICAST)) 1886 goto out; 1887 1888 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); 1889 1890 err = -EHOSTUNREACH; 1891 if (!grt) 1892 goto out; 1893 if (dev) { 1894 if (dev != grt->dst.dev) { 1895 ip6_rt_put(grt); 1896 goto out; 1897 } 1898 } else { 1899 dev = grt->dst.dev; 1900 idev = grt->rt6i_idev; 1901 dev_hold(dev); 1902 in6_dev_hold(grt->rt6i_idev); 1903 } 1904 if (!(grt->rt6i_flags & RTF_GATEWAY)) 1905 err = 0; 1906 ip6_rt_put(grt); 1907 1908 if (err) 1909 goto out; 1910 } 1911 err = -EINVAL; 1912 if (!dev || (dev->flags & IFF_LOOPBACK)) 1913 goto out; 1914 } 1915 1916 err = -ENODEV; 1917 if (!dev) 1918 goto out; 1919 1920 if (!ipv6_addr_any(&cfg->fc_prefsrc)) { 1921 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { 1922 err = -EINVAL; 1923 goto out; 1924 } 1925 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc; 1926 rt->rt6i_prefsrc.plen = 128; 1927 } else 1928 rt->rt6i_prefsrc.plen = 0; 1929 1930 rt->rt6i_flags = cfg->fc_flags; 1931 1932 install_route: 1933 rt->dst.dev = dev; 1934 rt->rt6i_idev = idev; 1935 rt->rt6i_table = table; 1936 1937 cfg->fc_nlinfo.nl_net = dev_net(dev); 1938 1939 err = ip6_convert_metrics(&mxc, cfg); 1940 if (err) 1941 goto out; 1942 1943 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc); 1944 1945 kfree(mxc.mx); 1946 return err; 1947 out: 1948 if (dev) 1949 dev_put(dev); 1950 if (idev) 1951 in6_dev_put(idev); 1952 if (rt) 1953 dst_free(&rt->dst); 1954 return err; 1955 } 1956 1957 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) 1958 { 1959 int err; 1960 struct fib6_table *table; 1961 struct net *net = dev_net(rt->dst.dev); 1962 1963 if (rt == net->ipv6.ip6_null_entry) { 1964 err = -ENOENT; 1965 goto out; 1966 } 1967 1968 table = rt->rt6i_table; 1969 write_lock_bh(&table->tb6_lock); 1970 err = fib6_del(rt, info); 1971 write_unlock_bh(&table->tb6_lock); 1972 1973 out: 1974 ip6_rt_put(rt); 1975 return err; 1976 } 1977 1978 int ip6_del_rt(struct rt6_info *rt) 1979 { 1980 
struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};
	return __ip6_del_rt(rt, &info);
}

/* Delete the first route in the table matching @cfg's destination,
 * source, device, gateway and metric filters.  Returns -ESRCH when
 * nothing matches.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			/* Cache clones are only deleted when explicitly
			 * requested via RTF_CACHE.
			 */
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			/* Hold a ref across the unlock; __ip6_del_rt()
			 * consumes it.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}

/* Validate a received ICMPv6 Redirect (RFC 4861 §8) and, if accepted,
 * update the neighbour cache and install an RTF_CACHE route via the
 * new first hop.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct net *net = dev_net(skb->dev);
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* dest == target means the destination itself is on-link. */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* Routers and interfaces with accept_redirects off ignore them. */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt == net->ipv6.ip6_null_entry) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm(&rt->dst);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	neigh_update(neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER))
		     );

	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	if (rt->rt6i_flags & RTF_CACHE) {
		/* The old cache entry is superseded: remove it.  The ref
		 * taken by dst_clone() is consumed by ip6_del_rt().
		 */
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}

/*
 *	Misc support functions
 */

/* Link clone @rt to its origin @from, sharing its metrics.  @from must
 * itself be an original route (BUG otherwise).
 */
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->dst.from);

	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->dst.from = &from->dst;
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}

/* Copy routing state from @ort into freshly allocated clone @rt. */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}

#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int
prefixlen,
					   const struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	/* Match an RA route-information route by device and gateway. */
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}

/* Install a route learned from an RA Route Information option and
 * return a held reference to the resulting table entry (or NULL).
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr, int ifindex,
					   unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table = RT6_TABLE_INFO,
		.fc_metric = IP6_RT_PRIO_USER,
		.fc_ifindex = ifindex,
		.fc_dst_len = prefixlen,
		.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
			    RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	cfg.fc_dst = *prefix;
	cfg.fc_gateway = *gwaddr;

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
#endif

/* Find the RA-learned default route via gateway @addr on @dev; the
 * returned entry (if any) carries a held reference.
 */
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (dev == rt->dst.dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);
	read_unlock_bh(&table->tb6_lock);
	return rt;
}

/* Install an RA-learned default router and return a held reference to
 * the resulting table entry (or NULL).
 */
struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
				     struct net_device *dev,
				     unsigned int pref)
{
	struct fib6_config cfg = {
		.fc_table = RT6_TABLE_DFLT,
		.fc_metric = IP6_RT_PRIO_USER,
		.fc_ifindex = dev->ifindex,
		.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
			    RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_nlinfo.portid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = dev_net(dev),
	};

	cfg.fc_gateway = *gwaddr;

	ip6_route_add(&cfg);

	return rt6_get_dflt_router(gwaddr, dev);
}

/* Remove all RA-learned default/addrconf routes (unless the interface
 * is configured with accept_ra == 2).
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct rt6_info *rt;
	struct fib6_table *table;

	/* NOTE: Keep consistent with rt6_get_dflt_router */
	table = fib6_get_table(net, RT6_TABLE_DFLT);
	if (!table)
		return;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
			/* Deleting needs the write lock: hold a ref, drop
			 * the read lock, delete, then rescan from the top.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;
		}
	}
	read_unlock_bh(&table->tb6_lock);
}

/* Translate the legacy ioctl in6_rtmsg into a fib6_config. */
static void rtmsg_to_fib6_config(struct net *net,
				 struct in6_rtmsg *rtmsg,
				 struct fib6_config *cfg)
{
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = RT6_TABLE_MAIN;
	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
	cfg->fc_metric = rtmsg->rtmsg_metric;
	cfg->fc_expires = rtmsg->rtmsg_info;
	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
	cfg->fc_src_len = rtmsg->rtmsg_src_len;
	cfg->fc_flags = rtmsg->rtmsg_flags;

	cfg->fc_nlinfo.nl_net = net;

	cfg->fc_dst = rtmsg->rtmsg_dst;
	cfg->fc_src = rtmsg->rtmsg_src;
	cfg->fc_gateway = rtmsg->rtmsg_gateway;
}

/* SIOCADDRT/SIOCDELRT ioctl entry point.  Requires CAP_NET_ADMIN;
 * copies an in6_rtmsg from userspace and adds/deletes the route under
 * the RTNL lock.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch (cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}

/*
 *	Drop the packet on the floor
 */

static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
IPSTATS_MIB_INADDRERRORS); 2384 break; 2385 } 2386 /* FALLTHROUGH */ 2387 case IPSTATS_MIB_OUTNOROUTES: 2388 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 2389 ipstats_mib_noroutes); 2390 break; 2391 } 2392 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); 2393 kfree_skb(skb); 2394 return 0; 2395 } 2396 2397 static int ip6_pkt_discard(struct sk_buff *skb) 2398 { 2399 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 2400 } 2401 2402 static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb) 2403 { 2404 skb->dev = skb_dst(skb)->dev; 2405 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 2406 } 2407 2408 static int ip6_pkt_prohibit(struct sk_buff *skb) 2409 { 2410 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 2411 } 2412 2413 static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb) 2414 { 2415 skb->dev = skb_dst(skb)->dev; 2416 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 2417 } 2418 2419 /* 2420 * Allocate a dst for local (unicast / anycast) address. 
 */

/* Allocate and initialize an rt6_info for a local or anycast address on
 * @idev.  The route is pinned to the loopback device of @idev's netns and
 * returned with one reference; it is inserted into RT6_TABLE_LOCAL by the
 * caller.  Returns ERR_PTR(-ENOMEM) on allocation failure.
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	struct net *net = dev_net(idev->dev);
	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
					    DST_NOCOUNT, NULL);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	in6_dev_hold(idev);

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;

	rt->rt6i_gateway  = *addr;
	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);

	atomic_set(&rt->dst.__refcnt, 1);

	return rt;
}

/* Pick a source address for talking to @daddr: prefer the route's
 * configured prefsrc, otherwise fall back to regular source address
 * selection on the route's device.  Returns 0 or a -errno.
 */
int ip6_route_get_saddr(struct net *net,
			struct rt6_info *rt,
			const struct in6_addr *daddr,
			unsigned int prefs,
			struct in6_addr *saddr)
{
	struct inet6_dev *idev =
		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
	int err = 0;
	if (rt && rt->rt6i_prefsrc.plen)
		*saddr = rt->rt6i_prefsrc.addr;
	else
		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
					 daddr, prefs, saddr);
	return err;
}

/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device; NULL = all */
	struct net *net;
	struct in6_addr *addr;	/* the address being removed */
};

/* fib6_clean_all callback: clear the prefsrc of routes that reference the
 * address being deleted (on the given device, or any device if dev==NULL).
 */
static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
{
	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;

	if (((void *)rt->dst.dev == dev || !dev) &&
	    rt != net->ipv6.ip6_null_entry &&
	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
		/* remove prefsrc entry */
		rt->rt6i_prefsrc.plen = 0;
	}
	return 0;
}

/* Called when address @ifp is removed: strip it as prefsrc from all routes. */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}

#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)

/* Remove routers and update dst entries when a gateway turns into a host.
 */
/* fib6_clean_all callback: returning -1 asks the walker to delete the
 * route.  Matches RA-learned default routes and cached gateway routes
 * whose gateway equals the address that just became a host.
 */
static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
{
	struct in6_addr *gateway = (struct in6_addr *)arg;

	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
		return -1;
	}
	return 0;
}

/* Purge all routes through @gateway after it stopped being a router. */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}

struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL = all devices */
	struct net *net;
};

/* fib6_clean_all callback: delete (-1) every route on the dying device,
 * sparing the shared null entry.
 */
static int fib6_ifdown(struct rt6_info *rt, void *arg)
{
	const struct arg_dev_net *adn = arg;
	const struct net_device *dev = adn->dev;

	if ((rt->dst.dev == dev || !dev) &&
	    rt != adn->net->ipv6.ip6_null_entry)
		return -1;

	return 0;
}

/* Remove all routing state referencing @dev (FIB, ICMP cache, uncached
 * dsts) when the device is unregistered.
 */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
	rt6_uncached_list_flush_dev(net, dev);
}

struct rt6_mtu_change_arg {
	struct net_device *dev;
	unsigned int mtu;	/* new device MTU */
};

/* fib6_clean_all callback: propagate a device MTU change into route MTUs. */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discovery.
	 */
	if (rt->dst.dev == arg->dev &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}

/* Entry point called from addrconf when a device's MTU changes. */
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}

/* Netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]               = { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
};

/* Parse an RTM_NEWROUTE/RTM_DELROUTE netlink message into a fib6_config.
 * Returns 0 on success or a negative errno; cfg->fc_mx and cfg->fc_mp
 * point into the skb and are only valid while it is held.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		int plen = (rtm->rtm_dst_len + 7) >> 3;

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
	}

	if (tb[RTA_PREF]) {
		pref = nla_get_u8(tb[RTA_PREF]);
		/* Unknown preference values fall back to medium (RFC 4191) */
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE])
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

	err = 0;
errout:
	return err;
}

/* Add (@add != 0) or delete every nexthop of an RTA_MULTIPATH request.
 * On a failed add, already-added nexthops are rolled back by re-walking
 * the already-consumed part of the list in delete mode.
 */
static int ip6_route_multipath(struct fib6_config *cfg, int add)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 0, last_err = 0;

	remaining = cfg->fc_mp_len;
beginning:
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}
		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
		if (err) {
			last_err = err;
			/* If we are trying to remove a route, do not stop the
			 * loop when ip6_route_del() fails (because next hop is
			 * already gone), we should try to remove all next hops.
			 */
			if (add) {
				/* If add fails, we should try to delete all
				 * next hops that have been already added.
				 */
				add = 0;
				remaining = cfg->fc_mp_len - remaining;
				goto beginning;
			}
		}
		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}

/* RTM_DELROUTE handler */
static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct fib6_config cfg;
	int err;

	err = rtm_to_fib6_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

	if (cfg.fc_mp)
		return ip6_route_multipath(&cfg, 0);
	else
		return ip6_route_del(&cfg);
}

/* RTM_NEWROUTE handler */
static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct fib6_config cfg;
	int err;

	err = rtm_to_fib6_config(skb, nlh, &cfg);
	if (err < 0)
		return err;

	if (cfg.fc_mp)
		return ip6_route_multipath(&cfg, 1);
	else
		return ip6_route_add(&cfg);
}

/* Worst-case netlink message size for one route notification.
 * Must stay in sync with the attributes emitted by rt6_fill_node().
 */
static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
{
	return NLMSG_ALIGN(sizeof(struct rtmsg))
	       + nla_total_size(16) /* RTA_SRC */
	       + nla_total_size(16) /* RTA_DST */
	       + nla_total_size(16) /* RTA_GATEWAY */
	       + nla_total_size(16) /* RTA_PREFSRC */
	       + nla_total_size(4) /* RTA_TABLE */
	       + nla_total_size(4) /* RTA_IIF */
	       + nla_total_size(4) /* RTA_OIF */
	       + nla_total_size(4) /* RTA_PRIORITY */
	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
	       + nla_total_size(sizeof(struct rta_cacheinfo))
	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
	       + nla_total_size(1) /* RTA_PREF */
	       + lwtunnel_get_encap_size(rt->dst.lwtstate);
}
/* Serialize @rt into a netlink RTM message on @skb.
 * Returns 0 on success, 1 when the route was skipped (prefix-only dump),
 * or -EMSGSIZE when @skb ran out of room (message is cancelled).
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	if (rt->rt6i_flags & RTF_REJECT) {
		/* Map the dst error code back to the rtm type it came from */
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	if (!netif_carrier_ok(rt->dst.dev)) {
		rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			rtm->rtm_flags |= RTNH_F_DEAD;
	}
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* rt6i_pmtu overrides the inherited MTU metric for cached routes */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;

	lwtunnel_fill_encap(skb, rt->dst.lwtstate);

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* fib6 walker callback used by RTM_GETROUTE dumps. */
int rt6_dump_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
	int prefix;

	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	} else
		prefix = 0;

	return rt6_fill_node(arg->net,
		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
		     prefix, 0, NLM_F_MULTI);
}

/* RTM_GETROUTE handler: resolve the route for the requested flow and
 * unicast the result back to the caller.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0, oif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	if (iif) {
		/* Input path lookup: requires the incoming device */
		struct net_device *dev;
		int flags = 0;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		fl6.flowi6_oif = oif;

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
	}

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	/* skb takes over the route reference */
	skb_dst_set(skb, &rt->dst);

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}

/* Broadcast a route add/delete event to RTNLGRP_IPV6_ROUTE listeners. */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
			    event, info->portid, seq, 0, 0, 0);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}

/* netdevice notifier: once the per-netns loopback registers, point the
 * template reject routes (null/prohibit/blackhole) at it.
 */
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	}

	return NOTIFY_OK;
}

/*
 *	/proc
 */

#ifdef CONFIG_PROC_FS

/* /proc/net/ipv6_route (ipv6_route_open is defined elsewhere) */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};

/* /proc/net/rt6_stats: one line of hex FIB/route-cache counters */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}

static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}

static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
#endif	/* CONFIG_PROC_FS */

#ifdef CONFIG_SYSCTL

/* Handler for net.ipv6.route.flush: writing a delay triggers a FIB GC run.
 * Write-only (mode 0200); reads return -EINVAL.
 */
static
int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
			      void __user *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net;
	int delay;
	if (!write)
		return -EINVAL;

	net = (struct net *)ctl->extra1;
	delay = net->ipv6.sysctl.flush_delay;
	proc_dointvec(ctl, write, buffer, lenp, ppos);
	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
	return 0;
}

/* Template for the per-netns net.ipv6.route.* sysctl table; .data fields
 * are re-pointed at each netns in ipv6_route_sysctl_init().
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};

/* Clone the sysctl template for @net and re-point each entry's .data at
 * the netns-local variables.  Index assignments must track the template
 * layout above.  Returns NULL on allocation failure.
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
#endif

/* Per-netns setup: dst ops, the template reject routes and the default
 * GC/sysctl tuning.  Unwinds partial allocations via goto labels.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;
}

/* Per-netns teardown, mirroring ip6_route_net_init(). */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}

/* Late per-netns init: register the /proc entries. */
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
#endif
	return 0;
}

static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}

static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};

/* Per-netns inetpeer base used for ICMP rate limiting etc. */
static int __net_init ipv6_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv6.peers = bp;
	return 0;
}

static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};

static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};

static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = 0,
};

/* Module init: caches, pernet subsystems, init_net reject-route wiring,
 * FIB and xfrm setup.  (Continues beyond this excerpt.)
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
goto out_fib6_init; 3511 3512 ret = fib6_rules_init(); 3513 if (ret) 3514 goto xfrm6_init; 3515 3516 ret = register_pernet_subsys(&ip6_route_net_late_ops); 3517 if (ret) 3518 goto fib6_rules_init; 3519 3520 ret = -ENOBUFS; 3521 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || 3522 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || 3523 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) 3524 goto out_register_late_subsys; 3525 3526 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 3527 if (ret) 3528 goto out_register_late_subsys; 3529 3530 for_each_possible_cpu(cpu) { 3531 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); 3532 3533 INIT_LIST_HEAD(&ul->head); 3534 spin_lock_init(&ul->lock); 3535 } 3536 3537 out: 3538 return ret; 3539 3540 out_register_late_subsys: 3541 unregister_pernet_subsys(&ip6_route_net_late_ops); 3542 fib6_rules_init: 3543 fib6_rules_cleanup(); 3544 xfrm6_init: 3545 xfrm6_fini(); 3546 out_fib6_init: 3547 fib6_gc_cleanup(); 3548 out_register_subsys: 3549 unregister_pernet_subsys(&ip6_route_net_ops); 3550 out_register_inetpeer: 3551 unregister_pernet_subsys(&ipv6_inetpeer_ops); 3552 out_dst_entries: 3553 dst_entries_destroy(&ip6_dst_blackhole_ops); 3554 out_kmem_cache: 3555 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 3556 goto out; 3557 } 3558 3559 void ip6_route_cleanup(void) 3560 { 3561 unregister_netdevice_notifier(&ip6_route_dev_notifier); 3562 unregister_pernet_subsys(&ip6_route_net_late_ops); 3563 fib6_rules_cleanup(); 3564 xfrm6_fini(); 3565 fib6_gc_cleanup(); 3566 unregister_pernet_subsys(&ipv6_inetpeer_ops); 3567 unregister_pernet_subsys(&ip6_route_net_ops); 3568 dst_entries_destroy(&ip6_dst_blackhole_ops); 3569 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 3570 } 3571