1 /* 2 * Linux INET6 implementation 3 * FIB front-end. 4 * 5 * Authors: 6 * Pedro Roque <roque@di.fc.ul.pt> 7 * 8 * This program is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License 10 * as published by the Free Software Foundation; either version 11 * 2 of the License, or (at your option) any later version. 12 */ 13 14 /* Changes: 15 * 16 * YOSHIFUJI Hideaki @USAGI 17 * reworked default router selection. 18 * - respect outgoing interface 19 * - select from (probably) reachable routers (i.e. 20 * routers in REACHABLE, STALE, DELAY or PROBE states). 21 * - always select the same router if it is (probably) 22 * reachable. otherwise, round-robin the list. 23 * Ville Nuorvala 24 * Fixed routing subtrees. 25 */ 26 27 #define pr_fmt(fmt) "IPv6: " fmt 28 29 #include <linux/capability.h> 30 #include <linux/errno.h> 31 #include <linux/export.h> 32 #include <linux/types.h> 33 #include <linux/times.h> 34 #include <linux/socket.h> 35 #include <linux/sockios.h> 36 #include <linux/net.h> 37 #include <linux/route.h> 38 #include <linux/netdevice.h> 39 #include <linux/in6.h> 40 #include <linux/mroute6.h> 41 #include <linux/init.h> 42 #include <linux/if_arp.h> 43 #include <linux/proc_fs.h> 44 #include <linux/seq_file.h> 45 #include <linux/nsproxy.h> 46 #include <linux/slab.h> 47 #include <linux/jhash.h> 48 #include <net/net_namespace.h> 49 #include <net/snmp.h> 50 #include <net/ipv6.h> 51 #include <net/ip6_fib.h> 52 #include <net/ip6_route.h> 53 #include <net/ndisc.h> 54 #include <net/addrconf.h> 55 #include <net/tcp.h> 56 #include <linux/rtnetlink.h> 57 #include <net/dst.h> 58 #include <net/dst_metadata.h> 59 #include <net/xfrm.h> 60 #include <net/netevent.h> 61 #include <net/netlink.h> 62 #include <net/nexthop.h> 63 #include <net/lwtunnel.h> 64 #include <net/ip_tunnels.h> 65 #include <net/l3mdev.h> 66 #include <trace/events/fib6.h> 67 68 #include <linux/uaccess.h> 69 70 #ifdef CONFIG_SYSCTL 71 #include <linux/sysctl.h> 72 #endif 73 74 enum rt6_nud_state { 75 RT6_NUD_FAIL_HARD = -3, 76 RT6_NUD_FAIL_PROBE = -2, 77 RT6_NUD_FAIL_DO_RR = -1, 78 RT6_NUD_SUCCEED = 1 79 }; 80 81 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort); 82 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 83 static unsigned int ip6_default_advmss(const struct dst_entry *dst); 84 static unsigned int ip6_mtu(const struct dst_entry *dst); 85 static struct dst_entry *ip6_negative_advice(struct dst_entry *); 86 static void ip6_dst_destroy(struct dst_entry *); 87 static void ip6_dst_ifdown(struct dst_entry *, 88 struct net_device *dev, int how); 89 static int ip6_dst_gc(struct dst_ops *ops); 90 91 static int ip6_pkt_discard(struct sk_buff *skb); 92 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb); 93 static int ip6_pkt_prohibit(struct sk_buff *skb); 94 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb); 95 static void ip6_link_failure(struct sk_buff *skb); 96 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 97 struct sk_buff *skb, u32 mtu); 98 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, 99 struct sk_buff *skb); 100 static void rt6_dst_from_metrics_check(struct rt6_info *rt); 101 static int rt6_score_route(struct rt6_info *rt, int oif, int strict); 102 static size_t rt6_nlmsg_size(struct rt6_info *rt); 103 static int rt6_fill_node(struct net *net, 104 struct sk_buff *skb, struct rt6_info *rt, 105 struct in6_addr *dst, struct in6_addr *src, 106 int iif, int type, u32 portid, u32 seq, 107 unsigned int flags); 108 static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, 109 struct in6_addr *daddr, 110 struct in6_addr *saddr); 111 112 #ifdef CONFIG_IPV6_ROUTE_INFO 113 static struct rt6_info *rt6_add_route_info(struct net *net, 114 const struct in6_addr *prefix, int prefixlen, 115 const struct in6_addr *gwaddr, 116 struct net_device *dev, 117 unsigned int pref); 118 static struct rt6_info *rt6_get_route_info(struct net *net, 119 const struct in6_addr *prefix, int prefixlen, 120 const struct in6_addr *gwaddr, 121 struct net_device *dev); 122 #endif 123 124 struct uncached_list { 125 spinlock_t lock; 126 struct list_head head; 127 }; 128 129 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list); 130 131 static void rt6_uncached_list_add(struct rt6_info *rt) 132 { 133 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list); 134 135 rt->rt6i_uncached_list = ul; 136 137 spin_lock_bh(&ul->lock); 138 list_add_tail(&rt->rt6i_uncached, &ul->head); 139 spin_unlock_bh(&ul->lock); 140 } 141 142 static void rt6_uncached_list_del(struct rt6_info *rt) 143 { 144 if (!list_empty(&rt->rt6i_uncached)) { 145 struct uncached_list *ul = rt->rt6i_uncached_list; 146 struct net *net = dev_net(rt->dst.dev); 147 148 spin_lock_bh(&ul->lock); 149 list_del(&rt->rt6i_uncached); 150 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache); 151 spin_unlock_bh(&ul->lock); 152 } 153 } 154 155 static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) 156 { 157 struct net_device *loopback_dev = net->loopback_dev; 158 int cpu; 159 160 if (dev == loopback_dev) 161 return; 162 163 for_each_possible_cpu(cpu) { 164 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); 165 struct rt6_info *rt; 166 167 spin_lock_bh(&ul->lock); 168 list_for_each_entry(rt, &ul->head, rt6i_uncached) { 169 struct inet6_dev *rt_idev = rt->rt6i_idev; 170 struct net_device *rt_dev = rt->dst.dev; 171 172 if (rt_idev->dev == dev) { 173 rt->rt6i_idev = in6_dev_get(loopback_dev); 174 in6_dev_put(rt_idev); 175 } 176 177 if (rt_dev == dev) { 178 rt->dst.dev = loopback_dev; 179 dev_hold(rt->dst.dev); 180 dev_put(rt_dev); 181 } 182 } 183 spin_unlock_bh(&ul->lock); 184 } 185 } 186 187 static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt) 188 { 189 return dst_metrics_write_ptr(&rt->from->dst); 190 } 191 192 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) 193 { 194 struct rt6_info *rt = (struct rt6_info *)dst; 195 196 if (rt->rt6i_flags & RTF_PCPU) 197 return rt6_pcpu_cow_metrics(rt); 198 else if (rt->rt6i_flags & RTF_CACHE) 199 return NULL; 200 else 201 return dst_cow_metrics_generic(dst, old); 202 } 203 204 static inline const void *choose_neigh_daddr(struct rt6_info *rt, 205 struct sk_buff *skb, 206 const void *daddr) 207 { 208 struct in6_addr *p = &rt->rt6i_gateway; 209 210 if (!ipv6_addr_any(p)) 211 return (const void *) p; 212 else if (skb) 213 return &ipv6_hdr(skb)->daddr; 214 return daddr; 215 } 216 217 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, 218 struct sk_buff *skb, 219 const void *daddr) 220 { 221 struct rt6_info *rt = (struct rt6_info *) dst; 222 struct neighbour *n; 223 224 daddr = choose_neigh_daddr(rt, skb, daddr); 225 n = __ipv6_neigh_lookup(dst->dev, daddr); 226 if (n) 227 return n; 228 return neigh_create(&nd_tbl, daddr, dst->dev); 229 } 230 231 static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) 232 { 233 struct net_device *dev = dst->dev; 234 struct rt6_info *rt = (struct rt6_info *)dst; 235 236 daddr = choose_neigh_daddr(rt, NULL, daddr); 237 if (!daddr) 238 return; 239 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) 240 return; 241 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr)) 242 return; 243 __ipv6_confirm_neigh(dev, daddr); 244 } 245 246 static struct dst_ops ip6_dst_ops_template = { 247 .family = AF_INET6, 248 .gc = ip6_dst_gc, 249 .gc_thresh = 1024, 250 .check = ip6_dst_check, 251 .default_advmss = ip6_default_advmss, 252 .mtu = ip6_mtu, 253 .cow_metrics = ipv6_cow_metrics, 254 .destroy = ip6_dst_destroy, 255 .ifdown = ip6_dst_ifdown, 256 .negative_advice = ip6_negative_advice, 257 .link_failure = ip6_link_failure, 258 .update_pmtu = ip6_rt_update_pmtu, 259 .redirect = rt6_do_redirect, 260 .local_out = __ip6_local_out, 261 .neigh_lookup = ip6_neigh_lookup, 262 .confirm_neigh = ip6_confirm_neigh, 263 }; 264 265 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) 266 { 267 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 268 269 return mtu ? : dst->dev->mtu; 270 } 271 272 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, 273 struct sk_buff *skb, u32 mtu) 274 { 275 } 276 277 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, 278 struct sk_buff *skb) 279 { 280 } 281 282 static struct dst_ops ip6_dst_blackhole_ops = { 283 .family = AF_INET6, 284 .destroy = ip6_dst_destroy, 285 .check = ip6_dst_check, 286 .mtu = ip6_blackhole_mtu, 287 .default_advmss = ip6_default_advmss, 288 .update_pmtu = ip6_rt_blackhole_update_pmtu, 289 .redirect = ip6_rt_blackhole_redirect, 290 .cow_metrics = dst_cow_metrics_generic, 291 .neigh_lookup = ip6_neigh_lookup, 292 }; 293 294 static const u32 ip6_template_metrics[RTAX_MAX] = { 295 [RTAX_HOPLIMIT - 1] = 0, 296 }; 297 298 static const struct rt6_info ip6_null_entry_template = { 299 .dst = { 300 .__refcnt = ATOMIC_INIT(1), 301 .__use = 1, 302 .obsolete = DST_OBSOLETE_FORCE_CHK, 303 .error = -ENETUNREACH, 304 .input = ip6_pkt_discard, 305 .output = ip6_pkt_discard_out, 306 }, 307 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 308 .rt6i_protocol = RTPROT_KERNEL, 309 .rt6i_metric = ~(u32) 0, 310 .rt6i_ref = ATOMIC_INIT(1), 311 }; 312 313 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 314 315 static const struct rt6_info ip6_prohibit_entry_template = { 316 .dst = { 317 .__refcnt = ATOMIC_INIT(1), 318 .__use = 1, 319 .obsolete = DST_OBSOLETE_FORCE_CHK, 320 .error = -EACCES, 321 .input = ip6_pkt_prohibit, 322 .output = ip6_pkt_prohibit_out, 323 }, 324 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 325 .rt6i_protocol = RTPROT_KERNEL, 326 .rt6i_metric = ~(u32) 0, 327 .rt6i_ref = ATOMIC_INIT(1), 328 }; 329 330 static const struct rt6_info ip6_blk_hole_entry_template = { 331 .dst = { 332 .__refcnt = ATOMIC_INIT(1), 333 .__use = 1, 334 .obsolete = DST_OBSOLETE_FORCE_CHK, 335 .error = -EINVAL, 336 .input = dst_discard, 337 .output = dst_discard_out, 338 }, 339 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 340 .rt6i_protocol = RTPROT_KERNEL, 341 .rt6i_metric = ~(u32) 0, 342 .rt6i_ref = ATOMIC_INIT(1), 343 }; 344 345 #endif 346 347 static void rt6_info_init(struct rt6_info *rt) 348 { 349 struct dst_entry *dst = &rt->dst; 350 351 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); 352 INIT_LIST_HEAD(&rt->rt6i_siblings); 353 INIT_LIST_HEAD(&rt->rt6i_uncached); 354 } 355 356 /* allocate dst with ip6_dst_ops */ 357 static struct rt6_info *__ip6_dst_alloc(struct net *net, 358 struct net_device *dev, 359 int flags) 360 { 361 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 362 1, DST_OBSOLETE_FORCE_CHK, flags); 363 364 if (rt) { 365 rt6_info_init(rt); 366 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc); 367 } 368 369 return rt; 370 } 371 372 struct rt6_info *ip6_dst_alloc(struct net *net, 373 struct net_device *dev, 374 int flags) 375 { 376 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags); 377 378 if (rt) { 379 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); 380 if (!rt->rt6i_pcpu) { 381 dst_release_immediate(&rt->dst); 382 return NULL; 383 } 384 } 385 386 return rt; 387 } 388 EXPORT_SYMBOL(ip6_dst_alloc); 389 390 static void ip6_dst_destroy(struct dst_entry *dst) 391 { 392 struct rt6_info *rt = (struct rt6_info *)dst; 393 struct rt6_exception_bucket *bucket; 394 struct rt6_info *from = rt->from; 395 struct inet6_dev *idev; 396 397 dst_destroy_metrics_generic(dst); 398 free_percpu(rt->rt6i_pcpu); 399 rt6_uncached_list_del(rt); 400 401 idev = rt->rt6i_idev; 402 if (idev) { 403 rt->rt6i_idev = NULL; 404 in6_dev_put(idev); 405 } 406 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1); 407 if (bucket) { 408 rt->rt6i_exception_bucket = NULL; 409 kfree(bucket); 410 } 411 412 rt->from = NULL; 413 dst_release(&from->dst); 414 } 415 416 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 417 int how) 418 { 419 struct rt6_info *rt = (struct rt6_info *)dst; 420 struct inet6_dev *idev = rt->rt6i_idev; 421 struct net_device *loopback_dev = 422 dev_net(dev)->loopback_dev; 423 424 if (idev && idev->dev != loopback_dev) { 425 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev); 426 if (loopback_idev) { 427 rt->rt6i_idev = loopback_idev; 428 in6_dev_put(idev); 429 } 430 } 431 } 432 433 static bool __rt6_check_expired(const struct rt6_info *rt) 434 { 435 if (rt->rt6i_flags & RTF_EXPIRES) 436 return time_after(jiffies, rt->dst.expires); 437 else 438 return false; 439 } 440 441 static bool rt6_check_expired(const struct rt6_info *rt) 442 { 443 if (rt->rt6i_flags & RTF_EXPIRES) { 444 if (time_after(jiffies, rt->dst.expires)) 445 return true; 446 } else if (rt->from) { 447 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || 448 rt6_check_expired(rt->from); 449 } 450 return false; 451 } 452 453 static struct rt6_info *rt6_multipath_select(struct rt6_info *match, 454 struct flowi6 *fl6, int oif, 455 int strict) 456 { 457 struct rt6_info *sibling, *next_sibling; 458 459 /* We might have already computed the hash for ICMPv6 errors. In such 460 * case it will always be non-zero. Otherwise now is the time to do it. 461 */ 462 if (!fl6->mp_hash) 463 fl6->mp_hash = rt6_multipath_hash(fl6, NULL, NULL); 464 465 if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound)) 466 return match; 467 468 list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings, 469 rt6i_siblings) { 470 if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound)) 471 continue; 472 if (rt6_score_route(sibling, oif, strict) < 0) 473 break; 474 match = sibling; 475 break; 476 } 477 478 return match; 479 } 480 481 /* 482 * Route lookup. rcu_read_lock() should be held. 483 */ 484 485 static inline struct rt6_info *rt6_device_match(struct net *net, 486 struct rt6_info *rt, 487 const struct in6_addr *saddr, 488 int oif, 489 int flags) 490 { 491 struct rt6_info *local = NULL; 492 struct rt6_info *sprt; 493 494 if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD)) 495 return rt; 496 497 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) { 498 struct net_device *dev = sprt->dst.dev; 499 500 if (sprt->rt6i_nh_flags & RTNH_F_DEAD) 501 continue; 502 503 if (oif) { 504 if (dev->ifindex == oif) 505 return sprt; 506 if (dev->flags & IFF_LOOPBACK) { 507 if (!sprt->rt6i_idev || 508 sprt->rt6i_idev->dev->ifindex != oif) { 509 if (flags & RT6_LOOKUP_F_IFACE) 510 continue; 511 if (local && 512 local->rt6i_idev->dev->ifindex == oif) 513 continue; 514 } 515 local = sprt; 516 } 517 } else { 518 if (ipv6_chk_addr(net, saddr, dev, 519 flags & RT6_LOOKUP_F_IFACE)) 520 return sprt; 521 } 522 } 523 524 if (oif) { 525 if (local) 526 return local; 527 528 if (flags & RT6_LOOKUP_F_IFACE) 529 return net->ipv6.ip6_null_entry; 530 } 531 532 return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt; 533 } 534 535 #ifdef CONFIG_IPV6_ROUTER_PREF 536 struct __rt6_probe_work { 537 struct work_struct work; 538 struct in6_addr target; 539 struct net_device *dev; 540 }; 541 542 static void rt6_probe_deferred(struct work_struct *w) 543 { 544 struct in6_addr mcaddr; 545 struct __rt6_probe_work *work = 546 container_of(w, struct __rt6_probe_work, work); 547 548 addrconf_addr_solict_mult(&work->target, &mcaddr); 549 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0); 550 dev_put(work->dev); 551 kfree(work); 552 } 553 554 static void rt6_probe(struct rt6_info *rt) 555 { 556 struct __rt6_probe_work *work; 557 struct neighbour *neigh; 558 /* 559 * Okay, this does not seem to be appropriate 560 * for now, however, we need to check if it 561 * is really so; aka Router Reachability Probing. 562 * 563 * Router Reachability Probe MUST be rate-limited 564 * to no more than one per minute. 565 */ 566 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY)) 567 return; 568 rcu_read_lock_bh(); 569 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); 570 if (neigh) { 571 if (neigh->nud_state & NUD_VALID) 572 goto out; 573 574 work = NULL; 575 write_lock(&neigh->lock); 576 if (!(neigh->nud_state & NUD_VALID) && 577 time_after(jiffies, 578 neigh->updated + 579 rt->rt6i_idev->cnf.rtr_probe_interval)) { 580 work = kmalloc(sizeof(*work), GFP_ATOMIC); 581 if (work) 582 __neigh_set_probe_once(neigh); 583 } 584 write_unlock(&neigh->lock); 585 } else { 586 work = kmalloc(sizeof(*work), GFP_ATOMIC); 587 } 588 589 if (work) { 590 INIT_WORK(&work->work, rt6_probe_deferred); 591 work->target = rt->rt6i_gateway; 592 dev_hold(rt->dst.dev); 593 work->dev = rt->dst.dev; 594 schedule_work(&work->work); 595 } 596 597 out: 598 rcu_read_unlock_bh(); 599 } 600 #else 601 static inline void rt6_probe(struct rt6_info *rt) 602 { 603 } 604 #endif 605 606 /* 607 * Default Router Selection (RFC 2461 6.3.6) 608 */ 609 static inline int rt6_check_dev(struct rt6_info *rt, int oif) 610 { 611 struct net_device *dev = rt->dst.dev; 612 if (!oif || dev->ifindex == oif) 613 return 2; 614 if ((dev->flags & IFF_LOOPBACK) && 615 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif) 616 return 1; 617 return 0; 618 } 619 620 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) 621 { 622 struct neighbour *neigh; 623 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; 624 625 if (rt->rt6i_flags & RTF_NONEXTHOP || 626 !(rt->rt6i_flags & RTF_GATEWAY)) 627 return RT6_NUD_SUCCEED; 628 629 rcu_read_lock_bh(); 630 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); 631 if (neigh) { 632 read_lock(&neigh->lock); 633 if (neigh->nud_state & NUD_VALID) 634 ret = RT6_NUD_SUCCEED; 635 #ifdef CONFIG_IPV6_ROUTER_PREF 636 else if (!(neigh->nud_state & NUD_FAILED)) 637 ret = RT6_NUD_SUCCEED; 638 else 639 ret = RT6_NUD_FAIL_PROBE; 640 #endif 641 read_unlock(&neigh->lock); 642 } else { 643 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? 644 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR; 645 } 646 rcu_read_unlock_bh(); 647 648 return ret; 649 } 650 651 static int rt6_score_route(struct rt6_info *rt, int oif, 652 int strict) 653 { 654 int m; 655 656 m = rt6_check_dev(rt, oif); 657 if (!m && (strict & RT6_LOOKUP_F_IFACE)) 658 return RT6_NUD_FAIL_HARD; 659 #ifdef CONFIG_IPV6_ROUTER_PREF 660 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; 661 #endif 662 if (strict & RT6_LOOKUP_F_REACHABLE) { 663 int n = rt6_check_neigh(rt); 664 if (n < 0) 665 return n; 666 } 667 return m; 668 } 669 670 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, 671 int *mpri, struct rt6_info *match, 672 bool *do_rr) 673 { 674 int m; 675 bool match_do_rr = false; 676 struct inet6_dev *idev = rt->rt6i_idev; 677 678 if (rt->rt6i_nh_flags & RTNH_F_DEAD) 679 goto out; 680 681 if (idev->cnf.ignore_routes_with_linkdown && 682 rt->rt6i_nh_flags & RTNH_F_LINKDOWN && 683 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) 684 goto out; 685 686 if (rt6_check_expired(rt)) 687 goto out; 688 689 m = rt6_score_route(rt, oif, strict); 690 if (m == RT6_NUD_FAIL_DO_RR) { 691 match_do_rr = true; 692 m = 0; /* lowest valid score */ 693 } else if (m == RT6_NUD_FAIL_HARD) { 694 goto out; 695 } 696 697 if (strict & RT6_LOOKUP_F_REACHABLE) 698 rt6_probe(rt); 699 700 /* note that m can be RT6_NUD_FAIL_PROBE at this point */ 701 if (m > *mpri) { 702 *do_rr = match_do_rr; 703 *mpri = m; 704 match = rt; 705 } 706 out: 707 return match; 708 } 709 710 static struct rt6_info *find_rr_leaf(struct fib6_node *fn, 711 struct rt6_info *leaf, 712 struct rt6_info *rr_head, 713 u32 metric, int oif, int strict, 714 bool *do_rr) 715 { 716 struct rt6_info *rt, *match, *cont; 717 int mpri = -1; 718 719 match = NULL; 720 cont = NULL; 721 for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) { 722 if (rt->rt6i_metric != metric) { 723 cont = rt; 724 break; 725 } 726 727 match = find_match(rt, oif, strict, &mpri, match, do_rr); 728 } 729 730 for (rt = leaf; rt && rt != rr_head; 731 rt = rcu_dereference(rt->rt6_next)) { 732 if (rt->rt6i_metric != metric) { 733 cont = rt; 734 break; 735 } 736 737 match = find_match(rt, oif, strict, &mpri, match, do_rr); 738 } 739 740 if (match || !cont) 741 return match; 742 743 for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next)) 744 match = find_match(rt, oif, strict, &mpri, match, do_rr); 745 746 return match; 747 } 748 749 static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, 750 int oif, int strict) 751 { 752 struct rt6_info *leaf = rcu_dereference(fn->leaf); 753 struct rt6_info *match, *rt0; 754 bool do_rr = false; 755 int key_plen; 756 757 if (!leaf || leaf == net->ipv6.ip6_null_entry) 758 return net->ipv6.ip6_null_entry; 759 760 rt0 = rcu_dereference(fn->rr_ptr); 761 if (!rt0) 762 rt0 = leaf; 763 764 /* Double check to make sure fn is not an intermediate node 765 * and fn->leaf does not points to its child's leaf 766 * (This might happen if all routes under fn are deleted from 767 * the tree and fib6_repair_tree() is called on the node.) 768 */ 769 key_plen = rt0->rt6i_dst.plen; 770 #ifdef CONFIG_IPV6_SUBTREES 771 if (rt0->rt6i_src.plen) 772 key_plen = rt0->rt6i_src.plen; 773 #endif 774 if (fn->fn_bit != key_plen) 775 return net->ipv6.ip6_null_entry; 776 777 match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict, 778 &do_rr); 779 780 if (do_rr) { 781 struct rt6_info *next = rcu_dereference(rt0->rt6_next); 782 783 /* no entries matched; do round-robin */ 784 if (!next || next->rt6i_metric != rt0->rt6i_metric) 785 next = leaf; 786 787 if (next != rt0) { 788 spin_lock_bh(&leaf->rt6i_table->tb6_lock); 789 /* make sure next is not being deleted from the tree */ 790 if (next->rt6i_node) 791 rcu_assign_pointer(fn->rr_ptr, next); 792 spin_unlock_bh(&leaf->rt6i_table->tb6_lock); 793 } 794 } 795 796 return match ? match : net->ipv6.ip6_null_entry; 797 } 798 799 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt) 800 { 801 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)); 802 } 803 804 #ifdef CONFIG_IPV6_ROUTE_INFO 805 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 806 const struct in6_addr *gwaddr) 807 { 808 struct net *net = dev_net(dev); 809 struct route_info *rinfo = (struct route_info *) opt; 810 struct in6_addr prefix_buf, *prefix; 811 unsigned int pref; 812 unsigned long lifetime; 813 struct rt6_info *rt; 814 815 if (len < sizeof(struct route_info)) { 816 return -EINVAL; 817 } 818 819 /* Sanity check for prefix_len and length */ 820 if (rinfo->length > 3) { 821 return -EINVAL; 822 } else if (rinfo->prefix_len > 128) { 823 return -EINVAL; 824 } else if (rinfo->prefix_len > 64) { 825 if (rinfo->length < 2) { 826 return -EINVAL; 827 } 828 } else if (rinfo->prefix_len > 0) { 829 if (rinfo->length < 1) { 830 return -EINVAL; 831 } 832 } 833 834 pref = rinfo->route_pref; 835 if (pref == ICMPV6_ROUTER_PREF_INVALID) 836 return -EINVAL; 837 838 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); 839 840 if (rinfo->length == 3) 841 prefix = (struct in6_addr *)rinfo->prefix; 842 else { 843 /* this function is safe */ 844 ipv6_addr_prefix(&prefix_buf, 845 (struct in6_addr *)rinfo->prefix, 846 rinfo->prefix_len); 847 prefix = &prefix_buf; 848 } 849 850 if (rinfo->prefix_len == 0) 851 rt = rt6_get_dflt_router(gwaddr, dev); 852 else 853 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, 854 gwaddr, dev); 855 856 if (rt && !lifetime) { 857 ip6_del_rt(rt); 858 rt = NULL; 859 } 860 861 if (!rt && lifetime) 862 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, 863 dev, pref); 864 else if (rt) 865 rt->rt6i_flags = RTF_ROUTEINFO | 866 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 867 868 if (rt) { 869 if (!addrconf_finite_timeout(lifetime)) 870 rt6_clean_expires(rt); 871 else 872 rt6_set_expires(rt, jiffies + HZ * lifetime); 873 874 ip6_rt_put(rt); 875 } 876 return 0; 877 } 878 #endif 879 880 static struct fib6_node* fib6_backtrack(struct fib6_node *fn, 881 struct in6_addr *saddr) 882 { 883 struct fib6_node *pn, *sn; 884 while (1) { 885 if (fn->fn_flags & RTN_TL_ROOT) 886 return NULL; 887 pn = rcu_dereference(fn->parent); 888 sn = FIB6_SUBTREE(pn); 889 if (sn && sn != fn) 890 fn = fib6_lookup(sn, NULL, saddr); 891 else 892 fn = pn; 893 if (fn->fn_flags & RTN_RTINFO) 894 return fn; 895 } 896 } 897 898 static bool ip6_hold_safe(struct net *net, struct rt6_info **prt, 899 bool null_fallback) 900 { 901 struct rt6_info *rt = *prt; 902 903 if (dst_hold_safe(&rt->dst)) 904 return true; 905 if (null_fallback) { 906 rt = net->ipv6.ip6_null_entry; 907 dst_hold(&rt->dst); 908 } else { 909 rt = NULL; 910 } 911 *prt = rt; 912 return false; 913 } 914 915 static struct rt6_info *ip6_pol_route_lookup(struct net *net, 916 struct fib6_table *table, 917 struct flowi6 *fl6, int flags) 918 { 919 struct rt6_info *rt, *rt_cache; 920 struct fib6_node *fn; 921 922 rcu_read_lock(); 923 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 924 restart: 925 rt = rcu_dereference(fn->leaf); 926 if (!rt) { 927 rt = net->ipv6.ip6_null_entry; 928 } else { 929 rt = rt6_device_match(net, rt, &fl6->saddr, 930 fl6->flowi6_oif, flags); 931 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) 932 rt = rt6_multipath_select(rt, fl6, 933 fl6->flowi6_oif, flags); 934 } 935 if (rt == net->ipv6.ip6_null_entry) { 936 fn = fib6_backtrack(fn, &fl6->saddr); 937 if (fn) 938 goto restart; 939 } 940 /* Search through exception table */ 941 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); 942 if (rt_cache) 943 rt = rt_cache; 944 945 if (ip6_hold_safe(net, &rt, true)) 946 dst_use_noref(&rt->dst, jiffies); 947 948 rcu_read_unlock(); 949 950 trace_fib6_table_lookup(net, rt, table, fl6); 951 952 return rt; 953 954 } 955 956 struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, 957 int flags) 958 { 959 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup); 960 } 961 EXPORT_SYMBOL_GPL(ip6_route_lookup); 962 963 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, 964 const struct in6_addr *saddr, int oif, int strict) 965 { 966 struct flowi6 fl6 = { 967 .flowi6_oif = oif, 968 .daddr = *daddr, 969 }; 970 struct dst_entry *dst; 971 int flags = strict ? RT6_LOOKUP_F_IFACE : 0; 972 973 if (saddr) { 974 memcpy(&fl6.saddr, saddr, sizeof(*saddr)); 975 flags |= RT6_LOOKUP_F_HAS_SADDR; 976 } 977 978 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup); 979 if (dst->error == 0) 980 return (struct rt6_info *) dst; 981 982 dst_release(dst); 983 984 return NULL; 985 } 986 EXPORT_SYMBOL(rt6_lookup); 987 988 /* ip6_ins_rt is called with FREE table->tb6_lock. 989 * It takes new route entry, the addition fails by any reason the 990 * route is released. 991 * Caller must hold dst before calling it. 992 */ 993 994 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, 995 struct mx6_config *mxc, 996 struct netlink_ext_ack *extack) 997 { 998 int err; 999 struct fib6_table *table; 1000 1001 table = rt->rt6i_table; 1002 spin_lock_bh(&table->tb6_lock); 1003 err = fib6_add(&table->tb6_root, rt, info, mxc, extack); 1004 spin_unlock_bh(&table->tb6_lock); 1005 1006 return err; 1007 } 1008 1009 int ip6_ins_rt(struct rt6_info *rt) 1010 { 1011 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; 1012 struct mx6_config mxc = { .mx = NULL, }; 1013 1014 /* Hold dst to account for the reference from the fib6 tree */ 1015 dst_hold(&rt->dst); 1016 return __ip6_ins_rt(rt, &info, &mxc, NULL); 1017 } 1018 1019 /* called with rcu_lock held */ 1020 static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt) 1021 { 1022 struct net_device *dev = rt->dst.dev; 1023 1024 if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) { 1025 /* for copies of local routes, dst->dev needs to be the 1026 * device if it is a master device, the master device if 1027 * device is enslaved, and the loopback as the default 1028 */ 1029 if (netif_is_l3_slave(dev) && 1030 !rt6_need_strict(&rt->rt6i_dst.addr)) 1031 dev = l3mdev_master_dev_rcu(dev); 1032 else if (!netif_is_l3_master(dev)) 1033 dev = dev_net(dev)->loopback_dev; 1034 /* last case is netif_is_l3_master(dev) is true in which 1035 * case we want dev returned to be dev 1036 */ 1037 } 1038 1039 return dev; 1040 } 1041 1042 static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, 1043 const struct in6_addr *daddr, 1044 const struct in6_addr *saddr) 1045 { 1046 struct net_device *dev; 1047 struct rt6_info *rt; 1048 1049 /* 1050 * Clone the route. 1051 */ 1052 1053 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) 1054 ort = ort->from; 1055 1056 rcu_read_lock(); 1057 dev = ip6_rt_get_dev_rcu(ort); 1058 rt = __ip6_dst_alloc(dev_net(dev), dev, 0); 1059 rcu_read_unlock(); 1060 if (!rt) 1061 return NULL; 1062 1063 ip6_rt_copy_init(rt, ort); 1064 rt->rt6i_flags |= RTF_CACHE; 1065 rt->rt6i_metric = 0; 1066 rt->dst.flags |= DST_HOST; 1067 rt->rt6i_dst.addr = *daddr; 1068 rt->rt6i_dst.plen = 128; 1069 1070 if (!rt6_is_gw_or_nonexthop(ort)) { 1071 if (ort->rt6i_dst.plen != 128 && 1072 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) 1073 rt->rt6i_flags |= RTF_ANYCAST; 1074 #ifdef CONFIG_IPV6_SUBTREES 1075 if (rt->rt6i_src.plen && saddr) { 1076 rt->rt6i_src.addr = *saddr; 1077 rt->rt6i_src.plen = 128; 1078 } 1079 #endif 1080 } 1081 1082 return rt; 1083 } 1084 1085 static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) 1086 { 1087 struct net_device *dev; 1088 struct rt6_info *pcpu_rt; 1089 1090 rcu_read_lock(); 1091 dev = ip6_rt_get_dev_rcu(rt); 1092 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags); 1093 rcu_read_unlock(); 1094 if (!pcpu_rt) 1095 return NULL; 1096 ip6_rt_copy_init(pcpu_rt, rt); 1097 pcpu_rt->rt6i_protocol = rt->rt6i_protocol; 1098 pcpu_rt->rt6i_flags |= RTF_PCPU; 1099 return pcpu_rt; 1100 } 1101 1102 /* It should be called with rcu_read_lock() acquired */ 1103 static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) 1104 { 1105 struct rt6_info *pcpu_rt, **p; 1106 1107 p = this_cpu_ptr(rt->rt6i_pcpu); 1108 pcpu_rt = *p; 1109 1110 if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false)) 1111 rt6_dst_from_metrics_check(pcpu_rt); 1112 1113 return pcpu_rt; 1114 } 1115 1116 static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) 1117 { 1118 struct rt6_info *pcpu_rt, *prev, **p; 1119 1120 pcpu_rt = ip6_rt_pcpu_alloc(rt); 1121 if (!pcpu_rt) { 1122 struct net *net = dev_net(rt->dst.dev); 1123 1124 dst_hold(&net->ipv6.ip6_null_entry->dst); 1125 return net->ipv6.ip6_null_entry; 1126 } 1127 1128 dst_hold(&pcpu_rt->dst); 1129 p = this_cpu_ptr(rt->rt6i_pcpu); 1130 prev = cmpxchg(p, NULL, pcpu_rt); 1131 BUG_ON(prev); 1132 1133 rt6_dst_from_metrics_check(pcpu_rt); 1134 return pcpu_rt; 1135 } 1136 1137 /* exception hash table implementation 1138 */ 1139 static DEFINE_SPINLOCK(rt6_exception_lock); 1140 1141 /* Remove rt6_ex from hash table and free the memory 1142 * Caller must hold rt6_exception_lock 1143 */ 1144 static void rt6_remove_exception(struct rt6_exception_bucket *bucket, 1145 struct rt6_exception *rt6_ex) 1146 { 1147 struct net *net; 1148 1149 if (!bucket || !rt6_ex) 1150 return; 1151 1152 net = dev_net(rt6_ex->rt6i->dst.dev); 1153 rt6_ex->rt6i->rt6i_node = NULL; 1154 hlist_del_rcu(&rt6_ex->hlist); 1155 rt6_release(rt6_ex->rt6i); 1156 kfree_rcu(rt6_ex, rcu); 1157 WARN_ON_ONCE(!bucket->depth); 1158 bucket->depth--; 1159 net->ipv6.rt6_stats->fib_rt_cache--; 1160 } 1161 1162 /* Remove oldest rt6_ex in bucket and free the memory 1163 * Caller must hold rt6_exception_lock 1164 */ 1165 static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket) 1166 { 1167 struct rt6_exception *rt6_ex, *oldest = NULL; 1168 1169 if (!bucket) 1170 return; 1171 1172 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { 1173 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp)) 1174 oldest = rt6_ex; 1175 } 1176 rt6_remove_exception(bucket, oldest); 1177 } 1178 1179 static u32 rt6_exception_hash(const struct in6_addr *dst, 1180 const struct in6_addr *src) 1181 { 1182 static u32 seed __read_mostly; 1183 u32 val; 1184 1185 net_get_random_once(&seed, sizeof(seed)); 1186 val = jhash(dst, sizeof(*dst), seed); 1187 1188 #ifdef CONFIG_IPV6_SUBTREES 1189 if (src) 1190 val = jhash(src, sizeof(*src), val); 1191 #endif 1192 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT); 1193 } 1194 1195 /* Helper function to find the cached rt in the hash table 1196 * and update bucket pointer to point to the bucket for this 1197 * (daddr, saddr) pair 1198 * Caller must hold rt6_exception_lock 1199 */ 1200 static struct rt6_exception * 1201 __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket, 1202 const struct in6_addr *daddr, 1203 const struct in6_addr *saddr) 1204 { 1205 struct rt6_exception *rt6_ex; 1206 u32 hval; 1207 1208 if (!(*bucket) || !daddr) 1209 return NULL; 1210 1211 hval = rt6_exception_hash(daddr, saddr); 1212 *bucket += hval; 1213 1214 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) { 1215 struct rt6_info *rt6 = rt6_ex->rt6i; 1216 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr); 1217 1218 #ifdef CONFIG_IPV6_SUBTREES 1219 if (matched && saddr) 1220 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr); 1221 #endif 1222 if (matched) 1223 return rt6_ex; 1224 } 1225 return NULL; 1226 } 1227 1228 /* Helper function to find the cached rt in the hash table 1229 * and update bucket pointer to point to the bucket for this 1230 * (daddr, saddr) pair 1231 * Caller must hold rcu_read_lock() 1232 */ 1233 static struct rt6_exception * 1234 __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket, 1235 const struct in6_addr *daddr, 1236 const struct in6_addr *saddr) 1237 { 1238 struct rt6_exception *rt6_ex; 1239 u32 hval; 1240 1241 WARN_ON_ONCE(!rcu_read_lock_held()); 1242 1243 if (!(*bucket) || !daddr) 1244 return NULL; 1245 1246 hval = rt6_exception_hash(daddr, saddr); 1247 *bucket += hval; 1248 1249 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) { 1250 struct rt6_info *rt6 = rt6_ex->rt6i; 1251 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr); 1252 1253 #ifdef CONFIG_IPV6_SUBTREES 1254 if (matched && saddr) 1255 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr); 1256 #endif 1257 if (matched) 1258 return rt6_ex; 1259 } 1260 return NULL; 1261 } 1262 1263 static int rt6_insert_exception(struct rt6_info *nrt, 1264 struct rt6_info *ort) 1265 { 1266 struct net *net = dev_net(ort->dst.dev); 1267 struct rt6_exception_bucket *bucket; 1268 struct in6_addr *src_key = NULL; 1269 struct rt6_exception *rt6_ex; 1270 int err = 0; 1271 1272 /* ort can't be a cache or pcpu route */ 1273 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) 1274 ort = ort->from; 1275 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)); 1276 1277 spin_lock_bh(&rt6_exception_lock); 1278 1279 if (ort->exception_bucket_flushed) { 1280 err = -EINVAL; 1281 goto out; 1282 } 1283 1284 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket, 1285 lockdep_is_held(&rt6_exception_lock)); 1286 if (!bucket) { 1287 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket), 1288 GFP_ATOMIC); 1289 if (!bucket) { 1290 err = -ENOMEM; 1291 goto out; 1292 } 1293 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket); 1294 } 1295 1296 #ifdef CONFIG_IPV6_SUBTREES 1297 /* rt6i_src.plen != 0 indicates ort is in subtree 1298 * and exception table is indexed by a hash of 1299 * both rt6i_dst and rt6i_src. 1300 * Otherwise, the exception table is indexed by 1301 * a hash of only rt6i_dst. 1302 */ 1303 if (ort->rt6i_src.plen) 1304 src_key = &nrt->rt6i_src.addr; 1305 #endif 1306 1307 /* Update rt6i_prefsrc as it could be changed 1308 * in rt6_remove_prefsrc() 1309 */ 1310 nrt->rt6i_prefsrc = ort->rt6i_prefsrc; 1311 /* rt6_mtu_change() might lower mtu on ort. 1312 * Only insert this exception route if its mtu 1313 * is less than ort's mtu value. 1314 */ 1315 if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) { 1316 err = -EINVAL; 1317 goto out; 1318 } 1319 1320 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr, 1321 src_key); 1322 if (rt6_ex) 1323 rt6_remove_exception(bucket, rt6_ex); 1324 1325 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC); 1326 if (!rt6_ex) { 1327 err = -ENOMEM; 1328 goto out; 1329 } 1330 rt6_ex->rt6i = nrt; 1331 rt6_ex->stamp = jiffies; 1332 atomic_inc(&nrt->rt6i_ref); 1333 nrt->rt6i_node = ort->rt6i_node; 1334 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain); 1335 bucket->depth++; 1336 net->ipv6.rt6_stats->fib_rt_cache++; 1337 1338 if (bucket->depth > FIB6_MAX_DEPTH) 1339 rt6_exception_remove_oldest(bucket); 1340 1341 out: 1342 spin_unlock_bh(&rt6_exception_lock); 1343 1344 /* Update fn->fn_sernum to invalidate all cached dst */ 1345 if (!err) { 1346 spin_lock_bh(&ort->rt6i_table->tb6_lock); 1347 fib6_update_sernum(ort); 1348 spin_unlock_bh(&ort->rt6i_table->tb6_lock); 1349 fib6_force_start_gc(net); 1350 } 1351 1352 return err; 1353 } 1354 1355 void rt6_flush_exceptions(struct rt6_info *rt) 1356 { 1357 struct rt6_exception_bucket *bucket; 1358 struct rt6_exception *rt6_ex; 1359 struct hlist_node *tmp; 1360 int i; 1361 1362 spin_lock_bh(&rt6_exception_lock); 1363 /* Prevent rt6_insert_exception() to recreate the bucket list */ 1364 rt->exception_bucket_flushed = 1; 1365 1366 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1367 lockdep_is_held(&rt6_exception_lock)); 1368 if (!bucket) 1369 goto out; 1370 1371 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { 1372 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) 1373 rt6_remove_exception(bucket, rt6_ex); 1374 WARN_ON_ONCE(bucket->depth); 1375 bucket++; 1376 } 1377 1378 out: 1379 spin_unlock_bh(&rt6_exception_lock); 1380 } 1381 1382 /* Find cached rt in the hash table inside passed in rt 1383 * Caller has to hold rcu_read_lock() 1384 */ 1385 static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, 1386 struct in6_addr *daddr, 1387 struct in6_addr *saddr) 1388 { 1389 struct rt6_exception_bucket *bucket; 1390 struct in6_addr *src_key = NULL; 1391 struct rt6_exception *rt6_ex; 1392 struct rt6_info *res = NULL; 1393 1394 bucket = rcu_dereference(rt->rt6i_exception_bucket); 1395 1396 #ifdef CONFIG_IPV6_SUBTREES 1397 /* rt6i_src.plen != 0 indicates rt is in subtree 1398 * and exception table is indexed by a hash of 1399 * both rt6i_dst and rt6i_src. 1400 * Otherwise, the exception table is indexed by 1401 * a hash of only rt6i_dst. 1402 */ 1403 if (rt->rt6i_src.plen) 1404 src_key = saddr; 1405 #endif 1406 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); 1407 1408 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) 1409 res = rt6_ex->rt6i; 1410 1411 return res; 1412 } 1413 1414 /* Remove the passed in cached rt from the hash table that contains it */ 1415 int rt6_remove_exception_rt(struct rt6_info *rt) 1416 { 1417 struct rt6_exception_bucket *bucket; 1418 struct rt6_info *from = rt->from; 1419 struct in6_addr *src_key = NULL; 1420 struct rt6_exception *rt6_ex; 1421 int err; 1422 1423 if (!from || 1424 !(rt->rt6i_flags & RTF_CACHE)) 1425 return -EINVAL; 1426 1427 if (!rcu_access_pointer(from->rt6i_exception_bucket)) 1428 return -ENOENT; 1429 1430 spin_lock_bh(&rt6_exception_lock); 1431 bucket = rcu_dereference_protected(from->rt6i_exception_bucket, 1432 lockdep_is_held(&rt6_exception_lock)); 1433 #ifdef CONFIG_IPV6_SUBTREES 1434 /* rt6i_src.plen != 0 indicates 'from' is in subtree 1435 * and exception table is indexed by a hash of 1436 * both rt6i_dst and rt6i_src. 1437 * Otherwise, the exception table is indexed by 1438 * a hash of only rt6i_dst. 1439 */ 1440 if (from->rt6i_src.plen) 1441 src_key = &rt->rt6i_src.addr; 1442 #endif 1443 rt6_ex = __rt6_find_exception_spinlock(&bucket, 1444 &rt->rt6i_dst.addr, 1445 src_key); 1446 if (rt6_ex) { 1447 rt6_remove_exception(bucket, rt6_ex); 1448 err = 0; 1449 } else { 1450 err = -ENOENT; 1451 } 1452 1453 spin_unlock_bh(&rt6_exception_lock); 1454 return err; 1455 } 1456 1457 /* Find rt6_ex which contains the passed in rt cache and 1458 * refresh its stamp 1459 */ 1460 static void rt6_update_exception_stamp_rt(struct rt6_info *rt) 1461 { 1462 struct rt6_exception_bucket *bucket; 1463 struct rt6_info *from = rt->from; 1464 struct in6_addr *src_key = NULL; 1465 struct rt6_exception *rt6_ex; 1466 1467 if (!from || 1468 !(rt->rt6i_flags & RTF_CACHE)) 1469 return; 1470 1471 rcu_read_lock(); 1472 bucket = rcu_dereference(from->rt6i_exception_bucket); 1473 1474 #ifdef CONFIG_IPV6_SUBTREES 1475 /* rt6i_src.plen != 0 indicates 'from' is in subtree 1476 * and exception table is indexed by a hash of 1477 * both rt6i_dst and rt6i_src. 1478 * Otherwise, the exception table is indexed by 1479 * a hash of only rt6i_dst. 1480 */ 1481 if (from->rt6i_src.plen) 1482 src_key = &rt->rt6i_src.addr; 1483 #endif 1484 rt6_ex = __rt6_find_exception_rcu(&bucket, 1485 &rt->rt6i_dst.addr, 1486 src_key); 1487 if (rt6_ex) 1488 rt6_ex->stamp = jiffies; 1489 1490 rcu_read_unlock(); 1491 } 1492 1493 static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt) 1494 { 1495 struct rt6_exception_bucket *bucket; 1496 struct rt6_exception *rt6_ex; 1497 int i; 1498 1499 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1500 lockdep_is_held(&rt6_exception_lock)); 1501 1502 if (bucket) { 1503 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { 1504 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { 1505 rt6_ex->rt6i->rt6i_prefsrc.plen = 0; 1506 } 1507 bucket++; 1508 } 1509 } 1510 } 1511 1512 static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu) 1513 { 1514 struct rt6_exception_bucket *bucket; 1515 struct rt6_exception *rt6_ex; 1516 int i; 1517 1518 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1519 lockdep_is_held(&rt6_exception_lock)); 1520 1521 if (bucket) { 1522 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { 1523 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { 1524 struct rt6_info *entry = rt6_ex->rt6i; 1525 /* For RTF_CACHE with rt6i_pmtu == 0 1526 * (i.e. a redirected route), 1527 * the metrics of its rt->dst.from has already 1528 * been updated. 1529 */ 1530 if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu) 1531 entry->rt6i_pmtu = mtu; 1532 } 1533 bucket++; 1534 } 1535 } 1536 } 1537 1538 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) 1539 1540 static void rt6_exceptions_clean_tohost(struct rt6_info *rt, 1541 struct in6_addr *gateway) 1542 { 1543 struct rt6_exception_bucket *bucket; 1544 struct rt6_exception *rt6_ex; 1545 struct hlist_node *tmp; 1546 int i; 1547 1548 if (!rcu_access_pointer(rt->rt6i_exception_bucket)) 1549 return; 1550 1551 spin_lock_bh(&rt6_exception_lock); 1552 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1553 lockdep_is_held(&rt6_exception_lock)); 1554 1555 if (bucket) { 1556 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { 1557 hlist_for_each_entry_safe(rt6_ex, tmp, 1558 &bucket->chain, hlist) { 1559 struct rt6_info *entry = rt6_ex->rt6i; 1560 1561 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) == 1562 RTF_CACHE_GATEWAY && 1563 ipv6_addr_equal(gateway, 1564 &entry->rt6i_gateway)) { 1565 rt6_remove_exception(bucket, rt6_ex); 1566 } 1567 } 1568 bucket++; 1569 } 1570 } 1571 1572 spin_unlock_bh(&rt6_exception_lock); 1573 } 1574 1575 static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, 1576 struct rt6_exception *rt6_ex, 1577 struct fib6_gc_args *gc_args, 1578 unsigned long now) 1579 { 1580 struct rt6_info *rt = rt6_ex->rt6i; 1581 1582 /* we are pruning and obsoleting aged-out and non gateway exceptions 1583 * even if others have still references to them, so that on next 1584 * dst_check() such references can be dropped. 1585 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when 1586 * expired, independently from their aging, as per RFC 8201 section 4 1587 */ 1588 if (!(rt->rt6i_flags & RTF_EXPIRES)) { 1589 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { 1590 RT6_TRACE("aging clone %p\n", rt); 1591 rt6_remove_exception(bucket, rt6_ex); 1592 return; 1593 } 1594 } else if (time_after(jiffies, rt->dst.expires)) { 1595 RT6_TRACE("purging expired route %p\n", rt); 1596 rt6_remove_exception(bucket, rt6_ex); 1597 return; 1598 } 1599 1600 if (rt->rt6i_flags & RTF_GATEWAY) { 1601 struct neighbour *neigh; 1602 __u8 neigh_flags = 0; 1603 1604 neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway); 1605 if (neigh) { 1606 neigh_flags = neigh->flags; 1607 neigh_release(neigh); 1608 } 1609 if (!(neigh_flags & NTF_ROUTER)) { 1610 RT6_TRACE("purging route %p via non-router but gateway\n", 1611 rt); 1612 rt6_remove_exception(bucket, rt6_ex); 1613 return; 1614 } 1615 } 1616 1617 gc_args->more++; 1618 } 1619 1620 void rt6_age_exceptions(struct rt6_info *rt, 1621 struct fib6_gc_args *gc_args, 1622 unsigned long now) 1623 { 1624 struct rt6_exception_bucket *bucket; 1625 struct rt6_exception *rt6_ex; 1626 struct hlist_node *tmp; 1627 int i; 1628 1629 if (!rcu_access_pointer(rt->rt6i_exception_bucket)) 1630 return; 1631 1632 spin_lock_bh(&rt6_exception_lock); 1633 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1634 lockdep_is_held(&rt6_exception_lock)); 1635 1636 if (bucket) { 1637 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { 1638 hlist_for_each_entry_safe(rt6_ex, tmp, 1639 &bucket->chain, hlist) { 1640 rt6_age_examine_exception(bucket, rt6_ex, 1641 gc_args, now); 1642 } 1643 bucket++; 1644 } 1645 } 1646 spin_unlock_bh(&rt6_exception_lock); 1647 } 1648 1649 struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, 1650 int oif, struct flowi6 *fl6, int flags) 1651 { 1652 struct fib6_node *fn, *saved_fn; 1653 struct rt6_info *rt, *rt_cache; 1654 int strict = 0; 1655 1656 strict |= flags & RT6_LOOKUP_F_IFACE; 1657 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; 1658 if (net->ipv6.devconf_all->forwarding == 0) 1659 strict |= RT6_LOOKUP_F_REACHABLE; 1660 1661 rcu_read_lock(); 1662 1663 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 1664 saved_fn = fn; 1665 1666 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) 1667 oif = 0; 1668 1669 redo_rt6_select: 1670 rt = rt6_select(net, fn, oif, strict); 1671 if (rt->rt6i_nsiblings) 1672 rt = rt6_multipath_select(rt, fl6, oif, strict); 1673 if (rt == net->ipv6.ip6_null_entry) { 1674 fn = fib6_backtrack(fn, &fl6->saddr); 1675 if (fn) 1676 goto redo_rt6_select; 1677 else if (strict & RT6_LOOKUP_F_REACHABLE) { 1678 /* also consider unreachable route */ 1679 strict &= ~RT6_LOOKUP_F_REACHABLE; 1680 fn = saved_fn; 1681 goto redo_rt6_select; 1682 } 1683 } 1684 1685 /*Search through exception table */ 1686 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); 1687 if (rt_cache) 1688 rt = rt_cache; 1689 1690 if (rt == net->ipv6.ip6_null_entry) { 1691 rcu_read_unlock(); 1692 dst_hold(&rt->dst); 1693 trace_fib6_table_lookup(net, rt, table, fl6); 1694 return rt; 1695 } else if (rt->rt6i_flags & RTF_CACHE) { 1696 if (ip6_hold_safe(net, &rt, true)) { 1697 dst_use_noref(&rt->dst, jiffies); 1698 rt6_dst_from_metrics_check(rt); 1699 } 1700 rcu_read_unlock(); 1701 trace_fib6_table_lookup(net, rt, table, fl6); 1702 return rt; 1703 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && 1704 !(rt->rt6i_flags & RTF_GATEWAY))) { 1705 /* Create a RTF_CACHE clone which will not be 1706 * owned by the fib6 tree. It is for the special case where 1707 * the daddr in the skb during the neighbor look-up is different 1708 * from the fl6->daddr used to look-up route here. 1709 */ 1710 1711 struct rt6_info *uncached_rt; 1712 1713 if (ip6_hold_safe(net, &rt, true)) { 1714 dst_use_noref(&rt->dst, jiffies); 1715 } else { 1716 rcu_read_unlock(); 1717 uncached_rt = rt; 1718 goto uncached_rt_out; 1719 } 1720 rcu_read_unlock(); 1721 1722 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); 1723 dst_release(&rt->dst); 1724 1725 if (uncached_rt) { 1726 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc() 1727 * No need for another dst_hold() 1728 */ 1729 rt6_uncached_list_add(uncached_rt); 1730 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache); 1731 } else { 1732 uncached_rt = net->ipv6.ip6_null_entry; 1733 dst_hold(&uncached_rt->dst); 1734 } 1735 1736 uncached_rt_out: 1737 trace_fib6_table_lookup(net, uncached_rt, table, fl6); 1738 return uncached_rt; 1739 1740 } else { 1741 /* Get a percpu copy */ 1742 1743 struct rt6_info *pcpu_rt; 1744 1745 dst_use_noref(&rt->dst, jiffies); 1746 local_bh_disable(); 1747 pcpu_rt = rt6_get_pcpu_route(rt); 1748 1749 if (!pcpu_rt) { 1750 /* atomic_inc_not_zero() is needed when using rcu */ 1751 if (atomic_inc_not_zero(&rt->rt6i_ref)) { 1752 /* No dst_hold() on rt is needed because grabbing 1753 * rt->rt6i_ref makes sure rt can't be released. 1754 */ 1755 pcpu_rt = rt6_make_pcpu_route(rt); 1756 rt6_release(rt); 1757 } else { 1758 /* rt is already removed from tree */ 1759 pcpu_rt = net->ipv6.ip6_null_entry; 1760 dst_hold(&pcpu_rt->dst); 1761 } 1762 } 1763 local_bh_enable(); 1764 rcu_read_unlock(); 1765 trace_fib6_table_lookup(net, pcpu_rt, table, fl6); 1766 return pcpu_rt; 1767 } 1768 } 1769 EXPORT_SYMBOL_GPL(ip6_pol_route); 1770 1771 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, 1772 struct flowi6 *fl6, int flags) 1773 { 1774 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); 1775 } 1776 1777 struct dst_entry *ip6_route_input_lookup(struct net *net, 1778 struct net_device *dev, 1779 struct flowi6 *fl6, int flags) 1780 { 1781 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG) 1782 flags |= RT6_LOOKUP_F_IFACE; 1783 1784 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input); 1785 } 1786 EXPORT_SYMBOL_GPL(ip6_route_input_lookup); 1787 1788 static void ip6_multipath_l3_keys(const struct sk_buff *skb, 1789 struct flow_keys *keys, 1790 struct flow_keys *flkeys) 1791 { 1792 const struct ipv6hdr *outer_iph = ipv6_hdr(skb); 1793 const struct ipv6hdr *key_iph = outer_iph; 1794 struct flow_keys *_flkeys = flkeys; 1795 const struct ipv6hdr *inner_iph; 1796 const struct icmp6hdr *icmph; 1797 struct ipv6hdr _inner_iph; 1798 1799 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6)) 1800 goto out; 1801 1802 icmph = icmp6_hdr(skb); 1803 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH && 1804 icmph->icmp6_type != ICMPV6_PKT_TOOBIG && 1805 icmph->icmp6_type != ICMPV6_TIME_EXCEED && 1806 icmph->icmp6_type != ICMPV6_PARAMPROB) 1807 goto out; 1808 1809 inner_iph = skb_header_pointer(skb, 1810 skb_transport_offset(skb) + sizeof(*icmph), 1811 sizeof(_inner_iph), &_inner_iph); 1812 if (!inner_iph) 1813 goto out; 1814 1815 key_iph = inner_iph; 1816 _flkeys = NULL; 1817 out: 1818 if (_flkeys) { 1819 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src; 1820 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst; 1821 keys->tags.flow_label = _flkeys->tags.flow_label; 1822 keys->basic.ip_proto = _flkeys->basic.ip_proto; 1823 } else { 1824 keys->addrs.v6addrs.src = key_iph->saddr; 1825 keys->addrs.v6addrs.dst = key_iph->daddr; 1826 keys->tags.flow_label = ip6_flowinfo(key_iph); 1827 keys->basic.ip_proto = key_iph->nexthdr; 1828 } 1829 } 1830 1831 /* if skb is set it will be used and fl6 can be NULL */ 1832 u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb, 1833 struct flow_keys *flkeys) 1834 { 1835 struct flow_keys hash_keys; 1836 u32 mhash; 1837 1838 memset(&hash_keys, 0, sizeof(hash_keys)); 1839 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 1840 if (skb) { 1841 ip6_multipath_l3_keys(skb, &hash_keys, flkeys); 1842 } else { 1843 hash_keys.addrs.v6addrs.src = fl6->saddr; 1844 hash_keys.addrs.v6addrs.dst = fl6->daddr; 1845 hash_keys.tags.flow_label = (__force u32)fl6->flowlabel; 1846 hash_keys.basic.ip_proto = fl6->flowi6_proto; 1847 } 1848 mhash = flow_hash_from_keys(&hash_keys); 1849 1850 return mhash >> 1; 1851 } 1852 1853 void ip6_route_input(struct sk_buff *skb) 1854 { 1855 const struct ipv6hdr *iph = ipv6_hdr(skb); 1856 struct net *net = dev_net(skb->dev); 1857 int flags = RT6_LOOKUP_F_HAS_SADDR; 1858 struct ip_tunnel_info *tun_info; 1859 struct flowi6 fl6 = { 1860 .flowi6_iif = skb->dev->ifindex, 1861 .daddr = iph->daddr, 1862 .saddr = iph->saddr, 1863 .flowlabel = ip6_flowinfo(iph), 1864 .flowi6_mark = skb->mark, 1865 .flowi6_proto = iph->nexthdr, 1866 }; 1867 struct flow_keys *flkeys = NULL, _flkeys; 1868 1869 tun_info = skb_tunnel_info(skb); 1870 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) 1871 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id; 1872 1873 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys)) 1874 flkeys = &_flkeys; 1875 1876 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6)) 1877 fl6.mp_hash = rt6_multipath_hash(&fl6, skb, flkeys); 1878 skb_dst_drop(skb); 1879 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags)); 1880 } 1881 1882 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, 1883 struct flowi6 *fl6, int flags) 1884 { 1885 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags); 1886 } 1887 1888 struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, 1889 struct flowi6 *fl6, int flags) 1890 { 1891 bool any_src; 1892 1893 if (rt6_need_strict(&fl6->daddr)) { 1894 struct dst_entry *dst; 1895 1896 dst = l3mdev_link_scope_lookup(net, fl6); 1897 if (dst) 1898 return dst; 1899 } 1900 1901 fl6->flowi6_iif = LOOPBACK_IFINDEX; 1902 1903 any_src = ipv6_addr_any(&fl6->saddr); 1904 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || 1905 (fl6->flowi6_oif && any_src)) 1906 flags |= RT6_LOOKUP_F_IFACE; 1907 1908 if (!any_src) 1909 flags |= RT6_LOOKUP_F_HAS_SADDR; 1910 else if (sk) 1911 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); 1912 1913 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output); 1914 } 1915 EXPORT_SYMBOL_GPL(ip6_route_output_flags); 1916 1917 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) 1918 { 1919 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; 1920 struct net_device *loopback_dev = net->loopback_dev; 1921 struct dst_entry *new = NULL; 1922 1923 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1, 1924 DST_OBSOLETE_DEAD, 0); 1925 if (rt) { 1926 rt6_info_init(rt); 1927 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc); 1928 1929 new = &rt->dst; 1930 new->__use = 1; 1931 new->input = dst_discard; 1932 new->output = dst_discard_out; 1933 1934 dst_copy_metrics(new, &ort->dst); 1935 1936 rt->rt6i_idev = in6_dev_get(loopback_dev); 1937 rt->rt6i_gateway = ort->rt6i_gateway; 1938 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU; 1939 rt->rt6i_metric = 0; 1940 1941 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 1942 #ifdef CONFIG_IPV6_SUBTREES 1943 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 1944 #endif 1945 } 1946 1947 dst_release(dst_orig); 1948 return new ? new : ERR_PTR(-ENOMEM); 1949 } 1950 1951 /* 1952 * Destination cache support functions 1953 */ 1954 1955 static void rt6_dst_from_metrics_check(struct rt6_info *rt) 1956 { 1957 if (rt->from && 1958 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst)) 1959 dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true); 1960 } 1961 1962 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) 1963 { 1964 u32 rt_cookie = 0; 1965 1966 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie) 1967 return NULL; 1968 1969 if (rt6_check_expired(rt)) 1970 return NULL; 1971 1972 return &rt->dst; 1973 } 1974 1975 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) 1976 { 1977 if (!__rt6_check_expired(rt) && 1978 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && 1979 rt6_check(rt->from, cookie)) 1980 return &rt->dst; 1981 else 1982 return NULL; 1983 } 1984 1985 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 1986 { 1987 struct rt6_info *rt; 1988 1989 rt = (struct rt6_info *) dst; 1990 1991 /* All IPV6 dsts are created with ->obsolete set to the value 1992 * DST_OBSOLETE_FORCE_CHK which forces validation calls down 1993 * into this function always. 1994 */ 1995 1996 rt6_dst_from_metrics_check(rt); 1997 1998 if (rt->rt6i_flags & RTF_PCPU || 1999 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from)) 2000 return rt6_dst_from_check(rt, cookie); 2001 else 2002 return rt6_check(rt, cookie); 2003 } 2004 2005 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) 2006 { 2007 struct rt6_info *rt = (struct rt6_info *) dst; 2008 2009 if (rt) { 2010 if (rt->rt6i_flags & RTF_CACHE) { 2011 if (rt6_check_expired(rt)) { 2012 ip6_del_rt(rt); 2013 dst = NULL; 2014 } 2015 } else { 2016 dst_release(dst); 2017 dst = NULL; 2018 } 2019 } 2020 return dst; 2021 } 2022 2023 static void ip6_link_failure(struct sk_buff *skb) 2024 { 2025 struct rt6_info *rt; 2026 2027 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); 2028 2029 rt = (struct rt6_info *) skb_dst(skb); 2030 if (rt) { 2031 if (rt->rt6i_flags & RTF_CACHE) { 2032 if (dst_hold_safe(&rt->dst)) 2033 ip6_del_rt(rt); 2034 } else { 2035 struct fib6_node *fn; 2036 2037 rcu_read_lock(); 2038 fn = rcu_dereference(rt->rt6i_node); 2039 if (fn && (rt->rt6i_flags & RTF_DEFAULT)) 2040 fn->fn_sernum = -1; 2041 rcu_read_unlock(); 2042 } 2043 } 2044 } 2045 2046 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) 2047 { 2048 struct net *net = dev_net(rt->dst.dev); 2049 2050 rt->rt6i_flags |= RTF_MODIFIED; 2051 rt->rt6i_pmtu = mtu; 2052 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); 2053 } 2054 2055 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) 2056 { 2057 return !(rt->rt6i_flags & RTF_CACHE) && 2058 (rt->rt6i_flags & RTF_PCPU || 2059 rcu_access_pointer(rt->rt6i_node)); 2060 } 2061 2062 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, 2063 const struct ipv6hdr *iph, u32 mtu) 2064 { 2065 const struct in6_addr *daddr, *saddr; 2066 struct rt6_info *rt6 = (struct rt6_info *)dst; 2067 2068 if (rt6->rt6i_flags & RTF_LOCAL) 2069 return; 2070 2071 if (dst_metric_locked(dst, RTAX_MTU)) 2072 return; 2073 2074 if (iph) { 2075 daddr = &iph->daddr; 2076 saddr = &iph->saddr; 2077 } else if (sk) { 2078 daddr = &sk->sk_v6_daddr; 2079 saddr = &inet6_sk(sk)->saddr; 2080 } else { 2081 daddr = NULL; 2082 saddr = NULL; 2083 } 2084 dst_confirm_neigh(dst, daddr); 2085 mtu = max_t(u32, mtu, IPV6_MIN_MTU); 2086 if (mtu >= dst_mtu(dst)) 2087 return; 2088 2089 if (!rt6_cache_allowed_for_pmtu(rt6)) { 2090 rt6_do_update_pmtu(rt6, mtu); 2091 /* update rt6_ex->stamp for cache */ 2092 if (rt6->rt6i_flags & RTF_CACHE) 2093 rt6_update_exception_stamp_rt(rt6); 2094 } else if (daddr) { 2095 struct rt6_info *nrt6; 2096 2097 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); 2098 if (nrt6) { 2099 rt6_do_update_pmtu(nrt6, mtu); 2100 if (rt6_insert_exception(nrt6, rt6)) 2101 dst_release_immediate(&nrt6->dst); 2102 } 2103 } 2104 } 2105 2106 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 2107 struct sk_buff *skb, u32 mtu) 2108 { 2109 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu); 2110 } 2111 2112 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, 2113 int oif, u32 mark, kuid_t uid) 2114 { 2115 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 2116 struct dst_entry *dst; 2117 struct flowi6 fl6; 2118 2119 memset(&fl6, 0, sizeof(fl6)); 2120 fl6.flowi6_oif = oif; 2121 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark); 2122 fl6.daddr = iph->daddr; 2123 fl6.saddr = iph->saddr; 2124 fl6.flowlabel = ip6_flowinfo(iph); 2125 fl6.flowi6_uid = uid; 2126 2127 dst = ip6_route_output(net, NULL, &fl6); 2128 if (!dst->error) 2129 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu)); 2130 dst_release(dst); 2131 } 2132 EXPORT_SYMBOL_GPL(ip6_update_pmtu); 2133 2134 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) 2135 { 2136 struct dst_entry *dst; 2137 2138 ip6_update_pmtu(skb, sock_net(sk), mtu, 2139 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid); 2140 2141 dst = __sk_dst_get(sk); 2142 if (!dst || !dst->obsolete || 2143 dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) 2144 return; 2145 2146 bh_lock_sock(sk); 2147 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) 2148 ip6_datagram_dst_update(sk, false); 2149 bh_unlock_sock(sk); 2150 } 2151 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); 2152 2153 /* Handle redirects */ 2154 struct ip6rd_flowi { 2155 struct flowi6 fl6; 2156 struct in6_addr gateway; 2157 }; 2158 2159 static struct rt6_info *__ip6_route_redirect(struct net *net, 2160 struct fib6_table *table, 2161 struct flowi6 *fl6, 2162 int flags) 2163 { 2164 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; 2165 struct rt6_info *rt, *rt_cache; 2166 struct fib6_node *fn; 2167 2168 /* Get the "current" route for this destination and 2169 * check if the redirect has come from appropriate router. 2170 * 2171 * RFC 4861 specifies that redirects should only be 2172 * accepted if they come from the nexthop to the target. 2173 * Due to the way the routes are chosen, this notion 2174 * is a bit fuzzy and one might need to check all possible 2175 * routes. 2176 */ 2177 2178 rcu_read_lock(); 2179 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 2180 restart: 2181 for_each_fib6_node_rt_rcu(fn) { 2182 if (rt->rt6i_nh_flags & RTNH_F_DEAD) 2183 continue; 2184 if (rt6_check_expired(rt)) 2185 continue; 2186 if (rt->dst.error) 2187 break; 2188 if (!(rt->rt6i_flags & RTF_GATEWAY)) 2189 continue; 2190 if (fl6->flowi6_oif != rt->dst.dev->ifindex) 2191 continue; 2192 /* rt_cache's gateway might be different from its 'parent' 2193 * in the case of an ip redirect. 2194 * So we keep searching in the exception table if the gateway 2195 * is different. 2196 */ 2197 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) { 2198 rt_cache = rt6_find_cached_rt(rt, 2199 &fl6->daddr, 2200 &fl6->saddr); 2201 if (rt_cache && 2202 ipv6_addr_equal(&rdfl->gateway, 2203 &rt_cache->rt6i_gateway)) { 2204 rt = rt_cache; 2205 break; 2206 } 2207 continue; 2208 } 2209 break; 2210 } 2211 2212 if (!rt) 2213 rt = net->ipv6.ip6_null_entry; 2214 else if (rt->dst.error) { 2215 rt = net->ipv6.ip6_null_entry; 2216 goto out; 2217 } 2218 2219 if (rt == net->ipv6.ip6_null_entry) { 2220 fn = fib6_backtrack(fn, &fl6->saddr); 2221 if (fn) 2222 goto restart; 2223 } 2224 2225 out: 2226 ip6_hold_safe(net, &rt, true); 2227 2228 rcu_read_unlock(); 2229 2230 trace_fib6_table_lookup(net, rt, table, fl6); 2231 return rt; 2232 }; 2233 2234 static struct dst_entry *ip6_route_redirect(struct net *net, 2235 const struct flowi6 *fl6, 2236 const struct in6_addr *gateway) 2237 { 2238 int flags = RT6_LOOKUP_F_HAS_SADDR; 2239 struct ip6rd_flowi rdfl; 2240 2241 rdfl.fl6 = *fl6; 2242 rdfl.gateway = *gateway; 2243 2244 return fib6_rule_lookup(net, &rdfl.fl6, 2245 flags, __ip6_route_redirect); 2246 } 2247 2248 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, 2249 kuid_t uid) 2250 { 2251 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 2252 struct dst_entry *dst; 2253 struct flowi6 fl6; 2254 2255 memset(&fl6, 0, sizeof(fl6)); 2256 fl6.flowi6_iif = LOOPBACK_IFINDEX; 2257 fl6.flowi6_oif = oif; 2258 fl6.flowi6_mark = mark; 2259 fl6.daddr = iph->daddr; 2260 fl6.saddr = iph->saddr; 2261 fl6.flowlabel = ip6_flowinfo(iph); 2262 fl6.flowi6_uid = uid; 2263 2264 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr); 2265 rt6_do_redirect(dst, NULL, skb); 2266 dst_release(dst); 2267 } 2268 EXPORT_SYMBOL_GPL(ip6_redirect); 2269 2270 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, 2271 u32 mark) 2272 { 2273 const struct ipv6hdr *iph = ipv6_hdr(skb); 2274 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb); 2275 struct dst_entry *dst; 2276 struct flowi6 fl6; 2277 2278 memset(&fl6, 0, sizeof(fl6)); 2279 fl6.flowi6_iif = LOOPBACK_IFINDEX; 2280 fl6.flowi6_oif = oif; 2281 fl6.flowi6_mark = mark; 2282 fl6.daddr = msg->dest; 2283 fl6.saddr = iph->daddr; 2284 fl6.flowi6_uid = sock_net_uid(net, NULL); 2285 2286 dst = ip6_route_redirect(net, &fl6, &iph->saddr); 2287 rt6_do_redirect(dst, NULL, skb); 2288 dst_release(dst); 2289 } 2290 2291 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) 2292 { 2293 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark, 2294 sk->sk_uid); 2295 } 2296 EXPORT_SYMBOL_GPL(ip6_sk_redirect); 2297 2298 static unsigned int ip6_default_advmss(const struct dst_entry *dst) 2299 { 2300 struct net_device *dev = dst->dev; 2301 unsigned int mtu = dst_mtu(dst); 2302 struct net *net = dev_net(dev); 2303 2304 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 2305 2306 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) 2307 mtu = net->ipv6.sysctl.ip6_rt_min_advmss; 2308 2309 /* 2310 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 2311 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 2312 * IPV6_MAXPLEN is also valid and means: "any MSS, 2313 * rely only on pmtu discovery" 2314 */ 2315 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 2316 mtu = IPV6_MAXPLEN; 2317 return mtu; 2318 } 2319 2320 static unsigned int ip6_mtu(const struct dst_entry *dst) 2321 { 2322 const struct rt6_info *rt = (const struct rt6_info *)dst; 2323 unsigned int mtu = rt->rt6i_pmtu; 2324 struct inet6_dev *idev; 2325 2326 if (mtu) 2327 goto out; 2328 2329 mtu = dst_metric_raw(dst, RTAX_MTU); 2330 if (mtu) 2331 goto out; 2332 2333 mtu = IPV6_MIN_MTU; 2334 2335 rcu_read_lock(); 2336 idev = __in6_dev_get(dst->dev); 2337 if (idev) 2338 mtu = idev->cnf.mtu6; 2339 rcu_read_unlock(); 2340 2341 out: 2342 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); 2343 2344 return mtu - lwtunnel_headroom(dst->lwtstate, mtu); 2345 } 2346 2347 struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 2348 struct flowi6 *fl6) 2349 { 2350 struct dst_entry *dst; 2351 struct rt6_info *rt; 2352 struct inet6_dev *idev = in6_dev_get(dev); 2353 struct net *net = dev_net(dev); 2354 2355 if (unlikely(!idev)) 2356 return ERR_PTR(-ENODEV); 2357 2358 rt = ip6_dst_alloc(net, dev, 0); 2359 if (unlikely(!rt)) { 2360 in6_dev_put(idev); 2361 dst = ERR_PTR(-ENOMEM); 2362 goto out; 2363 } 2364 2365 rt->dst.flags |= DST_HOST; 2366 rt->dst.input = ip6_input; 2367 rt->dst.output = ip6_output; 2368 rt->rt6i_gateway = fl6->daddr; 2369 rt->rt6i_dst.addr = fl6->daddr; 2370 rt->rt6i_dst.plen = 128; 2371 rt->rt6i_idev = idev; 2372 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0); 2373 2374 /* Add this dst into uncached_list so that rt6_disable_ip() can 2375 * do proper release of the net_device 2376 */ 2377 rt6_uncached_list_add(rt); 2378 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache); 2379 2380 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); 2381 2382 out: 2383 return dst; 2384 } 2385 2386 static int ip6_dst_gc(struct dst_ops *ops) 2387 { 2388 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); 2389 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; 2390 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; 2391 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 2392 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 2393 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 2394 int entries; 2395 2396 entries = dst_entries_get_fast(ops); 2397 if (time_after(rt_last_gc + rt_min_interval, jiffies) && 2398 entries <= rt_max_size) 2399 goto out; 2400 2401 net->ipv6.ip6_rt_gc_expire++; 2402 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true); 2403 entries = dst_entries_get_slow(ops); 2404 if (entries < ops->gc_thresh) 2405 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 2406 out: 2407 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 2408 return entries > rt_max_size; 2409 } 2410 2411 static int ip6_convert_metrics(struct mx6_config *mxc, 2412 const struct fib6_config *cfg) 2413 { 2414 struct net *net = cfg->fc_nlinfo.nl_net; 2415 bool ecn_ca = false; 2416 struct nlattr *nla; 2417 int remaining; 2418 u32 *mp; 2419 2420 if (!cfg->fc_mx) 2421 return 0; 2422 2423 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); 2424 if (unlikely(!mp)) 2425 return -ENOMEM; 2426 2427 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { 2428 int type = nla_type(nla); 2429 u32 val; 2430 2431 if (!type) 2432 continue; 2433 if (unlikely(type > RTAX_MAX)) 2434 goto err; 2435 2436 if (type == RTAX_CC_ALGO) { 2437 char tmp[TCP_CA_NAME_MAX]; 2438 2439 nla_strlcpy(tmp, nla, sizeof(tmp)); 2440 val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca); 2441 if (val == TCP_CA_UNSPEC) 2442 goto err; 2443 } else { 2444 val = nla_get_u32(nla); 2445 } 2446 if (type == RTAX_HOPLIMIT && val > 255) 2447 val = 255; 2448 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK)) 2449 goto err; 2450 2451 mp[type - 1] = val; 2452 __set_bit(type - 1, mxc->mx_valid); 2453 } 2454 2455 if (ecn_ca) { 2456 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid); 2457 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA; 2458 } 2459 2460 mxc->mx = mp; 2461 return 0; 2462 err: 2463 kfree(mp); 2464 return -EINVAL; 2465 } 2466 2467 static struct rt6_info *ip6_nh_lookup_table(struct net *net, 2468 struct fib6_config *cfg, 2469 const struct in6_addr *gw_addr, 2470 u32 tbid, int flags) 2471 { 2472 struct flowi6 fl6 = { 2473 .flowi6_oif = cfg->fc_ifindex, 2474 .daddr = *gw_addr, 2475 .saddr = cfg->fc_prefsrc, 2476 }; 2477 struct fib6_table *table; 2478 struct rt6_info *rt; 2479 2480 table = fib6_get_table(net, tbid); 2481 if (!table) 2482 return NULL; 2483 2484 if (!ipv6_addr_any(&cfg->fc_prefsrc)) 2485 flags |= RT6_LOOKUP_F_HAS_SADDR; 2486 2487 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE; 2488 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags); 2489 2490 /* if table lookup failed, fall back to full lookup */ 2491 if (rt == net->ipv6.ip6_null_entry) { 2492 ip6_rt_put(rt); 2493 rt = NULL; 2494 } 2495 2496 return rt; 2497 } 2498 2499 static int ip6_route_check_nh_onlink(struct net *net, 2500 struct fib6_config *cfg, 2501 struct net_device *dev, 2502 struct netlink_ext_ack *extack) 2503 { 2504 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; 2505 const struct in6_addr *gw_addr = &cfg->fc_gateway; 2506 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT; 2507 struct rt6_info *grt; 2508 int err; 2509 2510 err = 0; 2511 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0); 2512 if (grt) { 2513 if (!grt->dst.error && 2514 (grt->rt6i_flags & flags || dev != grt->dst.dev)) { 2515 NL_SET_ERR_MSG(extack, 2516 "Nexthop has invalid gateway or device mismatch"); 2517 err = -EINVAL; 2518 } 2519 2520 ip6_rt_put(grt); 2521 } 2522 2523 return err; 2524 } 2525 2526 static int ip6_route_check_nh(struct net *net, 2527 struct fib6_config *cfg, 2528 struct net_device **_dev, 2529 struct inet6_dev **idev) 2530 { 2531 const struct in6_addr *gw_addr = &cfg->fc_gateway; 2532 struct net_device *dev = _dev ? *_dev : NULL; 2533 struct rt6_info *grt = NULL; 2534 int err = -EHOSTUNREACH; 2535 2536 if (cfg->fc_table) { 2537 int flags = RT6_LOOKUP_F_IFACE; 2538 2539 grt = ip6_nh_lookup_table(net, cfg, gw_addr, 2540 cfg->fc_table, flags); 2541 if (grt) { 2542 if (grt->rt6i_flags & RTF_GATEWAY || 2543 (dev && dev != grt->dst.dev)) { 2544 ip6_rt_put(grt); 2545 grt = NULL; 2546 } 2547 } 2548 } 2549 2550 if (!grt) 2551 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); 2552 2553 if (!grt) 2554 goto out; 2555 2556 if (dev) { 2557 if (dev != grt->dst.dev) { 2558 ip6_rt_put(grt); 2559 goto out; 2560 } 2561 } else { 2562 *_dev = dev = grt->dst.dev; 2563 *idev = grt->rt6i_idev; 2564 dev_hold(dev); 2565 in6_dev_hold(grt->rt6i_idev); 2566 } 2567 2568 if (!(grt->rt6i_flags & RTF_GATEWAY)) 2569 err = 0; 2570 2571 ip6_rt_put(grt); 2572 2573 out: 2574 return err; 2575 } 2576 2577 static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, 2578 struct netlink_ext_ack *extack) 2579 { 2580 struct net *net = cfg->fc_nlinfo.nl_net; 2581 struct rt6_info *rt = NULL; 2582 struct net_device *dev = NULL; 2583 struct inet6_dev *idev = NULL; 2584 struct fib6_table *table; 2585 int addr_type; 2586 int err = -EINVAL; 2587 2588 /* RTF_PCPU is an internal flag; can not be set by userspace */ 2589 if (cfg->fc_flags & RTF_PCPU) { 2590 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU"); 2591 goto out; 2592 } 2593 2594 /* RTF_CACHE is an internal flag; can not be set by userspace */ 2595 if (cfg->fc_flags & RTF_CACHE) { 2596 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE"); 2597 goto out; 2598 } 2599 2600 if (cfg->fc_dst_len > 128) { 2601 NL_SET_ERR_MSG(extack, "Invalid prefix length"); 2602 goto out; 2603 } 2604 if (cfg->fc_src_len > 128) { 2605 NL_SET_ERR_MSG(extack, "Invalid source address length"); 2606 goto out; 2607 } 2608 #ifndef CONFIG_IPV6_SUBTREES 2609 if (cfg->fc_src_len) { 2610 NL_SET_ERR_MSG(extack, 2611 "Specifying source address requires IPV6_SUBTREES to be enabled"); 2612 goto out; 2613 } 2614 #endif 2615 if (cfg->fc_ifindex) { 2616 err = -ENODEV; 2617 dev = dev_get_by_index(net, cfg->fc_ifindex); 2618 if (!dev) 2619 goto out; 2620 idev = in6_dev_get(dev); 2621 if (!idev) 2622 goto out; 2623 } 2624 2625 if (cfg->fc_metric == 0) 2626 cfg->fc_metric = IP6_RT_PRIO_USER; 2627 2628 if (cfg->fc_flags & RTNH_F_ONLINK) { 2629 if (!dev) { 2630 NL_SET_ERR_MSG(extack, 2631 "Nexthop device required for onlink"); 2632 err = -ENODEV; 2633 goto out; 2634 } 2635 2636 if (!(dev->flags & IFF_UP)) { 2637 NL_SET_ERR_MSG(extack, "Nexthop device is not up"); 2638 err = -ENETDOWN; 2639 goto out; 2640 } 2641 } 2642 2643 err = -ENOBUFS; 2644 if (cfg->fc_nlinfo.nlh && 2645 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { 2646 table = fib6_get_table(net, cfg->fc_table); 2647 if (!table) { 2648 pr_warn("NLM_F_CREATE should be specified when creating new route\n"); 2649 table = fib6_new_table(net, cfg->fc_table); 2650 } 2651 } else { 2652 table = fib6_new_table(net, cfg->fc_table); 2653 } 2654 2655 if (!table) 2656 goto out; 2657 2658 rt = ip6_dst_alloc(net, NULL, 2659 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT); 2660 2661 if (!rt) { 2662 err = -ENOMEM; 2663 goto out; 2664 } 2665 2666 if (cfg->fc_flags & RTF_EXPIRES) 2667 rt6_set_expires(rt, jiffies + 2668 clock_t_to_jiffies(cfg->fc_expires)); 2669 else 2670 rt6_clean_expires(rt); 2671 2672 if (cfg->fc_protocol == RTPROT_UNSPEC) 2673 cfg->fc_protocol = RTPROT_BOOT; 2674 rt->rt6i_protocol = cfg->fc_protocol; 2675 2676 addr_type = ipv6_addr_type(&cfg->fc_dst); 2677 2678 if (addr_type & IPV6_ADDR_MULTICAST) 2679 rt->dst.input = ip6_mc_input; 2680 else if (cfg->fc_flags & RTF_LOCAL) 2681 rt->dst.input = ip6_input; 2682 else 2683 rt->dst.input = ip6_forward; 2684 2685 rt->dst.output = ip6_output; 2686 2687 if (cfg->fc_encap) { 2688 struct lwtunnel_state *lwtstate; 2689 2690 err = lwtunnel_build_state(cfg->fc_encap_type, 2691 cfg->fc_encap, AF_INET6, cfg, 2692 &lwtstate, extack); 2693 if (err) 2694 goto out; 2695 rt->dst.lwtstate = lwtstate_get(lwtstate); 2696 lwtunnel_set_redirect(&rt->dst); 2697 } 2698 2699 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 2700 rt->rt6i_dst.plen = cfg->fc_dst_len; 2701 if (rt->rt6i_dst.plen == 128) 2702 rt->dst.flags |= DST_HOST; 2703 2704 #ifdef CONFIG_IPV6_SUBTREES 2705 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); 2706 rt->rt6i_src.plen = cfg->fc_src_len; 2707 #endif 2708 2709 rt->rt6i_metric = cfg->fc_metric; 2710 rt->rt6i_nh_weight = 1; 2711 2712 /* We cannot add true routes via loopback here, 2713 they would result in kernel looping; promote them to reject routes 2714 */ 2715 if ((cfg->fc_flags & RTF_REJECT) || 2716 (dev && (dev->flags & IFF_LOOPBACK) && 2717 !(addr_type & IPV6_ADDR_LOOPBACK) && 2718 !(cfg->fc_flags & RTF_LOCAL))) { 2719 /* hold loopback dev/idev if we haven't done so. */ 2720 if (dev != net->loopback_dev) { 2721 if (dev) { 2722 dev_put(dev); 2723 in6_dev_put(idev); 2724 } 2725 dev = net->loopback_dev; 2726 dev_hold(dev); 2727 idev = in6_dev_get(dev); 2728 if (!idev) { 2729 err = -ENODEV; 2730 goto out; 2731 } 2732 } 2733 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; 2734 switch (cfg->fc_type) { 2735 case RTN_BLACKHOLE: 2736 rt->dst.error = -EINVAL; 2737 rt->dst.output = dst_discard_out; 2738 rt->dst.input = dst_discard; 2739 break; 2740 case RTN_PROHIBIT: 2741 rt->dst.error = -EACCES; 2742 rt->dst.output = ip6_pkt_prohibit_out; 2743 rt->dst.input = ip6_pkt_prohibit; 2744 break; 2745 case RTN_THROW: 2746 case RTN_UNREACHABLE: 2747 default: 2748 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN 2749 : (cfg->fc_type == RTN_UNREACHABLE) 2750 ? -EHOSTUNREACH : -ENETUNREACH; 2751 rt->dst.output = ip6_pkt_discard_out; 2752 rt->dst.input = ip6_pkt_discard; 2753 break; 2754 } 2755 goto install_route; 2756 } 2757 2758 if (cfg->fc_flags & RTF_GATEWAY) { 2759 const struct in6_addr *gw_addr; 2760 int gwa_type; 2761 2762 gw_addr = &cfg->fc_gateway; 2763 gwa_type = ipv6_addr_type(gw_addr); 2764 2765 /* if gw_addr is local we will fail to detect this in case 2766 * address is still TENTATIVE (DAD in progress). rt6_lookup() 2767 * will return already-added prefix route via interface that 2768 * prefix route was assigned to, which might be non-loopback. 2769 */ 2770 err = -EINVAL; 2771 if (ipv6_chk_addr_and_flags(net, gw_addr, 2772 gwa_type & IPV6_ADDR_LINKLOCAL ? 2773 dev : NULL, 0, 0)) { 2774 NL_SET_ERR_MSG(extack, "Invalid gateway address"); 2775 goto out; 2776 } 2777 rt->rt6i_gateway = *gw_addr; 2778 2779 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { 2780 /* IPv6 strictly inhibits using not link-local 2781 addresses as nexthop address. 2782 Otherwise, router will not able to send redirects. 2783 It is very good, but in some (rare!) circumstances 2784 (SIT, PtP, NBMA NOARP links) it is handy to allow 2785 some exceptions. --ANK 2786 We allow IPv4-mapped nexthops to support RFC4798-type 2787 addressing 2788 */ 2789 if (!(gwa_type & (IPV6_ADDR_UNICAST | 2790 IPV6_ADDR_MAPPED))) { 2791 NL_SET_ERR_MSG(extack, 2792 "Invalid gateway address"); 2793 goto out; 2794 } 2795 2796 if (cfg->fc_flags & RTNH_F_ONLINK) { 2797 err = ip6_route_check_nh_onlink(net, cfg, dev, 2798 extack); 2799 } else { 2800 err = ip6_route_check_nh(net, cfg, &dev, &idev); 2801 } 2802 if (err) 2803 goto out; 2804 } 2805 err = -EINVAL; 2806 if (!dev) { 2807 NL_SET_ERR_MSG(extack, "Egress device not specified"); 2808 goto out; 2809 } else if (dev->flags & IFF_LOOPBACK) { 2810 NL_SET_ERR_MSG(extack, 2811 "Egress device can not be loopback device for this route"); 2812 goto out; 2813 } 2814 } 2815 2816 err = -ENODEV; 2817 if (!dev) 2818 goto out; 2819 2820 if (!(dev->flags & IFF_UP)) { 2821 NL_SET_ERR_MSG(extack, "Nexthop device is not up"); 2822 err = -ENETDOWN; 2823 goto out; 2824 } 2825 2826 if (!ipv6_addr_any(&cfg->fc_prefsrc)) { 2827 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { 2828 NL_SET_ERR_MSG(extack, "Invalid source address"); 2829 err = -EINVAL; 2830 goto out; 2831 } 2832 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc; 2833 rt->rt6i_prefsrc.plen = 128; 2834 } else 2835 rt->rt6i_prefsrc.plen = 0; 2836 2837 rt->rt6i_flags = cfg->fc_flags; 2838 2839 install_route: 2840 if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) && 2841 !netif_carrier_ok(dev)) 2842 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; 2843 rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); 2844 rt->dst.dev = dev; 2845 rt->rt6i_idev = idev; 2846 rt->rt6i_table = table; 2847 2848 cfg->fc_nlinfo.nl_net = dev_net(dev); 2849 2850 return rt; 2851 out: 2852 if (dev) 2853 dev_put(dev); 2854 if (idev) 2855 in6_dev_put(idev); 2856 if (rt) 2857 dst_release_immediate(&rt->dst); 2858 2859 return ERR_PTR(err); 2860 } 2861 2862 int ip6_route_add(struct fib6_config *cfg, 2863 struct netlink_ext_ack *extack) 2864 { 2865 struct mx6_config mxc = { .mx = NULL, }; 2866 struct rt6_info *rt; 2867 int err; 2868 2869 rt = ip6_route_info_create(cfg, extack); 2870 if (IS_ERR(rt)) { 2871 err = PTR_ERR(rt); 2872 rt = NULL; 2873 goto out; 2874 } 2875 2876 err = ip6_convert_metrics(&mxc, cfg); 2877 if (err) 2878 goto out; 2879 2880 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack); 2881 2882 kfree(mxc.mx); 2883 2884 return err; 2885 out: 2886 if (rt) 2887 dst_release_immediate(&rt->dst); 2888 2889 return err; 2890 } 2891 2892 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) 2893 { 2894 int err; 2895 struct fib6_table *table; 2896 struct net *net = dev_net(rt->dst.dev); 2897 2898 if (rt == net->ipv6.ip6_null_entry) { 2899 err = -ENOENT; 2900 goto out; 2901 } 2902 2903 table = rt->rt6i_table; 2904 spin_lock_bh(&table->tb6_lock); 2905 err = fib6_del(rt, info); 2906 spin_unlock_bh(&table->tb6_lock); 2907 2908 out: 2909 ip6_rt_put(rt); 2910 return err; 2911 } 2912 2913 int ip6_del_rt(struct rt6_info *rt) 2914 { 2915 struct nl_info info = { 2916 .nl_net = dev_net(rt->dst.dev), 2917 }; 2918 return __ip6_del_rt(rt, &info); 2919 } 2920 2921 static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) 2922 { 2923 struct nl_info *info = &cfg->fc_nlinfo; 2924 struct net *net = info->nl_net; 2925 struct sk_buff *skb = NULL; 2926 struct fib6_table *table; 2927 int err = -ENOENT; 2928 2929 if (rt == net->ipv6.ip6_null_entry) 2930 goto out_put; 2931 table = rt->rt6i_table; 2932 spin_lock_bh(&table->tb6_lock); 2933 2934 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) { 2935 struct rt6_info *sibling, *next_sibling; 2936 2937 /* prefer to send a single notification with all hops */ 2938 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); 2939 if (skb) { 2940 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 2941 2942 if (rt6_fill_node(net, skb, rt, 2943 NULL, NULL, 0, RTM_DELROUTE, 2944 info->portid, seq, 0) < 0) { 2945 kfree_skb(skb); 2946 skb = NULL; 2947 } else 2948 info->skip_notify = 1; 2949 } 2950 2951 list_for_each_entry_safe(sibling, next_sibling, 2952 &rt->rt6i_siblings, 2953 rt6i_siblings) { 2954 err = fib6_del(sibling, info); 2955 if (err) 2956 goto out_unlock; 2957 } 2958 } 2959 2960 err = fib6_del(rt, info); 2961 out_unlock: 2962 spin_unlock_bh(&table->tb6_lock); 2963 out_put: 2964 ip6_rt_put(rt); 2965 2966 if (skb) { 2967 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, 2968 info->nlh, gfp_any()); 2969 } 2970 return err; 2971 } 2972 2973 static int ip6_route_del(struct fib6_config *cfg, 2974 struct netlink_ext_ack *extack) 2975 { 2976 struct rt6_info *rt, *rt_cache; 2977 struct fib6_table *table; 2978 struct fib6_node *fn; 2979 int err = -ESRCH; 2980 2981 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); 2982 if (!table) { 2983 NL_SET_ERR_MSG(extack, "FIB table does not exist"); 2984 return err; 2985 } 2986 2987 rcu_read_lock(); 2988 2989 fn = fib6_locate(&table->tb6_root, 2990 &cfg->fc_dst, cfg->fc_dst_len, 2991 &cfg->fc_src, cfg->fc_src_len, 2992 !(cfg->fc_flags & RTF_CACHE)); 2993 2994 if (fn) { 2995 for_each_fib6_node_rt_rcu(fn) { 2996 if (cfg->fc_flags & RTF_CACHE) { 2997 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst, 2998 &cfg->fc_src); 2999 if (!rt_cache) 3000 continue; 3001 rt = rt_cache; 3002 } 3003 if (cfg->fc_ifindex && 3004 (!rt->dst.dev || 3005 rt->dst.dev->ifindex != cfg->fc_ifindex)) 3006 continue; 3007 if (cfg->fc_flags & RTF_GATEWAY && 3008 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) 3009 continue; 3010 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) 3011 continue; 3012 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol) 3013 continue; 3014 if (!dst_hold_safe(&rt->dst)) 3015 break; 3016 rcu_read_unlock(); 3017 3018 /* if gateway was specified only delete the one hop */ 3019 if (cfg->fc_flags & RTF_GATEWAY) 3020 return __ip6_del_rt(rt, &cfg->fc_nlinfo); 3021 3022 return __ip6_del_rt_siblings(rt, cfg); 3023 } 3024 } 3025 rcu_read_unlock(); 3026 3027 return err; 3028 } 3029 3030 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) 3031 { 3032 struct netevent_redirect netevent; 3033 struct rt6_info *rt, *nrt = NULL; 3034 struct ndisc_options ndopts; 3035 struct inet6_dev *in6_dev; 3036 struct neighbour *neigh; 3037 struct rd_msg *msg; 3038 int optlen, on_link; 3039 u8 *lladdr; 3040 3041 optlen = skb_tail_pointer(skb) - skb_transport_header(skb); 3042 optlen -= sizeof(*msg); 3043 3044 if (optlen < 0) { 3045 net_dbg_ratelimited("rt6_do_redirect: packet too short\n"); 3046 return; 3047 } 3048 3049 msg = (struct rd_msg *)icmp6_hdr(skb); 3050 3051 if (ipv6_addr_is_multicast(&msg->dest)) { 3052 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n"); 3053 return; 3054 } 3055 3056 on_link = 0; 3057 if (ipv6_addr_equal(&msg->dest, &msg->target)) { 3058 on_link = 1; 3059 } else if (ipv6_addr_type(&msg->target) != 3060 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { 3061 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n"); 3062 return; 3063 } 3064 3065 in6_dev = __in6_dev_get(skb->dev); 3066 if (!in6_dev) 3067 return; 3068 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) 3069 return; 3070 3071 /* RFC2461 8.1: 3072 * The IP source address of the Redirect MUST be the same as the current 3073 * first-hop router for the specified ICMP Destination Address. 3074 */ 3075 3076 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) { 3077 net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); 3078 return; 3079 } 3080 3081 lladdr = NULL; 3082 if (ndopts.nd_opts_tgt_lladdr) { 3083 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, 3084 skb->dev); 3085 if (!lladdr) { 3086 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n"); 3087 return; 3088 } 3089 } 3090 3091 rt = (struct rt6_info *) dst; 3092 if (rt->rt6i_flags & RTF_REJECT) { 3093 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); 3094 return; 3095 } 3096 3097 /* Redirect received -> path was valid. 3098 * Look, redirects are sent only in response to data packets, 3099 * so that this nexthop apparently is reachable. --ANK 3100 */ 3101 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr); 3102 3103 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1); 3104 if (!neigh) 3105 return; 3106 3107 /* 3108 * We have finally decided to accept it. 3109 */ 3110 3111 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, 3112 NEIGH_UPDATE_F_WEAK_OVERRIDE| 3113 NEIGH_UPDATE_F_OVERRIDE| 3114 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 3115 NEIGH_UPDATE_F_ISROUTER)), 3116 NDISC_REDIRECT, &ndopts); 3117 3118 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL); 3119 if (!nrt) 3120 goto out; 3121 3122 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 3123 if (on_link) 3124 nrt->rt6i_flags &= ~RTF_GATEWAY; 3125 3126 nrt->rt6i_protocol = RTPROT_REDIRECT; 3127 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; 3128 3129 /* No need to remove rt from the exception table if rt is 3130 * a cached route because rt6_insert_exception() will 3131 * takes care of it 3132 */ 3133 if (rt6_insert_exception(nrt, rt)) { 3134 dst_release_immediate(&nrt->dst); 3135 goto out; 3136 } 3137 3138 netevent.old = &rt->dst; 3139 netevent.new = &nrt->dst; 3140 netevent.daddr = &msg->dest; 3141 netevent.neigh = neigh; 3142 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 3143 3144 out: 3145 neigh_release(neigh); 3146 } 3147 3148 /* 3149 * Misc support functions 3150 */ 3151 3152 static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) 3153 { 3154 BUG_ON(from->from); 3155 3156 rt->rt6i_flags &= ~RTF_EXPIRES; 3157 dst_hold(&from->dst); 3158 rt->from = from; 3159 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); 3160 } 3161 3162 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort) 3163 { 3164 rt->dst.input = ort->dst.input; 3165 rt->dst.output = ort->dst.output; 3166 rt->rt6i_dst = ort->rt6i_dst; 3167 rt->dst.error = ort->dst.error; 3168 rt->rt6i_idev = ort->rt6i_idev; 3169 if (rt->rt6i_idev) 3170 in6_dev_hold(rt->rt6i_idev); 3171 rt->dst.lastuse = jiffies; 3172 rt->rt6i_gateway = ort->rt6i_gateway; 3173 rt->rt6i_flags = ort->rt6i_flags; 3174 rt6_set_from(rt, ort); 3175 rt->rt6i_metric = ort->rt6i_metric; 3176 #ifdef CONFIG_IPV6_SUBTREES 3177 rt->rt6i_src = ort->rt6i_src; 3178 #endif 3179 rt->rt6i_prefsrc = ort->rt6i_prefsrc; 3180 rt->rt6i_table = ort->rt6i_table; 3181 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate); 3182 } 3183 3184 #ifdef CONFIG_IPV6_ROUTE_INFO 3185 static struct rt6_info *rt6_get_route_info(struct net *net, 3186 const struct in6_addr *prefix, int prefixlen, 3187 const struct in6_addr *gwaddr, 3188 struct net_device *dev) 3189 { 3190 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO; 3191 int ifindex = dev->ifindex; 3192 struct fib6_node *fn; 3193 struct rt6_info *rt = NULL; 3194 struct fib6_table *table; 3195 3196 table = fib6_get_table(net, tb_id); 3197 if (!table) 3198 return NULL; 3199 3200 rcu_read_lock(); 3201 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true); 3202 if (!fn) 3203 goto out; 3204 3205 for_each_fib6_node_rt_rcu(fn) { 3206 if (rt->dst.dev->ifindex != ifindex) 3207 continue; 3208 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) 3209 continue; 3210 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) 3211 continue; 3212 ip6_hold_safe(NULL, &rt, false); 3213 break; 3214 } 3215 out: 3216 rcu_read_unlock(); 3217 return rt; 3218 } 3219 3220 static struct rt6_info *rt6_add_route_info(struct net *net, 3221 const struct in6_addr *prefix, int prefixlen, 3222 const struct in6_addr *gwaddr, 3223 struct net_device *dev, 3224 unsigned int pref) 3225 { 3226 struct fib6_config cfg = { 3227 .fc_metric = IP6_RT_PRIO_USER, 3228 .fc_ifindex = dev->ifindex, 3229 .fc_dst_len = prefixlen, 3230 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 3231 RTF_UP | RTF_PREF(pref), 3232 .fc_protocol = RTPROT_RA, 3233 .fc_nlinfo.portid = 0, 3234 .fc_nlinfo.nlh = NULL, 3235 .fc_nlinfo.nl_net = net, 3236 }; 3237 3238 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO, 3239 cfg.fc_dst = *prefix; 3240 cfg.fc_gateway = *gwaddr; 3241 3242 /* We should treat it as a default route if prefix length is 0. */ 3243 if (!prefixlen) 3244 cfg.fc_flags |= RTF_DEFAULT; 3245 3246 ip6_route_add(&cfg, NULL); 3247 3248 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev); 3249 } 3250 #endif 3251 3252 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev) 3253 { 3254 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT; 3255 struct rt6_info *rt; 3256 struct fib6_table *table; 3257 3258 table = fib6_get_table(dev_net(dev), tb_id); 3259 if (!table) 3260 return NULL; 3261 3262 rcu_read_lock(); 3263 for_each_fib6_node_rt_rcu(&table->tb6_root) { 3264 if (dev == rt->dst.dev && 3265 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 3266 ipv6_addr_equal(&rt->rt6i_gateway, addr)) 3267 break; 3268 } 3269 if (rt) 3270 ip6_hold_safe(NULL, &rt, false); 3271 rcu_read_unlock(); 3272 return rt; 3273 } 3274 3275 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, 3276 struct net_device *dev, 3277 unsigned int pref) 3278 { 3279 struct fib6_config cfg = { 3280 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT, 3281 .fc_metric = IP6_RT_PRIO_USER, 3282 .fc_ifindex = dev->ifindex, 3283 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 3284 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 3285 .fc_protocol = RTPROT_RA, 3286 .fc_nlinfo.portid = 0, 3287 .fc_nlinfo.nlh = NULL, 3288 .fc_nlinfo.nl_net = dev_net(dev), 3289 }; 3290 3291 cfg.fc_gateway = *gwaddr; 3292 3293 if (!ip6_route_add(&cfg, NULL)) { 3294 struct fib6_table *table; 3295 3296 table = fib6_get_table(dev_net(dev), cfg.fc_table); 3297 if (table) 3298 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER; 3299 } 3300 3301 return rt6_get_dflt_router(gwaddr, dev); 3302 } 3303 3304 static void __rt6_purge_dflt_routers(struct fib6_table *table) 3305 { 3306 struct rt6_info *rt; 3307 3308 restart: 3309 rcu_read_lock(); 3310 for_each_fib6_node_rt_rcu(&table->tb6_root) { 3311 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && 3312 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { 3313 if (dst_hold_safe(&rt->dst)) { 3314 rcu_read_unlock(); 3315 ip6_del_rt(rt); 3316 } else { 3317 rcu_read_unlock(); 3318 } 3319 goto restart; 3320 } 3321 } 3322 rcu_read_unlock(); 3323 3324 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER; 3325 } 3326 3327 void rt6_purge_dflt_routers(struct net *net) 3328 { 3329 struct fib6_table *table; 3330 struct hlist_head *head; 3331 unsigned int h; 3332 3333 rcu_read_lock(); 3334 3335 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { 3336 head = &net->ipv6.fib_table_hash[h]; 3337 hlist_for_each_entry_rcu(table, head, tb6_hlist) { 3338 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER) 3339 __rt6_purge_dflt_routers(table); 3340 } 3341 } 3342 3343 rcu_read_unlock(); 3344 } 3345 3346 static void rtmsg_to_fib6_config(struct net *net, 3347 struct in6_rtmsg *rtmsg, 3348 struct fib6_config *cfg) 3349 { 3350 memset(cfg, 0, sizeof(*cfg)); 3351 3352 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? 3353 : RT6_TABLE_MAIN; 3354 cfg->fc_ifindex = rtmsg->rtmsg_ifindex; 3355 cfg->fc_metric = rtmsg->rtmsg_metric; 3356 cfg->fc_expires = rtmsg->rtmsg_info; 3357 cfg->fc_dst_len = rtmsg->rtmsg_dst_len; 3358 cfg->fc_src_len = rtmsg->rtmsg_src_len; 3359 cfg->fc_flags = rtmsg->rtmsg_flags; 3360 3361 cfg->fc_nlinfo.nl_net = net; 3362 3363 cfg->fc_dst = rtmsg->rtmsg_dst; 3364 cfg->fc_src = rtmsg->rtmsg_src; 3365 cfg->fc_gateway = rtmsg->rtmsg_gateway; 3366 } 3367 3368 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) 3369 { 3370 struct fib6_config cfg; 3371 struct in6_rtmsg rtmsg; 3372 int err; 3373 3374 switch (cmd) { 3375 case SIOCADDRT: /* Add a route */ 3376 case SIOCDELRT: /* Delete a route */ 3377 if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 3378 return -EPERM; 3379 err = copy_from_user(&rtmsg, arg, 3380 sizeof(struct in6_rtmsg)); 3381 if (err) 3382 return -EFAULT; 3383 3384 rtmsg_to_fib6_config(net, &rtmsg, &cfg); 3385 3386 rtnl_lock(); 3387 switch (cmd) { 3388 case SIOCADDRT: 3389 err = ip6_route_add(&cfg, NULL); 3390 break; 3391 case SIOCDELRT: 3392 err = ip6_route_del(&cfg, NULL); 3393 break; 3394 default: 3395 err = -EINVAL; 3396 } 3397 rtnl_unlock(); 3398 3399 return err; 3400 } 3401 3402 return -EINVAL; 3403 } 3404 3405 /* 3406 * Drop the packet on the floor 3407 */ 3408 3409 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) 3410 { 3411 int type; 3412 struct dst_entry *dst = skb_dst(skb); 3413 switch (ipstats_mib_noroutes) { 3414 case IPSTATS_MIB_INNOROUTES: 3415 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); 3416 if (type == IPV6_ADDR_ANY) { 3417 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 3418 IPSTATS_MIB_INADDRERRORS); 3419 break; 3420 } 3421 /* FALLTHROUGH */ 3422 case IPSTATS_MIB_OUTNOROUTES: 3423 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 3424 ipstats_mib_noroutes); 3425 break; 3426 } 3427 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); 3428 kfree_skb(skb); 3429 return 0; 3430 } 3431 3432 static int ip6_pkt_discard(struct sk_buff *skb) 3433 { 3434 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 3435 } 3436 3437 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) 3438 { 3439 skb->dev = skb_dst(skb)->dev; 3440 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 3441 } 3442 3443 static int ip6_pkt_prohibit(struct sk_buff *skb) 3444 { 3445 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 3446 } 3447 3448 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb) 3449 { 3450 skb->dev = skb_dst(skb)->dev; 3451 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 3452 } 3453 3454 /* 3455 * Allocate a dst for local (unicast / anycast) address. 3456 */ 3457 3458 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, 3459 const struct in6_addr *addr, 3460 bool anycast) 3461 { 3462 u32 tb_id; 3463 struct net *net = dev_net(idev->dev); 3464 struct net_device *dev = idev->dev; 3465 struct rt6_info *rt; 3466 3467 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT); 3468 if (!rt) 3469 return ERR_PTR(-ENOMEM); 3470 3471 in6_dev_hold(idev); 3472 3473 rt->dst.flags |= DST_HOST; 3474 rt->dst.input = ip6_input; 3475 rt->dst.output = ip6_output; 3476 rt->rt6i_idev = idev; 3477 3478 rt->rt6i_protocol = RTPROT_KERNEL; 3479 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; 3480 if (anycast) 3481 rt->rt6i_flags |= RTF_ANYCAST; 3482 else 3483 rt->rt6i_flags |= RTF_LOCAL; 3484 3485 rt->rt6i_gateway = *addr; 3486 rt->rt6i_dst.addr = *addr; 3487 rt->rt6i_dst.plen = 128; 3488 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL; 3489 rt->rt6i_table = fib6_get_table(net, tb_id); 3490 3491 return rt; 3492 } 3493 3494 /* remove deleted ip from prefsrc entries */ 3495 struct arg_dev_net_ip { 3496 struct net_device *dev; 3497 struct net *net; 3498 struct in6_addr *addr; 3499 }; 3500 3501 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) 3502 { 3503 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev; 3504 struct net *net = ((struct arg_dev_net_ip *)arg)->net; 3505 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; 3506 3507 if (((void *)rt->dst.dev == dev || !dev) && 3508 rt != net->ipv6.ip6_null_entry && 3509 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { 3510 spin_lock_bh(&rt6_exception_lock); 3511 /* remove prefsrc entry */ 3512 rt->rt6i_prefsrc.plen = 0; 3513 /* need to update cache as well */ 3514 rt6_exceptions_remove_prefsrc(rt); 3515 spin_unlock_bh(&rt6_exception_lock); 3516 } 3517 return 0; 3518 } 3519 3520 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) 3521 { 3522 struct net *net = dev_net(ifp->idev->dev); 3523 struct arg_dev_net_ip adni = { 3524 .dev = ifp->idev->dev, 3525 .net = net, 3526 .addr = &ifp->addr, 3527 }; 3528 fib6_clean_all(net, fib6_remove_prefsrc, &adni); 3529 } 3530 3531 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) 3532 3533 /* Remove routers and update dst entries when gateway turn into host. */ 3534 static int fib6_clean_tohost(struct rt6_info *rt, void *arg) 3535 { 3536 struct in6_addr *gateway = (struct in6_addr *)arg; 3537 3538 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && 3539 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { 3540 return -1; 3541 } 3542 3543 /* Further clean up cached routes in exception table. 3544 * This is needed because cached route may have a different 3545 * gateway than its 'parent' in the case of an ip redirect. 3546 */ 3547 rt6_exceptions_clean_tohost(rt, gateway); 3548 3549 return 0; 3550 } 3551 3552 void rt6_clean_tohost(struct net *net, struct in6_addr *gateway) 3553 { 3554 fib6_clean_all(net, fib6_clean_tohost, gateway); 3555 } 3556 3557 struct arg_netdev_event { 3558 const struct net_device *dev; 3559 union { 3560 unsigned int nh_flags; 3561 unsigned long event; 3562 }; 3563 }; 3564 3565 static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt) 3566 { 3567 struct rt6_info *iter; 3568 struct fib6_node *fn; 3569 3570 fn = rcu_dereference_protected(rt->rt6i_node, 3571 lockdep_is_held(&rt->rt6i_table->tb6_lock)); 3572 iter = rcu_dereference_protected(fn->leaf, 3573 lockdep_is_held(&rt->rt6i_table->tb6_lock)); 3574 while (iter) { 3575 if (iter->rt6i_metric == rt->rt6i_metric && 3576 rt6_qualify_for_ecmp(iter)) 3577 return iter; 3578 iter = rcu_dereference_protected(iter->rt6_next, 3579 lockdep_is_held(&rt->rt6i_table->tb6_lock)); 3580 } 3581 3582 return NULL; 3583 } 3584 3585 static bool rt6_is_dead(const struct rt6_info *rt) 3586 { 3587 if (rt->rt6i_nh_flags & RTNH_F_DEAD || 3588 (rt->rt6i_nh_flags & RTNH_F_LINKDOWN && 3589 rt->rt6i_idev->cnf.ignore_routes_with_linkdown)) 3590 return true; 3591 3592 return false; 3593 } 3594 3595 static int rt6_multipath_total_weight(const struct rt6_info *rt) 3596 { 3597 struct rt6_info *iter; 3598 int total = 0; 3599 3600 if (!rt6_is_dead(rt)) 3601 total += rt->rt6i_nh_weight; 3602 3603 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) { 3604 if (!rt6_is_dead(iter)) 3605 total += iter->rt6i_nh_weight; 3606 } 3607 3608 return total; 3609 } 3610 3611 static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total) 3612 { 3613 int upper_bound = -1; 3614 3615 if (!rt6_is_dead(rt)) { 3616 *weight += rt->rt6i_nh_weight; 3617 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, 3618 total) - 1; 3619 } 3620 atomic_set(&rt->rt6i_nh_upper_bound, upper_bound); 3621 } 3622 3623 static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total) 3624 { 3625 struct rt6_info *iter; 3626 int weight = 0; 3627 3628 rt6_upper_bound_set(rt, &weight, total); 3629 3630 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) 3631 rt6_upper_bound_set(iter, &weight, total); 3632 } 3633 3634 void rt6_multipath_rebalance(struct rt6_info *rt) 3635 { 3636 struct rt6_info *first; 3637 int total; 3638 3639 /* In case the entire multipath route was marked for flushing, 3640 * then there is no need to rebalance upon the removal of every 3641 * sibling route. 3642 */ 3643 if (!rt->rt6i_nsiblings || rt->should_flush) 3644 return; 3645 3646 /* During lookup routes are evaluated in order, so we need to 3647 * make sure upper bounds are assigned from the first sibling 3648 * onwards. 3649 */ 3650 first = rt6_multipath_first_sibling(rt); 3651 if (WARN_ON_ONCE(!first)) 3652 return; 3653 3654 total = rt6_multipath_total_weight(first); 3655 rt6_multipath_upper_bound_set(first, total); 3656 } 3657 3658 static int fib6_ifup(struct rt6_info *rt, void *p_arg) 3659 { 3660 const struct arg_netdev_event *arg = p_arg; 3661 const struct net *net = dev_net(arg->dev); 3662 3663 if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) { 3664 rt->rt6i_nh_flags &= ~arg->nh_flags; 3665 fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt); 3666 rt6_multipath_rebalance(rt); 3667 } 3668 3669 return 0; 3670 } 3671 3672 void rt6_sync_up(struct net_device *dev, unsigned int nh_flags) 3673 { 3674 struct arg_netdev_event arg = { 3675 .dev = dev, 3676 { 3677 .nh_flags = nh_flags, 3678 }, 3679 }; 3680 3681 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev)) 3682 arg.nh_flags |= RTNH_F_LINKDOWN; 3683 3684 fib6_clean_all(dev_net(dev), fib6_ifup, &arg); 3685 } 3686 3687 static bool rt6_multipath_uses_dev(const struct rt6_info *rt, 3688 const struct net_device *dev) 3689 { 3690 struct rt6_info *iter; 3691 3692 if (rt->dst.dev == dev) 3693 return true; 3694 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) 3695 if (iter->dst.dev == dev) 3696 return true; 3697 3698 return false; 3699 } 3700 3701 static void rt6_multipath_flush(struct rt6_info *rt) 3702 { 3703 struct rt6_info *iter; 3704 3705 rt->should_flush = 1; 3706 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) 3707 iter->should_flush = 1; 3708 } 3709 3710 static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt, 3711 const struct net_device *down_dev) 3712 { 3713 struct rt6_info *iter; 3714 unsigned int dead = 0; 3715 3716 if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD) 3717 dead++; 3718 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) 3719 if (iter->dst.dev == down_dev || 3720 iter->rt6i_nh_flags & RTNH_F_DEAD) 3721 dead++; 3722 3723 return dead; 3724 } 3725 3726 static void rt6_multipath_nh_flags_set(struct rt6_info *rt, 3727 const struct net_device *dev, 3728 unsigned int nh_flags) 3729 { 3730 struct rt6_info *iter; 3731 3732 if (rt->dst.dev == dev) 3733 rt->rt6i_nh_flags |= nh_flags; 3734 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) 3735 if (iter->dst.dev == dev) 3736 iter->rt6i_nh_flags |= nh_flags; 3737 } 3738 3739 /* called with write lock held for table with rt */ 3740 static int fib6_ifdown(struct rt6_info *rt, void *p_arg) 3741 { 3742 const struct arg_netdev_event *arg = p_arg; 3743 const struct net_device *dev = arg->dev; 3744 const struct net *net = dev_net(dev); 3745 3746 if (rt == net->ipv6.ip6_null_entry) 3747 return 0; 3748 3749 switch (arg->event) { 3750 case NETDEV_UNREGISTER: 3751 return rt->dst.dev == dev ? -1 : 0; 3752 case NETDEV_DOWN: 3753 if (rt->should_flush) 3754 return -1; 3755 if (!rt->rt6i_nsiblings) 3756 return rt->dst.dev == dev ? -1 : 0; 3757 if (rt6_multipath_uses_dev(rt, dev)) { 3758 unsigned int count; 3759 3760 count = rt6_multipath_dead_count(rt, dev); 3761 if (rt->rt6i_nsiblings + 1 == count) { 3762 rt6_multipath_flush(rt); 3763 return -1; 3764 } 3765 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD | 3766 RTNH_F_LINKDOWN); 3767 fib6_update_sernum(rt); 3768 rt6_multipath_rebalance(rt); 3769 } 3770 return -2; 3771 case NETDEV_CHANGE: 3772 if (rt->dst.dev != dev || 3773 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) 3774 break; 3775 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN; 3776 rt6_multipath_rebalance(rt); 3777 break; 3778 } 3779 3780 return 0; 3781 } 3782 3783 void rt6_sync_down_dev(struct net_device *dev, unsigned long event) 3784 { 3785 struct arg_netdev_event arg = { 3786 .dev = dev, 3787 { 3788 .event = event, 3789 }, 3790 }; 3791 3792 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg); 3793 } 3794 3795 void rt6_disable_ip(struct net_device *dev, unsigned long event) 3796 { 3797 rt6_sync_down_dev(dev, event); 3798 rt6_uncached_list_flush_dev(dev_net(dev), dev); 3799 neigh_ifdown(&nd_tbl, dev); 3800 } 3801 3802 struct rt6_mtu_change_arg { 3803 struct net_device *dev; 3804 unsigned int mtu; 3805 }; 3806 3807 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) 3808 { 3809 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 3810 struct inet6_dev *idev; 3811 3812 /* In IPv6 pmtu discovery is not optional, 3813 so that RTAX_MTU lock cannot disable it. 3814 We still use this lock to block changes 3815 caused by addrconf/ndisc. 3816 */ 3817 3818 idev = __in6_dev_get(arg->dev); 3819 if (!idev) 3820 return 0; 3821 3822 /* For administrative MTU increase, there is no way to discover 3823 IPv6 PMTU increase, so PMTU increase should be updated here. 3824 Since RFC 1981 doesn't include administrative MTU increase 3825 update PMTU increase is a MUST. (i.e. jumbo frame) 3826 */ 3827 /* 3828 If new MTU is less than route PMTU, this new MTU will be the 3829 lowest MTU in the path, update the route PMTU to reflect PMTU 3830 decreases; if new MTU is greater than route PMTU, and the 3831 old MTU is the lowest MTU in the path, update the route PMTU 3832 to reflect the increase. In this case if the other nodes' MTU 3833 also have the lowest MTU, TOO BIG MESSAGE will be lead to 3834 PMTU discovery. 3835 */ 3836 if (rt->dst.dev == arg->dev && 3837 dst_metric_raw(&rt->dst, RTAX_MTU) && 3838 !dst_metric_locked(&rt->dst, RTAX_MTU)) { 3839 spin_lock_bh(&rt6_exception_lock); 3840 if (dst_mtu(&rt->dst) >= arg->mtu || 3841 (dst_mtu(&rt->dst) < arg->mtu && 3842 dst_mtu(&rt->dst) == idev->cnf.mtu6)) { 3843 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); 3844 } 3845 rt6_exceptions_update_pmtu(rt, arg->mtu); 3846 spin_unlock_bh(&rt6_exception_lock); 3847 } 3848 return 0; 3849 } 3850 3851 void rt6_mtu_change(struct net_device *dev, unsigned int mtu) 3852 { 3853 struct rt6_mtu_change_arg arg = { 3854 .dev = dev, 3855 .mtu = mtu, 3856 }; 3857 3858 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg); 3859 } 3860 3861 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 3862 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 3863 [RTA_OIF] = { .type = NLA_U32 }, 3864 [RTA_IIF] = { .type = NLA_U32 }, 3865 [RTA_PRIORITY] = { .type = NLA_U32 }, 3866 [RTA_METRICS] = { .type = NLA_NESTED }, 3867 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 3868 [RTA_PREF] = { .type = NLA_U8 }, 3869 [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, 3870 [RTA_ENCAP] = { .type = NLA_NESTED }, 3871 [RTA_EXPIRES] = { .type = NLA_U32 }, 3872 [RTA_UID] = { .type = NLA_U32 }, 3873 [RTA_MARK] = { .type = NLA_U32 }, 3874 }; 3875 3876 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 3877 struct fib6_config *cfg, 3878 struct netlink_ext_ack *extack) 3879 { 3880 struct rtmsg *rtm; 3881 struct nlattr *tb[RTA_MAX+1]; 3882 unsigned int pref; 3883 int err; 3884 3885 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, 3886 NULL); 3887 if (err < 0) 3888 goto errout; 3889 3890 err = -EINVAL; 3891 rtm = nlmsg_data(nlh); 3892 memset(cfg, 0, sizeof(*cfg)); 3893 3894 cfg->fc_table = rtm->rtm_table; 3895 cfg->fc_dst_len = rtm->rtm_dst_len; 3896 cfg->fc_src_len = rtm->rtm_src_len; 3897 cfg->fc_flags = RTF_UP; 3898 cfg->fc_protocol = rtm->rtm_protocol; 3899 cfg->fc_type = rtm->rtm_type; 3900 3901 if (rtm->rtm_type == RTN_UNREACHABLE || 3902 rtm->rtm_type == RTN_BLACKHOLE || 3903 rtm->rtm_type == RTN_PROHIBIT || 3904 rtm->rtm_type == RTN_THROW) 3905 cfg->fc_flags |= RTF_REJECT; 3906 3907 if (rtm->rtm_type == RTN_LOCAL) 3908 cfg->fc_flags |= RTF_LOCAL; 3909 3910 if (rtm->rtm_flags & RTM_F_CLONED) 3911 cfg->fc_flags |= RTF_CACHE; 3912 3913 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK); 3914 3915 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; 3916 cfg->fc_nlinfo.nlh = nlh; 3917 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 3918 3919 if (tb[RTA_GATEWAY]) { 3920 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); 3921 cfg->fc_flags |= RTF_GATEWAY; 3922 } 3923 3924 if (tb[RTA_DST]) { 3925 int plen = (rtm->rtm_dst_len + 7) >> 3; 3926 3927 if (nla_len(tb[RTA_DST]) < plen) 3928 goto errout; 3929 3930 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 3931 } 3932 3933 if (tb[RTA_SRC]) { 3934 int plen = (rtm->rtm_src_len + 7) >> 3; 3935 3936 if (nla_len(tb[RTA_SRC]) < plen) 3937 goto errout; 3938 3939 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 3940 } 3941 3942 if (tb[RTA_PREFSRC]) 3943 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]); 3944 3945 if (tb[RTA_OIF]) 3946 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 3947 3948 if (tb[RTA_PRIORITY]) 3949 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 3950 3951 if (tb[RTA_METRICS]) { 3952 cfg->fc_mx = nla_data(tb[RTA_METRICS]); 3953 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 3954 } 3955 3956 if (tb[RTA_TABLE]) 3957 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 3958 3959 if (tb[RTA_MULTIPATH]) { 3960 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); 3961 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); 3962 3963 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp, 3964 cfg->fc_mp_len, extack); 3965 if (err < 0) 3966 goto errout; 3967 } 3968 3969 if (tb[RTA_PREF]) { 3970 pref = nla_get_u8(tb[RTA_PREF]); 3971 if (pref != ICMPV6_ROUTER_PREF_LOW && 3972 pref != ICMPV6_ROUTER_PREF_HIGH) 3973 pref = ICMPV6_ROUTER_PREF_MEDIUM; 3974 cfg->fc_flags |= RTF_PREF(pref); 3975 } 3976 3977 if (tb[RTA_ENCAP]) 3978 cfg->fc_encap = tb[RTA_ENCAP]; 3979 3980 if (tb[RTA_ENCAP_TYPE]) { 3981 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); 3982 3983 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack); 3984 if (err < 0) 3985 goto errout; 3986 } 3987 3988 if (tb[RTA_EXPIRES]) { 3989 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ); 3990 3991 if (addrconf_finite_timeout(timeout)) { 3992 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ); 3993 cfg->fc_flags |= RTF_EXPIRES; 3994 } 3995 } 3996 3997 err = 0; 3998 errout: 3999 return err; 4000 } 4001 4002 struct rt6_nh { 4003 struct rt6_info *rt6_info; 4004 struct fib6_config r_cfg; 4005 struct mx6_config mxc; 4006 struct list_head next; 4007 }; 4008 4009 static void ip6_print_replace_route_err(struct list_head *rt6_nh_list) 4010 { 4011 struct rt6_nh *nh; 4012 4013 list_for_each_entry(nh, rt6_nh_list, next) { 4014 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n", 4015 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway, 4016 nh->r_cfg.fc_ifindex); 4017 } 4018 } 4019 4020 static int ip6_route_info_append(struct list_head *rt6_nh_list, 4021 struct rt6_info *rt, struct fib6_config *r_cfg) 4022 { 4023 struct rt6_nh *nh; 4024 int err = -EEXIST; 4025 4026 list_for_each_entry(nh, rt6_nh_list, next) { 4027 /* check if rt6_info already exists */ 4028 if (rt6_duplicate_nexthop(nh->rt6_info, rt)) 4029 return err; 4030 } 4031 4032 nh = kzalloc(sizeof(*nh), GFP_KERNEL); 4033 if (!nh) 4034 return -ENOMEM; 4035 nh->rt6_info = rt; 4036 err = ip6_convert_metrics(&nh->mxc, r_cfg); 4037 if (err) { 4038 kfree(nh); 4039 return err; 4040 } 4041 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); 4042 list_add_tail(&nh->next, rt6_nh_list); 4043 4044 return 0; 4045 } 4046 4047 static void ip6_route_mpath_notify(struct rt6_info *rt, 4048 struct rt6_info *rt_last, 4049 struct nl_info *info, 4050 __u16 nlflags) 4051 { 4052 /* if this is an APPEND route, then rt points to the first route 4053 * inserted and rt_last points to last route inserted. Userspace 4054 * wants a consistent dump of the route which starts at the first 4055 * nexthop. Since sibling routes are always added at the end of 4056 * the list, find the first sibling of the last route appended 4057 */ 4058 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) { 4059 rt = list_first_entry(&rt_last->rt6i_siblings, 4060 struct rt6_info, 4061 rt6i_siblings); 4062 } 4063 4064 if (rt) 4065 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); 4066 } 4067 4068 static int ip6_route_multipath_add(struct fib6_config *cfg, 4069 struct netlink_ext_ack *extack) 4070 { 4071 struct rt6_info *rt_notif = NULL, *rt_last = NULL; 4072 struct nl_info *info = &cfg->fc_nlinfo; 4073 struct fib6_config r_cfg; 4074 struct rtnexthop *rtnh; 4075 struct rt6_info *rt; 4076 struct rt6_nh *err_nh; 4077 struct rt6_nh *nh, *nh_safe; 4078 __u16 nlflags; 4079 int remaining; 4080 int attrlen; 4081 int err = 1; 4082 int nhn = 0; 4083 int replace = (cfg->fc_nlinfo.nlh && 4084 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); 4085 LIST_HEAD(rt6_nh_list); 4086 4087 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE; 4088 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND) 4089 nlflags |= NLM_F_APPEND; 4090 4091 remaining = cfg->fc_mp_len; 4092 rtnh = (struct rtnexthop *)cfg->fc_mp; 4093 4094 /* Parse a Multipath Entry and build a list (rt6_nh_list) of 4095 * rt6_info structs per nexthop 4096 */ 4097 while (rtnh_ok(rtnh, remaining)) { 4098 memcpy(&r_cfg, cfg, sizeof(*cfg)); 4099 if (rtnh->rtnh_ifindex) 4100 r_cfg.fc_ifindex = rtnh->rtnh_ifindex; 4101 4102 attrlen = rtnh_attrlen(rtnh); 4103 if (attrlen > 0) { 4104 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 4105 4106 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 4107 if (nla) { 4108 r_cfg.fc_gateway = nla_get_in6_addr(nla); 4109 r_cfg.fc_flags |= RTF_GATEWAY; 4110 } 4111 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); 4112 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 4113 if (nla) 4114 r_cfg.fc_encap_type = nla_get_u16(nla); 4115 } 4116 4117 rt = ip6_route_info_create(&r_cfg, extack); 4118 if (IS_ERR(rt)) { 4119 err = PTR_ERR(rt); 4120 rt = NULL; 4121 goto cleanup; 4122 } 4123 4124 rt->rt6i_nh_weight = rtnh->rtnh_hops + 1; 4125 4126 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg); 4127 if (err) { 4128 dst_release_immediate(&rt->dst); 4129 goto cleanup; 4130 } 4131 4132 rtnh = rtnh_next(rtnh, &remaining); 4133 } 4134 4135 /* for add and replace send one notification with all nexthops. 4136 * Skip the notification in fib6_add_rt2node and send one with 4137 * the full route when done 4138 */ 4139 info->skip_notify = 1; 4140 4141 err_nh = NULL; 4142 list_for_each_entry(nh, &rt6_nh_list, next) { 4143 rt_last = nh->rt6_info; 4144 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack); 4145 /* save reference to first route for notification */ 4146 if (!rt_notif && !err) 4147 rt_notif = nh->rt6_info; 4148 4149 /* nh->rt6_info is used or freed at this point, reset to NULL*/ 4150 nh->rt6_info = NULL; 4151 if (err) { 4152 if (replace && nhn) 4153 ip6_print_replace_route_err(&rt6_nh_list); 4154 err_nh = nh; 4155 goto add_errout; 4156 } 4157 4158 /* Because each route is added like a single route we remove 4159 * these flags after the first nexthop: if there is a collision, 4160 * we have already failed to add the first nexthop: 4161 * fib6_add_rt2node() has rejected it; when replacing, old 4162 * nexthops have been replaced by first new, the rest should 4163 * be added to it. 4164 */ 4165 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | 4166 NLM_F_REPLACE); 4167 nhn++; 4168 } 4169 4170 /* success ... tell user about new route */ 4171 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); 4172 goto cleanup; 4173 4174 add_errout: 4175 /* send notification for routes that were added so that 4176 * the delete notifications sent by ip6_route_del are 4177 * coherent 4178 */ 4179 if (rt_notif) 4180 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); 4181 4182 /* Delete routes that were already added */ 4183 list_for_each_entry(nh, &rt6_nh_list, next) { 4184 if (err_nh == nh) 4185 break; 4186 ip6_route_del(&nh->r_cfg, extack); 4187 } 4188 4189 cleanup: 4190 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { 4191 if (nh->rt6_info) 4192 dst_release_immediate(&nh->rt6_info->dst); 4193 kfree(nh->mxc.mx); 4194 list_del(&nh->next); 4195 kfree(nh); 4196 } 4197 4198 return err; 4199 } 4200 4201 static int ip6_route_multipath_del(struct fib6_config *cfg, 4202 struct netlink_ext_ack *extack) 4203 { 4204 struct fib6_config r_cfg; 4205 struct rtnexthop *rtnh; 4206 int remaining; 4207 int attrlen; 4208 int err = 1, last_err = 0; 4209 4210 remaining = cfg->fc_mp_len; 4211 rtnh = (struct rtnexthop *)cfg->fc_mp; 4212 4213 /* Parse a Multipath Entry */ 4214 while (rtnh_ok(rtnh, remaining)) { 4215 memcpy(&r_cfg, cfg, sizeof(*cfg)); 4216 if (rtnh->rtnh_ifindex) 4217 r_cfg.fc_ifindex = rtnh->rtnh_ifindex; 4218 4219 attrlen = rtnh_attrlen(rtnh); 4220 if (attrlen > 0) { 4221 struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 4222 4223 nla = nla_find(attrs, attrlen, RTA_GATEWAY); 4224 if (nla) { 4225 nla_memcpy(&r_cfg.fc_gateway, nla, 16); 4226 r_cfg.fc_flags |= RTF_GATEWAY; 4227 } 4228 } 4229 err = ip6_route_del(&r_cfg, extack); 4230 if (err) 4231 last_err = err; 4232 4233 rtnh = rtnh_next(rtnh, &remaining); 4234 } 4235 4236 return last_err; 4237 } 4238 4239 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, 4240 struct netlink_ext_ack *extack) 4241 { 4242 struct fib6_config cfg; 4243 int err; 4244 4245 err = rtm_to_fib6_config(skb, nlh, &cfg, extack); 4246 if (err < 0) 4247 return err; 4248 4249 if (cfg.fc_mp) 4250 return ip6_route_multipath_del(&cfg, extack); 4251 else { 4252 cfg.fc_delete_all_nh = 1; 4253 return ip6_route_del(&cfg, extack); 4254 } 4255 } 4256 4257 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, 4258 struct netlink_ext_ack *extack) 4259 { 4260 struct fib6_config cfg; 4261 int err; 4262 4263 err = rtm_to_fib6_config(skb, nlh, &cfg, extack); 4264 if (err < 0) 4265 return err; 4266 4267 if (cfg.fc_mp) 4268 return ip6_route_multipath_add(&cfg, extack); 4269 else 4270 return ip6_route_add(&cfg, extack); 4271 } 4272 4273 static size_t rt6_nlmsg_size(struct rt6_info *rt) 4274 { 4275 int nexthop_len = 0; 4276 4277 if (rt->rt6i_nsiblings) { 4278 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ 4279 + NLA_ALIGN(sizeof(struct rtnexthop)) 4280 + nla_total_size(16) /* RTA_GATEWAY */ 4281 + lwtunnel_get_encap_size(rt->dst.lwtstate); 4282 4283 nexthop_len *= rt->rt6i_nsiblings; 4284 } 4285 4286 return NLMSG_ALIGN(sizeof(struct rtmsg)) 4287 + nla_total_size(16) /* RTA_SRC */ 4288 + nla_total_size(16) /* RTA_DST */ 4289 + nla_total_size(16) /* RTA_GATEWAY */ 4290 + nla_total_size(16) /* RTA_PREFSRC */ 4291 + nla_total_size(4) /* RTA_TABLE */ 4292 + nla_total_size(4) /* RTA_IIF */ 4293 + nla_total_size(4) /* RTA_OIF */ 4294 + nla_total_size(4) /* RTA_PRIORITY */ 4295 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 4296 + nla_total_size(sizeof(struct rta_cacheinfo)) 4297 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ 4298 + nla_total_size(1) /* RTA_PREF */ 4299 + lwtunnel_get_encap_size(rt->dst.lwtstate) 4300 + nexthop_len; 4301 } 4302 4303 static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, 4304 unsigned int *flags, bool skip_oif) 4305 { 4306 if (rt->rt6i_nh_flags & RTNH_F_DEAD) 4307 *flags |= RTNH_F_DEAD; 4308 4309 if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) { 4310 *flags |= RTNH_F_LINKDOWN; 4311 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown) 4312 *flags |= RTNH_F_DEAD; 4313 } 4314 4315 if (rt->rt6i_flags & RTF_GATEWAY) { 4316 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) 4317 goto nla_put_failure; 4318 } 4319 4320 *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK); 4321 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD) 4322 *flags |= RTNH_F_OFFLOAD; 4323 4324 /* not needed for multipath encoding b/c it has a rtnexthop struct */ 4325 if (!skip_oif && rt->dst.dev && 4326 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) 4327 goto nla_put_failure; 4328 4329 if (rt->dst.lwtstate && 4330 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0) 4331 goto nla_put_failure; 4332 4333 return 0; 4334 4335 nla_put_failure: 4336 return -EMSGSIZE; 4337 } 4338 4339 /* add multipath next hop */ 4340 static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt) 4341 { 4342 struct rtnexthop *rtnh; 4343 unsigned int flags = 0; 4344 4345 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); 4346 if (!rtnh) 4347 goto nla_put_failure; 4348 4349 rtnh->rtnh_hops = rt->rt6i_nh_weight - 1; 4350 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0; 4351 4352 if (rt6_nexthop_info(skb, rt, &flags, true) < 0) 4353 goto nla_put_failure; 4354 4355 rtnh->rtnh_flags = flags; 4356 4357 /* length of rtnetlink header + attributes */ 4358 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; 4359 4360 return 0; 4361 4362 nla_put_failure: 4363 return -EMSGSIZE; 4364 } 4365 4366 static int rt6_fill_node(struct net *net, 4367 struct sk_buff *skb, struct rt6_info *rt, 4368 struct in6_addr *dst, struct in6_addr *src, 4369 int iif, int type, u32 portid, u32 seq, 4370 unsigned int flags) 4371 { 4372 u32 metrics[RTAX_MAX]; 4373 struct rtmsg *rtm; 4374 struct nlmsghdr *nlh; 4375 long expires; 4376 u32 table; 4377 4378 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); 4379 if (!nlh) 4380 return -EMSGSIZE; 4381 4382 rtm = nlmsg_data(nlh); 4383 rtm->rtm_family = AF_INET6; 4384 rtm->rtm_dst_len = rt->rt6i_dst.plen; 4385 rtm->rtm_src_len = rt->rt6i_src.plen; 4386 rtm->rtm_tos = 0; 4387 if (rt->rt6i_table) 4388 table = rt->rt6i_table->tb6_id; 4389 else 4390 table = RT6_TABLE_UNSPEC; 4391 rtm->rtm_table = table; 4392 if (nla_put_u32(skb, RTA_TABLE, table)) 4393 goto nla_put_failure; 4394 if (rt->rt6i_flags & RTF_REJECT) { 4395 switch (rt->dst.error) { 4396 case -EINVAL: 4397 rtm->rtm_type = RTN_BLACKHOLE; 4398 break; 4399 case -EACCES: 4400 rtm->rtm_type = RTN_PROHIBIT; 4401 break; 4402 case -EAGAIN: 4403 rtm->rtm_type = RTN_THROW; 4404 break; 4405 default: 4406 rtm->rtm_type = RTN_UNREACHABLE; 4407 break; 4408 } 4409 } 4410 else if (rt->rt6i_flags & RTF_LOCAL) 4411 rtm->rtm_type = RTN_LOCAL; 4412 else if (rt->rt6i_flags & RTF_ANYCAST) 4413 rtm->rtm_type = RTN_ANYCAST; 4414 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) 4415 rtm->rtm_type = RTN_LOCAL; 4416 else 4417 rtm->rtm_type = RTN_UNICAST; 4418 rtm->rtm_flags = 0; 4419 rtm->rtm_scope = RT_SCOPE_UNIVERSE; 4420 rtm->rtm_protocol = rt->rt6i_protocol; 4421 4422 if (rt->rt6i_flags & RTF_CACHE) 4423 rtm->rtm_flags |= RTM_F_CLONED; 4424 4425 if (dst) { 4426 if (nla_put_in6_addr(skb, RTA_DST, dst)) 4427 goto nla_put_failure; 4428 rtm->rtm_dst_len = 128; 4429 } else if (rtm->rtm_dst_len) 4430 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr)) 4431 goto nla_put_failure; 4432 #ifdef CONFIG_IPV6_SUBTREES 4433 if (src) { 4434 if (nla_put_in6_addr(skb, RTA_SRC, src)) 4435 goto nla_put_failure; 4436 rtm->rtm_src_len = 128; 4437 } else if (rtm->rtm_src_len && 4438 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr)) 4439 goto nla_put_failure; 4440 #endif 4441 if (iif) { 4442 #ifdef CONFIG_IPV6_MROUTE 4443 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) { 4444 int err = ip6mr_get_route(net, skb, rtm, portid); 4445 4446 if (err == 0) 4447 return 0; 4448 if (err < 0) 4449 goto nla_put_failure; 4450 } else 4451 #endif 4452 if (nla_put_u32(skb, RTA_IIF, iif)) 4453 goto nla_put_failure; 4454 } else if (dst) { 4455 struct in6_addr saddr_buf; 4456 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && 4457 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) 4458 goto nla_put_failure; 4459 } 4460 4461 if (rt->rt6i_prefsrc.plen) { 4462 struct in6_addr saddr_buf; 4463 saddr_buf = rt->rt6i_prefsrc.addr; 4464 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) 4465 goto nla_put_failure; 4466 } 4467 4468 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); 4469 if (rt->rt6i_pmtu) 4470 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu; 4471 if (rtnetlink_put_metrics(skb, metrics) < 0) 4472 goto nla_put_failure; 4473 4474 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) 4475 goto nla_put_failure; 4476 4477 /* For multipath routes, walk the siblings list and add 4478 * each as a nexthop within RTA_MULTIPATH. 4479 */ 4480 if (rt->rt6i_nsiblings) { 4481 struct rt6_info *sibling, *next_sibling; 4482 struct nlattr *mp; 4483 4484 mp = nla_nest_start(skb, RTA_MULTIPATH); 4485 if (!mp) 4486 goto nla_put_failure; 4487 4488 if (rt6_add_nexthop(skb, rt) < 0) 4489 goto nla_put_failure; 4490 4491 list_for_each_entry_safe(sibling, next_sibling, 4492 &rt->rt6i_siblings, rt6i_siblings) { 4493 if (rt6_add_nexthop(skb, sibling) < 0) 4494 goto nla_put_failure; 4495 } 4496 4497 nla_nest_end(skb, mp); 4498 } else { 4499 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0) 4500 goto nla_put_failure; 4501 } 4502 4503 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; 4504 4505 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) 4506 goto nla_put_failure; 4507 4508 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) 4509 goto nla_put_failure; 4510 4511 4512 nlmsg_end(skb, nlh); 4513 return 0; 4514 4515 nla_put_failure: 4516 nlmsg_cancel(skb, nlh); 4517 return -EMSGSIZE; 4518 } 4519 4520 int rt6_dump_route(struct rt6_info *rt, void *p_arg) 4521 { 4522 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 4523 struct net *net = arg->net; 4524 4525 if (rt == net->ipv6.ip6_null_entry) 4526 return 0; 4527 4528 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { 4529 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); 4530 4531 /* user wants prefix routes only */ 4532 if (rtm->rtm_flags & RTM_F_PREFIX && 4533 !(rt->rt6i_flags & RTF_PREFIX_RT)) { 4534 /* success since this is not a prefix route */ 4535 return 1; 4536 } 4537 } 4538 4539 return rt6_fill_node(net, 4540 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, 4541 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, 4542 NLM_F_MULTI); 4543 } 4544 4545 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 4546 struct netlink_ext_ack *extack) 4547 { 4548 struct net *net = sock_net(in_skb->sk); 4549 struct nlattr *tb[RTA_MAX+1]; 4550 int err, iif = 0, oif = 0; 4551 struct dst_entry *dst; 4552 struct rt6_info *rt; 4553 struct sk_buff *skb; 4554 struct rtmsg *rtm; 4555 struct flowi6 fl6; 4556 bool fibmatch; 4557 4558 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, 4559 extack); 4560 if (err < 0) 4561 goto errout; 4562 4563 err = -EINVAL; 4564 memset(&fl6, 0, sizeof(fl6)); 4565 rtm = nlmsg_data(nlh); 4566 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0); 4567 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH); 4568 4569 if (tb[RTA_SRC]) { 4570 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 4571 goto errout; 4572 4573 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]); 4574 } 4575 4576 if (tb[RTA_DST]) { 4577 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 4578 goto errout; 4579 4580 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]); 4581 } 4582 4583 if (tb[RTA_IIF]) 4584 iif = nla_get_u32(tb[RTA_IIF]); 4585 4586 if (tb[RTA_OIF]) 4587 oif = nla_get_u32(tb[RTA_OIF]); 4588 4589 if (tb[RTA_MARK]) 4590 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); 4591 4592 if (tb[RTA_UID]) 4593 fl6.flowi6_uid = make_kuid(current_user_ns(), 4594 nla_get_u32(tb[RTA_UID])); 4595 else 4596 fl6.flowi6_uid = iif ? INVALID_UID : current_uid(); 4597 4598 if (iif) { 4599 struct net_device *dev; 4600 int flags = 0; 4601 4602 rcu_read_lock(); 4603 4604 dev = dev_get_by_index_rcu(net, iif); 4605 if (!dev) { 4606 rcu_read_unlock(); 4607 err = -ENODEV; 4608 goto errout; 4609 } 4610 4611 fl6.flowi6_iif = iif; 4612 4613 if (!ipv6_addr_any(&fl6.saddr)) 4614 flags |= RT6_LOOKUP_F_HAS_SADDR; 4615 4616 dst = ip6_route_input_lookup(net, dev, &fl6, flags); 4617 4618 rcu_read_unlock(); 4619 } else { 4620 fl6.flowi6_oif = oif; 4621 4622 dst = ip6_route_output(net, NULL, &fl6); 4623 } 4624 4625 4626 rt = container_of(dst, struct rt6_info, dst); 4627 if (rt->dst.error) { 4628 err = rt->dst.error; 4629 ip6_rt_put(rt); 4630 goto errout; 4631 } 4632 4633 if (rt == net->ipv6.ip6_null_entry) { 4634 err = rt->dst.error; 4635 ip6_rt_put(rt); 4636 goto errout; 4637 } 4638 4639 if (fibmatch && rt->from) { 4640 struct rt6_info *ort = rt->from; 4641 4642 dst_hold(&ort->dst); 4643 ip6_rt_put(rt); 4644 rt = ort; 4645 } 4646 4647 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 4648 if (!skb) { 4649 ip6_rt_put(rt); 4650 err = -ENOBUFS; 4651 goto errout; 4652 } 4653 4654 skb_dst_set(skb, &rt->dst); 4655 if (fibmatch) 4656 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif, 4657 RTM_NEWROUTE, NETLINK_CB(in_skb).portid, 4658 nlh->nlmsg_seq, 0); 4659 else 4660 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, 4661 RTM_NEWROUTE, NETLINK_CB(in_skb).portid, 4662 nlh->nlmsg_seq, 0); 4663 if (err < 0) { 4664 kfree_skb(skb); 4665 goto errout; 4666 } 4667 4668 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 4669 errout: 4670 return err; 4671 } 4672 4673 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, 4674 unsigned int nlm_flags) 4675 { 4676 struct sk_buff *skb; 4677 struct net *net = info->nl_net; 4678 u32 seq; 4679 int err; 4680 4681 err = -ENOBUFS; 4682 seq = info->nlh ? info->nlh->nlmsg_seq : 0; 4683 4684 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); 4685 if (!skb) 4686 goto errout; 4687 4688 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, 4689 event, info->portid, seq, nlm_flags); 4690 if (err < 0) { 4691 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 4692 WARN_ON(err == -EMSGSIZE); 4693 kfree_skb(skb); 4694 goto errout; 4695 } 4696 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, 4697 info->nlh, gfp_any()); 4698 return; 4699 errout: 4700 if (err < 0) 4701 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 4702 } 4703 4704 static int ip6_route_dev_notify(struct notifier_block *this, 4705 unsigned long event, void *ptr) 4706 { 4707 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 4708 struct net *net = dev_net(dev); 4709 4710 if (!(dev->flags & IFF_LOOPBACK)) 4711 return NOTIFY_OK; 4712 4713 if (event == NETDEV_REGISTER) { 4714 net->ipv6.ip6_null_entry->dst.dev = dev; 4715 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 4716 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 4717 net->ipv6.ip6_prohibit_entry->dst.dev = dev; 4718 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); 4719 net->ipv6.ip6_blk_hole_entry->dst.dev = dev; 4720 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); 4721 #endif 4722 } else if (event == NETDEV_UNREGISTER && 4723 dev->reg_state != NETREG_UNREGISTERED) { 4724 /* NETDEV_UNREGISTER could be fired for multiple times by 4725 * netdev_wait_allrefs(). Make sure we only call this once. 4726 */ 4727 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev); 4728 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 4729 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev); 4730 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev); 4731 #endif 4732 } 4733 4734 return NOTIFY_OK; 4735 } 4736 4737 /* 4738 * /proc 4739 */ 4740 4741 #ifdef CONFIG_PROC_FS 4742 4743 static const struct file_operations ipv6_route_proc_fops = { 4744 .open = ipv6_route_open, 4745 .read = seq_read, 4746 .llseek = seq_lseek, 4747 .release = seq_release_net, 4748 }; 4749 4750 static int rt6_stats_seq_show(struct seq_file *seq, void *v) 4751 { 4752 struct net *net = (struct net *)seq->private; 4753 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 4754 net->ipv6.rt6_stats->fib_nodes, 4755 net->ipv6.rt6_stats->fib_route_nodes, 4756 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc), 4757 net->ipv6.rt6_stats->fib_rt_entries, 4758 net->ipv6.rt6_stats->fib_rt_cache, 4759 dst_entries_get_slow(&net->ipv6.ip6_dst_ops), 4760 net->ipv6.rt6_stats->fib_discarded_routes); 4761 4762 return 0; 4763 } 4764 4765 static int rt6_stats_seq_open(struct inode *inode, struct file *file) 4766 { 4767 return single_open_net(inode, file, rt6_stats_seq_show); 4768 } 4769 4770 static const struct file_operations rt6_stats_seq_fops = { 4771 .open = rt6_stats_seq_open, 4772 .read = seq_read, 4773 .llseek = seq_lseek, 4774 .release = single_release_net, 4775 }; 4776 #endif /* CONFIG_PROC_FS */ 4777 4778 #ifdef CONFIG_SYSCTL 4779 4780 static 4781 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, 4782 void __user *buffer, size_t *lenp, loff_t *ppos) 4783 { 4784 struct net *net; 4785 int delay; 4786 if (!write) 4787 return -EINVAL; 4788 4789 net = (struct net *)ctl->extra1; 4790 delay = net->ipv6.sysctl.flush_delay; 4791 proc_dointvec(ctl, write, buffer, lenp, ppos); 4792 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); 4793 return 0; 4794 } 4795 4796 struct ctl_table ipv6_route_table_template[] = { 4797 { 4798 .procname = "flush", 4799 .data = &init_net.ipv6.sysctl.flush_delay, 4800 .maxlen = sizeof(int), 4801 .mode = 0200, 4802 .proc_handler = ipv6_sysctl_rtcache_flush 4803 }, 4804 { 4805 .procname = "gc_thresh", 4806 .data = &ip6_dst_ops_template.gc_thresh, 4807 .maxlen = sizeof(int), 4808 .mode = 0644, 4809 .proc_handler = proc_dointvec, 4810 }, 4811 { 4812 .procname = "max_size", 4813 .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 4814 .maxlen = sizeof(int), 4815 .mode = 0644, 4816 .proc_handler = proc_dointvec, 4817 }, 4818 { 4819 .procname = "gc_min_interval", 4820 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 4821 .maxlen = sizeof(int), 4822 .mode = 0644, 4823 .proc_handler = proc_dointvec_jiffies, 4824 }, 4825 { 4826 .procname = "gc_timeout", 4827 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 4828 .maxlen = sizeof(int), 4829 .mode = 0644, 4830 .proc_handler = proc_dointvec_jiffies, 4831 }, 4832 { 4833 .procname = "gc_interval", 4834 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, 4835 .maxlen = sizeof(int), 4836 .mode = 0644, 4837 .proc_handler = proc_dointvec_jiffies, 4838 }, 4839 { 4840 .procname = "gc_elasticity", 4841 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, 4842 .maxlen = sizeof(int), 4843 .mode = 0644, 4844 .proc_handler = proc_dointvec, 4845 }, 4846 { 4847 .procname = "mtu_expires", 4848 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, 4849 .maxlen = sizeof(int), 4850 .mode = 0644, 4851 .proc_handler = proc_dointvec_jiffies, 4852 }, 4853 { 4854 .procname = "min_adv_mss", 4855 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, 4856 .maxlen = sizeof(int), 4857 .mode = 0644, 4858 .proc_handler = proc_dointvec, 4859 }, 4860 { 4861 .procname = "gc_min_interval_ms", 4862 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 4863 .maxlen = sizeof(int), 4864 .mode = 0644, 4865 .proc_handler = proc_dointvec_ms_jiffies, 4866 }, 4867 { } 4868 }; 4869 4870 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) 4871 { 4872 struct ctl_table *table; 4873 4874 table = kmemdup(ipv6_route_table_template, 4875 sizeof(ipv6_route_table_template), 4876 GFP_KERNEL); 4877 4878 if (table) { 4879 table[0].data = &net->ipv6.sysctl.flush_delay; 4880 table[0].extra1 = net; 4881 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 4882 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 4883 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 4884 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; 4885 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 4886 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; 4887 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 4888 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 4889 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 4890 4891 /* Don't export sysctls to unprivileged users */ 4892 if (net->user_ns != &init_user_ns) 4893 table[0].procname = NULL; 4894 } 4895 4896 return table; 4897 } 4898 #endif 4899 4900 static int __net_init ip6_route_net_init(struct net *net) 4901 { 4902 int ret = -ENOMEM; 4903 4904 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 4905 sizeof(net->ipv6.ip6_dst_ops)); 4906 4907 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) 4908 goto out_ip6_dst_ops; 4909 4910 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 4911 sizeof(*net->ipv6.ip6_null_entry), 4912 GFP_KERNEL); 4913 if (!net->ipv6.ip6_null_entry) 4914 goto out_ip6_dst_entries; 4915 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 4916 dst_init_metrics(&net->ipv6.ip6_null_entry->dst, 4917 ip6_template_metrics, true); 4918 4919 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 4920 net->ipv6.fib6_has_custom_rules = false; 4921 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 4922 sizeof(*net->ipv6.ip6_prohibit_entry), 4923 GFP_KERNEL); 4924 if (!net->ipv6.ip6_prohibit_entry) 4925 goto out_ip6_null_entry; 4926 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 4927 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, 4928 ip6_template_metrics, true); 4929 4930 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 4931 sizeof(*net->ipv6.ip6_blk_hole_entry), 4932 GFP_KERNEL); 4933 if (!net->ipv6.ip6_blk_hole_entry) 4934 goto out_ip6_prohibit_entry; 4935 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 4936 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, 4937 ip6_template_metrics, true); 4938 #endif 4939 4940 net->ipv6.sysctl.flush_delay = 0; 4941 net->ipv6.sysctl.ip6_rt_max_size = 4096; 4942 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 4943 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 4944 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 4945 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 4946 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 4947 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 4948 4949 net->ipv6.ip6_rt_gc_expire = 30*HZ; 4950 4951 ret = 0; 4952 out: 4953 return ret; 4954 4955 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 4956 out_ip6_prohibit_entry: 4957 kfree(net->ipv6.ip6_prohibit_entry); 4958 out_ip6_null_entry: 4959 kfree(net->ipv6.ip6_null_entry); 4960 #endif 4961 out_ip6_dst_entries: 4962 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 4963 out_ip6_dst_ops: 4964 goto out; 4965 } 4966 4967 static void __net_exit ip6_route_net_exit(struct net *net) 4968 { 4969 kfree(net->ipv6.ip6_null_entry); 4970 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 4971 kfree(net->ipv6.ip6_prohibit_entry); 4972 kfree(net->ipv6.ip6_blk_hole_entry); 4973 #endif 4974 dst_entries_destroy(&net->ipv6.ip6_dst_ops); 4975 } 4976 4977 static int __net_init ip6_route_net_init_late(struct net *net) 4978 { 4979 #ifdef CONFIG_PROC_FS 4980 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops); 4981 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops); 4982 #endif 4983 return 0; 4984 } 4985 4986 static void __net_exit ip6_route_net_exit_late(struct net *net) 4987 { 4988 #ifdef CONFIG_PROC_FS 4989 remove_proc_entry("ipv6_route", net->proc_net); 4990 remove_proc_entry("rt6_stats", net->proc_net); 4991 #endif 4992 } 4993 4994 static struct pernet_operations ip6_route_net_ops = { 4995 .init = ip6_route_net_init, 4996 .exit = ip6_route_net_exit, 4997 .async = true, 4998 }; 4999 5000 static int __net_init ipv6_inetpeer_init(struct net *net) 5001 { 5002 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); 5003 5004 if (!bp) 5005 return -ENOMEM; 5006 inet_peer_base_init(bp); 5007 net->ipv6.peers = bp; 5008 return 0; 5009 } 5010 5011 static void __net_exit ipv6_inetpeer_exit(struct net *net) 5012 { 5013 struct inet_peer_base *bp = net->ipv6.peers; 5014 5015 net->ipv6.peers = NULL; 5016 inetpeer_invalidate_tree(bp); 5017 kfree(bp); 5018 } 5019 5020 static struct pernet_operations ipv6_inetpeer_ops = { 5021 .init = ipv6_inetpeer_init, 5022 .exit = ipv6_inetpeer_exit, 5023 .async = true, 5024 }; 5025 5026 static struct pernet_operations ip6_route_net_late_ops = { 5027 .init = ip6_route_net_init_late, 5028 .exit = ip6_route_net_exit_late, 5029 .async = true, 5030 }; 5031 5032 static struct notifier_block ip6_route_dev_notifier = { 5033 .notifier_call = ip6_route_dev_notify, 5034 .priority = ADDRCONF_NOTIFY_PRIORITY - 10, 5035 }; 5036 5037 void __init ip6_route_init_special_entries(void) 5038 { 5039 /* Registering of the loopback is done before this portion of code, 5040 * the loopback reference in rt6_info will not be taken, do it 5041 * manually for init_net */ 5042 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; 5043 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 5044 #ifdef CONFIG_IPV6_MULTIPLE_TABLES 5045 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; 5046 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 5047 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; 5048 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 5049 #endif 5050 } 5051 5052 int __init ip6_route_init(void) 5053 { 5054 int ret; 5055 int cpu; 5056 5057 ret = -ENOMEM; 5058 ip6_dst_ops_template.kmem_cachep = 5059 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 5060 SLAB_HWCACHE_ALIGN, NULL); 5061 if (!ip6_dst_ops_template.kmem_cachep) 5062 goto out; 5063 5064 ret = dst_entries_init(&ip6_dst_blackhole_ops); 5065 if (ret) 5066 goto out_kmem_cache; 5067 5068 ret = register_pernet_subsys(&ipv6_inetpeer_ops); 5069 if (ret) 5070 goto out_dst_entries; 5071 5072 ret = register_pernet_subsys(&ip6_route_net_ops); 5073 if (ret) 5074 goto out_register_inetpeer; 5075 5076 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 5077 5078 ret = fib6_init(); 5079 if (ret) 5080 goto out_register_subsys; 5081 5082 ret = xfrm6_init(); 5083 if (ret) 5084 goto out_fib6_init; 5085 5086 ret = fib6_rules_init(); 5087 if (ret) 5088 goto xfrm6_init; 5089 5090 ret = register_pernet_subsys(&ip6_route_net_late_ops); 5091 if (ret) 5092 goto fib6_rules_init; 5093 5094 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE, 5095 inet6_rtm_newroute, NULL, 0); 5096 if (ret < 0) 5097 goto out_register_late_subsys; 5098 5099 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE, 5100 inet6_rtm_delroute, NULL, 0); 5101 if (ret < 0) 5102 goto out_register_late_subsys; 5103 5104 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, 5105 inet6_rtm_getroute, NULL, 5106 RTNL_FLAG_DOIT_UNLOCKED); 5107 if (ret < 0) 5108 goto out_register_late_subsys; 5109 5110 ret = register_netdevice_notifier(&ip6_route_dev_notifier); 5111 if (ret) 5112 goto out_register_late_subsys; 5113 5114 for_each_possible_cpu(cpu) { 5115 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); 5116 5117 INIT_LIST_HEAD(&ul->head); 5118 spin_lock_init(&ul->lock); 5119 } 5120 5121 out: 5122 return ret; 5123 5124 out_register_late_subsys: 5125 rtnl_unregister_all(PF_INET6); 5126 unregister_pernet_subsys(&ip6_route_net_late_ops); 5127 fib6_rules_init: 5128 fib6_rules_cleanup(); 5129 xfrm6_init: 5130 xfrm6_fini(); 5131 out_fib6_init: 5132 fib6_gc_cleanup(); 5133 out_register_subsys: 5134 unregister_pernet_subsys(&ip6_route_net_ops); 5135 out_register_inetpeer: 5136 unregister_pernet_subsys(&ipv6_inetpeer_ops); 5137 out_dst_entries: 5138 dst_entries_destroy(&ip6_dst_blackhole_ops); 5139 out_kmem_cache: 5140 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 5141 goto out; 5142 } 5143 5144 void ip6_route_cleanup(void) 5145 { 5146 unregister_netdevice_notifier(&ip6_route_dev_notifier); 5147 unregister_pernet_subsys(&ip6_route_net_late_ops); 5148 fib6_rules_cleanup(); 5149 xfrm6_fini(); 5150 fib6_gc_cleanup(); 5151 unregister_pernet_subsys(&ipv6_inetpeer_ops); 5152 unregister_pernet_subsys(&ip6_route_net_ops); 5153 dst_entries_destroy(&ip6_dst_blackhole_ops); 5154 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 5155 } 5156