11da177e4SLinus Torvalds /* 21da177e4SLinus Torvalds * Linux INET6 implementation 31da177e4SLinus Torvalds * FIB front-end. 41da177e4SLinus Torvalds * 51da177e4SLinus Torvalds * Authors: 61da177e4SLinus Torvalds * Pedro Roque <roque@di.fc.ul.pt> 71da177e4SLinus Torvalds * 81da177e4SLinus Torvalds * This program is free software; you can redistribute it and/or 91da177e4SLinus Torvalds * modify it under the terms of the GNU General Public License 101da177e4SLinus Torvalds * as published by the Free Software Foundation; either version 111da177e4SLinus Torvalds * 2 of the License, or (at your option) any later version. 121da177e4SLinus Torvalds */ 131da177e4SLinus Torvalds 141da177e4SLinus Torvalds /* Changes: 151da177e4SLinus Torvalds * 161da177e4SLinus Torvalds * YOSHIFUJI Hideaki @USAGI 171da177e4SLinus Torvalds * reworked default router selection. 181da177e4SLinus Torvalds * - respect outgoing interface 191da177e4SLinus Torvalds * - select from (probably) reachable routers (i.e. 201da177e4SLinus Torvalds * routers in REACHABLE, STALE, DELAY or PROBE states). 211da177e4SLinus Torvalds * - always select the same router if it is (probably) 221da177e4SLinus Torvalds * reachable. otherwise, round-robin the list. 23c0bece9fSYOSHIFUJI Hideaki * Ville Nuorvala 24c0bece9fSYOSHIFUJI Hideaki * Fixed routing subtrees. 
251da177e4SLinus Torvalds */ 261da177e4SLinus Torvalds 27f3213831SJoe Perches #define pr_fmt(fmt) "IPv6: " fmt 28f3213831SJoe Perches 294fc268d2SRandy Dunlap #include <linux/capability.h> 301da177e4SLinus Torvalds #include <linux/errno.h> 31bc3b2d7fSPaul Gortmaker #include <linux/export.h> 321da177e4SLinus Torvalds #include <linux/types.h> 331da177e4SLinus Torvalds #include <linux/times.h> 341da177e4SLinus Torvalds #include <linux/socket.h> 351da177e4SLinus Torvalds #include <linux/sockios.h> 361da177e4SLinus Torvalds #include <linux/net.h> 371da177e4SLinus Torvalds #include <linux/route.h> 381da177e4SLinus Torvalds #include <linux/netdevice.h> 391da177e4SLinus Torvalds #include <linux/in6.h> 407bc570c8SYOSHIFUJI Hideaki #include <linux/mroute6.h> 411da177e4SLinus Torvalds #include <linux/init.h> 421da177e4SLinus Torvalds #include <linux/if_arp.h> 431da177e4SLinus Torvalds #include <linux/proc_fs.h> 441da177e4SLinus Torvalds #include <linux/seq_file.h> 455b7c931dSDaniel Lezcano #include <linux/nsproxy.h> 465a0e3ad6STejun Heo #include <linux/slab.h> 4735732d01SWei Wang #include <linux/jhash.h> 48457c4cbcSEric W. 
Biederman #include <net/net_namespace.h> 491da177e4SLinus Torvalds #include <net/snmp.h> 501da177e4SLinus Torvalds #include <net/ipv6.h> 511da177e4SLinus Torvalds #include <net/ip6_fib.h> 521da177e4SLinus Torvalds #include <net/ip6_route.h> 531da177e4SLinus Torvalds #include <net/ndisc.h> 541da177e4SLinus Torvalds #include <net/addrconf.h> 551da177e4SLinus Torvalds #include <net/tcp.h> 561da177e4SLinus Torvalds #include <linux/rtnetlink.h> 571da177e4SLinus Torvalds #include <net/dst.h> 58904af04dSJiri Benc #include <net/dst_metadata.h> 591da177e4SLinus Torvalds #include <net/xfrm.h> 608d71740cSTom Tucker #include <net/netevent.h> 6121713ebcSThomas Graf #include <net/netlink.h> 6251ebd318SNicolas Dichtel #include <net/nexthop.h> 6319e42e45SRoopa Prabhu #include <net/lwtunnel.h> 64904af04dSJiri Benc #include <net/ip_tunnels.h> 65ca254490SDavid Ahern #include <net/l3mdev.h> 66eacb9384SRoopa Prabhu #include <net/ip.h> 677c0f6ba6SLinus Torvalds #include <linux/uaccess.h> 681da177e4SLinus Torvalds 691da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL 701da177e4SLinus Torvalds #include <linux/sysctl.h> 711da177e4SLinus Torvalds #endif 721da177e4SLinus Torvalds 7330d444d3SDavid Ahern static int ip6_rt_type_to_error(u8 fib6_type); 7430d444d3SDavid Ahern 7530d444d3SDavid Ahern #define CREATE_TRACE_POINTS 7630d444d3SDavid Ahern #include <trace/events/fib6.h> 7730d444d3SDavid Ahern EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup); 7830d444d3SDavid Ahern #undef CREATE_TRACE_POINTS 7930d444d3SDavid Ahern 80afc154e9SHannes Frederic Sowa enum rt6_nud_state { 817e980569SJiri Benc RT6_NUD_FAIL_HARD = -3, 827e980569SJiri Benc RT6_NUD_FAIL_PROBE = -2, 837e980569SJiri Benc RT6_NUD_FAIL_DO_RR = -1, 84afc154e9SHannes Frederic Sowa RT6_NUD_SUCCEED = 1 85afc154e9SHannes Frederic Sowa }; 86afc154e9SHannes Frederic Sowa 871da177e4SLinus Torvalds static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 880dbaee3bSDavid S. 
Miller static unsigned int ip6_default_advmss(const struct dst_entry *dst); 89ebb762f2SSteffen Klassert static unsigned int ip6_mtu(const struct dst_entry *dst); 901da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *); 911da177e4SLinus Torvalds static void ip6_dst_destroy(struct dst_entry *); 921da177e4SLinus Torvalds static void ip6_dst_ifdown(struct dst_entry *, 931da177e4SLinus Torvalds struct net_device *dev, int how); 94569d3645SDaniel Lezcano static int ip6_dst_gc(struct dst_ops *ops); 951da177e4SLinus Torvalds 961da177e4SLinus Torvalds static int ip6_pkt_discard(struct sk_buff *skb); 97ede2059dSEric W. Biederman static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb); 987150aedeSKamala R static int ip6_pkt_prohibit(struct sk_buff *skb); 99ede2059dSEric W. Biederman static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb); 1001da177e4SLinus Torvalds static void ip6_link_failure(struct sk_buff *skb); 1016700c270SDavid S. Miller static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 1026700c270SDavid S. Miller struct sk_buff *skb, u32 mtu); 1036700c270SDavid S. Miller static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, 1046700c270SDavid S. 
Miller struct sk_buff *skb); 105702cea56SDavid Ahern static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif, 106702cea56SDavid Ahern int strict); 1078d1c802bSDavid Ahern static size_t rt6_nlmsg_size(struct fib6_info *rt); 108d4ead6b3SDavid Ahern static int rt6_fill_node(struct net *net, struct sk_buff *skb, 1098d1c802bSDavid Ahern struct fib6_info *rt, struct dst_entry *dst, 110d4ead6b3SDavid Ahern struct in6_addr *dest, struct in6_addr *src, 11116a16cd3SDavid Ahern int iif, int type, u32 portid, u32 seq, 11216a16cd3SDavid Ahern unsigned int flags); 1138d1c802bSDavid Ahern static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, 11435732d01SWei Wang struct in6_addr *daddr, 11535732d01SWei Wang struct in6_addr *saddr); 1161da177e4SLinus Torvalds 11770ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO 1188d1c802bSDavid Ahern static struct fib6_info *rt6_add_route_info(struct net *net, 119b71d1d42SEric Dumazet const struct in6_addr *prefix, int prefixlen, 120830218c1SDavid Ahern const struct in6_addr *gwaddr, 121830218c1SDavid Ahern struct net_device *dev, 12295c96174SEric Dumazet unsigned int pref); 1238d1c802bSDavid Ahern static struct fib6_info *rt6_get_route_info(struct net *net, 124b71d1d42SEric Dumazet const struct in6_addr *prefix, int prefixlen, 125830218c1SDavid Ahern const struct in6_addr *gwaddr, 126830218c1SDavid Ahern struct net_device *dev); 12770ceb4f5SYOSHIFUJI Hideaki #endif 12870ceb4f5SYOSHIFUJI Hideaki 1298d0b94afSMartin KaFai Lau struct uncached_list { 1308d0b94afSMartin KaFai Lau spinlock_t lock; 1318d0b94afSMartin KaFai Lau struct list_head head; 1328d0b94afSMartin KaFai Lau }; 1338d0b94afSMartin KaFai Lau 1348d0b94afSMartin KaFai Lau static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list); 1358d0b94afSMartin KaFai Lau 136510c321bSXin Long void rt6_uncached_list_add(struct rt6_info *rt) 1378d0b94afSMartin KaFai Lau { 1388d0b94afSMartin KaFai Lau struct uncached_list *ul = 
raw_cpu_ptr(&rt6_uncached_list); 1398d0b94afSMartin KaFai Lau 1408d0b94afSMartin KaFai Lau rt->rt6i_uncached_list = ul; 1418d0b94afSMartin KaFai Lau 1428d0b94afSMartin KaFai Lau spin_lock_bh(&ul->lock); 1438d0b94afSMartin KaFai Lau list_add_tail(&rt->rt6i_uncached, &ul->head); 1448d0b94afSMartin KaFai Lau spin_unlock_bh(&ul->lock); 1458d0b94afSMartin KaFai Lau } 1468d0b94afSMartin KaFai Lau 147510c321bSXin Long void rt6_uncached_list_del(struct rt6_info *rt) 1488d0b94afSMartin KaFai Lau { 1498d0b94afSMartin KaFai Lau if (!list_empty(&rt->rt6i_uncached)) { 1508d0b94afSMartin KaFai Lau struct uncached_list *ul = rt->rt6i_uncached_list; 15181eb8447SWei Wang struct net *net = dev_net(rt->dst.dev); 1528d0b94afSMartin KaFai Lau 1538d0b94afSMartin KaFai Lau spin_lock_bh(&ul->lock); 1548d0b94afSMartin KaFai Lau list_del(&rt->rt6i_uncached); 15581eb8447SWei Wang atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache); 1568d0b94afSMartin KaFai Lau spin_unlock_bh(&ul->lock); 1578d0b94afSMartin KaFai Lau } 1588d0b94afSMartin KaFai Lau } 1598d0b94afSMartin KaFai Lau 1608d0b94afSMartin KaFai Lau static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) 1618d0b94afSMartin KaFai Lau { 1628d0b94afSMartin KaFai Lau struct net_device *loopback_dev = net->loopback_dev; 1638d0b94afSMartin KaFai Lau int cpu; 1648d0b94afSMartin KaFai Lau 165e332bc67SEric W. Biederman if (dev == loopback_dev) 166e332bc67SEric W. Biederman return; 167e332bc67SEric W. 
Biederman 1688d0b94afSMartin KaFai Lau for_each_possible_cpu(cpu) { 1698d0b94afSMartin KaFai Lau struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); 1708d0b94afSMartin KaFai Lau struct rt6_info *rt; 1718d0b94afSMartin KaFai Lau 1728d0b94afSMartin KaFai Lau spin_lock_bh(&ul->lock); 1738d0b94afSMartin KaFai Lau list_for_each_entry(rt, &ul->head, rt6i_uncached) { 1748d0b94afSMartin KaFai Lau struct inet6_dev *rt_idev = rt->rt6i_idev; 1758d0b94afSMartin KaFai Lau struct net_device *rt_dev = rt->dst.dev; 1768d0b94afSMartin KaFai Lau 177e332bc67SEric W. Biederman if (rt_idev->dev == dev) { 1788d0b94afSMartin KaFai Lau rt->rt6i_idev = in6_dev_get(loopback_dev); 1798d0b94afSMartin KaFai Lau in6_dev_put(rt_idev); 1808d0b94afSMartin KaFai Lau } 1818d0b94afSMartin KaFai Lau 182e332bc67SEric W. Biederman if (rt_dev == dev) { 1838d0b94afSMartin KaFai Lau rt->dst.dev = loopback_dev; 1848d0b94afSMartin KaFai Lau dev_hold(rt->dst.dev); 1858d0b94afSMartin KaFai Lau dev_put(rt_dev); 1868d0b94afSMartin KaFai Lau } 1878d0b94afSMartin KaFai Lau } 1888d0b94afSMartin KaFai Lau spin_unlock_bh(&ul->lock); 1898d0b94afSMartin KaFai Lau } 1908d0b94afSMartin KaFai Lau } 1918d0b94afSMartin KaFai Lau 192f8a1b43bSDavid Ahern static inline const void *choose_neigh_daddr(const struct in6_addr *p, 193f894cbf8SDavid S. Miller struct sk_buff *skb, 194f894cbf8SDavid S. Miller const void *daddr) 19539232973SDavid S. Miller { 196a7563f34SDavid S. Miller if (!ipv6_addr_any(p)) 19739232973SDavid S. Miller return (const void *) p; 198f894cbf8SDavid S. Miller else if (skb) 199f894cbf8SDavid S. Miller return &ipv6_hdr(skb)->daddr; 20039232973SDavid S. Miller return daddr; 20139232973SDavid S. Miller } 20239232973SDavid S. Miller 203f8a1b43bSDavid Ahern struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, 204f8a1b43bSDavid Ahern struct net_device *dev, 205f894cbf8SDavid S. Miller struct sk_buff *skb, 206f894cbf8SDavid S. Miller const void *daddr) 207d3aaeb38SDavid S. 
Miller { 20839232973SDavid S. Miller struct neighbour *n; 20939232973SDavid S. Miller 210f8a1b43bSDavid Ahern daddr = choose_neigh_daddr(gw, skb, daddr); 211f8a1b43bSDavid Ahern n = __ipv6_neigh_lookup(dev, daddr); 212f83c7790SDavid S. Miller if (n) 213f83c7790SDavid S. Miller return n; 2147adf3246SStefano Brivio 2157adf3246SStefano Brivio n = neigh_create(&nd_tbl, daddr, dev); 2167adf3246SStefano Brivio return IS_ERR(n) ? NULL : n; 217f8a1b43bSDavid Ahern } 218f8a1b43bSDavid Ahern 219f8a1b43bSDavid Ahern static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst, 220f8a1b43bSDavid Ahern struct sk_buff *skb, 221f8a1b43bSDavid Ahern const void *daddr) 222f8a1b43bSDavid Ahern { 223f8a1b43bSDavid Ahern const struct rt6_info *rt = container_of(dst, struct rt6_info, dst); 224f8a1b43bSDavid Ahern 225f8a1b43bSDavid Ahern return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr); 226f83c7790SDavid S. Miller } 227f83c7790SDavid S. Miller 22863fca65dSJulian Anastasov static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) 22963fca65dSJulian Anastasov { 23063fca65dSJulian Anastasov struct net_device *dev = dst->dev; 23163fca65dSJulian Anastasov struct rt6_info *rt = (struct rt6_info *)dst; 23263fca65dSJulian Anastasov 233f8a1b43bSDavid Ahern daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr); 23463fca65dSJulian Anastasov if (!daddr) 23563fca65dSJulian Anastasov return; 23663fca65dSJulian Anastasov if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) 23763fca65dSJulian Anastasov return; 23863fca65dSJulian Anastasov if (ipv6_addr_is_multicast((const struct in6_addr *)daddr)) 23963fca65dSJulian Anastasov return; 24063fca65dSJulian Anastasov __ipv6_confirm_neigh(dev, daddr); 24163fca65dSJulian Anastasov } 24263fca65dSJulian Anastasov 2439a7ec3a9SDaniel Lezcano static struct dst_ops ip6_dst_ops_template = { 2441da177e4SLinus Torvalds .family = AF_INET6, 2451da177e4SLinus Torvalds .gc = ip6_dst_gc, 2461da177e4SLinus Torvalds .gc_thresh = 
1024, 2471da177e4SLinus Torvalds .check = ip6_dst_check, 2480dbaee3bSDavid S. Miller .default_advmss = ip6_default_advmss, 249ebb762f2SSteffen Klassert .mtu = ip6_mtu, 250d4ead6b3SDavid Ahern .cow_metrics = dst_cow_metrics_generic, 2511da177e4SLinus Torvalds .destroy = ip6_dst_destroy, 2521da177e4SLinus Torvalds .ifdown = ip6_dst_ifdown, 2531da177e4SLinus Torvalds .negative_advice = ip6_negative_advice, 2541da177e4SLinus Torvalds .link_failure = ip6_link_failure, 2551da177e4SLinus Torvalds .update_pmtu = ip6_rt_update_pmtu, 2566e157b6aSDavid S. Miller .redirect = rt6_do_redirect, 2579f8955ccSEric W. Biederman .local_out = __ip6_local_out, 258f8a1b43bSDavid Ahern .neigh_lookup = ip6_dst_neigh_lookup, 25963fca65dSJulian Anastasov .confirm_neigh = ip6_confirm_neigh, 2601da177e4SLinus Torvalds }; 2611da177e4SLinus Torvalds 262ebb762f2SSteffen Klassert static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) 263ec831ea7SRoland Dreier { 264618f9bc7SSteffen Klassert unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 265618f9bc7SSteffen Klassert 266618f9bc7SSteffen Klassert return mtu ? : dst->dev->mtu; 267ec831ea7SRoland Dreier } 268ec831ea7SRoland Dreier 2696700c270SDavid S. Miller static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, 2706700c270SDavid S. Miller struct sk_buff *skb, u32 mtu) 27114e50e57SDavid S. Miller { 27214e50e57SDavid S. Miller } 27314e50e57SDavid S. Miller 2746700c270SDavid S. Miller static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, 2756700c270SDavid S. Miller struct sk_buff *skb) 276b587ee3bSDavid S. Miller { 277b587ee3bSDavid S. Miller } 278b587ee3bSDavid S. Miller 27914e50e57SDavid S. Miller static struct dst_ops ip6_dst_blackhole_ops = { 28014e50e57SDavid S. Miller .family = AF_INET6, 28114e50e57SDavid S. Miller .destroy = ip6_dst_destroy, 28214e50e57SDavid S. 
Miller .check = ip6_dst_check, 283ebb762f2SSteffen Klassert .mtu = ip6_blackhole_mtu, 284214f45c9SEric Dumazet .default_advmss = ip6_default_advmss, 28514e50e57SDavid S. Miller .update_pmtu = ip6_rt_blackhole_update_pmtu, 286b587ee3bSDavid S. Miller .redirect = ip6_rt_blackhole_redirect, 2870a1f5962SMartin KaFai Lau .cow_metrics = dst_cow_metrics_generic, 288f8a1b43bSDavid Ahern .neigh_lookup = ip6_dst_neigh_lookup, 28914e50e57SDavid S. Miller }; 29014e50e57SDavid S. Miller 29162fa8a84SDavid S. Miller static const u32 ip6_template_metrics[RTAX_MAX] = { 29214edd87dSLi RongQing [RTAX_HOPLIMIT - 1] = 0, 29362fa8a84SDavid S. Miller }; 29462fa8a84SDavid S. Miller 2958d1c802bSDavid Ahern static const struct fib6_info fib6_null_entry_template = { 29693c2fb25SDavid Ahern .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP), 29793c2fb25SDavid Ahern .fib6_protocol = RTPROT_KERNEL, 29893c2fb25SDavid Ahern .fib6_metric = ~(u32)0, 29993c2fb25SDavid Ahern .fib6_ref = ATOMIC_INIT(1), 300421842edSDavid Ahern .fib6_type = RTN_UNREACHABLE, 301421842edSDavid Ahern .fib6_metrics = (struct dst_metrics *)&dst_default_metrics, 302421842edSDavid Ahern }; 303421842edSDavid Ahern 304fb0af4c7SEric Dumazet static const struct rt6_info ip6_null_entry_template = { 3051da177e4SLinus Torvalds .dst = { 3061da177e4SLinus Torvalds .__refcnt = ATOMIC_INIT(1), 3071da177e4SLinus Torvalds .__use = 1, 3082c20cbd7SNicolas Dichtel .obsolete = DST_OBSOLETE_FORCE_CHK, 3091da177e4SLinus Torvalds .error = -ENETUNREACH, 3101da177e4SLinus Torvalds .input = ip6_pkt_discard, 3111da177e4SLinus Torvalds .output = ip6_pkt_discard_out, 3121da177e4SLinus Torvalds }, 3131da177e4SLinus Torvalds .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 3141da177e4SLinus Torvalds }; 3151da177e4SLinus Torvalds 316101367c2SThomas Graf #ifdef CONFIG_IPV6_MULTIPLE_TABLES 317101367c2SThomas Graf 318fb0af4c7SEric Dumazet static const struct rt6_info ip6_prohibit_entry_template = { 319101367c2SThomas Graf .dst = { 320101367c2SThomas Graf .__refcnt = 
ATOMIC_INIT(1), 321101367c2SThomas Graf .__use = 1, 3222c20cbd7SNicolas Dichtel .obsolete = DST_OBSOLETE_FORCE_CHK, 323101367c2SThomas Graf .error = -EACCES, 3249ce8ade0SThomas Graf .input = ip6_pkt_prohibit, 3259ce8ade0SThomas Graf .output = ip6_pkt_prohibit_out, 326101367c2SThomas Graf }, 327101367c2SThomas Graf .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 328101367c2SThomas Graf }; 329101367c2SThomas Graf 330fb0af4c7SEric Dumazet static const struct rt6_info ip6_blk_hole_entry_template = { 331101367c2SThomas Graf .dst = { 332101367c2SThomas Graf .__refcnt = ATOMIC_INIT(1), 333101367c2SThomas Graf .__use = 1, 3342c20cbd7SNicolas Dichtel .obsolete = DST_OBSOLETE_FORCE_CHK, 335101367c2SThomas Graf .error = -EINVAL, 336352e512cSHerbert Xu .input = dst_discard, 337ede2059dSEric W. Biederman .output = dst_discard_out, 338101367c2SThomas Graf }, 339101367c2SThomas Graf .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), 340101367c2SThomas Graf }; 341101367c2SThomas Graf 342101367c2SThomas Graf #endif 343101367c2SThomas Graf 344ebfa45f0SMartin KaFai Lau static void rt6_info_init(struct rt6_info *rt) 345ebfa45f0SMartin KaFai Lau { 346ebfa45f0SMartin KaFai Lau struct dst_entry *dst = &rt->dst; 347ebfa45f0SMartin KaFai Lau 348ebfa45f0SMartin KaFai Lau memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); 349ebfa45f0SMartin KaFai Lau INIT_LIST_HEAD(&rt->rt6i_uncached); 350ebfa45f0SMartin KaFai Lau } 351ebfa45f0SMartin KaFai Lau 3521da177e4SLinus Torvalds /* allocate dst with ip6_dst_ops */ 35393531c67SDavid Ahern struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, 354ad706862SMartin KaFai Lau int flags) 3551da177e4SLinus Torvalds { 35697bab73fSDavid S. Miller struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 357b2a9c0edSWei Wang 1, DST_OBSOLETE_FORCE_CHK, flags); 358cf911662SDavid S. 
Miller 35981eb8447SWei Wang if (rt) { 360ebfa45f0SMartin KaFai Lau rt6_info_init(rt); 36181eb8447SWei Wang atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc); 36281eb8447SWei Wang } 3638104891bSSteffen Klassert 364cf911662SDavid S. Miller return rt; 3651da177e4SLinus Torvalds } 3669ab179d8SDavid Ahern EXPORT_SYMBOL(ip6_dst_alloc); 367d52d3997SMartin KaFai Lau 3681da177e4SLinus Torvalds static void ip6_dst_destroy(struct dst_entry *dst) 3691da177e4SLinus Torvalds { 3701da177e4SLinus Torvalds struct rt6_info *rt = (struct rt6_info *)dst; 371a68886a6SDavid Ahern struct fib6_info *from; 3728d0b94afSMartin KaFai Lau struct inet6_dev *idev; 3731da177e4SLinus Torvalds 3741620a336SDavid Ahern ip_dst_metrics_put(dst); 3758d0b94afSMartin KaFai Lau rt6_uncached_list_del(rt); 3768d0b94afSMartin KaFai Lau 3778d0b94afSMartin KaFai Lau idev = rt->rt6i_idev; 37838308473SDavid S. Miller if (idev) { 3791da177e4SLinus Torvalds rt->rt6i_idev = NULL; 3801da177e4SLinus Torvalds in6_dev_put(idev); 3811da177e4SLinus Torvalds } 3821716a961SGao feng 383a68886a6SDavid Ahern rcu_read_lock(); 384a68886a6SDavid Ahern from = rcu_dereference(rt->from); 385a68886a6SDavid Ahern rcu_assign_pointer(rt->from, NULL); 38693531c67SDavid Ahern fib6_info_release(from); 387a68886a6SDavid Ahern rcu_read_unlock(); 388b3419363SDavid S. Miller } 389b3419363SDavid S. Miller 3901da177e4SLinus Torvalds static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 3911da177e4SLinus Torvalds int how) 3921da177e4SLinus Torvalds { 3931da177e4SLinus Torvalds struct rt6_info *rt = (struct rt6_info *)dst; 3941da177e4SLinus Torvalds struct inet6_dev *idev = rt->rt6i_idev; 3955a3e55d6SDenis V. Lunev struct net_device *loopback_dev = 396c346dca1SYOSHIFUJI Hideaki dev_net(dev)->loopback_dev; 3971da177e4SLinus Torvalds 398e5645f51SWei Wang if (idev && idev->dev != loopback_dev) { 399e5645f51SWei Wang struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev); 40038308473SDavid S. 
Miller if (loopback_idev) { 4011da177e4SLinus Torvalds rt->rt6i_idev = loopback_idev; 4021da177e4SLinus Torvalds in6_dev_put(idev); 4031da177e4SLinus Torvalds } 4041da177e4SLinus Torvalds } 40597cac082SDavid S. Miller } 4061da177e4SLinus Torvalds 4075973fb1eSMartin KaFai Lau static bool __rt6_check_expired(const struct rt6_info *rt) 4085973fb1eSMartin KaFai Lau { 4095973fb1eSMartin KaFai Lau if (rt->rt6i_flags & RTF_EXPIRES) 4105973fb1eSMartin KaFai Lau return time_after(jiffies, rt->dst.expires); 4115973fb1eSMartin KaFai Lau else 4125973fb1eSMartin KaFai Lau return false; 4135973fb1eSMartin KaFai Lau } 4145973fb1eSMartin KaFai Lau 415a50feda5SEric Dumazet static bool rt6_check_expired(const struct rt6_info *rt) 4161da177e4SLinus Torvalds { 417a68886a6SDavid Ahern struct fib6_info *from; 418a68886a6SDavid Ahern 419a68886a6SDavid Ahern from = rcu_dereference(rt->from); 420a68886a6SDavid Ahern 4211716a961SGao feng if (rt->rt6i_flags & RTF_EXPIRES) { 4221716a961SGao feng if (time_after(jiffies, rt->dst.expires)) 423a50feda5SEric Dumazet return true; 424a68886a6SDavid Ahern } else if (from) { 4251e2ea8adSXin Long return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || 426a68886a6SDavid Ahern fib6_check_expired(from); 4271716a961SGao feng } 428a50feda5SEric Dumazet return false; 4291da177e4SLinus Torvalds } 4301da177e4SLinus Torvalds 431*b1d40991SDavid Ahern void fib6_select_path(const struct net *net, struct fib6_result *res, 432*b1d40991SDavid Ahern struct flowi6 *fl6, int oif, bool have_oif_match, 433*b1d40991SDavid Ahern const struct sk_buff *skb, int strict) 43451ebd318SNicolas Dichtel { 4358d1c802bSDavid Ahern struct fib6_info *sibling, *next_sibling; 436*b1d40991SDavid Ahern struct fib6_info *match = res->f6i; 437*b1d40991SDavid Ahern 438*b1d40991SDavid Ahern if (!match->fib6_nsiblings || have_oif_match) 439*b1d40991SDavid Ahern goto out; 44051ebd318SNicolas Dichtel 441b673d6ccSJakub Sitnicki /* We might have already computed the hash for ICMPv6 errors. 
In such 442b673d6ccSJakub Sitnicki * case it will always be non-zero. Otherwise now is the time to do it. 443b673d6ccSJakub Sitnicki */ 444b673d6ccSJakub Sitnicki if (!fl6->mp_hash) 445b4bac172SDavid Ahern fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); 446b673d6ccSJakub Sitnicki 447ad1601aeSDavid Ahern if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound)) 448*b1d40991SDavid Ahern goto out; 449bbfcd776SIdo Schimmel 45093c2fb25SDavid Ahern list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, 45193c2fb25SDavid Ahern fib6_siblings) { 452702cea56SDavid Ahern const struct fib6_nh *nh = &sibling->fib6_nh; 4535e670d84SDavid Ahern int nh_upper_bound; 4545e670d84SDavid Ahern 455702cea56SDavid Ahern nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound); 4565e670d84SDavid Ahern if (fl6->mp_hash > nh_upper_bound) 4573d709f69SIdo Schimmel continue; 458702cea56SDavid Ahern if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0) 45952bd4c0cSNicolas Dichtel break; 46051ebd318SNicolas Dichtel match = sibling; 46151ebd318SNicolas Dichtel break; 46251ebd318SNicolas Dichtel } 4633d709f69SIdo Schimmel 464*b1d40991SDavid Ahern out: 465*b1d40991SDavid Ahern res->f6i = match; 466*b1d40991SDavid Ahern res->nh = &match->fib6_nh; 46751ebd318SNicolas Dichtel } 46851ebd318SNicolas Dichtel 4691da177e4SLinus Torvalds /* 47066f5d6ceSWei Wang * Route lookup. rcu_read_lock() should be held. 
4711da177e4SLinus Torvalds */ 4721da177e4SLinus Torvalds 4730c59d006SDavid Ahern static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh, 4740c59d006SDavid Ahern const struct in6_addr *saddr, int oif, int flags) 4750c59d006SDavid Ahern { 4760c59d006SDavid Ahern const struct net_device *dev; 4770c59d006SDavid Ahern 4780c59d006SDavid Ahern if (nh->fib_nh_flags & RTNH_F_DEAD) 4790c59d006SDavid Ahern return false; 4800c59d006SDavid Ahern 4810c59d006SDavid Ahern dev = nh->fib_nh_dev; 4820c59d006SDavid Ahern if (oif) { 4830c59d006SDavid Ahern if (dev->ifindex == oif) 4840c59d006SDavid Ahern return true; 4850c59d006SDavid Ahern } else { 4860c59d006SDavid Ahern if (ipv6_chk_addr(net, saddr, dev, 4870c59d006SDavid Ahern flags & RT6_LOOKUP_F_IFACE)) 4880c59d006SDavid Ahern return true; 4890c59d006SDavid Ahern } 4900c59d006SDavid Ahern 4910c59d006SDavid Ahern return false; 4920c59d006SDavid Ahern } 4930c59d006SDavid Ahern 4948d1c802bSDavid Ahern static inline struct fib6_info *rt6_device_match(struct net *net, 4958d1c802bSDavid Ahern struct fib6_info *rt, 496b71d1d42SEric Dumazet const struct in6_addr *saddr, 4971da177e4SLinus Torvalds int oif, 498d420895eSYOSHIFUJI Hideaki int flags) 4991da177e4SLinus Torvalds { 5000c59d006SDavid Ahern const struct fib6_nh *nh; 5018d1c802bSDavid Ahern struct fib6_info *sprt; 5021da177e4SLinus Torvalds 5035e670d84SDavid Ahern if (!oif && ipv6_addr_any(saddr) && 504ad1601aeSDavid Ahern !(rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)) 5058067bb8cSIdo Schimmel return rt; 506dd3abc4eSYOSHIFUJI Hideaki 5078fb11a9aSDavid Ahern for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) { 5080c59d006SDavid Ahern nh = &sprt->fib6_nh; 5090c59d006SDavid Ahern if (__rt6_device_match(net, nh, saddr, oif, flags)) 5101da177e4SLinus Torvalds return sprt; 5111da177e4SLinus Torvalds } 5121da177e4SLinus Torvalds 513eea68cd3SDavid Ahern if (oif && flags & RT6_LOOKUP_F_IFACE) 514421842edSDavid Ahern return net->ipv6.fib6_null_entry; 
5151da177e4SLinus Torvalds 516ad1601aeSDavid Ahern return rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt; 5171da177e4SLinus Torvalds } 5181da177e4SLinus Torvalds 51927097255SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF 520c2f17e82SHannes Frederic Sowa struct __rt6_probe_work { 521c2f17e82SHannes Frederic Sowa struct work_struct work; 522c2f17e82SHannes Frederic Sowa struct in6_addr target; 523c2f17e82SHannes Frederic Sowa struct net_device *dev; 524c2f17e82SHannes Frederic Sowa }; 525c2f17e82SHannes Frederic Sowa 526c2f17e82SHannes Frederic Sowa static void rt6_probe_deferred(struct work_struct *w) 527c2f17e82SHannes Frederic Sowa { 528c2f17e82SHannes Frederic Sowa struct in6_addr mcaddr; 529c2f17e82SHannes Frederic Sowa struct __rt6_probe_work *work = 530c2f17e82SHannes Frederic Sowa container_of(w, struct __rt6_probe_work, work); 531c2f17e82SHannes Frederic Sowa 532c2f17e82SHannes Frederic Sowa addrconf_addr_solict_mult(&work->target, &mcaddr); 533adc176c5SErik Nordmark ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0); 534c2f17e82SHannes Frederic Sowa dev_put(work->dev); 535662f5533SMichael Büsch kfree(work); 536c2f17e82SHannes Frederic Sowa } 537c2f17e82SHannes Frederic Sowa 538cc3a86c8SDavid Ahern static void rt6_probe(struct fib6_nh *fib6_nh) 53927097255SYOSHIFUJI Hideaki { 540f547fac6SSabrina Dubroca struct __rt6_probe_work *work = NULL; 5415e670d84SDavid Ahern const struct in6_addr *nh_gw; 542f2c31e32SEric Dumazet struct neighbour *neigh; 5435e670d84SDavid Ahern struct net_device *dev; 544f547fac6SSabrina Dubroca struct inet6_dev *idev; 5455e670d84SDavid Ahern 54627097255SYOSHIFUJI Hideaki /* 54727097255SYOSHIFUJI Hideaki * Okay, this does not seem to be appropriate 54827097255SYOSHIFUJI Hideaki * for now, however, we need to check if it 54927097255SYOSHIFUJI Hideaki * is really so; aka Router Reachability Probing. 
55027097255SYOSHIFUJI Hideaki * 55127097255SYOSHIFUJI Hideaki * Router Reachability Probe MUST be rate-limited 55227097255SYOSHIFUJI Hideaki * to no more than one per minute. 55327097255SYOSHIFUJI Hideaki */ 554cc3a86c8SDavid Ahern if (fib6_nh->fib_nh_gw_family) 555fdd6681dSAmerigo Wang return; 5565e670d84SDavid Ahern 557cc3a86c8SDavid Ahern nh_gw = &fib6_nh->fib_nh_gw6; 558cc3a86c8SDavid Ahern dev = fib6_nh->fib_nh_dev; 5592152caeaSYOSHIFUJI Hideaki / 吉藤英明 rcu_read_lock_bh(); 560f547fac6SSabrina Dubroca idev = __in6_dev_get(dev); 5615e670d84SDavid Ahern neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); 5622152caeaSYOSHIFUJI Hideaki / 吉藤英明 if (neigh) { 5638d6c31bfSMartin KaFai Lau if (neigh->nud_state & NUD_VALID) 5648d6c31bfSMartin KaFai Lau goto out; 5658d6c31bfSMartin KaFai Lau 5662152caeaSYOSHIFUJI Hideaki / 吉藤英明 write_lock(&neigh->lock); 567990edb42SMartin KaFai Lau if (!(neigh->nud_state & NUD_VALID) && 568990edb42SMartin KaFai Lau time_after(jiffies, 569dcd1f572SDavid Ahern neigh->updated + idev->cnf.rtr_probe_interval)) { 570c2f17e82SHannes Frederic Sowa work = kmalloc(sizeof(*work), GFP_ATOMIC); 571990edb42SMartin KaFai Lau if (work) 5727e980569SJiri Benc __neigh_set_probe_once(neigh); 573990edb42SMartin KaFai Lau } 574c2f17e82SHannes Frederic Sowa write_unlock(&neigh->lock); 575cc3a86c8SDavid Ahern } else if (time_after(jiffies, fib6_nh->last_probe + 576f547fac6SSabrina Dubroca idev->cnf.rtr_probe_interval)) { 577990edb42SMartin KaFai Lau work = kmalloc(sizeof(*work), GFP_ATOMIC); 578990edb42SMartin KaFai Lau } 579c2f17e82SHannes Frederic Sowa 580c2f17e82SHannes Frederic Sowa if (work) { 581cc3a86c8SDavid Ahern fib6_nh->last_probe = jiffies; 582c2f17e82SHannes Frederic Sowa INIT_WORK(&work->work, rt6_probe_deferred); 5835e670d84SDavid Ahern work->target = *nh_gw; 5845e670d84SDavid Ahern dev_hold(dev); 5855e670d84SDavid Ahern work->dev = dev; 586c2f17e82SHannes Frederic Sowa schedule_work(&work->work); 587c2f17e82SHannes Frederic Sowa } 588990edb42SMartin 
KaFai Lau 5898d6c31bfSMartin KaFai Lau out: 5902152caeaSYOSHIFUJI Hideaki / 吉藤英明 rcu_read_unlock_bh(); 591f2c31e32SEric Dumazet } 59227097255SYOSHIFUJI Hideaki #else 593cc3a86c8SDavid Ahern static inline void rt6_probe(struct fib6_nh *fib6_nh) 59427097255SYOSHIFUJI Hideaki { 59527097255SYOSHIFUJI Hideaki } 59627097255SYOSHIFUJI Hideaki #endif 59727097255SYOSHIFUJI Hideaki 5981da177e4SLinus Torvalds /* 599554cfb7eSYOSHIFUJI Hideaki * Default Router Selection (RFC 2461 6.3.6) 6001da177e4SLinus Torvalds */ 6011ba9a895SDavid Ahern static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh) 6021da177e4SLinus Torvalds { 603afc154e9SHannes Frederic Sowa enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; 6045e670d84SDavid Ahern struct neighbour *neigh; 605f2c31e32SEric Dumazet 606145a3621SYOSHIFUJI Hideaki / 吉藤英明 rcu_read_lock_bh(); 6071ba9a895SDavid Ahern neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev, 6081ba9a895SDavid Ahern &fib6_nh->fib_nh_gw6); 609145a3621SYOSHIFUJI Hideaki / 吉藤英明 if (neigh) { 610145a3621SYOSHIFUJI Hideaki / 吉藤英明 read_lock(&neigh->lock); 611554cfb7eSYOSHIFUJI Hideaki if (neigh->nud_state & NUD_VALID) 612afc154e9SHannes Frederic Sowa ret = RT6_NUD_SUCCEED; 613398bcbebSYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF 614a5a81f0bSPaul Marks else if (!(neigh->nud_state & NUD_FAILED)) 615afc154e9SHannes Frederic Sowa ret = RT6_NUD_SUCCEED; 6167e980569SJiri Benc else 6177e980569SJiri Benc ret = RT6_NUD_FAIL_PROBE; 618398bcbebSYOSHIFUJI Hideaki #endif 619145a3621SYOSHIFUJI Hideaki / 吉藤英明 read_unlock(&neigh->lock); 620afc154e9SHannes Frederic Sowa } else { 621afc154e9SHannes Frederic Sowa ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? 
6227e980569SJiri Benc RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR; 623a5a81f0bSPaul Marks } 624145a3621SYOSHIFUJI Hideaki / 吉藤英明 rcu_read_unlock_bh(); 625145a3621SYOSHIFUJI Hideaki / 吉藤英明 626a5a81f0bSPaul Marks return ret; 6271da177e4SLinus Torvalds } 6281da177e4SLinus Torvalds 629702cea56SDavid Ahern static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif, 630702cea56SDavid Ahern int strict) 631554cfb7eSYOSHIFUJI Hideaki { 6326e1809a5SDavid Ahern int m = 0; 6334d0c5911SYOSHIFUJI Hideaki 6346e1809a5SDavid Ahern if (!oif || nh->fib_nh_dev->ifindex == oif) 6356e1809a5SDavid Ahern m = 2; 6366e1809a5SDavid Ahern 63777d16f45SYOSHIFUJI Hideaki if (!m && (strict & RT6_LOOKUP_F_IFACE)) 638afc154e9SHannes Frederic Sowa return RT6_NUD_FAIL_HARD; 639ebacaaa0SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF 640702cea56SDavid Ahern m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2; 641ebacaaa0SYOSHIFUJI Hideaki #endif 6421ba9a895SDavid Ahern if ((strict & RT6_LOOKUP_F_REACHABLE) && 643702cea56SDavid Ahern !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) { 6441ba9a895SDavid Ahern int n = rt6_check_neigh(nh); 645afc154e9SHannes Frederic Sowa if (n < 0) 646afc154e9SHannes Frederic Sowa return n; 647afc154e9SHannes Frederic Sowa } 648554cfb7eSYOSHIFUJI Hideaki return m; 649554cfb7eSYOSHIFUJI Hideaki } 650554cfb7eSYOSHIFUJI Hideaki 65128679ed1SDavid Ahern static bool find_match(struct fib6_nh *nh, u32 fib6_flags, 65228679ed1SDavid Ahern int oif, int strict, int *mpri, bool *do_rr) 653554cfb7eSYOSHIFUJI Hideaki { 654afc154e9SHannes Frederic Sowa bool match_do_rr = false; 65528679ed1SDavid Ahern bool rc = false; 65628679ed1SDavid Ahern int m; 65735103d11SAndy Gospodarek 65828679ed1SDavid Ahern if (nh->fib_nh_flags & RTNH_F_DEAD) 6598067bb8cSIdo Schimmel goto out; 6608067bb8cSIdo Schimmel 66128679ed1SDavid Ahern if (ip6_ignore_linkdown(nh->fib_nh_dev) && 66228679ed1SDavid Ahern nh->fib_nh_flags & RTNH_F_LINKDOWN && 663d5d32e4bSDavid Ahern !(strict & 
RT6_LOOKUP_F_IGNORE_LINKSTATE)) 66435103d11SAndy Gospodarek goto out; 665554cfb7eSYOSHIFUJI Hideaki 66628679ed1SDavid Ahern m = rt6_score_route(nh, fib6_flags, oif, strict); 6677e980569SJiri Benc if (m == RT6_NUD_FAIL_DO_RR) { 668afc154e9SHannes Frederic Sowa match_do_rr = true; 669afc154e9SHannes Frederic Sowa m = 0; /* lowest valid score */ 6707e980569SJiri Benc } else if (m == RT6_NUD_FAIL_HARD) { 671f11e6659SDavid S. Miller goto out; 6721da177e4SLinus Torvalds } 673f11e6659SDavid S. Miller 674afc154e9SHannes Frederic Sowa if (strict & RT6_LOOKUP_F_REACHABLE) 67528679ed1SDavid Ahern rt6_probe(nh); 676afc154e9SHannes Frederic Sowa 6777e980569SJiri Benc /* note that m can be RT6_NUD_FAIL_PROBE at this point */ 678afc154e9SHannes Frederic Sowa if (m > *mpri) { 679afc154e9SHannes Frederic Sowa *do_rr = match_do_rr; 680afc154e9SHannes Frederic Sowa *mpri = m; 68128679ed1SDavid Ahern rc = true; 682afc154e9SHannes Frederic Sowa } 683f11e6659SDavid S. Miller out: 68428679ed1SDavid Ahern return rc; 6851da177e4SLinus Torvalds } 6861da177e4SLinus Torvalds 68730c15f03SDavid Ahern static void __find_rr_leaf(struct fib6_info *rt_start, 68830c15f03SDavid Ahern struct fib6_info *nomatch, u32 metric, 68930c15f03SDavid Ahern struct fib6_info **match, struct fib6_info **cont, 69030c15f03SDavid Ahern int oif, int strict, bool *do_rr, int *mpri) 69130c15f03SDavid Ahern { 69230c15f03SDavid Ahern struct fib6_info *rt; 69330c15f03SDavid Ahern 69430c15f03SDavid Ahern for (rt = rt_start; 69530c15f03SDavid Ahern rt && rt != nomatch; 69630c15f03SDavid Ahern rt = rcu_dereference(rt->fib6_next)) { 69730c15f03SDavid Ahern struct fib6_nh *nh; 69830c15f03SDavid Ahern 69930c15f03SDavid Ahern if (cont && rt->fib6_metric != metric) { 70030c15f03SDavid Ahern *cont = rt; 70130c15f03SDavid Ahern return; 70230c15f03SDavid Ahern } 70330c15f03SDavid Ahern 70430c15f03SDavid Ahern if (fib6_check_expired(rt)) 70530c15f03SDavid Ahern continue; 70630c15f03SDavid Ahern 70730c15f03SDavid Ahern nh = 
&rt->fib6_nh; 70830c15f03SDavid Ahern if (find_match(nh, rt->fib6_flags, oif, strict, mpri, do_rr)) 70930c15f03SDavid Ahern *match = rt; 71030c15f03SDavid Ahern } 71130c15f03SDavid Ahern } 71230c15f03SDavid Ahern 7138d1c802bSDavid Ahern static struct fib6_info *find_rr_leaf(struct fib6_node *fn, 7148d1c802bSDavid Ahern struct fib6_info *leaf, 7158d1c802bSDavid Ahern struct fib6_info *rr_head, 716afc154e9SHannes Frederic Sowa u32 metric, int oif, int strict, 717afc154e9SHannes Frederic Sowa bool *do_rr) 718f11e6659SDavid S. Miller { 71930c15f03SDavid Ahern struct fib6_info *match = NULL, *cont = NULL; 720f11e6659SDavid S. Miller int mpri = -1; 721f11e6659SDavid S. Miller 72230c15f03SDavid Ahern __find_rr_leaf(rr_head, NULL, metric, &match, &cont, 72330c15f03SDavid Ahern oif, strict, do_rr, &mpri); 7249fbdcfafSSteffen Klassert 72530c15f03SDavid Ahern __find_rr_leaf(leaf, rr_head, metric, &match, &cont, 72630c15f03SDavid Ahern oif, strict, do_rr, &mpri); 7279fbdcfafSSteffen Klassert 7289fbdcfafSSteffen Klassert if (match || !cont) 7299fbdcfafSSteffen Klassert return match; 7309fbdcfafSSteffen Klassert 73130c15f03SDavid Ahern __find_rr_leaf(cont, NULL, metric, &match, NULL, 73230c15f03SDavid Ahern oif, strict, do_rr, &mpri); 733f11e6659SDavid S. Miller 734f11e6659SDavid S. Miller return match; 735f11e6659SDavid S. Miller } 736f11e6659SDavid S. Miller 7378d1c802bSDavid Ahern static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn, 7388d1040e8SWei Wang int oif, int strict) 739f11e6659SDavid S. Miller { 7408d1c802bSDavid Ahern struct fib6_info *leaf = rcu_dereference(fn->leaf); 7418d1c802bSDavid Ahern struct fib6_info *match, *rt0; 742afc154e9SHannes Frederic Sowa bool do_rr = false; 74317ecf590SWei Wang int key_plen; 744f11e6659SDavid S. 
Miller 745421842edSDavid Ahern if (!leaf || leaf == net->ipv6.fib6_null_entry) 746421842edSDavid Ahern return net->ipv6.fib6_null_entry; 7478d1040e8SWei Wang 74866f5d6ceSWei Wang rt0 = rcu_dereference(fn->rr_ptr); 749f11e6659SDavid S. Miller if (!rt0) 75066f5d6ceSWei Wang rt0 = leaf; 751f11e6659SDavid S. Miller 75217ecf590SWei Wang /* Double check to make sure fn is not an intermediate node 75317ecf590SWei Wang * and fn->leaf does not points to its child's leaf 75417ecf590SWei Wang * (This might happen if all routes under fn are deleted from 75517ecf590SWei Wang * the tree and fib6_repair_tree() is called on the node.) 75617ecf590SWei Wang */ 75793c2fb25SDavid Ahern key_plen = rt0->fib6_dst.plen; 75817ecf590SWei Wang #ifdef CONFIG_IPV6_SUBTREES 75993c2fb25SDavid Ahern if (rt0->fib6_src.plen) 76093c2fb25SDavid Ahern key_plen = rt0->fib6_src.plen; 76117ecf590SWei Wang #endif 76217ecf590SWei Wang if (fn->fn_bit != key_plen) 763421842edSDavid Ahern return net->ipv6.fib6_null_entry; 76417ecf590SWei Wang 76593c2fb25SDavid Ahern match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict, 766afc154e9SHannes Frederic Sowa &do_rr); 767f11e6659SDavid S. Miller 768afc154e9SHannes Frederic Sowa if (do_rr) { 7698fb11a9aSDavid Ahern struct fib6_info *next = rcu_dereference(rt0->fib6_next); 770f11e6659SDavid S. Miller 771554cfb7eSYOSHIFUJI Hideaki /* no entries matched; do round-robin */ 77293c2fb25SDavid Ahern if (!next || next->fib6_metric != rt0->fib6_metric) 7738d1040e8SWei Wang next = leaf; 774f11e6659SDavid S. 
Miller 77566f5d6ceSWei Wang if (next != rt0) { 77693c2fb25SDavid Ahern spin_lock_bh(&leaf->fib6_table->tb6_lock); 77766f5d6ceSWei Wang /* make sure next is not being deleted from the tree */ 77893c2fb25SDavid Ahern if (next->fib6_node) 77966f5d6ceSWei Wang rcu_assign_pointer(fn->rr_ptr, next); 78093c2fb25SDavid Ahern spin_unlock_bh(&leaf->fib6_table->tb6_lock); 78166f5d6ceSWei Wang } 782554cfb7eSYOSHIFUJI Hideaki } 783554cfb7eSYOSHIFUJI Hideaki 784421842edSDavid Ahern return match ? match : net->ipv6.fib6_null_entry; 7851da177e4SLinus Torvalds } 7861da177e4SLinus Torvalds 7878d1c802bSDavid Ahern static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt) 7888b9df265SMartin KaFai Lau { 789bdf00467SDavid Ahern return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_gw_family; 7908b9df265SMartin KaFai Lau } 7918b9df265SMartin KaFai Lau 79270ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO 79370ceb4f5SYOSHIFUJI Hideaki int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, 794b71d1d42SEric Dumazet const struct in6_addr *gwaddr) 79570ceb4f5SYOSHIFUJI Hideaki { 796c346dca1SYOSHIFUJI Hideaki struct net *net = dev_net(dev); 79770ceb4f5SYOSHIFUJI Hideaki struct route_info *rinfo = (struct route_info *) opt; 79870ceb4f5SYOSHIFUJI Hideaki struct in6_addr prefix_buf, *prefix; 79970ceb4f5SYOSHIFUJI Hideaki unsigned int pref; 8004bed72e4SYOSHIFUJI Hideaki unsigned long lifetime; 8018d1c802bSDavid Ahern struct fib6_info *rt; 80270ceb4f5SYOSHIFUJI Hideaki 80370ceb4f5SYOSHIFUJI Hideaki if (len < sizeof(struct route_info)) { 80470ceb4f5SYOSHIFUJI Hideaki return -EINVAL; 80570ceb4f5SYOSHIFUJI Hideaki } 80670ceb4f5SYOSHIFUJI Hideaki 80770ceb4f5SYOSHIFUJI Hideaki /* Sanity check for prefix_len and length */ 80870ceb4f5SYOSHIFUJI Hideaki if (rinfo->length > 3) { 80970ceb4f5SYOSHIFUJI Hideaki return -EINVAL; 81070ceb4f5SYOSHIFUJI Hideaki } else if (rinfo->prefix_len > 128) { 81170ceb4f5SYOSHIFUJI Hideaki return -EINVAL; 81270ceb4f5SYOSHIFUJI Hideaki } else if 
(rinfo->prefix_len > 64) { 81370ceb4f5SYOSHIFUJI Hideaki if (rinfo->length < 2) { 81470ceb4f5SYOSHIFUJI Hideaki return -EINVAL; 81570ceb4f5SYOSHIFUJI Hideaki } 81670ceb4f5SYOSHIFUJI Hideaki } else if (rinfo->prefix_len > 0) { 81770ceb4f5SYOSHIFUJI Hideaki if (rinfo->length < 1) { 81870ceb4f5SYOSHIFUJI Hideaki return -EINVAL; 81970ceb4f5SYOSHIFUJI Hideaki } 82070ceb4f5SYOSHIFUJI Hideaki } 82170ceb4f5SYOSHIFUJI Hideaki 82270ceb4f5SYOSHIFUJI Hideaki pref = rinfo->route_pref; 82370ceb4f5SYOSHIFUJI Hideaki if (pref == ICMPV6_ROUTER_PREF_INVALID) 8243933fc95SJens Rosenboom return -EINVAL; 82570ceb4f5SYOSHIFUJI Hideaki 8264bed72e4SYOSHIFUJI Hideaki lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); 82770ceb4f5SYOSHIFUJI Hideaki 82870ceb4f5SYOSHIFUJI Hideaki if (rinfo->length == 3) 82970ceb4f5SYOSHIFUJI Hideaki prefix = (struct in6_addr *)rinfo->prefix; 83070ceb4f5SYOSHIFUJI Hideaki else { 83170ceb4f5SYOSHIFUJI Hideaki /* this function is safe */ 83270ceb4f5SYOSHIFUJI Hideaki ipv6_addr_prefix(&prefix_buf, 83370ceb4f5SYOSHIFUJI Hideaki (struct in6_addr *)rinfo->prefix, 83470ceb4f5SYOSHIFUJI Hideaki rinfo->prefix_len); 83570ceb4f5SYOSHIFUJI Hideaki prefix = &prefix_buf; 83670ceb4f5SYOSHIFUJI Hideaki } 83770ceb4f5SYOSHIFUJI Hideaki 838f104a567SDuan Jiong if (rinfo->prefix_len == 0) 839afb1d4b5SDavid Ahern rt = rt6_get_dflt_router(net, gwaddr, dev); 840f104a567SDuan Jiong else 841f104a567SDuan Jiong rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, 842830218c1SDavid Ahern gwaddr, dev); 84370ceb4f5SYOSHIFUJI Hideaki 84470ceb4f5SYOSHIFUJI Hideaki if (rt && !lifetime) { 845afb1d4b5SDavid Ahern ip6_del_rt(net, rt); 84670ceb4f5SYOSHIFUJI Hideaki rt = NULL; 84770ceb4f5SYOSHIFUJI Hideaki } 84870ceb4f5SYOSHIFUJI Hideaki 84970ceb4f5SYOSHIFUJI Hideaki if (!rt && lifetime) 850830218c1SDavid Ahern rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, 851830218c1SDavid Ahern dev, pref); 85270ceb4f5SYOSHIFUJI Hideaki else if (rt) 85393c2fb25SDavid Ahern 
rt->fib6_flags = RTF_ROUTEINFO | 85493c2fb25SDavid Ahern (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); 85570ceb4f5SYOSHIFUJI Hideaki 85670ceb4f5SYOSHIFUJI Hideaki if (rt) { 8571716a961SGao feng if (!addrconf_finite_timeout(lifetime)) 85814895687SDavid Ahern fib6_clean_expires(rt); 8591716a961SGao feng else 86014895687SDavid Ahern fib6_set_expires(rt, jiffies + HZ * lifetime); 8611716a961SGao feng 86293531c67SDavid Ahern fib6_info_release(rt); 86370ceb4f5SYOSHIFUJI Hideaki } 86470ceb4f5SYOSHIFUJI Hideaki return 0; 86570ceb4f5SYOSHIFUJI Hideaki } 86670ceb4f5SYOSHIFUJI Hideaki #endif 86770ceb4f5SYOSHIFUJI Hideaki 868ae90d867SDavid Ahern /* 869ae90d867SDavid Ahern * Misc support functions 870ae90d867SDavid Ahern */ 871ae90d867SDavid Ahern 872ae90d867SDavid Ahern /* called with rcu_lock held */ 8738d1c802bSDavid Ahern static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt) 874ae90d867SDavid Ahern { 875ad1601aeSDavid Ahern struct net_device *dev = rt->fib6_nh.fib_nh_dev; 876ae90d867SDavid Ahern 87793c2fb25SDavid Ahern if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) { 878ae90d867SDavid Ahern /* for copies of local routes, dst->dev needs to be the 879ae90d867SDavid Ahern * device if it is a master device, the master device if 880ae90d867SDavid Ahern * device is enslaved, and the loopback as the default 881ae90d867SDavid Ahern */ 882ae90d867SDavid Ahern if (netif_is_l3_slave(dev) && 88393c2fb25SDavid Ahern !rt6_need_strict(&rt->fib6_dst.addr)) 884ae90d867SDavid Ahern dev = l3mdev_master_dev_rcu(dev); 885ae90d867SDavid Ahern else if (!netif_is_l3_master(dev)) 886ae90d867SDavid Ahern dev = dev_net(dev)->loopback_dev; 887ae90d867SDavid Ahern /* last case is netif_is_l3_master(dev) is true in which 888ae90d867SDavid Ahern * case we want dev returned to be dev 889ae90d867SDavid Ahern */ 890ae90d867SDavid Ahern } 891ae90d867SDavid Ahern 892ae90d867SDavid Ahern return dev; 893ae90d867SDavid Ahern } 894ae90d867SDavid Ahern 8956edb3c96SDavid Ahern static const int 
fib6_prop[RTN_MAX + 1] = { 8966edb3c96SDavid Ahern [RTN_UNSPEC] = 0, 8976edb3c96SDavid Ahern [RTN_UNICAST] = 0, 8986edb3c96SDavid Ahern [RTN_LOCAL] = 0, 8996edb3c96SDavid Ahern [RTN_BROADCAST] = 0, 9006edb3c96SDavid Ahern [RTN_ANYCAST] = 0, 9016edb3c96SDavid Ahern [RTN_MULTICAST] = 0, 9026edb3c96SDavid Ahern [RTN_BLACKHOLE] = -EINVAL, 9036edb3c96SDavid Ahern [RTN_UNREACHABLE] = -EHOSTUNREACH, 9046edb3c96SDavid Ahern [RTN_PROHIBIT] = -EACCES, 9056edb3c96SDavid Ahern [RTN_THROW] = -EAGAIN, 9066edb3c96SDavid Ahern [RTN_NAT] = -EINVAL, 9076edb3c96SDavid Ahern [RTN_XRESOLVE] = -EINVAL, 9086edb3c96SDavid Ahern }; 9096edb3c96SDavid Ahern 9106edb3c96SDavid Ahern static int ip6_rt_type_to_error(u8 fib6_type) 9116edb3c96SDavid Ahern { 9126edb3c96SDavid Ahern return fib6_prop[fib6_type]; 9136edb3c96SDavid Ahern } 9146edb3c96SDavid Ahern 9158d1c802bSDavid Ahern static unsigned short fib6_info_dst_flags(struct fib6_info *rt) 9163b6761d1SDavid Ahern { 9173b6761d1SDavid Ahern unsigned short flags = 0; 9183b6761d1SDavid Ahern 9193b6761d1SDavid Ahern if (rt->dst_nocount) 9203b6761d1SDavid Ahern flags |= DST_NOCOUNT; 9213b6761d1SDavid Ahern if (rt->dst_nopolicy) 9223b6761d1SDavid Ahern flags |= DST_NOPOLICY; 9233b6761d1SDavid Ahern if (rt->dst_host) 9243b6761d1SDavid Ahern flags |= DST_HOST; 9253b6761d1SDavid Ahern 9263b6761d1SDavid Ahern return flags; 9273b6761d1SDavid Ahern } 9283b6761d1SDavid Ahern 9298d1c802bSDavid Ahern static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort) 9306edb3c96SDavid Ahern { 9316edb3c96SDavid Ahern rt->dst.error = ip6_rt_type_to_error(ort->fib6_type); 9326edb3c96SDavid Ahern 9336edb3c96SDavid Ahern switch (ort->fib6_type) { 9346edb3c96SDavid Ahern case RTN_BLACKHOLE: 9356edb3c96SDavid Ahern rt->dst.output = dst_discard_out; 9366edb3c96SDavid Ahern rt->dst.input = dst_discard; 9376edb3c96SDavid Ahern break; 9386edb3c96SDavid Ahern case RTN_PROHIBIT: 9396edb3c96SDavid Ahern rt->dst.output = ip6_pkt_prohibit_out; 9406edb3c96SDavid 
Ahern rt->dst.input = ip6_pkt_prohibit; 9416edb3c96SDavid Ahern break; 9426edb3c96SDavid Ahern case RTN_THROW: 9436edb3c96SDavid Ahern case RTN_UNREACHABLE: 9446edb3c96SDavid Ahern default: 9456edb3c96SDavid Ahern rt->dst.output = ip6_pkt_discard_out; 9466edb3c96SDavid Ahern rt->dst.input = ip6_pkt_discard; 9476edb3c96SDavid Ahern break; 9486edb3c96SDavid Ahern } 9496edb3c96SDavid Ahern } 9506edb3c96SDavid Ahern 9518d1c802bSDavid Ahern static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort) 9526edb3c96SDavid Ahern { 95393c2fb25SDavid Ahern if (ort->fib6_flags & RTF_REJECT) { 9546edb3c96SDavid Ahern ip6_rt_init_dst_reject(rt, ort); 9556edb3c96SDavid Ahern return; 9566edb3c96SDavid Ahern } 9576edb3c96SDavid Ahern 9586edb3c96SDavid Ahern rt->dst.error = 0; 9596edb3c96SDavid Ahern rt->dst.output = ip6_output; 9606edb3c96SDavid Ahern 961d23c4b63SHangbin Liu if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) { 9626edb3c96SDavid Ahern rt->dst.input = ip6_input; 96393c2fb25SDavid Ahern } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { 9646edb3c96SDavid Ahern rt->dst.input = ip6_mc_input; 9656edb3c96SDavid Ahern } else { 9666edb3c96SDavid Ahern rt->dst.input = ip6_forward; 9676edb3c96SDavid Ahern } 9686edb3c96SDavid Ahern 969ad1601aeSDavid Ahern if (ort->fib6_nh.fib_nh_lws) { 970ad1601aeSDavid Ahern rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.fib_nh_lws); 9716edb3c96SDavid Ahern lwtunnel_set_redirect(&rt->dst); 9726edb3c96SDavid Ahern } 9736edb3c96SDavid Ahern 9746edb3c96SDavid Ahern rt->dst.lastuse = jiffies; 9756edb3c96SDavid Ahern } 9766edb3c96SDavid Ahern 977e873e4b9SWei Wang /* Caller must already hold reference to @from */ 9788d1c802bSDavid Ahern static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from) 979ae90d867SDavid Ahern { 980ae90d867SDavid Ahern rt->rt6i_flags &= ~RTF_EXPIRES; 981a68886a6SDavid Ahern rcu_assign_pointer(rt->from, from); 982e1255ed4SDavid Ahern ip_dst_init_metrics(&rt->dst, 
from->fib6_metrics); 983ae90d867SDavid Ahern } 984ae90d867SDavid Ahern 985e873e4b9SWei Wang /* Caller must already hold reference to @ort */ 9868d1c802bSDavid Ahern static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort) 987ae90d867SDavid Ahern { 988dcd1f572SDavid Ahern struct net_device *dev = fib6_info_nh_dev(ort); 989dcd1f572SDavid Ahern 9906edb3c96SDavid Ahern ip6_rt_init_dst(rt, ort); 9916edb3c96SDavid Ahern 99293c2fb25SDavid Ahern rt->rt6i_dst = ort->fib6_dst; 993dcd1f572SDavid Ahern rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL; 99493c2fb25SDavid Ahern rt->rt6i_flags = ort->fib6_flags; 995bdf00467SDavid Ahern if (ort->fib6_nh.fib_nh_gw_family) { 996ad1601aeSDavid Ahern rt->rt6i_gateway = ort->fib6_nh.fib_nh_gw6; 9972b2450caSDavid Ahern rt->rt6i_flags |= RTF_GATEWAY; 9982b2450caSDavid Ahern } 999ae90d867SDavid Ahern rt6_set_from(rt, ort); 1000ae90d867SDavid Ahern #ifdef CONFIG_IPV6_SUBTREES 100193c2fb25SDavid Ahern rt->rt6i_src = ort->fib6_src; 1002ae90d867SDavid Ahern #endif 1003ae90d867SDavid Ahern } 1004ae90d867SDavid Ahern 1005a3c00e46SMartin KaFai Lau static struct fib6_node* fib6_backtrack(struct fib6_node *fn, 1006a3c00e46SMartin KaFai Lau struct in6_addr *saddr) 1007a3c00e46SMartin KaFai Lau { 100866f5d6ceSWei Wang struct fib6_node *pn, *sn; 1009a3c00e46SMartin KaFai Lau while (1) { 1010a3c00e46SMartin KaFai Lau if (fn->fn_flags & RTN_TL_ROOT) 1011a3c00e46SMartin KaFai Lau return NULL; 101266f5d6ceSWei Wang pn = rcu_dereference(fn->parent); 101366f5d6ceSWei Wang sn = FIB6_SUBTREE(pn); 101466f5d6ceSWei Wang if (sn && sn != fn) 10156454743bSDavid Ahern fn = fib6_node_lookup(sn, NULL, saddr); 1016a3c00e46SMartin KaFai Lau else 1017a3c00e46SMartin KaFai Lau fn = pn; 1018a3c00e46SMartin KaFai Lau if (fn->fn_flags & RTN_RTINFO) 1019a3c00e46SMartin KaFai Lau return fn; 1020a3c00e46SMartin KaFai Lau } 1021a3c00e46SMartin KaFai Lau } 1022c71099acSThomas Graf 102310585b43SDavid Ahern static bool ip6_hold_safe(struct net *net, struct rt6_info 
**prt) 1024d3843fe5SWei Wang { 1025d3843fe5SWei Wang struct rt6_info *rt = *prt; 1026d3843fe5SWei Wang 1027d3843fe5SWei Wang if (dst_hold_safe(&rt->dst)) 1028d3843fe5SWei Wang return true; 102910585b43SDavid Ahern if (net) { 1030d3843fe5SWei Wang rt = net->ipv6.ip6_null_entry; 1031d3843fe5SWei Wang dst_hold(&rt->dst); 1032d3843fe5SWei Wang } else { 1033d3843fe5SWei Wang rt = NULL; 1034d3843fe5SWei Wang } 1035d3843fe5SWei Wang *prt = rt; 1036d3843fe5SWei Wang return false; 1037d3843fe5SWei Wang } 1038d3843fe5SWei Wang 1039dec9b0e2SDavid Ahern /* called with rcu_lock held */ 10408d1c802bSDavid Ahern static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt) 1041dec9b0e2SDavid Ahern { 10423b6761d1SDavid Ahern unsigned short flags = fib6_info_dst_flags(rt); 1043ad1601aeSDavid Ahern struct net_device *dev = rt->fib6_nh.fib_nh_dev; 1044dec9b0e2SDavid Ahern struct rt6_info *nrt; 1045dec9b0e2SDavid Ahern 1046e873e4b9SWei Wang if (!fib6_info_hold_safe(rt)) 10471c87e79aSXin Long goto fallback; 1048e873e4b9SWei Wang 104993531c67SDavid Ahern nrt = ip6_dst_alloc(dev_net(dev), dev, flags); 10501c87e79aSXin Long if (!nrt) { 1051e873e4b9SWei Wang fib6_info_release(rt); 10521c87e79aSXin Long goto fallback; 10531c87e79aSXin Long } 1054dec9b0e2SDavid Ahern 10551c87e79aSXin Long ip6_rt_copy_init(nrt, rt); 10561c87e79aSXin Long return nrt; 10571c87e79aSXin Long 10581c87e79aSXin Long fallback: 10591c87e79aSXin Long nrt = dev_net(dev)->ipv6.ip6_null_entry; 10601c87e79aSXin Long dst_hold(&nrt->dst); 1061dec9b0e2SDavid Ahern return nrt; 1062dec9b0e2SDavid Ahern } 1063dec9b0e2SDavid Ahern 10648ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_lookup(struct net *net, 10658ed67789SDaniel Lezcano struct fib6_table *table, 1066b75cc8f9SDavid Ahern struct flowi6 *fl6, 1067b75cc8f9SDavid Ahern const struct sk_buff *skb, 1068b75cc8f9SDavid Ahern int flags) 10691da177e4SLinus Torvalds { 1070*b1d40991SDavid Ahern struct fib6_result res = {}; 10711da177e4SLinus Torvalds struct 
fib6_node *fn; 107223fb93a4SDavid Ahern struct rt6_info *rt; 10731da177e4SLinus Torvalds 1074b6cdbc85SDavid Ahern if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) 1075b6cdbc85SDavid Ahern flags &= ~RT6_LOOKUP_F_IFACE; 1076b6cdbc85SDavid Ahern 107766f5d6ceSWei Wang rcu_read_lock(); 10786454743bSDavid Ahern fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 1079c71099acSThomas Graf restart: 1080*b1d40991SDavid Ahern res.f6i = rcu_dereference(fn->leaf); 1081*b1d40991SDavid Ahern if (!res.f6i) 1082*b1d40991SDavid Ahern res.f6i = net->ipv6.fib6_null_entry; 1083af52a52cSDavid Ahern else 1084*b1d40991SDavid Ahern res.f6i = rt6_device_match(net, res.f6i, &fl6->saddr, 108566f5d6ceSWei Wang fl6->flowi6_oif, flags); 1086af52a52cSDavid Ahern 1087*b1d40991SDavid Ahern if (res.f6i == net->ipv6.fib6_null_entry) { 1088a3c00e46SMartin KaFai Lau fn = fib6_backtrack(fn, &fl6->saddr); 1089a3c00e46SMartin KaFai Lau if (fn) 1090a3c00e46SMartin KaFai Lau goto restart; 1091af52a52cSDavid Ahern 1092af52a52cSDavid Ahern rt = net->ipv6.ip6_null_entry; 1093af52a52cSDavid Ahern dst_hold(&rt->dst); 1094af52a52cSDavid Ahern goto out; 1095a3c00e46SMartin KaFai Lau } 10962b760fcfSWei Wang 1097*b1d40991SDavid Ahern fib6_select_path(net, &res, fl6, fl6->flowi6_oif, 1098*b1d40991SDavid Ahern fl6->flowi6_oif != 0, skb, flags); 1099*b1d40991SDavid Ahern 11004c9483b2SDavid S. 
Miller /* Search through exception table */ 1101*b1d40991SDavid Ahern rt = rt6_find_cached_rt(res.f6i, &fl6->daddr, &fl6->saddr); 110223fb93a4SDavid Ahern if (rt) { 110310585b43SDavid Ahern if (ip6_hold_safe(net, &rt)) 1104d3843fe5SWei Wang dst_use_noref(&rt->dst, jiffies); 110523fb93a4SDavid Ahern } else { 1106*b1d40991SDavid Ahern rt = ip6_create_rt_rcu(res.f6i); 1107dec9b0e2SDavid Ahern } 1108d3843fe5SWei Wang 1109af52a52cSDavid Ahern out: 1110*b1d40991SDavid Ahern trace_fib6_table_lookup(net, res.f6i, table, fl6); 1111af52a52cSDavid Ahern 111266f5d6ceSWei Wang rcu_read_unlock(); 1113b811580dSDavid Ahern 11141da177e4SLinus Torvalds return rt; 1115c71099acSThomas Graf } 1116c71099acSThomas Graf 1117ea6e574eSFlorian Westphal struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, 1118b75cc8f9SDavid Ahern const struct sk_buff *skb, int flags) 1119ea6e574eSFlorian Westphal { 1120b75cc8f9SDavid Ahern return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup); 1121ea6e574eSFlorian Westphal } 1122ea6e574eSFlorian Westphal EXPORT_SYMBOL_GPL(ip6_route_lookup); 1123ea6e574eSFlorian Westphal 11249acd9f3aSYOSHIFUJI Hideaki struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, 1125b75cc8f9SDavid Ahern const struct in6_addr *saddr, int oif, 1126b75cc8f9SDavid Ahern const struct sk_buff *skb, int strict) 1127c71099acSThomas Graf { 11284c9483b2SDavid S. Miller struct flowi6 fl6 = { 11294c9483b2SDavid S. Miller .flowi6_oif = oif, 11304c9483b2SDavid S. Miller .daddr = *daddr, 1131c71099acSThomas Graf }; 1132c71099acSThomas Graf struct dst_entry *dst; 113377d16f45SYOSHIFUJI Hideaki int flags = strict ? RT6_LOOKUP_F_IFACE : 0; 1134c71099acSThomas Graf 1135adaa70bbSThomas Graf if (saddr) { 11364c9483b2SDavid S. 
Miller memcpy(&fl6.saddr, saddr, sizeof(*saddr)); 1137adaa70bbSThomas Graf flags |= RT6_LOOKUP_F_HAS_SADDR; 1138adaa70bbSThomas Graf } 1139adaa70bbSThomas Graf 1140b75cc8f9SDavid Ahern dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup); 1141c71099acSThomas Graf if (dst->error == 0) 1142c71099acSThomas Graf return (struct rt6_info *) dst; 1143c71099acSThomas Graf 1144c71099acSThomas Graf dst_release(dst); 1145c71099acSThomas Graf 11461da177e4SLinus Torvalds return NULL; 11471da177e4SLinus Torvalds } 11487159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(rt6_lookup); 11497159039aSYOSHIFUJI Hideaki 1150c71099acSThomas Graf /* ip6_ins_rt is called with FREE table->tb6_lock. 11511cfb71eeSWei Wang * It takes new route entry, the addition fails by any reason the 11521cfb71eeSWei Wang * route is released. 11531cfb71eeSWei Wang * Caller must hold dst before calling it. 11541da177e4SLinus Torvalds */ 11551da177e4SLinus Torvalds 11568d1c802bSDavid Ahern static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info, 1157333c4301SDavid Ahern struct netlink_ext_ack *extack) 11581da177e4SLinus Torvalds { 11591da177e4SLinus Torvalds int err; 1160c71099acSThomas Graf struct fib6_table *table; 11611da177e4SLinus Torvalds 116293c2fb25SDavid Ahern table = rt->fib6_table; 116366f5d6ceSWei Wang spin_lock_bh(&table->tb6_lock); 1164d4ead6b3SDavid Ahern err = fib6_add(&table->tb6_root, rt, info, extack); 116566f5d6ceSWei Wang spin_unlock_bh(&table->tb6_lock); 11661da177e4SLinus Torvalds 11671da177e4SLinus Torvalds return err; 11681da177e4SLinus Torvalds } 11691da177e4SLinus Torvalds 11708d1c802bSDavid Ahern int ip6_ins_rt(struct net *net, struct fib6_info *rt) 117140e22e8fSThomas Graf { 1172afb1d4b5SDavid Ahern struct nl_info info = { .nl_net = net, }; 1173e715b6d3SFlorian Westphal 1174d4ead6b3SDavid Ahern return __ip6_ins_rt(rt, &info, NULL); 117540e22e8fSThomas Graf } 117640e22e8fSThomas Graf 11778d1c802bSDavid Ahern static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info 
*ort, 117821efcfa0SEric Dumazet const struct in6_addr *daddr, 1179b71d1d42SEric Dumazet const struct in6_addr *saddr) 11801da177e4SLinus Torvalds { 11814832c30dSDavid Ahern struct net_device *dev; 11821da177e4SLinus Torvalds struct rt6_info *rt; 11831da177e4SLinus Torvalds 11841da177e4SLinus Torvalds /* 11851da177e4SLinus Torvalds * Clone the route. 11861da177e4SLinus Torvalds */ 11871da177e4SLinus Torvalds 1188e873e4b9SWei Wang if (!fib6_info_hold_safe(ort)) 1189e873e4b9SWei Wang return NULL; 1190e873e4b9SWei Wang 11914832c30dSDavid Ahern dev = ip6_rt_get_dev_rcu(ort); 119293531c67SDavid Ahern rt = ip6_dst_alloc(dev_net(dev), dev, 0); 1193e873e4b9SWei Wang if (!rt) { 1194e873e4b9SWei Wang fib6_info_release(ort); 119583a09abdSMartin KaFai Lau return NULL; 1196e873e4b9SWei Wang } 119783a09abdSMartin KaFai Lau 119883a09abdSMartin KaFai Lau ip6_rt_copy_init(rt, ort); 11998b9df265SMartin KaFai Lau rt->rt6i_flags |= RTF_CACHE; 120083a09abdSMartin KaFai Lau rt->dst.flags |= DST_HOST; 120183a09abdSMartin KaFai Lau rt->rt6i_dst.addr = *daddr; 120283a09abdSMartin KaFai Lau rt->rt6i_dst.plen = 128; 12038b9df265SMartin KaFai Lau 12048b9df265SMartin KaFai Lau if (!rt6_is_gw_or_nonexthop(ort)) { 120593c2fb25SDavid Ahern if (ort->fib6_dst.plen != 128 && 120693c2fb25SDavid Ahern ipv6_addr_equal(&ort->fib6_dst.addr, daddr)) 120758c4fb86SYOSHIFUJI Hideaki rt->rt6i_flags |= RTF_ANYCAST; 12081da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES 12091da177e4SLinus Torvalds if (rt->rt6i_src.plen && saddr) { 12104e3fd7a0SAlexey Dobriyan rt->rt6i_src.addr = *saddr; 12111da177e4SLinus Torvalds rt->rt6i_src.plen = 128; 12121da177e4SLinus Torvalds } 12131da177e4SLinus Torvalds #endif 121495a9a5baSYOSHIFUJI Hideaki } 121595a9a5baSYOSHIFUJI Hideaki 1216299d9939SYOSHIFUJI Hideaki return rt; 1217299d9939SYOSHIFUJI Hideaki } 1218299d9939SYOSHIFUJI Hideaki 12198d1c802bSDavid Ahern static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt) 1220d52d3997SMartin KaFai Lau { 12213b6761d1SDavid 
Ahern unsigned short flags = fib6_info_dst_flags(rt); 12224832c30dSDavid Ahern struct net_device *dev; 1223d52d3997SMartin KaFai Lau struct rt6_info *pcpu_rt; 1224d52d3997SMartin KaFai Lau 1225e873e4b9SWei Wang if (!fib6_info_hold_safe(rt)) 1226e873e4b9SWei Wang return NULL; 1227e873e4b9SWei Wang 12284832c30dSDavid Ahern rcu_read_lock(); 12294832c30dSDavid Ahern dev = ip6_rt_get_dev_rcu(rt); 123093531c67SDavid Ahern pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags); 12314832c30dSDavid Ahern rcu_read_unlock(); 1232e873e4b9SWei Wang if (!pcpu_rt) { 1233e873e4b9SWei Wang fib6_info_release(rt); 1234d52d3997SMartin KaFai Lau return NULL; 1235e873e4b9SWei Wang } 1236d52d3997SMartin KaFai Lau ip6_rt_copy_init(pcpu_rt, rt); 1237d52d3997SMartin KaFai Lau pcpu_rt->rt6i_flags |= RTF_PCPU; 1238d52d3997SMartin KaFai Lau return pcpu_rt; 1239d52d3997SMartin KaFai Lau } 1240d52d3997SMartin KaFai Lau 124166f5d6ceSWei Wang /* It should be called with rcu_read_lock() acquired */ 12428d1c802bSDavid Ahern static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt) 1243d52d3997SMartin KaFai Lau { 1244a73e4195SMartin KaFai Lau struct rt6_info *pcpu_rt, **p; 1245d52d3997SMartin KaFai Lau 1246d52d3997SMartin KaFai Lau p = this_cpu_ptr(rt->rt6i_pcpu); 1247d52d3997SMartin KaFai Lau pcpu_rt = *p; 1248d52d3997SMartin KaFai Lau 1249d4ead6b3SDavid Ahern if (pcpu_rt) 125010585b43SDavid Ahern ip6_hold_safe(NULL, &pcpu_rt); 1251d3843fe5SWei Wang 1252a73e4195SMartin KaFai Lau return pcpu_rt; 1253a73e4195SMartin KaFai Lau } 1254a73e4195SMartin KaFai Lau 1255afb1d4b5SDavid Ahern static struct rt6_info *rt6_make_pcpu_route(struct net *net, 12568d1c802bSDavid Ahern struct fib6_info *rt) 1257a73e4195SMartin KaFai Lau { 1258a73e4195SMartin KaFai Lau struct rt6_info *pcpu_rt, *prev, **p; 1259d52d3997SMartin KaFai Lau 1260d52d3997SMartin KaFai Lau pcpu_rt = ip6_rt_pcpu_alloc(rt); 1261d52d3997SMartin KaFai Lau if (!pcpu_rt) { 12629c7370a1SMartin KaFai Lau dst_hold(&net->ipv6.ip6_null_entry->dst); 
12639c7370a1SMartin KaFai Lau return net->ipv6.ip6_null_entry; 1264d52d3997SMartin KaFai Lau } 1265d52d3997SMartin KaFai Lau 1266a94b9367SWei Wang dst_hold(&pcpu_rt->dst); 1267a73e4195SMartin KaFai Lau p = this_cpu_ptr(rt->rt6i_pcpu); 1268d52d3997SMartin KaFai Lau prev = cmpxchg(p, NULL, pcpu_rt); 1269951f788aSEric Dumazet BUG_ON(prev); 1270a94b9367SWei Wang 1271d52d3997SMartin KaFai Lau return pcpu_rt; 1272d52d3997SMartin KaFai Lau } 1273d52d3997SMartin KaFai Lau 127435732d01SWei Wang /* exception hash table implementation 127535732d01SWei Wang */ 127635732d01SWei Wang static DEFINE_SPINLOCK(rt6_exception_lock); 127735732d01SWei Wang 127835732d01SWei Wang /* Remove rt6_ex from hash table and free the memory 127935732d01SWei Wang * Caller must hold rt6_exception_lock 128035732d01SWei Wang */ 128135732d01SWei Wang static void rt6_remove_exception(struct rt6_exception_bucket *bucket, 128235732d01SWei Wang struct rt6_exception *rt6_ex) 128335732d01SWei Wang { 1284f5b51fe8SPaolo Abeni struct fib6_info *from; 1285b2427e67SColin Ian King struct net *net; 128681eb8447SWei Wang 128735732d01SWei Wang if (!bucket || !rt6_ex) 128835732d01SWei Wang return; 1289b2427e67SColin Ian King 1290b2427e67SColin Ian King net = dev_net(rt6_ex->rt6i->dst.dev); 1291f5b51fe8SPaolo Abeni net->ipv6.rt6_stats->fib_rt_cache--; 1292f5b51fe8SPaolo Abeni 1293f5b51fe8SPaolo Abeni /* purge completely the exception to allow releasing the held resources: 1294f5b51fe8SPaolo Abeni * some [sk] cache may keep the dst around for unlimited time 1295f5b51fe8SPaolo Abeni */ 1296f5b51fe8SPaolo Abeni from = rcu_dereference_protected(rt6_ex->rt6i->from, 1297f5b51fe8SPaolo Abeni lockdep_is_held(&rt6_exception_lock)); 1298f5b51fe8SPaolo Abeni rcu_assign_pointer(rt6_ex->rt6i->from, NULL); 1299f5b51fe8SPaolo Abeni fib6_info_release(from); 1300f5b51fe8SPaolo Abeni dst_dev_put(&rt6_ex->rt6i->dst); 1301f5b51fe8SPaolo Abeni 130235732d01SWei Wang hlist_del_rcu(&rt6_ex->hlist); 130377634cc6SDavid Ahern 
dst_release(&rt6_ex->rt6i->dst); 130435732d01SWei Wang kfree_rcu(rt6_ex, rcu); 130535732d01SWei Wang WARN_ON_ONCE(!bucket->depth); 130635732d01SWei Wang bucket->depth--; 130735732d01SWei Wang } 130835732d01SWei Wang 130935732d01SWei Wang /* Remove oldest rt6_ex in bucket and free the memory 131035732d01SWei Wang * Caller must hold rt6_exception_lock 131135732d01SWei Wang */ 131235732d01SWei Wang static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket) 131335732d01SWei Wang { 131435732d01SWei Wang struct rt6_exception *rt6_ex, *oldest = NULL; 131535732d01SWei Wang 131635732d01SWei Wang if (!bucket) 131735732d01SWei Wang return; 131835732d01SWei Wang 131935732d01SWei Wang hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { 132035732d01SWei Wang if (!oldest || time_before(rt6_ex->stamp, oldest->stamp)) 132135732d01SWei Wang oldest = rt6_ex; 132235732d01SWei Wang } 132335732d01SWei Wang rt6_remove_exception(bucket, oldest); 132435732d01SWei Wang } 132535732d01SWei Wang 132635732d01SWei Wang static u32 rt6_exception_hash(const struct in6_addr *dst, 132735732d01SWei Wang const struct in6_addr *src) 132835732d01SWei Wang { 132935732d01SWei Wang static u32 seed __read_mostly; 133035732d01SWei Wang u32 val; 133135732d01SWei Wang 133235732d01SWei Wang net_get_random_once(&seed, sizeof(seed)); 133335732d01SWei Wang val = jhash(dst, sizeof(*dst), seed); 133435732d01SWei Wang 133535732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES 133635732d01SWei Wang if (src) 133735732d01SWei Wang val = jhash(src, sizeof(*src), val); 133835732d01SWei Wang #endif 133935732d01SWei Wang return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT); 134035732d01SWei Wang } 134135732d01SWei Wang 134235732d01SWei Wang /* Helper function to find the cached rt in the hash table 134335732d01SWei Wang * and update bucket pointer to point to the bucket for this 134435732d01SWei Wang * (daddr, saddr) pair 134535732d01SWei Wang * Caller must hold rt6_exception_lock 134635732d01SWei Wang */ 
134735732d01SWei Wang static struct rt6_exception * 134835732d01SWei Wang __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket, 134935732d01SWei Wang const struct in6_addr *daddr, 135035732d01SWei Wang const struct in6_addr *saddr) 135135732d01SWei Wang { 135235732d01SWei Wang struct rt6_exception *rt6_ex; 135335732d01SWei Wang u32 hval; 135435732d01SWei Wang 135535732d01SWei Wang if (!(*bucket) || !daddr) 135635732d01SWei Wang return NULL; 135735732d01SWei Wang 135835732d01SWei Wang hval = rt6_exception_hash(daddr, saddr); 135935732d01SWei Wang *bucket += hval; 136035732d01SWei Wang 136135732d01SWei Wang hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) { 136235732d01SWei Wang struct rt6_info *rt6 = rt6_ex->rt6i; 136335732d01SWei Wang bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr); 136435732d01SWei Wang 136535732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES 136635732d01SWei Wang if (matched && saddr) 136735732d01SWei Wang matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr); 136835732d01SWei Wang #endif 136935732d01SWei Wang if (matched) 137035732d01SWei Wang return rt6_ex; 137135732d01SWei Wang } 137235732d01SWei Wang return NULL; 137335732d01SWei Wang } 137435732d01SWei Wang 137535732d01SWei Wang /* Helper function to find the cached rt in the hash table 137635732d01SWei Wang * and update bucket pointer to point to the bucket for this 137735732d01SWei Wang * (daddr, saddr) pair 137835732d01SWei Wang * Caller must hold rcu_read_lock() 137935732d01SWei Wang */ 138035732d01SWei Wang static struct rt6_exception * 138135732d01SWei Wang __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket, 138235732d01SWei Wang const struct in6_addr *daddr, 138335732d01SWei Wang const struct in6_addr *saddr) 138435732d01SWei Wang { 138535732d01SWei Wang struct rt6_exception *rt6_ex; 138635732d01SWei Wang u32 hval; 138735732d01SWei Wang 138835732d01SWei Wang WARN_ON_ONCE(!rcu_read_lock_held()); 138935732d01SWei Wang 139035732d01SWei Wang if 
(!(*bucket) || !daddr) 139135732d01SWei Wang return NULL; 139235732d01SWei Wang 139335732d01SWei Wang hval = rt6_exception_hash(daddr, saddr); 139435732d01SWei Wang *bucket += hval; 139535732d01SWei Wang 139635732d01SWei Wang hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) { 139735732d01SWei Wang struct rt6_info *rt6 = rt6_ex->rt6i; 139835732d01SWei Wang bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr); 139935732d01SWei Wang 140035732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES 140135732d01SWei Wang if (matched && saddr) 140235732d01SWei Wang matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr); 140335732d01SWei Wang #endif 140435732d01SWei Wang if (matched) 140535732d01SWei Wang return rt6_ex; 140635732d01SWei Wang } 140735732d01SWei Wang return NULL; 140835732d01SWei Wang } 140935732d01SWei Wang 14108d1c802bSDavid Ahern static unsigned int fib6_mtu(const struct fib6_info *rt) 141135732d01SWei Wang { 1412d4ead6b3SDavid Ahern unsigned int mtu; 1413d4ead6b3SDavid Ahern 1414dcd1f572SDavid Ahern if (rt->fib6_pmtu) { 1415dcd1f572SDavid Ahern mtu = rt->fib6_pmtu; 1416dcd1f572SDavid Ahern } else { 1417dcd1f572SDavid Ahern struct net_device *dev = fib6_info_nh_dev(rt); 1418dcd1f572SDavid Ahern struct inet6_dev *idev; 1419dcd1f572SDavid Ahern 1420dcd1f572SDavid Ahern rcu_read_lock(); 1421dcd1f572SDavid Ahern idev = __in6_dev_get(dev); 1422dcd1f572SDavid Ahern mtu = idev->cnf.mtu6; 1423dcd1f572SDavid Ahern rcu_read_unlock(); 1424dcd1f572SDavid Ahern } 1425dcd1f572SDavid Ahern 1426d4ead6b3SDavid Ahern mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); 1427d4ead6b3SDavid Ahern 1428ad1601aeSDavid Ahern return mtu - lwtunnel_headroom(rt->fib6_nh.fib_nh_lws, mtu); 1429d4ead6b3SDavid Ahern } 1430d4ead6b3SDavid Ahern 143135732d01SWei Wang static int rt6_insert_exception(struct rt6_info *nrt, 14328d1c802bSDavid Ahern struct fib6_info *ort) 143335732d01SWei Wang { 14345e670d84SDavid Ahern struct net *net = dev_net(nrt->dst.dev); 143535732d01SWei Wang struct 
rt6_exception_bucket *bucket; 143635732d01SWei Wang struct in6_addr *src_key = NULL; 143735732d01SWei Wang struct rt6_exception *rt6_ex; 143835732d01SWei Wang int err = 0; 143935732d01SWei Wang 144035732d01SWei Wang spin_lock_bh(&rt6_exception_lock); 144135732d01SWei Wang 144235732d01SWei Wang if (ort->exception_bucket_flushed) { 144335732d01SWei Wang err = -EINVAL; 144435732d01SWei Wang goto out; 144535732d01SWei Wang } 144635732d01SWei Wang 144735732d01SWei Wang bucket = rcu_dereference_protected(ort->rt6i_exception_bucket, 144835732d01SWei Wang lockdep_is_held(&rt6_exception_lock)); 144935732d01SWei Wang if (!bucket) { 145035732d01SWei Wang bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket), 145135732d01SWei Wang GFP_ATOMIC); 145235732d01SWei Wang if (!bucket) { 145335732d01SWei Wang err = -ENOMEM; 145435732d01SWei Wang goto out; 145535732d01SWei Wang } 145635732d01SWei Wang rcu_assign_pointer(ort->rt6i_exception_bucket, bucket); 145735732d01SWei Wang } 145835732d01SWei Wang 145935732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES 146035732d01SWei Wang /* rt6i_src.plen != 0 indicates ort is in subtree 146135732d01SWei Wang * and exception table is indexed by a hash of 146235732d01SWei Wang * both rt6i_dst and rt6i_src. 146335732d01SWei Wang * Otherwise, the exception table is indexed by 146435732d01SWei Wang * a hash of only rt6i_dst. 146535732d01SWei Wang */ 146693c2fb25SDavid Ahern if (ort->fib6_src.plen) 146735732d01SWei Wang src_key = &nrt->rt6i_src.addr; 146835732d01SWei Wang #endif 1469f5bbe7eeSWei Wang /* rt6_mtu_change() might lower mtu on ort. 1470f5bbe7eeSWei Wang * Only insert this exception route if its mtu 1471f5bbe7eeSWei Wang * is less than ort's mtu value. 
1472f5bbe7eeSWei Wang */ 1473d4ead6b3SDavid Ahern if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) { 1474f5bbe7eeSWei Wang err = -EINVAL; 1475f5bbe7eeSWei Wang goto out; 1476f5bbe7eeSWei Wang } 147760006a48SWei Wang 147835732d01SWei Wang rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr, 147935732d01SWei Wang src_key); 148035732d01SWei Wang if (rt6_ex) 148135732d01SWei Wang rt6_remove_exception(bucket, rt6_ex); 148235732d01SWei Wang 148335732d01SWei Wang rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC); 148435732d01SWei Wang if (!rt6_ex) { 148535732d01SWei Wang err = -ENOMEM; 148635732d01SWei Wang goto out; 148735732d01SWei Wang } 148835732d01SWei Wang rt6_ex->rt6i = nrt; 148935732d01SWei Wang rt6_ex->stamp = jiffies; 149035732d01SWei Wang hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain); 149135732d01SWei Wang bucket->depth++; 149281eb8447SWei Wang net->ipv6.rt6_stats->fib_rt_cache++; 149335732d01SWei Wang 149435732d01SWei Wang if (bucket->depth > FIB6_MAX_DEPTH) 149535732d01SWei Wang rt6_exception_remove_oldest(bucket); 149635732d01SWei Wang 149735732d01SWei Wang out: 149835732d01SWei Wang spin_unlock_bh(&rt6_exception_lock); 149935732d01SWei Wang 150035732d01SWei Wang /* Update fn->fn_sernum to invalidate all cached dst */ 1501b886d5f2SPaolo Abeni if (!err) { 150293c2fb25SDavid Ahern spin_lock_bh(&ort->fib6_table->tb6_lock); 15037aef6859SDavid Ahern fib6_update_sernum(net, ort); 150493c2fb25SDavid Ahern spin_unlock_bh(&ort->fib6_table->tb6_lock); 1505b886d5f2SPaolo Abeni fib6_force_start_gc(net); 1506b886d5f2SPaolo Abeni } 150735732d01SWei Wang 150835732d01SWei Wang return err; 150935732d01SWei Wang } 151035732d01SWei Wang 15118d1c802bSDavid Ahern void rt6_flush_exceptions(struct fib6_info *rt) 151235732d01SWei Wang { 151335732d01SWei Wang struct rt6_exception_bucket *bucket; 151435732d01SWei Wang struct rt6_exception *rt6_ex; 151535732d01SWei Wang struct hlist_node *tmp; 151635732d01SWei Wang int i; 151735732d01SWei Wang 
151835732d01SWei Wang spin_lock_bh(&rt6_exception_lock); 151935732d01SWei Wang /* Prevent rt6_insert_exception() to recreate the bucket list */ 152035732d01SWei Wang rt->exception_bucket_flushed = 1; 152135732d01SWei Wang 152235732d01SWei Wang bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 152335732d01SWei Wang lockdep_is_held(&rt6_exception_lock)); 152435732d01SWei Wang if (!bucket) 152535732d01SWei Wang goto out; 152635732d01SWei Wang 152735732d01SWei Wang for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { 152835732d01SWei Wang hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) 152935732d01SWei Wang rt6_remove_exception(bucket, rt6_ex); 153035732d01SWei Wang WARN_ON_ONCE(bucket->depth); 153135732d01SWei Wang bucket++; 153235732d01SWei Wang } 153335732d01SWei Wang 153435732d01SWei Wang out: 153535732d01SWei Wang spin_unlock_bh(&rt6_exception_lock); 153635732d01SWei Wang } 153735732d01SWei Wang 153835732d01SWei Wang /* Find cached rt in the hash table inside passed in rt 153935732d01SWei Wang * Caller has to hold rcu_read_lock() 154035732d01SWei Wang */ 15418d1c802bSDavid Ahern static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, 154235732d01SWei Wang struct in6_addr *daddr, 154335732d01SWei Wang struct in6_addr *saddr) 154435732d01SWei Wang { 154535732d01SWei Wang struct rt6_exception_bucket *bucket; 154635732d01SWei Wang struct in6_addr *src_key = NULL; 154735732d01SWei Wang struct rt6_exception *rt6_ex; 154835732d01SWei Wang struct rt6_info *res = NULL; 154935732d01SWei Wang 155035732d01SWei Wang bucket = rcu_dereference(rt->rt6i_exception_bucket); 155135732d01SWei Wang 155235732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES 155335732d01SWei Wang /* rt6i_src.plen != 0 indicates rt is in subtree 155435732d01SWei Wang * and exception table is indexed by a hash of 155535732d01SWei Wang * both rt6i_dst and rt6i_src. 155635732d01SWei Wang * Otherwise, the exception table is indexed by 155735732d01SWei Wang * a hash of only rt6i_dst. 
155835732d01SWei Wang */ 155993c2fb25SDavid Ahern if (rt->fib6_src.plen) 156035732d01SWei Wang src_key = saddr; 156135732d01SWei Wang #endif 156235732d01SWei Wang rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); 156335732d01SWei Wang 156435732d01SWei Wang if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) 156535732d01SWei Wang res = rt6_ex->rt6i; 156635732d01SWei Wang 156735732d01SWei Wang return res; 156835732d01SWei Wang } 156935732d01SWei Wang 157035732d01SWei Wang /* Remove the passed in cached rt from the hash table that contains it */ 157123fb93a4SDavid Ahern static int rt6_remove_exception_rt(struct rt6_info *rt) 157235732d01SWei Wang { 157335732d01SWei Wang struct rt6_exception_bucket *bucket; 157435732d01SWei Wang struct in6_addr *src_key = NULL; 157535732d01SWei Wang struct rt6_exception *rt6_ex; 15768a14e46fSDavid Ahern struct fib6_info *from; 157735732d01SWei Wang int err; 157835732d01SWei Wang 1579091311deSEric Dumazet from = rcu_dereference(rt->from); 158035732d01SWei Wang if (!from || 1581442d713bSColin Ian King !(rt->rt6i_flags & RTF_CACHE)) 158235732d01SWei Wang return -EINVAL; 158335732d01SWei Wang 158435732d01SWei Wang if (!rcu_access_pointer(from->rt6i_exception_bucket)) 158535732d01SWei Wang return -ENOENT; 158635732d01SWei Wang 158735732d01SWei Wang spin_lock_bh(&rt6_exception_lock); 158835732d01SWei Wang bucket = rcu_dereference_protected(from->rt6i_exception_bucket, 158935732d01SWei Wang lockdep_is_held(&rt6_exception_lock)); 159035732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES 159135732d01SWei Wang /* rt6i_src.plen != 0 indicates 'from' is in subtree 159235732d01SWei Wang * and exception table is indexed by a hash of 159335732d01SWei Wang * both rt6i_dst and rt6i_src. 159435732d01SWei Wang * Otherwise, the exception table is indexed by 159535732d01SWei Wang * a hash of only rt6i_dst. 
159635732d01SWei Wang */ 159793c2fb25SDavid Ahern if (from->fib6_src.plen) 159835732d01SWei Wang src_key = &rt->rt6i_src.addr; 159935732d01SWei Wang #endif 160035732d01SWei Wang rt6_ex = __rt6_find_exception_spinlock(&bucket, 160135732d01SWei Wang &rt->rt6i_dst.addr, 160235732d01SWei Wang src_key); 160335732d01SWei Wang if (rt6_ex) { 160435732d01SWei Wang rt6_remove_exception(bucket, rt6_ex); 160535732d01SWei Wang err = 0; 160635732d01SWei Wang } else { 160735732d01SWei Wang err = -ENOENT; 160835732d01SWei Wang } 160935732d01SWei Wang 161035732d01SWei Wang spin_unlock_bh(&rt6_exception_lock); 161135732d01SWei Wang return err; 161235732d01SWei Wang } 161335732d01SWei Wang 161435732d01SWei Wang /* Find rt6_ex which contains the passed in rt cache and 161535732d01SWei Wang * refresh its stamp 161635732d01SWei Wang */ 161735732d01SWei Wang static void rt6_update_exception_stamp_rt(struct rt6_info *rt) 161835732d01SWei Wang { 161935732d01SWei Wang struct rt6_exception_bucket *bucket; 162035732d01SWei Wang struct in6_addr *src_key = NULL; 162135732d01SWei Wang struct rt6_exception *rt6_ex; 1622193f3685SPaolo Abeni struct fib6_info *from; 162335732d01SWei Wang 162435732d01SWei Wang rcu_read_lock(); 1625193f3685SPaolo Abeni from = rcu_dereference(rt->from); 1626193f3685SPaolo Abeni if (!from || !(rt->rt6i_flags & RTF_CACHE)) 1627193f3685SPaolo Abeni goto unlock; 1628193f3685SPaolo Abeni 162935732d01SWei Wang bucket = rcu_dereference(from->rt6i_exception_bucket); 163035732d01SWei Wang 163135732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES 163235732d01SWei Wang /* rt6i_src.plen != 0 indicates 'from' is in subtree 163335732d01SWei Wang * and exception table is indexed by a hash of 163435732d01SWei Wang * both rt6i_dst and rt6i_src. 163535732d01SWei Wang * Otherwise, the exception table is indexed by 163635732d01SWei Wang * a hash of only rt6i_dst. 
163735732d01SWei Wang */ 163893c2fb25SDavid Ahern if (from->fib6_src.plen) 163935732d01SWei Wang src_key = &rt->rt6i_src.addr; 164035732d01SWei Wang #endif 164135732d01SWei Wang rt6_ex = __rt6_find_exception_rcu(&bucket, 164235732d01SWei Wang &rt->rt6i_dst.addr, 164335732d01SWei Wang src_key); 164435732d01SWei Wang if (rt6_ex) 164535732d01SWei Wang rt6_ex->stamp = jiffies; 164635732d01SWei Wang 1647193f3685SPaolo Abeni unlock: 164835732d01SWei Wang rcu_read_unlock(); 164935732d01SWei Wang } 165035732d01SWei Wang 1651e9fa1495SStefano Brivio static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev, 1652e9fa1495SStefano Brivio struct rt6_info *rt, int mtu) 1653e9fa1495SStefano Brivio { 1654e9fa1495SStefano Brivio /* If the new MTU is lower than the route PMTU, this new MTU will be the 1655e9fa1495SStefano Brivio * lowest MTU in the path: always allow updating the route PMTU to 1656e9fa1495SStefano Brivio * reflect PMTU decreases. 1657e9fa1495SStefano Brivio * 1658e9fa1495SStefano Brivio * If the new MTU is higher, and the route PMTU is equal to the local 1659e9fa1495SStefano Brivio * MTU, this means the old MTU is the lowest in the path, so allow 1660e9fa1495SStefano Brivio * updating it: if other nodes now have lower MTUs, PMTU discovery will 1661e9fa1495SStefano Brivio * handle this. 
1662e9fa1495SStefano Brivio */ 1663e9fa1495SStefano Brivio 1664e9fa1495SStefano Brivio if (dst_mtu(&rt->dst) >= mtu) 1665e9fa1495SStefano Brivio return true; 1666e9fa1495SStefano Brivio 1667e9fa1495SStefano Brivio if (dst_mtu(&rt->dst) == idev->cnf.mtu6) 1668e9fa1495SStefano Brivio return true; 1669e9fa1495SStefano Brivio 1670e9fa1495SStefano Brivio return false; 1671e9fa1495SStefano Brivio } 1672e9fa1495SStefano Brivio 1673e9fa1495SStefano Brivio static void rt6_exceptions_update_pmtu(struct inet6_dev *idev, 16748d1c802bSDavid Ahern struct fib6_info *rt, int mtu) 1675f5bbe7eeSWei Wang { 1676f5bbe7eeSWei Wang struct rt6_exception_bucket *bucket; 1677f5bbe7eeSWei Wang struct rt6_exception *rt6_ex; 1678f5bbe7eeSWei Wang int i; 1679f5bbe7eeSWei Wang 1680f5bbe7eeSWei Wang bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1681f5bbe7eeSWei Wang lockdep_is_held(&rt6_exception_lock)); 1682f5bbe7eeSWei Wang 1683e9fa1495SStefano Brivio if (!bucket) 1684e9fa1495SStefano Brivio return; 1685e9fa1495SStefano Brivio 1686f5bbe7eeSWei Wang for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { 1687f5bbe7eeSWei Wang hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { 1688f5bbe7eeSWei Wang struct rt6_info *entry = rt6_ex->rt6i; 1689e9fa1495SStefano Brivio 1690e9fa1495SStefano Brivio /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected 1691d4ead6b3SDavid Ahern * route), the metrics of its rt->from have already 1692f5bbe7eeSWei Wang * been updated. 
1693f5bbe7eeSWei Wang */ 1694d4ead6b3SDavid Ahern if (dst_metric_raw(&entry->dst, RTAX_MTU) && 1695e9fa1495SStefano Brivio rt6_mtu_change_route_allowed(idev, entry, mtu)) 1696d4ead6b3SDavid Ahern dst_metric_set(&entry->dst, RTAX_MTU, mtu); 1697f5bbe7eeSWei Wang } 1698f5bbe7eeSWei Wang bucket++; 1699f5bbe7eeSWei Wang } 1700f5bbe7eeSWei Wang } 1701f5bbe7eeSWei Wang 1702b16cb459SWei Wang #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) 1703b16cb459SWei Wang 17048d1c802bSDavid Ahern static void rt6_exceptions_clean_tohost(struct fib6_info *rt, 1705b16cb459SWei Wang struct in6_addr *gateway) 1706b16cb459SWei Wang { 1707b16cb459SWei Wang struct rt6_exception_bucket *bucket; 1708b16cb459SWei Wang struct rt6_exception *rt6_ex; 1709b16cb459SWei Wang struct hlist_node *tmp; 1710b16cb459SWei Wang int i; 1711b16cb459SWei Wang 1712b16cb459SWei Wang if (!rcu_access_pointer(rt->rt6i_exception_bucket)) 1713b16cb459SWei Wang return; 1714b16cb459SWei Wang 1715b16cb459SWei Wang spin_lock_bh(&rt6_exception_lock); 1716b16cb459SWei Wang bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1717b16cb459SWei Wang lockdep_is_held(&rt6_exception_lock)); 1718b16cb459SWei Wang 1719b16cb459SWei Wang if (bucket) { 1720b16cb459SWei Wang for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { 1721b16cb459SWei Wang hlist_for_each_entry_safe(rt6_ex, tmp, 1722b16cb459SWei Wang &bucket->chain, hlist) { 1723b16cb459SWei Wang struct rt6_info *entry = rt6_ex->rt6i; 1724b16cb459SWei Wang 1725b16cb459SWei Wang if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) == 1726b16cb459SWei Wang RTF_CACHE_GATEWAY && 1727b16cb459SWei Wang ipv6_addr_equal(gateway, 1728b16cb459SWei Wang &entry->rt6i_gateway)) { 1729b16cb459SWei Wang rt6_remove_exception(bucket, rt6_ex); 1730b16cb459SWei Wang } 1731b16cb459SWei Wang } 1732b16cb459SWei Wang bucket++; 1733b16cb459SWei Wang } 1734b16cb459SWei Wang } 1735b16cb459SWei Wang 1736b16cb459SWei Wang spin_unlock_bh(&rt6_exception_lock); 1737b16cb459SWei Wang } 1738b16cb459SWei Wang 
1739c757faa8SWei Wang static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, 1740c757faa8SWei Wang struct rt6_exception *rt6_ex, 1741c757faa8SWei Wang struct fib6_gc_args *gc_args, 1742c757faa8SWei Wang unsigned long now) 1743c757faa8SWei Wang { 1744c757faa8SWei Wang struct rt6_info *rt = rt6_ex->rt6i; 1745c757faa8SWei Wang 17461859bac0SPaolo Abeni /* we are pruning and obsoleting aged-out and non gateway exceptions 17471859bac0SPaolo Abeni * even if others have still references to them, so that on next 17481859bac0SPaolo Abeni * dst_check() such references can be dropped. 17491859bac0SPaolo Abeni * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when 17501859bac0SPaolo Abeni * expired, independently from their aging, as per RFC 8201 section 4 17511859bac0SPaolo Abeni */ 175231afeb42SWei Wang if (!(rt->rt6i_flags & RTF_EXPIRES)) { 175331afeb42SWei Wang if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { 1754c757faa8SWei Wang RT6_TRACE("aging clone %p\n", rt); 1755c757faa8SWei Wang rt6_remove_exception(bucket, rt6_ex); 1756c757faa8SWei Wang return; 175731afeb42SWei Wang } 175831afeb42SWei Wang } else if (time_after(jiffies, rt->dst.expires)) { 175931afeb42SWei Wang RT6_TRACE("purging expired route %p\n", rt); 176031afeb42SWei Wang rt6_remove_exception(bucket, rt6_ex); 176131afeb42SWei Wang return; 176231afeb42SWei Wang } 176331afeb42SWei Wang 176431afeb42SWei Wang if (rt->rt6i_flags & RTF_GATEWAY) { 1765c757faa8SWei Wang struct neighbour *neigh; 1766c757faa8SWei Wang __u8 neigh_flags = 0; 1767c757faa8SWei Wang 17681bfa26ffSEric Dumazet neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); 17691bfa26ffSEric Dumazet if (neigh) 1770c757faa8SWei Wang neigh_flags = neigh->flags; 17711bfa26ffSEric Dumazet 1772c757faa8SWei Wang if (!(neigh_flags & NTF_ROUTER)) { 1773c757faa8SWei Wang RT6_TRACE("purging route %p via non-router but gateway\n", 1774c757faa8SWei Wang rt); 1775c757faa8SWei Wang rt6_remove_exception(bucket, 
rt6_ex); 1776c757faa8SWei Wang return; 1777c757faa8SWei Wang } 1778c757faa8SWei Wang } 177931afeb42SWei Wang 1780c757faa8SWei Wang gc_args->more++; 1781c757faa8SWei Wang } 1782c757faa8SWei Wang 17838d1c802bSDavid Ahern void rt6_age_exceptions(struct fib6_info *rt, 1784c757faa8SWei Wang struct fib6_gc_args *gc_args, 1785c757faa8SWei Wang unsigned long now) 1786c757faa8SWei Wang { 1787c757faa8SWei Wang struct rt6_exception_bucket *bucket; 1788c757faa8SWei Wang struct rt6_exception *rt6_ex; 1789c757faa8SWei Wang struct hlist_node *tmp; 1790c757faa8SWei Wang int i; 1791c757faa8SWei Wang 1792c757faa8SWei Wang if (!rcu_access_pointer(rt->rt6i_exception_bucket)) 1793c757faa8SWei Wang return; 1794c757faa8SWei Wang 17951bfa26ffSEric Dumazet rcu_read_lock_bh(); 17961bfa26ffSEric Dumazet spin_lock(&rt6_exception_lock); 1797c757faa8SWei Wang bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1798c757faa8SWei Wang lockdep_is_held(&rt6_exception_lock)); 1799c757faa8SWei Wang 1800c757faa8SWei Wang if (bucket) { 1801c757faa8SWei Wang for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { 1802c757faa8SWei Wang hlist_for_each_entry_safe(rt6_ex, tmp, 1803c757faa8SWei Wang &bucket->chain, hlist) { 1804c757faa8SWei Wang rt6_age_examine_exception(bucket, rt6_ex, 1805c757faa8SWei Wang gc_args, now); 1806c757faa8SWei Wang } 1807c757faa8SWei Wang bucket++; 1808c757faa8SWei Wang } 1809c757faa8SWei Wang } 18101bfa26ffSEric Dumazet spin_unlock(&rt6_exception_lock); 18111bfa26ffSEric Dumazet rcu_read_unlock_bh(); 1812c757faa8SWei Wang } 1813c757faa8SWei Wang 18141d053da9SDavid Ahern /* must be called with rcu lock held */ 18151d053da9SDavid Ahern struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table, 18161d053da9SDavid Ahern int oif, struct flowi6 *fl6, int strict) 18171da177e4SLinus Torvalds { 1818367efcb9SMartin KaFai Lau struct fib6_node *fn, *saved_fn; 18198d1c802bSDavid Ahern struct fib6_info *f6i; 18201da177e4SLinus Torvalds 18216454743bSDavid Ahern fn = 
fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 1822367efcb9SMartin KaFai Lau saved_fn = fn; 18231da177e4SLinus Torvalds 1824ca254490SDavid Ahern if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) 1825ca254490SDavid Ahern oif = 0; 1826ca254490SDavid Ahern 1827a3c00e46SMartin KaFai Lau redo_rt6_select: 182823fb93a4SDavid Ahern f6i = rt6_select(net, fn, oif, strict); 182923fb93a4SDavid Ahern if (f6i == net->ipv6.fib6_null_entry) { 1830a3c00e46SMartin KaFai Lau fn = fib6_backtrack(fn, &fl6->saddr); 1831a3c00e46SMartin KaFai Lau if (fn) 1832a3c00e46SMartin KaFai Lau goto redo_rt6_select; 1833367efcb9SMartin KaFai Lau else if (strict & RT6_LOOKUP_F_REACHABLE) { 1834367efcb9SMartin KaFai Lau /* also consider unreachable route */ 1835367efcb9SMartin KaFai Lau strict &= ~RT6_LOOKUP_F_REACHABLE; 1836367efcb9SMartin KaFai Lau fn = saved_fn; 1837367efcb9SMartin KaFai Lau goto redo_rt6_select; 1838367efcb9SMartin KaFai Lau } 1839a3c00e46SMartin KaFai Lau } 1840a3c00e46SMartin KaFai Lau 1841d4bea421SDavid Ahern trace_fib6_table_lookup(net, f6i, table, fl6); 1842d52d3997SMartin KaFai Lau 18431d053da9SDavid Ahern return f6i; 18441d053da9SDavid Ahern } 18451d053da9SDavid Ahern 18461d053da9SDavid Ahern struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, 18471d053da9SDavid Ahern int oif, struct flowi6 *fl6, 18481d053da9SDavid Ahern const struct sk_buff *skb, int flags) 18491d053da9SDavid Ahern { 1850*b1d40991SDavid Ahern struct fib6_result res = {}; 18511d053da9SDavid Ahern struct rt6_info *rt; 18521d053da9SDavid Ahern int strict = 0; 18531d053da9SDavid Ahern 18541d053da9SDavid Ahern strict |= flags & RT6_LOOKUP_F_IFACE; 18551d053da9SDavid Ahern strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE; 18561d053da9SDavid Ahern if (net->ipv6.devconf_all->forwarding == 0) 18571d053da9SDavid Ahern strict |= RT6_LOOKUP_F_REACHABLE; 18581d053da9SDavid Ahern 18591d053da9SDavid Ahern rcu_read_lock(); 18601d053da9SDavid Ahern 1861*b1d40991SDavid Ahern res.f6i = 
fib6_table_lookup(net, table, oif, fl6, strict); 1862*b1d40991SDavid Ahern if (res.f6i == net->ipv6.fib6_null_entry) { 1863421842edSDavid Ahern rt = net->ipv6.ip6_null_entry; 186466f5d6ceSWei Wang rcu_read_unlock(); 1865d3843fe5SWei Wang dst_hold(&rt->dst); 1866d3843fe5SWei Wang return rt; 1867d3843fe5SWei Wang } 186823fb93a4SDavid Ahern 1869*b1d40991SDavid Ahern fib6_select_path(net, &res, fl6, oif, false, skb, strict); 1870d83009d4SDavid Ahern 187123fb93a4SDavid Ahern /*Search through exception table */ 1872*b1d40991SDavid Ahern rt = rt6_find_cached_rt(res.f6i, &fl6->daddr, &fl6->saddr); 187323fb93a4SDavid Ahern if (rt) { 187410585b43SDavid Ahern if (ip6_hold_safe(net, &rt)) 18751da177e4SLinus Torvalds dst_use_noref(&rt->dst, jiffies); 1876d4ead6b3SDavid Ahern 187766f5d6ceSWei Wang rcu_read_unlock(); 1878d52d3997SMartin KaFai Lau return rt; 18793da59bd9SMartin KaFai Lau } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && 1880*b1d40991SDavid Ahern !res.nh->fib_nh_gw_family)) { 18813da59bd9SMartin KaFai Lau /* Create a RTF_CACHE clone which will not be 18823da59bd9SMartin KaFai Lau * owned by the fib6 tree. It is for the special case where 18833da59bd9SMartin KaFai Lau * the daddr in the skb during the neighbor look-up is different 18843da59bd9SMartin KaFai Lau * from the fl6->daddr used to look-up route here. 
18853da59bd9SMartin KaFai Lau */ 18863da59bd9SMartin KaFai Lau struct rt6_info *uncached_rt; 18873da59bd9SMartin KaFai Lau 1888*b1d40991SDavid Ahern uncached_rt = ip6_rt_cache_alloc(res.f6i, &fl6->daddr, NULL); 1889d52d3997SMartin KaFai Lau 18904d85cd0cSDavid Ahern rcu_read_unlock(); 18913da59bd9SMartin KaFai Lau 18921cfb71eeSWei Wang if (uncached_rt) { 18931cfb71eeSWei Wang /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc() 18941cfb71eeSWei Wang * No need for another dst_hold() 18951cfb71eeSWei Wang */ 18968d0b94afSMartin KaFai Lau rt6_uncached_list_add(uncached_rt); 189781eb8447SWei Wang atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache); 18981cfb71eeSWei Wang } else { 18993da59bd9SMartin KaFai Lau uncached_rt = net->ipv6.ip6_null_entry; 19003da59bd9SMartin KaFai Lau dst_hold(&uncached_rt->dst); 19011cfb71eeSWei Wang } 1902b811580dSDavid Ahern 19033da59bd9SMartin KaFai Lau return uncached_rt; 1904d52d3997SMartin KaFai Lau } else { 1905d52d3997SMartin KaFai Lau /* Get a percpu copy */ 1906d52d3997SMartin KaFai Lau 1907d52d3997SMartin KaFai Lau struct rt6_info *pcpu_rt; 1908d52d3997SMartin KaFai Lau 1909951f788aSEric Dumazet local_bh_disable(); 1910*b1d40991SDavid Ahern pcpu_rt = rt6_get_pcpu_route(res.f6i); 1911d52d3997SMartin KaFai Lau 191293531c67SDavid Ahern if (!pcpu_rt) 1913*b1d40991SDavid Ahern pcpu_rt = rt6_make_pcpu_route(net, res.f6i); 191493531c67SDavid Ahern 1915951f788aSEric Dumazet local_bh_enable(); 1916951f788aSEric Dumazet rcu_read_unlock(); 1917d4bea421SDavid Ahern 1918d52d3997SMartin KaFai Lau return pcpu_rt; 1919d52d3997SMartin KaFai Lau } 1920c71099acSThomas Graf } 19219ff74384SDavid Ahern EXPORT_SYMBOL_GPL(ip6_pol_route); 1922c71099acSThomas Graf 1923b75cc8f9SDavid Ahern static struct rt6_info *ip6_pol_route_input(struct net *net, 1924b75cc8f9SDavid Ahern struct fib6_table *table, 1925b75cc8f9SDavid Ahern struct flowi6 *fl6, 1926b75cc8f9SDavid Ahern const struct sk_buff *skb, 1927b75cc8f9SDavid Ahern int flags) 19284acad72dSPavel 
Emelyanov { 1929b75cc8f9SDavid Ahern return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags); 19304acad72dSPavel Emelyanov } 19314acad72dSPavel Emelyanov 1932d409b847SMahesh Bandewar struct dst_entry *ip6_route_input_lookup(struct net *net, 193372331bc0SShmulik Ladkani struct net_device *dev, 1934b75cc8f9SDavid Ahern struct flowi6 *fl6, 1935b75cc8f9SDavid Ahern const struct sk_buff *skb, 1936b75cc8f9SDavid Ahern int flags) 193772331bc0SShmulik Ladkani { 193872331bc0SShmulik Ladkani if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG) 193972331bc0SShmulik Ladkani flags |= RT6_LOOKUP_F_IFACE; 194072331bc0SShmulik Ladkani 1941b75cc8f9SDavid Ahern return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input); 194272331bc0SShmulik Ladkani } 1943d409b847SMahesh Bandewar EXPORT_SYMBOL_GPL(ip6_route_input_lookup); 194472331bc0SShmulik Ladkani 194523aebdacSJakub Sitnicki static void ip6_multipath_l3_keys(const struct sk_buff *skb, 19465e5d6fedSRoopa Prabhu struct flow_keys *keys, 19475e5d6fedSRoopa Prabhu struct flow_keys *flkeys) 194823aebdacSJakub Sitnicki { 194923aebdacSJakub Sitnicki const struct ipv6hdr *outer_iph = ipv6_hdr(skb); 195023aebdacSJakub Sitnicki const struct ipv6hdr *key_iph = outer_iph; 19515e5d6fedSRoopa Prabhu struct flow_keys *_flkeys = flkeys; 195223aebdacSJakub Sitnicki const struct ipv6hdr *inner_iph; 195323aebdacSJakub Sitnicki const struct icmp6hdr *icmph; 195423aebdacSJakub Sitnicki struct ipv6hdr _inner_iph; 1955cea67a2dSEric Dumazet struct icmp6hdr _icmph; 195623aebdacSJakub Sitnicki 195723aebdacSJakub Sitnicki if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6)) 195823aebdacSJakub Sitnicki goto out; 195923aebdacSJakub Sitnicki 1960cea67a2dSEric Dumazet icmph = skb_header_pointer(skb, skb_transport_offset(skb), 1961cea67a2dSEric Dumazet sizeof(_icmph), &_icmph); 1962cea67a2dSEric Dumazet if (!icmph) 1963cea67a2dSEric Dumazet goto out; 1964cea67a2dSEric Dumazet 196523aebdacSJakub Sitnicki if (icmph->icmp6_type != 
ICMPV6_DEST_UNREACH && 196623aebdacSJakub Sitnicki icmph->icmp6_type != ICMPV6_PKT_TOOBIG && 196723aebdacSJakub Sitnicki icmph->icmp6_type != ICMPV6_TIME_EXCEED && 196823aebdacSJakub Sitnicki icmph->icmp6_type != ICMPV6_PARAMPROB) 196923aebdacSJakub Sitnicki goto out; 197023aebdacSJakub Sitnicki 197123aebdacSJakub Sitnicki inner_iph = skb_header_pointer(skb, 197223aebdacSJakub Sitnicki skb_transport_offset(skb) + sizeof(*icmph), 197323aebdacSJakub Sitnicki sizeof(_inner_iph), &_inner_iph); 197423aebdacSJakub Sitnicki if (!inner_iph) 197523aebdacSJakub Sitnicki goto out; 197623aebdacSJakub Sitnicki 197723aebdacSJakub Sitnicki key_iph = inner_iph; 19785e5d6fedSRoopa Prabhu _flkeys = NULL; 197923aebdacSJakub Sitnicki out: 19805e5d6fedSRoopa Prabhu if (_flkeys) { 19815e5d6fedSRoopa Prabhu keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src; 19825e5d6fedSRoopa Prabhu keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst; 19835e5d6fedSRoopa Prabhu keys->tags.flow_label = _flkeys->tags.flow_label; 19845e5d6fedSRoopa Prabhu keys->basic.ip_proto = _flkeys->basic.ip_proto; 19855e5d6fedSRoopa Prabhu } else { 198623aebdacSJakub Sitnicki keys->addrs.v6addrs.src = key_iph->saddr; 198723aebdacSJakub Sitnicki keys->addrs.v6addrs.dst = key_iph->daddr; 1988fa1be7e0SMichal Kubecek keys->tags.flow_label = ip6_flowlabel(key_iph); 198923aebdacSJakub Sitnicki keys->basic.ip_proto = key_iph->nexthdr; 199023aebdacSJakub Sitnicki } 19915e5d6fedSRoopa Prabhu } 199223aebdacSJakub Sitnicki 199323aebdacSJakub Sitnicki /* if skb is set it will be used and fl6 can be NULL */ 1994b4bac172SDavid Ahern u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, 1995b4bac172SDavid Ahern const struct sk_buff *skb, struct flow_keys *flkeys) 199623aebdacSJakub Sitnicki { 199723aebdacSJakub Sitnicki struct flow_keys hash_keys; 19989a2a537aSDavid Ahern u32 mhash; 199923aebdacSJakub Sitnicki 2000bbfa047aSDavid S. 
Miller switch (ip6_multipath_hash_policy(net)) { 2001b4bac172SDavid Ahern case 0: 20026f74b6c2SDavid Ahern memset(&hash_keys, 0, sizeof(hash_keys)); 20036f74b6c2SDavid Ahern hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 20049a2a537aSDavid Ahern if (skb) { 20055e5d6fedSRoopa Prabhu ip6_multipath_l3_keys(skb, &hash_keys, flkeys); 20069a2a537aSDavid Ahern } else { 20079a2a537aSDavid Ahern hash_keys.addrs.v6addrs.src = fl6->saddr; 20089a2a537aSDavid Ahern hash_keys.addrs.v6addrs.dst = fl6->daddr; 2009fa1be7e0SMichal Kubecek hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); 20109a2a537aSDavid Ahern hash_keys.basic.ip_proto = fl6->flowi6_proto; 201123aebdacSJakub Sitnicki } 2012b4bac172SDavid Ahern break; 2013b4bac172SDavid Ahern case 1: 2014b4bac172SDavid Ahern if (skb) { 2015b4bac172SDavid Ahern unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; 2016b4bac172SDavid Ahern struct flow_keys keys; 2017b4bac172SDavid Ahern 2018b4bac172SDavid Ahern /* short-circuit if we already have L4 hash present */ 2019b4bac172SDavid Ahern if (skb->l4_hash) 2020b4bac172SDavid Ahern return skb_get_hash_raw(skb) >> 1; 2021b4bac172SDavid Ahern 2022b4bac172SDavid Ahern memset(&hash_keys, 0, sizeof(hash_keys)); 2023b4bac172SDavid Ahern 2024b4bac172SDavid Ahern if (!flkeys) { 2025b4bac172SDavid Ahern skb_flow_dissect_flow_keys(skb, &keys, flag); 2026b4bac172SDavid Ahern flkeys = &keys; 2027b4bac172SDavid Ahern } 2028b4bac172SDavid Ahern hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 2029b4bac172SDavid Ahern hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src; 2030b4bac172SDavid Ahern hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst; 2031b4bac172SDavid Ahern hash_keys.ports.src = flkeys->ports.src; 2032b4bac172SDavid Ahern hash_keys.ports.dst = flkeys->ports.dst; 2033b4bac172SDavid Ahern hash_keys.basic.ip_proto = flkeys->basic.ip_proto; 2034b4bac172SDavid Ahern } else { 2035b4bac172SDavid Ahern memset(&hash_keys, 0, sizeof(hash_keys)); 
2036b4bac172SDavid Ahern hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; 2037b4bac172SDavid Ahern hash_keys.addrs.v6addrs.src = fl6->saddr; 2038b4bac172SDavid Ahern hash_keys.addrs.v6addrs.dst = fl6->daddr; 2039b4bac172SDavid Ahern hash_keys.ports.src = fl6->fl6_sport; 2040b4bac172SDavid Ahern hash_keys.ports.dst = fl6->fl6_dport; 2041b4bac172SDavid Ahern hash_keys.basic.ip_proto = fl6->flowi6_proto; 2042b4bac172SDavid Ahern } 2043b4bac172SDavid Ahern break; 2044b4bac172SDavid Ahern } 20459a2a537aSDavid Ahern mhash = flow_hash_from_keys(&hash_keys); 204623aebdacSJakub Sitnicki 20479a2a537aSDavid Ahern return mhash >> 1; 204823aebdacSJakub Sitnicki } 204923aebdacSJakub Sitnicki 2050c71099acSThomas Graf void ip6_route_input(struct sk_buff *skb) 2051c71099acSThomas Graf { 2052b71d1d42SEric Dumazet const struct ipv6hdr *iph = ipv6_hdr(skb); 2053c346dca1SYOSHIFUJI Hideaki struct net *net = dev_net(skb->dev); 2054adaa70bbSThomas Graf int flags = RT6_LOOKUP_F_HAS_SADDR; 2055904af04dSJiri Benc struct ip_tunnel_info *tun_info; 20564c9483b2SDavid S. Miller struct flowi6 fl6 = { 2057e0d56fddSDavid Ahern .flowi6_iif = skb->dev->ifindex, 20584c9483b2SDavid S. Miller .daddr = iph->daddr, 20594c9483b2SDavid S. Miller .saddr = iph->saddr, 20606502ca52SYOSHIFUJI Hideaki / 吉藤英明 .flowlabel = ip6_flowinfo(iph), 20614c9483b2SDavid S. Miller .flowi6_mark = skb->mark, 20624c9483b2SDavid S. 
Miller .flowi6_proto = iph->nexthdr, 2063c71099acSThomas Graf }; 20645e5d6fedSRoopa Prabhu struct flow_keys *flkeys = NULL, _flkeys; 2065adaa70bbSThomas Graf 2066904af04dSJiri Benc tun_info = skb_tunnel_info(skb); 206746fa062aSJiri Benc if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) 2068904af04dSJiri Benc fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id; 20695e5d6fedSRoopa Prabhu 20705e5d6fedSRoopa Prabhu if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys)) 20715e5d6fedSRoopa Prabhu flkeys = &_flkeys; 20725e5d6fedSRoopa Prabhu 207323aebdacSJakub Sitnicki if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6)) 2074b4bac172SDavid Ahern fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys); 207506e9d040SJiri Benc skb_dst_drop(skb); 2076b75cc8f9SDavid Ahern skb_dst_set(skb, 2077b75cc8f9SDavid Ahern ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags)); 2078c71099acSThomas Graf } 2079c71099acSThomas Graf 2080b75cc8f9SDavid Ahern static struct rt6_info *ip6_pol_route_output(struct net *net, 2081b75cc8f9SDavid Ahern struct fib6_table *table, 2082b75cc8f9SDavid Ahern struct flowi6 *fl6, 2083b75cc8f9SDavid Ahern const struct sk_buff *skb, 2084b75cc8f9SDavid Ahern int flags) 2085c71099acSThomas Graf { 2086b75cc8f9SDavid Ahern return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags); 2087c71099acSThomas Graf } 2088c71099acSThomas Graf 20896f21c96aSPaolo Abeni struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, 20906f21c96aSPaolo Abeni struct flowi6 *fl6, int flags) 2091c71099acSThomas Graf { 2092d46a9d67SDavid Ahern bool any_src; 2093c71099acSThomas Graf 20943ede0bbcSRobert Shearman if (ipv6_addr_type(&fl6->daddr) & 20953ede0bbcSRobert Shearman (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) { 20964c1feac5SDavid Ahern struct dst_entry *dst; 20974c1feac5SDavid Ahern 20984c1feac5SDavid Ahern dst = l3mdev_link_scope_lookup(net, fl6); 2099ca254490SDavid Ahern if (dst) 2100ca254490SDavid Ahern return dst; 21014c1feac5SDavid Ahern 
} 2102ca254490SDavid Ahern 21031fb9489bSPavel Emelyanov fl6->flowi6_iif = LOOPBACK_IFINDEX; 21044dc27d1cSDavid McCullough 2105d46a9d67SDavid Ahern any_src = ipv6_addr_any(&fl6->saddr); 2106741a11d9SDavid Ahern if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) || 2107d46a9d67SDavid Ahern (fl6->flowi6_oif && any_src)) 210877d16f45SYOSHIFUJI Hideaki flags |= RT6_LOOKUP_F_IFACE; 2109c71099acSThomas Graf 2110d46a9d67SDavid Ahern if (!any_src) 2111adaa70bbSThomas Graf flags |= RT6_LOOKUP_F_HAS_SADDR; 21120c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 else if (sk) 21130c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs); 2114adaa70bbSThomas Graf 2115b75cc8f9SDavid Ahern return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output); 21161da177e4SLinus Torvalds } 21176f21c96aSPaolo Abeni EXPORT_SYMBOL_GPL(ip6_route_output_flags); 21181da177e4SLinus Torvalds 21192774c131SDavid S. Miller struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig) 212014e50e57SDavid S. Miller { 21215c1e6aa3SDavid S. Miller struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig; 21221dbe3252SWei Wang struct net_device *loopback_dev = net->loopback_dev; 212314e50e57SDavid S. Miller struct dst_entry *new = NULL; 212414e50e57SDavid S. Miller 21251dbe3252SWei Wang rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1, 212662cf27e5SSteffen Klassert DST_OBSOLETE_DEAD, 0); 212714e50e57SDavid S. Miller if (rt) { 21280a1f5962SMartin KaFai Lau rt6_info_init(rt); 212981eb8447SWei Wang atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc); 21300a1f5962SMartin KaFai Lau 2131d8d1f30bSChangli Gao new = &rt->dst; 213214e50e57SDavid S. Miller new->__use = 1; 2133352e512cSHerbert Xu new->input = dst_discard; 2134ede2059dSEric W. Biederman new->output = dst_discard_out; 213514e50e57SDavid S. Miller 2136defb3519SDavid S. Miller dst_copy_metrics(new, &ort->dst); 213714e50e57SDavid S. 
Miller 21381dbe3252SWei Wang rt->rt6i_idev = in6_dev_get(loopback_dev); 21394e3fd7a0SAlexey Dobriyan rt->rt6i_gateway = ort->rt6i_gateway; 21400a1f5962SMartin KaFai Lau rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU; 214114e50e57SDavid S. Miller 214214e50e57SDavid S. Miller memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key)); 214314e50e57SDavid S. Miller #ifdef CONFIG_IPV6_SUBTREES 214414e50e57SDavid S. Miller memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); 214514e50e57SDavid S. Miller #endif 214614e50e57SDavid S. Miller } 214714e50e57SDavid S. Miller 214869ead7afSDavid S. Miller dst_release(dst_orig); 214969ead7afSDavid S. Miller return new ? new : ERR_PTR(-ENOMEM); 215014e50e57SDavid S. Miller } 215114e50e57SDavid S. Miller 21521da177e4SLinus Torvalds /* 21531da177e4SLinus Torvalds * Destination cache support functions 21541da177e4SLinus Torvalds */ 21551da177e4SLinus Torvalds 21568d1c802bSDavid Ahern static bool fib6_check(struct fib6_info *f6i, u32 cookie) 21573da59bd9SMartin KaFai Lau { 215836143645SSteffen Klassert u32 rt_cookie = 0; 2159c5cff856SWei Wang 21608ae86971SDavid Ahern if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie) 216193531c67SDavid Ahern return false; 216293531c67SDavid Ahern 216393531c67SDavid Ahern if (fib6_check_expired(f6i)) 216493531c67SDavid Ahern return false; 216593531c67SDavid Ahern 216693531c67SDavid Ahern return true; 216793531c67SDavid Ahern } 216893531c67SDavid Ahern 2169a68886a6SDavid Ahern static struct dst_entry *rt6_check(struct rt6_info *rt, 2170a68886a6SDavid Ahern struct fib6_info *from, 2171a68886a6SDavid Ahern u32 cookie) 21723da59bd9SMartin KaFai Lau { 2173c5cff856SWei Wang u32 rt_cookie = 0; 2174c5cff856SWei Wang 2175a68886a6SDavid Ahern if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) || 217693531c67SDavid Ahern rt_cookie != cookie) 21773da59bd9SMartin KaFai Lau return NULL; 21783da59bd9SMartin KaFai Lau 21793da59bd9SMartin KaFai Lau if (rt6_check_expired(rt)) 
21803da59bd9SMartin KaFai Lau return NULL; 21813da59bd9SMartin KaFai Lau 21823da59bd9SMartin KaFai Lau return &rt->dst; 21833da59bd9SMartin KaFai Lau } 21843da59bd9SMartin KaFai Lau 2185a68886a6SDavid Ahern static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, 2186a68886a6SDavid Ahern struct fib6_info *from, 2187a68886a6SDavid Ahern u32 cookie) 21883da59bd9SMartin KaFai Lau { 21895973fb1eSMartin KaFai Lau if (!__rt6_check_expired(rt) && 21905973fb1eSMartin KaFai Lau rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && 2191a68886a6SDavid Ahern fib6_check(from, cookie)) 21923da59bd9SMartin KaFai Lau return &rt->dst; 21933da59bd9SMartin KaFai Lau else 21943da59bd9SMartin KaFai Lau return NULL; 21953da59bd9SMartin KaFai Lau } 21963da59bd9SMartin KaFai Lau 21971da177e4SLinus Torvalds static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 21981da177e4SLinus Torvalds { 2199a87b7dc9SDavid Ahern struct dst_entry *dst_ret; 2200a68886a6SDavid Ahern struct fib6_info *from; 22011da177e4SLinus Torvalds struct rt6_info *rt; 22021da177e4SLinus Torvalds 2203a87b7dc9SDavid Ahern rt = container_of(dst, struct rt6_info, dst); 2204a87b7dc9SDavid Ahern 2205a87b7dc9SDavid Ahern rcu_read_lock(); 22061da177e4SLinus Torvalds 22076f3118b5SNicolas Dichtel /* All IPV6 dsts are created with ->obsolete set to the value 22086f3118b5SNicolas Dichtel * DST_OBSOLETE_FORCE_CHK which forces validation calls down 22096f3118b5SNicolas Dichtel * into this function always. 
22106f3118b5SNicolas Dichtel */ 2211e3bc10bdSHannes Frederic Sowa 2212a68886a6SDavid Ahern from = rcu_dereference(rt->from); 22134b32b5adSMartin KaFai Lau 2214a68886a6SDavid Ahern if (from && (rt->rt6i_flags & RTF_PCPU || 2215a68886a6SDavid Ahern unlikely(!list_empty(&rt->rt6i_uncached)))) 2216a68886a6SDavid Ahern dst_ret = rt6_dst_from_check(rt, from, cookie); 22173da59bd9SMartin KaFai Lau else 2218a68886a6SDavid Ahern dst_ret = rt6_check(rt, from, cookie); 2219a87b7dc9SDavid Ahern 2220a87b7dc9SDavid Ahern rcu_read_unlock(); 2221a87b7dc9SDavid Ahern 2222a87b7dc9SDavid Ahern return dst_ret; 22231da177e4SLinus Torvalds } 22241da177e4SLinus Torvalds 22251da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) 22261da177e4SLinus Torvalds { 22271da177e4SLinus Torvalds struct rt6_info *rt = (struct rt6_info *) dst; 22281da177e4SLinus Torvalds 22291da177e4SLinus Torvalds if (rt) { 223054c1a859SYOSHIFUJI Hideaki / 吉藤英明 if (rt->rt6i_flags & RTF_CACHE) { 2231c3c14da0SDavid Ahern rcu_read_lock(); 223254c1a859SYOSHIFUJI Hideaki / 吉藤英明 if (rt6_check_expired(rt)) { 223393531c67SDavid Ahern rt6_remove_exception_rt(rt); 223454c1a859SYOSHIFUJI Hideaki / 吉藤英明 dst = NULL; 22351da177e4SLinus Torvalds } 2236c3c14da0SDavid Ahern rcu_read_unlock(); 223754c1a859SYOSHIFUJI Hideaki / 吉藤英明 } else { 223854c1a859SYOSHIFUJI Hideaki / 吉藤英明 dst_release(dst); 223954c1a859SYOSHIFUJI Hideaki / 吉藤英明 dst = NULL; 224054c1a859SYOSHIFUJI Hideaki / 吉藤英明 } 224154c1a859SYOSHIFUJI Hideaki / 吉藤英明 } 224254c1a859SYOSHIFUJI Hideaki / 吉藤英明 return dst; 22431da177e4SLinus Torvalds } 22441da177e4SLinus Torvalds 22451da177e4SLinus Torvalds static void ip6_link_failure(struct sk_buff *skb) 22461da177e4SLinus Torvalds { 22471da177e4SLinus Torvalds struct rt6_info *rt; 22481da177e4SLinus Torvalds 22493ffe533cSAlexey Dobriyan icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); 22501da177e4SLinus Torvalds 2251adf30907SEric Dumazet rt = (struct rt6_info *) skb_dst(skb); 
22521da177e4SLinus Torvalds if (rt) { 22538a14e46fSDavid Ahern rcu_read_lock(); 22541eb4f758SHannes Frederic Sowa if (rt->rt6i_flags & RTF_CACHE) { 225593531c67SDavid Ahern rt6_remove_exception_rt(rt); 2256c5cff856SWei Wang } else { 2257a68886a6SDavid Ahern struct fib6_info *from; 2258c5cff856SWei Wang struct fib6_node *fn; 2259c5cff856SWei Wang 2260a68886a6SDavid Ahern from = rcu_dereference(rt->from); 2261a68886a6SDavid Ahern if (from) { 2262a68886a6SDavid Ahern fn = rcu_dereference(from->fib6_node); 2263c5cff856SWei Wang if (fn && (rt->rt6i_flags & RTF_DEFAULT)) 2264c5cff856SWei Wang fn->fn_sernum = -1; 2265a68886a6SDavid Ahern } 22661da177e4SLinus Torvalds } 22671da177e4SLinus Torvalds rcu_read_unlock(); 22681da177e4SLinus Torvalds } 22691da177e4SLinus Torvalds } 22701da177e4SLinus Torvalds 22716a3e030fSDavid Ahern static void rt6_update_expires(struct rt6_info *rt0, int timeout) 22726a3e030fSDavid Ahern { 2273a68886a6SDavid Ahern if (!(rt0->rt6i_flags & RTF_EXPIRES)) { 2274a68886a6SDavid Ahern struct fib6_info *from; 2275a68886a6SDavid Ahern 2276a68886a6SDavid Ahern rcu_read_lock(); 2277a68886a6SDavid Ahern from = rcu_dereference(rt0->from); 2278a68886a6SDavid Ahern if (from) 2279a68886a6SDavid Ahern rt0->dst.expires = from->expires; 2280a68886a6SDavid Ahern rcu_read_unlock(); 2281a68886a6SDavid Ahern } 22826a3e030fSDavid Ahern 22836a3e030fSDavid Ahern dst_set_expires(&rt0->dst, timeout); 22846a3e030fSDavid Ahern rt0->rt6i_flags |= RTF_EXPIRES; 22856700c270SDavid S. 
Miller } 22861da177e4SLinus Torvalds 228745e4fd26SMartin KaFai Lau static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) 228845e4fd26SMartin KaFai Lau { 228945e4fd26SMartin KaFai Lau struct net *net = dev_net(rt->dst.dev); 229045e4fd26SMartin KaFai Lau 2291d4ead6b3SDavid Ahern dst_metric_set(&rt->dst, RTAX_MTU, mtu); 229245e4fd26SMartin KaFai Lau rt->rt6i_flags |= RTF_MODIFIED; 229345e4fd26SMartin KaFai Lau rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); 229445e4fd26SMartin KaFai Lau } 229545e4fd26SMartin KaFai Lau 22960d3f6d29SMartin KaFai Lau static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) 22970d3f6d29SMartin KaFai Lau { 22980d3f6d29SMartin KaFai Lau return !(rt->rt6i_flags & RTF_CACHE) && 22991490ed2aSPaolo Abeni (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from)); 23000d3f6d29SMartin KaFai Lau } 23010d3f6d29SMartin KaFai Lau 230245e4fd26SMartin KaFai Lau static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, 230345e4fd26SMartin KaFai Lau const struct ipv6hdr *iph, u32 mtu) 23041da177e4SLinus Torvalds { 23050dec879fSJulian Anastasov const struct in6_addr *daddr, *saddr; 23061da177e4SLinus Torvalds struct rt6_info *rt6 = (struct rt6_info *)dst; 23071da177e4SLinus Torvalds 230819bda36cSXin Long if (dst_metric_locked(dst, RTAX_MTU)) 230919bda36cSXin Long return; 231019bda36cSXin Long 231145e4fd26SMartin KaFai Lau if (iph) { 231245e4fd26SMartin KaFai Lau daddr = &iph->daddr; 231345e4fd26SMartin KaFai Lau saddr = &iph->saddr; 231445e4fd26SMartin KaFai Lau } else if (sk) { 231545e4fd26SMartin KaFai Lau daddr = &sk->sk_v6_daddr; 231645e4fd26SMartin KaFai Lau saddr = &inet6_sk(sk)->saddr; 231745e4fd26SMartin KaFai Lau } else { 23180dec879fSJulian Anastasov daddr = NULL; 23190dec879fSJulian Anastasov saddr = NULL; 23201da177e4SLinus Torvalds } 23210dec879fSJulian Anastasov dst_confirm_neigh(dst, daddr); 23220dec879fSJulian Anastasov mtu = max_t(u32, mtu, IPV6_MIN_MTU); 23230dec879fSJulian Anastasov if 
(mtu >= dst_mtu(dst)) 23240dec879fSJulian Anastasov return; 23250dec879fSJulian Anastasov 23260dec879fSJulian Anastasov if (!rt6_cache_allowed_for_pmtu(rt6)) { 23270dec879fSJulian Anastasov rt6_do_update_pmtu(rt6, mtu); 23282b760fcfSWei Wang /* update rt6_ex->stamp for cache */ 23292b760fcfSWei Wang if (rt6->rt6i_flags & RTF_CACHE) 23302b760fcfSWei Wang rt6_update_exception_stamp_rt(rt6); 23310dec879fSJulian Anastasov } else if (daddr) { 2332a68886a6SDavid Ahern struct fib6_info *from; 23330dec879fSJulian Anastasov struct rt6_info *nrt6; 23340dec879fSJulian Anastasov 23354d85cd0cSDavid Ahern rcu_read_lock(); 2336a68886a6SDavid Ahern from = rcu_dereference(rt6->from); 23379c69a132SJonathan Lemon if (!from) { 23389c69a132SJonathan Lemon rcu_read_unlock(); 23399c69a132SJonathan Lemon return; 23409c69a132SJonathan Lemon } 2341a68886a6SDavid Ahern nrt6 = ip6_rt_cache_alloc(from, daddr, saddr); 234245e4fd26SMartin KaFai Lau if (nrt6) { 234345e4fd26SMartin KaFai Lau rt6_do_update_pmtu(nrt6, mtu); 2344a68886a6SDavid Ahern if (rt6_insert_exception(nrt6, from)) 23452b760fcfSWei Wang dst_release_immediate(&nrt6->dst); 234645e4fd26SMartin KaFai Lau } 2347a68886a6SDavid Ahern rcu_read_unlock(); 234845e4fd26SMartin KaFai Lau } 234945e4fd26SMartin KaFai Lau } 235045e4fd26SMartin KaFai Lau 235145e4fd26SMartin KaFai Lau static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 235245e4fd26SMartin KaFai Lau struct sk_buff *skb, u32 mtu) 235345e4fd26SMartin KaFai Lau { 235445e4fd26SMartin KaFai Lau __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu); 23551da177e4SLinus Torvalds } 23561da177e4SLinus Torvalds 235742ae66c8SDavid S. Miller void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, 2358e2d118a1SLorenzo Colitti int oif, u32 mark, kuid_t uid) 235981aded24SDavid S. Miller { 236081aded24SDavid S. Miller const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 236181aded24SDavid S. 
Miller struct dst_entry *dst; 2362dc92095dSMaciej Żenczykowski struct flowi6 fl6 = { 2363dc92095dSMaciej Żenczykowski .flowi6_oif = oif, 2364dc92095dSMaciej Żenczykowski .flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark), 2365dc92095dSMaciej Żenczykowski .daddr = iph->daddr, 2366dc92095dSMaciej Żenczykowski .saddr = iph->saddr, 2367dc92095dSMaciej Żenczykowski .flowlabel = ip6_flowinfo(iph), 2368dc92095dSMaciej Żenczykowski .flowi6_uid = uid, 2369dc92095dSMaciej Żenczykowski }; 237081aded24SDavid S. Miller 237181aded24SDavid S. Miller dst = ip6_route_output(net, NULL, &fl6); 237281aded24SDavid S. Miller if (!dst->error) 237345e4fd26SMartin KaFai Lau __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu)); 237481aded24SDavid S. Miller dst_release(dst); 237581aded24SDavid S. Miller } 237681aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_update_pmtu); 237781aded24SDavid S. Miller 237881aded24SDavid S. Miller void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) 237981aded24SDavid S. Miller { 23807ddacfa5SDavid Ahern int oif = sk->sk_bound_dev_if; 238133c162a9SMartin KaFai Lau struct dst_entry *dst; 238233c162a9SMartin KaFai Lau 23837ddacfa5SDavid Ahern if (!oif && skb->dev) 23847ddacfa5SDavid Ahern oif = l3mdev_master_ifindex(skb->dev); 23857ddacfa5SDavid Ahern 23867ddacfa5SDavid Ahern ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid); 238733c162a9SMartin KaFai Lau 238833c162a9SMartin KaFai Lau dst = __sk_dst_get(sk); 238933c162a9SMartin KaFai Lau if (!dst || !dst->obsolete || 239033c162a9SMartin KaFai Lau dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) 239133c162a9SMartin KaFai Lau return; 239233c162a9SMartin KaFai Lau 239333c162a9SMartin KaFai Lau bh_lock_sock(sk); 239433c162a9SMartin KaFai Lau if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) 239533c162a9SMartin KaFai Lau ip6_datagram_dst_update(sk, false); 239633c162a9SMartin KaFai Lau bh_unlock_sock(sk); 239781aded24SDavid S. 
Miller } 239881aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); 239981aded24SDavid S. Miller 24007d6850f7SAlexey Kodanev void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst, 24017d6850f7SAlexey Kodanev const struct flowi6 *fl6) 24027d6850f7SAlexey Kodanev { 24037d6850f7SAlexey Kodanev #ifdef CONFIG_IPV6_SUBTREES 24047d6850f7SAlexey Kodanev struct ipv6_pinfo *np = inet6_sk(sk); 24057d6850f7SAlexey Kodanev #endif 24067d6850f7SAlexey Kodanev 24077d6850f7SAlexey Kodanev ip6_dst_store(sk, dst, 24087d6850f7SAlexey Kodanev ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ? 24097d6850f7SAlexey Kodanev &sk->sk_v6_daddr : NULL, 24107d6850f7SAlexey Kodanev #ifdef CONFIG_IPV6_SUBTREES 24117d6850f7SAlexey Kodanev ipv6_addr_equal(&fl6->saddr, &np->saddr) ? 24127d6850f7SAlexey Kodanev &np->saddr : 24137d6850f7SAlexey Kodanev #endif 24147d6850f7SAlexey Kodanev NULL); 24157d6850f7SAlexey Kodanev } 24167d6850f7SAlexey Kodanev 24170b34eb00SDavid Ahern static bool ip6_redirect_nh_match(struct fib6_info *f6i, 24180b34eb00SDavid Ahern struct fib6_nh *nh, 24190b34eb00SDavid Ahern struct flowi6 *fl6, 24200b34eb00SDavid Ahern const struct in6_addr *gw, 24210b34eb00SDavid Ahern struct rt6_info **ret) 24220b34eb00SDavid Ahern { 24230b34eb00SDavid Ahern if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family || 24240b34eb00SDavid Ahern fl6->flowi6_oif != nh->fib_nh_dev->ifindex) 24250b34eb00SDavid Ahern return false; 24260b34eb00SDavid Ahern 24270b34eb00SDavid Ahern /* rt_cache's gateway might be different from its 'parent' 24280b34eb00SDavid Ahern * in the case of an ip redirect. 24290b34eb00SDavid Ahern * So we keep searching in the exception table if the gateway 24300b34eb00SDavid Ahern * is different. 
24310b34eb00SDavid Ahern */ 24320b34eb00SDavid Ahern if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) { 24330b34eb00SDavid Ahern struct rt6_info *rt_cache; 24340b34eb00SDavid Ahern 24350b34eb00SDavid Ahern rt_cache = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); 24360b34eb00SDavid Ahern if (rt_cache && 24370b34eb00SDavid Ahern ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) { 24380b34eb00SDavid Ahern *ret = rt_cache; 24390b34eb00SDavid Ahern return true; 24400b34eb00SDavid Ahern } 24410b34eb00SDavid Ahern return false; 24420b34eb00SDavid Ahern } 24430b34eb00SDavid Ahern return true; 24440b34eb00SDavid Ahern } 24450b34eb00SDavid Ahern 2446b55b76b2SDuan Jiong /* Handle redirects */ 2447b55b76b2SDuan Jiong struct ip6rd_flowi { 2448b55b76b2SDuan Jiong struct flowi6 fl6; 2449b55b76b2SDuan Jiong struct in6_addr gateway; 2450b55b76b2SDuan Jiong }; 2451b55b76b2SDuan Jiong 2452b55b76b2SDuan Jiong static struct rt6_info *__ip6_route_redirect(struct net *net, 2453b55b76b2SDuan Jiong struct fib6_table *table, 2454b55b76b2SDuan Jiong struct flowi6 *fl6, 2455b75cc8f9SDavid Ahern const struct sk_buff *skb, 2456b55b76b2SDuan Jiong int flags) 2457b55b76b2SDuan Jiong { 2458b55b76b2SDuan Jiong struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; 24590b34eb00SDavid Ahern struct rt6_info *ret = NULL; 24608d1c802bSDavid Ahern struct fib6_info *rt; 2461b55b76b2SDuan Jiong struct fib6_node *fn; 2462b55b76b2SDuan Jiong 2463b55b76b2SDuan Jiong /* Get the "current" route for this destination and 246467c408cfSAlexander Alemayhu * check if the redirect has come from appropriate router. 2465b55b76b2SDuan Jiong * 2466b55b76b2SDuan Jiong * RFC 4861 specifies that redirects should only be 2467b55b76b2SDuan Jiong * accepted if they come from the nexthop to the target. 2468b55b76b2SDuan Jiong * Due to the way the routes are chosen, this notion 2469b55b76b2SDuan Jiong * is a bit fuzzy and one might need to check all possible 2470b55b76b2SDuan Jiong * routes. 
2471b55b76b2SDuan Jiong */ 2472b55b76b2SDuan Jiong 247366f5d6ceSWei Wang rcu_read_lock(); 24746454743bSDavid Ahern fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); 2475b55b76b2SDuan Jiong restart: 247666f5d6ceSWei Wang for_each_fib6_node_rt_rcu(fn) { 247714895687SDavid Ahern if (fib6_check_expired(rt)) 2478b55b76b2SDuan Jiong continue; 247993c2fb25SDavid Ahern if (rt->fib6_flags & RTF_REJECT) 2480b55b76b2SDuan Jiong break; 24810b34eb00SDavid Ahern if (ip6_redirect_nh_match(rt, &rt->fib6_nh, fl6, 24820b34eb00SDavid Ahern &rdfl->gateway, &ret)) 24830b34eb00SDavid Ahern goto out; 2484b55b76b2SDuan Jiong } 2485b55b76b2SDuan Jiong 2486b55b76b2SDuan Jiong if (!rt) 2487421842edSDavid Ahern rt = net->ipv6.fib6_null_entry; 248893c2fb25SDavid Ahern else if (rt->fib6_flags & RTF_REJECT) { 248923fb93a4SDavid Ahern ret = net->ipv6.ip6_null_entry; 2490b0a1ba59SMartin KaFai Lau goto out; 2491b0a1ba59SMartin KaFai Lau } 2492b0a1ba59SMartin KaFai Lau 2493421842edSDavid Ahern if (rt == net->ipv6.fib6_null_entry) { 2494a3c00e46SMartin KaFai Lau fn = fib6_backtrack(fn, &fl6->saddr); 2495a3c00e46SMartin KaFai Lau if (fn) 2496a3c00e46SMartin KaFai Lau goto restart; 2497b55b76b2SDuan Jiong } 2498a3c00e46SMartin KaFai Lau 2499b0a1ba59SMartin KaFai Lau out: 250023fb93a4SDavid Ahern if (ret) 250110585b43SDavid Ahern ip6_hold_safe(net, &ret); 250223fb93a4SDavid Ahern else 250323fb93a4SDavid Ahern ret = ip6_create_rt_rcu(rt); 2504b55b76b2SDuan Jiong 250566f5d6ceSWei Wang rcu_read_unlock(); 2506b55b76b2SDuan Jiong 2507b65f164dSPaolo Abeni trace_fib6_table_lookup(net, rt, table, fl6); 250823fb93a4SDavid Ahern return ret; 2509b55b76b2SDuan Jiong }; 2510b55b76b2SDuan Jiong 2511b55b76b2SDuan Jiong static struct dst_entry *ip6_route_redirect(struct net *net, 2512b55b76b2SDuan Jiong const struct flowi6 *fl6, 2513b75cc8f9SDavid Ahern const struct sk_buff *skb, 2514b55b76b2SDuan Jiong const struct in6_addr *gateway) 2515b55b76b2SDuan Jiong { 2516b55b76b2SDuan Jiong int flags = 
RT6_LOOKUP_F_HAS_SADDR; 2517b55b76b2SDuan Jiong struct ip6rd_flowi rdfl; 2518b55b76b2SDuan Jiong 2519b55b76b2SDuan Jiong rdfl.fl6 = *fl6; 2520b55b76b2SDuan Jiong rdfl.gateway = *gateway; 2521b55b76b2SDuan Jiong 2522b75cc8f9SDavid Ahern return fib6_rule_lookup(net, &rdfl.fl6, skb, 2523b55b76b2SDuan Jiong flags, __ip6_route_redirect); 2524b55b76b2SDuan Jiong } 2525b55b76b2SDuan Jiong 2526e2d118a1SLorenzo Colitti void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark, 2527e2d118a1SLorenzo Colitti kuid_t uid) 25283a5ad2eeSDavid S. Miller { 25293a5ad2eeSDavid S. Miller const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; 25303a5ad2eeSDavid S. Miller struct dst_entry *dst; 25311f7f10acSMaciej Żenczykowski struct flowi6 fl6 = { 25321f7f10acSMaciej Żenczykowski .flowi6_iif = LOOPBACK_IFINDEX, 25331f7f10acSMaciej Żenczykowski .flowi6_oif = oif, 25341f7f10acSMaciej Żenczykowski .flowi6_mark = mark, 25351f7f10acSMaciej Żenczykowski .daddr = iph->daddr, 25361f7f10acSMaciej Żenczykowski .saddr = iph->saddr, 25371f7f10acSMaciej Żenczykowski .flowlabel = ip6_flowinfo(iph), 25381f7f10acSMaciej Żenczykowski .flowi6_uid = uid, 25391f7f10acSMaciej Żenczykowski }; 25403a5ad2eeSDavid S. Miller 2541b75cc8f9SDavid Ahern dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr); 25426700c270SDavid S. Miller rt6_do_redirect(dst, NULL, skb); 25433a5ad2eeSDavid S. Miller dst_release(dst); 25443a5ad2eeSDavid S. Miller } 25453a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_redirect); 25463a5ad2eeSDavid S. 
Miller 2547d456336dSMaciej Żenczykowski void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif) 2548c92a59ecSDuan Jiong { 2549c92a59ecSDuan Jiong const struct ipv6hdr *iph = ipv6_hdr(skb); 2550c92a59ecSDuan Jiong const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb); 2551c92a59ecSDuan Jiong struct dst_entry *dst; 25520b26fb17SMaciej Żenczykowski struct flowi6 fl6 = { 25530b26fb17SMaciej Żenczykowski .flowi6_iif = LOOPBACK_IFINDEX, 25540b26fb17SMaciej Żenczykowski .flowi6_oif = oif, 25550b26fb17SMaciej Żenczykowski .daddr = msg->dest, 25560b26fb17SMaciej Żenczykowski .saddr = iph->daddr, 25570b26fb17SMaciej Żenczykowski .flowi6_uid = sock_net_uid(net, NULL), 25580b26fb17SMaciej Żenczykowski }; 2559c92a59ecSDuan Jiong 2560b75cc8f9SDavid Ahern dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr); 2561c92a59ecSDuan Jiong rt6_do_redirect(dst, NULL, skb); 2562c92a59ecSDuan Jiong dst_release(dst); 2563c92a59ecSDuan Jiong } 2564c92a59ecSDuan Jiong 25653a5ad2eeSDavid S. Miller void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk) 25663a5ad2eeSDavid S. Miller { 2567e2d118a1SLorenzo Colitti ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark, 2568e2d118a1SLorenzo Colitti sk->sk_uid); 25693a5ad2eeSDavid S. Miller } 25703a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_redirect); 25713a5ad2eeSDavid S. Miller 25720dbaee3bSDavid S. Miller static unsigned int ip6_default_advmss(const struct dst_entry *dst) 25731da177e4SLinus Torvalds { 25740dbaee3bSDavid S. Miller struct net_device *dev = dst->dev; 25750dbaee3bSDavid S. Miller unsigned int mtu = dst_mtu(dst); 25760dbaee3bSDavid S. Miller struct net *net = dev_net(dev); 25770dbaee3bSDavid S. 
Miller 25781da177e4SLinus Torvalds mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 25791da177e4SLinus Torvalds 25805578689aSDaniel Lezcano if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) 25815578689aSDaniel Lezcano mtu = net->ipv6.sysctl.ip6_rt_min_advmss; 25821da177e4SLinus Torvalds 25831da177e4SLinus Torvalds /* 25841da177e4SLinus Torvalds * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 25851da177e4SLinus Torvalds * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 25861da177e4SLinus Torvalds * IPV6_MAXPLEN is also valid and means: "any MSS, 25871da177e4SLinus Torvalds * rely only on pmtu discovery" 25881da177e4SLinus Torvalds */ 25891da177e4SLinus Torvalds if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr)) 25901da177e4SLinus Torvalds mtu = IPV6_MAXPLEN; 25911da177e4SLinus Torvalds return mtu; 25921da177e4SLinus Torvalds } 25931da177e4SLinus Torvalds 2594ebb762f2SSteffen Klassert static unsigned int ip6_mtu(const struct dst_entry *dst) 2595d33e4553SDavid S. Miller { 2596d33e4553SDavid S. Miller struct inet6_dev *idev; 2597d4ead6b3SDavid Ahern unsigned int mtu; 2598618f9bc7SSteffen Klassert 25994b32b5adSMartin KaFai Lau mtu = dst_metric_raw(dst, RTAX_MTU); 26004b32b5adSMartin KaFai Lau if (mtu) 26014b32b5adSMartin KaFai Lau goto out; 26024b32b5adSMartin KaFai Lau 2603618f9bc7SSteffen Klassert mtu = IPV6_MIN_MTU; 2604d33e4553SDavid S. Miller 2605d33e4553SDavid S. Miller rcu_read_lock(); 2606d33e4553SDavid S. Miller idev = __in6_dev_get(dst->dev); 2607d33e4553SDavid S. Miller if (idev) 2608d33e4553SDavid S. Miller mtu = idev->cnf.mtu6; 2609d33e4553SDavid S. Miller rcu_read_unlock(); 2610d33e4553SDavid S. Miller 261130f78d8eSEric Dumazet out: 261214972cbdSRoopa Prabhu mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); 261314972cbdSRoopa Prabhu 261414972cbdSRoopa Prabhu return mtu - lwtunnel_headroom(dst->lwtstate, mtu); 2615d33e4553SDavid S. Miller } 2616d33e4553SDavid S. Miller 2617901731b8SDavid Ahern /* MTU selection: 2618901731b8SDavid Ahern * 1. 
mtu on route is locked - use it 2619901731b8SDavid Ahern * 2. mtu from nexthop exception 2620901731b8SDavid Ahern * 3. mtu from egress device 2621901731b8SDavid Ahern * 2622901731b8SDavid Ahern * based on ip6_dst_mtu_forward and exception logic of 2623901731b8SDavid Ahern * rt6_find_cached_rt; called with rcu_read_lock 2624901731b8SDavid Ahern */ 2625901731b8SDavid Ahern u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, 2626901731b8SDavid Ahern struct in6_addr *saddr) 2627901731b8SDavid Ahern { 2628901731b8SDavid Ahern struct rt6_exception_bucket *bucket; 2629901731b8SDavid Ahern struct rt6_exception *rt6_ex; 2630901731b8SDavid Ahern struct in6_addr *src_key; 2631901731b8SDavid Ahern struct inet6_dev *idev; 2632901731b8SDavid Ahern u32 mtu = 0; 2633901731b8SDavid Ahern 2634901731b8SDavid Ahern if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) { 2635901731b8SDavid Ahern mtu = f6i->fib6_pmtu; 2636901731b8SDavid Ahern if (mtu) 2637901731b8SDavid Ahern goto out; 2638901731b8SDavid Ahern } 2639901731b8SDavid Ahern 2640901731b8SDavid Ahern src_key = NULL; 2641901731b8SDavid Ahern #ifdef CONFIG_IPV6_SUBTREES 2642901731b8SDavid Ahern if (f6i->fib6_src.plen) 2643901731b8SDavid Ahern src_key = saddr; 2644901731b8SDavid Ahern #endif 2645901731b8SDavid Ahern 2646901731b8SDavid Ahern bucket = rcu_dereference(f6i->rt6i_exception_bucket); 2647901731b8SDavid Ahern rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); 2648901731b8SDavid Ahern if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) 2649901731b8SDavid Ahern mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU); 2650901731b8SDavid Ahern 2651901731b8SDavid Ahern if (likely(!mtu)) { 2652901731b8SDavid Ahern struct net_device *dev = fib6_info_nh_dev(f6i); 2653901731b8SDavid Ahern 2654901731b8SDavid Ahern mtu = IPV6_MIN_MTU; 2655901731b8SDavid Ahern idev = __in6_dev_get(dev); 2656901731b8SDavid Ahern if (idev && idev->cnf.mtu6 > mtu) 2657901731b8SDavid Ahern mtu = idev->cnf.mtu6; 2658901731b8SDavid Ahern } 
2659901731b8SDavid Ahern 2660901731b8SDavid Ahern mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); 2661901731b8SDavid Ahern out: 2662901731b8SDavid Ahern return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu); 2663901731b8SDavid Ahern } 2664901731b8SDavid Ahern 26653b00944cSYOSHIFUJI Hideaki struct dst_entry *icmp6_dst_alloc(struct net_device *dev, 266687a11578SDavid S. Miller struct flowi6 *fl6) 26671da177e4SLinus Torvalds { 266887a11578SDavid S. Miller struct dst_entry *dst; 26691da177e4SLinus Torvalds struct rt6_info *rt; 26701da177e4SLinus Torvalds struct inet6_dev *idev = in6_dev_get(dev); 2671c346dca1SYOSHIFUJI Hideaki struct net *net = dev_net(dev); 26721da177e4SLinus Torvalds 267338308473SDavid S. Miller if (unlikely(!idev)) 2674122bdf67SEric Dumazet return ERR_PTR(-ENODEV); 26751da177e4SLinus Torvalds 2676ad706862SMartin KaFai Lau rt = ip6_dst_alloc(net, dev, 0); 267738308473SDavid S. Miller if (unlikely(!rt)) { 26781da177e4SLinus Torvalds in6_dev_put(idev); 267987a11578SDavid S. Miller dst = ERR_PTR(-ENOMEM); 26801da177e4SLinus Torvalds goto out; 26811da177e4SLinus Torvalds } 26821da177e4SLinus Torvalds 26838e2ec639SYan, Zheng rt->dst.flags |= DST_HOST; 2684588753f1SBrendan McGrath rt->dst.input = ip6_input; 26858e2ec639SYan, Zheng rt->dst.output = ip6_output; 2686550bab42SJulian Anastasov rt->rt6i_gateway = fl6->daddr; 268787a11578SDavid S. Miller rt->rt6i_dst.addr = fl6->daddr; 26888e2ec639SYan, Zheng rt->rt6i_dst.plen = 128; 26898e2ec639SYan, Zheng rt->rt6i_idev = idev; 269014edd87dSLi RongQing dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0); 26911da177e4SLinus Torvalds 26924c981e28SIdo Schimmel /* Add this dst into uncached_list so that rt6_disable_ip() can 2693587fea74SWei Wang * do proper release of the net_device 2694587fea74SWei Wang */ 2695587fea74SWei Wang rt6_uncached_list_add(rt); 269681eb8447SWei Wang atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache); 26971da177e4SLinus Torvalds 269887a11578SDavid S. 
Miller dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); 269987a11578SDavid S. Miller 27001da177e4SLinus Torvalds out: 270187a11578SDavid S. Miller return dst; 27021da177e4SLinus Torvalds } 27031da177e4SLinus Torvalds /* ip6_dst_gc - garbage-collection callback for the per-namespace ip6_dst_ops (the hookup itself is not visible here - presumably the gc member of dst_ops). Runs fib6_run_gc() unless the minimum GC interval has not yet elapsed and the entry count is within ip6_rt_max_size. Returns non-zero when the entry count still exceeds ip6_rt_max_size. */ 2704569d3645SDaniel Lezcano static int ip6_dst_gc(struct dst_ops *ops) 27051da177e4SLinus Torvalds { 270686393e52SAlexey Dobriyan struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); 27077019b78eSDaniel Lezcano int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; 27087019b78eSDaniel Lezcano int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; 27097019b78eSDaniel Lezcano int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 27107019b78eSDaniel Lezcano int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 27117019b78eSDaniel Lezcano unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 2712fc66f95cSEric Dumazet int entries; 27131da177e4SLinus Torvalds 2714fc66f95cSEric Dumazet entries = dst_entries_get_fast(ops); 271549a18d86SMichal Kubeček /* rate limit: skip the GC run while the minimum interval since ip6_rt_last_gc has not elapsed, unless the fast count already exceeds ip6_rt_max_size */ if (time_after(rt_last_gc + rt_min_interval, jiffies) && 2716fc66f95cSEric Dumazet entries <= rt_max_size) 27171da177e4SLinus Torvalds goto out; 27181da177e4SLinus Torvalds 27196891a346SBenjamin Thery net->ipv6.ip6_rt_gc_expire++; 272014956643SLi RongQing fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true); 2721fc66f95cSEric Dumazet entries = dst_entries_get_slow(ops); 2722fc66f95cSEric Dumazet /* once the count is back under gc_thresh, reset the expire value to half of ip6_rt_gc_timeout */ if (entries < ops->gc_thresh) 27237019b78eSDaniel Lezcano net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 27241da177e4SLinus Torvalds out: 27257019b78eSDaniel Lezcano /* reached on both paths: decay ip6_rt_gc_expire by itself shifted right by the elasticity sysctl */ net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 2726fc66f95cSEric Dumazet return entries > rt_max_size; 27271da177e4SLinus Torvalds } 27281da177e4SLinus Torvalds /* ip6_nh_lookup_table - look up gw_addr in FIB table tbid only, with a flow keyed on cfg->fc_ifindex and cfg->fc_prefsrc. Returns NULL if the table does not exist or the lookup resolved to ip6_null_entry; otherwise the rt6_info from ip6_pol_route(), which the callers in this file release with ip6_rt_put(). */ 27298c14586fSDavid Ahern static struct rt6_info *ip6_nh_lookup_table(struct net *net, 27308c14586fSDavid Ahern struct fib6_config *cfg, 2731f4797b33SDavid Ahern const struct in6_addr *gw_addr, 2732f4797b33SDavid Ahern u32 tbid, int flags)
27338c14586fSDavid Ahern { 27348c14586fSDavid Ahern struct flowi6 fl6 = { 27358c14586fSDavid Ahern .flowi6_oif = cfg->fc_ifindex, 27368c14586fSDavid Ahern .daddr = *gw_addr, 27378c14586fSDavid Ahern .saddr = cfg->fc_prefsrc, 27388c14586fSDavid Ahern }; 27398c14586fSDavid Ahern struct fib6_table *table; 27408c14586fSDavid Ahern struct rt6_info *rt; 27418c14586fSDavid Ahern 2742f4797b33SDavid Ahern table = fib6_get_table(net, tbid); 27438c14586fSDavid Ahern if (!table) 27448c14586fSDavid Ahern return NULL; 27458c14586fSDavid Ahern 27468c14586fSDavid Ahern /* a preferred source was supplied in cfg, so flag the lookup as having a source address */ if (!ipv6_addr_any(&cfg->fc_prefsrc)) 27478c14586fSDavid Ahern flags |= RT6_LOOKUP_F_HAS_SADDR; 27488c14586fSDavid Ahern 2749f4797b33SDavid Ahern /* OR-ed in unconditionally, on top of whatever the caller passed */ flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE; 2750b75cc8f9SDavid Ahern rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags); 27518c14586fSDavid Ahern 27528c14586fSDavid Ahern /* if table lookup failed, fall back to full lookup */ 27538c14586fSDavid Ahern /* the fallback itself is left to the caller: here a null-entry result is released and reported as NULL */ if (rt == net->ipv6.ip6_null_entry) { 27548c14586fSDavid Ahern ip6_rt_put(rt); 27558c14586fSDavid Ahern rt = NULL; 27568c14586fSDavid Ahern } 27578c14586fSDavid Ahern 27588c14586fSDavid Ahern return rt; 27598c14586fSDavid Ahern } 27608c14586fSDavid Ahern /* ip6_route_check_nh_onlink - validate the gateway of an RTNH_F_ONLINK nexthop. Looks up cfg->fc_gateway in the table given by l3mdev_fib_table(dev), or RT_TABLE_MAIN when that returns 0. Returns -EINVAL with an extack message when the match is error-free, is not the default route, and either carries RTF_LOCAL, RTF_ANYCAST or RTF_REJECT or points at a device other than dev. Returns 0 otherwise, including when nothing matches. */ 2761fc1e64e1SDavid Ahern static int ip6_route_check_nh_onlink(struct net *net, 2762fc1e64e1SDavid Ahern struct fib6_config *cfg, 27639fbb704cSDavid Ahern const struct net_device *dev, 2764fc1e64e1SDavid Ahern struct netlink_ext_ack *extack) 2765fc1e64e1SDavid Ahern { 276644750f84SDavid Ahern u32 tbid = l3mdev_fib_table(dev) ?
: RT_TABLE_MAIN; 2767fc1e64e1SDavid Ahern const struct in6_addr *gw_addr = &cfg->fc_gateway; 2768fc1e64e1SDavid Ahern /* route flags that disqualify a matching route as an onlink gateway */ u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT; 2769bf1dc8baSPaolo Abeni struct fib6_info *from; 2770fc1e64e1SDavid Ahern struct rt6_info *grt; 2771fc1e64e1SDavid Ahern int err; 2772fc1e64e1SDavid Ahern 2773fc1e64e1SDavid Ahern err = 0; 2774fc1e64e1SDavid Ahern grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0); 2775fc1e64e1SDavid Ahern if (grt) { 2776bf1dc8baSPaolo Abeni /* grt->from is fetched with rcu_dereference(), so the whole check sits in an RCU read-side section */ rcu_read_lock(); 2777bf1dc8baSPaolo Abeni from = rcu_dereference(grt->from); 277858e354c0SDavid Ahern if (!grt->dst.error && 27794ed591c8SDavid Ahern /* ignore match if it is the default route */ 2780bf1dc8baSPaolo Abeni from && !ipv6_addr_any(&from->fib6_dst.addr) && 278158e354c0SDavid Ahern (grt->rt6i_flags & flags || dev != grt->dst.dev)) { 278244750f84SDavid Ahern NL_SET_ERR_MSG(extack, 278344750f84SDavid Ahern "Nexthop has invalid gateway or device mismatch"); 2784fc1e64e1SDavid Ahern err = -EINVAL; 2785fc1e64e1SDavid Ahern } 2786bf1dc8baSPaolo Abeni rcu_read_unlock(); 2787fc1e64e1SDavid Ahern 2788fc1e64e1SDavid Ahern /* release the lookup result whether or not it was rejected */ ip6_rt_put(grt); 2789fc1e64e1SDavid Ahern } 2790fc1e64e1SDavid Ahern 2791fc1e64e1SDavid Ahern return err; 2792fc1e64e1SDavid Ahern } 2793fc1e64e1SDavid Ahern /* ip6_route_check_nh - resolve the route to cfg->fc_gateway for a nexthop that is not onlink. Tries cfg->fc_table first (when set), then rt6_lookup(). If *_dev is NULL it is filled in from the route found, together with *idev, and a reference is taken on each. Returns 0 only when the route found is not itself RTF_GATEWAY and matches *_dev if one was given; -EHOSTUNREACH otherwise. */ 27941edce99fSDavid Ahern static int ip6_route_check_nh(struct net *net, 27951edce99fSDavid Ahern struct fib6_config *cfg, 27961edce99fSDavid Ahern struct net_device **_dev, 27971edce99fSDavid Ahern struct inet6_dev **idev) 27981edce99fSDavid Ahern { 27991edce99fSDavid Ahern const struct in6_addr *gw_addr = &cfg->fc_gateway; 28001edce99fSDavid Ahern struct net_device *dev = _dev ?
*_dev : NULL; 28011edce99fSDavid Ahern struct rt6_info *grt = NULL; 28021edce99fSDavid Ahern int err = -EHOSTUNREACH; 28031edce99fSDavid Ahern 28041edce99fSDavid Ahern /* first try the table the route is being added to; a hit that is itself a gateway route or uses another device is discarded */ if (cfg->fc_table) { 2805f4797b33SDavid Ahern int flags = RT6_LOOKUP_F_IFACE; 2806f4797b33SDavid Ahern 2807f4797b33SDavid Ahern grt = ip6_nh_lookup_table(net, cfg, gw_addr, 2808f4797b33SDavid Ahern cfg->fc_table, flags); 28091edce99fSDavid Ahern if (grt) { 28101edce99fSDavid Ahern if (grt->rt6i_flags & RTF_GATEWAY || 28111edce99fSDavid Ahern (dev && dev != grt->dst.dev)) { 28121edce99fSDavid Ahern ip6_rt_put(grt); 28131edce99fSDavid Ahern grt = NULL; 28141edce99fSDavid Ahern } 28151edce99fSDavid Ahern } 28161edce99fSDavid Ahern } 28171edce99fSDavid Ahern 28181edce99fSDavid Ahern /* nothing usable from the table lookup: fall back to rt6_lookup() */ if (!grt) 2819b75cc8f9SDavid Ahern grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1); 28201edce99fSDavid Ahern 28211edce99fSDavid Ahern if (!grt) 28221edce99fSDavid Ahern goto out; 28231edce99fSDavid Ahern 28241edce99fSDavid Ahern if (dev) { 28251edce99fSDavid Ahern if (dev != grt->dst.dev) { 28261edce99fSDavid Ahern ip6_rt_put(grt); 28271edce99fSDavid Ahern goto out; 28281edce99fSDavid Ahern } 28291edce99fSDavid Ahern } else { 28301edce99fSDavid Ahern /* caller gave no device: report the device and idev of the route found and take a reference on each. NOTE(review): _dev is dereferenced here without the NULL test used when dev was initialised - confirm no caller passes NULL */ *_dev = dev = grt->dst.dev; 28311edce99fSDavid Ahern *idev = grt->rt6i_idev; 28321edce99fSDavid Ahern dev_hold(dev); 28331edce99fSDavid Ahern in6_dev_hold(grt->rt6i_idev); 28341edce99fSDavid Ahern } 28351edce99fSDavid Ahern 28361edce99fSDavid Ahern /* success only if the gateway is reached through a route that has no RTF_GATEWAY itself */ if (!(grt->rt6i_flags & RTF_GATEWAY)) 28371edce99fSDavid Ahern err = 0; 28381edce99fSDavid Ahern 28391edce99fSDavid Ahern ip6_rt_put(grt); 28401edce99fSDavid Ahern 28411edce99fSDavid Ahern out: 28421edce99fSDavid Ahern return err; 28431edce99fSDavid Ahern } 28441edce99fSDavid Ahern /* ip6_validate_gw - validate cfg->fc_gateway as a nexthop address. Rejects a gateway that ipv6_chk_addr_and_flags() reports as a local address, and an egress device that is missing or IFF_LOOPBACK. A gateway that is not link-local unicast must additionally be unicast or IPv4-mapped and pass ip6_route_check_nh_onlink() or ip6_route_check_nh(); the latter may fill in *_dev and *idev. Returns 0 on success, otherwise a negative errno (-EINVAL for the checks made here). */ 28459fbb704cSDavid Ahern static int ip6_validate_gw(struct net *net, struct fib6_config *cfg, 28469fbb704cSDavid Ahern struct net_device **_dev, struct inet6_dev **idev, 28479fbb704cSDavid Ahern struct netlink_ext_ack *extack)
28489fbb704cSDavid Ahern { 28499fbb704cSDavid Ahern const struct in6_addr *gw_addr = &cfg->fc_gateway; 28509fbb704cSDavid Ahern int gwa_type = ipv6_addr_type(gw_addr); 2851232378e8SDavid Ahern /* skip_dev is false only for link-local gateways; presumably it makes ipv6_chk_addr_and_flags() ignore dev - confirm against that helper */ bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true; 28529fbb704cSDavid Ahern const struct net_device *dev = *_dev; 2853232378e8SDavid Ahern /* with no device yet, the local-address check is postponed until one has been resolved */ bool need_addr_check = !dev; 28549fbb704cSDavid Ahern int err = -EINVAL; 28559fbb704cSDavid Ahern 28569fbb704cSDavid Ahern /* if gw_addr is local we will fail to detect this in case 28579fbb704cSDavid Ahern * address is still TENTATIVE (DAD in progress). rt6_lookup() 28589fbb704cSDavid Ahern * will return already-added prefix route via interface that 28599fbb704cSDavid Ahern * prefix route was assigned to, which might be non-loopback. 28609fbb704cSDavid Ahern */ 2861232378e8SDavid Ahern if (dev && 2862232378e8SDavid Ahern ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) { 2863232378e8SDavid Ahern NL_SET_ERR_MSG(extack, "Gateway can not be a local address"); 28649fbb704cSDavid Ahern goto out; 28659fbb704cSDavid Ahern } 28669fbb704cSDavid Ahern 28679fbb704cSDavid Ahern if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) { 28689fbb704cSDavid Ahern /* IPv6 strictly inhibits using not link-local 28699fbb704cSDavid Ahern * addresses as nexthop address. 28709fbb704cSDavid Ahern * Otherwise, router will not able to send redirects. 28719fbb704cSDavid Ahern * It is very good, but in some (rare!) circumstances 28729fbb704cSDavid Ahern * (SIT, PtP, NBMA NOARP links) it is handy to allow 28739fbb704cSDavid Ahern * some exceptions.
--ANK 28749fbb704cSDavid Ahern * We allow IPv4-mapped nexthops to support RFC4798-type 28759fbb704cSDavid Ahern * addressing 28769fbb704cSDavid Ahern */ 28779fbb704cSDavid Ahern if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) { 28789fbb704cSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid gateway address"); 28799fbb704cSDavid Ahern goto out; 28809fbb704cSDavid Ahern } 28819fbb704cSDavid Ahern 28829fbb704cSDavid Ahern /* onlink gateways are checked against dev only; otherwise the gateway route is resolved and may supply *_dev and *idev */ if (cfg->fc_flags & RTNH_F_ONLINK) 28839fbb704cSDavid Ahern err = ip6_route_check_nh_onlink(net, cfg, dev, extack); 28849fbb704cSDavid Ahern else 28859fbb704cSDavid Ahern err = ip6_route_check_nh(net, cfg, _dev, idev); 28869fbb704cSDavid Ahern 28879fbb704cSDavid Ahern if (err) 28889fbb704cSDavid Ahern goto out; 28899fbb704cSDavid Ahern } 28909fbb704cSDavid Ahern 28919fbb704cSDavid Ahern /* reload in case device was changed */ 28929fbb704cSDavid Ahern dev = *_dev; 28939fbb704cSDavid Ahern 28949fbb704cSDavid Ahern err = -EINVAL; 28959fbb704cSDavid Ahern if (!dev) { 28969fbb704cSDavid Ahern NL_SET_ERR_MSG(extack, "Egress device not specified"); 28979fbb704cSDavid Ahern goto out; 28989fbb704cSDavid Ahern } else if (dev->flags & IFF_LOOPBACK) { 28999fbb704cSDavid Ahern NL_SET_ERR_MSG(extack, 29009fbb704cSDavid Ahern "Egress device can not be loopback device for this route"); 29019fbb704cSDavid Ahern goto out; 29029fbb704cSDavid Ahern } 2903232378e8SDavid Ahern 2904232378e8SDavid Ahern /* if we did not check gw_addr above, do so now that the 2905232378e8SDavid Ahern * egress device has been resolved.
2906232378e8SDavid Ahern */ 2907232378e8SDavid Ahern if (need_addr_check && 2908232378e8SDavid Ahern ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) { 2909232378e8SDavid Ahern NL_SET_ERR_MSG(extack, "Gateway can not be a local address"); 2910232378e8SDavid Ahern goto out; 2911232378e8SDavid Ahern } 2912232378e8SDavid Ahern 29139fbb704cSDavid Ahern err = 0; 29149fbb704cSDavid Ahern out: 29159fbb704cSDavid Ahern return err; 29169fbb704cSDavid Ahern } 29179fbb704cSDavid Ahern /* fib6_is_reject - true if the route has to be installed as a reject route: either RTF_REJECT was requested, or dev is a loopback device while the address type is not loopback and RTF_LOCAL is not set. */ 291883c44251SDavid Ahern static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type) 291983c44251SDavid Ahern { 292083c44251SDavid Ahern if ((flags & RTF_REJECT) || 292183c44251SDavid Ahern (dev && (dev->flags & IFF_LOOPBACK) && 292283c44251SDavid Ahern !(addr_type & IPV6_ADDR_LOOPBACK) && 292383c44251SDavid Ahern !(flags & RTF_LOCAL))) 292483c44251SDavid Ahern return true; 292583c44251SDavid Ahern 292683c44251SDavid Ahern return false; 292783c44251SDavid Ahern } 292883c44251SDavid Ahern /* fib6_nh_init - fill in fib6_nh from cfg: resolve and validate the nexthop device and the optional gateway, then pass the encap settings to fib_nh_common_init(). On success returns 0 and fib_nh_dev keeps the device reference taken here. On failure returns a negative errno after dropping the device reference and any lwtunnel state. The idev reference is dropped before returning in both cases. */ 292983c44251SDavid Ahern int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, 293083c44251SDavid Ahern struct fib6_config *cfg, gfp_t gfp_flags, 293183c44251SDavid Ahern struct netlink_ext_ack *extack) 293283c44251SDavid Ahern { 293383c44251SDavid Ahern struct net_device *dev = NULL; 293483c44251SDavid Ahern struct inet6_dev *idev = NULL; 293583c44251SDavid Ahern int addr_type; 293683c44251SDavid Ahern int err; 293783c44251SDavid Ahern 2938f1741730SDavid Ahern fib6_nh->fib_nh_family = AF_INET6; 2939f1741730SDavid Ahern 294083c44251SDavid Ahern err = -ENODEV; 294183c44251SDavid Ahern if (cfg->fc_ifindex) { 294283c44251SDavid Ahern dev = dev_get_by_index(net, cfg->fc_ifindex); 294383c44251SDavid Ahern if (!dev) 294483c44251SDavid Ahern goto out; 294583c44251SDavid Ahern idev = in6_dev_get(dev); 294683c44251SDavid Ahern if (!idev) 294783c44251SDavid Ahern goto out; 294883c44251SDavid Ahern } 294983c44251SDavid Ahern 295083c44251SDavid Ahern if (cfg->fc_flags & RTNH_F_ONLINK) {
295183c44251SDavid Ahern /* err is still -ENODEV at this point */ if (!dev) { 295283c44251SDavid Ahern NL_SET_ERR_MSG(extack, 295383c44251SDavid Ahern "Nexthop device required for onlink"); 295483c44251SDavid Ahern goto out; 295583c44251SDavid Ahern } 295683c44251SDavid Ahern 295783c44251SDavid Ahern if (!(dev->flags & IFF_UP)) { 295883c44251SDavid Ahern NL_SET_ERR_MSG(extack, "Nexthop device is not up"); 295983c44251SDavid Ahern err = -ENETDOWN; 296083c44251SDavid Ahern goto out; 296183c44251SDavid Ahern } 296283c44251SDavid Ahern 2963ad1601aeSDavid Ahern fib6_nh->fib_nh_flags |= RTNH_F_ONLINK; 296483c44251SDavid Ahern } 296583c44251SDavid Ahern 2966ad1601aeSDavid Ahern fib6_nh->fib_nh_weight = 1; 296783c44251SDavid Ahern 296883c44251SDavid Ahern /* We cannot add true routes via loopback here, 296983c44251SDavid Ahern * they would result in kernel looping; promote them to reject routes 297083c44251SDavid Ahern */ 297183c44251SDavid Ahern addr_type = ipv6_addr_type(&cfg->fc_dst); 297283c44251SDavid Ahern if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) { 297383c44251SDavid Ahern /* hold loopback dev/idev if we haven't done so.
*/ 297483c44251SDavid Ahern if (dev != net->loopback_dev) { 297583c44251SDavid Ahern if (dev) { 297683c44251SDavid Ahern dev_put(dev); 297783c44251SDavid Ahern in6_dev_put(idev); 297883c44251SDavid Ahern } 297983c44251SDavid Ahern dev = net->loopback_dev; 298083c44251SDavid Ahern dev_hold(dev); 298183c44251SDavid Ahern idev = in6_dev_get(dev); 298283c44251SDavid Ahern if (!idev) { 298383c44251SDavid Ahern err = -ENODEV; 298483c44251SDavid Ahern goto out; 298583c44251SDavid Ahern } 298683c44251SDavid Ahern } 298783c44251SDavid Ahern /* reject routes skip the gateway, device-state and encap handling below */ goto set_dev; 298883c44251SDavid Ahern } 298983c44251SDavid Ahern 299083c44251SDavid Ahern if (cfg->fc_flags & RTF_GATEWAY) { 299183c44251SDavid Ahern err = ip6_validate_gw(net, cfg, &dev, &idev, extack); 299283c44251SDavid Ahern if (err) 299383c44251SDavid Ahern goto out; 299483c44251SDavid Ahern 2995ad1601aeSDavid Ahern fib6_nh->fib_nh_gw6 = cfg->fc_gateway; 2996bdf00467SDavid Ahern fib6_nh->fib_nh_gw_family = AF_INET6; 299783c44251SDavid Ahern } 299883c44251SDavid Ahern 299983c44251SDavid Ahern err = -ENODEV; 300083c44251SDavid Ahern if (!dev) 300183c44251SDavid Ahern goto out; 300283c44251SDavid Ahern 300383c44251SDavid Ahern /* NOTE(review): idev is dereferenced without a NULL test; this relies on every path that sets dev also setting idev - confirm */ if (idev->cnf.disable_ipv6) { 300483c44251SDavid Ahern NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device"); 300583c44251SDavid Ahern err = -EACCES; 300683c44251SDavid Ahern goto out; 300783c44251SDavid Ahern } 300883c44251SDavid Ahern 300983c44251SDavid Ahern if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) { 301083c44251SDavid Ahern NL_SET_ERR_MSG(extack, "Nexthop device is not up"); 301183c44251SDavid Ahern err = -ENETDOWN; 301283c44251SDavid Ahern goto out; 301383c44251SDavid Ahern } 301483c44251SDavid Ahern 301583c44251SDavid Ahern /* a missing carrier is recorded as RTNH_F_LINKDOWN rather than rejected, except for local and anycast routes */ if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) && 301683c44251SDavid Ahern !netif_carrier_ok(dev)) 3017ad1601aeSDavid Ahern fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN; 301883c44251SDavid Ahern 3019979e276eSDavid Ahern err = fib_nh_common_init(&fib6_nh->nh_common,
cfg->fc_encap, 3020979e276eSDavid Ahern cfg->fc_encap_type, cfg, gfp_flags, extack); 3021979e276eSDavid Ahern if (err) 3022979e276eSDavid Ahern goto out; 302383c44251SDavid Ahern set_dev: 3024ad1601aeSDavid Ahern fib6_nh->fib_nh_dev = dev; 3025f1741730SDavid Ahern fib6_nh->fib_nh_oif = dev->ifindex; 302683c44251SDavid Ahern err = 0; 302783c44251SDavid Ahern out: 302883c44251SDavid Ahern /* the idev reference is dropped on every path; the dev reference only on failure, since on success fib_nh_dev keeps it */ if (idev) 302983c44251SDavid Ahern in6_dev_put(idev); 303083c44251SDavid Ahern 303183c44251SDavid Ahern if (err) { 3032ad1601aeSDavid Ahern lwtstate_put(fib6_nh->fib_nh_lws); 3033ad1601aeSDavid Ahern fib6_nh->fib_nh_lws = NULL; 303483c44251SDavid Ahern if (dev) 303583c44251SDavid Ahern dev_put(dev); 303683c44251SDavid Ahern } 303783c44251SDavid Ahern 303883c44251SDavid Ahern return err; 303983c44251SDavid Ahern } 304083c44251SDavid Ahern /* fib6_nh_release - release a nexthop by handing its common part to fib_nh_common_release(). */ 3041dac7d0f2SDavid Ahern void fib6_nh_release(struct fib6_nh *fib6_nh) 3042dac7d0f2SDavid Ahern { 3043979e276eSDavid Ahern fib_nh_common_release(&fib6_nh->nh_common); 3044dac7d0f2SDavid Ahern } 3045dac7d0f2SDavid Ahern /* ip6_route_info_create - validate cfg and build a fib6_info from it: table lookup or creation, metrics, expiry, protocol, prefixes, nexthop (fib6_nh_init()) and preferred source. Returns the new fib6_info, or ERR_PTR() of a negative errno; on failure fib6_info_release() is called on whatever had been allocated. */ 30468d1c802bSDavid Ahern static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, 3047acb54e3cSDavid Ahern gfp_t gfp_flags, 3048333c4301SDavid Ahern struct netlink_ext_ack *extack) 30491da177e4SLinus Torvalds { 30505578689aSDaniel Lezcano struct net *net = cfg->fc_nlinfo.nl_net; 30518d1c802bSDavid Ahern struct fib6_info *rt = NULL; 3052c71099acSThomas Graf struct fib6_table *table; 30538c5b83f0SRoopa Prabhu int err = -EINVAL; 305483c44251SDavid Ahern int addr_type; 30551da177e4SLinus Torvalds 3056557c44beSDavid Ahern /* RTF_PCPU is an internal flag; can not be set by userspace */ 3057d5d531cbSDavid Ahern if (cfg->fc_flags & RTF_PCPU) { 3058d5d531cbSDavid Ahern NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU"); 3059557c44beSDavid Ahern goto out; 3060d5d531cbSDavid Ahern } 3061557c44beSDavid Ahern 30622ea2352eSWei Wang /* RTF_CACHE is an internal flag; can not be set by userspace */ 30632ea2352eSWei Wang if
(cfg->fc_flags & RTF_CACHE) { 30642ea2352eSWei Wang NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE"); 30652ea2352eSWei Wang goto out; 30662ea2352eSWei Wang } 30672ea2352eSWei Wang 3068e8478e80SDavid Ahern if (cfg->fc_type > RTN_MAX) { 3069e8478e80SDavid Ahern NL_SET_ERR_MSG(extack, "Invalid route type"); 3070e8478e80SDavid Ahern goto out; 3071e8478e80SDavid Ahern } 3072e8478e80SDavid Ahern 3073d5d531cbSDavid Ahern if (cfg->fc_dst_len > 128) { 3074d5d531cbSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid prefix length"); 30758c5b83f0SRoopa Prabhu goto out; 3076d5d531cbSDavid Ahern } 3077d5d531cbSDavid Ahern if (cfg->fc_src_len > 128) { 3078d5d531cbSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid source address length"); 3079d5d531cbSDavid Ahern goto out; 3080d5d531cbSDavid Ahern } 30811da177e4SLinus Torvalds #ifndef CONFIG_IPV6_SUBTREES 3082d5d531cbSDavid Ahern if (cfg->fc_src_len) { 3083d5d531cbSDavid Ahern NL_SET_ERR_MSG(extack, 3084d5d531cbSDavid Ahern "Specifying source address requires IPV6_SUBTREES to be enabled"); 30858c5b83f0SRoopa Prabhu goto out; 3086d5d531cbSDavid Ahern } 30871da177e4SLinus Torvalds #endif 3088fc1e64e1SDavid Ahern 3089c71099acSThomas Graf err = -ENOBUFS; 309038308473SDavid S. Miller /* a netlink request without NLM_F_CREATE first looks for an existing table, but a missing table is still created after a warning */ if (cfg->fc_nlinfo.nlh && 3091d71314b4SMatti Vaittinen !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) { 3092d71314b4SMatti Vaittinen table = fib6_get_table(net, cfg->fc_table); 309338308473SDavid S. Miller if (!table) { 3094f3213831SJoe Perches pr_warn("NLM_F_CREATE should be specified when creating new route\n"); 3095d71314b4SMatti Vaittinen table = fib6_new_table(net, cfg->fc_table); 3096d71314b4SMatti Vaittinen } 3097d71314b4SMatti Vaittinen } else { 3098d71314b4SMatti Vaittinen table = fib6_new_table(net, cfg->fc_table); 3099d71314b4SMatti Vaittinen } 310038308473SDavid S. Miller 310138308473SDavid S.
Miller if (!table) 3102c71099acSThomas Graf goto out; 3103c71099acSThomas Graf 31041da177e4SLinus Torvalds err = -ENOMEM; 310593531c67SDavid Ahern rt = fib6_info_alloc(gfp_flags); 310693531c67SDavid Ahern if (!rt) 31071da177e4SLinus Torvalds goto out; 310893531c67SDavid Ahern 3109d7e774f3SDavid Ahern rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len, 3110d7e774f3SDavid Ahern extack); 3111767a2217SDavid Ahern if (IS_ERR(rt->fib6_metrics)) { 3112767a2217SDavid Ahern err = PTR_ERR(rt->fib6_metrics); 3113fda21d46SEric Dumazet /* Do not leave garbage there. */ 3114fda21d46SEric Dumazet rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics; 3115767a2217SDavid Ahern goto out; 3116767a2217SDavid Ahern } 3117767a2217SDavid Ahern 311893531c67SDavid Ahern if (cfg->fc_flags & RTF_ADDRCONF) 311993531c67SDavid Ahern rt->dst_nocount = true; 31201da177e4SLinus Torvalds 31211716a961SGao feng if (cfg->fc_flags & RTF_EXPIRES) 312214895687SDavid Ahern fib6_set_expires(rt, jiffies + 31231716a961SGao feng clock_t_to_jiffies(cfg->fc_expires)); 31241716a961SGao feng else 312514895687SDavid Ahern fib6_clean_expires(rt); 31261da177e4SLinus Torvalds 312786872cb5SThomas Graf /* note: the defaulted protocol is written back into the caller-owned cfg */ if (cfg->fc_protocol == RTPROT_UNSPEC) 312886872cb5SThomas Graf cfg->fc_protocol = RTPROT_BOOT; 312993c2fb25SDavid Ahern rt->fib6_protocol = cfg->fc_protocol; 313086872cb5SThomas Graf 313183c44251SDavid Ahern rt->fib6_table = table; 313283c44251SDavid Ahern rt->fib6_metric = cfg->fc_metric; 313383c44251SDavid Ahern rt->fib6_type = cfg->fc_type; 31342b2450caSDavid Ahern rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY; 313519e42e45SRoopa Prabhu 313693c2fb25SDavid Ahern ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); 313793c2fb25SDavid Ahern rt->fib6_dst.plen = cfg->fc_dst_len; 313893c2fb25SDavid Ahern if (rt->fib6_dst.plen == 128) 31393b6761d1SDavid Ahern rt->dst_host = true; 31401da177e4SLinus Torvalds 31411da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES 314293c2fb25SDavid
Ahern ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len); 314393c2fb25SDavid Ahern rt->fib6_src.plen = cfg->fc_src_len; 31441da177e4SLinus Torvalds #endif 314583c44251SDavid Ahern err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack); 31461da177e4SLinus Torvalds if (err) 31471da177e4SLinus Torvalds goto out; 31489fbb704cSDavid Ahern 314983c44251SDavid Ahern /* We cannot add true routes via loopback here, 315083c44251SDavid Ahern * they would result in kernel looping; promote them to reject routes 315183c44251SDavid Ahern */ 315283c44251SDavid Ahern addr_type = ipv6_addr_type(&cfg->fc_dst); 3153ad1601aeSDavid Ahern if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type)) 315483c44251SDavid Ahern rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP; 3155955ec4cbSDavid Ahern 3156c3968a85SDaniel Walter /* a preferred source must pass ipv6_chk_addr() against the nexthop device */ if (!ipv6_addr_any(&cfg->fc_prefsrc)) { 315783c44251SDavid Ahern struct net_device *dev = fib6_info_nh_dev(rt); 315883c44251SDavid Ahern 3159c3968a85SDaniel Walter if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { 3160d5d531cbSDavid Ahern NL_SET_ERR_MSG(extack, "Invalid source address"); 3161c3968a85SDaniel Walter err = -EINVAL; 3162c3968a85SDaniel Walter goto out; 3163c3968a85SDaniel Walter } 316493c2fb25SDavid Ahern rt->fib6_prefsrc.addr = cfg->fc_prefsrc; 316593c2fb25SDavid Ahern rt->fib6_prefsrc.plen = 128; 3166c3968a85SDaniel Walter } else 316793c2fb25SDavid Ahern rt->fib6_prefsrc.plen = 0; 3168c3968a85SDaniel Walter 31698c5b83f0SRoopa Prabhu return rt; 31701da177e4SLinus Torvalds out: 317193531c67SDavid Ahern /* rt is still NULL when one of the early checks failed; fib6_info_release() is called with it regardless */ fib6_info_release(rt); 31728c5b83f0SRoopa Prabhu return ERR_PTR(err); 31736b9ea5a6SRoopa Prabhu } 31746b9ea5a6SRoopa Prabhu /* ip6_route_add - create a fib6_info from cfg and insert it with __ip6_ins_rt(). The reference returned by ip6_route_info_create() is dropped here whether or not the insert succeeds. Returns PTR_ERR() of a failed create, otherwise the __ip6_ins_rt() result. */ 3175acb54e3cSDavid Ahern int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, 3176333c4301SDavid Ahern struct netlink_ext_ack *extack) 31776b9ea5a6SRoopa Prabhu { 31788d1c802bSDavid Ahern struct fib6_info *rt; 31796b9ea5a6SRoopa Prabhu int err; 31806b9ea5a6SRoopa Prabhu 3181acb54e3cSDavid Ahern rt =
ip6_route_info_create(cfg, gfp_flags, extack); 3182d4ead6b3SDavid Ahern if (IS_ERR(rt)) 3183d4ead6b3SDavid Ahern return PTR_ERR(rt); 31846b9ea5a6SRoopa Prabhu 3185d4ead6b3SDavid Ahern err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack); 318693531c67SDavid Ahern /* drop the reference obtained from ip6_route_info_create(), success or not */ fib6_info_release(rt); 31876b9ea5a6SRoopa Prabhu 31881da177e4SLinus Torvalds return err; 31891da177e4SLinus Torvalds } 31901da177e4SLinus Torvalds /* __ip6_del_rt - delete rt from its table under tb6_lock. Returns -ENOENT for the fib6_null_entry, otherwise the fib6_del() result. Always drops one reference on rt, so the caller must hold one. */ 31918d1c802bSDavid Ahern static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info) 31921da177e4SLinus Torvalds { 3193afb1d4b5SDavid Ahern struct net *net = info->nl_net; 3194c71099acSThomas Graf struct fib6_table *table; 3195afb1d4b5SDavid Ahern int err; 31961da177e4SLinus Torvalds 3197421842edSDavid Ahern if (rt == net->ipv6.fib6_null_entry) { 31986825a26cSGao feng err = -ENOENT; 31996825a26cSGao feng goto out; 32006825a26cSGao feng } 32016c813a72SPatrick McHardy 320293c2fb25SDavid Ahern table = rt->fib6_table; 320366f5d6ceSWei Wang spin_lock_bh(&table->tb6_lock); 320486872cb5SThomas Graf err = fib6_del(rt, info); 320566f5d6ceSWei Wang spin_unlock_bh(&table->tb6_lock); 32061da177e4SLinus Torvalds 32076825a26cSGao feng out: 320893531c67SDavid Ahern fib6_info_release(rt); 32091da177e4SLinus Torvalds return err; 32101da177e4SLinus Torvalds } 32111da177e4SLinus Torvalds /* ip6_del_rt - __ip6_del_rt() with an nl_info that carries only the namespace. */ 32128d1c802bSDavid Ahern int ip6_del_rt(struct net *net, struct fib6_info *rt) 3213e0a1ad73SThomas Graf { 3214afb1d4b5SDavid Ahern struct nl_info info = { .nl_net = net }; 3215afb1d4b5SDavid Ahern 3216528c4cebSDenis V.
Lunev return __ip6_del_rt(rt, &info); 3217e0a1ad73SThomas Graf } 3218e0a1ad73SThomas Graf /* __ip6_del_rt_siblings - delete rt and, when it has siblings and cfg->fc_delete_all_nh is set, all of its siblings under a single tb6_lock hold. In that case one RTM_DELROUTE message is built up front and sent after the lock is released, with info->skip_notify set; if the message cannot be built, skip_notify is left alone. Returns -ENOENT for the fib6_null_entry, otherwise the result of the first failing or the final fib6_del(). Always drops one reference on rt. */ 32198d1c802bSDavid Ahern static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg) 32200ae81335SDavid Ahern { 32210ae81335SDavid Ahern struct nl_info *info = &cfg->fc_nlinfo; 3222e3330039SWANG Cong struct net *net = info->nl_net; 322316a16cd3SDavid Ahern struct sk_buff *skb = NULL; 32240ae81335SDavid Ahern struct fib6_table *table; 3225e3330039SWANG Cong int err = -ENOENT; 32260ae81335SDavid Ahern 3227421842edSDavid Ahern if (rt == net->ipv6.fib6_null_entry) 3228e3330039SWANG Cong goto out_put; 322993c2fb25SDavid Ahern table = rt->fib6_table; 323066f5d6ceSWei Wang spin_lock_bh(&table->tb6_lock); 32310ae81335SDavid Ahern 323293c2fb25SDavid Ahern if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) { 32338d1c802bSDavid Ahern struct fib6_info *sibling, *next_sibling; 32340ae81335SDavid Ahern 323516a16cd3SDavid Ahern /* prefer to send a single notification with all hops */ 323616a16cd3SDavid Ahern skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); 323716a16cd3SDavid Ahern if (skb) { 323816a16cd3SDavid Ahern u32 seq = info->nlh ?
info->nlh->nlmsg_seq : 0; 323916a16cd3SDavid Ahern 3240d4ead6b3SDavid Ahern if (rt6_fill_node(net, skb, rt, NULL, 324116a16cd3SDavid Ahern NULL, NULL, 0, RTM_DELROUTE, 324216a16cd3SDavid Ahern info->portid, seq, 0) < 0) { 324316a16cd3SDavid Ahern kfree_skb(skb); 324416a16cd3SDavid Ahern skb = NULL; 324516a16cd3SDavid Ahern } else 324616a16cd3SDavid Ahern info->skip_notify = 1; 324716a16cd3SDavid Ahern } 324816a16cd3SDavid Ahern 32490ae81335SDavid Ahern /* the _safe iterator is used here, presumably because fib6_del() unlinks the sibling being visited - confirm against fib6_del() */ list_for_each_entry_safe(sibling, next_sibling, 325093c2fb25SDavid Ahern &rt->fib6_siblings, 325193c2fb25SDavid Ahern fib6_siblings) { 32520ae81335SDavid Ahern err = fib6_del(sibling, info); 32530ae81335SDavid Ahern if (err) 3254e3330039SWANG Cong goto out_unlock; 32550ae81335SDavid Ahern } 32560ae81335SDavid Ahern } 32570ae81335SDavid Ahern 32580ae81335SDavid Ahern err = fib6_del(rt, info); 3259e3330039SWANG Cong out_unlock: 326066f5d6ceSWei Wang spin_unlock_bh(&table->tb6_lock); 3261e3330039SWANG Cong out_put: 326293531c67SDavid Ahern fib6_info_release(rt); 326316a16cd3SDavid Ahern 326416a16cd3SDavid Ahern /* skb is non-NULL only if the combined message was built above; it is sent even when one of the fib6_del() calls failed */ if (skb) { 3265e3330039SWANG Cong rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, 326616a16cd3SDavid Ahern info->nlh, gfp_any()); 326716a16cd3SDavid Ahern } 32680ae81335SDavid Ahern return err; 32690ae81335SDavid Ahern } 32700ae81335SDavid Ahern /* ip6_del_cached_rt - remove the cached route rt via rt6_remove_exception_rt() if it matches the filters in cfg (interface index, and gateway when RTF_GATEWAY is set). Returns -ESRCH when a filter does not match, otherwise the rt6_remove_exception_rt() result. */ 327123fb93a4SDavid Ahern static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg) 327223fb93a4SDavid Ahern { 327323fb93a4SDavid Ahern int rc = -ESRCH; 327423fb93a4SDavid Ahern 327523fb93a4SDavid Ahern if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex) 327623fb93a4SDavid Ahern goto out; 327723fb93a4SDavid Ahern 327823fb93a4SDavid Ahern if (cfg->fc_flags & RTF_GATEWAY && 327923fb93a4SDavid Ahern !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) 328023fb93a4SDavid Ahern goto out; 3281761f6026SXin Long 328223fb93a4SDavid Ahern rc = rt6_remove_exception_rt(rt); 328323fb93a4SDavid Ahern out: 328423fb93a4SDavid Ahern return rc;
328523fb93a4SDavid Ahern } 328623fb93a4SDavid Ahern /* ip6_route_del - delete the route described by cfg. With RTF_CACHE set only cached (exception) routes are considered, via ip6_del_cached_rt(). Otherwise the first route on the located node that matches the interface, gateway, metric and protocol filters set in cfg is deleted: just that route when RTF_GATEWAY was given, else through __ip6_del_rt_siblings(). Returns -ESRCH if the table, the node or a matching route is not found, otherwise the result of the delete helper. */ 3287333c4301SDavid Ahern static int ip6_route_del(struct fib6_config *cfg, 3288333c4301SDavid Ahern struct netlink_ext_ack *extack) 32891da177e4SLinus Torvalds { 32908d1c802bSDavid Ahern struct rt6_info *rt_cache; 3291c71099acSThomas Graf struct fib6_table *table; 32928d1c802bSDavid Ahern struct fib6_info *rt; 32931da177e4SLinus Torvalds struct fib6_node *fn; 32941da177e4SLinus Torvalds int err = -ESRCH; 32951da177e4SLinus Torvalds 32965578689aSDaniel Lezcano table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); 3297d5d531cbSDavid Ahern if (!table) { 3298d5d531cbSDavid Ahern NL_SET_ERR_MSG(extack, "FIB table does not exist"); 3299c71099acSThomas Graf return err; 3300d5d531cbSDavid Ahern } 33011da177e4SLinus Torvalds 330266f5d6ceSWei Wang rcu_read_lock(); 3303c71099acSThomas Graf 3304c71099acSThomas Graf fn = fib6_locate(&table->tb6_root, 330586872cb5SThomas Graf &cfg->fc_dst, cfg->fc_dst_len, 330638fbeeeeSWei Wang &cfg->fc_src, cfg->fc_src_len, 33072b760fcfSWei Wang !(cfg->fc_flags & RTF_CACHE)); 33081da177e4SLinus Torvalds 33091da177e4SLinus Torvalds if (fn) { 331066f5d6ceSWei Wang for_each_fib6_node_rt_rcu(fn) { 3311ad1601aeSDavid Ahern struct fib6_nh *nh; 3312ad1601aeSDavid Ahern 33132b760fcfSWei Wang /* cache deletes never touch the FIB entry itself: a result other than -ESRCH from the cached-route helper ends the search, otherwise move on to the next route */ if (cfg->fc_flags & RTF_CACHE) { 331423fb93a4SDavid Ahern int rc; 331523fb93a4SDavid Ahern 33162b760fcfSWei Wang rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst, 33172b760fcfSWei Wang &cfg->fc_src); 331823fb93a4SDavid Ahern if (rt_cache) { 331923fb93a4SDavid Ahern rc = ip6_del_cached_rt(rt_cache, cfg); 33209e575010SEric Dumazet if (rc != -ESRCH) { 33219e575010SEric Dumazet rcu_read_unlock(); 332223fb93a4SDavid Ahern return rc; 332323fb93a4SDavid Ahern } 33249e575010SEric Dumazet } 33251f56a01fSMartin KaFai Lau continue; 33262b760fcfSWei Wang } 3327ad1601aeSDavid Ahern 3328ad1601aeSDavid Ahern nh = &rt->fib6_nh; 332986872cb5SThomas Graf if (cfg->fc_ifindex && 3330ad1601aeSDavid Ahern (!nh->fib_nh_dev ||
3331ad1601aeSDavid Ahern nh->fib_nh_dev->ifindex != cfg->fc_ifindex)) 33321da177e4SLinus Torvalds continue; 333386872cb5SThomas Graf if (cfg->fc_flags & RTF_GATEWAY && 3334ad1601aeSDavid Ahern !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6)) 33351da177e4SLinus Torvalds continue; 333693c2fb25SDavid Ahern if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric) 33371da177e4SLinus Torvalds continue; 333893c2fb25SDavid Ahern if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol) 3339c2ed1880SMantas M continue; 3340e873e4b9SWei Wang /* take a reference before leaving the RCU section; the delete helpers called below release it */ if (!fib6_info_hold_safe(rt)) 3341e873e4b9SWei Wang continue; 334266f5d6ceSWei Wang rcu_read_unlock(); 33431da177e4SLinus Torvalds 33440ae81335SDavid Ahern /* if gateway was specified only delete the one hop */ 33450ae81335SDavid Ahern if (cfg->fc_flags & RTF_GATEWAY) 334686872cb5SThomas Graf return __ip6_del_rt(rt, &cfg->fc_nlinfo); 33470ae81335SDavid Ahern 33480ae81335SDavid Ahern return __ip6_del_rt_siblings(rt, cfg); 33491da177e4SLinus Torvalds } 33501da177e4SLinus Torvalds } 335166f5d6ceSWei Wang rcu_read_unlock(); 33521da177e4SLinus Torvalds 33531da177e4SLinus Torvalds return err; 33541da177e4SLinus Torvalds } 33551da177e4SLinus Torvalds 33566700c270SDavid S. Miller static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) 3357a6279458SYOSHIFUJI Hideaki { 3358a6279458SYOSHIFUJI Hideaki struct netevent_redirect netevent; 3359e8599ff4SDavid S. Miller struct rt6_info *rt, *nrt = NULL; 3360e8599ff4SDavid S. Miller struct ndisc_options ndopts; 3361e8599ff4SDavid S. Miller struct inet6_dev *in6_dev; 3362e8599ff4SDavid S. Miller struct neighbour *neigh; 3363a68886a6SDavid Ahern struct fib6_info *from; 336471bcdba0SYOSHIFUJI Hideaki / 吉藤英明 struct rd_msg *msg; 33656e157b6aSDavid S. Miller int optlen, on_link; 33666e157b6aSDavid S. Miller u8 *lladdr; 3367e8599ff4SDavid S.
Miller 336829a3cad5SSimon Horman optlen = skb_tail_pointer(skb) - skb_transport_header(skb); 336971bcdba0SYOSHIFUJI Hideaki / 吉藤英明 optlen -= sizeof(*msg); 3370e8599ff4SDavid S. Miller 3371e8599ff4SDavid S. Miller if (optlen < 0) { 33726e157b6aSDavid S. Miller net_dbg_ratelimited("rt6_do_redirect: packet too short\n"); 3373e8599ff4SDavid S. Miller return; 3374e8599ff4SDavid S. Miller } 3375e8599ff4SDavid S. Miller 337671bcdba0SYOSHIFUJI Hideaki / 吉藤英明 msg = (struct rd_msg *)icmp6_hdr(skb); 3377e8599ff4SDavid S. Miller 337871bcdba0SYOSHIFUJI Hideaki / 吉藤英明 if (ipv6_addr_is_multicast(&msg->dest)) { 33796e157b6aSDavid S. Miller net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n"); 3380e8599ff4SDavid S. Miller return; 3381e8599ff4SDavid S. Miller } 3382e8599ff4SDavid S. Miller 33836e157b6aSDavid S. Miller on_link = 0; 338471bcdba0SYOSHIFUJI Hideaki / 吉藤英明 if (ipv6_addr_equal(&msg->dest, &msg->target)) { 3385e8599ff4SDavid S. Miller on_link = 1; 338671bcdba0SYOSHIFUJI Hideaki / 吉藤英明 } else if (ipv6_addr_type(&msg->target) != 3387e8599ff4SDavid S. Miller (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { 33886e157b6aSDavid S. Miller net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n"); 3389e8599ff4SDavid S. Miller return; 3390e8599ff4SDavid S. Miller } 3391e8599ff4SDavid S. Miller 3392e8599ff4SDavid S. Miller in6_dev = __in6_dev_get(skb->dev); 3393e8599ff4SDavid S. Miller if (!in6_dev) 3394e8599ff4SDavid S. Miller return; 3395e8599ff4SDavid S. Miller if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) 3396e8599ff4SDavid S. Miller return; 3397e8599ff4SDavid S. Miller 3398e8599ff4SDavid S. Miller /* RFC2461 8.1: 3399e8599ff4SDavid S. Miller * The IP source address of the Redirect MUST be the same as the current 3400e8599ff4SDavid S. Miller * first-hop router for the specified ICMP Destination Address. 3401e8599ff4SDavid S. Miller */ 3402e8599ff4SDavid S. 
Miller 3403f997c55cSAlexander Aring if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) { 3404e8599ff4SDavid S. Miller net_dbg_ratelimited("rt6_redirect: invalid ND options\n"); 3405e8599ff4SDavid S. Miller return; 3406e8599ff4SDavid S. Miller } 34076e157b6aSDavid S. Miller 34086e157b6aSDavid S. Miller lladdr = NULL; 3409e8599ff4SDavid S. Miller if (ndopts.nd_opts_tgt_lladdr) { 3410e8599ff4SDavid S. Miller lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, 3411e8599ff4SDavid S. Miller skb->dev); 3412e8599ff4SDavid S. Miller if (!lladdr) { 3413e8599ff4SDavid S. Miller net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n"); 3414e8599ff4SDavid S. Miller return; 3415e8599ff4SDavid S. Miller } 3416e8599ff4SDavid S. Miller } 3417e8599ff4SDavid S. Miller 34186e157b6aSDavid S. Miller rt = (struct rt6_info *) dst; 3419ec13ad1dSMatthias Schiffer if (rt->rt6i_flags & RTF_REJECT) { 34206e157b6aSDavid S. Miller net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n"); 34216e157b6aSDavid S. Miller return; 34226e157b6aSDavid S. Miller } 34236e157b6aSDavid S. Miller 34246e157b6aSDavid S. Miller /* Redirect received -> path was valid. 34256e157b6aSDavid S. Miller * Look, redirects are sent only in response to data packets, 34266e157b6aSDavid S. Miller * so that this nexthop apparently is reachable. --ANK 34276e157b6aSDavid S. Miller */ 34280dec879fSJulian Anastasov dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr); 34296e157b6aSDavid S. Miller 343071bcdba0SYOSHIFUJI Hideaki / 吉藤英明 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1); 3431e8599ff4SDavid S. Miller if (!neigh) 3432e8599ff4SDavid S. Miller return; 3433e8599ff4SDavid S. Miller 34341da177e4SLinus Torvalds /* 34351da177e4SLinus Torvalds * We have finally decided to accept it. 
34361da177e4SLinus Torvalds */ 34371da177e4SLinus Torvalds 3438f997c55cSAlexander Aring ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, 34391da177e4SLinus Torvalds NEIGH_UPDATE_F_WEAK_OVERRIDE| 34401da177e4SLinus Torvalds NEIGH_UPDATE_F_OVERRIDE| 34411da177e4SLinus Torvalds (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER| 3442f997c55cSAlexander Aring NEIGH_UPDATE_F_ISROUTER)), 3443f997c55cSAlexander Aring NDISC_REDIRECT, &ndopts); 34441da177e4SLinus Torvalds 34454d85cd0cSDavid Ahern rcu_read_lock(); 3446a68886a6SDavid Ahern from = rcu_dereference(rt->from); 3447e873e4b9SWei Wang /* This fib6_info_hold() is safe here because we hold reference to rt 3448e873e4b9SWei Wang * and rt already holds reference to fib6_info. 3449e873e4b9SWei Wang */ 34508a14e46fSDavid Ahern fib6_info_hold(from); 34514d85cd0cSDavid Ahern rcu_read_unlock(); 34528a14e46fSDavid Ahern 34538a14e46fSDavid Ahern nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL); 345438308473SDavid S. Miller if (!nrt) 34551da177e4SLinus Torvalds goto out; 34561da177e4SLinus Torvalds 34571da177e4SLinus Torvalds nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE; 34581da177e4SLinus Torvalds if (on_link) 34591da177e4SLinus Torvalds nrt->rt6i_flags &= ~RTF_GATEWAY; 34601da177e4SLinus Torvalds 34614e3fd7a0SAlexey Dobriyan nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; 34621da177e4SLinus Torvalds 34632b760fcfSWei Wang /* No need to remove rt from the exception table if rt is 34642b760fcfSWei Wang * a cached route because rt6_insert_exception() will 34652b760fcfSWei Wang * takes care of it 34662b760fcfSWei Wang */ 34678a14e46fSDavid Ahern if (rt6_insert_exception(nrt, from)) { 34682b760fcfSWei Wang dst_release_immediate(&nrt->dst); 34692b760fcfSWei Wang goto out; 34702b760fcfSWei Wang } 34711da177e4SLinus Torvalds 3472d8d1f30bSChangli Gao netevent.old = &rt->dst; 3473d8d1f30bSChangli Gao netevent.new = &nrt->dst; 347471bcdba0SYOSHIFUJI Hideaki / 吉藤英明 netevent.daddr = &msg->dest; 347560592833SYOSHIFUJI 
Hideaki / 吉藤英明 netevent.neigh = neigh; 34768d71740cSTom Tucker call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); 34778d71740cSTom Tucker 34781da177e4SLinus Torvalds out: 34798a14e46fSDavid Ahern fib6_info_release(from); 3480e8599ff4SDavid S. Miller neigh_release(neigh); 34816e157b6aSDavid S. Miller } 34826e157b6aSDavid S. Miller 348370ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO 34848d1c802bSDavid Ahern static struct fib6_info *rt6_get_route_info(struct net *net, 3485b71d1d42SEric Dumazet const struct in6_addr *prefix, int prefixlen, 3486830218c1SDavid Ahern const struct in6_addr *gwaddr, 3487830218c1SDavid Ahern struct net_device *dev) 348870ceb4f5SYOSHIFUJI Hideaki { 3489830218c1SDavid Ahern u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO; 3490830218c1SDavid Ahern int ifindex = dev->ifindex; 349170ceb4f5SYOSHIFUJI Hideaki struct fib6_node *fn; 34928d1c802bSDavid Ahern struct fib6_info *rt = NULL; 3493c71099acSThomas Graf struct fib6_table *table; 349470ceb4f5SYOSHIFUJI Hideaki 3495830218c1SDavid Ahern table = fib6_get_table(net, tb_id); 349638308473SDavid S. 
Miller if (!table) 3497c71099acSThomas Graf return NULL; 3498c71099acSThomas Graf 349966f5d6ceSWei Wang rcu_read_lock(); 350038fbeeeeSWei Wang fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true); 350170ceb4f5SYOSHIFUJI Hideaki if (!fn) 350270ceb4f5SYOSHIFUJI Hideaki goto out; 350370ceb4f5SYOSHIFUJI Hideaki 350466f5d6ceSWei Wang for_each_fib6_node_rt_rcu(fn) { 3505ad1601aeSDavid Ahern if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex) 350670ceb4f5SYOSHIFUJI Hideaki continue; 35072b2450caSDavid Ahern if (!(rt->fib6_flags & RTF_ROUTEINFO) || 3508bdf00467SDavid Ahern !rt->fib6_nh.fib_nh_gw_family) 350970ceb4f5SYOSHIFUJI Hideaki continue; 3510ad1601aeSDavid Ahern if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr)) 351170ceb4f5SYOSHIFUJI Hideaki continue; 3512e873e4b9SWei Wang if (!fib6_info_hold_safe(rt)) 3513e873e4b9SWei Wang continue; 351470ceb4f5SYOSHIFUJI Hideaki break; 351570ceb4f5SYOSHIFUJI Hideaki } 351670ceb4f5SYOSHIFUJI Hideaki out: 351766f5d6ceSWei Wang rcu_read_unlock(); 351870ceb4f5SYOSHIFUJI Hideaki return rt; 351970ceb4f5SYOSHIFUJI Hideaki } 352070ceb4f5SYOSHIFUJI Hideaki 35218d1c802bSDavid Ahern static struct fib6_info *rt6_add_route_info(struct net *net, 3522b71d1d42SEric Dumazet const struct in6_addr *prefix, int prefixlen, 3523830218c1SDavid Ahern const struct in6_addr *gwaddr, 3524830218c1SDavid Ahern struct net_device *dev, 352595c96174SEric Dumazet unsigned int pref) 352670ceb4f5SYOSHIFUJI Hideaki { 352786872cb5SThomas Graf struct fib6_config cfg = { 3528238fc7eaSRami Rosen .fc_metric = IP6_RT_PRIO_USER, 3529830218c1SDavid Ahern .fc_ifindex = dev->ifindex, 353086872cb5SThomas Graf .fc_dst_len = prefixlen, 353186872cb5SThomas Graf .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | 353286872cb5SThomas Graf RTF_UP | RTF_PREF(pref), 3533b91d5329SXin Long .fc_protocol = RTPROT_RA, 3534e8478e80SDavid Ahern .fc_type = RTN_UNICAST, 353515e47304SEric W. 
Biederman .fc_nlinfo.portid = 0, 3536efa2cea0SDaniel Lezcano .fc_nlinfo.nlh = NULL, 3537efa2cea0SDaniel Lezcano .fc_nlinfo.nl_net = net, 353886872cb5SThomas Graf }; 353970ceb4f5SYOSHIFUJI Hideaki 3540830218c1SDavid Ahern cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO, 35414e3fd7a0SAlexey Dobriyan cfg.fc_dst = *prefix; 35424e3fd7a0SAlexey Dobriyan cfg.fc_gateway = *gwaddr; 354386872cb5SThomas Graf 3544e317da96SYOSHIFUJI Hideaki /* We should treat it as a default route if prefix length is 0. */ 3545e317da96SYOSHIFUJI Hideaki if (!prefixlen) 354686872cb5SThomas Graf cfg.fc_flags |= RTF_DEFAULT; 354770ceb4f5SYOSHIFUJI Hideaki 3548acb54e3cSDavid Ahern ip6_route_add(&cfg, GFP_ATOMIC, NULL); 354970ceb4f5SYOSHIFUJI Hideaki 3550830218c1SDavid Ahern return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev); 355170ceb4f5SYOSHIFUJI Hideaki } 355270ceb4f5SYOSHIFUJI Hideaki #endif 355370ceb4f5SYOSHIFUJI Hideaki 35548d1c802bSDavid Ahern struct fib6_info *rt6_get_dflt_router(struct net *net, 3555afb1d4b5SDavid Ahern const struct in6_addr *addr, 3556afb1d4b5SDavid Ahern struct net_device *dev) 35571da177e4SLinus Torvalds { 3558830218c1SDavid Ahern u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT; 35598d1c802bSDavid Ahern struct fib6_info *rt; 3560c71099acSThomas Graf struct fib6_table *table; 35611da177e4SLinus Torvalds 3562afb1d4b5SDavid Ahern table = fib6_get_table(net, tb_id); 356338308473SDavid S. 
Miller if (!table) 3564c71099acSThomas Graf return NULL; 35651da177e4SLinus Torvalds 356666f5d6ceSWei Wang rcu_read_lock(); 356766f5d6ceSWei Wang for_each_fib6_node_rt_rcu(&table->tb6_root) { 3568ad1601aeSDavid Ahern struct fib6_nh *nh = &rt->fib6_nh; 3569ad1601aeSDavid Ahern 3570ad1601aeSDavid Ahern if (dev == nh->fib_nh_dev && 357193c2fb25SDavid Ahern ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && 3572ad1601aeSDavid Ahern ipv6_addr_equal(&nh->fib_nh_gw6, addr)) 35731da177e4SLinus Torvalds break; 35741da177e4SLinus Torvalds } 3575e873e4b9SWei Wang if (rt && !fib6_info_hold_safe(rt)) 3576e873e4b9SWei Wang rt = NULL; 357766f5d6ceSWei Wang rcu_read_unlock(); 35781da177e4SLinus Torvalds return rt; 35791da177e4SLinus Torvalds } 35801da177e4SLinus Torvalds 35818d1c802bSDavid Ahern struct fib6_info *rt6_add_dflt_router(struct net *net, 3582afb1d4b5SDavid Ahern const struct in6_addr *gwaddr, 3583ebacaaa0SYOSHIFUJI Hideaki struct net_device *dev, 3584ebacaaa0SYOSHIFUJI Hideaki unsigned int pref) 35851da177e4SLinus Torvalds { 358686872cb5SThomas Graf struct fib6_config cfg = { 3587ca254490SDavid Ahern .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT, 3588238fc7eaSRami Rosen .fc_metric = IP6_RT_PRIO_USER, 358986872cb5SThomas Graf .fc_ifindex = dev->ifindex, 359086872cb5SThomas Graf .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 359186872cb5SThomas Graf RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 3592b91d5329SXin Long .fc_protocol = RTPROT_RA, 3593e8478e80SDavid Ahern .fc_type = RTN_UNICAST, 359415e47304SEric W. 
Biederman .fc_nlinfo.portid = 0, 35955578689aSDaniel Lezcano .fc_nlinfo.nlh = NULL, 3596afb1d4b5SDavid Ahern .fc_nlinfo.nl_net = net, 359786872cb5SThomas Graf }; 35981da177e4SLinus Torvalds 35994e3fd7a0SAlexey Dobriyan cfg.fc_gateway = *gwaddr; 36001da177e4SLinus Torvalds 3601acb54e3cSDavid Ahern if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) { 3602830218c1SDavid Ahern struct fib6_table *table; 3603830218c1SDavid Ahern 3604830218c1SDavid Ahern table = fib6_get_table(dev_net(dev), cfg.fc_table); 3605830218c1SDavid Ahern if (table) 3606830218c1SDavid Ahern table->flags |= RT6_TABLE_HAS_DFLT_ROUTER; 3607830218c1SDavid Ahern } 36081da177e4SLinus Torvalds 3609afb1d4b5SDavid Ahern return rt6_get_dflt_router(net, gwaddr, dev); 36101da177e4SLinus Torvalds } 36111da177e4SLinus Torvalds 3612afb1d4b5SDavid Ahern static void __rt6_purge_dflt_routers(struct net *net, 3613afb1d4b5SDavid Ahern struct fib6_table *table) 36141da177e4SLinus Torvalds { 36158d1c802bSDavid Ahern struct fib6_info *rt; 36161da177e4SLinus Torvalds 36171da177e4SLinus Torvalds restart: 361866f5d6ceSWei Wang rcu_read_lock(); 361966f5d6ceSWei Wang for_each_fib6_node_rt_rcu(&table->tb6_root) { 3620dcd1f572SDavid Ahern struct net_device *dev = fib6_info_nh_dev(rt); 3621dcd1f572SDavid Ahern struct inet6_dev *idev = dev ? 
__in6_dev_get(dev) : NULL; 3622dcd1f572SDavid Ahern 362393c2fb25SDavid Ahern if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) && 3624e873e4b9SWei Wang (!idev || idev->cnf.accept_ra != 2) && 3625e873e4b9SWei Wang fib6_info_hold_safe(rt)) { 362666f5d6ceSWei Wang rcu_read_unlock(); 3627afb1d4b5SDavid Ahern ip6_del_rt(net, rt); 36281da177e4SLinus Torvalds goto restart; 36291da177e4SLinus Torvalds } 36301da177e4SLinus Torvalds } 363166f5d6ceSWei Wang rcu_read_unlock(); 3632830218c1SDavid Ahern 3633830218c1SDavid Ahern table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER; 3634830218c1SDavid Ahern } 3635830218c1SDavid Ahern 3636830218c1SDavid Ahern void rt6_purge_dflt_routers(struct net *net) 3637830218c1SDavid Ahern { 3638830218c1SDavid Ahern struct fib6_table *table; 3639830218c1SDavid Ahern struct hlist_head *head; 3640830218c1SDavid Ahern unsigned int h; 3641830218c1SDavid Ahern 3642830218c1SDavid Ahern rcu_read_lock(); 3643830218c1SDavid Ahern 3644830218c1SDavid Ahern for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { 3645830218c1SDavid Ahern head = &net->ipv6.fib_table_hash[h]; 3646830218c1SDavid Ahern hlist_for_each_entry_rcu(table, head, tb6_hlist) { 3647830218c1SDavid Ahern if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER) 3648afb1d4b5SDavid Ahern __rt6_purge_dflt_routers(net, table); 3649830218c1SDavid Ahern } 3650830218c1SDavid Ahern } 3651830218c1SDavid Ahern 3652830218c1SDavid Ahern rcu_read_unlock(); 36531da177e4SLinus Torvalds } 36541da177e4SLinus Torvalds 36555578689aSDaniel Lezcano static void rtmsg_to_fib6_config(struct net *net, 36565578689aSDaniel Lezcano struct in6_rtmsg *rtmsg, 365786872cb5SThomas Graf struct fib6_config *cfg) 365886872cb5SThomas Graf { 36598823a3acSMaciej Żenczykowski *cfg = (struct fib6_config){ 36608823a3acSMaciej Żenczykowski .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? 
36618823a3acSMaciej Żenczykowski : RT6_TABLE_MAIN, 36628823a3acSMaciej Żenczykowski .fc_ifindex = rtmsg->rtmsg_ifindex, 366367f69513SDavid Ahern .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER, 36648823a3acSMaciej Żenczykowski .fc_expires = rtmsg->rtmsg_info, 36658823a3acSMaciej Żenczykowski .fc_dst_len = rtmsg->rtmsg_dst_len, 36668823a3acSMaciej Żenczykowski .fc_src_len = rtmsg->rtmsg_src_len, 36678823a3acSMaciej Żenczykowski .fc_flags = rtmsg->rtmsg_flags, 36688823a3acSMaciej Żenczykowski .fc_type = rtmsg->rtmsg_type, 366986872cb5SThomas Graf 36708823a3acSMaciej Żenczykowski .fc_nlinfo.nl_net = net, 367186872cb5SThomas Graf 36728823a3acSMaciej Żenczykowski .fc_dst = rtmsg->rtmsg_dst, 36738823a3acSMaciej Żenczykowski .fc_src = rtmsg->rtmsg_src, 36748823a3acSMaciej Żenczykowski .fc_gateway = rtmsg->rtmsg_gateway, 36758823a3acSMaciej Żenczykowski }; 367686872cb5SThomas Graf } 367786872cb5SThomas Graf 36785578689aSDaniel Lezcano int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) 36791da177e4SLinus Torvalds { 368086872cb5SThomas Graf struct fib6_config cfg; 36811da177e4SLinus Torvalds struct in6_rtmsg rtmsg; 36821da177e4SLinus Torvalds int err; 36831da177e4SLinus Torvalds 36841da177e4SLinus Torvalds switch (cmd) { 36851da177e4SLinus Torvalds case SIOCADDRT: /* Add a route */ 36861da177e4SLinus Torvalds case SIOCDELRT: /* Delete a route */ 3687af31f412SEric W. 
Biederman if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) 36881da177e4SLinus Torvalds return -EPERM; 36891da177e4SLinus Torvalds err = copy_from_user(&rtmsg, arg, 36901da177e4SLinus Torvalds sizeof(struct in6_rtmsg)); 36911da177e4SLinus Torvalds if (err) 36921da177e4SLinus Torvalds return -EFAULT; 36931da177e4SLinus Torvalds 36945578689aSDaniel Lezcano rtmsg_to_fib6_config(net, &rtmsg, &cfg); 369586872cb5SThomas Graf 36961da177e4SLinus Torvalds rtnl_lock(); 36971da177e4SLinus Torvalds switch (cmd) { 36981da177e4SLinus Torvalds case SIOCADDRT: 3699acb54e3cSDavid Ahern err = ip6_route_add(&cfg, GFP_KERNEL, NULL); 37001da177e4SLinus Torvalds break; 37011da177e4SLinus Torvalds case SIOCDELRT: 3702333c4301SDavid Ahern err = ip6_route_del(&cfg, NULL); 37031da177e4SLinus Torvalds break; 37041da177e4SLinus Torvalds default: 37051da177e4SLinus Torvalds err = -EINVAL; 37061da177e4SLinus Torvalds } 37071da177e4SLinus Torvalds rtnl_unlock(); 37081da177e4SLinus Torvalds 37091da177e4SLinus Torvalds return err; 37103ff50b79SStephen Hemminger } 37111da177e4SLinus Torvalds 37121da177e4SLinus Torvalds return -EINVAL; 37131da177e4SLinus Torvalds } 37141da177e4SLinus Torvalds 37151da177e4SLinus Torvalds /* 37161da177e4SLinus Torvalds * Drop the packet on the floor 37171da177e4SLinus Torvalds */ 37181da177e4SLinus Torvalds 3719d5fdd6baSBrian Haley static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) 37201da177e4SLinus Torvalds { 3721612f09e8SYOSHIFUJI Hideaki int type; 3722adf30907SEric Dumazet struct dst_entry *dst = skb_dst(skb); 3723612f09e8SYOSHIFUJI Hideaki switch (ipstats_mib_noroutes) { 3724612f09e8SYOSHIFUJI Hideaki case IPSTATS_MIB_INNOROUTES: 37250660e03fSArnaldo Carvalho de Melo type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); 372645bb0060SUlrich Weber if (type == IPV6_ADDR_ANY) { 3727bdb7cc64SStephen Suryaputra IP6_INC_STATS(dev_net(dst->dev), 3728bdb7cc64SStephen Suryaputra __in6_dev_get_safely(skb->dev), 37293bd653c8SDenis V. 
Lunev IPSTATS_MIB_INADDRERRORS); 3730612f09e8SYOSHIFUJI Hideaki break; 3731612f09e8SYOSHIFUJI Hideaki } 3732612f09e8SYOSHIFUJI Hideaki /* FALLTHROUGH */ 3733612f09e8SYOSHIFUJI Hideaki case IPSTATS_MIB_OUTNOROUTES: 37343bd653c8SDenis V. Lunev IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), 37353bd653c8SDenis V. Lunev ipstats_mib_noroutes); 3736612f09e8SYOSHIFUJI Hideaki break; 3737612f09e8SYOSHIFUJI Hideaki } 37383ffe533cSAlexey Dobriyan icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); 37391da177e4SLinus Torvalds kfree_skb(skb); 37401da177e4SLinus Torvalds return 0; 37411da177e4SLinus Torvalds } 37421da177e4SLinus Torvalds 37439ce8ade0SThomas Graf static int ip6_pkt_discard(struct sk_buff *skb) 37449ce8ade0SThomas Graf { 3745612f09e8SYOSHIFUJI Hideaki return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); 37469ce8ade0SThomas Graf } 37479ce8ade0SThomas Graf 3748ede2059dSEric W. Biederman static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) 37491da177e4SLinus Torvalds { 3750adf30907SEric Dumazet skb->dev = skb_dst(skb)->dev; 3751612f09e8SYOSHIFUJI Hideaki return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); 37521da177e4SLinus Torvalds } 37531da177e4SLinus Torvalds 37549ce8ade0SThomas Graf static int ip6_pkt_prohibit(struct sk_buff *skb) 37559ce8ade0SThomas Graf { 3756612f09e8SYOSHIFUJI Hideaki return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); 37579ce8ade0SThomas Graf } 37589ce8ade0SThomas Graf 3759ede2059dSEric W. Biederman static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb) 37609ce8ade0SThomas Graf { 3761adf30907SEric Dumazet skb->dev = skb_dst(skb)->dev; 3762612f09e8SYOSHIFUJI Hideaki return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); 37639ce8ade0SThomas Graf } 37649ce8ade0SThomas Graf 37651da177e4SLinus Torvalds /* 37661da177e4SLinus Torvalds * Allocate a dst for local (unicast / anycast) address. 
37671da177e4SLinus Torvalds */ 37681da177e4SLinus Torvalds 3769360a9887SDavid Ahern struct fib6_info *addrconf_f6i_alloc(struct net *net, 3770afb1d4b5SDavid Ahern struct inet6_dev *idev, 37711da177e4SLinus Torvalds const struct in6_addr *addr, 3772acb54e3cSDavid Ahern bool anycast, gfp_t gfp_flags) 37731da177e4SLinus Torvalds { 3774c7a1ce39SDavid Ahern struct fib6_config cfg = { 3775c7a1ce39SDavid Ahern .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL, 3776c7a1ce39SDavid Ahern .fc_ifindex = idev->dev->ifindex, 3777c7a1ce39SDavid Ahern .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP, 3778c7a1ce39SDavid Ahern .fc_dst = *addr, 3779c7a1ce39SDavid Ahern .fc_dst_len = 128, 3780c7a1ce39SDavid Ahern .fc_protocol = RTPROT_KERNEL, 3781c7a1ce39SDavid Ahern .fc_nlinfo.nl_net = net, 3782c7a1ce39SDavid Ahern .fc_ignore_dev_down = true, 3783c7a1ce39SDavid Ahern }; 37845f02ce24SDavid Ahern 3785e8478e80SDavid Ahern if (anycast) { 3786c7a1ce39SDavid Ahern cfg.fc_type = RTN_ANYCAST; 3787c7a1ce39SDavid Ahern cfg.fc_flags |= RTF_ANYCAST; 3788e8478e80SDavid Ahern } else { 3789c7a1ce39SDavid Ahern cfg.fc_type = RTN_LOCAL; 3790c7a1ce39SDavid Ahern cfg.fc_flags |= RTF_LOCAL; 3791e8478e80SDavid Ahern } 37921da177e4SLinus Torvalds 3793c7a1ce39SDavid Ahern return ip6_route_info_create(&cfg, gfp_flags, NULL); 37941da177e4SLinus Torvalds } 37951da177e4SLinus Torvalds 3796c3968a85SDaniel Walter /* remove deleted ip from prefsrc entries */ 3797c3968a85SDaniel Walter struct arg_dev_net_ip { 3798c3968a85SDaniel Walter struct net_device *dev; 3799c3968a85SDaniel Walter struct net *net; 3800c3968a85SDaniel Walter struct in6_addr *addr; 3801c3968a85SDaniel Walter }; 3802c3968a85SDaniel Walter 38038d1c802bSDavid Ahern static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg) 3804c3968a85SDaniel Walter { 3805c3968a85SDaniel Walter struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev; 3806c3968a85SDaniel Walter struct net *net = ((struct arg_dev_net_ip *)arg)->net; 
3807c3968a85SDaniel Walter struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; 3808c3968a85SDaniel Walter 3809ad1601aeSDavid Ahern if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) && 3810421842edSDavid Ahern rt != net->ipv6.fib6_null_entry && 381193c2fb25SDavid Ahern ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) { 381260006a48SWei Wang spin_lock_bh(&rt6_exception_lock); 3813c3968a85SDaniel Walter /* remove prefsrc entry */ 381493c2fb25SDavid Ahern rt->fib6_prefsrc.plen = 0; 381560006a48SWei Wang spin_unlock_bh(&rt6_exception_lock); 3816c3968a85SDaniel Walter } 3817c3968a85SDaniel Walter return 0; 3818c3968a85SDaniel Walter } 3819c3968a85SDaniel Walter 3820c3968a85SDaniel Walter void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) 3821c3968a85SDaniel Walter { 3822c3968a85SDaniel Walter struct net *net = dev_net(ifp->idev->dev); 3823c3968a85SDaniel Walter struct arg_dev_net_ip adni = { 3824c3968a85SDaniel Walter .dev = ifp->idev->dev, 3825c3968a85SDaniel Walter .net = net, 3826c3968a85SDaniel Walter .addr = &ifp->addr, 3827c3968a85SDaniel Walter }; 38280c3584d5SLi RongQing fib6_clean_all(net, fib6_remove_prefsrc, &adni); 3829c3968a85SDaniel Walter } 3830c3968a85SDaniel Walter 38312b2450caSDavid Ahern #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT) 3832be7a010dSDuan Jiong 3833be7a010dSDuan Jiong /* Remove routers and update dst entries when gateway turn into host. */ 38348d1c802bSDavid Ahern static int fib6_clean_tohost(struct fib6_info *rt, void *arg) 3835be7a010dSDuan Jiong { 3836be7a010dSDuan Jiong struct in6_addr *gateway = (struct in6_addr *)arg; 3837be7a010dSDuan Jiong 383893c2fb25SDavid Ahern if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && 3839bdf00467SDavid Ahern rt->fib6_nh.fib_nh_gw_family && 3840ad1601aeSDavid Ahern ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) { 3841be7a010dSDuan Jiong return -1; 3842be7a010dSDuan Jiong } 3843b16cb459SWei Wang 3844b16cb459SWei Wang /* Further clean up cached routes in exception table. 
3845b16cb459SWei Wang * This is needed because cached route may have a different 3846b16cb459SWei Wang * gateway than its 'parent' in the case of an ip redirect. 3847b16cb459SWei Wang */ 3848b16cb459SWei Wang rt6_exceptions_clean_tohost(rt, gateway); 3849b16cb459SWei Wang 3850be7a010dSDuan Jiong return 0; 3851be7a010dSDuan Jiong } 3852be7a010dSDuan Jiong 3853be7a010dSDuan Jiong void rt6_clean_tohost(struct net *net, struct in6_addr *gateway) 3854be7a010dSDuan Jiong { 3855be7a010dSDuan Jiong fib6_clean_all(net, fib6_clean_tohost, gateway); 3856be7a010dSDuan Jiong } 3857be7a010dSDuan Jiong 38582127d95aSIdo Schimmel struct arg_netdev_event { 38592127d95aSIdo Schimmel const struct net_device *dev; 38604c981e28SIdo Schimmel union { 38612127d95aSIdo Schimmel unsigned int nh_flags; 38624c981e28SIdo Schimmel unsigned long event; 38634c981e28SIdo Schimmel }; 38642127d95aSIdo Schimmel }; 38652127d95aSIdo Schimmel 38668d1c802bSDavid Ahern static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt) 3867d7dedee1SIdo Schimmel { 38688d1c802bSDavid Ahern struct fib6_info *iter; 3869d7dedee1SIdo Schimmel struct fib6_node *fn; 3870d7dedee1SIdo Schimmel 387193c2fb25SDavid Ahern fn = rcu_dereference_protected(rt->fib6_node, 387293c2fb25SDavid Ahern lockdep_is_held(&rt->fib6_table->tb6_lock)); 3873d7dedee1SIdo Schimmel iter = rcu_dereference_protected(fn->leaf, 387493c2fb25SDavid Ahern lockdep_is_held(&rt->fib6_table->tb6_lock)); 3875d7dedee1SIdo Schimmel while (iter) { 387693c2fb25SDavid Ahern if (iter->fib6_metric == rt->fib6_metric && 387733bd5ac5SDavid Ahern rt6_qualify_for_ecmp(iter)) 3878d7dedee1SIdo Schimmel return iter; 38798fb11a9aSDavid Ahern iter = rcu_dereference_protected(iter->fib6_next, 388093c2fb25SDavid Ahern lockdep_is_held(&rt->fib6_table->tb6_lock)); 3881d7dedee1SIdo Schimmel } 3882d7dedee1SIdo Schimmel 3883d7dedee1SIdo Schimmel return NULL; 3884d7dedee1SIdo Schimmel } 3885d7dedee1SIdo Schimmel 38868d1c802bSDavid Ahern static bool 
rt6_is_dead(const struct fib6_info *rt) 3887d7dedee1SIdo Schimmel { 3888ad1601aeSDavid Ahern if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD || 3889ad1601aeSDavid Ahern (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN && 3890ad1601aeSDavid Ahern ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev))) 3891d7dedee1SIdo Schimmel return true; 3892d7dedee1SIdo Schimmel 3893d7dedee1SIdo Schimmel return false; 3894d7dedee1SIdo Schimmel } 3895d7dedee1SIdo Schimmel 38968d1c802bSDavid Ahern static int rt6_multipath_total_weight(const struct fib6_info *rt) 3897d7dedee1SIdo Schimmel { 38988d1c802bSDavid Ahern struct fib6_info *iter; 3899d7dedee1SIdo Schimmel int total = 0; 3900d7dedee1SIdo Schimmel 3901d7dedee1SIdo Schimmel if (!rt6_is_dead(rt)) 3902ad1601aeSDavid Ahern total += rt->fib6_nh.fib_nh_weight; 3903d7dedee1SIdo Schimmel 390493c2fb25SDavid Ahern list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { 3905d7dedee1SIdo Schimmel if (!rt6_is_dead(iter)) 3906ad1601aeSDavid Ahern total += iter->fib6_nh.fib_nh_weight; 3907d7dedee1SIdo Schimmel } 3908d7dedee1SIdo Schimmel 3909d7dedee1SIdo Schimmel return total; 3910d7dedee1SIdo Schimmel } 3911d7dedee1SIdo Schimmel 39128d1c802bSDavid Ahern static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total) 3913d7dedee1SIdo Schimmel { 3914d7dedee1SIdo Schimmel int upper_bound = -1; 3915d7dedee1SIdo Schimmel 3916d7dedee1SIdo Schimmel if (!rt6_is_dead(rt)) { 3917ad1601aeSDavid Ahern *weight += rt->fib6_nh.fib_nh_weight; 3918d7dedee1SIdo Schimmel upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, 3919d7dedee1SIdo Schimmel total) - 1; 3920d7dedee1SIdo Schimmel } 3921ad1601aeSDavid Ahern atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound); 3922d7dedee1SIdo Schimmel } 3923d7dedee1SIdo Schimmel 39248d1c802bSDavid Ahern static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total) 3925d7dedee1SIdo Schimmel { 39268d1c802bSDavid Ahern struct fib6_info *iter; 3927d7dedee1SIdo Schimmel int weight = 0; 
3928d7dedee1SIdo Schimmel 3929d7dedee1SIdo Schimmel rt6_upper_bound_set(rt, &weight, total); 3930d7dedee1SIdo Schimmel 393193c2fb25SDavid Ahern list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) 3932d7dedee1SIdo Schimmel rt6_upper_bound_set(iter, &weight, total); 3933d7dedee1SIdo Schimmel } 3934d7dedee1SIdo Schimmel 39358d1c802bSDavid Ahern void rt6_multipath_rebalance(struct fib6_info *rt) 3936d7dedee1SIdo Schimmel { 39378d1c802bSDavid Ahern struct fib6_info *first; 3938d7dedee1SIdo Schimmel int total; 3939d7dedee1SIdo Schimmel 3940d7dedee1SIdo Schimmel /* In case the entire multipath route was marked for flushing, 3941d7dedee1SIdo Schimmel * then there is no need to rebalance upon the removal of every 3942d7dedee1SIdo Schimmel * sibling route. 3943d7dedee1SIdo Schimmel */ 394493c2fb25SDavid Ahern if (!rt->fib6_nsiblings || rt->should_flush) 3945d7dedee1SIdo Schimmel return; 3946d7dedee1SIdo Schimmel 3947d7dedee1SIdo Schimmel /* During lookup routes are evaluated in order, so we need to 3948d7dedee1SIdo Schimmel * make sure upper bounds are assigned from the first sibling 3949d7dedee1SIdo Schimmel * onwards. 
3950d7dedee1SIdo Schimmel */ 3951d7dedee1SIdo Schimmel first = rt6_multipath_first_sibling(rt); 3952d7dedee1SIdo Schimmel if (WARN_ON_ONCE(!first)) 3953d7dedee1SIdo Schimmel return; 3954d7dedee1SIdo Schimmel 3955d7dedee1SIdo Schimmel total = rt6_multipath_total_weight(first); 3956d7dedee1SIdo Schimmel rt6_multipath_upper_bound_set(first, total); 3957d7dedee1SIdo Schimmel } 3958d7dedee1SIdo Schimmel 39598d1c802bSDavid Ahern static int fib6_ifup(struct fib6_info *rt, void *p_arg) 39602127d95aSIdo Schimmel { 39612127d95aSIdo Schimmel const struct arg_netdev_event *arg = p_arg; 39627aef6859SDavid Ahern struct net *net = dev_net(arg->dev); 39632127d95aSIdo Schimmel 3964ad1601aeSDavid Ahern if (rt != net->ipv6.fib6_null_entry && 3965ad1601aeSDavid Ahern rt->fib6_nh.fib_nh_dev == arg->dev) { 3966ad1601aeSDavid Ahern rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags; 39677aef6859SDavid Ahern fib6_update_sernum_upto_root(net, rt); 3968d7dedee1SIdo Schimmel rt6_multipath_rebalance(rt); 39691de178edSIdo Schimmel } 39702127d95aSIdo Schimmel 39712127d95aSIdo Schimmel return 0; 39722127d95aSIdo Schimmel } 39732127d95aSIdo Schimmel 39742127d95aSIdo Schimmel void rt6_sync_up(struct net_device *dev, unsigned int nh_flags) 39752127d95aSIdo Schimmel { 39762127d95aSIdo Schimmel struct arg_netdev_event arg = { 39772127d95aSIdo Schimmel .dev = dev, 39786802f3adSIdo Schimmel { 39792127d95aSIdo Schimmel .nh_flags = nh_flags, 39806802f3adSIdo Schimmel }, 39812127d95aSIdo Schimmel }; 39822127d95aSIdo Schimmel 39832127d95aSIdo Schimmel if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev)) 39842127d95aSIdo Schimmel arg.nh_flags |= RTNH_F_LINKDOWN; 39852127d95aSIdo Schimmel 39862127d95aSIdo Schimmel fib6_clean_all(dev_net(dev), fib6_ifup, &arg); 39872127d95aSIdo Schimmel } 39882127d95aSIdo Schimmel 39898d1c802bSDavid Ahern static bool rt6_multipath_uses_dev(const struct fib6_info *rt, 39901de178edSIdo Schimmel const struct net_device *dev) 39911de178edSIdo Schimmel { 39928d1c802bSDavid Ahern struct 
fib6_info *iter; 39931de178edSIdo Schimmel 3994ad1601aeSDavid Ahern if (rt->fib6_nh.fib_nh_dev == dev) 39951de178edSIdo Schimmel return true; 399693c2fb25SDavid Ahern list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) 3997ad1601aeSDavid Ahern if (iter->fib6_nh.fib_nh_dev == dev) 39981de178edSIdo Schimmel return true; 39991de178edSIdo Schimmel 40001de178edSIdo Schimmel return false; 40011de178edSIdo Schimmel } 40021de178edSIdo Schimmel 40038d1c802bSDavid Ahern static void rt6_multipath_flush(struct fib6_info *rt) 40041de178edSIdo Schimmel { 40058d1c802bSDavid Ahern struct fib6_info *iter; 40061de178edSIdo Schimmel 40071de178edSIdo Schimmel rt->should_flush = 1; 400893c2fb25SDavid Ahern list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) 40091de178edSIdo Schimmel iter->should_flush = 1; 40101de178edSIdo Schimmel } 40111de178edSIdo Schimmel 40128d1c802bSDavid Ahern static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt, 40131de178edSIdo Schimmel const struct net_device *down_dev) 40141de178edSIdo Schimmel { 40158d1c802bSDavid Ahern struct fib6_info *iter; 40161de178edSIdo Schimmel unsigned int dead = 0; 40171de178edSIdo Schimmel 4018ad1601aeSDavid Ahern if (rt->fib6_nh.fib_nh_dev == down_dev || 4019ad1601aeSDavid Ahern rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) 40201de178edSIdo Schimmel dead++; 402193c2fb25SDavid Ahern list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) 4022ad1601aeSDavid Ahern if (iter->fib6_nh.fib_nh_dev == down_dev || 4023ad1601aeSDavid Ahern iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD) 40241de178edSIdo Schimmel dead++; 40251de178edSIdo Schimmel 40261de178edSIdo Schimmel return dead; 40271de178edSIdo Schimmel } 40281de178edSIdo Schimmel 40298d1c802bSDavid Ahern static void rt6_multipath_nh_flags_set(struct fib6_info *rt, 40301de178edSIdo Schimmel const struct net_device *dev, 40311de178edSIdo Schimmel unsigned int nh_flags) 40321de178edSIdo Schimmel { 40338d1c802bSDavid Ahern struct fib6_info *iter; 
40341de178edSIdo Schimmel 4035ad1601aeSDavid Ahern if (rt->fib6_nh.fib_nh_dev == dev) 4036ad1601aeSDavid Ahern rt->fib6_nh.fib_nh_flags |= nh_flags; 403793c2fb25SDavid Ahern list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) 4038ad1601aeSDavid Ahern if (iter->fib6_nh.fib_nh_dev == dev) 4039ad1601aeSDavid Ahern iter->fib6_nh.fib_nh_flags |= nh_flags; 40401de178edSIdo Schimmel } 40411de178edSIdo Schimmel 4042a1a22c12SDavid Ahern /* called with write lock held for table with rt */ 40438d1c802bSDavid Ahern static int fib6_ifdown(struct fib6_info *rt, void *p_arg) 40441da177e4SLinus Torvalds { 40454c981e28SIdo Schimmel const struct arg_netdev_event *arg = p_arg; 40464c981e28SIdo Schimmel const struct net_device *dev = arg->dev; 40477aef6859SDavid Ahern struct net *net = dev_net(dev); 40488ed67789SDaniel Lezcano 4049421842edSDavid Ahern if (rt == net->ipv6.fib6_null_entry) 405027c6fa73SIdo Schimmel return 0; 405127c6fa73SIdo Schimmel 405227c6fa73SIdo Schimmel switch (arg->event) { 405327c6fa73SIdo Schimmel case NETDEV_UNREGISTER: 4054ad1601aeSDavid Ahern return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0; 405527c6fa73SIdo Schimmel case NETDEV_DOWN: 40561de178edSIdo Schimmel if (rt->should_flush) 405727c6fa73SIdo Schimmel return -1; 405893c2fb25SDavid Ahern if (!rt->fib6_nsiblings) 4059ad1601aeSDavid Ahern return rt->fib6_nh.fib_nh_dev == dev ? 
-1 : 0; 40601de178edSIdo Schimmel if (rt6_multipath_uses_dev(rt, dev)) { 40611de178edSIdo Schimmel unsigned int count; 40621de178edSIdo Schimmel 40631de178edSIdo Schimmel count = rt6_multipath_dead_count(rt, dev); 406493c2fb25SDavid Ahern if (rt->fib6_nsiblings + 1 == count) { 40651de178edSIdo Schimmel rt6_multipath_flush(rt); 40661de178edSIdo Schimmel return -1; 40671de178edSIdo Schimmel } 40681de178edSIdo Schimmel rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD | 40691de178edSIdo Schimmel RTNH_F_LINKDOWN); 40707aef6859SDavid Ahern fib6_update_sernum(net, rt); 4071d7dedee1SIdo Schimmel rt6_multipath_rebalance(rt); 40721de178edSIdo Schimmel } 40731de178edSIdo Schimmel return -2; 407427c6fa73SIdo Schimmel case NETDEV_CHANGE: 4075ad1601aeSDavid Ahern if (rt->fib6_nh.fib_nh_dev != dev || 407693c2fb25SDavid Ahern rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) 407727c6fa73SIdo Schimmel break; 4078ad1601aeSDavid Ahern rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN; 4079d7dedee1SIdo Schimmel rt6_multipath_rebalance(rt); 408027c6fa73SIdo Schimmel break; 40812b241361SIdo Schimmel } 4082c159d30cSDavid S. 
Miller 40831da177e4SLinus Torvalds return 0; 40841da177e4SLinus Torvalds } 40851da177e4SLinus Torvalds 408627c6fa73SIdo Schimmel void rt6_sync_down_dev(struct net_device *dev, unsigned long event) 40871da177e4SLinus Torvalds { 40884c981e28SIdo Schimmel struct arg_netdev_event arg = { 40898ed67789SDaniel Lezcano .dev = dev, 40906802f3adSIdo Schimmel { 40914c981e28SIdo Schimmel .event = event, 40926802f3adSIdo Schimmel }, 40938ed67789SDaniel Lezcano }; 40947c6bb7d2SDavid Ahern struct net *net = dev_net(dev); 40958ed67789SDaniel Lezcano 40967c6bb7d2SDavid Ahern if (net->ipv6.sysctl.skip_notify_on_dev_down) 40977c6bb7d2SDavid Ahern fib6_clean_all_skip_notify(net, fib6_ifdown, &arg); 40987c6bb7d2SDavid Ahern else 40997c6bb7d2SDavid Ahern fib6_clean_all(net, fib6_ifdown, &arg); 41004c981e28SIdo Schimmel } 41014c981e28SIdo Schimmel 41024c981e28SIdo Schimmel void rt6_disable_ip(struct net_device *dev, unsigned long event) 41034c981e28SIdo Schimmel { 41044c981e28SIdo Schimmel rt6_sync_down_dev(dev, event); 41054c981e28SIdo Schimmel rt6_uncached_list_flush_dev(dev_net(dev), dev); 41064c981e28SIdo Schimmel neigh_ifdown(&nd_tbl, dev); 41071da177e4SLinus Torvalds } 41081da177e4SLinus Torvalds 410995c96174SEric Dumazet struct rt6_mtu_change_arg { 41101da177e4SLinus Torvalds struct net_device *dev; 411195c96174SEric Dumazet unsigned int mtu; 41121da177e4SLinus Torvalds }; 41131da177e4SLinus Torvalds 41148d1c802bSDavid Ahern static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg) 41151da177e4SLinus Torvalds { 41161da177e4SLinus Torvalds struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 41171da177e4SLinus Torvalds struct inet6_dev *idev; 41181da177e4SLinus Torvalds 41191da177e4SLinus Torvalds /* In IPv6 pmtu discovery is not optional, 41201da177e4SLinus Torvalds so that RTAX_MTU lock cannot disable it. 41211da177e4SLinus Torvalds We still use this lock to block changes 41221da177e4SLinus Torvalds caused by addrconf/ndisc. 
41231da177e4SLinus Torvalds */ 41241da177e4SLinus Torvalds 41251da177e4SLinus Torvalds idev = __in6_dev_get(arg->dev); 412638308473SDavid S. Miller if (!idev) 41271da177e4SLinus Torvalds return 0; 41281da177e4SLinus Torvalds 41291da177e4SLinus Torvalds /* For administrative MTU increase, there is no way to discover 41301da177e4SLinus Torvalds IPv6 PMTU increase, so PMTU increase should be updated here. 41311da177e4SLinus Torvalds Since RFC 1981 doesn't include administrative MTU increase 41321da177e4SLinus Torvalds update PMTU increase is a MUST. (i.e. jumbo frame) 41331da177e4SLinus Torvalds */ 4134ad1601aeSDavid Ahern if (rt->fib6_nh.fib_nh_dev == arg->dev && 4135d4ead6b3SDavid Ahern !fib6_metric_locked(rt, RTAX_MTU)) { 4136d4ead6b3SDavid Ahern u32 mtu = rt->fib6_pmtu; 4137d4ead6b3SDavid Ahern 4138d4ead6b3SDavid Ahern if (mtu >= arg->mtu || 4139d4ead6b3SDavid Ahern (mtu < arg->mtu && mtu == idev->cnf.mtu6)) 4140d4ead6b3SDavid Ahern fib6_metric_set(rt, RTAX_MTU, arg->mtu); 4141d4ead6b3SDavid Ahern 4142f5bbe7eeSWei Wang spin_lock_bh(&rt6_exception_lock); 4143e9fa1495SStefano Brivio rt6_exceptions_update_pmtu(idev, rt, arg->mtu); 4144f5bbe7eeSWei Wang spin_unlock_bh(&rt6_exception_lock); 41454b32b5adSMartin KaFai Lau } 41461da177e4SLinus Torvalds return 0; 41471da177e4SLinus Torvalds } 41481da177e4SLinus Torvalds 414995c96174SEric Dumazet void rt6_mtu_change(struct net_device *dev, unsigned int mtu) 41501da177e4SLinus Torvalds { 4151c71099acSThomas Graf struct rt6_mtu_change_arg arg = { 4152c71099acSThomas Graf .dev = dev, 4153c71099acSThomas Graf .mtu = mtu, 4154c71099acSThomas Graf }; 41551da177e4SLinus Torvalds 41560c3584d5SLi RongQing fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg); 41571da177e4SLinus Torvalds } 41581da177e4SLinus Torvalds 4159ef7c79edSPatrick McHardy static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { 41605176f91eSThomas Graf [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, 4161aa8f8778SEric Dumazet [RTA_PREFSRC] = { .len 
= sizeof(struct in6_addr) }, 416286872cb5SThomas Graf [RTA_OIF] = { .type = NLA_U32 }, 4163ab364a6fSThomas Graf [RTA_IIF] = { .type = NLA_U32 }, 416486872cb5SThomas Graf [RTA_PRIORITY] = { .type = NLA_U32 }, 416586872cb5SThomas Graf [RTA_METRICS] = { .type = NLA_NESTED }, 416651ebd318SNicolas Dichtel [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 4167c78ba6d6SLubomir Rintel [RTA_PREF] = { .type = NLA_U8 }, 416819e42e45SRoopa Prabhu [RTA_ENCAP_TYPE] = { .type = NLA_U16 }, 416919e42e45SRoopa Prabhu [RTA_ENCAP] = { .type = NLA_NESTED }, 417032bc201eSXin Long [RTA_EXPIRES] = { .type = NLA_U32 }, 4171622ec2c9SLorenzo Colitti [RTA_UID] = { .type = NLA_U32 }, 41723b45a410SLiping Zhang [RTA_MARK] = { .type = NLA_U32 }, 4173aa8f8778SEric Dumazet [RTA_TABLE] = { .type = NLA_U32 }, 4174eacb9384SRoopa Prabhu [RTA_IP_PROTO] = { .type = NLA_U8 }, 4175eacb9384SRoopa Prabhu [RTA_SPORT] = { .type = NLA_U16 }, 4176eacb9384SRoopa Prabhu [RTA_DPORT] = { .type = NLA_U16 }, 417786872cb5SThomas Graf }; 417886872cb5SThomas Graf 417986872cb5SThomas Graf static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, 4180333c4301SDavid Ahern struct fib6_config *cfg, 4181333c4301SDavid Ahern struct netlink_ext_ack *extack) 41821da177e4SLinus Torvalds { 418386872cb5SThomas Graf struct rtmsg *rtm; 418486872cb5SThomas Graf struct nlattr *tb[RTA_MAX+1]; 4185c78ba6d6SLubomir Rintel unsigned int pref; 418686872cb5SThomas Graf int err; 41871da177e4SLinus Torvalds 4188fceb6435SJohannes Berg err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, 4189dac9c979SDavid Ahern extack); 419086872cb5SThomas Graf if (err < 0) 419186872cb5SThomas Graf goto errout; 41921da177e4SLinus Torvalds 419386872cb5SThomas Graf err = -EINVAL; 419486872cb5SThomas Graf rtm = nlmsg_data(nlh); 419586872cb5SThomas Graf 419684db8407SMaciej Żenczykowski *cfg = (struct fib6_config){ 419784db8407SMaciej Żenczykowski .fc_table = rtm->rtm_table, 419884db8407SMaciej Żenczykowski .fc_dst_len = 
rtm->rtm_dst_len, 419984db8407SMaciej Żenczykowski .fc_src_len = rtm->rtm_src_len, 420084db8407SMaciej Żenczykowski .fc_flags = RTF_UP, 420184db8407SMaciej Żenczykowski .fc_protocol = rtm->rtm_protocol, 420284db8407SMaciej Żenczykowski .fc_type = rtm->rtm_type, 420384db8407SMaciej Żenczykowski 420484db8407SMaciej Żenczykowski .fc_nlinfo.portid = NETLINK_CB(skb).portid, 420584db8407SMaciej Żenczykowski .fc_nlinfo.nlh = nlh, 420684db8407SMaciej Żenczykowski .fc_nlinfo.nl_net = sock_net(skb->sk), 420784db8407SMaciej Żenczykowski }; 420886872cb5SThomas Graf 4209ef2c7d7bSNicolas Dichtel if (rtm->rtm_type == RTN_UNREACHABLE || 4210ef2c7d7bSNicolas Dichtel rtm->rtm_type == RTN_BLACKHOLE || 4211b4949ab2SNicolas Dichtel rtm->rtm_type == RTN_PROHIBIT || 4212b4949ab2SNicolas Dichtel rtm->rtm_type == RTN_THROW) 421386872cb5SThomas Graf cfg->fc_flags |= RTF_REJECT; 421486872cb5SThomas Graf 4215ab79ad14SMaciej Żenczykowski if (rtm->rtm_type == RTN_LOCAL) 4216ab79ad14SMaciej Żenczykowski cfg->fc_flags |= RTF_LOCAL; 4217ab79ad14SMaciej Żenczykowski 42181f56a01fSMartin KaFai Lau if (rtm->rtm_flags & RTM_F_CLONED) 42191f56a01fSMartin KaFai Lau cfg->fc_flags |= RTF_CACHE; 42201f56a01fSMartin KaFai Lau 4221fc1e64e1SDavid Ahern cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK); 4222fc1e64e1SDavid Ahern 422386872cb5SThomas Graf if (tb[RTA_GATEWAY]) { 422467b61f6cSJiri Benc cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); 422586872cb5SThomas Graf cfg->fc_flags |= RTF_GATEWAY; 42261da177e4SLinus Torvalds } 4227e3818541SDavid Ahern if (tb[RTA_VIA]) { 4228e3818541SDavid Ahern NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute"); 4229e3818541SDavid Ahern goto errout; 4230e3818541SDavid Ahern } 423186872cb5SThomas Graf 423286872cb5SThomas Graf if (tb[RTA_DST]) { 423386872cb5SThomas Graf int plen = (rtm->rtm_dst_len + 7) >> 3; 423486872cb5SThomas Graf 423586872cb5SThomas Graf if (nla_len(tb[RTA_DST]) < plen) 423686872cb5SThomas Graf goto errout; 423786872cb5SThomas Graf 
423886872cb5SThomas Graf nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); 42391da177e4SLinus Torvalds } 424086872cb5SThomas Graf 424186872cb5SThomas Graf if (tb[RTA_SRC]) { 424286872cb5SThomas Graf int plen = (rtm->rtm_src_len + 7) >> 3; 424386872cb5SThomas Graf 424486872cb5SThomas Graf if (nla_len(tb[RTA_SRC]) < plen) 424586872cb5SThomas Graf goto errout; 424686872cb5SThomas Graf 424786872cb5SThomas Graf nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); 42481da177e4SLinus Torvalds } 424986872cb5SThomas Graf 4250c3968a85SDaniel Walter if (tb[RTA_PREFSRC]) 425167b61f6cSJiri Benc cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]); 4252c3968a85SDaniel Walter 425386872cb5SThomas Graf if (tb[RTA_OIF]) 425486872cb5SThomas Graf cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); 425586872cb5SThomas Graf 425686872cb5SThomas Graf if (tb[RTA_PRIORITY]) 425786872cb5SThomas Graf cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); 425886872cb5SThomas Graf 425986872cb5SThomas Graf if (tb[RTA_METRICS]) { 426086872cb5SThomas Graf cfg->fc_mx = nla_data(tb[RTA_METRICS]); 426186872cb5SThomas Graf cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); 42621da177e4SLinus Torvalds } 426386872cb5SThomas Graf 426486872cb5SThomas Graf if (tb[RTA_TABLE]) 426586872cb5SThomas Graf cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); 426686872cb5SThomas Graf 426751ebd318SNicolas Dichtel if (tb[RTA_MULTIPATH]) { 426851ebd318SNicolas Dichtel cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]); 426951ebd318SNicolas Dichtel cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); 42709ed59592SDavid Ahern 42719ed59592SDavid Ahern err = lwtunnel_valid_encap_type_attr(cfg->fc_mp, 4272c255bd68SDavid Ahern cfg->fc_mp_len, extack); 42739ed59592SDavid Ahern if (err < 0) 42749ed59592SDavid Ahern goto errout; 427551ebd318SNicolas Dichtel } 427651ebd318SNicolas Dichtel 4277c78ba6d6SLubomir Rintel if (tb[RTA_PREF]) { 4278c78ba6d6SLubomir Rintel pref = nla_get_u8(tb[RTA_PREF]); 4279c78ba6d6SLubomir Rintel if (pref != ICMPV6_ROUTER_PREF_LOW && 4280c78ba6d6SLubomir Rintel 
pref != ICMPV6_ROUTER_PREF_HIGH) 4281c78ba6d6SLubomir Rintel pref = ICMPV6_ROUTER_PREF_MEDIUM; 4282c78ba6d6SLubomir Rintel cfg->fc_flags |= RTF_PREF(pref); 4283c78ba6d6SLubomir Rintel } 4284c78ba6d6SLubomir Rintel 428519e42e45SRoopa Prabhu if (tb[RTA_ENCAP]) 428619e42e45SRoopa Prabhu cfg->fc_encap = tb[RTA_ENCAP]; 428719e42e45SRoopa Prabhu 42889ed59592SDavid Ahern if (tb[RTA_ENCAP_TYPE]) { 428919e42e45SRoopa Prabhu cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]); 429019e42e45SRoopa Prabhu 4291c255bd68SDavid Ahern err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack); 42929ed59592SDavid Ahern if (err < 0) 42939ed59592SDavid Ahern goto errout; 42949ed59592SDavid Ahern } 42959ed59592SDavid Ahern 429632bc201eSXin Long if (tb[RTA_EXPIRES]) { 429732bc201eSXin Long unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ); 429832bc201eSXin Long 429932bc201eSXin Long if (addrconf_finite_timeout(timeout)) { 430032bc201eSXin Long cfg->fc_expires = jiffies_to_clock_t(timeout * HZ); 430132bc201eSXin Long cfg->fc_flags |= RTF_EXPIRES; 430232bc201eSXin Long } 430332bc201eSXin Long } 430432bc201eSXin Long 430586872cb5SThomas Graf err = 0; 430686872cb5SThomas Graf errout: 430786872cb5SThomas Graf return err; 43081da177e4SLinus Torvalds } 43091da177e4SLinus Torvalds 43106b9ea5a6SRoopa Prabhu struct rt6_nh { 43118d1c802bSDavid Ahern struct fib6_info *fib6_info; 43126b9ea5a6SRoopa Prabhu struct fib6_config r_cfg; 43136b9ea5a6SRoopa Prabhu struct list_head next; 43146b9ea5a6SRoopa Prabhu }; 43156b9ea5a6SRoopa Prabhu 4316d4ead6b3SDavid Ahern static int ip6_route_info_append(struct net *net, 4317d4ead6b3SDavid Ahern struct list_head *rt6_nh_list, 43188d1c802bSDavid Ahern struct fib6_info *rt, 43198d1c802bSDavid Ahern struct fib6_config *r_cfg) 43206b9ea5a6SRoopa Prabhu { 43216b9ea5a6SRoopa Prabhu struct rt6_nh *nh; 43226b9ea5a6SRoopa Prabhu int err = -EEXIST; 43236b9ea5a6SRoopa Prabhu 43246b9ea5a6SRoopa Prabhu list_for_each_entry(nh, rt6_nh_list, next) 
{ 43258d1c802bSDavid Ahern /* check if fib6_info already exists */ 43268d1c802bSDavid Ahern if (rt6_duplicate_nexthop(nh->fib6_info, rt)) 43276b9ea5a6SRoopa Prabhu return err; 43286b9ea5a6SRoopa Prabhu } 43296b9ea5a6SRoopa Prabhu 43306b9ea5a6SRoopa Prabhu nh = kzalloc(sizeof(*nh), GFP_KERNEL); 43316b9ea5a6SRoopa Prabhu if (!nh) 43326b9ea5a6SRoopa Prabhu return -ENOMEM; 43338d1c802bSDavid Ahern nh->fib6_info = rt; 43346b9ea5a6SRoopa Prabhu memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg)); 43356b9ea5a6SRoopa Prabhu list_add_tail(&nh->next, rt6_nh_list); 43366b9ea5a6SRoopa Prabhu 43376b9ea5a6SRoopa Prabhu return 0; 43386b9ea5a6SRoopa Prabhu } 43396b9ea5a6SRoopa Prabhu 43408d1c802bSDavid Ahern static void ip6_route_mpath_notify(struct fib6_info *rt, 43418d1c802bSDavid Ahern struct fib6_info *rt_last, 43423b1137feSDavid Ahern struct nl_info *info, 43433b1137feSDavid Ahern __u16 nlflags) 43443b1137feSDavid Ahern { 43453b1137feSDavid Ahern /* if this is an APPEND route, then rt points to the first route 43463b1137feSDavid Ahern * inserted and rt_last points to last route inserted. Userspace 43473b1137feSDavid Ahern * wants a consistent dump of the route which starts at the first 43483b1137feSDavid Ahern * nexthop. 
Since sibling routes are always added at the end of 43493b1137feSDavid Ahern * the list, find the first sibling of the last route appended 43503b1137feSDavid Ahern */ 435193c2fb25SDavid Ahern if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) { 435293c2fb25SDavid Ahern rt = list_first_entry(&rt_last->fib6_siblings, 43538d1c802bSDavid Ahern struct fib6_info, 435493c2fb25SDavid Ahern fib6_siblings); 43553b1137feSDavid Ahern } 43563b1137feSDavid Ahern 43573b1137feSDavid Ahern if (rt) 43583b1137feSDavid Ahern inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); 43593b1137feSDavid Ahern } 43603b1137feSDavid Ahern 4361333c4301SDavid Ahern static int ip6_route_multipath_add(struct fib6_config *cfg, 4362333c4301SDavid Ahern struct netlink_ext_ack *extack) 436351ebd318SNicolas Dichtel { 43648d1c802bSDavid Ahern struct fib6_info *rt_notif = NULL, *rt_last = NULL; 43653b1137feSDavid Ahern struct nl_info *info = &cfg->fc_nlinfo; 436651ebd318SNicolas Dichtel struct fib6_config r_cfg; 436751ebd318SNicolas Dichtel struct rtnexthop *rtnh; 43688d1c802bSDavid Ahern struct fib6_info *rt; 43696b9ea5a6SRoopa Prabhu struct rt6_nh *err_nh; 43706b9ea5a6SRoopa Prabhu struct rt6_nh *nh, *nh_safe; 43713b1137feSDavid Ahern __u16 nlflags; 437251ebd318SNicolas Dichtel int remaining; 437351ebd318SNicolas Dichtel int attrlen; 43746b9ea5a6SRoopa Prabhu int err = 1; 43756b9ea5a6SRoopa Prabhu int nhn = 0; 43766b9ea5a6SRoopa Prabhu int replace = (cfg->fc_nlinfo.nlh && 43776b9ea5a6SRoopa Prabhu (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE)); 43786b9ea5a6SRoopa Prabhu LIST_HEAD(rt6_nh_list); 437951ebd318SNicolas Dichtel 43803b1137feSDavid Ahern nlflags = replace ? 
NLM_F_REPLACE : NLM_F_CREATE; 43813b1137feSDavid Ahern if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND) 43823b1137feSDavid Ahern nlflags |= NLM_F_APPEND; 43833b1137feSDavid Ahern 438435f1b4e9SMichal Kubeček remaining = cfg->fc_mp_len; 438551ebd318SNicolas Dichtel rtnh = (struct rtnexthop *)cfg->fc_mp; 438651ebd318SNicolas Dichtel 43876b9ea5a6SRoopa Prabhu /* Parse a Multipath Entry and build a list (rt6_nh_list) of 43888d1c802bSDavid Ahern * fib6_info structs per nexthop 43896b9ea5a6SRoopa Prabhu */ 439051ebd318SNicolas Dichtel while (rtnh_ok(rtnh, remaining)) { 439151ebd318SNicolas Dichtel memcpy(&r_cfg, cfg, sizeof(*cfg)); 439251ebd318SNicolas Dichtel if (rtnh->rtnh_ifindex) 439351ebd318SNicolas Dichtel r_cfg.fc_ifindex = rtnh->rtnh_ifindex; 439451ebd318SNicolas Dichtel 439551ebd318SNicolas Dichtel attrlen = rtnh_attrlen(rtnh); 439651ebd318SNicolas Dichtel if (attrlen > 0) { 439751ebd318SNicolas Dichtel struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 439851ebd318SNicolas Dichtel 439951ebd318SNicolas Dichtel nla = nla_find(attrs, attrlen, RTA_GATEWAY); 440051ebd318SNicolas Dichtel if (nla) { 440167b61f6cSJiri Benc r_cfg.fc_gateway = nla_get_in6_addr(nla); 440251ebd318SNicolas Dichtel r_cfg.fc_flags |= RTF_GATEWAY; 440351ebd318SNicolas Dichtel } 440419e42e45SRoopa Prabhu r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); 440519e42e45SRoopa Prabhu nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); 440619e42e45SRoopa Prabhu if (nla) 440719e42e45SRoopa Prabhu r_cfg.fc_encap_type = nla_get_u16(nla); 440851ebd318SNicolas Dichtel } 44096b9ea5a6SRoopa Prabhu 441068e2ffdeSDavid Ahern r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK); 4411acb54e3cSDavid Ahern rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack); 44128c5b83f0SRoopa Prabhu if (IS_ERR(rt)) { 44138c5b83f0SRoopa Prabhu err = PTR_ERR(rt); 44148c5b83f0SRoopa Prabhu rt = NULL; 44156b9ea5a6SRoopa Prabhu goto cleanup; 44168c5b83f0SRoopa Prabhu } 4417b5d2d75eSDavid Ahern if (!rt6_qualify_for_ecmp(rt)) { 
4418b5d2d75eSDavid Ahern err = -EINVAL; 4419b5d2d75eSDavid Ahern NL_SET_ERR_MSG(extack, 4420b5d2d75eSDavid Ahern "Device only routes can not be added for IPv6 using the multipath API."); 4421b5d2d75eSDavid Ahern fib6_info_release(rt); 4422b5d2d75eSDavid Ahern goto cleanup; 4423b5d2d75eSDavid Ahern } 44246b9ea5a6SRoopa Prabhu 4425ad1601aeSDavid Ahern rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1; 4426398958aeSIdo Schimmel 4427d4ead6b3SDavid Ahern err = ip6_route_info_append(info->nl_net, &rt6_nh_list, 4428d4ead6b3SDavid Ahern rt, &r_cfg); 442951ebd318SNicolas Dichtel if (err) { 443093531c67SDavid Ahern fib6_info_release(rt); 44316b9ea5a6SRoopa Prabhu goto cleanup; 443251ebd318SNicolas Dichtel } 44336b9ea5a6SRoopa Prabhu 44346b9ea5a6SRoopa Prabhu rtnh = rtnh_next(rtnh, &remaining); 443551ebd318SNicolas Dichtel } 44366b9ea5a6SRoopa Prabhu 44373b1137feSDavid Ahern /* for add and replace send one notification with all nexthops. 44383b1137feSDavid Ahern * Skip the notification in fib6_add_rt2node and send one with 44393b1137feSDavid Ahern * the full route when done 44403b1137feSDavid Ahern */ 44413b1137feSDavid Ahern info->skip_notify = 1; 44423b1137feSDavid Ahern 44436b9ea5a6SRoopa Prabhu err_nh = NULL; 44446b9ea5a6SRoopa Prabhu list_for_each_entry(nh, &rt6_nh_list, next) { 44458d1c802bSDavid Ahern err = __ip6_ins_rt(nh->fib6_info, info, extack); 44468d1c802bSDavid Ahern fib6_info_release(nh->fib6_info); 44473b1137feSDavid Ahern 4448f7225172SDavid Ahern if (!err) { 4449f7225172SDavid Ahern /* save reference to last route successfully inserted */ 4450f7225172SDavid Ahern rt_last = nh->fib6_info; 4451f7225172SDavid Ahern 44526b9ea5a6SRoopa Prabhu /* save reference to first route for notification */ 4453f7225172SDavid Ahern if (!rt_notif) 44548d1c802bSDavid Ahern rt_notif = nh->fib6_info; 4455f7225172SDavid Ahern } 44566b9ea5a6SRoopa Prabhu 44578d1c802bSDavid Ahern /* nh->fib6_info is used or freed at this point, reset to NULL*/ 44588d1c802bSDavid Ahern nh->fib6_info = 
NULL; 44596b9ea5a6SRoopa Prabhu if (err) { 44606b9ea5a6SRoopa Prabhu if (replace && nhn) 4461a5a82d84SJakub Kicinski NL_SET_ERR_MSG_MOD(extack, 4462a5a82d84SJakub Kicinski "multipath route replace failed (check consistency of installed routes)"); 44636b9ea5a6SRoopa Prabhu err_nh = nh; 44646b9ea5a6SRoopa Prabhu goto add_errout; 44656b9ea5a6SRoopa Prabhu } 44666b9ea5a6SRoopa Prabhu 44671a72418bSNicolas Dichtel /* Because each route is added like a single route we remove 446827596472SMichal Kubeček * these flags after the first nexthop: if there is a collision, 446927596472SMichal Kubeček * we have already failed to add the first nexthop: 447027596472SMichal Kubeček * fib6_add_rt2node() has rejected it; when replacing, old 447127596472SMichal Kubeček * nexthops have been replaced by first new, the rest should 447227596472SMichal Kubeček * be added to it. 44731a72418bSNicolas Dichtel */ 447427596472SMichal Kubeček cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | 447527596472SMichal Kubeček NLM_F_REPLACE); 44766b9ea5a6SRoopa Prabhu nhn++; 44776b9ea5a6SRoopa Prabhu } 44786b9ea5a6SRoopa Prabhu 44793b1137feSDavid Ahern /* success ... 
tell user about new route */ 44803b1137feSDavid Ahern ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); 44816b9ea5a6SRoopa Prabhu goto cleanup; 44826b9ea5a6SRoopa Prabhu 44836b9ea5a6SRoopa Prabhu add_errout: 44843b1137feSDavid Ahern /* send notification for routes that were added so that 44853b1137feSDavid Ahern * the delete notifications sent by ip6_route_del are 44863b1137feSDavid Ahern * coherent 44873b1137feSDavid Ahern */ 44883b1137feSDavid Ahern if (rt_notif) 44893b1137feSDavid Ahern ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags); 44903b1137feSDavid Ahern 44916b9ea5a6SRoopa Prabhu /* Delete routes that were already added */ 44926b9ea5a6SRoopa Prabhu list_for_each_entry(nh, &rt6_nh_list, next) { 44936b9ea5a6SRoopa Prabhu if (err_nh == nh) 44946b9ea5a6SRoopa Prabhu break; 4495333c4301SDavid Ahern ip6_route_del(&nh->r_cfg, extack); 44966b9ea5a6SRoopa Prabhu } 44976b9ea5a6SRoopa Prabhu 44986b9ea5a6SRoopa Prabhu cleanup: 44996b9ea5a6SRoopa Prabhu list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) { 45008d1c802bSDavid Ahern if (nh->fib6_info) 45018d1c802bSDavid Ahern fib6_info_release(nh->fib6_info); 45026b9ea5a6SRoopa Prabhu list_del(&nh->next); 45036b9ea5a6SRoopa Prabhu kfree(nh); 45046b9ea5a6SRoopa Prabhu } 45056b9ea5a6SRoopa Prabhu 45066b9ea5a6SRoopa Prabhu return err; 45076b9ea5a6SRoopa Prabhu } 45086b9ea5a6SRoopa Prabhu 4509333c4301SDavid Ahern static int ip6_route_multipath_del(struct fib6_config *cfg, 4510333c4301SDavid Ahern struct netlink_ext_ack *extack) 45116b9ea5a6SRoopa Prabhu { 45126b9ea5a6SRoopa Prabhu struct fib6_config r_cfg; 45136b9ea5a6SRoopa Prabhu struct rtnexthop *rtnh; 45146b9ea5a6SRoopa Prabhu int remaining; 45156b9ea5a6SRoopa Prabhu int attrlen; 45166b9ea5a6SRoopa Prabhu int err = 1, last_err = 0; 45176b9ea5a6SRoopa Prabhu 45186b9ea5a6SRoopa Prabhu remaining = cfg->fc_mp_len; 45196b9ea5a6SRoopa Prabhu rtnh = (struct rtnexthop *)cfg->fc_mp; 45206b9ea5a6SRoopa Prabhu 45216b9ea5a6SRoopa Prabhu /* Parse a Multipath 
Entry */ 45226b9ea5a6SRoopa Prabhu while (rtnh_ok(rtnh, remaining)) { 45236b9ea5a6SRoopa Prabhu memcpy(&r_cfg, cfg, sizeof(*cfg)); 45246b9ea5a6SRoopa Prabhu if (rtnh->rtnh_ifindex) 45256b9ea5a6SRoopa Prabhu r_cfg.fc_ifindex = rtnh->rtnh_ifindex; 45266b9ea5a6SRoopa Prabhu 45276b9ea5a6SRoopa Prabhu attrlen = rtnh_attrlen(rtnh); 45286b9ea5a6SRoopa Prabhu if (attrlen > 0) { 45296b9ea5a6SRoopa Prabhu struct nlattr *nla, *attrs = rtnh_attrs(rtnh); 45306b9ea5a6SRoopa Prabhu 45316b9ea5a6SRoopa Prabhu nla = nla_find(attrs, attrlen, RTA_GATEWAY); 45326b9ea5a6SRoopa Prabhu if (nla) { 45336b9ea5a6SRoopa Prabhu nla_memcpy(&r_cfg.fc_gateway, nla, 16); 45346b9ea5a6SRoopa Prabhu r_cfg.fc_flags |= RTF_GATEWAY; 45356b9ea5a6SRoopa Prabhu } 45366b9ea5a6SRoopa Prabhu } 4537333c4301SDavid Ahern err = ip6_route_del(&r_cfg, extack); 45386b9ea5a6SRoopa Prabhu if (err) 45396b9ea5a6SRoopa Prabhu last_err = err; 45406b9ea5a6SRoopa Prabhu 454151ebd318SNicolas Dichtel rtnh = rtnh_next(rtnh, &remaining); 454251ebd318SNicolas Dichtel } 454351ebd318SNicolas Dichtel 454451ebd318SNicolas Dichtel return last_err; 454551ebd318SNicolas Dichtel } 454651ebd318SNicolas Dichtel 4547c21ef3e3SDavid Ahern static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, 4548c21ef3e3SDavid Ahern struct netlink_ext_ack *extack) 45491da177e4SLinus Torvalds { 455086872cb5SThomas Graf struct fib6_config cfg; 455186872cb5SThomas Graf int err; 45521da177e4SLinus Torvalds 4553333c4301SDavid Ahern err = rtm_to_fib6_config(skb, nlh, &cfg, extack); 455486872cb5SThomas Graf if (err < 0) 455586872cb5SThomas Graf return err; 455686872cb5SThomas Graf 455751ebd318SNicolas Dichtel if (cfg.fc_mp) 4558333c4301SDavid Ahern return ip6_route_multipath_del(&cfg, extack); 45590ae81335SDavid Ahern else { 45600ae81335SDavid Ahern cfg.fc_delete_all_nh = 1; 4561333c4301SDavid Ahern return ip6_route_del(&cfg, extack); 45621da177e4SLinus Torvalds } 45630ae81335SDavid Ahern } 45641da177e4SLinus Torvalds 4565c21ef3e3SDavid Ahern 
static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, 4566c21ef3e3SDavid Ahern struct netlink_ext_ack *extack) 45671da177e4SLinus Torvalds { 456886872cb5SThomas Graf struct fib6_config cfg; 456986872cb5SThomas Graf int err; 45701da177e4SLinus Torvalds 4571333c4301SDavid Ahern err = rtm_to_fib6_config(skb, nlh, &cfg, extack); 457286872cb5SThomas Graf if (err < 0) 457386872cb5SThomas Graf return err; 457486872cb5SThomas Graf 457567f69513SDavid Ahern if (cfg.fc_metric == 0) 457667f69513SDavid Ahern cfg.fc_metric = IP6_RT_PRIO_USER; 457767f69513SDavid Ahern 457851ebd318SNicolas Dichtel if (cfg.fc_mp) 4579333c4301SDavid Ahern return ip6_route_multipath_add(&cfg, extack); 458051ebd318SNicolas Dichtel else 4581acb54e3cSDavid Ahern return ip6_route_add(&cfg, GFP_KERNEL, extack); 45821da177e4SLinus Torvalds } 45831da177e4SLinus Torvalds 45848d1c802bSDavid Ahern static size_t rt6_nlmsg_size(struct fib6_info *rt) 4585339bf98fSThomas Graf { 4586beb1afacSDavid Ahern int nexthop_len = 0; 4587beb1afacSDavid Ahern 458893c2fb25SDavid Ahern if (rt->fib6_nsiblings) { 4589beb1afacSDavid Ahern nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ 4590beb1afacSDavid Ahern + NLA_ALIGN(sizeof(struct rtnexthop)) 4591beb1afacSDavid Ahern + nla_total_size(16) /* RTA_GATEWAY */ 4592ad1601aeSDavid Ahern + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws); 4593beb1afacSDavid Ahern 459493c2fb25SDavid Ahern nexthop_len *= rt->fib6_nsiblings; 4595beb1afacSDavid Ahern } 4596beb1afacSDavid Ahern 4597339bf98fSThomas Graf return NLMSG_ALIGN(sizeof(struct rtmsg)) 4598339bf98fSThomas Graf + nla_total_size(16) /* RTA_SRC */ 4599339bf98fSThomas Graf + nla_total_size(16) /* RTA_DST */ 4600339bf98fSThomas Graf + nla_total_size(16) /* RTA_GATEWAY */ 4601339bf98fSThomas Graf + nla_total_size(16) /* RTA_PREFSRC */ 4602339bf98fSThomas Graf + nla_total_size(4) /* RTA_TABLE */ 4603339bf98fSThomas Graf + nla_total_size(4) /* RTA_IIF */ 4604339bf98fSThomas Graf + nla_total_size(4) /* RTA_OIF */ 
4605339bf98fSThomas Graf + nla_total_size(4) /* RTA_PRIORITY */ 46066a2b9ce0SNoriaki TAKAMIYA + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ 4607ea697639SDaniel Borkmann + nla_total_size(sizeof(struct rta_cacheinfo)) 4608c78ba6d6SLubomir Rintel + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ 460919e42e45SRoopa Prabhu + nla_total_size(1) /* RTA_PREF */ 4610ad1601aeSDavid Ahern + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws) 4611beb1afacSDavid Ahern + nexthop_len; 4612beb1afacSDavid Ahern } 4613beb1afacSDavid Ahern 4614d4ead6b3SDavid Ahern static int rt6_fill_node(struct net *net, struct sk_buff *skb, 46158d1c802bSDavid Ahern struct fib6_info *rt, struct dst_entry *dst, 4616d4ead6b3SDavid Ahern struct in6_addr *dest, struct in6_addr *src, 461715e47304SEric W. Biederman int iif, int type, u32 portid, u32 seq, 4618f8cfe2ceSDavid Ahern unsigned int flags) 46191da177e4SLinus Torvalds { 462022d0bd82SXin Long struct rt6_info *rt6 = (struct rt6_info *)dst; 462122d0bd82SXin Long struct rt6key *rt6_dst, *rt6_src; 462222d0bd82SXin Long u32 *pmetrics, table, rt6_flags; 46231da177e4SLinus Torvalds struct nlmsghdr *nlh; 462422d0bd82SXin Long struct rtmsg *rtm; 4625d4ead6b3SDavid Ahern long expires = 0; 46261da177e4SLinus Torvalds 462715e47304SEric W. Biederman nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); 462838308473SDavid S. 
Miller if (!nlh) 462926932566SPatrick McHardy return -EMSGSIZE; 46302d7202bfSThomas Graf 463122d0bd82SXin Long if (rt6) { 463222d0bd82SXin Long rt6_dst = &rt6->rt6i_dst; 463322d0bd82SXin Long rt6_src = &rt6->rt6i_src; 463422d0bd82SXin Long rt6_flags = rt6->rt6i_flags; 463522d0bd82SXin Long } else { 463622d0bd82SXin Long rt6_dst = &rt->fib6_dst; 463722d0bd82SXin Long rt6_src = &rt->fib6_src; 463822d0bd82SXin Long rt6_flags = rt->fib6_flags; 463922d0bd82SXin Long } 464022d0bd82SXin Long 46412d7202bfSThomas Graf rtm = nlmsg_data(nlh); 46421da177e4SLinus Torvalds rtm->rtm_family = AF_INET6; 464322d0bd82SXin Long rtm->rtm_dst_len = rt6_dst->plen; 464422d0bd82SXin Long rtm->rtm_src_len = rt6_src->plen; 46451da177e4SLinus Torvalds rtm->rtm_tos = 0; 464693c2fb25SDavid Ahern if (rt->fib6_table) 464793c2fb25SDavid Ahern table = rt->fib6_table->tb6_id; 4648c71099acSThomas Graf else 46499e762a4aSPatrick McHardy table = RT6_TABLE_UNSPEC; 465097f0082aSKalash Nainwal rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT; 4651c78679e8SDavid S. Miller if (nla_put_u32(skb, RTA_TABLE, table)) 4652c78679e8SDavid S. Miller goto nla_put_failure; 4653e8478e80SDavid Ahern 4654e8478e80SDavid Ahern rtm->rtm_type = rt->fib6_type; 46551da177e4SLinus Torvalds rtm->rtm_flags = 0; 46561da177e4SLinus Torvalds rtm->rtm_scope = RT_SCOPE_UNIVERSE; 465793c2fb25SDavid Ahern rtm->rtm_protocol = rt->fib6_protocol; 46581da177e4SLinus Torvalds 465922d0bd82SXin Long if (rt6_flags & RTF_CACHE) 46601da177e4SLinus Torvalds rtm->rtm_flags |= RTM_F_CLONED; 46611da177e4SLinus Torvalds 4662d4ead6b3SDavid Ahern if (dest) { 4663d4ead6b3SDavid Ahern if (nla_put_in6_addr(skb, RTA_DST, dest)) 4664c78679e8SDavid S. Miller goto nla_put_failure; 46651da177e4SLinus Torvalds rtm->rtm_dst_len = 128; 46661da177e4SLinus Torvalds } else if (rtm->rtm_dst_len) 466722d0bd82SXin Long if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr)) 4668c78679e8SDavid S. 
Miller goto nla_put_failure; 46691da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES 46701da177e4SLinus Torvalds if (src) { 4671930345eaSJiri Benc if (nla_put_in6_addr(skb, RTA_SRC, src)) 4672c78679e8SDavid S. Miller goto nla_put_failure; 46731da177e4SLinus Torvalds rtm->rtm_src_len = 128; 4674c78679e8SDavid S. Miller } else if (rtm->rtm_src_len && 467522d0bd82SXin Long nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr)) 4676c78679e8SDavid S. Miller goto nla_put_failure; 46771da177e4SLinus Torvalds #endif 46787bc570c8SYOSHIFUJI Hideaki if (iif) { 46797bc570c8SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_MROUTE 468022d0bd82SXin Long if (ipv6_addr_is_multicast(&rt6_dst->addr)) { 4681fd61c6baSDavid Ahern int err = ip6mr_get_route(net, skb, rtm, portid); 46822cf75070SNikolay Aleksandrov 46837bc570c8SYOSHIFUJI Hideaki if (err == 0) 46847bc570c8SYOSHIFUJI Hideaki return 0; 4685fd61c6baSDavid Ahern if (err < 0) 46867bc570c8SYOSHIFUJI Hideaki goto nla_put_failure; 46877bc570c8SYOSHIFUJI Hideaki } else 46887bc570c8SYOSHIFUJI Hideaki #endif 4689c78679e8SDavid S. Miller if (nla_put_u32(skb, RTA_IIF, iif)) 4690c78679e8SDavid S. Miller goto nla_put_failure; 4691d4ead6b3SDavid Ahern } else if (dest) { 46921da177e4SLinus Torvalds struct in6_addr saddr_buf; 4693d4ead6b3SDavid Ahern if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 && 4694930345eaSJiri Benc nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) 4695c78679e8SDavid S. Miller goto nla_put_failure; 4696c3968a85SDaniel Walter } 4697c3968a85SDaniel Walter 469893c2fb25SDavid Ahern if (rt->fib6_prefsrc.plen) { 4699c3968a85SDaniel Walter struct in6_addr saddr_buf; 470093c2fb25SDavid Ahern saddr_buf = rt->fib6_prefsrc.addr; 4701930345eaSJiri Benc if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) 4702c78679e8SDavid S. Miller goto nla_put_failure; 47031da177e4SLinus Torvalds } 47042d7202bfSThomas Graf 4705d4ead6b3SDavid Ahern pmetrics = dst ? 
dst_metrics_ptr(dst) : rt->fib6_metrics->metrics; 4706d4ead6b3SDavid Ahern if (rtnetlink_put_metrics(skb, pmetrics) < 0) 47072d7202bfSThomas Graf goto nla_put_failure; 47082d7202bfSThomas Graf 470993c2fb25SDavid Ahern if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric)) 4710beb1afacSDavid Ahern goto nla_put_failure; 4711beb1afacSDavid Ahern 4712beb1afacSDavid Ahern /* For multipath routes, walk the siblings list and add 4713beb1afacSDavid Ahern * each as a nexthop within RTA_MULTIPATH. 4714beb1afacSDavid Ahern */ 471522d0bd82SXin Long if (rt6) { 471622d0bd82SXin Long if (rt6_flags & RTF_GATEWAY && 471722d0bd82SXin Long nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway)) 471822d0bd82SXin Long goto nla_put_failure; 471922d0bd82SXin Long 472022d0bd82SXin Long if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex)) 472122d0bd82SXin Long goto nla_put_failure; 472222d0bd82SXin Long } else if (rt->fib6_nsiblings) { 47238d1c802bSDavid Ahern struct fib6_info *sibling, *next_sibling; 4724beb1afacSDavid Ahern struct nlattr *mp; 4725beb1afacSDavid Ahern 4726beb1afacSDavid Ahern mp = nla_nest_start(skb, RTA_MULTIPATH); 4727beb1afacSDavid Ahern if (!mp) 4728beb1afacSDavid Ahern goto nla_put_failure; 4729beb1afacSDavid Ahern 4730c0a72077SDavid Ahern if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common, 4731c0a72077SDavid Ahern rt->fib6_nh.fib_nh_weight) < 0) 4732beb1afacSDavid Ahern goto nla_put_failure; 4733beb1afacSDavid Ahern 4734beb1afacSDavid Ahern list_for_each_entry_safe(sibling, next_sibling, 473593c2fb25SDavid Ahern &rt->fib6_siblings, fib6_siblings) { 4736c0a72077SDavid Ahern if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common, 4737c0a72077SDavid Ahern sibling->fib6_nh.fib_nh_weight) < 0) 473894f826b8SEric Dumazet goto nla_put_failure; 473994f826b8SEric Dumazet } 47402d7202bfSThomas Graf 4741beb1afacSDavid Ahern nla_nest_end(skb, mp); 4742beb1afacSDavid Ahern } else { 4743c0a72077SDavid Ahern if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common, 4744c0a72077SDavid 
Ahern &rtm->rtm_flags, false) < 0) 4745c78679e8SDavid S. Miller goto nla_put_failure; 4746beb1afacSDavid Ahern } 47478253947eSLi Wei 474822d0bd82SXin Long if (rt6_flags & RTF_EXPIRES) { 474914895687SDavid Ahern expires = dst ? dst->expires : rt->expires; 475014895687SDavid Ahern expires -= jiffies; 475114895687SDavid Ahern } 475269cdf8f9SYOSHIFUJI Hideaki 4753d4ead6b3SDavid Ahern if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0) 4754e3703b3dSThomas Graf goto nla_put_failure; 47551da177e4SLinus Torvalds 475622d0bd82SXin Long if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags))) 4757c78ba6d6SLubomir Rintel goto nla_put_failure; 4758c78ba6d6SLubomir Rintel 475919e42e45SRoopa Prabhu 4760053c095aSJohannes Berg nlmsg_end(skb, nlh); 4761053c095aSJohannes Berg return 0; 47622d7202bfSThomas Graf 47632d7202bfSThomas Graf nla_put_failure: 476426932566SPatrick McHardy nlmsg_cancel(skb, nlh); 476526932566SPatrick McHardy return -EMSGSIZE; 47661da177e4SLinus Torvalds } 47671da177e4SLinus Torvalds 476813e38901SDavid Ahern static bool fib6_info_uses_dev(const struct fib6_info *f6i, 476913e38901SDavid Ahern const struct net_device *dev) 477013e38901SDavid Ahern { 4771ad1601aeSDavid Ahern if (f6i->fib6_nh.fib_nh_dev == dev) 477213e38901SDavid Ahern return true; 477313e38901SDavid Ahern 477413e38901SDavid Ahern if (f6i->fib6_nsiblings) { 477513e38901SDavid Ahern struct fib6_info *sibling, *next_sibling; 477613e38901SDavid Ahern 477713e38901SDavid Ahern list_for_each_entry_safe(sibling, next_sibling, 477813e38901SDavid Ahern &f6i->fib6_siblings, fib6_siblings) { 4779ad1601aeSDavid Ahern if (sibling->fib6_nh.fib_nh_dev == dev) 478013e38901SDavid Ahern return true; 478113e38901SDavid Ahern } 478213e38901SDavid Ahern } 478313e38901SDavid Ahern 478413e38901SDavid Ahern return false; 478513e38901SDavid Ahern } 478613e38901SDavid Ahern 47878d1c802bSDavid Ahern int rt6_dump_route(struct fib6_info *rt, void *p_arg) 47881da177e4SLinus Torvalds { 47891da177e4SLinus 
Torvalds struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; 479013e38901SDavid Ahern struct fib_dump_filter *filter = &arg->filter; 479113e38901SDavid Ahern unsigned int flags = NLM_F_MULTI; 47921f17e2f2SDavid Ahern struct net *net = arg->net; 47931f17e2f2SDavid Ahern 4794421842edSDavid Ahern if (rt == net->ipv6.fib6_null_entry) 47951f17e2f2SDavid Ahern return 0; 47961da177e4SLinus Torvalds 479713e38901SDavid Ahern if ((filter->flags & RTM_F_PREFIX) && 479893c2fb25SDavid Ahern !(rt->fib6_flags & RTF_PREFIX_RT)) { 4799f8cfe2ceSDavid Ahern /* success since this is not a prefix route */ 4800f8cfe2ceSDavid Ahern return 1; 4801f8cfe2ceSDavid Ahern } 480213e38901SDavid Ahern if (filter->filter_set) { 480313e38901SDavid Ahern if ((filter->rt_type && rt->fib6_type != filter->rt_type) || 480413e38901SDavid Ahern (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) || 480513e38901SDavid Ahern (filter->protocol && rt->fib6_protocol != filter->protocol)) { 480613e38901SDavid Ahern return 1; 480713e38901SDavid Ahern } 480813e38901SDavid Ahern flags |= NLM_F_DUMP_FILTERED; 4809f8cfe2ceSDavid Ahern } 48101da177e4SLinus Torvalds 4811d4ead6b3SDavid Ahern return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0, 4812d4ead6b3SDavid Ahern RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid, 481313e38901SDavid Ahern arg->cb->nlh->nlmsg_seq, flags); 48141da177e4SLinus Torvalds } 48151da177e4SLinus Torvalds 48160eff0a27SJakub Kicinski static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, 48170eff0a27SJakub Kicinski const struct nlmsghdr *nlh, 48180eff0a27SJakub Kicinski struct nlattr **tb, 48190eff0a27SJakub Kicinski struct netlink_ext_ack *extack) 48200eff0a27SJakub Kicinski { 48210eff0a27SJakub Kicinski struct rtmsg *rtm; 48220eff0a27SJakub Kicinski int i, err; 48230eff0a27SJakub Kicinski 48240eff0a27SJakub Kicinski if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { 48250eff0a27SJakub Kicinski NL_SET_ERR_MSG_MOD(extack, 48260eff0a27SJakub Kicinski "Invalid 
header for get route request"); 48270eff0a27SJakub Kicinski return -EINVAL; 48280eff0a27SJakub Kicinski } 48290eff0a27SJakub Kicinski 48300eff0a27SJakub Kicinski if (!netlink_strict_get_check(skb)) 48310eff0a27SJakub Kicinski return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, 48320eff0a27SJakub Kicinski rtm_ipv6_policy, extack); 48330eff0a27SJakub Kicinski 48340eff0a27SJakub Kicinski rtm = nlmsg_data(nlh); 48350eff0a27SJakub Kicinski if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || 48360eff0a27SJakub Kicinski (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) || 48370eff0a27SJakub Kicinski rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || 48380eff0a27SJakub Kicinski rtm->rtm_type) { 48390eff0a27SJakub Kicinski NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request"); 48400eff0a27SJakub Kicinski return -EINVAL; 48410eff0a27SJakub Kicinski } 48420eff0a27SJakub Kicinski if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) { 48430eff0a27SJakub Kicinski NL_SET_ERR_MSG_MOD(extack, 48440eff0a27SJakub Kicinski "Invalid flags for get route request"); 48450eff0a27SJakub Kicinski return -EINVAL; 48460eff0a27SJakub Kicinski } 48470eff0a27SJakub Kicinski 48480eff0a27SJakub Kicinski err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, 48490eff0a27SJakub Kicinski rtm_ipv6_policy, extack); 48500eff0a27SJakub Kicinski if (err) 48510eff0a27SJakub Kicinski return err; 48520eff0a27SJakub Kicinski 48530eff0a27SJakub Kicinski if ((tb[RTA_SRC] && !rtm->rtm_src_len) || 48540eff0a27SJakub Kicinski (tb[RTA_DST] && !rtm->rtm_dst_len)) { 48550eff0a27SJakub Kicinski NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6"); 48560eff0a27SJakub Kicinski return -EINVAL; 48570eff0a27SJakub Kicinski } 48580eff0a27SJakub Kicinski 48590eff0a27SJakub Kicinski for (i = 0; i <= RTA_MAX; i++) { 48600eff0a27SJakub Kicinski if (!tb[i]) 48610eff0a27SJakub Kicinski continue; 48620eff0a27SJakub Kicinski 48630eff0a27SJakub Kicinski switch (i) { 48640eff0a27SJakub Kicinski 
case RTA_SRC: 48650eff0a27SJakub Kicinski case RTA_DST: 48660eff0a27SJakub Kicinski case RTA_IIF: 48670eff0a27SJakub Kicinski case RTA_OIF: 48680eff0a27SJakub Kicinski case RTA_MARK: 48690eff0a27SJakub Kicinski case RTA_UID: 48700eff0a27SJakub Kicinski case RTA_SPORT: 48710eff0a27SJakub Kicinski case RTA_DPORT: 48720eff0a27SJakub Kicinski case RTA_IP_PROTO: 48730eff0a27SJakub Kicinski break; 48740eff0a27SJakub Kicinski default: 48750eff0a27SJakub Kicinski NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request"); 48760eff0a27SJakub Kicinski return -EINVAL; 48770eff0a27SJakub Kicinski } 48780eff0a27SJakub Kicinski } 48790eff0a27SJakub Kicinski 48800eff0a27SJakub Kicinski return 0; 48810eff0a27SJakub Kicinski } 48820eff0a27SJakub Kicinski 4883c21ef3e3SDavid Ahern static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, 4884c21ef3e3SDavid Ahern struct netlink_ext_ack *extack) 48851da177e4SLinus Torvalds { 48863b1e0a65SYOSHIFUJI Hideaki struct net *net = sock_net(in_skb->sk); 4887ab364a6fSThomas Graf struct nlattr *tb[RTA_MAX+1]; 488818c3a61cSRoopa Prabhu int err, iif = 0, oif = 0; 4889a68886a6SDavid Ahern struct fib6_info *from; 489018c3a61cSRoopa Prabhu struct dst_entry *dst; 48911da177e4SLinus Torvalds struct rt6_info *rt; 4892ab364a6fSThomas Graf struct sk_buff *skb; 4893ab364a6fSThomas Graf struct rtmsg *rtm; 4894744486d4SMaciej Żenczykowski struct flowi6 fl6 = {}; 489518c3a61cSRoopa Prabhu bool fibmatch; 4896ab364a6fSThomas Graf 48970eff0a27SJakub Kicinski err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack); 4898ab364a6fSThomas Graf if (err < 0) 4899ab364a6fSThomas Graf goto errout; 4900ab364a6fSThomas Graf 4901ab364a6fSThomas Graf err = -EINVAL; 490238b7097bSHannes Frederic Sowa rtm = nlmsg_data(nlh); 490338b7097bSHannes Frederic Sowa fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0); 490418c3a61cSRoopa Prabhu fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH); 4905ab364a6fSThomas Graf 4906ab364a6fSThomas Graf if 
(tb[RTA_SRC]) { 4907ab364a6fSThomas Graf if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) 4908ab364a6fSThomas Graf goto errout; 4909ab364a6fSThomas Graf 49104e3fd7a0SAlexey Dobriyan fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]); 4911ab364a6fSThomas Graf } 4912ab364a6fSThomas Graf 4913ab364a6fSThomas Graf if (tb[RTA_DST]) { 4914ab364a6fSThomas Graf if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) 4915ab364a6fSThomas Graf goto errout; 4916ab364a6fSThomas Graf 49174e3fd7a0SAlexey Dobriyan fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]); 4918ab364a6fSThomas Graf } 4919ab364a6fSThomas Graf 4920ab364a6fSThomas Graf if (tb[RTA_IIF]) 4921ab364a6fSThomas Graf iif = nla_get_u32(tb[RTA_IIF]); 4922ab364a6fSThomas Graf 4923ab364a6fSThomas Graf if (tb[RTA_OIF]) 492472331bc0SShmulik Ladkani oif = nla_get_u32(tb[RTA_OIF]); 4925ab364a6fSThomas Graf 49262e47b291SLorenzo Colitti if (tb[RTA_MARK]) 49272e47b291SLorenzo Colitti fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); 49282e47b291SLorenzo Colitti 4929622ec2c9SLorenzo Colitti if (tb[RTA_UID]) 4930622ec2c9SLorenzo Colitti fl6.flowi6_uid = make_kuid(current_user_ns(), 4931622ec2c9SLorenzo Colitti nla_get_u32(tb[RTA_UID])); 4932622ec2c9SLorenzo Colitti else 4933622ec2c9SLorenzo Colitti fl6.flowi6_uid = iif ? 
INVALID_UID : current_uid(); 4934622ec2c9SLorenzo Colitti 4935eacb9384SRoopa Prabhu if (tb[RTA_SPORT]) 4936eacb9384SRoopa Prabhu fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]); 4937eacb9384SRoopa Prabhu 4938eacb9384SRoopa Prabhu if (tb[RTA_DPORT]) 4939eacb9384SRoopa Prabhu fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]); 4940eacb9384SRoopa Prabhu 4941eacb9384SRoopa Prabhu if (tb[RTA_IP_PROTO]) { 4942eacb9384SRoopa Prabhu err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], 49435e1a99eaSHangbin Liu &fl6.flowi6_proto, AF_INET6, 49445e1a99eaSHangbin Liu extack); 4945eacb9384SRoopa Prabhu if (err) 4946eacb9384SRoopa Prabhu goto errout; 4947eacb9384SRoopa Prabhu } 4948eacb9384SRoopa Prabhu 4949ab364a6fSThomas Graf if (iif) { 4950ab364a6fSThomas Graf struct net_device *dev; 495172331bc0SShmulik Ladkani int flags = 0; 495272331bc0SShmulik Ladkani 4953121622dbSFlorian Westphal rcu_read_lock(); 4954121622dbSFlorian Westphal 4955121622dbSFlorian Westphal dev = dev_get_by_index_rcu(net, iif); 4956ab364a6fSThomas Graf if (!dev) { 4957121622dbSFlorian Westphal rcu_read_unlock(); 4958ab364a6fSThomas Graf err = -ENODEV; 4959ab364a6fSThomas Graf goto errout; 4960ab364a6fSThomas Graf } 496172331bc0SShmulik Ladkani 496272331bc0SShmulik Ladkani fl6.flowi6_iif = iif; 496372331bc0SShmulik Ladkani 496472331bc0SShmulik Ladkani if (!ipv6_addr_any(&fl6.saddr)) 496572331bc0SShmulik Ladkani flags |= RT6_LOOKUP_F_HAS_SADDR; 496672331bc0SShmulik Ladkani 4967b75cc8f9SDavid Ahern dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags); 4968121622dbSFlorian Westphal 4969121622dbSFlorian Westphal rcu_read_unlock(); 497072331bc0SShmulik Ladkani } else { 497172331bc0SShmulik Ladkani fl6.flowi6_oif = oif; 497272331bc0SShmulik Ladkani 497318c3a61cSRoopa Prabhu dst = ip6_route_output(net, NULL, &fl6); 497418c3a61cSRoopa Prabhu } 497518c3a61cSRoopa Prabhu 497618c3a61cSRoopa Prabhu 497718c3a61cSRoopa Prabhu rt = container_of(dst, struct rt6_info, dst); 497818c3a61cSRoopa Prabhu if (rt->dst.error) { 
497918c3a61cSRoopa Prabhu err = rt->dst.error; 498018c3a61cSRoopa Prabhu ip6_rt_put(rt); 498118c3a61cSRoopa Prabhu goto errout; 4982ab364a6fSThomas Graf } 49831da177e4SLinus Torvalds 49849d6acb3bSWANG Cong if (rt == net->ipv6.ip6_null_entry) { 49859d6acb3bSWANG Cong err = rt->dst.error; 49869d6acb3bSWANG Cong ip6_rt_put(rt); 49879d6acb3bSWANG Cong goto errout; 49889d6acb3bSWANG Cong } 49899d6acb3bSWANG Cong 49901da177e4SLinus Torvalds skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 499138308473SDavid S. Miller if (!skb) { 499294e187c0SAmerigo Wang ip6_rt_put(rt); 4993ab364a6fSThomas Graf err = -ENOBUFS; 4994ab364a6fSThomas Graf goto errout; 4995ab364a6fSThomas Graf } 49961da177e4SLinus Torvalds 4997d8d1f30bSChangli Gao skb_dst_set(skb, &rt->dst); 4998a68886a6SDavid Ahern 4999a68886a6SDavid Ahern rcu_read_lock(); 5000a68886a6SDavid Ahern from = rcu_dereference(rt->from); 5001a68886a6SDavid Ahern 500218c3a61cSRoopa Prabhu if (fibmatch) 5003a68886a6SDavid Ahern err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif, 500418c3a61cSRoopa Prabhu RTM_NEWROUTE, NETLINK_CB(in_skb).portid, 500518c3a61cSRoopa Prabhu nlh->nlmsg_seq, 0); 500618c3a61cSRoopa Prabhu else 5007a68886a6SDavid Ahern err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, 5008a68886a6SDavid Ahern &fl6.saddr, iif, RTM_NEWROUTE, 5009d4ead6b3SDavid Ahern NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 5010d4ead6b3SDavid Ahern 0); 5011a68886a6SDavid Ahern rcu_read_unlock(); 5012a68886a6SDavid Ahern 50131da177e4SLinus Torvalds if (err < 0) { 5014ab364a6fSThomas Graf kfree_skb(skb); 5015ab364a6fSThomas Graf goto errout; 50161da177e4SLinus Torvalds } 50171da177e4SLinus Torvalds 501815e47304SEric W. 
Biederman err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); 5019ab364a6fSThomas Graf errout: 50201da177e4SLinus Torvalds return err; 50211da177e4SLinus Torvalds } 50221da177e4SLinus Torvalds 50238d1c802bSDavid Ahern void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, 502437a1d361SRoopa Prabhu unsigned int nlm_flags) 50251da177e4SLinus Torvalds { 50261da177e4SLinus Torvalds struct sk_buff *skb; 50275578689aSDaniel Lezcano struct net *net = info->nl_net; 5028528c4cebSDenis V. Lunev u32 seq; 5029528c4cebSDenis V. Lunev int err; 50300d51aa80SJamal Hadi Salim 5031528c4cebSDenis V. Lunev err = -ENOBUFS; 503238308473SDavid S. Miller seq = info->nlh ? info->nlh->nlmsg_seq : 0; 503386872cb5SThomas Graf 503419e42e45SRoopa Prabhu skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any()); 503538308473SDavid S. Miller if (!skb) 503621713ebcSThomas Graf goto errout; 50371da177e4SLinus Torvalds 5038d4ead6b3SDavid Ahern err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0, 5039f8cfe2ceSDavid Ahern event, info->portid, seq, nlm_flags); 504026932566SPatrick McHardy if (err < 0) { 504126932566SPatrick McHardy /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ 504226932566SPatrick McHardy WARN_ON(err == -EMSGSIZE); 504326932566SPatrick McHardy kfree_skb(skb); 504426932566SPatrick McHardy goto errout; 504526932566SPatrick McHardy } 504615e47304SEric W. 
Biederman rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, 50475578689aSDaniel Lezcano info->nlh, gfp_any()); 50481ce85fe4SPablo Neira Ayuso return; 504921713ebcSThomas Graf errout: 505021713ebcSThomas Graf if (err < 0) 50515578689aSDaniel Lezcano rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 50521da177e4SLinus Torvalds } 50531da177e4SLinus Torvalds 50548ed67789SDaniel Lezcano static int ip6_route_dev_notify(struct notifier_block *this, 5055351638e7SJiri Pirko unsigned long event, void *ptr) 50568ed67789SDaniel Lezcano { 5057351638e7SJiri Pirko struct net_device *dev = netdev_notifier_info_to_dev(ptr); 5058c346dca1SYOSHIFUJI Hideaki struct net *net = dev_net(dev); 50598ed67789SDaniel Lezcano 5060242d3a49SWANG Cong if (!(dev->flags & IFF_LOOPBACK)) 5061242d3a49SWANG Cong return NOTIFY_OK; 5062242d3a49SWANG Cong 5063242d3a49SWANG Cong if (event == NETDEV_REGISTER) { 5064ad1601aeSDavid Ahern net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev; 5065d8d1f30bSChangli Gao net->ipv6.ip6_null_entry->dst.dev = dev; 50668ed67789SDaniel Lezcano net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); 50678ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES 5068d8d1f30bSChangli Gao net->ipv6.ip6_prohibit_entry->dst.dev = dev; 50698ed67789SDaniel Lezcano net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev); 5070d8d1f30bSChangli Gao net->ipv6.ip6_blk_hole_entry->dst.dev = dev; 50718ed67789SDaniel Lezcano net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev); 50728ed67789SDaniel Lezcano #endif 507376da0704SWANG Cong } else if (event == NETDEV_UNREGISTER && 507476da0704SWANG Cong dev->reg_state != NETREG_UNREGISTERED) { 507576da0704SWANG Cong /* NETDEV_UNREGISTER could be fired for multiple times by 507676da0704SWANG Cong * netdev_wait_allrefs(). Make sure we only call this once. 
507776da0704SWANG Cong */ 507812d94a80SEric Dumazet in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev); 5079242d3a49SWANG Cong #ifdef CONFIG_IPV6_MULTIPLE_TABLES 508012d94a80SEric Dumazet in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev); 508112d94a80SEric Dumazet in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev); 5082242d3a49SWANG Cong #endif 50838ed67789SDaniel Lezcano } 50848ed67789SDaniel Lezcano 50858ed67789SDaniel Lezcano return NOTIFY_OK; 50868ed67789SDaniel Lezcano } 50878ed67789SDaniel Lezcano 50881da177e4SLinus Torvalds /* 50891da177e4SLinus Torvalds * /proc 50901da177e4SLinus Torvalds */ 50911da177e4SLinus Torvalds 50921da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS 50931da177e4SLinus Torvalds static int rt6_stats_seq_show(struct seq_file *seq, void *v) 50941da177e4SLinus Torvalds { 509569ddb805SDaniel Lezcano struct net *net = (struct net *)seq->private; 50961da177e4SLinus Torvalds seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", 509769ddb805SDaniel Lezcano net->ipv6.rt6_stats->fib_nodes, 509869ddb805SDaniel Lezcano net->ipv6.rt6_stats->fib_route_nodes, 509981eb8447SWei Wang atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc), 510069ddb805SDaniel Lezcano net->ipv6.rt6_stats->fib_rt_entries, 510169ddb805SDaniel Lezcano net->ipv6.rt6_stats->fib_rt_cache, 5102fc66f95cSEric Dumazet dst_entries_get_slow(&net->ipv6.ip6_dst_ops), 510369ddb805SDaniel Lezcano net->ipv6.rt6_stats->fib_discarded_routes); 51041da177e4SLinus Torvalds 51051da177e4SLinus Torvalds return 0; 51061da177e4SLinus Torvalds } 51071da177e4SLinus Torvalds #endif /* CONFIG_PROC_FS */ 51081da177e4SLinus Torvalds 51091da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL 51101da177e4SLinus Torvalds 51111da177e4SLinus Torvalds static 5112fe2c6338SJoe Perches int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, 51131da177e4SLinus Torvalds void __user *buffer, size_t *lenp, loff_t *ppos) 51141da177e4SLinus Torvalds { 5115c486da34SLucian Adrian Grijincu struct net *net; 
5116c486da34SLucian Adrian Grijincu int delay; 5117f0fb9b28SAditya Pakki int ret; 5118c486da34SLucian Adrian Grijincu if (!write) 5119c486da34SLucian Adrian Grijincu return -EINVAL; 5120c486da34SLucian Adrian Grijincu 5121c486da34SLucian Adrian Grijincu net = (struct net *)ctl->extra1; 5122c486da34SLucian Adrian Grijincu delay = net->ipv6.sysctl.flush_delay; 5123f0fb9b28SAditya Pakki ret = proc_dointvec(ctl, write, buffer, lenp, ppos); 5124f0fb9b28SAditya Pakki if (ret) 5125f0fb9b28SAditya Pakki return ret; 5126f0fb9b28SAditya Pakki 51272ac3ac8fSMichal Kubeček fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); 51281da177e4SLinus Torvalds return 0; 51291da177e4SLinus Torvalds } 51301da177e4SLinus Torvalds 51317c6bb7d2SDavid Ahern static int zero; 51327c6bb7d2SDavid Ahern static int one = 1; 51337c6bb7d2SDavid Ahern 5134ed792e28SDavid Ahern static struct ctl_table ipv6_route_table_template[] = { 51351da177e4SLinus Torvalds { 51361da177e4SLinus Torvalds .procname = "flush", 51374990509fSDaniel Lezcano .data = &init_net.ipv6.sysctl.flush_delay, 51381da177e4SLinus Torvalds .maxlen = sizeof(int), 513989c8b3a1SDave Jones .mode = 0200, 51406d9f239aSAlexey Dobriyan .proc_handler = ipv6_sysctl_rtcache_flush 51411da177e4SLinus Torvalds }, 51421da177e4SLinus Torvalds { 51431da177e4SLinus Torvalds .procname = "gc_thresh", 51449a7ec3a9SDaniel Lezcano .data = &ip6_dst_ops_template.gc_thresh, 51451da177e4SLinus Torvalds .maxlen = sizeof(int), 51461da177e4SLinus Torvalds .mode = 0644, 51476d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 51481da177e4SLinus Torvalds }, 51491da177e4SLinus Torvalds { 51501da177e4SLinus Torvalds .procname = "max_size", 51514990509fSDaniel Lezcano .data = &init_net.ipv6.sysctl.ip6_rt_max_size, 51521da177e4SLinus Torvalds .maxlen = sizeof(int), 51531da177e4SLinus Torvalds .mode = 0644, 51546d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec, 51551da177e4SLinus Torvalds }, 51561da177e4SLinus Torvalds { 51571da177e4SLinus Torvalds 
.procname = "gc_min_interval", 51584990509fSDaniel Lezcano .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 51591da177e4SLinus Torvalds .maxlen = sizeof(int), 51601da177e4SLinus Torvalds .mode = 0644, 51616d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec_jiffies, 51621da177e4SLinus Torvalds }, 51631da177e4SLinus Torvalds { 51641da177e4SLinus Torvalds .procname = "gc_timeout", 51654990509fSDaniel Lezcano .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout, 51661da177e4SLinus Torvalds .maxlen = sizeof(int), 51671da177e4SLinus Torvalds .mode = 0644, 51686d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec_jiffies, 51691da177e4SLinus Torvalds }, 51701da177e4SLinus Torvalds { 51711da177e4SLinus Torvalds .procname = "gc_interval", 51724990509fSDaniel Lezcano .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval, 51731da177e4SLinus Torvalds .maxlen = sizeof(int), 51741da177e4SLinus Torvalds .mode = 0644, 51756d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec_jiffies, 51761da177e4SLinus Torvalds }, 51771da177e4SLinus Torvalds { 51781da177e4SLinus Torvalds .procname = "gc_elasticity", 51794990509fSDaniel Lezcano .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, 51801da177e4SLinus Torvalds .maxlen = sizeof(int), 51811da177e4SLinus Torvalds .mode = 0644, 5182f3d3f616SMin Zhang .proc_handler = proc_dointvec, 51831da177e4SLinus Torvalds }, 51841da177e4SLinus Torvalds { 51851da177e4SLinus Torvalds .procname = "mtu_expires", 51864990509fSDaniel Lezcano .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires, 51871da177e4SLinus Torvalds .maxlen = sizeof(int), 51881da177e4SLinus Torvalds .mode = 0644, 51896d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec_jiffies, 51901da177e4SLinus Torvalds }, 51911da177e4SLinus Torvalds { 51921da177e4SLinus Torvalds .procname = "min_adv_mss", 51934990509fSDaniel Lezcano .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, 51941da177e4SLinus Torvalds .maxlen = sizeof(int), 51951da177e4SLinus Torvalds .mode = 0644, 5196f3d3f616SMin Zhang 
.proc_handler = proc_dointvec, 51971da177e4SLinus Torvalds }, 51981da177e4SLinus Torvalds { 51991da177e4SLinus Torvalds .procname = "gc_min_interval_ms", 52004990509fSDaniel Lezcano .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval, 52011da177e4SLinus Torvalds .maxlen = sizeof(int), 52021da177e4SLinus Torvalds .mode = 0644, 52036d9f239aSAlexey Dobriyan .proc_handler = proc_dointvec_ms_jiffies, 52041da177e4SLinus Torvalds }, 52057c6bb7d2SDavid Ahern { 52067c6bb7d2SDavid Ahern .procname = "skip_notify_on_dev_down", 52077c6bb7d2SDavid Ahern .data = &init_net.ipv6.sysctl.skip_notify_on_dev_down, 52087c6bb7d2SDavid Ahern .maxlen = sizeof(int), 52097c6bb7d2SDavid Ahern .mode = 0644, 52107c6bb7d2SDavid Ahern .proc_handler = proc_dointvec, 52117c6bb7d2SDavid Ahern .extra1 = &zero, 52127c6bb7d2SDavid Ahern .extra2 = &one, 52137c6bb7d2SDavid Ahern }, 5214f8572d8fSEric W. Biederman { } 52151da177e4SLinus Torvalds }; 52161da177e4SLinus Torvalds 52172c8c1e72SAlexey Dobriyan struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net) 5218760f2d01SDaniel Lezcano { 5219760f2d01SDaniel Lezcano struct ctl_table *table; 5220760f2d01SDaniel Lezcano 5221760f2d01SDaniel Lezcano table = kmemdup(ipv6_route_table_template, 5222760f2d01SDaniel Lezcano sizeof(ipv6_route_table_template), 5223760f2d01SDaniel Lezcano GFP_KERNEL); 52245ee09105SYOSHIFUJI Hideaki 52255ee09105SYOSHIFUJI Hideaki if (table) { 52265ee09105SYOSHIFUJI Hideaki table[0].data = &net->ipv6.sysctl.flush_delay; 5227c486da34SLucian Adrian Grijincu table[0].extra1 = net; 522886393e52SAlexey Dobriyan table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; 52295ee09105SYOSHIFUJI Hideaki table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; 52305ee09105SYOSHIFUJI Hideaki table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 52315ee09105SYOSHIFUJI Hideaki table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; 52325ee09105SYOSHIFUJI Hideaki table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval; 52335ee09105SYOSHIFUJI Hideaki 
table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity; 52345ee09105SYOSHIFUJI Hideaki table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires; 52355ee09105SYOSHIFUJI Hideaki table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss; 52369c69fabeSAlexey Dobriyan table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; 52377c6bb7d2SDavid Ahern table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down; 5238464dc801SEric W. Biederman 5239464dc801SEric W. Biederman /* Don't export sysctls to unprivileged users */ 5240464dc801SEric W. Biederman if (net->user_ns != &init_user_ns) 5241464dc801SEric W. Biederman table[0].procname = NULL; 52425ee09105SYOSHIFUJI Hideaki } 52435ee09105SYOSHIFUJI Hideaki 5244760f2d01SDaniel Lezcano return table; 5245760f2d01SDaniel Lezcano } 52461da177e4SLinus Torvalds #endif 52471da177e4SLinus Torvalds 52482c8c1e72SAlexey Dobriyan static int __net_init ip6_route_net_init(struct net *net) 5249cdb18761SDaniel Lezcano { 5250633d424bSPavel Emelyanov int ret = -ENOMEM; 52518ed67789SDaniel Lezcano 525286393e52SAlexey Dobriyan memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 525386393e52SAlexey Dobriyan sizeof(net->ipv6.ip6_dst_ops)); 5254f2fc6a54SBenjamin Thery 5255fc66f95cSEric Dumazet if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) 5256fc66f95cSEric Dumazet goto out_ip6_dst_ops; 5257fc66f95cSEric Dumazet 5258421842edSDavid Ahern net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template, 5259421842edSDavid Ahern sizeof(*net->ipv6.fib6_null_entry), 5260421842edSDavid Ahern GFP_KERNEL); 5261421842edSDavid Ahern if (!net->ipv6.fib6_null_entry) 5262421842edSDavid Ahern goto out_ip6_dst_entries; 5263421842edSDavid Ahern 52648ed67789SDaniel Lezcano net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 52658ed67789SDaniel Lezcano sizeof(*net->ipv6.ip6_null_entry), 52668ed67789SDaniel Lezcano GFP_KERNEL); 52678ed67789SDaniel Lezcano if (!net->ipv6.ip6_null_entry) 5268421842edSDavid Ahern goto out_fib6_null_entry; 5269d8d1f30bSChangli Gao 
net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 527062fa8a84SDavid S. Miller dst_init_metrics(&net->ipv6.ip6_null_entry->dst, 527162fa8a84SDavid S. Miller ip6_template_metrics, true); 52728ed67789SDaniel Lezcano 52738ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES 5274feca7d8cSVincent Bernat net->ipv6.fib6_has_custom_rules = false; 52758ed67789SDaniel Lezcano net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, 52768ed67789SDaniel Lezcano sizeof(*net->ipv6.ip6_prohibit_entry), 52778ed67789SDaniel Lezcano GFP_KERNEL); 527868fffc67SPeter Zijlstra if (!net->ipv6.ip6_prohibit_entry) 527968fffc67SPeter Zijlstra goto out_ip6_null_entry; 5280d8d1f30bSChangli Gao net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops; 528162fa8a84SDavid S. Miller dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst, 528262fa8a84SDavid S. Miller ip6_template_metrics, true); 52838ed67789SDaniel Lezcano 52848ed67789SDaniel Lezcano net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, 52858ed67789SDaniel Lezcano sizeof(*net->ipv6.ip6_blk_hole_entry), 52868ed67789SDaniel Lezcano GFP_KERNEL); 528768fffc67SPeter Zijlstra if (!net->ipv6.ip6_blk_hole_entry) 528868fffc67SPeter Zijlstra goto out_ip6_prohibit_entry; 5289d8d1f30bSChangli Gao net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops; 529062fa8a84SDavid S. Miller dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst, 529162fa8a84SDavid S. 
Miller ip6_template_metrics, true); 52928ed67789SDaniel Lezcano #endif 52938ed67789SDaniel Lezcano 5294b339a47cSPeter Zijlstra net->ipv6.sysctl.flush_delay = 0; 5295b339a47cSPeter Zijlstra net->ipv6.sysctl.ip6_rt_max_size = 4096; 5296b339a47cSPeter Zijlstra net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; 5297b339a47cSPeter Zijlstra net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; 5298b339a47cSPeter Zijlstra net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; 5299b339a47cSPeter Zijlstra net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; 5300b339a47cSPeter Zijlstra net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; 5301b339a47cSPeter Zijlstra net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; 53027c6bb7d2SDavid Ahern net->ipv6.sysctl.skip_notify_on_dev_down = 0; 5303b339a47cSPeter Zijlstra 53046891a346SBenjamin Thery net->ipv6.ip6_rt_gc_expire = 30*HZ; 53056891a346SBenjamin Thery 53068ed67789SDaniel Lezcano ret = 0; 53078ed67789SDaniel Lezcano out: 53088ed67789SDaniel Lezcano return ret; 5309f2fc6a54SBenjamin Thery 531068fffc67SPeter Zijlstra #ifdef CONFIG_IPV6_MULTIPLE_TABLES 531168fffc67SPeter Zijlstra out_ip6_prohibit_entry: 531268fffc67SPeter Zijlstra kfree(net->ipv6.ip6_prohibit_entry); 531368fffc67SPeter Zijlstra out_ip6_null_entry: 531468fffc67SPeter Zijlstra kfree(net->ipv6.ip6_null_entry); 531568fffc67SPeter Zijlstra #endif 5316421842edSDavid Ahern out_fib6_null_entry: 5317421842edSDavid Ahern kfree(net->ipv6.fib6_null_entry); 5318fc66f95cSEric Dumazet out_ip6_dst_entries: 5319fc66f95cSEric Dumazet dst_entries_destroy(&net->ipv6.ip6_dst_ops); 5320f2fc6a54SBenjamin Thery out_ip6_dst_ops: 5321f2fc6a54SBenjamin Thery goto out; 5322cdb18761SDaniel Lezcano } 5323cdb18761SDaniel Lezcano 53242c8c1e72SAlexey Dobriyan static void __net_exit ip6_route_net_exit(struct net *net) 5325cdb18761SDaniel Lezcano { 5326421842edSDavid Ahern kfree(net->ipv6.fib6_null_entry); 53278ed67789SDaniel Lezcano kfree(net->ipv6.ip6_null_entry); 53288ed67789SDaniel Lezcano #ifdef 
CONFIG_IPV6_MULTIPLE_TABLES 53298ed67789SDaniel Lezcano kfree(net->ipv6.ip6_prohibit_entry); 53308ed67789SDaniel Lezcano kfree(net->ipv6.ip6_blk_hole_entry); 53318ed67789SDaniel Lezcano #endif 533241bb78b4SXiaotian Feng dst_entries_destroy(&net->ipv6.ip6_dst_ops); 5333cdb18761SDaniel Lezcano } 5334cdb18761SDaniel Lezcano 5335d189634eSThomas Graf static int __net_init ip6_route_net_init_late(struct net *net) 5336d189634eSThomas Graf { 5337d189634eSThomas Graf #ifdef CONFIG_PROC_FS 5338c3506372SChristoph Hellwig proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops, 5339c3506372SChristoph Hellwig sizeof(struct ipv6_route_iter)); 53403617d949SChristoph Hellwig proc_create_net_single("rt6_stats", 0444, net->proc_net, 53413617d949SChristoph Hellwig rt6_stats_seq_show, NULL); 5342d189634eSThomas Graf #endif 5343d189634eSThomas Graf return 0; 5344d189634eSThomas Graf } 5345d189634eSThomas Graf 5346d189634eSThomas Graf static void __net_exit ip6_route_net_exit_late(struct net *net) 5347d189634eSThomas Graf { 5348d189634eSThomas Graf #ifdef CONFIG_PROC_FS 5349ece31ffdSGao feng remove_proc_entry("ipv6_route", net->proc_net); 5350ece31ffdSGao feng remove_proc_entry("rt6_stats", net->proc_net); 5351d189634eSThomas Graf #endif 5352d189634eSThomas Graf } 5353d189634eSThomas Graf 5354cdb18761SDaniel Lezcano static struct pernet_operations ip6_route_net_ops = { 5355cdb18761SDaniel Lezcano .init = ip6_route_net_init, 5356cdb18761SDaniel Lezcano .exit = ip6_route_net_exit, 5357cdb18761SDaniel Lezcano }; 5358cdb18761SDaniel Lezcano 5359c3426b47SDavid S. Miller static int __net_init ipv6_inetpeer_init(struct net *net) 5360c3426b47SDavid S. Miller { 5361c3426b47SDavid S. Miller struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); 5362c3426b47SDavid S. Miller 5363c3426b47SDavid S. Miller if (!bp) 5364c3426b47SDavid S. Miller return -ENOMEM; 5365c3426b47SDavid S. Miller inet_peer_base_init(bp); 5366c3426b47SDavid S. 
Miller net->ipv6.peers = bp; 5367c3426b47SDavid S. Miller return 0; 5368c3426b47SDavid S. Miller } 5369c3426b47SDavid S. Miller 5370c3426b47SDavid S. Miller static void __net_exit ipv6_inetpeer_exit(struct net *net) 5371c3426b47SDavid S. Miller { 5372c3426b47SDavid S. Miller struct inet_peer_base *bp = net->ipv6.peers; 5373c3426b47SDavid S. Miller 5374c3426b47SDavid S. Miller net->ipv6.peers = NULL; 537556a6b248SDavid S. Miller inetpeer_invalidate_tree(bp); 5376c3426b47SDavid S. Miller kfree(bp); 5377c3426b47SDavid S. Miller } 5378c3426b47SDavid S. Miller 53792b823f72SDavid S. Miller static struct pernet_operations ipv6_inetpeer_ops = { 5380c3426b47SDavid S. Miller .init = ipv6_inetpeer_init, 5381c3426b47SDavid S. Miller .exit = ipv6_inetpeer_exit, 5382c3426b47SDavid S. Miller }; 5383c3426b47SDavid S. Miller 5384d189634eSThomas Graf static struct pernet_operations ip6_route_net_late_ops = { 5385d189634eSThomas Graf .init = ip6_route_net_init_late, 5386d189634eSThomas Graf .exit = ip6_route_net_exit_late, 5387d189634eSThomas Graf }; 5388d189634eSThomas Graf 53898ed67789SDaniel Lezcano static struct notifier_block ip6_route_dev_notifier = { 53908ed67789SDaniel Lezcano .notifier_call = ip6_route_dev_notify, 5391242d3a49SWANG Cong .priority = ADDRCONF_NOTIFY_PRIORITY - 10, 53928ed67789SDaniel Lezcano }; 53938ed67789SDaniel Lezcano 53942f460933SWANG Cong void __init ip6_route_init_special_entries(void) 53952f460933SWANG Cong { 53962f460933SWANG Cong /* Registering of the loopback is done before this portion of code, 53972f460933SWANG Cong * the loopback reference in rt6_info will not be taken, do it 53982f460933SWANG Cong * manually for init_net */ 5399ad1601aeSDavid Ahern init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev; 54002f460933SWANG Cong init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; 54012f460933SWANG Cong init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 54022f460933SWANG Cong #ifdef 
CONFIG_IPV6_MULTIPLE_TABLES 54032f460933SWANG Cong init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; 54042f460933SWANG Cong init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 54052f460933SWANG Cong init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; 54062f460933SWANG Cong init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); 54072f460933SWANG Cong #endif 54082f460933SWANG Cong } 54092f460933SWANG Cong 5410433d49c3SDaniel Lezcano int __init ip6_route_init(void) 54111da177e4SLinus Torvalds { 5412433d49c3SDaniel Lezcano int ret; 54138d0b94afSMartin KaFai Lau int cpu; 5414433d49c3SDaniel Lezcano 54159a7ec3a9SDaniel Lezcano ret = -ENOMEM; 54169a7ec3a9SDaniel Lezcano ip6_dst_ops_template.kmem_cachep = 54179a7ec3a9SDaniel Lezcano kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, 54189a7ec3a9SDaniel Lezcano SLAB_HWCACHE_ALIGN, NULL); 54199a7ec3a9SDaniel Lezcano if (!ip6_dst_ops_template.kmem_cachep) 5420c19a28e1SFernando Carrijo goto out; 542114e50e57SDavid S. Miller 5422fc66f95cSEric Dumazet ret = dst_entries_init(&ip6_dst_blackhole_ops); 54238ed67789SDaniel Lezcano if (ret) 5424bdb3289fSDaniel Lezcano goto out_kmem_cache; 5425bdb3289fSDaniel Lezcano 5426c3426b47SDavid S. Miller ret = register_pernet_subsys(&ipv6_inetpeer_ops); 5427c3426b47SDavid S. Miller if (ret) 5428e8803b6cSDavid S. Miller goto out_dst_entries; 54292a0c451aSThomas Graf 54307e52b33bSDavid S. Miller ret = register_pernet_subsys(&ip6_route_net_ops); 54317e52b33bSDavid S. Miller if (ret) 54327e52b33bSDavid S. Miller goto out_register_inetpeer; 5433c3426b47SDavid S. Miller 54345dc121e9SArnaud Ebalard ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 54355dc121e9SArnaud Ebalard 5436e8803b6cSDavid S. 
Miller ret = fib6_init(); 5437433d49c3SDaniel Lezcano if (ret) 54388ed67789SDaniel Lezcano goto out_register_subsys; 5439433d49c3SDaniel Lezcano 5440433d49c3SDaniel Lezcano ret = xfrm6_init(); 5441433d49c3SDaniel Lezcano if (ret) 5442e8803b6cSDavid S. Miller goto out_fib6_init; 5443c35b7e72SDaniel Lezcano 5444433d49c3SDaniel Lezcano ret = fib6_rules_init(); 5445433d49c3SDaniel Lezcano if (ret) 5446433d49c3SDaniel Lezcano goto xfrm6_init; 54477e5449c2SDaniel Lezcano 5448d189634eSThomas Graf ret = register_pernet_subsys(&ip6_route_net_late_ops); 5449d189634eSThomas Graf if (ret) 5450d189634eSThomas Graf goto fib6_rules_init; 5451d189634eSThomas Graf 545216feebcfSFlorian Westphal ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE, 545316feebcfSFlorian Westphal inet6_rtm_newroute, NULL, 0); 545416feebcfSFlorian Westphal if (ret < 0) 545516feebcfSFlorian Westphal goto out_register_late_subsys; 545616feebcfSFlorian Westphal 545716feebcfSFlorian Westphal ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE, 545816feebcfSFlorian Westphal inet6_rtm_delroute, NULL, 0); 545916feebcfSFlorian Westphal if (ret < 0) 546016feebcfSFlorian Westphal goto out_register_late_subsys; 546116feebcfSFlorian Westphal 546216feebcfSFlorian Westphal ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, 546316feebcfSFlorian Westphal inet6_rtm_getroute, NULL, 546416feebcfSFlorian Westphal RTNL_FLAG_DOIT_UNLOCKED); 546516feebcfSFlorian Westphal if (ret < 0) 5466d189634eSThomas Graf goto out_register_late_subsys; 5467433d49c3SDaniel Lezcano 54688ed67789SDaniel Lezcano ret = register_netdevice_notifier(&ip6_route_dev_notifier); 5469cdb18761SDaniel Lezcano if (ret) 5470d189634eSThomas Graf goto out_register_late_subsys; 54718ed67789SDaniel Lezcano 54728d0b94afSMartin KaFai Lau for_each_possible_cpu(cpu) { 54738d0b94afSMartin KaFai Lau struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); 54748d0b94afSMartin KaFai Lau 54758d0b94afSMartin KaFai Lau 
INIT_LIST_HEAD(&ul->head); 54768d0b94afSMartin KaFai Lau spin_lock_init(&ul->lock); 54778d0b94afSMartin KaFai Lau } 54788d0b94afSMartin KaFai Lau 5479433d49c3SDaniel Lezcano out: 5480433d49c3SDaniel Lezcano return ret; 5481433d49c3SDaniel Lezcano 5482d189634eSThomas Graf out_register_late_subsys: 548316feebcfSFlorian Westphal rtnl_unregister_all(PF_INET6); 5484d189634eSThomas Graf unregister_pernet_subsys(&ip6_route_net_late_ops); 5485433d49c3SDaniel Lezcano fib6_rules_init: 5486433d49c3SDaniel Lezcano fib6_rules_cleanup(); 5487433d49c3SDaniel Lezcano xfrm6_init: 5488433d49c3SDaniel Lezcano xfrm6_fini(); 54892a0c451aSThomas Graf out_fib6_init: 54902a0c451aSThomas Graf fib6_gc_cleanup(); 54918ed67789SDaniel Lezcano out_register_subsys: 54928ed67789SDaniel Lezcano unregister_pernet_subsys(&ip6_route_net_ops); 54937e52b33bSDavid S. Miller out_register_inetpeer: 54947e52b33bSDavid S. Miller unregister_pernet_subsys(&ipv6_inetpeer_ops); 5495fc66f95cSEric Dumazet out_dst_entries: 5496fc66f95cSEric Dumazet dst_entries_destroy(&ip6_dst_blackhole_ops); 5497433d49c3SDaniel Lezcano out_kmem_cache: 5498f2fc6a54SBenjamin Thery kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 5499433d49c3SDaniel Lezcano goto out; 55001da177e4SLinus Torvalds } 55011da177e4SLinus Torvalds 55021da177e4SLinus Torvalds void ip6_route_cleanup(void) 55031da177e4SLinus Torvalds { 55048ed67789SDaniel Lezcano unregister_netdevice_notifier(&ip6_route_dev_notifier); 5505d189634eSThomas Graf unregister_pernet_subsys(&ip6_route_net_late_ops); 5506101367c2SThomas Graf fib6_rules_cleanup(); 55071da177e4SLinus Torvalds xfrm6_fini(); 55081da177e4SLinus Torvalds fib6_gc_cleanup(); 5509c3426b47SDavid S. 
Miller unregister_pernet_subsys(&ipv6_inetpeer_ops); 55108ed67789SDaniel Lezcano unregister_pernet_subsys(&ip6_route_net_ops); 551141bb78b4SXiaotian Feng dst_entries_destroy(&ip6_dst_blackhole_ops); 5512f2fc6a54SBenjamin Thery kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 55131da177e4SLinus Torvalds } 5514