xref: /openbmc/linux/net/ipv6/route.c (revision b1d40991506aa9f1de310a2e74ef8e3bec6ba215)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  *	Linux INET6 implementation
31da177e4SLinus Torvalds  *	FIB front-end.
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  *	Authors:
61da177e4SLinus Torvalds  *	Pedro Roque		<roque@di.fc.ul.pt>
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
91da177e4SLinus Torvalds  *      modify it under the terms of the GNU General Public License
101da177e4SLinus Torvalds  *      as published by the Free Software Foundation; either version
111da177e4SLinus Torvalds  *      2 of the License, or (at your option) any later version.
121da177e4SLinus Torvalds  */
131da177e4SLinus Torvalds 
141da177e4SLinus Torvalds /*	Changes:
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  *	YOSHIFUJI Hideaki @USAGI
171da177e4SLinus Torvalds  *		reworked default router selection.
181da177e4SLinus Torvalds  *		- respect outgoing interface
191da177e4SLinus Torvalds  *		- select from (probably) reachable routers (i.e.
201da177e4SLinus Torvalds  *		routers in REACHABLE, STALE, DELAY or PROBE states).
211da177e4SLinus Torvalds  *		- always select the same router if it is (probably)
221da177e4SLinus Torvalds  *		reachable.  otherwise, round-robin the list.
23c0bece9fSYOSHIFUJI Hideaki  *	Ville Nuorvala
24c0bece9fSYOSHIFUJI Hideaki  *		Fixed routing subtrees.
251da177e4SLinus Torvalds  */
261da177e4SLinus Torvalds 
27f3213831SJoe Perches #define pr_fmt(fmt) "IPv6: " fmt
28f3213831SJoe Perches 
294fc268d2SRandy Dunlap #include <linux/capability.h>
301da177e4SLinus Torvalds #include <linux/errno.h>
31bc3b2d7fSPaul Gortmaker #include <linux/export.h>
321da177e4SLinus Torvalds #include <linux/types.h>
331da177e4SLinus Torvalds #include <linux/times.h>
341da177e4SLinus Torvalds #include <linux/socket.h>
351da177e4SLinus Torvalds #include <linux/sockios.h>
361da177e4SLinus Torvalds #include <linux/net.h>
371da177e4SLinus Torvalds #include <linux/route.h>
381da177e4SLinus Torvalds #include <linux/netdevice.h>
391da177e4SLinus Torvalds #include <linux/in6.h>
407bc570c8SYOSHIFUJI Hideaki #include <linux/mroute6.h>
411da177e4SLinus Torvalds #include <linux/init.h>
421da177e4SLinus Torvalds #include <linux/if_arp.h>
431da177e4SLinus Torvalds #include <linux/proc_fs.h>
441da177e4SLinus Torvalds #include <linux/seq_file.h>
455b7c931dSDaniel Lezcano #include <linux/nsproxy.h>
465a0e3ad6STejun Heo #include <linux/slab.h>
4735732d01SWei Wang #include <linux/jhash.h>
48457c4cbcSEric W. Biederman #include <net/net_namespace.h>
491da177e4SLinus Torvalds #include <net/snmp.h>
501da177e4SLinus Torvalds #include <net/ipv6.h>
511da177e4SLinus Torvalds #include <net/ip6_fib.h>
521da177e4SLinus Torvalds #include <net/ip6_route.h>
531da177e4SLinus Torvalds #include <net/ndisc.h>
541da177e4SLinus Torvalds #include <net/addrconf.h>
551da177e4SLinus Torvalds #include <net/tcp.h>
561da177e4SLinus Torvalds #include <linux/rtnetlink.h>
571da177e4SLinus Torvalds #include <net/dst.h>
58904af04dSJiri Benc #include <net/dst_metadata.h>
591da177e4SLinus Torvalds #include <net/xfrm.h>
608d71740cSTom Tucker #include <net/netevent.h>
6121713ebcSThomas Graf #include <net/netlink.h>
6251ebd318SNicolas Dichtel #include <net/nexthop.h>
6319e42e45SRoopa Prabhu #include <net/lwtunnel.h>
64904af04dSJiri Benc #include <net/ip_tunnels.h>
65ca254490SDavid Ahern #include <net/l3mdev.h>
66eacb9384SRoopa Prabhu #include <net/ip.h>
677c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
681da177e4SLinus Torvalds 
691da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
701da177e4SLinus Torvalds #include <linux/sysctl.h>
711da177e4SLinus Torvalds #endif
721da177e4SLinus Torvalds 
7330d444d3SDavid Ahern static int ip6_rt_type_to_error(u8 fib6_type);
7430d444d3SDavid Ahern 
7530d444d3SDavid Ahern #define CREATE_TRACE_POINTS
7630d444d3SDavid Ahern #include <trace/events/fib6.h>
7730d444d3SDavid Ahern EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
7830d444d3SDavid Ahern #undef CREATE_TRACE_POINTS
7930d444d3SDavid Ahern 
/*
 * Result of checking a next hop's neighbour entry.  Failures are negative,
 * so callers can test "< 0" and hand the code straight back.
 */
enum rt6_nud_state {
	/* Starting value in rt6_check_neigh(); also used by rt6_score_route()
	 * when a strict interface match was requested and not met.
	 */
	RT6_NUD_FAIL_HARD	= -3,
	/* Neighbour entry exists but is in NUD_FAILED (router-pref builds). */
	RT6_NUD_FAIL_PROBE	= -2,
	/* No neighbour entry and CONFIG_IPV6_ROUTER_PREF is disabled. */
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1,
};
86afc154e9SHannes Frederic Sowa 
871da177e4SLinus Torvalds static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
880dbaee3bSDavid S. Miller static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
89ebb762f2SSteffen Klassert static unsigned int	 ip6_mtu(const struct dst_entry *dst);
901da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *);
911da177e4SLinus Torvalds static void		ip6_dst_destroy(struct dst_entry *);
921da177e4SLinus Torvalds static void		ip6_dst_ifdown(struct dst_entry *,
931da177e4SLinus Torvalds 				       struct net_device *dev, int how);
94569d3645SDaniel Lezcano static int		 ip6_dst_gc(struct dst_ops *ops);
951da177e4SLinus Torvalds 
961da177e4SLinus Torvalds static int		ip6_pkt_discard(struct sk_buff *skb);
97ede2059dSEric W. Biederman static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
987150aedeSKamala R static int		ip6_pkt_prohibit(struct sk_buff *skb);
99ede2059dSEric W. Biederman static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1001da177e4SLinus Torvalds static void		ip6_link_failure(struct sk_buff *skb);
1016700c270SDavid S. Miller static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1026700c270SDavid S. Miller 					   struct sk_buff *skb, u32 mtu);
1036700c270SDavid S. Miller static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
1046700c270SDavid S. Miller 					struct sk_buff *skb);
105702cea56SDavid Ahern static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
106702cea56SDavid Ahern 			   int strict);
1078d1c802bSDavid Ahern static size_t rt6_nlmsg_size(struct fib6_info *rt);
108d4ead6b3SDavid Ahern static int rt6_fill_node(struct net *net, struct sk_buff *skb,
1098d1c802bSDavid Ahern 			 struct fib6_info *rt, struct dst_entry *dst,
110d4ead6b3SDavid Ahern 			 struct in6_addr *dest, struct in6_addr *src,
11116a16cd3SDavid Ahern 			 int iif, int type, u32 portid, u32 seq,
11216a16cd3SDavid Ahern 			 unsigned int flags);
1138d1c802bSDavid Ahern static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
11435732d01SWei Wang 					   struct in6_addr *daddr,
11535732d01SWei Wang 					   struct in6_addr *saddr);
1161da177e4SLinus Torvalds 
11770ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
1188d1c802bSDavid Ahern static struct fib6_info *rt6_add_route_info(struct net *net,
119b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
120830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
121830218c1SDavid Ahern 					   struct net_device *dev,
12295c96174SEric Dumazet 					   unsigned int pref);
1238d1c802bSDavid Ahern static struct fib6_info *rt6_get_route_info(struct net *net,
124b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
125830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
126830218c1SDavid Ahern 					   struct net_device *dev);
12770ceb4f5SYOSHIFUJI Hideaki #endif
12870ceb4f5SYOSHIFUJI Hideaki 
1298d0b94afSMartin KaFai Lau struct uncached_list {
1308d0b94afSMartin KaFai Lau 	spinlock_t		lock;
1318d0b94afSMartin KaFai Lau 	struct list_head	head;
1328d0b94afSMartin KaFai Lau };
1338d0b94afSMartin KaFai Lau 
1348d0b94afSMartin KaFai Lau static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
1358d0b94afSMartin KaFai Lau 
136510c321bSXin Long void rt6_uncached_list_add(struct rt6_info *rt)
1378d0b94afSMartin KaFai Lau {
1388d0b94afSMartin KaFai Lau 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
1398d0b94afSMartin KaFai Lau 
1408d0b94afSMartin KaFai Lau 	rt->rt6i_uncached_list = ul;
1418d0b94afSMartin KaFai Lau 
1428d0b94afSMartin KaFai Lau 	spin_lock_bh(&ul->lock);
1438d0b94afSMartin KaFai Lau 	list_add_tail(&rt->rt6i_uncached, &ul->head);
1448d0b94afSMartin KaFai Lau 	spin_unlock_bh(&ul->lock);
1458d0b94afSMartin KaFai Lau }
1468d0b94afSMartin KaFai Lau 
147510c321bSXin Long void rt6_uncached_list_del(struct rt6_info *rt)
1488d0b94afSMartin KaFai Lau {
1498d0b94afSMartin KaFai Lau 	if (!list_empty(&rt->rt6i_uncached)) {
1508d0b94afSMartin KaFai Lau 		struct uncached_list *ul = rt->rt6i_uncached_list;
15181eb8447SWei Wang 		struct net *net = dev_net(rt->dst.dev);
1528d0b94afSMartin KaFai Lau 
1538d0b94afSMartin KaFai Lau 		spin_lock_bh(&ul->lock);
1548d0b94afSMartin KaFai Lau 		list_del(&rt->rt6i_uncached);
15581eb8447SWei Wang 		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
1568d0b94afSMartin KaFai Lau 		spin_unlock_bh(&ul->lock);
1578d0b94afSMartin KaFai Lau 	}
1588d0b94afSMartin KaFai Lau }
1598d0b94afSMartin KaFai Lau 
1608d0b94afSMartin KaFai Lau static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
1618d0b94afSMartin KaFai Lau {
1628d0b94afSMartin KaFai Lau 	struct net_device *loopback_dev = net->loopback_dev;
1638d0b94afSMartin KaFai Lau 	int cpu;
1648d0b94afSMartin KaFai Lau 
165e332bc67SEric W. Biederman 	if (dev == loopback_dev)
166e332bc67SEric W. Biederman 		return;
167e332bc67SEric W. Biederman 
1688d0b94afSMartin KaFai Lau 	for_each_possible_cpu(cpu) {
1698d0b94afSMartin KaFai Lau 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
1708d0b94afSMartin KaFai Lau 		struct rt6_info *rt;
1718d0b94afSMartin KaFai Lau 
1728d0b94afSMartin KaFai Lau 		spin_lock_bh(&ul->lock);
1738d0b94afSMartin KaFai Lau 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
1748d0b94afSMartin KaFai Lau 			struct inet6_dev *rt_idev = rt->rt6i_idev;
1758d0b94afSMartin KaFai Lau 			struct net_device *rt_dev = rt->dst.dev;
1768d0b94afSMartin KaFai Lau 
177e332bc67SEric W. Biederman 			if (rt_idev->dev == dev) {
1788d0b94afSMartin KaFai Lau 				rt->rt6i_idev = in6_dev_get(loopback_dev);
1798d0b94afSMartin KaFai Lau 				in6_dev_put(rt_idev);
1808d0b94afSMartin KaFai Lau 			}
1818d0b94afSMartin KaFai Lau 
182e332bc67SEric W. Biederman 			if (rt_dev == dev) {
1838d0b94afSMartin KaFai Lau 				rt->dst.dev = loopback_dev;
1848d0b94afSMartin KaFai Lau 				dev_hold(rt->dst.dev);
1858d0b94afSMartin KaFai Lau 				dev_put(rt_dev);
1868d0b94afSMartin KaFai Lau 			}
1878d0b94afSMartin KaFai Lau 		}
1888d0b94afSMartin KaFai Lau 		spin_unlock_bh(&ul->lock);
1898d0b94afSMartin KaFai Lau 	}
1908d0b94afSMartin KaFai Lau }
1918d0b94afSMartin KaFai Lau 
192f8a1b43bSDavid Ahern static inline const void *choose_neigh_daddr(const struct in6_addr *p,
193f894cbf8SDavid S. Miller 					     struct sk_buff *skb,
194f894cbf8SDavid S. Miller 					     const void *daddr)
19539232973SDavid S. Miller {
196a7563f34SDavid S. Miller 	if (!ipv6_addr_any(p))
19739232973SDavid S. Miller 		return (const void *) p;
198f894cbf8SDavid S. Miller 	else if (skb)
199f894cbf8SDavid S. Miller 		return &ipv6_hdr(skb)->daddr;
20039232973SDavid S. Miller 	return daddr;
20139232973SDavid S. Miller }
20239232973SDavid S. Miller 
203f8a1b43bSDavid Ahern struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
204f8a1b43bSDavid Ahern 				   struct net_device *dev,
205f894cbf8SDavid S. Miller 				   struct sk_buff *skb,
206f894cbf8SDavid S. Miller 				   const void *daddr)
207d3aaeb38SDavid S. Miller {
20839232973SDavid S. Miller 	struct neighbour *n;
20939232973SDavid S. Miller 
210f8a1b43bSDavid Ahern 	daddr = choose_neigh_daddr(gw, skb, daddr);
211f8a1b43bSDavid Ahern 	n = __ipv6_neigh_lookup(dev, daddr);
212f83c7790SDavid S. Miller 	if (n)
213f83c7790SDavid S. Miller 		return n;
2147adf3246SStefano Brivio 
2157adf3246SStefano Brivio 	n = neigh_create(&nd_tbl, daddr, dev);
2167adf3246SStefano Brivio 	return IS_ERR(n) ? NULL : n;
217f8a1b43bSDavid Ahern }
218f8a1b43bSDavid Ahern 
219f8a1b43bSDavid Ahern static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
220f8a1b43bSDavid Ahern 					      struct sk_buff *skb,
221f8a1b43bSDavid Ahern 					      const void *daddr)
222f8a1b43bSDavid Ahern {
223f8a1b43bSDavid Ahern 	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
224f8a1b43bSDavid Ahern 
225f8a1b43bSDavid Ahern 	return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
226f83c7790SDavid S. Miller }
227f83c7790SDavid S. Miller 
22863fca65dSJulian Anastasov static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
22963fca65dSJulian Anastasov {
23063fca65dSJulian Anastasov 	struct net_device *dev = dst->dev;
23163fca65dSJulian Anastasov 	struct rt6_info *rt = (struct rt6_info *)dst;
23263fca65dSJulian Anastasov 
233f8a1b43bSDavid Ahern 	daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
23463fca65dSJulian Anastasov 	if (!daddr)
23563fca65dSJulian Anastasov 		return;
23663fca65dSJulian Anastasov 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
23763fca65dSJulian Anastasov 		return;
23863fca65dSJulian Anastasov 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
23963fca65dSJulian Anastasov 		return;
24063fca65dSJulian Anastasov 	__ipv6_confirm_neigh(dev, daddr);
24163fca65dSJulian Anastasov }
24263fca65dSJulian Anastasov 
2439a7ec3a9SDaniel Lezcano static struct dst_ops ip6_dst_ops_template = {
2441da177e4SLinus Torvalds 	.family			=	AF_INET6,
2451da177e4SLinus Torvalds 	.gc			=	ip6_dst_gc,
2461da177e4SLinus Torvalds 	.gc_thresh		=	1024,
2471da177e4SLinus Torvalds 	.check			=	ip6_dst_check,
2480dbaee3bSDavid S. Miller 	.default_advmss		=	ip6_default_advmss,
249ebb762f2SSteffen Klassert 	.mtu			=	ip6_mtu,
250d4ead6b3SDavid Ahern 	.cow_metrics		=	dst_cow_metrics_generic,
2511da177e4SLinus Torvalds 	.destroy		=	ip6_dst_destroy,
2521da177e4SLinus Torvalds 	.ifdown			=	ip6_dst_ifdown,
2531da177e4SLinus Torvalds 	.negative_advice	=	ip6_negative_advice,
2541da177e4SLinus Torvalds 	.link_failure		=	ip6_link_failure,
2551da177e4SLinus Torvalds 	.update_pmtu		=	ip6_rt_update_pmtu,
2566e157b6aSDavid S. Miller 	.redirect		=	rt6_do_redirect,
2579f8955ccSEric W. Biederman 	.local_out		=	__ip6_local_out,
258f8a1b43bSDavid Ahern 	.neigh_lookup		=	ip6_dst_neigh_lookup,
25963fca65dSJulian Anastasov 	.confirm_neigh		=	ip6_confirm_neigh,
2601da177e4SLinus Torvalds };
2611da177e4SLinus Torvalds 
262ebb762f2SSteffen Klassert static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
263ec831ea7SRoland Dreier {
264618f9bc7SSteffen Klassert 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
265618f9bc7SSteffen Klassert 
266618f9bc7SSteffen Klassert 	return mtu ? : dst->dev->mtu;
267ec831ea7SRoland Dreier }
268ec831ea7SRoland Dreier 
2696700c270SDavid S. Miller static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2706700c270SDavid S. Miller 					 struct sk_buff *skb, u32 mtu)
27114e50e57SDavid S. Miller {
27214e50e57SDavid S. Miller }
27314e50e57SDavid S. Miller 
/* dst_ops->redirect hook for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* no-op */
}
278b587ee3bSDavid S. Miller 
27914e50e57SDavid S. Miller static struct dst_ops ip6_dst_blackhole_ops = {
28014e50e57SDavid S. Miller 	.family			=	AF_INET6,
28114e50e57SDavid S. Miller 	.destroy		=	ip6_dst_destroy,
28214e50e57SDavid S. Miller 	.check			=	ip6_dst_check,
283ebb762f2SSteffen Klassert 	.mtu			=	ip6_blackhole_mtu,
284214f45c9SEric Dumazet 	.default_advmss		=	ip6_default_advmss,
28514e50e57SDavid S. Miller 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
286b587ee3bSDavid S. Miller 	.redirect		=	ip6_rt_blackhole_redirect,
2870a1f5962SMartin KaFai Lau 	.cow_metrics		=	dst_cow_metrics_generic,
288f8a1b43bSDavid Ahern 	.neigh_lookup		=	ip6_dst_neigh_lookup,
28914e50e57SDavid S. Miller };
29014e50e57SDavid S. Miller 
29162fa8a84SDavid S. Miller static const u32 ip6_template_metrics[RTAX_MAX] = {
29214edd87dSLi RongQing 	[RTAX_HOPLIMIT - 1] = 0,
29362fa8a84SDavid S. Miller };
29462fa8a84SDavid S. Miller 
2958d1c802bSDavid Ahern static const struct fib6_info fib6_null_entry_template = {
29693c2fb25SDavid Ahern 	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
29793c2fb25SDavid Ahern 	.fib6_protocol  = RTPROT_KERNEL,
29893c2fb25SDavid Ahern 	.fib6_metric	= ~(u32)0,
29993c2fb25SDavid Ahern 	.fib6_ref	= ATOMIC_INIT(1),
300421842edSDavid Ahern 	.fib6_type	= RTN_UNREACHABLE,
301421842edSDavid Ahern 	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
302421842edSDavid Ahern };
303421842edSDavid Ahern 
304fb0af4c7SEric Dumazet static const struct rt6_info ip6_null_entry_template = {
3051da177e4SLinus Torvalds 	.dst = {
3061da177e4SLinus Torvalds 		.__refcnt	= ATOMIC_INIT(1),
3071da177e4SLinus Torvalds 		.__use		= 1,
3082c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
3091da177e4SLinus Torvalds 		.error		= -ENETUNREACH,
3101da177e4SLinus Torvalds 		.input		= ip6_pkt_discard,
3111da177e4SLinus Torvalds 		.output		= ip6_pkt_discard_out,
3121da177e4SLinus Torvalds 	},
3131da177e4SLinus Torvalds 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
3141da177e4SLinus Torvalds };
3151da177e4SLinus Torvalds 
316101367c2SThomas Graf #ifdef CONFIG_IPV6_MULTIPLE_TABLES
317101367c2SThomas Graf 
318fb0af4c7SEric Dumazet static const struct rt6_info ip6_prohibit_entry_template = {
319101367c2SThomas Graf 	.dst = {
320101367c2SThomas Graf 		.__refcnt	= ATOMIC_INIT(1),
321101367c2SThomas Graf 		.__use		= 1,
3222c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
323101367c2SThomas Graf 		.error		= -EACCES,
3249ce8ade0SThomas Graf 		.input		= ip6_pkt_prohibit,
3259ce8ade0SThomas Graf 		.output		= ip6_pkt_prohibit_out,
326101367c2SThomas Graf 	},
327101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
328101367c2SThomas Graf };
329101367c2SThomas Graf 
330fb0af4c7SEric Dumazet static const struct rt6_info ip6_blk_hole_entry_template = {
331101367c2SThomas Graf 	.dst = {
332101367c2SThomas Graf 		.__refcnt	= ATOMIC_INIT(1),
333101367c2SThomas Graf 		.__use		= 1,
3342c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
335101367c2SThomas Graf 		.error		= -EINVAL,
336352e512cSHerbert Xu 		.input		= dst_discard,
337ede2059dSEric W. Biederman 		.output		= dst_discard_out,
338101367c2SThomas Graf 	},
339101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
340101367c2SThomas Graf };
341101367c2SThomas Graf 
342101367c2SThomas Graf #endif
343101367c2SThomas Graf 
344ebfa45f0SMartin KaFai Lau static void rt6_info_init(struct rt6_info *rt)
345ebfa45f0SMartin KaFai Lau {
346ebfa45f0SMartin KaFai Lau 	struct dst_entry *dst = &rt->dst;
347ebfa45f0SMartin KaFai Lau 
348ebfa45f0SMartin KaFai Lau 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
349ebfa45f0SMartin KaFai Lau 	INIT_LIST_HEAD(&rt->rt6i_uncached);
350ebfa45f0SMartin KaFai Lau }
351ebfa45f0SMartin KaFai Lau 
3521da177e4SLinus Torvalds /* allocate dst with ip6_dst_ops */
35393531c67SDavid Ahern struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
354ad706862SMartin KaFai Lau 			       int flags)
3551da177e4SLinus Torvalds {
35697bab73fSDavid S. Miller 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
357b2a9c0edSWei Wang 					1, DST_OBSOLETE_FORCE_CHK, flags);
358cf911662SDavid S. Miller 
35981eb8447SWei Wang 	if (rt) {
360ebfa45f0SMartin KaFai Lau 		rt6_info_init(rt);
36181eb8447SWei Wang 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
36281eb8447SWei Wang 	}
3638104891bSSteffen Klassert 
364cf911662SDavid S. Miller 	return rt;
3651da177e4SLinus Torvalds }
3669ab179d8SDavid Ahern EXPORT_SYMBOL(ip6_dst_alloc);
367d52d3997SMartin KaFai Lau 
3681da177e4SLinus Torvalds static void ip6_dst_destroy(struct dst_entry *dst)
3691da177e4SLinus Torvalds {
3701da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
371a68886a6SDavid Ahern 	struct fib6_info *from;
3728d0b94afSMartin KaFai Lau 	struct inet6_dev *idev;
3731da177e4SLinus Torvalds 
3741620a336SDavid Ahern 	ip_dst_metrics_put(dst);
3758d0b94afSMartin KaFai Lau 	rt6_uncached_list_del(rt);
3768d0b94afSMartin KaFai Lau 
3778d0b94afSMartin KaFai Lau 	idev = rt->rt6i_idev;
37838308473SDavid S. Miller 	if (idev) {
3791da177e4SLinus Torvalds 		rt->rt6i_idev = NULL;
3801da177e4SLinus Torvalds 		in6_dev_put(idev);
3811da177e4SLinus Torvalds 	}
3821716a961SGao feng 
383a68886a6SDavid Ahern 	rcu_read_lock();
384a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
385a68886a6SDavid Ahern 	rcu_assign_pointer(rt->from, NULL);
38693531c67SDavid Ahern 	fib6_info_release(from);
387a68886a6SDavid Ahern 	rcu_read_unlock();
388b3419363SDavid S. Miller }
389b3419363SDavid S. Miller 
3901da177e4SLinus Torvalds static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
3911da177e4SLinus Torvalds 			   int how)
3921da177e4SLinus Torvalds {
3931da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
3941da177e4SLinus Torvalds 	struct inet6_dev *idev = rt->rt6i_idev;
3955a3e55d6SDenis V. Lunev 	struct net_device *loopback_dev =
396c346dca1SYOSHIFUJI Hideaki 		dev_net(dev)->loopback_dev;
3971da177e4SLinus Torvalds 
398e5645f51SWei Wang 	if (idev && idev->dev != loopback_dev) {
399e5645f51SWei Wang 		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
40038308473SDavid S. Miller 		if (loopback_idev) {
4011da177e4SLinus Torvalds 			rt->rt6i_idev = loopback_idev;
4021da177e4SLinus Torvalds 			in6_dev_put(idev);
4031da177e4SLinus Torvalds 		}
4041da177e4SLinus Torvalds 	}
40597cac082SDavid S. Miller }
4061da177e4SLinus Torvalds 
4075973fb1eSMartin KaFai Lau static bool __rt6_check_expired(const struct rt6_info *rt)
4085973fb1eSMartin KaFai Lau {
4095973fb1eSMartin KaFai Lau 	if (rt->rt6i_flags & RTF_EXPIRES)
4105973fb1eSMartin KaFai Lau 		return time_after(jiffies, rt->dst.expires);
4115973fb1eSMartin KaFai Lau 	else
4125973fb1eSMartin KaFai Lau 		return false;
4135973fb1eSMartin KaFai Lau }
4145973fb1eSMartin KaFai Lau 
415a50feda5SEric Dumazet static bool rt6_check_expired(const struct rt6_info *rt)
4161da177e4SLinus Torvalds {
417a68886a6SDavid Ahern 	struct fib6_info *from;
418a68886a6SDavid Ahern 
419a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
420a68886a6SDavid Ahern 
4211716a961SGao feng 	if (rt->rt6i_flags & RTF_EXPIRES) {
4221716a961SGao feng 		if (time_after(jiffies, rt->dst.expires))
423a50feda5SEric Dumazet 			return true;
424a68886a6SDavid Ahern 	} else if (from) {
4251e2ea8adSXin Long 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
426a68886a6SDavid Ahern 			fib6_check_expired(from);
4271716a961SGao feng 	}
428a50feda5SEric Dumazet 	return false;
4291da177e4SLinus Torvalds }
4301da177e4SLinus Torvalds 
431*b1d40991SDavid Ahern void fib6_select_path(const struct net *net, struct fib6_result *res,
432*b1d40991SDavid Ahern 		      struct flowi6 *fl6, int oif, bool have_oif_match,
433*b1d40991SDavid Ahern 		      const struct sk_buff *skb, int strict)
43451ebd318SNicolas Dichtel {
4358d1c802bSDavid Ahern 	struct fib6_info *sibling, *next_sibling;
436*b1d40991SDavid Ahern 	struct fib6_info *match = res->f6i;
437*b1d40991SDavid Ahern 
438*b1d40991SDavid Ahern 	if (!match->fib6_nsiblings || have_oif_match)
439*b1d40991SDavid Ahern 		goto out;
44051ebd318SNicolas Dichtel 
441b673d6ccSJakub Sitnicki 	/* We might have already computed the hash for ICMPv6 errors. In such
442b673d6ccSJakub Sitnicki 	 * case it will always be non-zero. Otherwise now is the time to do it.
443b673d6ccSJakub Sitnicki 	 */
444b673d6ccSJakub Sitnicki 	if (!fl6->mp_hash)
445b4bac172SDavid Ahern 		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
446b673d6ccSJakub Sitnicki 
447ad1601aeSDavid Ahern 	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
448*b1d40991SDavid Ahern 		goto out;
449bbfcd776SIdo Schimmel 
45093c2fb25SDavid Ahern 	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
45193c2fb25SDavid Ahern 				 fib6_siblings) {
452702cea56SDavid Ahern 		const struct fib6_nh *nh = &sibling->fib6_nh;
4535e670d84SDavid Ahern 		int nh_upper_bound;
4545e670d84SDavid Ahern 
455702cea56SDavid Ahern 		nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
4565e670d84SDavid Ahern 		if (fl6->mp_hash > nh_upper_bound)
4573d709f69SIdo Schimmel 			continue;
458702cea56SDavid Ahern 		if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
45952bd4c0cSNicolas Dichtel 			break;
46051ebd318SNicolas Dichtel 		match = sibling;
46151ebd318SNicolas Dichtel 		break;
46251ebd318SNicolas Dichtel 	}
4633d709f69SIdo Schimmel 
464*b1d40991SDavid Ahern out:
465*b1d40991SDavid Ahern 	res->f6i = match;
466*b1d40991SDavid Ahern 	res->nh = &match->fib6_nh;
46751ebd318SNicolas Dichtel }
46851ebd318SNicolas Dichtel 
4691da177e4SLinus Torvalds /*
47066f5d6ceSWei Wang  *	Route lookup. rcu_read_lock() should be held.
4711da177e4SLinus Torvalds  */
4721da177e4SLinus Torvalds 
4730c59d006SDavid Ahern static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
4740c59d006SDavid Ahern 			       const struct in6_addr *saddr, int oif, int flags)
4750c59d006SDavid Ahern {
4760c59d006SDavid Ahern 	const struct net_device *dev;
4770c59d006SDavid Ahern 
4780c59d006SDavid Ahern 	if (nh->fib_nh_flags & RTNH_F_DEAD)
4790c59d006SDavid Ahern 		return false;
4800c59d006SDavid Ahern 
4810c59d006SDavid Ahern 	dev = nh->fib_nh_dev;
4820c59d006SDavid Ahern 	if (oif) {
4830c59d006SDavid Ahern 		if (dev->ifindex == oif)
4840c59d006SDavid Ahern 			return true;
4850c59d006SDavid Ahern 	} else {
4860c59d006SDavid Ahern 		if (ipv6_chk_addr(net, saddr, dev,
4870c59d006SDavid Ahern 				  flags & RT6_LOOKUP_F_IFACE))
4880c59d006SDavid Ahern 			return true;
4890c59d006SDavid Ahern 	}
4900c59d006SDavid Ahern 
4910c59d006SDavid Ahern 	return false;
4920c59d006SDavid Ahern }
4930c59d006SDavid Ahern 
4948d1c802bSDavid Ahern static inline struct fib6_info *rt6_device_match(struct net *net,
4958d1c802bSDavid Ahern 						 struct fib6_info *rt,
496b71d1d42SEric Dumazet 						    const struct in6_addr *saddr,
4971da177e4SLinus Torvalds 						    int oif,
498d420895eSYOSHIFUJI Hideaki 						    int flags)
4991da177e4SLinus Torvalds {
5000c59d006SDavid Ahern 	const struct fib6_nh *nh;
5018d1c802bSDavid Ahern 	struct fib6_info *sprt;
5021da177e4SLinus Torvalds 
5035e670d84SDavid Ahern 	if (!oif && ipv6_addr_any(saddr) &&
504ad1601aeSDavid Ahern 	    !(rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD))
5058067bb8cSIdo Schimmel 		return rt;
506dd3abc4eSYOSHIFUJI Hideaki 
5078fb11a9aSDavid Ahern 	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
5080c59d006SDavid Ahern 		nh = &sprt->fib6_nh;
5090c59d006SDavid Ahern 		if (__rt6_device_match(net, nh, saddr, oif, flags))
5101da177e4SLinus Torvalds 			return sprt;
5111da177e4SLinus Torvalds 	}
5121da177e4SLinus Torvalds 
513eea68cd3SDavid Ahern 	if (oif && flags & RT6_LOOKUP_F_IFACE)
514421842edSDavid Ahern 		return net->ipv6.fib6_null_entry;
5151da177e4SLinus Torvalds 
516ad1601aeSDavid Ahern 	return rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
5171da177e4SLinus Torvalds }
5181da177e4SLinus Torvalds 
51927097255SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
520c2f17e82SHannes Frederic Sowa struct __rt6_probe_work {
521c2f17e82SHannes Frederic Sowa 	struct work_struct work;
522c2f17e82SHannes Frederic Sowa 	struct in6_addr target;
523c2f17e82SHannes Frederic Sowa 	struct net_device *dev;
524c2f17e82SHannes Frederic Sowa };
525c2f17e82SHannes Frederic Sowa 
526c2f17e82SHannes Frederic Sowa static void rt6_probe_deferred(struct work_struct *w)
527c2f17e82SHannes Frederic Sowa {
528c2f17e82SHannes Frederic Sowa 	struct in6_addr mcaddr;
529c2f17e82SHannes Frederic Sowa 	struct __rt6_probe_work *work =
530c2f17e82SHannes Frederic Sowa 		container_of(w, struct __rt6_probe_work, work);
531c2f17e82SHannes Frederic Sowa 
532c2f17e82SHannes Frederic Sowa 	addrconf_addr_solict_mult(&work->target, &mcaddr);
533adc176c5SErik Nordmark 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
534c2f17e82SHannes Frederic Sowa 	dev_put(work->dev);
535662f5533SMichael Büsch 	kfree(work);
536c2f17e82SHannes Frederic Sowa }
537c2f17e82SHannes Frederic Sowa 
538cc3a86c8SDavid Ahern static void rt6_probe(struct fib6_nh *fib6_nh)
53927097255SYOSHIFUJI Hideaki {
540f547fac6SSabrina Dubroca 	struct __rt6_probe_work *work = NULL;
5415e670d84SDavid Ahern 	const struct in6_addr *nh_gw;
542f2c31e32SEric Dumazet 	struct neighbour *neigh;
5435e670d84SDavid Ahern 	struct net_device *dev;
544f547fac6SSabrina Dubroca 	struct inet6_dev *idev;
5455e670d84SDavid Ahern 
54627097255SYOSHIFUJI Hideaki 	/*
54727097255SYOSHIFUJI Hideaki 	 * Okay, this does not seem to be appropriate
54827097255SYOSHIFUJI Hideaki 	 * for now, however, we need to check if it
54927097255SYOSHIFUJI Hideaki 	 * is really so; aka Router Reachability Probing.
55027097255SYOSHIFUJI Hideaki 	 *
55127097255SYOSHIFUJI Hideaki 	 * Router Reachability Probe MUST be rate-limited
55227097255SYOSHIFUJI Hideaki 	 * to no more than one per minute.
55327097255SYOSHIFUJI Hideaki 	 */
554cc3a86c8SDavid Ahern 	if (fib6_nh->fib_nh_gw_family)
555fdd6681dSAmerigo Wang 		return;
5565e670d84SDavid Ahern 
557cc3a86c8SDavid Ahern 	nh_gw = &fib6_nh->fib_nh_gw6;
558cc3a86c8SDavid Ahern 	dev = fib6_nh->fib_nh_dev;
5592152caeaSYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_lock_bh();
560f547fac6SSabrina Dubroca 	idev = __in6_dev_get(dev);
5615e670d84SDavid Ahern 	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
5622152caeaSYOSHIFUJI Hideaki / 吉藤英明 	if (neigh) {
5638d6c31bfSMartin KaFai Lau 		if (neigh->nud_state & NUD_VALID)
5648d6c31bfSMartin KaFai Lau 			goto out;
5658d6c31bfSMartin KaFai Lau 
5662152caeaSYOSHIFUJI Hideaki / 吉藤英明 		write_lock(&neigh->lock);
567990edb42SMartin KaFai Lau 		if (!(neigh->nud_state & NUD_VALID) &&
568990edb42SMartin KaFai Lau 		    time_after(jiffies,
569dcd1f572SDavid Ahern 			       neigh->updated + idev->cnf.rtr_probe_interval)) {
570c2f17e82SHannes Frederic Sowa 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
571990edb42SMartin KaFai Lau 			if (work)
5727e980569SJiri Benc 				__neigh_set_probe_once(neigh);
573990edb42SMartin KaFai Lau 		}
574c2f17e82SHannes Frederic Sowa 		write_unlock(&neigh->lock);
575cc3a86c8SDavid Ahern 	} else if (time_after(jiffies, fib6_nh->last_probe +
576f547fac6SSabrina Dubroca 				       idev->cnf.rtr_probe_interval)) {
577990edb42SMartin KaFai Lau 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
578990edb42SMartin KaFai Lau 	}
579c2f17e82SHannes Frederic Sowa 
580c2f17e82SHannes Frederic Sowa 	if (work) {
581cc3a86c8SDavid Ahern 		fib6_nh->last_probe = jiffies;
582c2f17e82SHannes Frederic Sowa 		INIT_WORK(&work->work, rt6_probe_deferred);
5835e670d84SDavid Ahern 		work->target = *nh_gw;
5845e670d84SDavid Ahern 		dev_hold(dev);
5855e670d84SDavid Ahern 		work->dev = dev;
586c2f17e82SHannes Frederic Sowa 		schedule_work(&work->work);
587c2f17e82SHannes Frederic Sowa 	}
588990edb42SMartin KaFai Lau 
5898d6c31bfSMartin KaFai Lau out:
5902152caeaSYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_unlock_bh();
591f2c31e32SEric Dumazet }
59227097255SYOSHIFUJI Hideaki #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
	/* no-op */
}
59627097255SYOSHIFUJI Hideaki #endif
59727097255SYOSHIFUJI Hideaki 
5981da177e4SLinus Torvalds /*
599554cfb7eSYOSHIFUJI Hideaki  * Default Router Selection (RFC 2461 6.3.6)
6001da177e4SLinus Torvalds  */
6011ba9a895SDavid Ahern static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
6021da177e4SLinus Torvalds {
603afc154e9SHannes Frederic Sowa 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
6045e670d84SDavid Ahern 	struct neighbour *neigh;
605f2c31e32SEric Dumazet 
606145a3621SYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_lock_bh();
6071ba9a895SDavid Ahern 	neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
6081ba9a895SDavid Ahern 					  &fib6_nh->fib_nh_gw6);
609145a3621SYOSHIFUJI Hideaki / 吉藤英明 	if (neigh) {
610145a3621SYOSHIFUJI Hideaki / 吉藤英明 		read_lock(&neigh->lock);
611554cfb7eSYOSHIFUJI Hideaki 		if (neigh->nud_state & NUD_VALID)
612afc154e9SHannes Frederic Sowa 			ret = RT6_NUD_SUCCEED;
613398bcbebSYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
614a5a81f0bSPaul Marks 		else if (!(neigh->nud_state & NUD_FAILED))
615afc154e9SHannes Frederic Sowa 			ret = RT6_NUD_SUCCEED;
6167e980569SJiri Benc 		else
6177e980569SJiri Benc 			ret = RT6_NUD_FAIL_PROBE;
618398bcbebSYOSHIFUJI Hideaki #endif
619145a3621SYOSHIFUJI Hideaki / 吉藤英明 		read_unlock(&neigh->lock);
620afc154e9SHannes Frederic Sowa 	} else {
621afc154e9SHannes Frederic Sowa 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
6227e980569SJiri Benc 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
623a5a81f0bSPaul Marks 	}
624145a3621SYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_unlock_bh();
625145a3621SYOSHIFUJI Hideaki / 吉藤英明 
626a5a81f0bSPaul Marks 	return ret;
6271da177e4SLinus Torvalds }
6281da177e4SLinus Torvalds 
629702cea56SDavid Ahern static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
630702cea56SDavid Ahern 			   int strict)
631554cfb7eSYOSHIFUJI Hideaki {
6326e1809a5SDavid Ahern 	int m = 0;
6334d0c5911SYOSHIFUJI Hideaki 
6346e1809a5SDavid Ahern 	if (!oif || nh->fib_nh_dev->ifindex == oif)
6356e1809a5SDavid Ahern 		m = 2;
6366e1809a5SDavid Ahern 
63777d16f45SYOSHIFUJI Hideaki 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
638afc154e9SHannes Frederic Sowa 		return RT6_NUD_FAIL_HARD;
639ebacaaa0SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
640702cea56SDavid Ahern 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
641ebacaaa0SYOSHIFUJI Hideaki #endif
6421ba9a895SDavid Ahern 	if ((strict & RT6_LOOKUP_F_REACHABLE) &&
643702cea56SDavid Ahern 	    !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
6441ba9a895SDavid Ahern 		int n = rt6_check_neigh(nh);
645afc154e9SHannes Frederic Sowa 		if (n < 0)
646afc154e9SHannes Frederic Sowa 			return n;
647afc154e9SHannes Frederic Sowa 	}
648554cfb7eSYOSHIFUJI Hideaki 	return m;
649554cfb7eSYOSHIFUJI Hideaki }
650554cfb7eSYOSHIFUJI Hideaki 
65128679ed1SDavid Ahern static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
65228679ed1SDavid Ahern 		       int oif, int strict, int *mpri, bool *do_rr)
653554cfb7eSYOSHIFUJI Hideaki {
654afc154e9SHannes Frederic Sowa 	bool match_do_rr = false;
65528679ed1SDavid Ahern 	bool rc = false;
65628679ed1SDavid Ahern 	int m;
65735103d11SAndy Gospodarek 
65828679ed1SDavid Ahern 	if (nh->fib_nh_flags & RTNH_F_DEAD)
6598067bb8cSIdo Schimmel 		goto out;
6608067bb8cSIdo Schimmel 
66128679ed1SDavid Ahern 	if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
66228679ed1SDavid Ahern 	    nh->fib_nh_flags & RTNH_F_LINKDOWN &&
663d5d32e4bSDavid Ahern 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
66435103d11SAndy Gospodarek 		goto out;
665554cfb7eSYOSHIFUJI Hideaki 
66628679ed1SDavid Ahern 	m = rt6_score_route(nh, fib6_flags, oif, strict);
6677e980569SJiri Benc 	if (m == RT6_NUD_FAIL_DO_RR) {
668afc154e9SHannes Frederic Sowa 		match_do_rr = true;
669afc154e9SHannes Frederic Sowa 		m = 0; /* lowest valid score */
6707e980569SJiri Benc 	} else if (m == RT6_NUD_FAIL_HARD) {
671f11e6659SDavid S. Miller 		goto out;
6721da177e4SLinus Torvalds 	}
673f11e6659SDavid S. Miller 
674afc154e9SHannes Frederic Sowa 	if (strict & RT6_LOOKUP_F_REACHABLE)
67528679ed1SDavid Ahern 		rt6_probe(nh);
676afc154e9SHannes Frederic Sowa 
6777e980569SJiri Benc 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
678afc154e9SHannes Frederic Sowa 	if (m > *mpri) {
679afc154e9SHannes Frederic Sowa 		*do_rr = match_do_rr;
680afc154e9SHannes Frederic Sowa 		*mpri = m;
68128679ed1SDavid Ahern 		rc = true;
682afc154e9SHannes Frederic Sowa 	}
683f11e6659SDavid S. Miller out:
68428679ed1SDavid Ahern 	return rc;
6851da177e4SLinus Torvalds }
6861da177e4SLinus Torvalds 
68730c15f03SDavid Ahern static void __find_rr_leaf(struct fib6_info *rt_start,
68830c15f03SDavid Ahern 			   struct fib6_info *nomatch, u32 metric,
68930c15f03SDavid Ahern 			   struct fib6_info **match, struct fib6_info **cont,
69030c15f03SDavid Ahern 			   int oif, int strict, bool *do_rr, int *mpri)
69130c15f03SDavid Ahern {
69230c15f03SDavid Ahern 	struct fib6_info *rt;
69330c15f03SDavid Ahern 
69430c15f03SDavid Ahern 	for (rt = rt_start;
69530c15f03SDavid Ahern 	     rt && rt != nomatch;
69630c15f03SDavid Ahern 	     rt = rcu_dereference(rt->fib6_next)) {
69730c15f03SDavid Ahern 		struct fib6_nh *nh;
69830c15f03SDavid Ahern 
69930c15f03SDavid Ahern 		if (cont && rt->fib6_metric != metric) {
70030c15f03SDavid Ahern 			*cont = rt;
70130c15f03SDavid Ahern 			return;
70230c15f03SDavid Ahern 		}
70330c15f03SDavid Ahern 
70430c15f03SDavid Ahern 		if (fib6_check_expired(rt))
70530c15f03SDavid Ahern 			continue;
70630c15f03SDavid Ahern 
70730c15f03SDavid Ahern 		nh = &rt->fib6_nh;
70830c15f03SDavid Ahern 		if (find_match(nh, rt->fib6_flags, oif, strict, mpri, do_rr))
70930c15f03SDavid Ahern 			*match = rt;
71030c15f03SDavid Ahern 	}
71130c15f03SDavid Ahern }
71230c15f03SDavid Ahern 
7138d1c802bSDavid Ahern static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
7148d1c802bSDavid Ahern 				      struct fib6_info *leaf,
7158d1c802bSDavid Ahern 				      struct fib6_info *rr_head,
716afc154e9SHannes Frederic Sowa 				      u32 metric, int oif, int strict,
717afc154e9SHannes Frederic Sowa 				      bool *do_rr)
718f11e6659SDavid S. Miller {
71930c15f03SDavid Ahern 	struct fib6_info *match = NULL, *cont = NULL;
720f11e6659SDavid S. Miller 	int mpri = -1;
721f11e6659SDavid S. Miller 
72230c15f03SDavid Ahern 	__find_rr_leaf(rr_head, NULL, metric, &match, &cont,
72330c15f03SDavid Ahern 		       oif, strict, do_rr, &mpri);
7249fbdcfafSSteffen Klassert 
72530c15f03SDavid Ahern 	__find_rr_leaf(leaf, rr_head, metric, &match, &cont,
72630c15f03SDavid Ahern 		       oif, strict, do_rr, &mpri);
7279fbdcfafSSteffen Klassert 
7289fbdcfafSSteffen Klassert 	if (match || !cont)
7299fbdcfafSSteffen Klassert 		return match;
7309fbdcfafSSteffen Klassert 
73130c15f03SDavid Ahern 	__find_rr_leaf(cont, NULL, metric, &match, NULL,
73230c15f03SDavid Ahern 		       oif, strict, do_rr, &mpri);
733f11e6659SDavid S. Miller 
734f11e6659SDavid S. Miller 	return match;
735f11e6659SDavid S. Miller }
736f11e6659SDavid S. Miller 
7378d1c802bSDavid Ahern static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
7388d1040e8SWei Wang 				   int oif, int strict)
739f11e6659SDavid S. Miller {
7408d1c802bSDavid Ahern 	struct fib6_info *leaf = rcu_dereference(fn->leaf);
7418d1c802bSDavid Ahern 	struct fib6_info *match, *rt0;
742afc154e9SHannes Frederic Sowa 	bool do_rr = false;
74317ecf590SWei Wang 	int key_plen;
744f11e6659SDavid S. Miller 
745421842edSDavid Ahern 	if (!leaf || leaf == net->ipv6.fib6_null_entry)
746421842edSDavid Ahern 		return net->ipv6.fib6_null_entry;
7478d1040e8SWei Wang 
74866f5d6ceSWei Wang 	rt0 = rcu_dereference(fn->rr_ptr);
749f11e6659SDavid S. Miller 	if (!rt0)
75066f5d6ceSWei Wang 		rt0 = leaf;
751f11e6659SDavid S. Miller 
75217ecf590SWei Wang 	/* Double check to make sure fn is not an intermediate node
75317ecf590SWei Wang 	 * and fn->leaf does not points to its child's leaf
75417ecf590SWei Wang 	 * (This might happen if all routes under fn are deleted from
75517ecf590SWei Wang 	 * the tree and fib6_repair_tree() is called on the node.)
75617ecf590SWei Wang 	 */
75793c2fb25SDavid Ahern 	key_plen = rt0->fib6_dst.plen;
75817ecf590SWei Wang #ifdef CONFIG_IPV6_SUBTREES
75993c2fb25SDavid Ahern 	if (rt0->fib6_src.plen)
76093c2fb25SDavid Ahern 		key_plen = rt0->fib6_src.plen;
76117ecf590SWei Wang #endif
76217ecf590SWei Wang 	if (fn->fn_bit != key_plen)
763421842edSDavid Ahern 		return net->ipv6.fib6_null_entry;
76417ecf590SWei Wang 
76593c2fb25SDavid Ahern 	match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
766afc154e9SHannes Frederic Sowa 			     &do_rr);
767f11e6659SDavid S. Miller 
768afc154e9SHannes Frederic Sowa 	if (do_rr) {
7698fb11a9aSDavid Ahern 		struct fib6_info *next = rcu_dereference(rt0->fib6_next);
770f11e6659SDavid S. Miller 
771554cfb7eSYOSHIFUJI Hideaki 		/* no entries matched; do round-robin */
77293c2fb25SDavid Ahern 		if (!next || next->fib6_metric != rt0->fib6_metric)
7738d1040e8SWei Wang 			next = leaf;
774f11e6659SDavid S. Miller 
77566f5d6ceSWei Wang 		if (next != rt0) {
77693c2fb25SDavid Ahern 			spin_lock_bh(&leaf->fib6_table->tb6_lock);
77766f5d6ceSWei Wang 			/* make sure next is not being deleted from the tree */
77893c2fb25SDavid Ahern 			if (next->fib6_node)
77966f5d6ceSWei Wang 				rcu_assign_pointer(fn->rr_ptr, next);
78093c2fb25SDavid Ahern 			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
78166f5d6ceSWei Wang 		}
782554cfb7eSYOSHIFUJI Hideaki 	}
783554cfb7eSYOSHIFUJI Hideaki 
784421842edSDavid Ahern 	return match ? match : net->ipv6.fib6_null_entry;
7851da177e4SLinus Torvalds }
7861da177e4SLinus Torvalds 
7878d1c802bSDavid Ahern static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
7888b9df265SMartin KaFai Lau {
789bdf00467SDavid Ahern 	return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_gw_family;
7908b9df265SMartin KaFai Lau }
7918b9df265SMartin KaFai Lau 
79270ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
79370ceb4f5SYOSHIFUJI Hideaki int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
794b71d1d42SEric Dumazet 		  const struct in6_addr *gwaddr)
79570ceb4f5SYOSHIFUJI Hideaki {
796c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
79770ceb4f5SYOSHIFUJI Hideaki 	struct route_info *rinfo = (struct route_info *) opt;
79870ceb4f5SYOSHIFUJI Hideaki 	struct in6_addr prefix_buf, *prefix;
79970ceb4f5SYOSHIFUJI Hideaki 	unsigned int pref;
8004bed72e4SYOSHIFUJI Hideaki 	unsigned long lifetime;
8018d1c802bSDavid Ahern 	struct fib6_info *rt;
80270ceb4f5SYOSHIFUJI Hideaki 
80370ceb4f5SYOSHIFUJI Hideaki 	if (len < sizeof(struct route_info)) {
80470ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
80570ceb4f5SYOSHIFUJI Hideaki 	}
80670ceb4f5SYOSHIFUJI Hideaki 
80770ceb4f5SYOSHIFUJI Hideaki 	/* Sanity check for prefix_len and length */
80870ceb4f5SYOSHIFUJI Hideaki 	if (rinfo->length > 3) {
80970ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
81070ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 128) {
81170ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
81270ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 64) {
81370ceb4f5SYOSHIFUJI Hideaki 		if (rinfo->length < 2) {
81470ceb4f5SYOSHIFUJI Hideaki 			return -EINVAL;
81570ceb4f5SYOSHIFUJI Hideaki 		}
81670ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 0) {
81770ceb4f5SYOSHIFUJI Hideaki 		if (rinfo->length < 1) {
81870ceb4f5SYOSHIFUJI Hideaki 			return -EINVAL;
81970ceb4f5SYOSHIFUJI Hideaki 		}
82070ceb4f5SYOSHIFUJI Hideaki 	}
82170ceb4f5SYOSHIFUJI Hideaki 
82270ceb4f5SYOSHIFUJI Hideaki 	pref = rinfo->route_pref;
82370ceb4f5SYOSHIFUJI Hideaki 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
8243933fc95SJens Rosenboom 		return -EINVAL;
82570ceb4f5SYOSHIFUJI Hideaki 
8264bed72e4SYOSHIFUJI Hideaki 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
82770ceb4f5SYOSHIFUJI Hideaki 
82870ceb4f5SYOSHIFUJI Hideaki 	if (rinfo->length == 3)
82970ceb4f5SYOSHIFUJI Hideaki 		prefix = (struct in6_addr *)rinfo->prefix;
83070ceb4f5SYOSHIFUJI Hideaki 	else {
83170ceb4f5SYOSHIFUJI Hideaki 		/* this function is safe */
83270ceb4f5SYOSHIFUJI Hideaki 		ipv6_addr_prefix(&prefix_buf,
83370ceb4f5SYOSHIFUJI Hideaki 				 (struct in6_addr *)rinfo->prefix,
83470ceb4f5SYOSHIFUJI Hideaki 				 rinfo->prefix_len);
83570ceb4f5SYOSHIFUJI Hideaki 		prefix = &prefix_buf;
83670ceb4f5SYOSHIFUJI Hideaki 	}
83770ceb4f5SYOSHIFUJI Hideaki 
838f104a567SDuan Jiong 	if (rinfo->prefix_len == 0)
839afb1d4b5SDavid Ahern 		rt = rt6_get_dflt_router(net, gwaddr, dev);
840f104a567SDuan Jiong 	else
841f104a567SDuan Jiong 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
842830218c1SDavid Ahern 					gwaddr, dev);
84370ceb4f5SYOSHIFUJI Hideaki 
84470ceb4f5SYOSHIFUJI Hideaki 	if (rt && !lifetime) {
845afb1d4b5SDavid Ahern 		ip6_del_rt(net, rt);
84670ceb4f5SYOSHIFUJI Hideaki 		rt = NULL;
84770ceb4f5SYOSHIFUJI Hideaki 	}
84870ceb4f5SYOSHIFUJI Hideaki 
84970ceb4f5SYOSHIFUJI Hideaki 	if (!rt && lifetime)
850830218c1SDavid Ahern 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
851830218c1SDavid Ahern 					dev, pref);
85270ceb4f5SYOSHIFUJI Hideaki 	else if (rt)
85393c2fb25SDavid Ahern 		rt->fib6_flags = RTF_ROUTEINFO |
85493c2fb25SDavid Ahern 				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
85570ceb4f5SYOSHIFUJI Hideaki 
85670ceb4f5SYOSHIFUJI Hideaki 	if (rt) {
8571716a961SGao feng 		if (!addrconf_finite_timeout(lifetime))
85814895687SDavid Ahern 			fib6_clean_expires(rt);
8591716a961SGao feng 		else
86014895687SDavid Ahern 			fib6_set_expires(rt, jiffies + HZ * lifetime);
8611716a961SGao feng 
86293531c67SDavid Ahern 		fib6_info_release(rt);
86370ceb4f5SYOSHIFUJI Hideaki 	}
86470ceb4f5SYOSHIFUJI Hideaki 	return 0;
86570ceb4f5SYOSHIFUJI Hideaki }
86670ceb4f5SYOSHIFUJI Hideaki #endif
86770ceb4f5SYOSHIFUJI Hideaki 
868ae90d867SDavid Ahern /*
869ae90d867SDavid Ahern  *	Misc support functions
870ae90d867SDavid Ahern  */
871ae90d867SDavid Ahern 
872ae90d867SDavid Ahern /* called with rcu_lock held */
8738d1c802bSDavid Ahern static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
874ae90d867SDavid Ahern {
875ad1601aeSDavid Ahern 	struct net_device *dev = rt->fib6_nh.fib_nh_dev;
876ae90d867SDavid Ahern 
87793c2fb25SDavid Ahern 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
878ae90d867SDavid Ahern 		/* for copies of local routes, dst->dev needs to be the
879ae90d867SDavid Ahern 		 * device if it is a master device, the master device if
880ae90d867SDavid Ahern 		 * device is enslaved, and the loopback as the default
881ae90d867SDavid Ahern 		 */
882ae90d867SDavid Ahern 		if (netif_is_l3_slave(dev) &&
88393c2fb25SDavid Ahern 		    !rt6_need_strict(&rt->fib6_dst.addr))
884ae90d867SDavid Ahern 			dev = l3mdev_master_dev_rcu(dev);
885ae90d867SDavid Ahern 		else if (!netif_is_l3_master(dev))
886ae90d867SDavid Ahern 			dev = dev_net(dev)->loopback_dev;
887ae90d867SDavid Ahern 		/* last case is netif_is_l3_master(dev) is true in which
888ae90d867SDavid Ahern 		 * case we want dev returned to be dev
889ae90d867SDavid Ahern 		 */
890ae90d867SDavid Ahern 	}
891ae90d867SDavid Ahern 
892ae90d867SDavid Ahern 	return dev;
893ae90d867SDavid Ahern }
894ae90d867SDavid Ahern 
8956edb3c96SDavid Ahern static const int fib6_prop[RTN_MAX + 1] = {
8966edb3c96SDavid Ahern 	[RTN_UNSPEC]	= 0,
8976edb3c96SDavid Ahern 	[RTN_UNICAST]	= 0,
8986edb3c96SDavid Ahern 	[RTN_LOCAL]	= 0,
8996edb3c96SDavid Ahern 	[RTN_BROADCAST]	= 0,
9006edb3c96SDavid Ahern 	[RTN_ANYCAST]	= 0,
9016edb3c96SDavid Ahern 	[RTN_MULTICAST]	= 0,
9026edb3c96SDavid Ahern 	[RTN_BLACKHOLE]	= -EINVAL,
9036edb3c96SDavid Ahern 	[RTN_UNREACHABLE] = -EHOSTUNREACH,
9046edb3c96SDavid Ahern 	[RTN_PROHIBIT]	= -EACCES,
9056edb3c96SDavid Ahern 	[RTN_THROW]	= -EAGAIN,
9066edb3c96SDavid Ahern 	[RTN_NAT]	= -EINVAL,
9076edb3c96SDavid Ahern 	[RTN_XRESOLVE]	= -EINVAL,
9086edb3c96SDavid Ahern };
9096edb3c96SDavid Ahern 
9106edb3c96SDavid Ahern static int ip6_rt_type_to_error(u8 fib6_type)
9116edb3c96SDavid Ahern {
9126edb3c96SDavid Ahern 	return fib6_prop[fib6_type];
9136edb3c96SDavid Ahern }
9146edb3c96SDavid Ahern 
9158d1c802bSDavid Ahern static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
9163b6761d1SDavid Ahern {
9173b6761d1SDavid Ahern 	unsigned short flags = 0;
9183b6761d1SDavid Ahern 
9193b6761d1SDavid Ahern 	if (rt->dst_nocount)
9203b6761d1SDavid Ahern 		flags |= DST_NOCOUNT;
9213b6761d1SDavid Ahern 	if (rt->dst_nopolicy)
9223b6761d1SDavid Ahern 		flags |= DST_NOPOLICY;
9233b6761d1SDavid Ahern 	if (rt->dst_host)
9243b6761d1SDavid Ahern 		flags |= DST_HOST;
9253b6761d1SDavid Ahern 
9263b6761d1SDavid Ahern 	return flags;
9273b6761d1SDavid Ahern }
9283b6761d1SDavid Ahern 
9298d1c802bSDavid Ahern static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
9306edb3c96SDavid Ahern {
9316edb3c96SDavid Ahern 	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
9326edb3c96SDavid Ahern 
9336edb3c96SDavid Ahern 	switch (ort->fib6_type) {
9346edb3c96SDavid Ahern 	case RTN_BLACKHOLE:
9356edb3c96SDavid Ahern 		rt->dst.output = dst_discard_out;
9366edb3c96SDavid Ahern 		rt->dst.input = dst_discard;
9376edb3c96SDavid Ahern 		break;
9386edb3c96SDavid Ahern 	case RTN_PROHIBIT:
9396edb3c96SDavid Ahern 		rt->dst.output = ip6_pkt_prohibit_out;
9406edb3c96SDavid Ahern 		rt->dst.input = ip6_pkt_prohibit;
9416edb3c96SDavid Ahern 		break;
9426edb3c96SDavid Ahern 	case RTN_THROW:
9436edb3c96SDavid Ahern 	case RTN_UNREACHABLE:
9446edb3c96SDavid Ahern 	default:
9456edb3c96SDavid Ahern 		rt->dst.output = ip6_pkt_discard_out;
9466edb3c96SDavid Ahern 		rt->dst.input = ip6_pkt_discard;
9476edb3c96SDavid Ahern 		break;
9486edb3c96SDavid Ahern 	}
9496edb3c96SDavid Ahern }
9506edb3c96SDavid Ahern 
9518d1c802bSDavid Ahern static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
9526edb3c96SDavid Ahern {
95393c2fb25SDavid Ahern 	if (ort->fib6_flags & RTF_REJECT) {
9546edb3c96SDavid Ahern 		ip6_rt_init_dst_reject(rt, ort);
9556edb3c96SDavid Ahern 		return;
9566edb3c96SDavid Ahern 	}
9576edb3c96SDavid Ahern 
9586edb3c96SDavid Ahern 	rt->dst.error = 0;
9596edb3c96SDavid Ahern 	rt->dst.output = ip6_output;
9606edb3c96SDavid Ahern 
961d23c4b63SHangbin Liu 	if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
9626edb3c96SDavid Ahern 		rt->dst.input = ip6_input;
96393c2fb25SDavid Ahern 	} else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
9646edb3c96SDavid Ahern 		rt->dst.input = ip6_mc_input;
9656edb3c96SDavid Ahern 	} else {
9666edb3c96SDavid Ahern 		rt->dst.input = ip6_forward;
9676edb3c96SDavid Ahern 	}
9686edb3c96SDavid Ahern 
969ad1601aeSDavid Ahern 	if (ort->fib6_nh.fib_nh_lws) {
970ad1601aeSDavid Ahern 		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.fib_nh_lws);
9716edb3c96SDavid Ahern 		lwtunnel_set_redirect(&rt->dst);
9726edb3c96SDavid Ahern 	}
9736edb3c96SDavid Ahern 
9746edb3c96SDavid Ahern 	rt->dst.lastuse = jiffies;
9756edb3c96SDavid Ahern }
9766edb3c96SDavid Ahern 
977e873e4b9SWei Wang /* Caller must already hold reference to @from */
9788d1c802bSDavid Ahern static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
979ae90d867SDavid Ahern {
980ae90d867SDavid Ahern 	rt->rt6i_flags &= ~RTF_EXPIRES;
981a68886a6SDavid Ahern 	rcu_assign_pointer(rt->from, from);
982e1255ed4SDavid Ahern 	ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
983ae90d867SDavid Ahern }
984ae90d867SDavid Ahern 
985e873e4b9SWei Wang /* Caller must already hold reference to @ort */
9868d1c802bSDavid Ahern static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
987ae90d867SDavid Ahern {
988dcd1f572SDavid Ahern 	struct net_device *dev = fib6_info_nh_dev(ort);
989dcd1f572SDavid Ahern 
9906edb3c96SDavid Ahern 	ip6_rt_init_dst(rt, ort);
9916edb3c96SDavid Ahern 
99293c2fb25SDavid Ahern 	rt->rt6i_dst = ort->fib6_dst;
993dcd1f572SDavid Ahern 	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
99493c2fb25SDavid Ahern 	rt->rt6i_flags = ort->fib6_flags;
995bdf00467SDavid Ahern 	if (ort->fib6_nh.fib_nh_gw_family) {
996ad1601aeSDavid Ahern 		rt->rt6i_gateway = ort->fib6_nh.fib_nh_gw6;
9972b2450caSDavid Ahern 		rt->rt6i_flags |= RTF_GATEWAY;
9982b2450caSDavid Ahern 	}
999ae90d867SDavid Ahern 	rt6_set_from(rt, ort);
1000ae90d867SDavid Ahern #ifdef CONFIG_IPV6_SUBTREES
100193c2fb25SDavid Ahern 	rt->rt6i_src = ort->fib6_src;
1002ae90d867SDavid Ahern #endif
1003ae90d867SDavid Ahern }
1004ae90d867SDavid Ahern 
1005a3c00e46SMartin KaFai Lau static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1006a3c00e46SMartin KaFai Lau 					struct in6_addr *saddr)
1007a3c00e46SMartin KaFai Lau {
100866f5d6ceSWei Wang 	struct fib6_node *pn, *sn;
1009a3c00e46SMartin KaFai Lau 	while (1) {
1010a3c00e46SMartin KaFai Lau 		if (fn->fn_flags & RTN_TL_ROOT)
1011a3c00e46SMartin KaFai Lau 			return NULL;
101266f5d6ceSWei Wang 		pn = rcu_dereference(fn->parent);
101366f5d6ceSWei Wang 		sn = FIB6_SUBTREE(pn);
101466f5d6ceSWei Wang 		if (sn && sn != fn)
10156454743bSDavid Ahern 			fn = fib6_node_lookup(sn, NULL, saddr);
1016a3c00e46SMartin KaFai Lau 		else
1017a3c00e46SMartin KaFai Lau 			fn = pn;
1018a3c00e46SMartin KaFai Lau 		if (fn->fn_flags & RTN_RTINFO)
1019a3c00e46SMartin KaFai Lau 			return fn;
1020a3c00e46SMartin KaFai Lau 	}
1021a3c00e46SMartin KaFai Lau }
1022c71099acSThomas Graf 
102310585b43SDavid Ahern static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
1024d3843fe5SWei Wang {
1025d3843fe5SWei Wang 	struct rt6_info *rt = *prt;
1026d3843fe5SWei Wang 
1027d3843fe5SWei Wang 	if (dst_hold_safe(&rt->dst))
1028d3843fe5SWei Wang 		return true;
102910585b43SDavid Ahern 	if (net) {
1030d3843fe5SWei Wang 		rt = net->ipv6.ip6_null_entry;
1031d3843fe5SWei Wang 		dst_hold(&rt->dst);
1032d3843fe5SWei Wang 	} else {
1033d3843fe5SWei Wang 		rt = NULL;
1034d3843fe5SWei Wang 	}
1035d3843fe5SWei Wang 	*prt = rt;
1036d3843fe5SWei Wang 	return false;
1037d3843fe5SWei Wang }
1038d3843fe5SWei Wang 
1039dec9b0e2SDavid Ahern /* called with rcu_lock held */
10408d1c802bSDavid Ahern static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
1041dec9b0e2SDavid Ahern {
10423b6761d1SDavid Ahern 	unsigned short flags = fib6_info_dst_flags(rt);
1043ad1601aeSDavid Ahern 	struct net_device *dev = rt->fib6_nh.fib_nh_dev;
1044dec9b0e2SDavid Ahern 	struct rt6_info *nrt;
1045dec9b0e2SDavid Ahern 
1046e873e4b9SWei Wang 	if (!fib6_info_hold_safe(rt))
10471c87e79aSXin Long 		goto fallback;
1048e873e4b9SWei Wang 
104993531c67SDavid Ahern 	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
10501c87e79aSXin Long 	if (!nrt) {
1051e873e4b9SWei Wang 		fib6_info_release(rt);
10521c87e79aSXin Long 		goto fallback;
10531c87e79aSXin Long 	}
1054dec9b0e2SDavid Ahern 
10551c87e79aSXin Long 	ip6_rt_copy_init(nrt, rt);
10561c87e79aSXin Long 	return nrt;
10571c87e79aSXin Long 
10581c87e79aSXin Long fallback:
10591c87e79aSXin Long 	nrt = dev_net(dev)->ipv6.ip6_null_entry;
10601c87e79aSXin Long 	dst_hold(&nrt->dst);
1061dec9b0e2SDavid Ahern 	return nrt;
1062dec9b0e2SDavid Ahern }
1063dec9b0e2SDavid Ahern 
10648ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_lookup(struct net *net,
10658ed67789SDaniel Lezcano 					     struct fib6_table *table,
1066b75cc8f9SDavid Ahern 					     struct flowi6 *fl6,
1067b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
1068b75cc8f9SDavid Ahern 					     int flags)
10691da177e4SLinus Torvalds {
1070*b1d40991SDavid Ahern 	struct fib6_result res = {};
10711da177e4SLinus Torvalds 	struct fib6_node *fn;
107223fb93a4SDavid Ahern 	struct rt6_info *rt;
10731da177e4SLinus Torvalds 
1074b6cdbc85SDavid Ahern 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1075b6cdbc85SDavid Ahern 		flags &= ~RT6_LOOKUP_F_IFACE;
1076b6cdbc85SDavid Ahern 
107766f5d6ceSWei Wang 	rcu_read_lock();
10786454743bSDavid Ahern 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1079c71099acSThomas Graf restart:
1080*b1d40991SDavid Ahern 	res.f6i = rcu_dereference(fn->leaf);
1081*b1d40991SDavid Ahern 	if (!res.f6i)
1082*b1d40991SDavid Ahern 		res.f6i = net->ipv6.fib6_null_entry;
1083af52a52cSDavid Ahern 	else
1084*b1d40991SDavid Ahern 		res.f6i = rt6_device_match(net, res.f6i, &fl6->saddr,
108566f5d6ceSWei Wang 					   fl6->flowi6_oif, flags);
1086af52a52cSDavid Ahern 
1087*b1d40991SDavid Ahern 	if (res.f6i == net->ipv6.fib6_null_entry) {
1088a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
1089a3c00e46SMartin KaFai Lau 		if (fn)
1090a3c00e46SMartin KaFai Lau 			goto restart;
1091af52a52cSDavid Ahern 
1092af52a52cSDavid Ahern 		rt = net->ipv6.ip6_null_entry;
1093af52a52cSDavid Ahern 		dst_hold(&rt->dst);
1094af52a52cSDavid Ahern 		goto out;
1095a3c00e46SMartin KaFai Lau 	}
10962b760fcfSWei Wang 
1097*b1d40991SDavid Ahern 	fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1098*b1d40991SDavid Ahern 			 fl6->flowi6_oif != 0, skb, flags);
1099*b1d40991SDavid Ahern 
11004c9483b2SDavid S. Miller 	/* Search through exception table */
1101*b1d40991SDavid Ahern 	rt = rt6_find_cached_rt(res.f6i, &fl6->daddr, &fl6->saddr);
110223fb93a4SDavid Ahern 	if (rt) {
110310585b43SDavid Ahern 		if (ip6_hold_safe(net, &rt))
1104d3843fe5SWei Wang 			dst_use_noref(&rt->dst, jiffies);
110523fb93a4SDavid Ahern 	} else {
1106*b1d40991SDavid Ahern 		rt = ip6_create_rt_rcu(res.f6i);
1107dec9b0e2SDavid Ahern 	}
1108d3843fe5SWei Wang 
1109af52a52cSDavid Ahern out:
1110*b1d40991SDavid Ahern 	trace_fib6_table_lookup(net, res.f6i, table, fl6);
1111af52a52cSDavid Ahern 
111266f5d6ceSWei Wang 	rcu_read_unlock();
1113b811580dSDavid Ahern 
11141da177e4SLinus Torvalds 	return rt;
1115c71099acSThomas Graf }
1116c71099acSThomas Graf 
1117ea6e574eSFlorian Westphal struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1118b75cc8f9SDavid Ahern 				   const struct sk_buff *skb, int flags)
1119ea6e574eSFlorian Westphal {
1120b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1121ea6e574eSFlorian Westphal }
1122ea6e574eSFlorian Westphal EXPORT_SYMBOL_GPL(ip6_route_lookup);
1123ea6e574eSFlorian Westphal 
11249acd9f3aSYOSHIFUJI Hideaki struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1125b75cc8f9SDavid Ahern 			    const struct in6_addr *saddr, int oif,
1126b75cc8f9SDavid Ahern 			    const struct sk_buff *skb, int strict)
1127c71099acSThomas Graf {
11284c9483b2SDavid S. Miller 	struct flowi6 fl6 = {
11294c9483b2SDavid S. Miller 		.flowi6_oif = oif,
11304c9483b2SDavid S. Miller 		.daddr = *daddr,
1131c71099acSThomas Graf 	};
1132c71099acSThomas Graf 	struct dst_entry *dst;
113377d16f45SYOSHIFUJI Hideaki 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1134c71099acSThomas Graf 
1135adaa70bbSThomas Graf 	if (saddr) {
11364c9483b2SDavid S. Miller 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1137adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1138adaa70bbSThomas Graf 	}
1139adaa70bbSThomas Graf 
1140b75cc8f9SDavid Ahern 	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1141c71099acSThomas Graf 	if (dst->error == 0)
1142c71099acSThomas Graf 		return (struct rt6_info *) dst;
1143c71099acSThomas Graf 
1144c71099acSThomas Graf 	dst_release(dst);
1145c71099acSThomas Graf 
11461da177e4SLinus Torvalds 	return NULL;
11471da177e4SLinus Torvalds }
11487159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(rt6_lookup);
11497159039aSYOSHIFUJI Hideaki 
/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes a new route entry; if the addition fails for any reason, the
 * route is released.
 * Caller must hold dst before calling it.
11541da177e4SLinus Torvalds  */
11551da177e4SLinus Torvalds 
11568d1c802bSDavid Ahern static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1157333c4301SDavid Ahern 			struct netlink_ext_ack *extack)
11581da177e4SLinus Torvalds {
11591da177e4SLinus Torvalds 	int err;
1160c71099acSThomas Graf 	struct fib6_table *table;
11611da177e4SLinus Torvalds 
116293c2fb25SDavid Ahern 	table = rt->fib6_table;
116366f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
1164d4ead6b3SDavid Ahern 	err = fib6_add(&table->tb6_root, rt, info, extack);
116566f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
11661da177e4SLinus Torvalds 
11671da177e4SLinus Torvalds 	return err;
11681da177e4SLinus Torvalds }
11691da177e4SLinus Torvalds 
11708d1c802bSDavid Ahern int ip6_ins_rt(struct net *net, struct fib6_info *rt)
117140e22e8fSThomas Graf {
1172afb1d4b5SDavid Ahern 	struct nl_info info = {	.nl_net = net, };
1173e715b6d3SFlorian Westphal 
1174d4ead6b3SDavid Ahern 	return __ip6_ins_rt(rt, &info, NULL);
117540e22e8fSThomas Graf }
117640e22e8fSThomas Graf 
11778d1c802bSDavid Ahern static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
117821efcfa0SEric Dumazet 					   const struct in6_addr *daddr,
1179b71d1d42SEric Dumazet 					   const struct in6_addr *saddr)
11801da177e4SLinus Torvalds {
11814832c30dSDavid Ahern 	struct net_device *dev;
11821da177e4SLinus Torvalds 	struct rt6_info *rt;
11831da177e4SLinus Torvalds 
11841da177e4SLinus Torvalds 	/*
11851da177e4SLinus Torvalds 	 *	Clone the route.
11861da177e4SLinus Torvalds 	 */
11871da177e4SLinus Torvalds 
1188e873e4b9SWei Wang 	if (!fib6_info_hold_safe(ort))
1189e873e4b9SWei Wang 		return NULL;
1190e873e4b9SWei Wang 
11914832c30dSDavid Ahern 	dev = ip6_rt_get_dev_rcu(ort);
119293531c67SDavid Ahern 	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1193e873e4b9SWei Wang 	if (!rt) {
1194e873e4b9SWei Wang 		fib6_info_release(ort);
119583a09abdSMartin KaFai Lau 		return NULL;
1196e873e4b9SWei Wang 	}
119783a09abdSMartin KaFai Lau 
119883a09abdSMartin KaFai Lau 	ip6_rt_copy_init(rt, ort);
11998b9df265SMartin KaFai Lau 	rt->rt6i_flags |= RTF_CACHE;
120083a09abdSMartin KaFai Lau 	rt->dst.flags |= DST_HOST;
120183a09abdSMartin KaFai Lau 	rt->rt6i_dst.addr = *daddr;
120283a09abdSMartin KaFai Lau 	rt->rt6i_dst.plen = 128;
12038b9df265SMartin KaFai Lau 
12048b9df265SMartin KaFai Lau 	if (!rt6_is_gw_or_nonexthop(ort)) {
120593c2fb25SDavid Ahern 		if (ort->fib6_dst.plen != 128 &&
120693c2fb25SDavid Ahern 		    ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
120758c4fb86SYOSHIFUJI Hideaki 			rt->rt6i_flags |= RTF_ANYCAST;
12081da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
12091da177e4SLinus Torvalds 		if (rt->rt6i_src.plen && saddr) {
12104e3fd7a0SAlexey Dobriyan 			rt->rt6i_src.addr = *saddr;
12111da177e4SLinus Torvalds 			rt->rt6i_src.plen = 128;
12121da177e4SLinus Torvalds 		}
12131da177e4SLinus Torvalds #endif
121495a9a5baSYOSHIFUJI Hideaki 	}
121595a9a5baSYOSHIFUJI Hideaki 
1216299d9939SYOSHIFUJI Hideaki 	return rt;
1217299d9939SYOSHIFUJI Hideaki }
1218299d9939SYOSHIFUJI Hideaki 
12198d1c802bSDavid Ahern static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
1220d52d3997SMartin KaFai Lau {
12213b6761d1SDavid Ahern 	unsigned short flags = fib6_info_dst_flags(rt);
12224832c30dSDavid Ahern 	struct net_device *dev;
1223d52d3997SMartin KaFai Lau 	struct rt6_info *pcpu_rt;
1224d52d3997SMartin KaFai Lau 
1225e873e4b9SWei Wang 	if (!fib6_info_hold_safe(rt))
1226e873e4b9SWei Wang 		return NULL;
1227e873e4b9SWei Wang 
12284832c30dSDavid Ahern 	rcu_read_lock();
12294832c30dSDavid Ahern 	dev = ip6_rt_get_dev_rcu(rt);
123093531c67SDavid Ahern 	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
12314832c30dSDavid Ahern 	rcu_read_unlock();
1232e873e4b9SWei Wang 	if (!pcpu_rt) {
1233e873e4b9SWei Wang 		fib6_info_release(rt);
1234d52d3997SMartin KaFai Lau 		return NULL;
1235e873e4b9SWei Wang 	}
1236d52d3997SMartin KaFai Lau 	ip6_rt_copy_init(pcpu_rt, rt);
1237d52d3997SMartin KaFai Lau 	pcpu_rt->rt6i_flags |= RTF_PCPU;
1238d52d3997SMartin KaFai Lau 	return pcpu_rt;
1239d52d3997SMartin KaFai Lau }
1240d52d3997SMartin KaFai Lau 
124166f5d6ceSWei Wang /* It should be called with rcu_read_lock() acquired */
12428d1c802bSDavid Ahern static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
1243d52d3997SMartin KaFai Lau {
1244a73e4195SMartin KaFai Lau 	struct rt6_info *pcpu_rt, **p;
1245d52d3997SMartin KaFai Lau 
1246d52d3997SMartin KaFai Lau 	p = this_cpu_ptr(rt->rt6i_pcpu);
1247d52d3997SMartin KaFai Lau 	pcpu_rt = *p;
1248d52d3997SMartin KaFai Lau 
1249d4ead6b3SDavid Ahern 	if (pcpu_rt)
125010585b43SDavid Ahern 		ip6_hold_safe(NULL, &pcpu_rt);
1251d3843fe5SWei Wang 
1252a73e4195SMartin KaFai Lau 	return pcpu_rt;
1253a73e4195SMartin KaFai Lau }
1254a73e4195SMartin KaFai Lau 
1255afb1d4b5SDavid Ahern static struct rt6_info *rt6_make_pcpu_route(struct net *net,
12568d1c802bSDavid Ahern 					    struct fib6_info *rt)
1257a73e4195SMartin KaFai Lau {
1258a73e4195SMartin KaFai Lau 	struct rt6_info *pcpu_rt, *prev, **p;
1259d52d3997SMartin KaFai Lau 
1260d52d3997SMartin KaFai Lau 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1261d52d3997SMartin KaFai Lau 	if (!pcpu_rt) {
12629c7370a1SMartin KaFai Lau 		dst_hold(&net->ipv6.ip6_null_entry->dst);
12639c7370a1SMartin KaFai Lau 		return net->ipv6.ip6_null_entry;
1264d52d3997SMartin KaFai Lau 	}
1265d52d3997SMartin KaFai Lau 
1266a94b9367SWei Wang 	dst_hold(&pcpu_rt->dst);
1267a73e4195SMartin KaFai Lau 	p = this_cpu_ptr(rt->rt6i_pcpu);
1268d52d3997SMartin KaFai Lau 	prev = cmpxchg(p, NULL, pcpu_rt);
1269951f788aSEric Dumazet 	BUG_ON(prev);
1270a94b9367SWei Wang 
1271d52d3997SMartin KaFai Lau 	return pcpu_rt;
1272d52d3997SMartin KaFai Lau }
1273d52d3997SMartin KaFai Lau 
127435732d01SWei Wang /* exception hash table implementation
127535732d01SWei Wang  */
127635732d01SWei Wang static DEFINE_SPINLOCK(rt6_exception_lock);
127735732d01SWei Wang 
127835732d01SWei Wang /* Remove rt6_ex from hash table and free the memory
127935732d01SWei Wang  * Caller must hold rt6_exception_lock
128035732d01SWei Wang  */
128135732d01SWei Wang static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
128235732d01SWei Wang 				 struct rt6_exception *rt6_ex)
128335732d01SWei Wang {
1284f5b51fe8SPaolo Abeni 	struct fib6_info *from;
1285b2427e67SColin Ian King 	struct net *net;
128681eb8447SWei Wang 
128735732d01SWei Wang 	if (!bucket || !rt6_ex)
128835732d01SWei Wang 		return;
1289b2427e67SColin Ian King 
1290b2427e67SColin Ian King 	net = dev_net(rt6_ex->rt6i->dst.dev);
1291f5b51fe8SPaolo Abeni 	net->ipv6.rt6_stats->fib_rt_cache--;
1292f5b51fe8SPaolo Abeni 
1293f5b51fe8SPaolo Abeni 	/* purge completely the exception to allow releasing the held resources:
1294f5b51fe8SPaolo Abeni 	 * some [sk] cache may keep the dst around for unlimited time
1295f5b51fe8SPaolo Abeni 	 */
1296f5b51fe8SPaolo Abeni 	from = rcu_dereference_protected(rt6_ex->rt6i->from,
1297f5b51fe8SPaolo Abeni 					 lockdep_is_held(&rt6_exception_lock));
1298f5b51fe8SPaolo Abeni 	rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
1299f5b51fe8SPaolo Abeni 	fib6_info_release(from);
1300f5b51fe8SPaolo Abeni 	dst_dev_put(&rt6_ex->rt6i->dst);
1301f5b51fe8SPaolo Abeni 
130235732d01SWei Wang 	hlist_del_rcu(&rt6_ex->hlist);
130377634cc6SDavid Ahern 	dst_release(&rt6_ex->rt6i->dst);
130435732d01SWei Wang 	kfree_rcu(rt6_ex, rcu);
130535732d01SWei Wang 	WARN_ON_ONCE(!bucket->depth);
130635732d01SWei Wang 	bucket->depth--;
130735732d01SWei Wang }
130835732d01SWei Wang 
130935732d01SWei Wang /* Remove oldest rt6_ex in bucket and free the memory
131035732d01SWei Wang  * Caller must hold rt6_exception_lock
131135732d01SWei Wang  */
131235732d01SWei Wang static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
131335732d01SWei Wang {
131435732d01SWei Wang 	struct rt6_exception *rt6_ex, *oldest = NULL;
131535732d01SWei Wang 
131635732d01SWei Wang 	if (!bucket)
131735732d01SWei Wang 		return;
131835732d01SWei Wang 
131935732d01SWei Wang 	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
132035732d01SWei Wang 		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
132135732d01SWei Wang 			oldest = rt6_ex;
132235732d01SWei Wang 	}
132335732d01SWei Wang 	rt6_remove_exception(bucket, oldest);
132435732d01SWei Wang }
132535732d01SWei Wang 
132635732d01SWei Wang static u32 rt6_exception_hash(const struct in6_addr *dst,
132735732d01SWei Wang 			      const struct in6_addr *src)
132835732d01SWei Wang {
132935732d01SWei Wang 	static u32 seed __read_mostly;
133035732d01SWei Wang 	u32 val;
133135732d01SWei Wang 
133235732d01SWei Wang 	net_get_random_once(&seed, sizeof(seed));
133335732d01SWei Wang 	val = jhash(dst, sizeof(*dst), seed);
133435732d01SWei Wang 
133535732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
133635732d01SWei Wang 	if (src)
133735732d01SWei Wang 		val = jhash(src, sizeof(*src), val);
133835732d01SWei Wang #endif
133935732d01SWei Wang 	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
134035732d01SWei Wang }
134135732d01SWei Wang 
134235732d01SWei Wang /* Helper function to find the cached rt in the hash table
134335732d01SWei Wang  * and update bucket pointer to point to the bucket for this
134435732d01SWei Wang  * (daddr, saddr) pair
134535732d01SWei Wang  * Caller must hold rt6_exception_lock
134635732d01SWei Wang  */
134735732d01SWei Wang static struct rt6_exception *
134835732d01SWei Wang __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
134935732d01SWei Wang 			      const struct in6_addr *daddr,
135035732d01SWei Wang 			      const struct in6_addr *saddr)
135135732d01SWei Wang {
135235732d01SWei Wang 	struct rt6_exception *rt6_ex;
135335732d01SWei Wang 	u32 hval;
135435732d01SWei Wang 
135535732d01SWei Wang 	if (!(*bucket) || !daddr)
135635732d01SWei Wang 		return NULL;
135735732d01SWei Wang 
135835732d01SWei Wang 	hval = rt6_exception_hash(daddr, saddr);
135935732d01SWei Wang 	*bucket += hval;
136035732d01SWei Wang 
136135732d01SWei Wang 	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
136235732d01SWei Wang 		struct rt6_info *rt6 = rt6_ex->rt6i;
136335732d01SWei Wang 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
136435732d01SWei Wang 
136535732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
136635732d01SWei Wang 		if (matched && saddr)
136735732d01SWei Wang 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
136835732d01SWei Wang #endif
136935732d01SWei Wang 		if (matched)
137035732d01SWei Wang 			return rt6_ex;
137135732d01SWei Wang 	}
137235732d01SWei Wang 	return NULL;
137335732d01SWei Wang }
137435732d01SWei Wang 
137535732d01SWei Wang /* Helper function to find the cached rt in the hash table
137635732d01SWei Wang  * and update bucket pointer to point to the bucket for this
137735732d01SWei Wang  * (daddr, saddr) pair
137835732d01SWei Wang  * Caller must hold rcu_read_lock()
137935732d01SWei Wang  */
138035732d01SWei Wang static struct rt6_exception *
138135732d01SWei Wang __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
138235732d01SWei Wang 			 const struct in6_addr *daddr,
138335732d01SWei Wang 			 const struct in6_addr *saddr)
138435732d01SWei Wang {
138535732d01SWei Wang 	struct rt6_exception *rt6_ex;
138635732d01SWei Wang 	u32 hval;
138735732d01SWei Wang 
138835732d01SWei Wang 	WARN_ON_ONCE(!rcu_read_lock_held());
138935732d01SWei Wang 
139035732d01SWei Wang 	if (!(*bucket) || !daddr)
139135732d01SWei Wang 		return NULL;
139235732d01SWei Wang 
139335732d01SWei Wang 	hval = rt6_exception_hash(daddr, saddr);
139435732d01SWei Wang 	*bucket += hval;
139535732d01SWei Wang 
139635732d01SWei Wang 	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
139735732d01SWei Wang 		struct rt6_info *rt6 = rt6_ex->rt6i;
139835732d01SWei Wang 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
139935732d01SWei Wang 
140035732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
140135732d01SWei Wang 		if (matched && saddr)
140235732d01SWei Wang 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
140335732d01SWei Wang #endif
140435732d01SWei Wang 		if (matched)
140535732d01SWei Wang 			return rt6_ex;
140635732d01SWei Wang 	}
140735732d01SWei Wang 	return NULL;
140835732d01SWei Wang }
140935732d01SWei Wang 
14108d1c802bSDavid Ahern static unsigned int fib6_mtu(const struct fib6_info *rt)
141135732d01SWei Wang {
1412d4ead6b3SDavid Ahern 	unsigned int mtu;
1413d4ead6b3SDavid Ahern 
1414dcd1f572SDavid Ahern 	if (rt->fib6_pmtu) {
1415dcd1f572SDavid Ahern 		mtu = rt->fib6_pmtu;
1416dcd1f572SDavid Ahern 	} else {
1417dcd1f572SDavid Ahern 		struct net_device *dev = fib6_info_nh_dev(rt);
1418dcd1f572SDavid Ahern 		struct inet6_dev *idev;
1419dcd1f572SDavid Ahern 
1420dcd1f572SDavid Ahern 		rcu_read_lock();
1421dcd1f572SDavid Ahern 		idev = __in6_dev_get(dev);
1422dcd1f572SDavid Ahern 		mtu = idev->cnf.mtu6;
1423dcd1f572SDavid Ahern 		rcu_read_unlock();
1424dcd1f572SDavid Ahern 	}
1425dcd1f572SDavid Ahern 
1426d4ead6b3SDavid Ahern 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1427d4ead6b3SDavid Ahern 
1428ad1601aeSDavid Ahern 	return mtu - lwtunnel_headroom(rt->fib6_nh.fib_nh_lws, mtu);
1429d4ead6b3SDavid Ahern }
1430d4ead6b3SDavid Ahern 
143135732d01SWei Wang static int rt6_insert_exception(struct rt6_info *nrt,
14328d1c802bSDavid Ahern 				struct fib6_info *ort)
143335732d01SWei Wang {
14345e670d84SDavid Ahern 	struct net *net = dev_net(nrt->dst.dev);
143535732d01SWei Wang 	struct rt6_exception_bucket *bucket;
143635732d01SWei Wang 	struct in6_addr *src_key = NULL;
143735732d01SWei Wang 	struct rt6_exception *rt6_ex;
143835732d01SWei Wang 	int err = 0;
143935732d01SWei Wang 
144035732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
144135732d01SWei Wang 
144235732d01SWei Wang 	if (ort->exception_bucket_flushed) {
144335732d01SWei Wang 		err = -EINVAL;
144435732d01SWei Wang 		goto out;
144535732d01SWei Wang 	}
144635732d01SWei Wang 
144735732d01SWei Wang 	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
144835732d01SWei Wang 					lockdep_is_held(&rt6_exception_lock));
144935732d01SWei Wang 	if (!bucket) {
145035732d01SWei Wang 		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
145135732d01SWei Wang 				 GFP_ATOMIC);
145235732d01SWei Wang 		if (!bucket) {
145335732d01SWei Wang 			err = -ENOMEM;
145435732d01SWei Wang 			goto out;
145535732d01SWei Wang 		}
145635732d01SWei Wang 		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
145735732d01SWei Wang 	}
145835732d01SWei Wang 
145935732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
146035732d01SWei Wang 	/* rt6i_src.plen != 0 indicates ort is in subtree
146135732d01SWei Wang 	 * and exception table is indexed by a hash of
146235732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
146335732d01SWei Wang 	 * Otherwise, the exception table is indexed by
146435732d01SWei Wang 	 * a hash of only rt6i_dst.
146535732d01SWei Wang 	 */
146693c2fb25SDavid Ahern 	if (ort->fib6_src.plen)
146735732d01SWei Wang 		src_key = &nrt->rt6i_src.addr;
146835732d01SWei Wang #endif
1469f5bbe7eeSWei Wang 	/* rt6_mtu_change() might lower mtu on ort.
1470f5bbe7eeSWei Wang 	 * Only insert this exception route if its mtu
1471f5bbe7eeSWei Wang 	 * is less than ort's mtu value.
1472f5bbe7eeSWei Wang 	 */
1473d4ead6b3SDavid Ahern 	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
1474f5bbe7eeSWei Wang 		err = -EINVAL;
1475f5bbe7eeSWei Wang 		goto out;
1476f5bbe7eeSWei Wang 	}
147760006a48SWei Wang 
147835732d01SWei Wang 	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
147935732d01SWei Wang 					       src_key);
148035732d01SWei Wang 	if (rt6_ex)
148135732d01SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
148235732d01SWei Wang 
148335732d01SWei Wang 	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
148435732d01SWei Wang 	if (!rt6_ex) {
148535732d01SWei Wang 		err = -ENOMEM;
148635732d01SWei Wang 		goto out;
148735732d01SWei Wang 	}
148835732d01SWei Wang 	rt6_ex->rt6i = nrt;
148935732d01SWei Wang 	rt6_ex->stamp = jiffies;
149035732d01SWei Wang 	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
149135732d01SWei Wang 	bucket->depth++;
149281eb8447SWei Wang 	net->ipv6.rt6_stats->fib_rt_cache++;
149335732d01SWei Wang 
149435732d01SWei Wang 	if (bucket->depth > FIB6_MAX_DEPTH)
149535732d01SWei Wang 		rt6_exception_remove_oldest(bucket);
149635732d01SWei Wang 
149735732d01SWei Wang out:
149835732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
149935732d01SWei Wang 
150035732d01SWei Wang 	/* Update fn->fn_sernum to invalidate all cached dst */
1501b886d5f2SPaolo Abeni 	if (!err) {
150293c2fb25SDavid Ahern 		spin_lock_bh(&ort->fib6_table->tb6_lock);
15037aef6859SDavid Ahern 		fib6_update_sernum(net, ort);
150493c2fb25SDavid Ahern 		spin_unlock_bh(&ort->fib6_table->tb6_lock);
1505b886d5f2SPaolo Abeni 		fib6_force_start_gc(net);
1506b886d5f2SPaolo Abeni 	}
150735732d01SWei Wang 
150835732d01SWei Wang 	return err;
150935732d01SWei Wang }
151035732d01SWei Wang 
15118d1c802bSDavid Ahern void rt6_flush_exceptions(struct fib6_info *rt)
151235732d01SWei Wang {
151335732d01SWei Wang 	struct rt6_exception_bucket *bucket;
151435732d01SWei Wang 	struct rt6_exception *rt6_ex;
151535732d01SWei Wang 	struct hlist_node *tmp;
151635732d01SWei Wang 	int i;
151735732d01SWei Wang 
151835732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
151935732d01SWei Wang 	/* Prevent rt6_insert_exception() to recreate the bucket list */
152035732d01SWei Wang 	rt->exception_bucket_flushed = 1;
152135732d01SWei Wang 
152235732d01SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
152335732d01SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
152435732d01SWei Wang 	if (!bucket)
152535732d01SWei Wang 		goto out;
152635732d01SWei Wang 
152735732d01SWei Wang 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
152835732d01SWei Wang 		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
152935732d01SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
153035732d01SWei Wang 		WARN_ON_ONCE(bucket->depth);
153135732d01SWei Wang 		bucket++;
153235732d01SWei Wang 	}
153335732d01SWei Wang 
153435732d01SWei Wang out:
153535732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
153635732d01SWei Wang }
153735732d01SWei Wang 
153835732d01SWei Wang /* Find cached rt in the hash table inside passed in rt
153935732d01SWei Wang  * Caller has to hold rcu_read_lock()
154035732d01SWei Wang  */
15418d1c802bSDavid Ahern static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
154235732d01SWei Wang 					   struct in6_addr *daddr,
154335732d01SWei Wang 					   struct in6_addr *saddr)
154435732d01SWei Wang {
154535732d01SWei Wang 	struct rt6_exception_bucket *bucket;
154635732d01SWei Wang 	struct in6_addr *src_key = NULL;
154735732d01SWei Wang 	struct rt6_exception *rt6_ex;
154835732d01SWei Wang 	struct rt6_info *res = NULL;
154935732d01SWei Wang 
155035732d01SWei Wang 	bucket = rcu_dereference(rt->rt6i_exception_bucket);
155135732d01SWei Wang 
155235732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
155335732d01SWei Wang 	/* rt6i_src.plen != 0 indicates rt is in subtree
155435732d01SWei Wang 	 * and exception table is indexed by a hash of
155535732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
155635732d01SWei Wang 	 * Otherwise, the exception table is indexed by
155735732d01SWei Wang 	 * a hash of only rt6i_dst.
155835732d01SWei Wang 	 */
155993c2fb25SDavid Ahern 	if (rt->fib6_src.plen)
156035732d01SWei Wang 		src_key = saddr;
156135732d01SWei Wang #endif
156235732d01SWei Wang 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
156335732d01SWei Wang 
156435732d01SWei Wang 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
156535732d01SWei Wang 		res = rt6_ex->rt6i;
156635732d01SWei Wang 
156735732d01SWei Wang 	return res;
156835732d01SWei Wang }
156935732d01SWei Wang 
157035732d01SWei Wang /* Remove the passed in cached rt from the hash table that contains it */
157123fb93a4SDavid Ahern static int rt6_remove_exception_rt(struct rt6_info *rt)
157235732d01SWei Wang {
157335732d01SWei Wang 	struct rt6_exception_bucket *bucket;
157435732d01SWei Wang 	struct in6_addr *src_key = NULL;
157535732d01SWei Wang 	struct rt6_exception *rt6_ex;
15768a14e46fSDavid Ahern 	struct fib6_info *from;
157735732d01SWei Wang 	int err;
157835732d01SWei Wang 
1579091311deSEric Dumazet 	from = rcu_dereference(rt->from);
158035732d01SWei Wang 	if (!from ||
1581442d713bSColin Ian King 	    !(rt->rt6i_flags & RTF_CACHE))
158235732d01SWei Wang 		return -EINVAL;
158335732d01SWei Wang 
158435732d01SWei Wang 	if (!rcu_access_pointer(from->rt6i_exception_bucket))
158535732d01SWei Wang 		return -ENOENT;
158635732d01SWei Wang 
158735732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
158835732d01SWei Wang 	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
158935732d01SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
159035732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
159135732d01SWei Wang 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
159235732d01SWei Wang 	 * and exception table is indexed by a hash of
159335732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
159435732d01SWei Wang 	 * Otherwise, the exception table is indexed by
159535732d01SWei Wang 	 * a hash of only rt6i_dst.
159635732d01SWei Wang 	 */
159793c2fb25SDavid Ahern 	if (from->fib6_src.plen)
159835732d01SWei Wang 		src_key = &rt->rt6i_src.addr;
159935732d01SWei Wang #endif
160035732d01SWei Wang 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
160135732d01SWei Wang 					       &rt->rt6i_dst.addr,
160235732d01SWei Wang 					       src_key);
160335732d01SWei Wang 	if (rt6_ex) {
160435732d01SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
160535732d01SWei Wang 		err = 0;
160635732d01SWei Wang 	} else {
160735732d01SWei Wang 		err = -ENOENT;
160835732d01SWei Wang 	}
160935732d01SWei Wang 
161035732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
161135732d01SWei Wang 	return err;
161235732d01SWei Wang }
161335732d01SWei Wang 
161435732d01SWei Wang /* Find rt6_ex which contains the passed in rt cache and
161535732d01SWei Wang  * refresh its stamp
161635732d01SWei Wang  */
161735732d01SWei Wang static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
161835732d01SWei Wang {
161935732d01SWei Wang 	struct rt6_exception_bucket *bucket;
162035732d01SWei Wang 	struct in6_addr *src_key = NULL;
162135732d01SWei Wang 	struct rt6_exception *rt6_ex;
1622193f3685SPaolo Abeni 	struct fib6_info *from;
162335732d01SWei Wang 
162435732d01SWei Wang 	rcu_read_lock();
1625193f3685SPaolo Abeni 	from = rcu_dereference(rt->from);
1626193f3685SPaolo Abeni 	if (!from || !(rt->rt6i_flags & RTF_CACHE))
1627193f3685SPaolo Abeni 		goto unlock;
1628193f3685SPaolo Abeni 
162935732d01SWei Wang 	bucket = rcu_dereference(from->rt6i_exception_bucket);
163035732d01SWei Wang 
163135732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
163235732d01SWei Wang 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
163335732d01SWei Wang 	 * and exception table is indexed by a hash of
163435732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
163535732d01SWei Wang 	 * Otherwise, the exception table is indexed by
163635732d01SWei Wang 	 * a hash of only rt6i_dst.
163735732d01SWei Wang 	 */
163893c2fb25SDavid Ahern 	if (from->fib6_src.plen)
163935732d01SWei Wang 		src_key = &rt->rt6i_src.addr;
164035732d01SWei Wang #endif
164135732d01SWei Wang 	rt6_ex = __rt6_find_exception_rcu(&bucket,
164235732d01SWei Wang 					  &rt->rt6i_dst.addr,
164335732d01SWei Wang 					  src_key);
164435732d01SWei Wang 	if (rt6_ex)
164535732d01SWei Wang 		rt6_ex->stamp = jiffies;
164635732d01SWei Wang 
1647193f3685SPaolo Abeni unlock:
164835732d01SWei Wang 	rcu_read_unlock();
164935732d01SWei Wang }
165035732d01SWei Wang 
1651e9fa1495SStefano Brivio static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1652e9fa1495SStefano Brivio 					 struct rt6_info *rt, int mtu)
1653e9fa1495SStefano Brivio {
1654e9fa1495SStefano Brivio 	/* If the new MTU is lower than the route PMTU, this new MTU will be the
1655e9fa1495SStefano Brivio 	 * lowest MTU in the path: always allow updating the route PMTU to
1656e9fa1495SStefano Brivio 	 * reflect PMTU decreases.
1657e9fa1495SStefano Brivio 	 *
1658e9fa1495SStefano Brivio 	 * If the new MTU is higher, and the route PMTU is equal to the local
1659e9fa1495SStefano Brivio 	 * MTU, this means the old MTU is the lowest in the path, so allow
1660e9fa1495SStefano Brivio 	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1661e9fa1495SStefano Brivio 	 * handle this.
1662e9fa1495SStefano Brivio 	 */
1663e9fa1495SStefano Brivio 
1664e9fa1495SStefano Brivio 	if (dst_mtu(&rt->dst) >= mtu)
1665e9fa1495SStefano Brivio 		return true;
1666e9fa1495SStefano Brivio 
1667e9fa1495SStefano Brivio 	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1668e9fa1495SStefano Brivio 		return true;
1669e9fa1495SStefano Brivio 
1670e9fa1495SStefano Brivio 	return false;
1671e9fa1495SStefano Brivio }
1672e9fa1495SStefano Brivio 
1673e9fa1495SStefano Brivio static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
16748d1c802bSDavid Ahern 				       struct fib6_info *rt, int mtu)
1675f5bbe7eeSWei Wang {
1676f5bbe7eeSWei Wang 	struct rt6_exception_bucket *bucket;
1677f5bbe7eeSWei Wang 	struct rt6_exception *rt6_ex;
1678f5bbe7eeSWei Wang 	int i;
1679f5bbe7eeSWei Wang 
1680f5bbe7eeSWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1681f5bbe7eeSWei Wang 					lockdep_is_held(&rt6_exception_lock));
1682f5bbe7eeSWei Wang 
1683e9fa1495SStefano Brivio 	if (!bucket)
1684e9fa1495SStefano Brivio 		return;
1685e9fa1495SStefano Brivio 
1686f5bbe7eeSWei Wang 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1687f5bbe7eeSWei Wang 		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1688f5bbe7eeSWei Wang 			struct rt6_info *entry = rt6_ex->rt6i;
1689e9fa1495SStefano Brivio 
1690e9fa1495SStefano Brivio 			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1691d4ead6b3SDavid Ahern 			 * route), the metrics of its rt->from have already
1692f5bbe7eeSWei Wang 			 * been updated.
1693f5bbe7eeSWei Wang 			 */
1694d4ead6b3SDavid Ahern 			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
1695e9fa1495SStefano Brivio 			    rt6_mtu_change_route_allowed(idev, entry, mtu))
1696d4ead6b3SDavid Ahern 				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
1697f5bbe7eeSWei Wang 		}
1698f5bbe7eeSWei Wang 		bucket++;
1699f5bbe7eeSWei Wang 	}
1700f5bbe7eeSWei Wang }
1701f5bbe7eeSWei Wang 
1702b16cb459SWei Wang #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
1703b16cb459SWei Wang 
17048d1c802bSDavid Ahern static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
1705b16cb459SWei Wang 					struct in6_addr *gateway)
1706b16cb459SWei Wang {
1707b16cb459SWei Wang 	struct rt6_exception_bucket *bucket;
1708b16cb459SWei Wang 	struct rt6_exception *rt6_ex;
1709b16cb459SWei Wang 	struct hlist_node *tmp;
1710b16cb459SWei Wang 	int i;
1711b16cb459SWei Wang 
1712b16cb459SWei Wang 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1713b16cb459SWei Wang 		return;
1714b16cb459SWei Wang 
1715b16cb459SWei Wang 	spin_lock_bh(&rt6_exception_lock);
1716b16cb459SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1717b16cb459SWei Wang 				     lockdep_is_held(&rt6_exception_lock));
1718b16cb459SWei Wang 
1719b16cb459SWei Wang 	if (bucket) {
1720b16cb459SWei Wang 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1721b16cb459SWei Wang 			hlist_for_each_entry_safe(rt6_ex, tmp,
1722b16cb459SWei Wang 						  &bucket->chain, hlist) {
1723b16cb459SWei Wang 				struct rt6_info *entry = rt6_ex->rt6i;
1724b16cb459SWei Wang 
1725b16cb459SWei Wang 				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1726b16cb459SWei Wang 				    RTF_CACHE_GATEWAY &&
1727b16cb459SWei Wang 				    ipv6_addr_equal(gateway,
1728b16cb459SWei Wang 						    &entry->rt6i_gateway)) {
1729b16cb459SWei Wang 					rt6_remove_exception(bucket, rt6_ex);
1730b16cb459SWei Wang 				}
1731b16cb459SWei Wang 			}
1732b16cb459SWei Wang 			bucket++;
1733b16cb459SWei Wang 		}
1734b16cb459SWei Wang 	}
1735b16cb459SWei Wang 
1736b16cb459SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
1737b16cb459SWei Wang }
1738b16cb459SWei Wang 
1739c757faa8SWei Wang static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1740c757faa8SWei Wang 				      struct rt6_exception *rt6_ex,
1741c757faa8SWei Wang 				      struct fib6_gc_args *gc_args,
1742c757faa8SWei Wang 				      unsigned long now)
1743c757faa8SWei Wang {
1744c757faa8SWei Wang 	struct rt6_info *rt = rt6_ex->rt6i;
1745c757faa8SWei Wang 
17461859bac0SPaolo Abeni 	/* we are pruning and obsoleting aged-out and non gateway exceptions
17471859bac0SPaolo Abeni 	 * even if others have still references to them, so that on next
17481859bac0SPaolo Abeni 	 * dst_check() such references can be dropped.
17491859bac0SPaolo Abeni 	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
17501859bac0SPaolo Abeni 	 * expired, independently from their aging, as per RFC 8201 section 4
17511859bac0SPaolo Abeni 	 */
175231afeb42SWei Wang 	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
175331afeb42SWei Wang 		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1754c757faa8SWei Wang 			RT6_TRACE("aging clone %p\n", rt);
1755c757faa8SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
1756c757faa8SWei Wang 			return;
175731afeb42SWei Wang 		}
175831afeb42SWei Wang 	} else if (time_after(jiffies, rt->dst.expires)) {
175931afeb42SWei Wang 		RT6_TRACE("purging expired route %p\n", rt);
176031afeb42SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
176131afeb42SWei Wang 		return;
176231afeb42SWei Wang 	}
176331afeb42SWei Wang 
176431afeb42SWei Wang 	if (rt->rt6i_flags & RTF_GATEWAY) {
1765c757faa8SWei Wang 		struct neighbour *neigh;
1766c757faa8SWei Wang 		__u8 neigh_flags = 0;
1767c757faa8SWei Wang 
17681bfa26ffSEric Dumazet 		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
17691bfa26ffSEric Dumazet 		if (neigh)
1770c757faa8SWei Wang 			neigh_flags = neigh->flags;
17711bfa26ffSEric Dumazet 
1772c757faa8SWei Wang 		if (!(neigh_flags & NTF_ROUTER)) {
1773c757faa8SWei Wang 			RT6_TRACE("purging route %p via non-router but gateway\n",
1774c757faa8SWei Wang 				  rt);
1775c757faa8SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
1776c757faa8SWei Wang 			return;
1777c757faa8SWei Wang 		}
1778c757faa8SWei Wang 	}
177931afeb42SWei Wang 
1780c757faa8SWei Wang 	gc_args->more++;
1781c757faa8SWei Wang }
1782c757faa8SWei Wang 
17838d1c802bSDavid Ahern void rt6_age_exceptions(struct fib6_info *rt,
1784c757faa8SWei Wang 			struct fib6_gc_args *gc_args,
1785c757faa8SWei Wang 			unsigned long now)
1786c757faa8SWei Wang {
1787c757faa8SWei Wang 	struct rt6_exception_bucket *bucket;
1788c757faa8SWei Wang 	struct rt6_exception *rt6_ex;
1789c757faa8SWei Wang 	struct hlist_node *tmp;
1790c757faa8SWei Wang 	int i;
1791c757faa8SWei Wang 
1792c757faa8SWei Wang 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1793c757faa8SWei Wang 		return;
1794c757faa8SWei Wang 
17951bfa26ffSEric Dumazet 	rcu_read_lock_bh();
17961bfa26ffSEric Dumazet 	spin_lock(&rt6_exception_lock);
1797c757faa8SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1798c757faa8SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
1799c757faa8SWei Wang 
1800c757faa8SWei Wang 	if (bucket) {
1801c757faa8SWei Wang 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1802c757faa8SWei Wang 			hlist_for_each_entry_safe(rt6_ex, tmp,
1803c757faa8SWei Wang 						  &bucket->chain, hlist) {
1804c757faa8SWei Wang 				rt6_age_examine_exception(bucket, rt6_ex,
1805c757faa8SWei Wang 							  gc_args, now);
1806c757faa8SWei Wang 			}
1807c757faa8SWei Wang 			bucket++;
1808c757faa8SWei Wang 		}
1809c757faa8SWei Wang 	}
18101bfa26ffSEric Dumazet 	spin_unlock(&rt6_exception_lock);
18111bfa26ffSEric Dumazet 	rcu_read_unlock_bh();
1812c757faa8SWei Wang }
1813c757faa8SWei Wang 
18141d053da9SDavid Ahern /* must be called with rcu lock held */
18151d053da9SDavid Ahern struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
18161d053da9SDavid Ahern 				    int oif, struct flowi6 *fl6, int strict)
18171da177e4SLinus Torvalds {
1818367efcb9SMartin KaFai Lau 	struct fib6_node *fn, *saved_fn;
18198d1c802bSDavid Ahern 	struct fib6_info *f6i;
18201da177e4SLinus Torvalds 
18216454743bSDavid Ahern 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1822367efcb9SMartin KaFai Lau 	saved_fn = fn;
18231da177e4SLinus Torvalds 
1824ca254490SDavid Ahern 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1825ca254490SDavid Ahern 		oif = 0;
1826ca254490SDavid Ahern 
1827a3c00e46SMartin KaFai Lau redo_rt6_select:
182823fb93a4SDavid Ahern 	f6i = rt6_select(net, fn, oif, strict);
182923fb93a4SDavid Ahern 	if (f6i == net->ipv6.fib6_null_entry) {
1830a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
1831a3c00e46SMartin KaFai Lau 		if (fn)
1832a3c00e46SMartin KaFai Lau 			goto redo_rt6_select;
1833367efcb9SMartin KaFai Lau 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1834367efcb9SMartin KaFai Lau 			/* also consider unreachable route */
1835367efcb9SMartin KaFai Lau 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1836367efcb9SMartin KaFai Lau 			fn = saved_fn;
1837367efcb9SMartin KaFai Lau 			goto redo_rt6_select;
1838367efcb9SMartin KaFai Lau 		}
1839a3c00e46SMartin KaFai Lau 	}
1840a3c00e46SMartin KaFai Lau 
1841d4bea421SDavid Ahern 	trace_fib6_table_lookup(net, f6i, table, fl6);
1842d52d3997SMartin KaFai Lau 
18431d053da9SDavid Ahern 	return f6i;
18441d053da9SDavid Ahern }
18451d053da9SDavid Ahern 
18461d053da9SDavid Ahern struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
18471d053da9SDavid Ahern 			       int oif, struct flowi6 *fl6,
18481d053da9SDavid Ahern 			       const struct sk_buff *skb, int flags)
18491d053da9SDavid Ahern {
1850*b1d40991SDavid Ahern 	struct fib6_result res = {};
18511d053da9SDavid Ahern 	struct rt6_info *rt;
18521d053da9SDavid Ahern 	int strict = 0;
18531d053da9SDavid Ahern 
18541d053da9SDavid Ahern 	strict |= flags & RT6_LOOKUP_F_IFACE;
18551d053da9SDavid Ahern 	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
18561d053da9SDavid Ahern 	if (net->ipv6.devconf_all->forwarding == 0)
18571d053da9SDavid Ahern 		strict |= RT6_LOOKUP_F_REACHABLE;
18581d053da9SDavid Ahern 
18591d053da9SDavid Ahern 	rcu_read_lock();
18601d053da9SDavid Ahern 
1861*b1d40991SDavid Ahern 	res.f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1862*b1d40991SDavid Ahern 	if (res.f6i == net->ipv6.fib6_null_entry) {
1863421842edSDavid Ahern 		rt = net->ipv6.ip6_null_entry;
186466f5d6ceSWei Wang 		rcu_read_unlock();
1865d3843fe5SWei Wang 		dst_hold(&rt->dst);
1866d3843fe5SWei Wang 		return rt;
1867d3843fe5SWei Wang 	}
186823fb93a4SDavid Ahern 
1869*b1d40991SDavid Ahern 	fib6_select_path(net, &res, fl6, oif, false, skb, strict);
1870d83009d4SDavid Ahern 
187123fb93a4SDavid Ahern 	/*Search through exception table */
1872*b1d40991SDavid Ahern 	rt = rt6_find_cached_rt(res.f6i, &fl6->daddr, &fl6->saddr);
187323fb93a4SDavid Ahern 	if (rt) {
187410585b43SDavid Ahern 		if (ip6_hold_safe(net, &rt))
18751da177e4SLinus Torvalds 			dst_use_noref(&rt->dst, jiffies);
1876d4ead6b3SDavid Ahern 
187766f5d6ceSWei Wang 		rcu_read_unlock();
1878d52d3997SMartin KaFai Lau 		return rt;
18793da59bd9SMartin KaFai Lau 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1880*b1d40991SDavid Ahern 			    !res.nh->fib_nh_gw_family)) {
18813da59bd9SMartin KaFai Lau 		/* Create a RTF_CACHE clone which will not be
18823da59bd9SMartin KaFai Lau 		 * owned by the fib6 tree.  It is for the special case where
18833da59bd9SMartin KaFai Lau 		 * the daddr in the skb during the neighbor look-up is different
18843da59bd9SMartin KaFai Lau 		 * from the fl6->daddr used to look-up route here.
18853da59bd9SMartin KaFai Lau 		 */
18863da59bd9SMartin KaFai Lau 		struct rt6_info *uncached_rt;
18873da59bd9SMartin KaFai Lau 
1888*b1d40991SDavid Ahern 		uncached_rt = ip6_rt_cache_alloc(res.f6i, &fl6->daddr, NULL);
1889d52d3997SMartin KaFai Lau 
18904d85cd0cSDavid Ahern 		rcu_read_unlock();
18913da59bd9SMartin KaFai Lau 
18921cfb71eeSWei Wang 		if (uncached_rt) {
18931cfb71eeSWei Wang 			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
18941cfb71eeSWei Wang 			 * No need for another dst_hold()
18951cfb71eeSWei Wang 			 */
18968d0b94afSMartin KaFai Lau 			rt6_uncached_list_add(uncached_rt);
189781eb8447SWei Wang 			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
18981cfb71eeSWei Wang 		} else {
18993da59bd9SMartin KaFai Lau 			uncached_rt = net->ipv6.ip6_null_entry;
19003da59bd9SMartin KaFai Lau 			dst_hold(&uncached_rt->dst);
19011cfb71eeSWei Wang 		}
1902b811580dSDavid Ahern 
19033da59bd9SMartin KaFai Lau 		return uncached_rt;
1904d52d3997SMartin KaFai Lau 	} else {
1905d52d3997SMartin KaFai Lau 		/* Get a percpu copy */
1906d52d3997SMartin KaFai Lau 
1907d52d3997SMartin KaFai Lau 		struct rt6_info *pcpu_rt;
1908d52d3997SMartin KaFai Lau 
1909951f788aSEric Dumazet 		local_bh_disable();
1910*b1d40991SDavid Ahern 		pcpu_rt = rt6_get_pcpu_route(res.f6i);
1911d52d3997SMartin KaFai Lau 
191293531c67SDavid Ahern 		if (!pcpu_rt)
1913*b1d40991SDavid Ahern 			pcpu_rt = rt6_make_pcpu_route(net, res.f6i);
191493531c67SDavid Ahern 
1915951f788aSEric Dumazet 		local_bh_enable();
1916951f788aSEric Dumazet 		rcu_read_unlock();
1917d4bea421SDavid Ahern 
1918d52d3997SMartin KaFai Lau 		return pcpu_rt;
1919d52d3997SMartin KaFai Lau 	}
1920c71099acSThomas Graf }
19219ff74384SDavid Ahern EXPORT_SYMBOL_GPL(ip6_pol_route);
1922c71099acSThomas Graf 
1923b75cc8f9SDavid Ahern static struct rt6_info *ip6_pol_route_input(struct net *net,
1924b75cc8f9SDavid Ahern 					    struct fib6_table *table,
1925b75cc8f9SDavid Ahern 					    struct flowi6 *fl6,
1926b75cc8f9SDavid Ahern 					    const struct sk_buff *skb,
1927b75cc8f9SDavid Ahern 					    int flags)
19284acad72dSPavel Emelyanov {
1929b75cc8f9SDavid Ahern 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
19304acad72dSPavel Emelyanov }
19314acad72dSPavel Emelyanov 
1932d409b847SMahesh Bandewar struct dst_entry *ip6_route_input_lookup(struct net *net,
193372331bc0SShmulik Ladkani 					 struct net_device *dev,
1934b75cc8f9SDavid Ahern 					 struct flowi6 *fl6,
1935b75cc8f9SDavid Ahern 					 const struct sk_buff *skb,
1936b75cc8f9SDavid Ahern 					 int flags)
193772331bc0SShmulik Ladkani {
193872331bc0SShmulik Ladkani 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
193972331bc0SShmulik Ladkani 		flags |= RT6_LOOKUP_F_IFACE;
194072331bc0SShmulik Ladkani 
1941b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
194272331bc0SShmulik Ladkani }
1943d409b847SMahesh Bandewar EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
194472331bc0SShmulik Ladkani 
194523aebdacSJakub Sitnicki static void ip6_multipath_l3_keys(const struct sk_buff *skb,
19465e5d6fedSRoopa Prabhu 				  struct flow_keys *keys,
19475e5d6fedSRoopa Prabhu 				  struct flow_keys *flkeys)
194823aebdacSJakub Sitnicki {
194923aebdacSJakub Sitnicki 	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
195023aebdacSJakub Sitnicki 	const struct ipv6hdr *key_iph = outer_iph;
19515e5d6fedSRoopa Prabhu 	struct flow_keys *_flkeys = flkeys;
195223aebdacSJakub Sitnicki 	const struct ipv6hdr *inner_iph;
195323aebdacSJakub Sitnicki 	const struct icmp6hdr *icmph;
195423aebdacSJakub Sitnicki 	struct ipv6hdr _inner_iph;
1955cea67a2dSEric Dumazet 	struct icmp6hdr _icmph;
195623aebdacSJakub Sitnicki 
195723aebdacSJakub Sitnicki 	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
195823aebdacSJakub Sitnicki 		goto out;
195923aebdacSJakub Sitnicki 
1960cea67a2dSEric Dumazet 	icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1961cea67a2dSEric Dumazet 				   sizeof(_icmph), &_icmph);
1962cea67a2dSEric Dumazet 	if (!icmph)
1963cea67a2dSEric Dumazet 		goto out;
1964cea67a2dSEric Dumazet 
196523aebdacSJakub Sitnicki 	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
196623aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
196723aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
196823aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_PARAMPROB)
196923aebdacSJakub Sitnicki 		goto out;
197023aebdacSJakub Sitnicki 
197123aebdacSJakub Sitnicki 	inner_iph = skb_header_pointer(skb,
197223aebdacSJakub Sitnicki 				       skb_transport_offset(skb) + sizeof(*icmph),
197323aebdacSJakub Sitnicki 				       sizeof(_inner_iph), &_inner_iph);
197423aebdacSJakub Sitnicki 	if (!inner_iph)
197523aebdacSJakub Sitnicki 		goto out;
197623aebdacSJakub Sitnicki 
197723aebdacSJakub Sitnicki 	key_iph = inner_iph;
19785e5d6fedSRoopa Prabhu 	_flkeys = NULL;
197923aebdacSJakub Sitnicki out:
19805e5d6fedSRoopa Prabhu 	if (_flkeys) {
19815e5d6fedSRoopa Prabhu 		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
19825e5d6fedSRoopa Prabhu 		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
19835e5d6fedSRoopa Prabhu 		keys->tags.flow_label = _flkeys->tags.flow_label;
19845e5d6fedSRoopa Prabhu 		keys->basic.ip_proto = _flkeys->basic.ip_proto;
19855e5d6fedSRoopa Prabhu 	} else {
198623aebdacSJakub Sitnicki 		keys->addrs.v6addrs.src = key_iph->saddr;
198723aebdacSJakub Sitnicki 		keys->addrs.v6addrs.dst = key_iph->daddr;
1988fa1be7e0SMichal Kubecek 		keys->tags.flow_label = ip6_flowlabel(key_iph);
198923aebdacSJakub Sitnicki 		keys->basic.ip_proto = key_iph->nexthdr;
199023aebdacSJakub Sitnicki 	}
19915e5d6fedSRoopa Prabhu }
199223aebdacSJakub Sitnicki 
199323aebdacSJakub Sitnicki /* if skb is set it will be used and fl6 can be NULL */
1994b4bac172SDavid Ahern u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1995b4bac172SDavid Ahern 		       const struct sk_buff *skb, struct flow_keys *flkeys)
199623aebdacSJakub Sitnicki {
199723aebdacSJakub Sitnicki 	struct flow_keys hash_keys;
19989a2a537aSDavid Ahern 	u32 mhash;
199923aebdacSJakub Sitnicki 
2000bbfa047aSDavid S. Miller 	switch (ip6_multipath_hash_policy(net)) {
2001b4bac172SDavid Ahern 	case 0:
20026f74b6c2SDavid Ahern 		memset(&hash_keys, 0, sizeof(hash_keys));
20036f74b6c2SDavid Ahern 		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
20049a2a537aSDavid Ahern 		if (skb) {
20055e5d6fedSRoopa Prabhu 			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
20069a2a537aSDavid Ahern 		} else {
20079a2a537aSDavid Ahern 			hash_keys.addrs.v6addrs.src = fl6->saddr;
20089a2a537aSDavid Ahern 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2009fa1be7e0SMichal Kubecek 			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
20109a2a537aSDavid Ahern 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
201123aebdacSJakub Sitnicki 		}
2012b4bac172SDavid Ahern 		break;
2013b4bac172SDavid Ahern 	case 1:
2014b4bac172SDavid Ahern 		if (skb) {
2015b4bac172SDavid Ahern 			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2016b4bac172SDavid Ahern 			struct flow_keys keys;
2017b4bac172SDavid Ahern 
2018b4bac172SDavid Ahern 			/* short-circuit if we already have L4 hash present */
2019b4bac172SDavid Ahern 			if (skb->l4_hash)
2020b4bac172SDavid Ahern 				return skb_get_hash_raw(skb) >> 1;
2021b4bac172SDavid Ahern 
2022b4bac172SDavid Ahern 			memset(&hash_keys, 0, sizeof(hash_keys));
2023b4bac172SDavid Ahern 
2024b4bac172SDavid Ahern                         if (!flkeys) {
2025b4bac172SDavid Ahern 				skb_flow_dissect_flow_keys(skb, &keys, flag);
2026b4bac172SDavid Ahern 				flkeys = &keys;
2027b4bac172SDavid Ahern 			}
2028b4bac172SDavid Ahern 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2029b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2030b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2031b4bac172SDavid Ahern 			hash_keys.ports.src = flkeys->ports.src;
2032b4bac172SDavid Ahern 			hash_keys.ports.dst = flkeys->ports.dst;
2033b4bac172SDavid Ahern 			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2034b4bac172SDavid Ahern 		} else {
2035b4bac172SDavid Ahern 			memset(&hash_keys, 0, sizeof(hash_keys));
2036b4bac172SDavid Ahern 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2037b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.src = fl6->saddr;
2038b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2039b4bac172SDavid Ahern 			hash_keys.ports.src = fl6->fl6_sport;
2040b4bac172SDavid Ahern 			hash_keys.ports.dst = fl6->fl6_dport;
2041b4bac172SDavid Ahern 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2042b4bac172SDavid Ahern 		}
2043b4bac172SDavid Ahern 		break;
2044b4bac172SDavid Ahern 	}
20459a2a537aSDavid Ahern 	mhash = flow_hash_from_keys(&hash_keys);
204623aebdacSJakub Sitnicki 
20479a2a537aSDavid Ahern 	return mhash >> 1;
204823aebdacSJakub Sitnicki }
204923aebdacSJakub Sitnicki 
2050c71099acSThomas Graf void ip6_route_input(struct sk_buff *skb)
2051c71099acSThomas Graf {
2052b71d1d42SEric Dumazet 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2053c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(skb->dev);
2054adaa70bbSThomas Graf 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2055904af04dSJiri Benc 	struct ip_tunnel_info *tun_info;
20564c9483b2SDavid S. Miller 	struct flowi6 fl6 = {
2057e0d56fddSDavid Ahern 		.flowi6_iif = skb->dev->ifindex,
20584c9483b2SDavid S. Miller 		.daddr = iph->daddr,
20594c9483b2SDavid S. Miller 		.saddr = iph->saddr,
20606502ca52SYOSHIFUJI Hideaki / 吉藤英明 		.flowlabel = ip6_flowinfo(iph),
20614c9483b2SDavid S. Miller 		.flowi6_mark = skb->mark,
20624c9483b2SDavid S. Miller 		.flowi6_proto = iph->nexthdr,
2063c71099acSThomas Graf 	};
20645e5d6fedSRoopa Prabhu 	struct flow_keys *flkeys = NULL, _flkeys;
2065adaa70bbSThomas Graf 
2066904af04dSJiri Benc 	tun_info = skb_tunnel_info(skb);
206746fa062aSJiri Benc 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2068904af04dSJiri Benc 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
20695e5d6fedSRoopa Prabhu 
20705e5d6fedSRoopa Prabhu 	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
20715e5d6fedSRoopa Prabhu 		flkeys = &_flkeys;
20725e5d6fedSRoopa Prabhu 
207323aebdacSJakub Sitnicki 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2074b4bac172SDavid Ahern 		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
207506e9d040SJiri Benc 	skb_dst_drop(skb);
2076b75cc8f9SDavid Ahern 	skb_dst_set(skb,
2077b75cc8f9SDavid Ahern 		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
2078c71099acSThomas Graf }
2079c71099acSThomas Graf 
2080b75cc8f9SDavid Ahern static struct rt6_info *ip6_pol_route_output(struct net *net,
2081b75cc8f9SDavid Ahern 					     struct fib6_table *table,
2082b75cc8f9SDavid Ahern 					     struct flowi6 *fl6,
2083b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
2084b75cc8f9SDavid Ahern 					     int flags)
2085c71099acSThomas Graf {
2086b75cc8f9SDavid Ahern 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2087c71099acSThomas Graf }
2088c71099acSThomas Graf 
20896f21c96aSPaolo Abeni struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
20906f21c96aSPaolo Abeni 					 struct flowi6 *fl6, int flags)
2091c71099acSThomas Graf {
2092d46a9d67SDavid Ahern 	bool any_src;
2093c71099acSThomas Graf 
20943ede0bbcSRobert Shearman 	if (ipv6_addr_type(&fl6->daddr) &
20953ede0bbcSRobert Shearman 	    (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
20964c1feac5SDavid Ahern 		struct dst_entry *dst;
20974c1feac5SDavid Ahern 
20984c1feac5SDavid Ahern 		dst = l3mdev_link_scope_lookup(net, fl6);
2099ca254490SDavid Ahern 		if (dst)
2100ca254490SDavid Ahern 			return dst;
21014c1feac5SDavid Ahern 	}
2102ca254490SDavid Ahern 
21031fb9489bSPavel Emelyanov 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
21044dc27d1cSDavid McCullough 
2105d46a9d67SDavid Ahern 	any_src = ipv6_addr_any(&fl6->saddr);
2106741a11d9SDavid Ahern 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2107d46a9d67SDavid Ahern 	    (fl6->flowi6_oif && any_src))
210877d16f45SYOSHIFUJI Hideaki 		flags |= RT6_LOOKUP_F_IFACE;
2109c71099acSThomas Graf 
2110d46a9d67SDavid Ahern 	if (!any_src)
2111adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
21120c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 	else if (sk)
21130c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2114adaa70bbSThomas Graf 
2115b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
21161da177e4SLinus Torvalds }
21176f21c96aSPaolo Abeni EXPORT_SYMBOL_GPL(ip6_route_output_flags);
21181da177e4SLinus Torvalds 
21192774c131SDavid S. Miller struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
212014e50e57SDavid S. Miller {
21215c1e6aa3SDavid S. Miller 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
21221dbe3252SWei Wang 	struct net_device *loopback_dev = net->loopback_dev;
212314e50e57SDavid S. Miller 	struct dst_entry *new = NULL;
212414e50e57SDavid S. Miller 
21251dbe3252SWei Wang 	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
212662cf27e5SSteffen Klassert 		       DST_OBSOLETE_DEAD, 0);
212714e50e57SDavid S. Miller 	if (rt) {
21280a1f5962SMartin KaFai Lau 		rt6_info_init(rt);
212981eb8447SWei Wang 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
21300a1f5962SMartin KaFai Lau 
2131d8d1f30bSChangli Gao 		new = &rt->dst;
213214e50e57SDavid S. Miller 		new->__use = 1;
2133352e512cSHerbert Xu 		new->input = dst_discard;
2134ede2059dSEric W. Biederman 		new->output = dst_discard_out;
213514e50e57SDavid S. Miller 
2136defb3519SDavid S. Miller 		dst_copy_metrics(new, &ort->dst);
213714e50e57SDavid S. Miller 
21381dbe3252SWei Wang 		rt->rt6i_idev = in6_dev_get(loopback_dev);
21394e3fd7a0SAlexey Dobriyan 		rt->rt6i_gateway = ort->rt6i_gateway;
21400a1f5962SMartin KaFai Lau 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
214114e50e57SDavid S. Miller 
214214e50e57SDavid S. Miller 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
214314e50e57SDavid S. Miller #ifdef CONFIG_IPV6_SUBTREES
214414e50e57SDavid S. Miller 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
214514e50e57SDavid S. Miller #endif
214614e50e57SDavid S. Miller 	}
214714e50e57SDavid S. Miller 
214869ead7afSDavid S. Miller 	dst_release(dst_orig);
214969ead7afSDavid S. Miller 	return new ? new : ERR_PTR(-ENOMEM);
215014e50e57SDavid S. Miller }
215114e50e57SDavid S. Miller 
21521da177e4SLinus Torvalds /*
21531da177e4SLinus Torvalds  *	Destination cache support functions
21541da177e4SLinus Torvalds  */
21551da177e4SLinus Torvalds 
21568d1c802bSDavid Ahern static bool fib6_check(struct fib6_info *f6i, u32 cookie)
21573da59bd9SMartin KaFai Lau {
215836143645SSteffen Klassert 	u32 rt_cookie = 0;
2159c5cff856SWei Wang 
21608ae86971SDavid Ahern 	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
216193531c67SDavid Ahern 		return false;
216293531c67SDavid Ahern 
216393531c67SDavid Ahern 	if (fib6_check_expired(f6i))
216493531c67SDavid Ahern 		return false;
216593531c67SDavid Ahern 
216693531c67SDavid Ahern 	return true;
216793531c67SDavid Ahern }
216893531c67SDavid Ahern 
2169a68886a6SDavid Ahern static struct dst_entry *rt6_check(struct rt6_info *rt,
2170a68886a6SDavid Ahern 				   struct fib6_info *from,
2171a68886a6SDavid Ahern 				   u32 cookie)
21723da59bd9SMartin KaFai Lau {
2173c5cff856SWei Wang 	u32 rt_cookie = 0;
2174c5cff856SWei Wang 
2175a68886a6SDavid Ahern 	if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
217693531c67SDavid Ahern 	    rt_cookie != cookie)
21773da59bd9SMartin KaFai Lau 		return NULL;
21783da59bd9SMartin KaFai Lau 
21793da59bd9SMartin KaFai Lau 	if (rt6_check_expired(rt))
21803da59bd9SMartin KaFai Lau 		return NULL;
21813da59bd9SMartin KaFai Lau 
21823da59bd9SMartin KaFai Lau 	return &rt->dst;
21833da59bd9SMartin KaFai Lau }
21843da59bd9SMartin KaFai Lau 
2185a68886a6SDavid Ahern static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2186a68886a6SDavid Ahern 					    struct fib6_info *from,
2187a68886a6SDavid Ahern 					    u32 cookie)
21883da59bd9SMartin KaFai Lau {
21895973fb1eSMartin KaFai Lau 	if (!__rt6_check_expired(rt) &&
21905973fb1eSMartin KaFai Lau 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2191a68886a6SDavid Ahern 	    fib6_check(from, cookie))
21923da59bd9SMartin KaFai Lau 		return &rt->dst;
21933da59bd9SMartin KaFai Lau 	else
21943da59bd9SMartin KaFai Lau 		return NULL;
21953da59bd9SMartin KaFai Lau }
21963da59bd9SMartin KaFai Lau 
21971da177e4SLinus Torvalds static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
21981da177e4SLinus Torvalds {
2199a87b7dc9SDavid Ahern 	struct dst_entry *dst_ret;
2200a68886a6SDavid Ahern 	struct fib6_info *from;
22011da177e4SLinus Torvalds 	struct rt6_info *rt;
22021da177e4SLinus Torvalds 
2203a87b7dc9SDavid Ahern 	rt = container_of(dst, struct rt6_info, dst);
2204a87b7dc9SDavid Ahern 
2205a87b7dc9SDavid Ahern 	rcu_read_lock();
22061da177e4SLinus Torvalds 
22076f3118b5SNicolas Dichtel 	/* All IPV6 dsts are created with ->obsolete set to the value
22086f3118b5SNicolas Dichtel 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
22096f3118b5SNicolas Dichtel 	 * into this function always.
22106f3118b5SNicolas Dichtel 	 */
2211e3bc10bdSHannes Frederic Sowa 
2212a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
22134b32b5adSMartin KaFai Lau 
2214a68886a6SDavid Ahern 	if (from && (rt->rt6i_flags & RTF_PCPU ||
2215a68886a6SDavid Ahern 	    unlikely(!list_empty(&rt->rt6i_uncached))))
2216a68886a6SDavid Ahern 		dst_ret = rt6_dst_from_check(rt, from, cookie);
22173da59bd9SMartin KaFai Lau 	else
2218a68886a6SDavid Ahern 		dst_ret = rt6_check(rt, from, cookie);
2219a87b7dc9SDavid Ahern 
2220a87b7dc9SDavid Ahern 	rcu_read_unlock();
2221a87b7dc9SDavid Ahern 
2222a87b7dc9SDavid Ahern 	return dst_ret;
22231da177e4SLinus Torvalds }
22241da177e4SLinus Torvalds 
22251da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
22261da177e4SLinus Torvalds {
22271da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *) dst;
22281da177e4SLinus Torvalds 
22291da177e4SLinus Torvalds 	if (rt) {
223054c1a859SYOSHIFUJI Hideaki / 吉藤英明 		if (rt->rt6i_flags & RTF_CACHE) {
2231c3c14da0SDavid Ahern 			rcu_read_lock();
223254c1a859SYOSHIFUJI Hideaki / 吉藤英明 			if (rt6_check_expired(rt)) {
223393531c67SDavid Ahern 				rt6_remove_exception_rt(rt);
223454c1a859SYOSHIFUJI Hideaki / 吉藤英明 				dst = NULL;
22351da177e4SLinus Torvalds 			}
2236c3c14da0SDavid Ahern 			rcu_read_unlock();
223754c1a859SYOSHIFUJI Hideaki / 吉藤英明 		} else {
223854c1a859SYOSHIFUJI Hideaki / 吉藤英明 			dst_release(dst);
223954c1a859SYOSHIFUJI Hideaki / 吉藤英明 			dst = NULL;
224054c1a859SYOSHIFUJI Hideaki / 吉藤英明 		}
224154c1a859SYOSHIFUJI Hideaki / 吉藤英明 	}
224254c1a859SYOSHIFUJI Hideaki / 吉藤英明 	return dst;
22431da177e4SLinus Torvalds }
22441da177e4SLinus Torvalds 
22451da177e4SLinus Torvalds static void ip6_link_failure(struct sk_buff *skb)
22461da177e4SLinus Torvalds {
22471da177e4SLinus Torvalds 	struct rt6_info *rt;
22481da177e4SLinus Torvalds 
22493ffe533cSAlexey Dobriyan 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
22501da177e4SLinus Torvalds 
2251adf30907SEric Dumazet 	rt = (struct rt6_info *) skb_dst(skb);
22521da177e4SLinus Torvalds 	if (rt) {
22538a14e46fSDavid Ahern 		rcu_read_lock();
22541eb4f758SHannes Frederic Sowa 		if (rt->rt6i_flags & RTF_CACHE) {
225593531c67SDavid Ahern 			rt6_remove_exception_rt(rt);
2256c5cff856SWei Wang 		} else {
2257a68886a6SDavid Ahern 			struct fib6_info *from;
2258c5cff856SWei Wang 			struct fib6_node *fn;
2259c5cff856SWei Wang 
2260a68886a6SDavid Ahern 			from = rcu_dereference(rt->from);
2261a68886a6SDavid Ahern 			if (from) {
2262a68886a6SDavid Ahern 				fn = rcu_dereference(from->fib6_node);
2263c5cff856SWei Wang 				if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2264c5cff856SWei Wang 					fn->fn_sernum = -1;
2265a68886a6SDavid Ahern 			}
22661da177e4SLinus Torvalds 		}
22671da177e4SLinus Torvalds 		rcu_read_unlock();
22681da177e4SLinus Torvalds 	}
22691da177e4SLinus Torvalds }
22701da177e4SLinus Torvalds 
22716a3e030fSDavid Ahern static void rt6_update_expires(struct rt6_info *rt0, int timeout)
22726a3e030fSDavid Ahern {
2273a68886a6SDavid Ahern 	if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2274a68886a6SDavid Ahern 		struct fib6_info *from;
2275a68886a6SDavid Ahern 
2276a68886a6SDavid Ahern 		rcu_read_lock();
2277a68886a6SDavid Ahern 		from = rcu_dereference(rt0->from);
2278a68886a6SDavid Ahern 		if (from)
2279a68886a6SDavid Ahern 			rt0->dst.expires = from->expires;
2280a68886a6SDavid Ahern 		rcu_read_unlock();
2281a68886a6SDavid Ahern 	}
22826a3e030fSDavid Ahern 
22836a3e030fSDavid Ahern 	dst_set_expires(&rt0->dst, timeout);
22846a3e030fSDavid Ahern 	rt0->rt6i_flags |= RTF_EXPIRES;
22856700c270SDavid S. Miller }
22861da177e4SLinus Torvalds 
228745e4fd26SMartin KaFai Lau static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
228845e4fd26SMartin KaFai Lau {
228945e4fd26SMartin KaFai Lau 	struct net *net = dev_net(rt->dst.dev);
229045e4fd26SMartin KaFai Lau 
2291d4ead6b3SDavid Ahern 	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
229245e4fd26SMartin KaFai Lau 	rt->rt6i_flags |= RTF_MODIFIED;
229345e4fd26SMartin KaFai Lau 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
229445e4fd26SMartin KaFai Lau }
229545e4fd26SMartin KaFai Lau 
22960d3f6d29SMartin KaFai Lau static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
22970d3f6d29SMartin KaFai Lau {
22980d3f6d29SMartin KaFai Lau 	return !(rt->rt6i_flags & RTF_CACHE) &&
22991490ed2aSPaolo Abeni 		(rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
23000d3f6d29SMartin KaFai Lau }
23010d3f6d29SMartin KaFai Lau 
230245e4fd26SMartin KaFai Lau static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
230345e4fd26SMartin KaFai Lau 				 const struct ipv6hdr *iph, u32 mtu)
23041da177e4SLinus Torvalds {
23050dec879fSJulian Anastasov 	const struct in6_addr *daddr, *saddr;
23061da177e4SLinus Torvalds 	struct rt6_info *rt6 = (struct rt6_info *)dst;
23071da177e4SLinus Torvalds 
230819bda36cSXin Long 	if (dst_metric_locked(dst, RTAX_MTU))
230919bda36cSXin Long 		return;
231019bda36cSXin Long 
231145e4fd26SMartin KaFai Lau 	if (iph) {
231245e4fd26SMartin KaFai Lau 		daddr = &iph->daddr;
231345e4fd26SMartin KaFai Lau 		saddr = &iph->saddr;
231445e4fd26SMartin KaFai Lau 	} else if (sk) {
231545e4fd26SMartin KaFai Lau 		daddr = &sk->sk_v6_daddr;
231645e4fd26SMartin KaFai Lau 		saddr = &inet6_sk(sk)->saddr;
231745e4fd26SMartin KaFai Lau 	} else {
23180dec879fSJulian Anastasov 		daddr = NULL;
23190dec879fSJulian Anastasov 		saddr = NULL;
23201da177e4SLinus Torvalds 	}
23210dec879fSJulian Anastasov 	dst_confirm_neigh(dst, daddr);
23220dec879fSJulian Anastasov 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
23230dec879fSJulian Anastasov 	if (mtu >= dst_mtu(dst))
23240dec879fSJulian Anastasov 		return;
23250dec879fSJulian Anastasov 
23260dec879fSJulian Anastasov 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
23270dec879fSJulian Anastasov 		rt6_do_update_pmtu(rt6, mtu);
23282b760fcfSWei Wang 		/* update rt6_ex->stamp for cache */
23292b760fcfSWei Wang 		if (rt6->rt6i_flags & RTF_CACHE)
23302b760fcfSWei Wang 			rt6_update_exception_stamp_rt(rt6);
23310dec879fSJulian Anastasov 	} else if (daddr) {
2332a68886a6SDavid Ahern 		struct fib6_info *from;
23330dec879fSJulian Anastasov 		struct rt6_info *nrt6;
23340dec879fSJulian Anastasov 
23354d85cd0cSDavid Ahern 		rcu_read_lock();
2336a68886a6SDavid Ahern 		from = rcu_dereference(rt6->from);
23379c69a132SJonathan Lemon 		if (!from) {
23389c69a132SJonathan Lemon 			rcu_read_unlock();
23399c69a132SJonathan Lemon 			return;
23409c69a132SJonathan Lemon 		}
2341a68886a6SDavid Ahern 		nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
234245e4fd26SMartin KaFai Lau 		if (nrt6) {
234345e4fd26SMartin KaFai Lau 			rt6_do_update_pmtu(nrt6, mtu);
2344a68886a6SDavid Ahern 			if (rt6_insert_exception(nrt6, from))
23452b760fcfSWei Wang 				dst_release_immediate(&nrt6->dst);
234645e4fd26SMartin KaFai Lau 		}
2347a68886a6SDavid Ahern 		rcu_read_unlock();
234845e4fd26SMartin KaFai Lau 	}
234945e4fd26SMartin KaFai Lau }
235045e4fd26SMartin KaFai Lau 
235145e4fd26SMartin KaFai Lau static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
235245e4fd26SMartin KaFai Lau 			       struct sk_buff *skb, u32 mtu)
235345e4fd26SMartin KaFai Lau {
235445e4fd26SMartin KaFai Lau 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
23551da177e4SLinus Torvalds }
23561da177e4SLinus Torvalds 
235742ae66c8SDavid S. Miller void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2358e2d118a1SLorenzo Colitti 		     int oif, u32 mark, kuid_t uid)
235981aded24SDavid S. Miller {
236081aded24SDavid S. Miller 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
236181aded24SDavid S. Miller 	struct dst_entry *dst;
2362dc92095dSMaciej Żenczykowski 	struct flowi6 fl6 = {
2363dc92095dSMaciej Żenczykowski 		.flowi6_oif = oif,
2364dc92095dSMaciej Żenczykowski 		.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2365dc92095dSMaciej Żenczykowski 		.daddr = iph->daddr,
2366dc92095dSMaciej Żenczykowski 		.saddr = iph->saddr,
2367dc92095dSMaciej Żenczykowski 		.flowlabel = ip6_flowinfo(iph),
2368dc92095dSMaciej Żenczykowski 		.flowi6_uid = uid,
2369dc92095dSMaciej Żenczykowski 	};
237081aded24SDavid S. Miller 
237181aded24SDavid S. Miller 	dst = ip6_route_output(net, NULL, &fl6);
237281aded24SDavid S. Miller 	if (!dst->error)
237345e4fd26SMartin KaFai Lau 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
237481aded24SDavid S. Miller 	dst_release(dst);
237581aded24SDavid S. Miller }
237681aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_update_pmtu);
237781aded24SDavid S. Miller 
237881aded24SDavid S. Miller void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
237981aded24SDavid S. Miller {
23807ddacfa5SDavid Ahern 	int oif = sk->sk_bound_dev_if;
238133c162a9SMartin KaFai Lau 	struct dst_entry *dst;
238233c162a9SMartin KaFai Lau 
23837ddacfa5SDavid Ahern 	if (!oif && skb->dev)
23847ddacfa5SDavid Ahern 		oif = l3mdev_master_ifindex(skb->dev);
23857ddacfa5SDavid Ahern 
23867ddacfa5SDavid Ahern 	ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
238733c162a9SMartin KaFai Lau 
238833c162a9SMartin KaFai Lau 	dst = __sk_dst_get(sk);
238933c162a9SMartin KaFai Lau 	if (!dst || !dst->obsolete ||
239033c162a9SMartin KaFai Lau 	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
239133c162a9SMartin KaFai Lau 		return;
239233c162a9SMartin KaFai Lau 
239333c162a9SMartin KaFai Lau 	bh_lock_sock(sk);
239433c162a9SMartin KaFai Lau 	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
239533c162a9SMartin KaFai Lau 		ip6_datagram_dst_update(sk, false);
239633c162a9SMartin KaFai Lau 	bh_unlock_sock(sk);
239781aded24SDavid S. Miller }
239881aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
239981aded24SDavid S. Miller 
24007d6850f7SAlexey Kodanev void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
24017d6850f7SAlexey Kodanev 			   const struct flowi6 *fl6)
24027d6850f7SAlexey Kodanev {
24037d6850f7SAlexey Kodanev #ifdef CONFIG_IPV6_SUBTREES
24047d6850f7SAlexey Kodanev 	struct ipv6_pinfo *np = inet6_sk(sk);
24057d6850f7SAlexey Kodanev #endif
24067d6850f7SAlexey Kodanev 
24077d6850f7SAlexey Kodanev 	ip6_dst_store(sk, dst,
24087d6850f7SAlexey Kodanev 		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
24097d6850f7SAlexey Kodanev 		      &sk->sk_v6_daddr : NULL,
24107d6850f7SAlexey Kodanev #ifdef CONFIG_IPV6_SUBTREES
24117d6850f7SAlexey Kodanev 		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
24127d6850f7SAlexey Kodanev 		      &np->saddr :
24137d6850f7SAlexey Kodanev #endif
24147d6850f7SAlexey Kodanev 		      NULL);
24157d6850f7SAlexey Kodanev }
24167d6850f7SAlexey Kodanev 
24170b34eb00SDavid Ahern static bool ip6_redirect_nh_match(struct fib6_info *f6i,
24180b34eb00SDavid Ahern 				  struct fib6_nh *nh,
24190b34eb00SDavid Ahern 				  struct flowi6 *fl6,
24200b34eb00SDavid Ahern 				  const struct in6_addr *gw,
24210b34eb00SDavid Ahern 				  struct rt6_info **ret)
24220b34eb00SDavid Ahern {
24230b34eb00SDavid Ahern 	if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
24240b34eb00SDavid Ahern 	    fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
24250b34eb00SDavid Ahern 		return false;
24260b34eb00SDavid Ahern 
24270b34eb00SDavid Ahern 	/* rt_cache's gateway might be different from its 'parent'
24280b34eb00SDavid Ahern 	 * in the case of an ip redirect.
24290b34eb00SDavid Ahern 	 * So we keep searching in the exception table if the gateway
24300b34eb00SDavid Ahern 	 * is different.
24310b34eb00SDavid Ahern 	 */
24320b34eb00SDavid Ahern 	if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
24330b34eb00SDavid Ahern 		struct rt6_info *rt_cache;
24340b34eb00SDavid Ahern 
24350b34eb00SDavid Ahern 		rt_cache = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
24360b34eb00SDavid Ahern 		if (rt_cache &&
24370b34eb00SDavid Ahern 		    ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
24380b34eb00SDavid Ahern 			*ret = rt_cache;
24390b34eb00SDavid Ahern 			return true;
24400b34eb00SDavid Ahern 		}
24410b34eb00SDavid Ahern 		return false;
24420b34eb00SDavid Ahern 	}
24430b34eb00SDavid Ahern 	return true;
24440b34eb00SDavid Ahern }
24450b34eb00SDavid Ahern 
2446b55b76b2SDuan Jiong /* Handle redirects */
2447b55b76b2SDuan Jiong struct ip6rd_flowi {
2448b55b76b2SDuan Jiong 	struct flowi6 fl6;
2449b55b76b2SDuan Jiong 	struct in6_addr gateway;
2450b55b76b2SDuan Jiong };
2451b55b76b2SDuan Jiong 
2452b55b76b2SDuan Jiong static struct rt6_info *__ip6_route_redirect(struct net *net,
2453b55b76b2SDuan Jiong 					     struct fib6_table *table,
2454b55b76b2SDuan Jiong 					     struct flowi6 *fl6,
2455b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
2456b55b76b2SDuan Jiong 					     int flags)
2457b55b76b2SDuan Jiong {
2458b55b76b2SDuan Jiong 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
24590b34eb00SDavid Ahern 	struct rt6_info *ret = NULL;
24608d1c802bSDavid Ahern 	struct fib6_info *rt;
2461b55b76b2SDuan Jiong 	struct fib6_node *fn;
2462b55b76b2SDuan Jiong 
2463b55b76b2SDuan Jiong 	/* Get the "current" route for this destination and
246467c408cfSAlexander Alemayhu 	 * check if the redirect has come from appropriate router.
2465b55b76b2SDuan Jiong 	 *
2466b55b76b2SDuan Jiong 	 * RFC 4861 specifies that redirects should only be
2467b55b76b2SDuan Jiong 	 * accepted if they come from the nexthop to the target.
2468b55b76b2SDuan Jiong 	 * Due to the way the routes are chosen, this notion
2469b55b76b2SDuan Jiong 	 * is a bit fuzzy and one might need to check all possible
2470b55b76b2SDuan Jiong 	 * routes.
2471b55b76b2SDuan Jiong 	 */
2472b55b76b2SDuan Jiong 
247366f5d6ceSWei Wang 	rcu_read_lock();
24746454743bSDavid Ahern 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2475b55b76b2SDuan Jiong restart:
247666f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(fn) {
247714895687SDavid Ahern 		if (fib6_check_expired(rt))
2478b55b76b2SDuan Jiong 			continue;
247993c2fb25SDavid Ahern 		if (rt->fib6_flags & RTF_REJECT)
2480b55b76b2SDuan Jiong 			break;
24810b34eb00SDavid Ahern 		if (ip6_redirect_nh_match(rt, &rt->fib6_nh, fl6,
24820b34eb00SDavid Ahern 					  &rdfl->gateway, &ret))
24830b34eb00SDavid Ahern 			goto out;
2484b55b76b2SDuan Jiong 	}
2485b55b76b2SDuan Jiong 
2486b55b76b2SDuan Jiong 	if (!rt)
2487421842edSDavid Ahern 		rt = net->ipv6.fib6_null_entry;
248893c2fb25SDavid Ahern 	else if (rt->fib6_flags & RTF_REJECT) {
248923fb93a4SDavid Ahern 		ret = net->ipv6.ip6_null_entry;
2490b0a1ba59SMartin KaFai Lau 		goto out;
2491b0a1ba59SMartin KaFai Lau 	}
2492b0a1ba59SMartin KaFai Lau 
2493421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry) {
2494a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
2495a3c00e46SMartin KaFai Lau 		if (fn)
2496a3c00e46SMartin KaFai Lau 			goto restart;
2497b55b76b2SDuan Jiong 	}
2498a3c00e46SMartin KaFai Lau 
2499b0a1ba59SMartin KaFai Lau out:
250023fb93a4SDavid Ahern 	if (ret)
250110585b43SDavid Ahern 		ip6_hold_safe(net, &ret);
250223fb93a4SDavid Ahern 	else
250323fb93a4SDavid Ahern 		ret = ip6_create_rt_rcu(rt);
2504b55b76b2SDuan Jiong 
250566f5d6ceSWei Wang 	rcu_read_unlock();
2506b55b76b2SDuan Jiong 
2507b65f164dSPaolo Abeni 	trace_fib6_table_lookup(net, rt, table, fl6);
250823fb93a4SDavid Ahern 	return ret;
2509b55b76b2SDuan Jiong };
2510b55b76b2SDuan Jiong 
2511b55b76b2SDuan Jiong static struct dst_entry *ip6_route_redirect(struct net *net,
2512b55b76b2SDuan Jiong 					    const struct flowi6 *fl6,
2513b75cc8f9SDavid Ahern 					    const struct sk_buff *skb,
2514b55b76b2SDuan Jiong 					    const struct in6_addr *gateway)
2515b55b76b2SDuan Jiong {
2516b55b76b2SDuan Jiong 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2517b55b76b2SDuan Jiong 	struct ip6rd_flowi rdfl;
2518b55b76b2SDuan Jiong 
2519b55b76b2SDuan Jiong 	rdfl.fl6 = *fl6;
2520b55b76b2SDuan Jiong 	rdfl.gateway = *gateway;
2521b55b76b2SDuan Jiong 
2522b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, &rdfl.fl6, skb,
2523b55b76b2SDuan Jiong 				flags, __ip6_route_redirect);
2524b55b76b2SDuan Jiong }
2525b55b76b2SDuan Jiong 
2526e2d118a1SLorenzo Colitti void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2527e2d118a1SLorenzo Colitti 		  kuid_t uid)
25283a5ad2eeSDavid S. Miller {
25293a5ad2eeSDavid S. Miller 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
25303a5ad2eeSDavid S. Miller 	struct dst_entry *dst;
25311f7f10acSMaciej Żenczykowski 	struct flowi6 fl6 = {
25321f7f10acSMaciej Żenczykowski 		.flowi6_iif = LOOPBACK_IFINDEX,
25331f7f10acSMaciej Żenczykowski 		.flowi6_oif = oif,
25341f7f10acSMaciej Żenczykowski 		.flowi6_mark = mark,
25351f7f10acSMaciej Żenczykowski 		.daddr = iph->daddr,
25361f7f10acSMaciej Żenczykowski 		.saddr = iph->saddr,
25371f7f10acSMaciej Żenczykowski 		.flowlabel = ip6_flowinfo(iph),
25381f7f10acSMaciej Żenczykowski 		.flowi6_uid = uid,
25391f7f10acSMaciej Żenczykowski 	};
25403a5ad2eeSDavid S. Miller 
2541b75cc8f9SDavid Ahern 	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
25426700c270SDavid S. Miller 	rt6_do_redirect(dst, NULL, skb);
25433a5ad2eeSDavid S. Miller 	dst_release(dst);
25443a5ad2eeSDavid S. Miller }
25453a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_redirect);
25463a5ad2eeSDavid S. Miller 
2547d456336dSMaciej Żenczykowski void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
2548c92a59ecSDuan Jiong {
2549c92a59ecSDuan Jiong 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2550c92a59ecSDuan Jiong 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2551c92a59ecSDuan Jiong 	struct dst_entry *dst;
25520b26fb17SMaciej Żenczykowski 	struct flowi6 fl6 = {
25530b26fb17SMaciej Żenczykowski 		.flowi6_iif = LOOPBACK_IFINDEX,
25540b26fb17SMaciej Żenczykowski 		.flowi6_oif = oif,
25550b26fb17SMaciej Żenczykowski 		.daddr = msg->dest,
25560b26fb17SMaciej Żenczykowski 		.saddr = iph->daddr,
25570b26fb17SMaciej Żenczykowski 		.flowi6_uid = sock_net_uid(net, NULL),
25580b26fb17SMaciej Żenczykowski 	};
2559c92a59ecSDuan Jiong 
2560b75cc8f9SDavid Ahern 	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2561c92a59ecSDuan Jiong 	rt6_do_redirect(dst, NULL, skb);
2562c92a59ecSDuan Jiong 	dst_release(dst);
2563c92a59ecSDuan Jiong }
2564c92a59ecSDuan Jiong 
25653a5ad2eeSDavid S. Miller void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
25663a5ad2eeSDavid S. Miller {
2567e2d118a1SLorenzo Colitti 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2568e2d118a1SLorenzo Colitti 		     sk->sk_uid);
25693a5ad2eeSDavid S. Miller }
25703a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_redirect);
25713a5ad2eeSDavid S. Miller 
25720dbaee3bSDavid S. Miller static unsigned int ip6_default_advmss(const struct dst_entry *dst)
25731da177e4SLinus Torvalds {
25740dbaee3bSDavid S. Miller 	struct net_device *dev = dst->dev;
25750dbaee3bSDavid S. Miller 	unsigned int mtu = dst_mtu(dst);
25760dbaee3bSDavid S. Miller 	struct net *net = dev_net(dev);
25770dbaee3bSDavid S. Miller 
25781da177e4SLinus Torvalds 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
25791da177e4SLinus Torvalds 
25805578689aSDaniel Lezcano 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
25815578689aSDaniel Lezcano 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
25821da177e4SLinus Torvalds 
25831da177e4SLinus Torvalds 	/*
25841da177e4SLinus Torvalds 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
25851da177e4SLinus Torvalds 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
25861da177e4SLinus Torvalds 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
25871da177e4SLinus Torvalds 	 * rely only on pmtu discovery"
25881da177e4SLinus Torvalds 	 */
25891da177e4SLinus Torvalds 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
25901da177e4SLinus Torvalds 		mtu = IPV6_MAXPLEN;
25911da177e4SLinus Torvalds 	return mtu;
25921da177e4SLinus Torvalds }
25931da177e4SLinus Torvalds 
2594ebb762f2SSteffen Klassert static unsigned int ip6_mtu(const struct dst_entry *dst)
2595d33e4553SDavid S. Miller {
2596d33e4553SDavid S. Miller 	struct inet6_dev *idev;
2597d4ead6b3SDavid Ahern 	unsigned int mtu;
2598618f9bc7SSteffen Klassert 
25994b32b5adSMartin KaFai Lau 	mtu = dst_metric_raw(dst, RTAX_MTU);
26004b32b5adSMartin KaFai Lau 	if (mtu)
26014b32b5adSMartin KaFai Lau 		goto out;
26024b32b5adSMartin KaFai Lau 
2603618f9bc7SSteffen Klassert 	mtu = IPV6_MIN_MTU;
2604d33e4553SDavid S. Miller 
2605d33e4553SDavid S. Miller 	rcu_read_lock();
2606d33e4553SDavid S. Miller 	idev = __in6_dev_get(dst->dev);
2607d33e4553SDavid S. Miller 	if (idev)
2608d33e4553SDavid S. Miller 		mtu = idev->cnf.mtu6;
2609d33e4553SDavid S. Miller 	rcu_read_unlock();
2610d33e4553SDavid S. Miller 
261130f78d8eSEric Dumazet out:
261214972cbdSRoopa Prabhu 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
261314972cbdSRoopa Prabhu 
261414972cbdSRoopa Prabhu 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2615d33e4553SDavid S. Miller }
2616d33e4553SDavid S. Miller 
2617901731b8SDavid Ahern /* MTU selection:
2618901731b8SDavid Ahern  * 1. mtu on route is locked - use it
2619901731b8SDavid Ahern  * 2. mtu from nexthop exception
2620901731b8SDavid Ahern  * 3. mtu from egress device
2621901731b8SDavid Ahern  *
2622901731b8SDavid Ahern  * based on ip6_dst_mtu_forward and exception logic of
2623901731b8SDavid Ahern  * rt6_find_cached_rt; called with rcu_read_lock
2624901731b8SDavid Ahern  */
2625901731b8SDavid Ahern u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2626901731b8SDavid Ahern 		      struct in6_addr *saddr)
2627901731b8SDavid Ahern {
2628901731b8SDavid Ahern 	struct rt6_exception_bucket *bucket;
2629901731b8SDavid Ahern 	struct rt6_exception *rt6_ex;
2630901731b8SDavid Ahern 	struct in6_addr *src_key;
2631901731b8SDavid Ahern 	struct inet6_dev *idev;
2632901731b8SDavid Ahern 	u32 mtu = 0;
2633901731b8SDavid Ahern 
2634901731b8SDavid Ahern 	if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2635901731b8SDavid Ahern 		mtu = f6i->fib6_pmtu;
2636901731b8SDavid Ahern 		if (mtu)
2637901731b8SDavid Ahern 			goto out;
2638901731b8SDavid Ahern 	}
2639901731b8SDavid Ahern 
2640901731b8SDavid Ahern 	src_key = NULL;
2641901731b8SDavid Ahern #ifdef CONFIG_IPV6_SUBTREES
2642901731b8SDavid Ahern 	if (f6i->fib6_src.plen)
2643901731b8SDavid Ahern 		src_key = saddr;
2644901731b8SDavid Ahern #endif
2645901731b8SDavid Ahern 
2646901731b8SDavid Ahern 	bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2647901731b8SDavid Ahern 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2648901731b8SDavid Ahern 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2649901731b8SDavid Ahern 		mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2650901731b8SDavid Ahern 
2651901731b8SDavid Ahern 	if (likely(!mtu)) {
2652901731b8SDavid Ahern 		struct net_device *dev = fib6_info_nh_dev(f6i);
2653901731b8SDavid Ahern 
2654901731b8SDavid Ahern 		mtu = IPV6_MIN_MTU;
2655901731b8SDavid Ahern 		idev = __in6_dev_get(dev);
2656901731b8SDavid Ahern 		if (idev && idev->cnf.mtu6 > mtu)
2657901731b8SDavid Ahern 			mtu = idev->cnf.mtu6;
2658901731b8SDavid Ahern 	}
2659901731b8SDavid Ahern 
2660901731b8SDavid Ahern 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2661901731b8SDavid Ahern out:
2662901731b8SDavid Ahern 	return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2663901731b8SDavid Ahern }
2664901731b8SDavid Ahern 
26653b00944cSYOSHIFUJI Hideaki struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
266687a11578SDavid S. Miller 				  struct flowi6 *fl6)
26671da177e4SLinus Torvalds {
266887a11578SDavid S. Miller 	struct dst_entry *dst;
26691da177e4SLinus Torvalds 	struct rt6_info *rt;
26701da177e4SLinus Torvalds 	struct inet6_dev *idev = in6_dev_get(dev);
2671c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
26721da177e4SLinus Torvalds 
267338308473SDavid S. Miller 	if (unlikely(!idev))
2674122bdf67SEric Dumazet 		return ERR_PTR(-ENODEV);
26751da177e4SLinus Torvalds 
2676ad706862SMartin KaFai Lau 	rt = ip6_dst_alloc(net, dev, 0);
267738308473SDavid S. Miller 	if (unlikely(!rt)) {
26781da177e4SLinus Torvalds 		in6_dev_put(idev);
267987a11578SDavid S. Miller 		dst = ERR_PTR(-ENOMEM);
26801da177e4SLinus Torvalds 		goto out;
26811da177e4SLinus Torvalds 	}
26821da177e4SLinus Torvalds 
26838e2ec639SYan, Zheng 	rt->dst.flags |= DST_HOST;
2684588753f1SBrendan McGrath 	rt->dst.input = ip6_input;
26858e2ec639SYan, Zheng 	rt->dst.output  = ip6_output;
2686550bab42SJulian Anastasov 	rt->rt6i_gateway  = fl6->daddr;
268787a11578SDavid S. Miller 	rt->rt6i_dst.addr = fl6->daddr;
26888e2ec639SYan, Zheng 	rt->rt6i_dst.plen = 128;
26898e2ec639SYan, Zheng 	rt->rt6i_idev     = idev;
269014edd87dSLi RongQing 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
26911da177e4SLinus Torvalds 
26924c981e28SIdo Schimmel 	/* Add this dst into uncached_list so that rt6_disable_ip() can
2693587fea74SWei Wang 	 * do proper release of the net_device
2694587fea74SWei Wang 	 */
2695587fea74SWei Wang 	rt6_uncached_list_add(rt);
269681eb8447SWei Wang 	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
26971da177e4SLinus Torvalds 
269887a11578SDavid S. Miller 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
269987a11578SDavid S. Miller 
27001da177e4SLinus Torvalds out:
270187a11578SDavid S. Miller 	return dst;
27021da177e4SLinus Torvalds }
27031da177e4SLinus Torvalds 
2704569d3645SDaniel Lezcano static int ip6_dst_gc(struct dst_ops *ops)
27051da177e4SLinus Torvalds {
270686393e52SAlexey Dobriyan 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
27077019b78eSDaniel Lezcano 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
27087019b78eSDaniel Lezcano 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
27097019b78eSDaniel Lezcano 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
27107019b78eSDaniel Lezcano 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
27117019b78eSDaniel Lezcano 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2712fc66f95cSEric Dumazet 	int entries;
27131da177e4SLinus Torvalds 
2714fc66f95cSEric Dumazet 	entries = dst_entries_get_fast(ops);
271549a18d86SMichal Kubeček 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2716fc66f95cSEric Dumazet 	    entries <= rt_max_size)
27171da177e4SLinus Torvalds 		goto out;
27181da177e4SLinus Torvalds 
27196891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire++;
272014956643SLi RongQing 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2721fc66f95cSEric Dumazet 	entries = dst_entries_get_slow(ops);
2722fc66f95cSEric Dumazet 	if (entries < ops->gc_thresh)
27237019b78eSDaniel Lezcano 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
27241da177e4SLinus Torvalds out:
27257019b78eSDaniel Lezcano 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2726fc66f95cSEric Dumazet 	return entries > rt_max_size;
27271da177e4SLinus Torvalds }
27281da177e4SLinus Torvalds 
27298c14586fSDavid Ahern static struct rt6_info *ip6_nh_lookup_table(struct net *net,
27308c14586fSDavid Ahern 					    struct fib6_config *cfg,
2731f4797b33SDavid Ahern 					    const struct in6_addr *gw_addr,
2732f4797b33SDavid Ahern 					    u32 tbid, int flags)
27338c14586fSDavid Ahern {
27348c14586fSDavid Ahern 	struct flowi6 fl6 = {
27358c14586fSDavid Ahern 		.flowi6_oif = cfg->fc_ifindex,
27368c14586fSDavid Ahern 		.daddr = *gw_addr,
27378c14586fSDavid Ahern 		.saddr = cfg->fc_prefsrc,
27388c14586fSDavid Ahern 	};
27398c14586fSDavid Ahern 	struct fib6_table *table;
27408c14586fSDavid Ahern 	struct rt6_info *rt;
27418c14586fSDavid Ahern 
2742f4797b33SDavid Ahern 	table = fib6_get_table(net, tbid);
27438c14586fSDavid Ahern 	if (!table)
27448c14586fSDavid Ahern 		return NULL;
27458c14586fSDavid Ahern 
27468c14586fSDavid Ahern 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
27478c14586fSDavid Ahern 		flags |= RT6_LOOKUP_F_HAS_SADDR;
27488c14586fSDavid Ahern 
2749f4797b33SDavid Ahern 	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2750b75cc8f9SDavid Ahern 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
27518c14586fSDavid Ahern 
27528c14586fSDavid Ahern 	/* if table lookup failed, fall back to full lookup */
27538c14586fSDavid Ahern 	if (rt == net->ipv6.ip6_null_entry) {
27548c14586fSDavid Ahern 		ip6_rt_put(rt);
27558c14586fSDavid Ahern 		rt = NULL;
27568c14586fSDavid Ahern 	}
27578c14586fSDavid Ahern 
27588c14586fSDavid Ahern 	return rt;
27598c14586fSDavid Ahern }
27608c14586fSDavid Ahern 
2761fc1e64e1SDavid Ahern static int ip6_route_check_nh_onlink(struct net *net,
2762fc1e64e1SDavid Ahern 				     struct fib6_config *cfg,
27639fbb704cSDavid Ahern 				     const struct net_device *dev,
2764fc1e64e1SDavid Ahern 				     struct netlink_ext_ack *extack)
2765fc1e64e1SDavid Ahern {
276644750f84SDavid Ahern 	u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2767fc1e64e1SDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
2768fc1e64e1SDavid Ahern 	u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2769bf1dc8baSPaolo Abeni 	struct fib6_info *from;
2770fc1e64e1SDavid Ahern 	struct rt6_info *grt;
2771fc1e64e1SDavid Ahern 	int err;
2772fc1e64e1SDavid Ahern 
2773fc1e64e1SDavid Ahern 	err = 0;
2774fc1e64e1SDavid Ahern 	grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2775fc1e64e1SDavid Ahern 	if (grt) {
2776bf1dc8baSPaolo Abeni 		rcu_read_lock();
2777bf1dc8baSPaolo Abeni 		from = rcu_dereference(grt->from);
277858e354c0SDavid Ahern 		if (!grt->dst.error &&
27794ed591c8SDavid Ahern 		    /* ignore match if it is the default route */
2780bf1dc8baSPaolo Abeni 		    from && !ipv6_addr_any(&from->fib6_dst.addr) &&
278158e354c0SDavid Ahern 		    (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
278244750f84SDavid Ahern 			NL_SET_ERR_MSG(extack,
278344750f84SDavid Ahern 				       "Nexthop has invalid gateway or device mismatch");
2784fc1e64e1SDavid Ahern 			err = -EINVAL;
2785fc1e64e1SDavid Ahern 		}
2786bf1dc8baSPaolo Abeni 		rcu_read_unlock();
2787fc1e64e1SDavid Ahern 
2788fc1e64e1SDavid Ahern 		ip6_rt_put(grt);
2789fc1e64e1SDavid Ahern 	}
2790fc1e64e1SDavid Ahern 
2791fc1e64e1SDavid Ahern 	return err;
2792fc1e64e1SDavid Ahern }
2793fc1e64e1SDavid Ahern 
27941edce99fSDavid Ahern static int ip6_route_check_nh(struct net *net,
27951edce99fSDavid Ahern 			      struct fib6_config *cfg,
27961edce99fSDavid Ahern 			      struct net_device **_dev,
27971edce99fSDavid Ahern 			      struct inet6_dev **idev)
27981edce99fSDavid Ahern {
27991edce99fSDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
28001edce99fSDavid Ahern 	struct net_device *dev = _dev ? *_dev : NULL;
28011edce99fSDavid Ahern 	struct rt6_info *grt = NULL;
28021edce99fSDavid Ahern 	int err = -EHOSTUNREACH;
28031edce99fSDavid Ahern 
28041edce99fSDavid Ahern 	if (cfg->fc_table) {
2805f4797b33SDavid Ahern 		int flags = RT6_LOOKUP_F_IFACE;
2806f4797b33SDavid Ahern 
2807f4797b33SDavid Ahern 		grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2808f4797b33SDavid Ahern 					  cfg->fc_table, flags);
28091edce99fSDavid Ahern 		if (grt) {
28101edce99fSDavid Ahern 			if (grt->rt6i_flags & RTF_GATEWAY ||
28111edce99fSDavid Ahern 			    (dev && dev != grt->dst.dev)) {
28121edce99fSDavid Ahern 				ip6_rt_put(grt);
28131edce99fSDavid Ahern 				grt = NULL;
28141edce99fSDavid Ahern 			}
28151edce99fSDavid Ahern 		}
28161edce99fSDavid Ahern 	}
28171edce99fSDavid Ahern 
28181edce99fSDavid Ahern 	if (!grt)
2819b75cc8f9SDavid Ahern 		grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
28201edce99fSDavid Ahern 
28211edce99fSDavid Ahern 	if (!grt)
28221edce99fSDavid Ahern 		goto out;
28231edce99fSDavid Ahern 
28241edce99fSDavid Ahern 	if (dev) {
28251edce99fSDavid Ahern 		if (dev != grt->dst.dev) {
28261edce99fSDavid Ahern 			ip6_rt_put(grt);
28271edce99fSDavid Ahern 			goto out;
28281edce99fSDavid Ahern 		}
28291edce99fSDavid Ahern 	} else {
28301edce99fSDavid Ahern 		*_dev = dev = grt->dst.dev;
28311edce99fSDavid Ahern 		*idev = grt->rt6i_idev;
28321edce99fSDavid Ahern 		dev_hold(dev);
28331edce99fSDavid Ahern 		in6_dev_hold(grt->rt6i_idev);
28341edce99fSDavid Ahern 	}
28351edce99fSDavid Ahern 
28361edce99fSDavid Ahern 	if (!(grt->rt6i_flags & RTF_GATEWAY))
28371edce99fSDavid Ahern 		err = 0;
28381edce99fSDavid Ahern 
28391edce99fSDavid Ahern 	ip6_rt_put(grt);
28401edce99fSDavid Ahern 
28411edce99fSDavid Ahern out:
28421edce99fSDavid Ahern 	return err;
28431edce99fSDavid Ahern }
28441edce99fSDavid Ahern 
28459fbb704cSDavid Ahern static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
28469fbb704cSDavid Ahern 			   struct net_device **_dev, struct inet6_dev **idev,
28479fbb704cSDavid Ahern 			   struct netlink_ext_ack *extack)
28489fbb704cSDavid Ahern {
28499fbb704cSDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
28509fbb704cSDavid Ahern 	int gwa_type = ipv6_addr_type(gw_addr);
2851232378e8SDavid Ahern 	bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
28529fbb704cSDavid Ahern 	const struct net_device *dev = *_dev;
2853232378e8SDavid Ahern 	bool need_addr_check = !dev;
28549fbb704cSDavid Ahern 	int err = -EINVAL;
28559fbb704cSDavid Ahern 
28569fbb704cSDavid Ahern 	/* if gw_addr is local we will fail to detect this in case
28579fbb704cSDavid Ahern 	 * address is still TENTATIVE (DAD in progress). rt6_lookup()
28589fbb704cSDavid Ahern 	 * will return already-added prefix route via interface that
28599fbb704cSDavid Ahern 	 * prefix route was assigned to, which might be non-loopback.
28609fbb704cSDavid Ahern 	 */
2861232378e8SDavid Ahern 	if (dev &&
2862232378e8SDavid Ahern 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2863232378e8SDavid Ahern 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
28649fbb704cSDavid Ahern 		goto out;
28659fbb704cSDavid Ahern 	}
28669fbb704cSDavid Ahern 
28679fbb704cSDavid Ahern 	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
28689fbb704cSDavid Ahern 		/* IPv6 strictly inhibits using not link-local
28699fbb704cSDavid Ahern 		 * addresses as nexthop address.
28709fbb704cSDavid Ahern 		 * Otherwise, router will not able to send redirects.
28719fbb704cSDavid Ahern 		 * It is very good, but in some (rare!) circumstances
28729fbb704cSDavid Ahern 		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
28739fbb704cSDavid Ahern 		 * some exceptions. --ANK
28749fbb704cSDavid Ahern 		 * We allow IPv4-mapped nexthops to support RFC4798-type
28759fbb704cSDavid Ahern 		 * addressing
28769fbb704cSDavid Ahern 		 */
28779fbb704cSDavid Ahern 		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
28789fbb704cSDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid gateway address");
28799fbb704cSDavid Ahern 			goto out;
28809fbb704cSDavid Ahern 		}
28819fbb704cSDavid Ahern 
28829fbb704cSDavid Ahern 		if (cfg->fc_flags & RTNH_F_ONLINK)
28839fbb704cSDavid Ahern 			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
28849fbb704cSDavid Ahern 		else
28859fbb704cSDavid Ahern 			err = ip6_route_check_nh(net, cfg, _dev, idev);
28869fbb704cSDavid Ahern 
28879fbb704cSDavid Ahern 		if (err)
28889fbb704cSDavid Ahern 			goto out;
28899fbb704cSDavid Ahern 	}
28909fbb704cSDavid Ahern 
28919fbb704cSDavid Ahern 	/* reload in case device was changed */
28929fbb704cSDavid Ahern 	dev = *_dev;
28939fbb704cSDavid Ahern 
28949fbb704cSDavid Ahern 	err = -EINVAL;
28959fbb704cSDavid Ahern 	if (!dev) {
28969fbb704cSDavid Ahern 		NL_SET_ERR_MSG(extack, "Egress device not specified");
28979fbb704cSDavid Ahern 		goto out;
28989fbb704cSDavid Ahern 	} else if (dev->flags & IFF_LOOPBACK) {
28999fbb704cSDavid Ahern 		NL_SET_ERR_MSG(extack,
29009fbb704cSDavid Ahern 			       "Egress device can not be loopback device for this route");
29019fbb704cSDavid Ahern 		goto out;
29029fbb704cSDavid Ahern 	}
2903232378e8SDavid Ahern 
2904232378e8SDavid Ahern 	/* if we did not check gw_addr above, do so now that the
2905232378e8SDavid Ahern 	 * egress device has been resolved.
2906232378e8SDavid Ahern 	 */
2907232378e8SDavid Ahern 	if (need_addr_check &&
2908232378e8SDavid Ahern 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2909232378e8SDavid Ahern 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2910232378e8SDavid Ahern 		goto out;
2911232378e8SDavid Ahern 	}
2912232378e8SDavid Ahern 
29139fbb704cSDavid Ahern 	err = 0;
29149fbb704cSDavid Ahern out:
29159fbb704cSDavid Ahern 	return err;
29169fbb704cSDavid Ahern }
29179fbb704cSDavid Ahern 
291883c44251SDavid Ahern static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
291983c44251SDavid Ahern {
292083c44251SDavid Ahern 	if ((flags & RTF_REJECT) ||
292183c44251SDavid Ahern 	    (dev && (dev->flags & IFF_LOOPBACK) &&
292283c44251SDavid Ahern 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
292383c44251SDavid Ahern 	     !(flags & RTF_LOCAL)))
292483c44251SDavid Ahern 		return true;
292583c44251SDavid Ahern 
292683c44251SDavid Ahern 	return false;
292783c44251SDavid Ahern }
292883c44251SDavid Ahern 
292983c44251SDavid Ahern int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
293083c44251SDavid Ahern 		 struct fib6_config *cfg, gfp_t gfp_flags,
293183c44251SDavid Ahern 		 struct netlink_ext_ack *extack)
293283c44251SDavid Ahern {
293383c44251SDavid Ahern 	struct net_device *dev = NULL;
293483c44251SDavid Ahern 	struct inet6_dev *idev = NULL;
293583c44251SDavid Ahern 	int addr_type;
293683c44251SDavid Ahern 	int err;
293783c44251SDavid Ahern 
2938f1741730SDavid Ahern 	fib6_nh->fib_nh_family = AF_INET6;
2939f1741730SDavid Ahern 
294083c44251SDavid Ahern 	err = -ENODEV;
294183c44251SDavid Ahern 	if (cfg->fc_ifindex) {
294283c44251SDavid Ahern 		dev = dev_get_by_index(net, cfg->fc_ifindex);
294383c44251SDavid Ahern 		if (!dev)
294483c44251SDavid Ahern 			goto out;
294583c44251SDavid Ahern 		idev = in6_dev_get(dev);
294683c44251SDavid Ahern 		if (!idev)
294783c44251SDavid Ahern 			goto out;
294883c44251SDavid Ahern 	}
294983c44251SDavid Ahern 
295083c44251SDavid Ahern 	if (cfg->fc_flags & RTNH_F_ONLINK) {
295183c44251SDavid Ahern 		if (!dev) {
295283c44251SDavid Ahern 			NL_SET_ERR_MSG(extack,
295383c44251SDavid Ahern 				       "Nexthop device required for onlink");
295483c44251SDavid Ahern 			goto out;
295583c44251SDavid Ahern 		}
295683c44251SDavid Ahern 
295783c44251SDavid Ahern 		if (!(dev->flags & IFF_UP)) {
295883c44251SDavid Ahern 			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
295983c44251SDavid Ahern 			err = -ENETDOWN;
296083c44251SDavid Ahern 			goto out;
296183c44251SDavid Ahern 		}
296283c44251SDavid Ahern 
2963ad1601aeSDavid Ahern 		fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
296483c44251SDavid Ahern 	}
296583c44251SDavid Ahern 
2966ad1601aeSDavid Ahern 	fib6_nh->fib_nh_weight = 1;
296783c44251SDavid Ahern 
296883c44251SDavid Ahern 	/* We cannot add true routes via loopback here,
296983c44251SDavid Ahern 	 * they would result in kernel looping; promote them to reject routes
297083c44251SDavid Ahern 	 */
297183c44251SDavid Ahern 	addr_type = ipv6_addr_type(&cfg->fc_dst);
297283c44251SDavid Ahern 	if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
297383c44251SDavid Ahern 		/* hold loopback dev/idev if we haven't done so. */
297483c44251SDavid Ahern 		if (dev != net->loopback_dev) {
297583c44251SDavid Ahern 			if (dev) {
297683c44251SDavid Ahern 				dev_put(dev);
297783c44251SDavid Ahern 				in6_dev_put(idev);
297883c44251SDavid Ahern 			}
297983c44251SDavid Ahern 			dev = net->loopback_dev;
298083c44251SDavid Ahern 			dev_hold(dev);
298183c44251SDavid Ahern 			idev = in6_dev_get(dev);
298283c44251SDavid Ahern 			if (!idev) {
298383c44251SDavid Ahern 				err = -ENODEV;
298483c44251SDavid Ahern 				goto out;
298583c44251SDavid Ahern 			}
298683c44251SDavid Ahern 		}
298783c44251SDavid Ahern 		goto set_dev;
298883c44251SDavid Ahern 	}
298983c44251SDavid Ahern 
299083c44251SDavid Ahern 	if (cfg->fc_flags & RTF_GATEWAY) {
299183c44251SDavid Ahern 		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
299283c44251SDavid Ahern 		if (err)
299383c44251SDavid Ahern 			goto out;
299483c44251SDavid Ahern 
2995ad1601aeSDavid Ahern 		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
2996bdf00467SDavid Ahern 		fib6_nh->fib_nh_gw_family = AF_INET6;
299783c44251SDavid Ahern 	}
299883c44251SDavid Ahern 
299983c44251SDavid Ahern 	err = -ENODEV;
300083c44251SDavid Ahern 	if (!dev)
300183c44251SDavid Ahern 		goto out;
300283c44251SDavid Ahern 
300383c44251SDavid Ahern 	if (idev->cnf.disable_ipv6) {
300483c44251SDavid Ahern 		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
300583c44251SDavid Ahern 		err = -EACCES;
300683c44251SDavid Ahern 		goto out;
300783c44251SDavid Ahern 	}
300883c44251SDavid Ahern 
300983c44251SDavid Ahern 	if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
301083c44251SDavid Ahern 		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
301183c44251SDavid Ahern 		err = -ENETDOWN;
301283c44251SDavid Ahern 		goto out;
301383c44251SDavid Ahern 	}
301483c44251SDavid Ahern 
301583c44251SDavid Ahern 	if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
301683c44251SDavid Ahern 	    !netif_carrier_ok(dev))
3017ad1601aeSDavid Ahern 		fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
301883c44251SDavid Ahern 
3019979e276eSDavid Ahern 	err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
3020979e276eSDavid Ahern 				 cfg->fc_encap_type, cfg, gfp_flags, extack);
3021979e276eSDavid Ahern 	if (err)
3022979e276eSDavid Ahern 		goto out;
302383c44251SDavid Ahern set_dev:
3024ad1601aeSDavid Ahern 	fib6_nh->fib_nh_dev = dev;
3025f1741730SDavid Ahern 	fib6_nh->fib_nh_oif = dev->ifindex;
302683c44251SDavid Ahern 	err = 0;
302783c44251SDavid Ahern out:
302883c44251SDavid Ahern 	if (idev)
302983c44251SDavid Ahern 		in6_dev_put(idev);
303083c44251SDavid Ahern 
303183c44251SDavid Ahern 	if (err) {
3032ad1601aeSDavid Ahern 		lwtstate_put(fib6_nh->fib_nh_lws);
3033ad1601aeSDavid Ahern 		fib6_nh->fib_nh_lws = NULL;
303483c44251SDavid Ahern 		if (dev)
303583c44251SDavid Ahern 			dev_put(dev);
303683c44251SDavid Ahern 	}
303783c44251SDavid Ahern 
303883c44251SDavid Ahern 	return err;
303983c44251SDavid Ahern }
304083c44251SDavid Ahern 
3041dac7d0f2SDavid Ahern void fib6_nh_release(struct fib6_nh *fib6_nh)
3042dac7d0f2SDavid Ahern {
3043979e276eSDavid Ahern 	fib_nh_common_release(&fib6_nh->nh_common);
3044dac7d0f2SDavid Ahern }
3045dac7d0f2SDavid Ahern 
30468d1c802bSDavid Ahern static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
3047acb54e3cSDavid Ahern 					      gfp_t gfp_flags,
3048333c4301SDavid Ahern 					      struct netlink_ext_ack *extack)
30491da177e4SLinus Torvalds {
30505578689aSDaniel Lezcano 	struct net *net = cfg->fc_nlinfo.nl_net;
30518d1c802bSDavid Ahern 	struct fib6_info *rt = NULL;
3052c71099acSThomas Graf 	struct fib6_table *table;
30538c5b83f0SRoopa Prabhu 	int err = -EINVAL;
305483c44251SDavid Ahern 	int addr_type;
30551da177e4SLinus Torvalds 
3056557c44beSDavid Ahern 	/* RTF_PCPU is an internal flag; can not be set by userspace */
3057d5d531cbSDavid Ahern 	if (cfg->fc_flags & RTF_PCPU) {
3058d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
3059557c44beSDavid Ahern 		goto out;
3060d5d531cbSDavid Ahern 	}
3061557c44beSDavid Ahern 
30622ea2352eSWei Wang 	/* RTF_CACHE is an internal flag; can not be set by userspace */
30632ea2352eSWei Wang 	if (cfg->fc_flags & RTF_CACHE) {
30642ea2352eSWei Wang 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
30652ea2352eSWei Wang 		goto out;
30662ea2352eSWei Wang 	}
30672ea2352eSWei Wang 
3068e8478e80SDavid Ahern 	if (cfg->fc_type > RTN_MAX) {
3069e8478e80SDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid route type");
3070e8478e80SDavid Ahern 		goto out;
3071e8478e80SDavid Ahern 	}
3072e8478e80SDavid Ahern 
3073d5d531cbSDavid Ahern 	if (cfg->fc_dst_len > 128) {
3074d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
30758c5b83f0SRoopa Prabhu 		goto out;
3076d5d531cbSDavid Ahern 	}
3077d5d531cbSDavid Ahern 	if (cfg->fc_src_len > 128) {
3078d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid source address length");
3079d5d531cbSDavid Ahern 		goto out;
3080d5d531cbSDavid Ahern 	}
30811da177e4SLinus Torvalds #ifndef CONFIG_IPV6_SUBTREES
3082d5d531cbSDavid Ahern 	if (cfg->fc_src_len) {
3083d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack,
3084d5d531cbSDavid Ahern 			       "Specifying source address requires IPV6_SUBTREES to be enabled");
30858c5b83f0SRoopa Prabhu 		goto out;
3086d5d531cbSDavid Ahern 	}
30871da177e4SLinus Torvalds #endif
3088fc1e64e1SDavid Ahern 
3089c71099acSThomas Graf 	err = -ENOBUFS;
309038308473SDavid S. Miller 	if (cfg->fc_nlinfo.nlh &&
3091d71314b4SMatti Vaittinen 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
3092d71314b4SMatti Vaittinen 		table = fib6_get_table(net, cfg->fc_table);
309338308473SDavid S. Miller 		if (!table) {
3094f3213831SJoe Perches 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
3095d71314b4SMatti Vaittinen 			table = fib6_new_table(net, cfg->fc_table);
3096d71314b4SMatti Vaittinen 		}
3097d71314b4SMatti Vaittinen 	} else {
3098d71314b4SMatti Vaittinen 		table = fib6_new_table(net, cfg->fc_table);
3099d71314b4SMatti Vaittinen 	}
310038308473SDavid S. Miller 
310138308473SDavid S. Miller 	if (!table)
3102c71099acSThomas Graf 		goto out;
3103c71099acSThomas Graf 
31041da177e4SLinus Torvalds 	err = -ENOMEM;
310593531c67SDavid Ahern 	rt = fib6_info_alloc(gfp_flags);
310693531c67SDavid Ahern 	if (!rt)
31071da177e4SLinus Torvalds 		goto out;
310893531c67SDavid Ahern 
3109d7e774f3SDavid Ahern 	rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3110d7e774f3SDavid Ahern 					       extack);
3111767a2217SDavid Ahern 	if (IS_ERR(rt->fib6_metrics)) {
3112767a2217SDavid Ahern 		err = PTR_ERR(rt->fib6_metrics);
3113fda21d46SEric Dumazet 		/* Do not leave garbage there. */
3114fda21d46SEric Dumazet 		rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
3115767a2217SDavid Ahern 		goto out;
3116767a2217SDavid Ahern 	}
3117767a2217SDavid Ahern 
311893531c67SDavid Ahern 	if (cfg->fc_flags & RTF_ADDRCONF)
311993531c67SDavid Ahern 		rt->dst_nocount = true;
31201da177e4SLinus Torvalds 
31211716a961SGao feng 	if (cfg->fc_flags & RTF_EXPIRES)
312214895687SDavid Ahern 		fib6_set_expires(rt, jiffies +
31231716a961SGao feng 				clock_t_to_jiffies(cfg->fc_expires));
31241716a961SGao feng 	else
312514895687SDavid Ahern 		fib6_clean_expires(rt);
31261da177e4SLinus Torvalds 
312786872cb5SThomas Graf 	if (cfg->fc_protocol == RTPROT_UNSPEC)
312886872cb5SThomas Graf 		cfg->fc_protocol = RTPROT_BOOT;
312993c2fb25SDavid Ahern 	rt->fib6_protocol = cfg->fc_protocol;
313086872cb5SThomas Graf 
313183c44251SDavid Ahern 	rt->fib6_table = table;
313283c44251SDavid Ahern 	rt->fib6_metric = cfg->fc_metric;
313383c44251SDavid Ahern 	rt->fib6_type = cfg->fc_type;
31342b2450caSDavid Ahern 	rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
313519e42e45SRoopa Prabhu 
313693c2fb25SDavid Ahern 	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
313793c2fb25SDavid Ahern 	rt->fib6_dst.plen = cfg->fc_dst_len;
313893c2fb25SDavid Ahern 	if (rt->fib6_dst.plen == 128)
31393b6761d1SDavid Ahern 		rt->dst_host = true;
31401da177e4SLinus Torvalds 
31411da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
314293c2fb25SDavid Ahern 	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
314393c2fb25SDavid Ahern 	rt->fib6_src.plen = cfg->fc_src_len;
31441da177e4SLinus Torvalds #endif
314583c44251SDavid Ahern 	err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
31461da177e4SLinus Torvalds 	if (err)
31471da177e4SLinus Torvalds 		goto out;
31489fbb704cSDavid Ahern 
314983c44251SDavid Ahern 	/* We cannot add true routes via loopback here,
315083c44251SDavid Ahern 	 * they would result in kernel looping; promote them to reject routes
315183c44251SDavid Ahern 	 */
315283c44251SDavid Ahern 	addr_type = ipv6_addr_type(&cfg->fc_dst);
3153ad1601aeSDavid Ahern 	if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
315483c44251SDavid Ahern 		rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
3155955ec4cbSDavid Ahern 
3156c3968a85SDaniel Walter 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
315783c44251SDavid Ahern 		struct net_device *dev = fib6_info_nh_dev(rt);
315883c44251SDavid Ahern 
3159c3968a85SDaniel Walter 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3160d5d531cbSDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid source address");
3161c3968a85SDaniel Walter 			err = -EINVAL;
3162c3968a85SDaniel Walter 			goto out;
3163c3968a85SDaniel Walter 		}
316493c2fb25SDavid Ahern 		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
316593c2fb25SDavid Ahern 		rt->fib6_prefsrc.plen = 128;
3166c3968a85SDaniel Walter 	} else
316793c2fb25SDavid Ahern 		rt->fib6_prefsrc.plen = 0;
3168c3968a85SDaniel Walter 
31698c5b83f0SRoopa Prabhu 	return rt;
31701da177e4SLinus Torvalds out:
317193531c67SDavid Ahern 	fib6_info_release(rt);
31728c5b83f0SRoopa Prabhu 	return ERR_PTR(err);
31736b9ea5a6SRoopa Prabhu }
31746b9ea5a6SRoopa Prabhu 
3175acb54e3cSDavid Ahern int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3176333c4301SDavid Ahern 		  struct netlink_ext_ack *extack)
31776b9ea5a6SRoopa Prabhu {
31788d1c802bSDavid Ahern 	struct fib6_info *rt;
31796b9ea5a6SRoopa Prabhu 	int err;
31806b9ea5a6SRoopa Prabhu 
3181acb54e3cSDavid Ahern 	rt = ip6_route_info_create(cfg, gfp_flags, extack);
3182d4ead6b3SDavid Ahern 	if (IS_ERR(rt))
3183d4ead6b3SDavid Ahern 		return PTR_ERR(rt);
31846b9ea5a6SRoopa Prabhu 
3185d4ead6b3SDavid Ahern 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
318693531c67SDavid Ahern 	fib6_info_release(rt);
31876b9ea5a6SRoopa Prabhu 
31881da177e4SLinus Torvalds 	return err;
31891da177e4SLinus Torvalds }
31901da177e4SLinus Torvalds 
31918d1c802bSDavid Ahern static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
31921da177e4SLinus Torvalds {
3193afb1d4b5SDavid Ahern 	struct net *net = info->nl_net;
3194c71099acSThomas Graf 	struct fib6_table *table;
3195afb1d4b5SDavid Ahern 	int err;
31961da177e4SLinus Torvalds 
3197421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry) {
31986825a26cSGao feng 		err = -ENOENT;
31996825a26cSGao feng 		goto out;
32006825a26cSGao feng 	}
32016c813a72SPatrick McHardy 
320293c2fb25SDavid Ahern 	table = rt->fib6_table;
320366f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
320486872cb5SThomas Graf 	err = fib6_del(rt, info);
320566f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
32061da177e4SLinus Torvalds 
32076825a26cSGao feng out:
320893531c67SDavid Ahern 	fib6_info_release(rt);
32091da177e4SLinus Torvalds 	return err;
32101da177e4SLinus Torvalds }
32111da177e4SLinus Torvalds 
32128d1c802bSDavid Ahern int ip6_del_rt(struct net *net, struct fib6_info *rt)
3213e0a1ad73SThomas Graf {
3214afb1d4b5SDavid Ahern 	struct nl_info info = { .nl_net = net };
3215afb1d4b5SDavid Ahern 
3216528c4cebSDenis V. Lunev 	return __ip6_del_rt(rt, &info);
3217e0a1ad73SThomas Graf }
3218e0a1ad73SThomas Graf 
32198d1c802bSDavid Ahern static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
32200ae81335SDavid Ahern {
32210ae81335SDavid Ahern 	struct nl_info *info = &cfg->fc_nlinfo;
3222e3330039SWANG Cong 	struct net *net = info->nl_net;
322316a16cd3SDavid Ahern 	struct sk_buff *skb = NULL;
32240ae81335SDavid Ahern 	struct fib6_table *table;
3225e3330039SWANG Cong 	int err = -ENOENT;
32260ae81335SDavid Ahern 
3227421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry)
3228e3330039SWANG Cong 		goto out_put;
322993c2fb25SDavid Ahern 	table = rt->fib6_table;
323066f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
32310ae81335SDavid Ahern 
323293c2fb25SDavid Ahern 	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
32338d1c802bSDavid Ahern 		struct fib6_info *sibling, *next_sibling;
32340ae81335SDavid Ahern 
323516a16cd3SDavid Ahern 		/* prefer to send a single notification with all hops */
323616a16cd3SDavid Ahern 		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
323716a16cd3SDavid Ahern 		if (skb) {
323816a16cd3SDavid Ahern 			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
323916a16cd3SDavid Ahern 
3240d4ead6b3SDavid Ahern 			if (rt6_fill_node(net, skb, rt, NULL,
324116a16cd3SDavid Ahern 					  NULL, NULL, 0, RTM_DELROUTE,
324216a16cd3SDavid Ahern 					  info->portid, seq, 0) < 0) {
324316a16cd3SDavid Ahern 				kfree_skb(skb);
324416a16cd3SDavid Ahern 				skb = NULL;
324516a16cd3SDavid Ahern 			} else
324616a16cd3SDavid Ahern 				info->skip_notify = 1;
324716a16cd3SDavid Ahern 		}
324816a16cd3SDavid Ahern 
32490ae81335SDavid Ahern 		list_for_each_entry_safe(sibling, next_sibling,
325093c2fb25SDavid Ahern 					 &rt->fib6_siblings,
325193c2fb25SDavid Ahern 					 fib6_siblings) {
32520ae81335SDavid Ahern 			err = fib6_del(sibling, info);
32530ae81335SDavid Ahern 			if (err)
3254e3330039SWANG Cong 				goto out_unlock;
32550ae81335SDavid Ahern 		}
32560ae81335SDavid Ahern 	}
32570ae81335SDavid Ahern 
32580ae81335SDavid Ahern 	err = fib6_del(rt, info);
3259e3330039SWANG Cong out_unlock:
326066f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
3261e3330039SWANG Cong out_put:
326293531c67SDavid Ahern 	fib6_info_release(rt);
326316a16cd3SDavid Ahern 
326416a16cd3SDavid Ahern 	if (skb) {
3265e3330039SWANG Cong 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
326616a16cd3SDavid Ahern 			    info->nlh, gfp_any());
326716a16cd3SDavid Ahern 	}
32680ae81335SDavid Ahern 	return err;
32690ae81335SDavid Ahern }
32700ae81335SDavid Ahern 
327123fb93a4SDavid Ahern static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
327223fb93a4SDavid Ahern {
327323fb93a4SDavid Ahern 	int rc = -ESRCH;
327423fb93a4SDavid Ahern 
327523fb93a4SDavid Ahern 	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
327623fb93a4SDavid Ahern 		goto out;
327723fb93a4SDavid Ahern 
327823fb93a4SDavid Ahern 	if (cfg->fc_flags & RTF_GATEWAY &&
327923fb93a4SDavid Ahern 	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
328023fb93a4SDavid Ahern 		goto out;
3281761f6026SXin Long 
328223fb93a4SDavid Ahern 	rc = rt6_remove_exception_rt(rt);
328323fb93a4SDavid Ahern out:
328423fb93a4SDavid Ahern 	return rc;
328523fb93a4SDavid Ahern }
328623fb93a4SDavid Ahern 
3287333c4301SDavid Ahern static int ip6_route_del(struct fib6_config *cfg,
3288333c4301SDavid Ahern 			 struct netlink_ext_ack *extack)
32891da177e4SLinus Torvalds {
32908d1c802bSDavid Ahern 	struct rt6_info *rt_cache;
3291c71099acSThomas Graf 	struct fib6_table *table;
32928d1c802bSDavid Ahern 	struct fib6_info *rt;
32931da177e4SLinus Torvalds 	struct fib6_node *fn;
32941da177e4SLinus Torvalds 	int err = -ESRCH;
32951da177e4SLinus Torvalds 
32965578689aSDaniel Lezcano 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3297d5d531cbSDavid Ahern 	if (!table) {
3298d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "FIB table does not exist");
3299c71099acSThomas Graf 		return err;
3300d5d531cbSDavid Ahern 	}
33011da177e4SLinus Torvalds 
330266f5d6ceSWei Wang 	rcu_read_lock();
3303c71099acSThomas Graf 
3304c71099acSThomas Graf 	fn = fib6_locate(&table->tb6_root,
330586872cb5SThomas Graf 			 &cfg->fc_dst, cfg->fc_dst_len,
330638fbeeeeSWei Wang 			 &cfg->fc_src, cfg->fc_src_len,
33072b760fcfSWei Wang 			 !(cfg->fc_flags & RTF_CACHE));
33081da177e4SLinus Torvalds 
33091da177e4SLinus Torvalds 	if (fn) {
331066f5d6ceSWei Wang 		for_each_fib6_node_rt_rcu(fn) {
3311ad1601aeSDavid Ahern 			struct fib6_nh *nh;
3312ad1601aeSDavid Ahern 
33132b760fcfSWei Wang 			if (cfg->fc_flags & RTF_CACHE) {
331423fb93a4SDavid Ahern 				int rc;
331523fb93a4SDavid Ahern 
33162b760fcfSWei Wang 				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
33172b760fcfSWei Wang 							      &cfg->fc_src);
331823fb93a4SDavid Ahern 				if (rt_cache) {
331923fb93a4SDavid Ahern 					rc = ip6_del_cached_rt(rt_cache, cfg);
33209e575010SEric Dumazet 					if (rc != -ESRCH) {
33219e575010SEric Dumazet 						rcu_read_unlock();
332223fb93a4SDavid Ahern 						return rc;
332323fb93a4SDavid Ahern 					}
33249e575010SEric Dumazet 				}
33251f56a01fSMartin KaFai Lau 				continue;
33262b760fcfSWei Wang 			}
3327ad1601aeSDavid Ahern 
3328ad1601aeSDavid Ahern 			nh = &rt->fib6_nh;
332986872cb5SThomas Graf 			if (cfg->fc_ifindex &&
3330ad1601aeSDavid Ahern 			    (!nh->fib_nh_dev ||
3331ad1601aeSDavid Ahern 			     nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
33321da177e4SLinus Torvalds 				continue;
333386872cb5SThomas Graf 			if (cfg->fc_flags & RTF_GATEWAY &&
3334ad1601aeSDavid Ahern 			    !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
33351da177e4SLinus Torvalds 				continue;
333693c2fb25SDavid Ahern 			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
33371da177e4SLinus Torvalds 				continue;
333893c2fb25SDavid Ahern 			if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
3339c2ed1880SMantas M 				continue;
3340e873e4b9SWei Wang 			if (!fib6_info_hold_safe(rt))
3341e873e4b9SWei Wang 				continue;
334266f5d6ceSWei Wang 			rcu_read_unlock();
33431da177e4SLinus Torvalds 
33440ae81335SDavid Ahern 			/* if gateway was specified only delete the one hop */
33450ae81335SDavid Ahern 			if (cfg->fc_flags & RTF_GATEWAY)
334686872cb5SThomas Graf 				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
33470ae81335SDavid Ahern 
33480ae81335SDavid Ahern 			return __ip6_del_rt_siblings(rt, cfg);
33491da177e4SLinus Torvalds 		}
33501da177e4SLinus Torvalds 	}
335166f5d6ceSWei Wang 	rcu_read_unlock();
33521da177e4SLinus Torvalds 
33531da177e4SLinus Torvalds 	return err;
33541da177e4SLinus Torvalds }
33551da177e4SLinus Torvalds 
33566700c270SDavid S. Miller static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3357a6279458SYOSHIFUJI Hideaki {
3358a6279458SYOSHIFUJI Hideaki 	struct netevent_redirect netevent;
3359e8599ff4SDavid S. Miller 	struct rt6_info *rt, *nrt = NULL;
3360e8599ff4SDavid S. Miller 	struct ndisc_options ndopts;
3361e8599ff4SDavid S. Miller 	struct inet6_dev *in6_dev;
3362e8599ff4SDavid S. Miller 	struct neighbour *neigh;
3363a68886a6SDavid Ahern 	struct fib6_info *from;
336471bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	struct rd_msg *msg;
33656e157b6aSDavid S. Miller 	int optlen, on_link;
33666e157b6aSDavid S. Miller 	u8 *lladdr;
3367e8599ff4SDavid S. Miller 
336829a3cad5SSimon Horman 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
336971bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	optlen -= sizeof(*msg);
3370e8599ff4SDavid S. Miller 
3371e8599ff4SDavid S. Miller 	if (optlen < 0) {
33726e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3373e8599ff4SDavid S. Miller 		return;
3374e8599ff4SDavid S. Miller 	}
3375e8599ff4SDavid S. Miller 
337671bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	msg = (struct rd_msg *)icmp6_hdr(skb);
3377e8599ff4SDavid S. Miller 
337871bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (ipv6_addr_is_multicast(&msg->dest)) {
33796e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3380e8599ff4SDavid S. Miller 		return;
3381e8599ff4SDavid S. Miller 	}
3382e8599ff4SDavid S. Miller 
33836e157b6aSDavid S. Miller 	on_link = 0;
338471bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3385e8599ff4SDavid S. Miller 		on_link = 1;
338671bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	} else if (ipv6_addr_type(&msg->target) !=
3387e8599ff4SDavid S. Miller 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
33886e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3389e8599ff4SDavid S. Miller 		return;
3390e8599ff4SDavid S. Miller 	}
3391e8599ff4SDavid S. Miller 
3392e8599ff4SDavid S. Miller 	in6_dev = __in6_dev_get(skb->dev);
3393e8599ff4SDavid S. Miller 	if (!in6_dev)
3394e8599ff4SDavid S. Miller 		return;
3395e8599ff4SDavid S. Miller 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3396e8599ff4SDavid S. Miller 		return;
3397e8599ff4SDavid S. Miller 
3398e8599ff4SDavid S. Miller 	/* RFC2461 8.1:
3399e8599ff4SDavid S. Miller 	 *	The IP source address of the Redirect MUST be the same as the current
3400e8599ff4SDavid S. Miller 	 *	first-hop router for the specified ICMP Destination Address.
3401e8599ff4SDavid S. Miller 	 */
3402e8599ff4SDavid S. Miller 
3403f997c55cSAlexander Aring 	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3404e8599ff4SDavid S. Miller 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3405e8599ff4SDavid S. Miller 		return;
3406e8599ff4SDavid S. Miller 	}
34076e157b6aSDavid S. Miller 
34086e157b6aSDavid S. Miller 	lladdr = NULL;
3409e8599ff4SDavid S. Miller 	if (ndopts.nd_opts_tgt_lladdr) {
3410e8599ff4SDavid S. Miller 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3411e8599ff4SDavid S. Miller 					     skb->dev);
3412e8599ff4SDavid S. Miller 		if (!lladdr) {
3413e8599ff4SDavid S. Miller 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3414e8599ff4SDavid S. Miller 			return;
3415e8599ff4SDavid S. Miller 		}
3416e8599ff4SDavid S. Miller 	}
3417e8599ff4SDavid S. Miller 
34186e157b6aSDavid S. Miller 	rt = (struct rt6_info *) dst;
3419ec13ad1dSMatthias Schiffer 	if (rt->rt6i_flags & RTF_REJECT) {
34206e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
34216e157b6aSDavid S. Miller 		return;
34226e157b6aSDavid S. Miller 	}
34236e157b6aSDavid S. Miller 
34246e157b6aSDavid S. Miller 	/* Redirect received -> path was valid.
34256e157b6aSDavid S. Miller 	 * Look, redirects are sent only in response to data packets,
34266e157b6aSDavid S. Miller 	 * so that this nexthop apparently is reachable. --ANK
34276e157b6aSDavid S. Miller 	 */
34280dec879fSJulian Anastasov 	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
34296e157b6aSDavid S. Miller 
343071bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3431e8599ff4SDavid S. Miller 	if (!neigh)
3432e8599ff4SDavid S. Miller 		return;
3433e8599ff4SDavid S. Miller 
34341da177e4SLinus Torvalds 	/*
34351da177e4SLinus Torvalds 	 *	We have finally decided to accept it.
34361da177e4SLinus Torvalds 	 */
34371da177e4SLinus Torvalds 
3438f997c55cSAlexander Aring 	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
34391da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
34401da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_OVERRIDE|
34411da177e4SLinus Torvalds 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3442f997c55cSAlexander Aring 				     NEIGH_UPDATE_F_ISROUTER)),
3443f997c55cSAlexander Aring 		     NDISC_REDIRECT, &ndopts);
34441da177e4SLinus Torvalds 
34454d85cd0cSDavid Ahern 	rcu_read_lock();
3446a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
3447e873e4b9SWei Wang 	/* This fib6_info_hold() is safe here because we hold reference to rt
3448e873e4b9SWei Wang 	 * and rt already holds reference to fib6_info.
3449e873e4b9SWei Wang 	 */
34508a14e46fSDavid Ahern 	fib6_info_hold(from);
34514d85cd0cSDavid Ahern 	rcu_read_unlock();
34528a14e46fSDavid Ahern 
34538a14e46fSDavid Ahern 	nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
345438308473SDavid S. Miller 	if (!nrt)
34551da177e4SLinus Torvalds 		goto out;
34561da177e4SLinus Torvalds 
34571da177e4SLinus Torvalds 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
34581da177e4SLinus Torvalds 	if (on_link)
34591da177e4SLinus Torvalds 		nrt->rt6i_flags &= ~RTF_GATEWAY;
34601da177e4SLinus Torvalds 
34614e3fd7a0SAlexey Dobriyan 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
34621da177e4SLinus Torvalds 
34632b760fcfSWei Wang 	/* No need to remove rt from the exception table if rt is
34642b760fcfSWei Wang 	 * a cached route because rt6_insert_exception() will
34652b760fcfSWei Wang 	 * takes care of it
34662b760fcfSWei Wang 	 */
34678a14e46fSDavid Ahern 	if (rt6_insert_exception(nrt, from)) {
34682b760fcfSWei Wang 		dst_release_immediate(&nrt->dst);
34692b760fcfSWei Wang 		goto out;
34702b760fcfSWei Wang 	}
34711da177e4SLinus Torvalds 
3472d8d1f30bSChangli Gao 	netevent.old = &rt->dst;
3473d8d1f30bSChangli Gao 	netevent.new = &nrt->dst;
347471bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	netevent.daddr = &msg->dest;
347560592833SYOSHIFUJI Hideaki / 吉藤英明 	netevent.neigh = neigh;
34768d71740cSTom Tucker 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
34778d71740cSTom Tucker 
34781da177e4SLinus Torvalds out:
34798a14e46fSDavid Ahern 	fib6_info_release(from);
3480e8599ff4SDavid S. Miller 	neigh_release(neigh);
34816e157b6aSDavid S. Miller }
34826e157b6aSDavid S. Miller 
348370ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
34848d1c802bSDavid Ahern static struct fib6_info *rt6_get_route_info(struct net *net,
3485b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
3486830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
3487830218c1SDavid Ahern 					   struct net_device *dev)
348870ceb4f5SYOSHIFUJI Hideaki {
3489830218c1SDavid Ahern 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3490830218c1SDavid Ahern 	int ifindex = dev->ifindex;
349170ceb4f5SYOSHIFUJI Hideaki 	struct fib6_node *fn;
34928d1c802bSDavid Ahern 	struct fib6_info *rt = NULL;
3493c71099acSThomas Graf 	struct fib6_table *table;
349470ceb4f5SYOSHIFUJI Hideaki 
3495830218c1SDavid Ahern 	table = fib6_get_table(net, tb_id);
349638308473SDavid S. Miller 	if (!table)
3497c71099acSThomas Graf 		return NULL;
3498c71099acSThomas Graf 
349966f5d6ceSWei Wang 	rcu_read_lock();
350038fbeeeeSWei Wang 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
350170ceb4f5SYOSHIFUJI Hideaki 	if (!fn)
350270ceb4f5SYOSHIFUJI Hideaki 		goto out;
350370ceb4f5SYOSHIFUJI Hideaki 
350466f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(fn) {
3505ad1601aeSDavid Ahern 		if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
350670ceb4f5SYOSHIFUJI Hideaki 			continue;
35072b2450caSDavid Ahern 		if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
3508bdf00467SDavid Ahern 		    !rt->fib6_nh.fib_nh_gw_family)
350970ceb4f5SYOSHIFUJI Hideaki 			continue;
3510ad1601aeSDavid Ahern 		if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
351170ceb4f5SYOSHIFUJI Hideaki 			continue;
3512e873e4b9SWei Wang 		if (!fib6_info_hold_safe(rt))
3513e873e4b9SWei Wang 			continue;
351470ceb4f5SYOSHIFUJI Hideaki 		break;
351570ceb4f5SYOSHIFUJI Hideaki 	}
351670ceb4f5SYOSHIFUJI Hideaki out:
351766f5d6ceSWei Wang 	rcu_read_unlock();
351870ceb4f5SYOSHIFUJI Hideaki 	return rt;
351970ceb4f5SYOSHIFUJI Hideaki }
352070ceb4f5SYOSHIFUJI Hideaki 
35218d1c802bSDavid Ahern static struct fib6_info *rt6_add_route_info(struct net *net,
3522b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
3523830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
3524830218c1SDavid Ahern 					   struct net_device *dev,
352595c96174SEric Dumazet 					   unsigned int pref)
352670ceb4f5SYOSHIFUJI Hideaki {
352786872cb5SThomas Graf 	struct fib6_config cfg = {
3528238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
3529830218c1SDavid Ahern 		.fc_ifindex	= dev->ifindex,
353086872cb5SThomas Graf 		.fc_dst_len	= prefixlen,
353186872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
353286872cb5SThomas Graf 				  RTF_UP | RTF_PREF(pref),
3533b91d5329SXin Long 		.fc_protocol = RTPROT_RA,
3534e8478e80SDavid Ahern 		.fc_type = RTN_UNICAST,
353515e47304SEric W. Biederman 		.fc_nlinfo.portid = 0,
3536efa2cea0SDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
3537efa2cea0SDaniel Lezcano 		.fc_nlinfo.nl_net = net,
353886872cb5SThomas Graf 	};
353970ceb4f5SYOSHIFUJI Hideaki 
3540830218c1SDavid Ahern 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
35414e3fd7a0SAlexey Dobriyan 	cfg.fc_dst = *prefix;
35424e3fd7a0SAlexey Dobriyan 	cfg.fc_gateway = *gwaddr;
354386872cb5SThomas Graf 
3544e317da96SYOSHIFUJI Hideaki 	/* We should treat it as a default route if prefix length is 0. */
3545e317da96SYOSHIFUJI Hideaki 	if (!prefixlen)
354686872cb5SThomas Graf 		cfg.fc_flags |= RTF_DEFAULT;
354770ceb4f5SYOSHIFUJI Hideaki 
3548acb54e3cSDavid Ahern 	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
354970ceb4f5SYOSHIFUJI Hideaki 
3550830218c1SDavid Ahern 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
355170ceb4f5SYOSHIFUJI Hideaki }
355270ceb4f5SYOSHIFUJI Hideaki #endif
355370ceb4f5SYOSHIFUJI Hideaki 
35548d1c802bSDavid Ahern struct fib6_info *rt6_get_dflt_router(struct net *net,
3555afb1d4b5SDavid Ahern 				     const struct in6_addr *addr,
3556afb1d4b5SDavid Ahern 				     struct net_device *dev)
35571da177e4SLinus Torvalds {
3558830218c1SDavid Ahern 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
35598d1c802bSDavid Ahern 	struct fib6_info *rt;
3560c71099acSThomas Graf 	struct fib6_table *table;
35611da177e4SLinus Torvalds 
3562afb1d4b5SDavid Ahern 	table = fib6_get_table(net, tb_id);
356338308473SDavid S. Miller 	if (!table)
3564c71099acSThomas Graf 		return NULL;
35651da177e4SLinus Torvalds 
356666f5d6ceSWei Wang 	rcu_read_lock();
356766f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3568ad1601aeSDavid Ahern 		struct fib6_nh *nh = &rt->fib6_nh;
3569ad1601aeSDavid Ahern 
3570ad1601aeSDavid Ahern 		if (dev == nh->fib_nh_dev &&
357193c2fb25SDavid Ahern 		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
3572ad1601aeSDavid Ahern 		    ipv6_addr_equal(&nh->fib_nh_gw6, addr))
35731da177e4SLinus Torvalds 			break;
35741da177e4SLinus Torvalds 	}
3575e873e4b9SWei Wang 	if (rt && !fib6_info_hold_safe(rt))
3576e873e4b9SWei Wang 		rt = NULL;
357766f5d6ceSWei Wang 	rcu_read_unlock();
35781da177e4SLinus Torvalds 	return rt;
35791da177e4SLinus Torvalds }
35801da177e4SLinus Torvalds 
35818d1c802bSDavid Ahern struct fib6_info *rt6_add_dflt_router(struct net *net,
3582afb1d4b5SDavid Ahern 				     const struct in6_addr *gwaddr,
3583ebacaaa0SYOSHIFUJI Hideaki 				     struct net_device *dev,
3584ebacaaa0SYOSHIFUJI Hideaki 				     unsigned int pref)
35851da177e4SLinus Torvalds {
358686872cb5SThomas Graf 	struct fib6_config cfg = {
3587ca254490SDavid Ahern 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3588238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
358986872cb5SThomas Graf 		.fc_ifindex	= dev->ifindex,
359086872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
359186872cb5SThomas Graf 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3592b91d5329SXin Long 		.fc_protocol = RTPROT_RA,
3593e8478e80SDavid Ahern 		.fc_type = RTN_UNICAST,
359415e47304SEric W. Biederman 		.fc_nlinfo.portid = 0,
35955578689aSDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
3596afb1d4b5SDavid Ahern 		.fc_nlinfo.nl_net = net,
359786872cb5SThomas Graf 	};
35981da177e4SLinus Torvalds 
35994e3fd7a0SAlexey Dobriyan 	cfg.fc_gateway = *gwaddr;
36001da177e4SLinus Torvalds 
3601acb54e3cSDavid Ahern 	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
3602830218c1SDavid Ahern 		struct fib6_table *table;
3603830218c1SDavid Ahern 
3604830218c1SDavid Ahern 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
3605830218c1SDavid Ahern 		if (table)
3606830218c1SDavid Ahern 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3607830218c1SDavid Ahern 	}
36081da177e4SLinus Torvalds 
3609afb1d4b5SDavid Ahern 	return rt6_get_dflt_router(net, gwaddr, dev);
36101da177e4SLinus Torvalds }
36111da177e4SLinus Torvalds 
3612afb1d4b5SDavid Ahern static void __rt6_purge_dflt_routers(struct net *net,
3613afb1d4b5SDavid Ahern 				     struct fib6_table *table)
36141da177e4SLinus Torvalds {
36158d1c802bSDavid Ahern 	struct fib6_info *rt;
36161da177e4SLinus Torvalds 
36171da177e4SLinus Torvalds restart:
361866f5d6ceSWei Wang 	rcu_read_lock();
361966f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3620dcd1f572SDavid Ahern 		struct net_device *dev = fib6_info_nh_dev(rt);
3621dcd1f572SDavid Ahern 		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3622dcd1f572SDavid Ahern 
362393c2fb25SDavid Ahern 		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3624e873e4b9SWei Wang 		    (!idev || idev->cnf.accept_ra != 2) &&
3625e873e4b9SWei Wang 		    fib6_info_hold_safe(rt)) {
362666f5d6ceSWei Wang 			rcu_read_unlock();
3627afb1d4b5SDavid Ahern 			ip6_del_rt(net, rt);
36281da177e4SLinus Torvalds 			goto restart;
36291da177e4SLinus Torvalds 		}
36301da177e4SLinus Torvalds 	}
363166f5d6ceSWei Wang 	rcu_read_unlock();
3632830218c1SDavid Ahern 
3633830218c1SDavid Ahern 	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3634830218c1SDavid Ahern }
3635830218c1SDavid Ahern 
3636830218c1SDavid Ahern void rt6_purge_dflt_routers(struct net *net)
3637830218c1SDavid Ahern {
3638830218c1SDavid Ahern 	struct fib6_table *table;
3639830218c1SDavid Ahern 	struct hlist_head *head;
3640830218c1SDavid Ahern 	unsigned int h;
3641830218c1SDavid Ahern 
3642830218c1SDavid Ahern 	rcu_read_lock();
3643830218c1SDavid Ahern 
3644830218c1SDavid Ahern 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3645830218c1SDavid Ahern 		head = &net->ipv6.fib_table_hash[h];
3646830218c1SDavid Ahern 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3647830218c1SDavid Ahern 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3648afb1d4b5SDavid Ahern 				__rt6_purge_dflt_routers(net, table);
3649830218c1SDavid Ahern 		}
3650830218c1SDavid Ahern 	}
3651830218c1SDavid Ahern 
3652830218c1SDavid Ahern 	rcu_read_unlock();
36531da177e4SLinus Torvalds }
36541da177e4SLinus Torvalds 
36555578689aSDaniel Lezcano static void rtmsg_to_fib6_config(struct net *net,
36565578689aSDaniel Lezcano 				 struct in6_rtmsg *rtmsg,
365786872cb5SThomas Graf 				 struct fib6_config *cfg)
365886872cb5SThomas Graf {
36598823a3acSMaciej Żenczykowski 	*cfg = (struct fib6_config){
36608823a3acSMaciej Żenczykowski 		.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
36618823a3acSMaciej Żenczykowski 			 : RT6_TABLE_MAIN,
36628823a3acSMaciej Żenczykowski 		.fc_ifindex = rtmsg->rtmsg_ifindex,
366367f69513SDavid Ahern 		.fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
36648823a3acSMaciej Żenczykowski 		.fc_expires = rtmsg->rtmsg_info,
36658823a3acSMaciej Żenczykowski 		.fc_dst_len = rtmsg->rtmsg_dst_len,
36668823a3acSMaciej Żenczykowski 		.fc_src_len = rtmsg->rtmsg_src_len,
36678823a3acSMaciej Żenczykowski 		.fc_flags = rtmsg->rtmsg_flags,
36688823a3acSMaciej Żenczykowski 		.fc_type = rtmsg->rtmsg_type,
366986872cb5SThomas Graf 
36708823a3acSMaciej Żenczykowski 		.fc_nlinfo.nl_net = net,
367186872cb5SThomas Graf 
36728823a3acSMaciej Żenczykowski 		.fc_dst = rtmsg->rtmsg_dst,
36738823a3acSMaciej Żenczykowski 		.fc_src = rtmsg->rtmsg_src,
36748823a3acSMaciej Żenczykowski 		.fc_gateway = rtmsg->rtmsg_gateway,
36758823a3acSMaciej Żenczykowski 	};
367686872cb5SThomas Graf }
367786872cb5SThomas Graf 
36785578689aSDaniel Lezcano int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
36791da177e4SLinus Torvalds {
368086872cb5SThomas Graf 	struct fib6_config cfg;
36811da177e4SLinus Torvalds 	struct in6_rtmsg rtmsg;
36821da177e4SLinus Torvalds 	int err;
36831da177e4SLinus Torvalds 
36841da177e4SLinus Torvalds 	switch (cmd) {
36851da177e4SLinus Torvalds 	case SIOCADDRT:		/* Add a route */
36861da177e4SLinus Torvalds 	case SIOCDELRT:		/* Delete a route */
3687af31f412SEric W. Biederman 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
36881da177e4SLinus Torvalds 			return -EPERM;
36891da177e4SLinus Torvalds 		err = copy_from_user(&rtmsg, arg,
36901da177e4SLinus Torvalds 				     sizeof(struct in6_rtmsg));
36911da177e4SLinus Torvalds 		if (err)
36921da177e4SLinus Torvalds 			return -EFAULT;
36931da177e4SLinus Torvalds 
36945578689aSDaniel Lezcano 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
369586872cb5SThomas Graf 
36961da177e4SLinus Torvalds 		rtnl_lock();
36971da177e4SLinus Torvalds 		switch (cmd) {
36981da177e4SLinus Torvalds 		case SIOCADDRT:
3699acb54e3cSDavid Ahern 			err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
37001da177e4SLinus Torvalds 			break;
37011da177e4SLinus Torvalds 		case SIOCDELRT:
3702333c4301SDavid Ahern 			err = ip6_route_del(&cfg, NULL);
37031da177e4SLinus Torvalds 			break;
37041da177e4SLinus Torvalds 		default:
37051da177e4SLinus Torvalds 			err = -EINVAL;
37061da177e4SLinus Torvalds 		}
37071da177e4SLinus Torvalds 		rtnl_unlock();
37081da177e4SLinus Torvalds 
37091da177e4SLinus Torvalds 		return err;
37103ff50b79SStephen Hemminger 	}
37111da177e4SLinus Torvalds 
37121da177e4SLinus Torvalds 	return -EINVAL;
37131da177e4SLinus Torvalds }
37141da177e4SLinus Torvalds 
37151da177e4SLinus Torvalds /*
37161da177e4SLinus Torvalds  *	Drop the packet on the floor
37171da177e4SLinus Torvalds  */
37181da177e4SLinus Torvalds 
3719d5fdd6baSBrian Haley static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
37201da177e4SLinus Torvalds {
3721612f09e8SYOSHIFUJI Hideaki 	int type;
3722adf30907SEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
3723612f09e8SYOSHIFUJI Hideaki 	switch (ipstats_mib_noroutes) {
3724612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_INNOROUTES:
37250660e03fSArnaldo Carvalho de Melo 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
372645bb0060SUlrich Weber 		if (type == IPV6_ADDR_ANY) {
3727bdb7cc64SStephen Suryaputra 			IP6_INC_STATS(dev_net(dst->dev),
3728bdb7cc64SStephen Suryaputra 				      __in6_dev_get_safely(skb->dev),
37293bd653c8SDenis V. Lunev 				      IPSTATS_MIB_INADDRERRORS);
3730612f09e8SYOSHIFUJI Hideaki 			break;
3731612f09e8SYOSHIFUJI Hideaki 		}
3732612f09e8SYOSHIFUJI Hideaki 		/* FALLTHROUGH */
3733612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_OUTNOROUTES:
37343bd653c8SDenis V. Lunev 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
37353bd653c8SDenis V. Lunev 			      ipstats_mib_noroutes);
3736612f09e8SYOSHIFUJI Hideaki 		break;
3737612f09e8SYOSHIFUJI Hideaki 	}
37383ffe533cSAlexey Dobriyan 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
37391da177e4SLinus Torvalds 	kfree_skb(skb);
37401da177e4SLinus Torvalds 	return 0;
37411da177e4SLinus Torvalds }
37421da177e4SLinus Torvalds 
37439ce8ade0SThomas Graf static int ip6_pkt_discard(struct sk_buff *skb)
37449ce8ade0SThomas Graf {
3745612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
37469ce8ade0SThomas Graf }
37479ce8ade0SThomas Graf 
3748ede2059dSEric W. Biederman static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
37491da177e4SLinus Torvalds {
3750adf30907SEric Dumazet 	skb->dev = skb_dst(skb)->dev;
3751612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
37521da177e4SLinus Torvalds }
37531da177e4SLinus Torvalds 
37549ce8ade0SThomas Graf static int ip6_pkt_prohibit(struct sk_buff *skb)
37559ce8ade0SThomas Graf {
3756612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
37579ce8ade0SThomas Graf }
37589ce8ade0SThomas Graf 
3759ede2059dSEric W. Biederman static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
37609ce8ade0SThomas Graf {
3761adf30907SEric Dumazet 	skb->dev = skb_dst(skb)->dev;
3762612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
37639ce8ade0SThomas Graf }
37649ce8ade0SThomas Graf 
37651da177e4SLinus Torvalds /*
37661da177e4SLinus Torvalds  *	Allocate a dst for local (unicast / anycast) address.
37671da177e4SLinus Torvalds  */
37681da177e4SLinus Torvalds 
3769360a9887SDavid Ahern struct fib6_info *addrconf_f6i_alloc(struct net *net,
3770afb1d4b5SDavid Ahern 				     struct inet6_dev *idev,
37711da177e4SLinus Torvalds 				     const struct in6_addr *addr,
3772acb54e3cSDavid Ahern 				     bool anycast, gfp_t gfp_flags)
37731da177e4SLinus Torvalds {
3774c7a1ce39SDavid Ahern 	struct fib6_config cfg = {
3775c7a1ce39SDavid Ahern 		.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
3776c7a1ce39SDavid Ahern 		.fc_ifindex = idev->dev->ifindex,
3777c7a1ce39SDavid Ahern 		.fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
3778c7a1ce39SDavid Ahern 		.fc_dst = *addr,
3779c7a1ce39SDavid Ahern 		.fc_dst_len = 128,
3780c7a1ce39SDavid Ahern 		.fc_protocol = RTPROT_KERNEL,
3781c7a1ce39SDavid Ahern 		.fc_nlinfo.nl_net = net,
3782c7a1ce39SDavid Ahern 		.fc_ignore_dev_down = true,
3783c7a1ce39SDavid Ahern 	};
37845f02ce24SDavid Ahern 
3785e8478e80SDavid Ahern 	if (anycast) {
3786c7a1ce39SDavid Ahern 		cfg.fc_type = RTN_ANYCAST;
3787c7a1ce39SDavid Ahern 		cfg.fc_flags |= RTF_ANYCAST;
3788e8478e80SDavid Ahern 	} else {
3789c7a1ce39SDavid Ahern 		cfg.fc_type = RTN_LOCAL;
3790c7a1ce39SDavid Ahern 		cfg.fc_flags |= RTF_LOCAL;
3791e8478e80SDavid Ahern 	}
37921da177e4SLinus Torvalds 
3793c7a1ce39SDavid Ahern 	return ip6_route_info_create(&cfg, gfp_flags, NULL);
37941da177e4SLinus Torvalds }
37951da177e4SLinus Torvalds 
/*
 * Walker argument used to strip a deleted address from the preferred
 * source (prefsrc) of routes; filled in by rt6_remove_prefsrc() and read
 * by fib6_remove_prefsrc().
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address being removed */
};
3802c3968a85SDaniel Walter 
38038d1c802bSDavid Ahern static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
3804c3968a85SDaniel Walter {
3805c3968a85SDaniel Walter 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3806c3968a85SDaniel Walter 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3807c3968a85SDaniel Walter 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3808c3968a85SDaniel Walter 
3809ad1601aeSDavid Ahern 	if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
3810421842edSDavid Ahern 	    rt != net->ipv6.fib6_null_entry &&
381193c2fb25SDavid Ahern 	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
381260006a48SWei Wang 		spin_lock_bh(&rt6_exception_lock);
3813c3968a85SDaniel Walter 		/* remove prefsrc entry */
381493c2fb25SDavid Ahern 		rt->fib6_prefsrc.plen = 0;
381560006a48SWei Wang 		spin_unlock_bh(&rt6_exception_lock);
3816c3968a85SDaniel Walter 	}
3817c3968a85SDaniel Walter 	return 0;
3818c3968a85SDaniel Walter }
3819c3968a85SDaniel Walter 
3820c3968a85SDaniel Walter void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3821c3968a85SDaniel Walter {
3822c3968a85SDaniel Walter 	struct net *net = dev_net(ifp->idev->dev);
3823c3968a85SDaniel Walter 	struct arg_dev_net_ip adni = {
3824c3968a85SDaniel Walter 		.dev = ifp->idev->dev,
3825c3968a85SDaniel Walter 		.net = net,
3826c3968a85SDaniel Walter 		.addr = &ifp->addr,
3827c3968a85SDaniel Walter 	};
38280c3584d5SLi RongQing 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3829c3968a85SDaniel Walter }
3830c3968a85SDaniel Walter 
38312b2450caSDavid Ahern #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT)
3832be7a010dSDuan Jiong 
3833be7a010dSDuan Jiong /* Remove routers and update dst entries when gateway turn into host. */
38348d1c802bSDavid Ahern static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
3835be7a010dSDuan Jiong {
3836be7a010dSDuan Jiong 	struct in6_addr *gateway = (struct in6_addr *)arg;
3837be7a010dSDuan Jiong 
383893c2fb25SDavid Ahern 	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3839bdf00467SDavid Ahern 	    rt->fib6_nh.fib_nh_gw_family &&
3840ad1601aeSDavid Ahern 	    ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
3841be7a010dSDuan Jiong 		return -1;
3842be7a010dSDuan Jiong 	}
3843b16cb459SWei Wang 
3844b16cb459SWei Wang 	/* Further clean up cached routes in exception table.
3845b16cb459SWei Wang 	 * This is needed because cached route may have a different
3846b16cb459SWei Wang 	 * gateway than its 'parent' in the case of an ip redirect.
3847b16cb459SWei Wang 	 */
3848b16cb459SWei Wang 	rt6_exceptions_clean_tohost(rt, gateway);
3849b16cb459SWei Wang 
3850be7a010dSDuan Jiong 	return 0;
3851be7a010dSDuan Jiong }
3852be7a010dSDuan Jiong 
3853be7a010dSDuan Jiong void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3854be7a010dSDuan Jiong {
3855be7a010dSDuan Jiong 	fib6_clean_all(net, fib6_clean_tohost, gateway);
3856be7a010dSDuan Jiong }
3857be7a010dSDuan Jiong 
/*
 * Walker argument for the device up/down handlers.  The union member in
 * use depends on the walker: fib6_ifup() reads nh_flags, fib6_ifdown()
 * reads event.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event refers to */
	union {
		unsigned int nh_flags;	/* RTNH_F_* bits to clear (up path) */
		unsigned long event;	/* NETDEV_* event (down path) */
	};
};
38652127d95aSIdo Schimmel 
38668d1c802bSDavid Ahern static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
3867d7dedee1SIdo Schimmel {
38688d1c802bSDavid Ahern 	struct fib6_info *iter;
3869d7dedee1SIdo Schimmel 	struct fib6_node *fn;
3870d7dedee1SIdo Schimmel 
387193c2fb25SDavid Ahern 	fn = rcu_dereference_protected(rt->fib6_node,
387293c2fb25SDavid Ahern 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3873d7dedee1SIdo Schimmel 	iter = rcu_dereference_protected(fn->leaf,
387493c2fb25SDavid Ahern 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3875d7dedee1SIdo Schimmel 	while (iter) {
387693c2fb25SDavid Ahern 		if (iter->fib6_metric == rt->fib6_metric &&
387733bd5ac5SDavid Ahern 		    rt6_qualify_for_ecmp(iter))
3878d7dedee1SIdo Schimmel 			return iter;
38798fb11a9aSDavid Ahern 		iter = rcu_dereference_protected(iter->fib6_next,
388093c2fb25SDavid Ahern 				lockdep_is_held(&rt->fib6_table->tb6_lock));
3881d7dedee1SIdo Schimmel 	}
3882d7dedee1SIdo Schimmel 
3883d7dedee1SIdo Schimmel 	return NULL;
3884d7dedee1SIdo Schimmel }
3885d7dedee1SIdo Schimmel 
38868d1c802bSDavid Ahern static bool rt6_is_dead(const struct fib6_info *rt)
3887d7dedee1SIdo Schimmel {
3888ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
3889ad1601aeSDavid Ahern 	    (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
3890ad1601aeSDavid Ahern 	     ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
3891d7dedee1SIdo Schimmel 		return true;
3892d7dedee1SIdo Schimmel 
3893d7dedee1SIdo Schimmel 	return false;
3894d7dedee1SIdo Schimmel }
3895d7dedee1SIdo Schimmel 
38968d1c802bSDavid Ahern static int rt6_multipath_total_weight(const struct fib6_info *rt)
3897d7dedee1SIdo Schimmel {
38988d1c802bSDavid Ahern 	struct fib6_info *iter;
3899d7dedee1SIdo Schimmel 	int total = 0;
3900d7dedee1SIdo Schimmel 
3901d7dedee1SIdo Schimmel 	if (!rt6_is_dead(rt))
3902ad1601aeSDavid Ahern 		total += rt->fib6_nh.fib_nh_weight;
3903d7dedee1SIdo Schimmel 
390493c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
3905d7dedee1SIdo Schimmel 		if (!rt6_is_dead(iter))
3906ad1601aeSDavid Ahern 			total += iter->fib6_nh.fib_nh_weight;
3907d7dedee1SIdo Schimmel 	}
3908d7dedee1SIdo Schimmel 
3909d7dedee1SIdo Schimmel 	return total;
3910d7dedee1SIdo Schimmel }
3911d7dedee1SIdo Schimmel 
39128d1c802bSDavid Ahern static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
3913d7dedee1SIdo Schimmel {
3914d7dedee1SIdo Schimmel 	int upper_bound = -1;
3915d7dedee1SIdo Schimmel 
3916d7dedee1SIdo Schimmel 	if (!rt6_is_dead(rt)) {
3917ad1601aeSDavid Ahern 		*weight += rt->fib6_nh.fib_nh_weight;
3918d7dedee1SIdo Schimmel 		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3919d7dedee1SIdo Schimmel 						    total) - 1;
3920d7dedee1SIdo Schimmel 	}
3921ad1601aeSDavid Ahern 	atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
3922d7dedee1SIdo Schimmel }
3923d7dedee1SIdo Schimmel 
39248d1c802bSDavid Ahern static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
3925d7dedee1SIdo Schimmel {
39268d1c802bSDavid Ahern 	struct fib6_info *iter;
3927d7dedee1SIdo Schimmel 	int weight = 0;
3928d7dedee1SIdo Schimmel 
3929d7dedee1SIdo Schimmel 	rt6_upper_bound_set(rt, &weight, total);
3930d7dedee1SIdo Schimmel 
393193c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3932d7dedee1SIdo Schimmel 		rt6_upper_bound_set(iter, &weight, total);
3933d7dedee1SIdo Schimmel }
3934d7dedee1SIdo Schimmel 
39358d1c802bSDavid Ahern void rt6_multipath_rebalance(struct fib6_info *rt)
3936d7dedee1SIdo Schimmel {
39378d1c802bSDavid Ahern 	struct fib6_info *first;
3938d7dedee1SIdo Schimmel 	int total;
3939d7dedee1SIdo Schimmel 
3940d7dedee1SIdo Schimmel 	/* In case the entire multipath route was marked for flushing,
3941d7dedee1SIdo Schimmel 	 * then there is no need to rebalance upon the removal of every
3942d7dedee1SIdo Schimmel 	 * sibling route.
3943d7dedee1SIdo Schimmel 	 */
394493c2fb25SDavid Ahern 	if (!rt->fib6_nsiblings || rt->should_flush)
3945d7dedee1SIdo Schimmel 		return;
3946d7dedee1SIdo Schimmel 
3947d7dedee1SIdo Schimmel 	/* During lookup routes are evaluated in order, so we need to
3948d7dedee1SIdo Schimmel 	 * make sure upper bounds are assigned from the first sibling
3949d7dedee1SIdo Schimmel 	 * onwards.
3950d7dedee1SIdo Schimmel 	 */
3951d7dedee1SIdo Schimmel 	first = rt6_multipath_first_sibling(rt);
3952d7dedee1SIdo Schimmel 	if (WARN_ON_ONCE(!first))
3953d7dedee1SIdo Schimmel 		return;
3954d7dedee1SIdo Schimmel 
3955d7dedee1SIdo Schimmel 	total = rt6_multipath_total_weight(first);
3956d7dedee1SIdo Schimmel 	rt6_multipath_upper_bound_set(first, total);
3957d7dedee1SIdo Schimmel }
3958d7dedee1SIdo Schimmel 
39598d1c802bSDavid Ahern static int fib6_ifup(struct fib6_info *rt, void *p_arg)
39602127d95aSIdo Schimmel {
39612127d95aSIdo Schimmel 	const struct arg_netdev_event *arg = p_arg;
39627aef6859SDavid Ahern 	struct net *net = dev_net(arg->dev);
39632127d95aSIdo Schimmel 
3964ad1601aeSDavid Ahern 	if (rt != net->ipv6.fib6_null_entry &&
3965ad1601aeSDavid Ahern 	    rt->fib6_nh.fib_nh_dev == arg->dev) {
3966ad1601aeSDavid Ahern 		rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
39677aef6859SDavid Ahern 		fib6_update_sernum_upto_root(net, rt);
3968d7dedee1SIdo Schimmel 		rt6_multipath_rebalance(rt);
39691de178edSIdo Schimmel 	}
39702127d95aSIdo Schimmel 
39712127d95aSIdo Schimmel 	return 0;
39722127d95aSIdo Schimmel }
39732127d95aSIdo Schimmel 
39742127d95aSIdo Schimmel void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
39752127d95aSIdo Schimmel {
39762127d95aSIdo Schimmel 	struct arg_netdev_event arg = {
39772127d95aSIdo Schimmel 		.dev = dev,
39786802f3adSIdo Schimmel 		{
39792127d95aSIdo Schimmel 			.nh_flags = nh_flags,
39806802f3adSIdo Schimmel 		},
39812127d95aSIdo Schimmel 	};
39822127d95aSIdo Schimmel 
39832127d95aSIdo Schimmel 	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
39842127d95aSIdo Schimmel 		arg.nh_flags |= RTNH_F_LINKDOWN;
39852127d95aSIdo Schimmel 
39862127d95aSIdo Schimmel 	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
39872127d95aSIdo Schimmel }
39882127d95aSIdo Schimmel 
39898d1c802bSDavid Ahern static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
39901de178edSIdo Schimmel 				   const struct net_device *dev)
39911de178edSIdo Schimmel {
39928d1c802bSDavid Ahern 	struct fib6_info *iter;
39931de178edSIdo Schimmel 
3994ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_dev == dev)
39951de178edSIdo Schimmel 		return true;
399693c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3997ad1601aeSDavid Ahern 		if (iter->fib6_nh.fib_nh_dev == dev)
39981de178edSIdo Schimmel 			return true;
39991de178edSIdo Schimmel 
40001de178edSIdo Schimmel 	return false;
40011de178edSIdo Schimmel }
40021de178edSIdo Schimmel 
40038d1c802bSDavid Ahern static void rt6_multipath_flush(struct fib6_info *rt)
40041de178edSIdo Schimmel {
40058d1c802bSDavid Ahern 	struct fib6_info *iter;
40061de178edSIdo Schimmel 
40071de178edSIdo Schimmel 	rt->should_flush = 1;
400893c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
40091de178edSIdo Schimmel 		iter->should_flush = 1;
40101de178edSIdo Schimmel }
40111de178edSIdo Schimmel 
40128d1c802bSDavid Ahern static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
40131de178edSIdo Schimmel 					     const struct net_device *down_dev)
40141de178edSIdo Schimmel {
40158d1c802bSDavid Ahern 	struct fib6_info *iter;
40161de178edSIdo Schimmel 	unsigned int dead = 0;
40171de178edSIdo Schimmel 
4018ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_dev == down_dev ||
4019ad1601aeSDavid Ahern 	    rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
40201de178edSIdo Schimmel 		dead++;
402193c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4022ad1601aeSDavid Ahern 		if (iter->fib6_nh.fib_nh_dev == down_dev ||
4023ad1601aeSDavid Ahern 		    iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
40241de178edSIdo Schimmel 			dead++;
40251de178edSIdo Schimmel 
40261de178edSIdo Schimmel 	return dead;
40271de178edSIdo Schimmel }
40281de178edSIdo Schimmel 
40298d1c802bSDavid Ahern static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
40301de178edSIdo Schimmel 				       const struct net_device *dev,
40311de178edSIdo Schimmel 				       unsigned int nh_flags)
40321de178edSIdo Schimmel {
40338d1c802bSDavid Ahern 	struct fib6_info *iter;
40341de178edSIdo Schimmel 
4035ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_dev == dev)
4036ad1601aeSDavid Ahern 		rt->fib6_nh.fib_nh_flags |= nh_flags;
403793c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4038ad1601aeSDavid Ahern 		if (iter->fib6_nh.fib_nh_dev == dev)
4039ad1601aeSDavid Ahern 			iter->fib6_nh.fib_nh_flags |= nh_flags;
40401de178edSIdo Schimmel }
40411de178edSIdo Schimmel 
4042a1a22c12SDavid Ahern /* called with write lock held for table with rt */
40438d1c802bSDavid Ahern static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
40441da177e4SLinus Torvalds {
40454c981e28SIdo Schimmel 	const struct arg_netdev_event *arg = p_arg;
40464c981e28SIdo Schimmel 	const struct net_device *dev = arg->dev;
40477aef6859SDavid Ahern 	struct net *net = dev_net(dev);
40488ed67789SDaniel Lezcano 
4049421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry)
405027c6fa73SIdo Schimmel 		return 0;
405127c6fa73SIdo Schimmel 
405227c6fa73SIdo Schimmel 	switch (arg->event) {
405327c6fa73SIdo Schimmel 	case NETDEV_UNREGISTER:
4054ad1601aeSDavid Ahern 		return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
405527c6fa73SIdo Schimmel 	case NETDEV_DOWN:
40561de178edSIdo Schimmel 		if (rt->should_flush)
405727c6fa73SIdo Schimmel 			return -1;
405893c2fb25SDavid Ahern 		if (!rt->fib6_nsiblings)
4059ad1601aeSDavid Ahern 			return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
40601de178edSIdo Schimmel 		if (rt6_multipath_uses_dev(rt, dev)) {
40611de178edSIdo Schimmel 			unsigned int count;
40621de178edSIdo Schimmel 
40631de178edSIdo Schimmel 			count = rt6_multipath_dead_count(rt, dev);
406493c2fb25SDavid Ahern 			if (rt->fib6_nsiblings + 1 == count) {
40651de178edSIdo Schimmel 				rt6_multipath_flush(rt);
40661de178edSIdo Schimmel 				return -1;
40671de178edSIdo Schimmel 			}
40681de178edSIdo Schimmel 			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
40691de178edSIdo Schimmel 						   RTNH_F_LINKDOWN);
40707aef6859SDavid Ahern 			fib6_update_sernum(net, rt);
4071d7dedee1SIdo Schimmel 			rt6_multipath_rebalance(rt);
40721de178edSIdo Schimmel 		}
40731de178edSIdo Schimmel 		return -2;
407427c6fa73SIdo Schimmel 	case NETDEV_CHANGE:
4075ad1601aeSDavid Ahern 		if (rt->fib6_nh.fib_nh_dev != dev ||
407693c2fb25SDavid Ahern 		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
407727c6fa73SIdo Schimmel 			break;
4078ad1601aeSDavid Ahern 		rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
4079d7dedee1SIdo Schimmel 		rt6_multipath_rebalance(rt);
408027c6fa73SIdo Schimmel 		break;
40812b241361SIdo Schimmel 	}
4082c159d30cSDavid S. Miller 
40831da177e4SLinus Torvalds 	return 0;
40841da177e4SLinus Torvalds }
40851da177e4SLinus Torvalds 
408627c6fa73SIdo Schimmel void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
40871da177e4SLinus Torvalds {
40884c981e28SIdo Schimmel 	struct arg_netdev_event arg = {
40898ed67789SDaniel Lezcano 		.dev = dev,
40906802f3adSIdo Schimmel 		{
40914c981e28SIdo Schimmel 			.event = event,
40926802f3adSIdo Schimmel 		},
40938ed67789SDaniel Lezcano 	};
40947c6bb7d2SDavid Ahern 	struct net *net = dev_net(dev);
40958ed67789SDaniel Lezcano 
40967c6bb7d2SDavid Ahern 	if (net->ipv6.sysctl.skip_notify_on_dev_down)
40977c6bb7d2SDavid Ahern 		fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
40987c6bb7d2SDavid Ahern 	else
40997c6bb7d2SDavid Ahern 		fib6_clean_all(net, fib6_ifdown, &arg);
41004c981e28SIdo Schimmel }
41014c981e28SIdo Schimmel 
41024c981e28SIdo Schimmel void rt6_disable_ip(struct net_device *dev, unsigned long event)
41034c981e28SIdo Schimmel {
41044c981e28SIdo Schimmel 	rt6_sync_down_dev(dev, event);
41054c981e28SIdo Schimmel 	rt6_uncached_list_flush_dev(dev_net(dev), dev);
41064c981e28SIdo Schimmel 	neigh_ifdown(&nd_tbl, dev);
41071da177e4SLinus Torvalds }
41081da177e4SLinus Torvalds 
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
41131da177e4SLinus Torvalds 
41148d1c802bSDavid Ahern static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
41151da177e4SLinus Torvalds {
41161da177e4SLinus Torvalds 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
41171da177e4SLinus Torvalds 	struct inet6_dev *idev;
41181da177e4SLinus Torvalds 
41191da177e4SLinus Torvalds 	/* In IPv6 pmtu discovery is not optional,
41201da177e4SLinus Torvalds 	   so that RTAX_MTU lock cannot disable it.
41211da177e4SLinus Torvalds 	   We still use this lock to block changes
41221da177e4SLinus Torvalds 	   caused by addrconf/ndisc.
41231da177e4SLinus Torvalds 	*/
41241da177e4SLinus Torvalds 
41251da177e4SLinus Torvalds 	idev = __in6_dev_get(arg->dev);
412638308473SDavid S. Miller 	if (!idev)
41271da177e4SLinus Torvalds 		return 0;
41281da177e4SLinus Torvalds 
41291da177e4SLinus Torvalds 	/* For administrative MTU increase, there is no way to discover
41301da177e4SLinus Torvalds 	   IPv6 PMTU increase, so PMTU increase should be updated here.
41311da177e4SLinus Torvalds 	   Since RFC 1981 doesn't include administrative MTU increase
41321da177e4SLinus Torvalds 	   update PMTU increase is a MUST. (i.e. jumbo frame)
41331da177e4SLinus Torvalds 	 */
4134ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_dev == arg->dev &&
4135d4ead6b3SDavid Ahern 	    !fib6_metric_locked(rt, RTAX_MTU)) {
4136d4ead6b3SDavid Ahern 		u32 mtu = rt->fib6_pmtu;
4137d4ead6b3SDavid Ahern 
4138d4ead6b3SDavid Ahern 		if (mtu >= arg->mtu ||
4139d4ead6b3SDavid Ahern 		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4140d4ead6b3SDavid Ahern 			fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4141d4ead6b3SDavid Ahern 
4142f5bbe7eeSWei Wang 		spin_lock_bh(&rt6_exception_lock);
4143e9fa1495SStefano Brivio 		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
4144f5bbe7eeSWei Wang 		spin_unlock_bh(&rt6_exception_lock);
41454b32b5adSMartin KaFai Lau 	}
41461da177e4SLinus Torvalds 	return 0;
41471da177e4SLinus Torvalds }
41481da177e4SLinus Torvalds 
414995c96174SEric Dumazet void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
41501da177e4SLinus Torvalds {
4151c71099acSThomas Graf 	struct rt6_mtu_change_arg arg = {
4152c71099acSThomas Graf 		.dev = dev,
4153c71099acSThomas Graf 		.mtu = mtu,
4154c71099acSThomas Graf 	};
41551da177e4SLinus Torvalds 
41560c3584d5SLi RongQing 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
41571da177e4SLinus Torvalds }
41581da177e4SLinus Torvalds 
4159ef7c79edSPatrick McHardy static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
41605176f91eSThomas Graf 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
4161aa8f8778SEric Dumazet 	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
416286872cb5SThomas Graf 	[RTA_OIF]               = { .type = NLA_U32 },
4163ab364a6fSThomas Graf 	[RTA_IIF]		= { .type = NLA_U32 },
416486872cb5SThomas Graf 	[RTA_PRIORITY]          = { .type = NLA_U32 },
416586872cb5SThomas Graf 	[RTA_METRICS]           = { .type = NLA_NESTED },
416651ebd318SNicolas Dichtel 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
4167c78ba6d6SLubomir Rintel 	[RTA_PREF]              = { .type = NLA_U8 },
416819e42e45SRoopa Prabhu 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
416919e42e45SRoopa Prabhu 	[RTA_ENCAP]		= { .type = NLA_NESTED },
417032bc201eSXin Long 	[RTA_EXPIRES]		= { .type = NLA_U32 },
4171622ec2c9SLorenzo Colitti 	[RTA_UID]		= { .type = NLA_U32 },
41723b45a410SLiping Zhang 	[RTA_MARK]		= { .type = NLA_U32 },
4173aa8f8778SEric Dumazet 	[RTA_TABLE]		= { .type = NLA_U32 },
4174eacb9384SRoopa Prabhu 	[RTA_IP_PROTO]		= { .type = NLA_U8 },
4175eacb9384SRoopa Prabhu 	[RTA_SPORT]		= { .type = NLA_U16 },
4176eacb9384SRoopa Prabhu 	[RTA_DPORT]		= { .type = NLA_U16 },
417786872cb5SThomas Graf };
417886872cb5SThomas Graf 
417986872cb5SThomas Graf static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4180333c4301SDavid Ahern 			      struct fib6_config *cfg,
4181333c4301SDavid Ahern 			      struct netlink_ext_ack *extack)
41821da177e4SLinus Torvalds {
418386872cb5SThomas Graf 	struct rtmsg *rtm;
418486872cb5SThomas Graf 	struct nlattr *tb[RTA_MAX+1];
4185c78ba6d6SLubomir Rintel 	unsigned int pref;
418686872cb5SThomas Graf 	int err;
41871da177e4SLinus Torvalds 
4188fceb6435SJohannes Berg 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4189dac9c979SDavid Ahern 			  extack);
419086872cb5SThomas Graf 	if (err < 0)
419186872cb5SThomas Graf 		goto errout;
41921da177e4SLinus Torvalds 
419386872cb5SThomas Graf 	err = -EINVAL;
419486872cb5SThomas Graf 	rtm = nlmsg_data(nlh);
419586872cb5SThomas Graf 
419684db8407SMaciej Żenczykowski 	*cfg = (struct fib6_config){
419784db8407SMaciej Żenczykowski 		.fc_table = rtm->rtm_table,
419884db8407SMaciej Żenczykowski 		.fc_dst_len = rtm->rtm_dst_len,
419984db8407SMaciej Żenczykowski 		.fc_src_len = rtm->rtm_src_len,
420084db8407SMaciej Żenczykowski 		.fc_flags = RTF_UP,
420184db8407SMaciej Żenczykowski 		.fc_protocol = rtm->rtm_protocol,
420284db8407SMaciej Żenczykowski 		.fc_type = rtm->rtm_type,
420384db8407SMaciej Żenczykowski 
420484db8407SMaciej Żenczykowski 		.fc_nlinfo.portid = NETLINK_CB(skb).portid,
420584db8407SMaciej Żenczykowski 		.fc_nlinfo.nlh = nlh,
420684db8407SMaciej Żenczykowski 		.fc_nlinfo.nl_net = sock_net(skb->sk),
420784db8407SMaciej Żenczykowski 	};
420886872cb5SThomas Graf 
4209ef2c7d7bSNicolas Dichtel 	if (rtm->rtm_type == RTN_UNREACHABLE ||
4210ef2c7d7bSNicolas Dichtel 	    rtm->rtm_type == RTN_BLACKHOLE ||
4211b4949ab2SNicolas Dichtel 	    rtm->rtm_type == RTN_PROHIBIT ||
4212b4949ab2SNicolas Dichtel 	    rtm->rtm_type == RTN_THROW)
421386872cb5SThomas Graf 		cfg->fc_flags |= RTF_REJECT;
421486872cb5SThomas Graf 
4215ab79ad14SMaciej Żenczykowski 	if (rtm->rtm_type == RTN_LOCAL)
4216ab79ad14SMaciej Żenczykowski 		cfg->fc_flags |= RTF_LOCAL;
4217ab79ad14SMaciej Żenczykowski 
42181f56a01fSMartin KaFai Lau 	if (rtm->rtm_flags & RTM_F_CLONED)
42191f56a01fSMartin KaFai Lau 		cfg->fc_flags |= RTF_CACHE;
42201f56a01fSMartin KaFai Lau 
4221fc1e64e1SDavid Ahern 	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4222fc1e64e1SDavid Ahern 
422386872cb5SThomas Graf 	if (tb[RTA_GATEWAY]) {
422467b61f6cSJiri Benc 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
422586872cb5SThomas Graf 		cfg->fc_flags |= RTF_GATEWAY;
42261da177e4SLinus Torvalds 	}
4227e3818541SDavid Ahern 	if (tb[RTA_VIA]) {
4228e3818541SDavid Ahern 		NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4229e3818541SDavid Ahern 		goto errout;
4230e3818541SDavid Ahern 	}
423186872cb5SThomas Graf 
423286872cb5SThomas Graf 	if (tb[RTA_DST]) {
423386872cb5SThomas Graf 		int plen = (rtm->rtm_dst_len + 7) >> 3;
423486872cb5SThomas Graf 
423586872cb5SThomas Graf 		if (nla_len(tb[RTA_DST]) < plen)
423686872cb5SThomas Graf 			goto errout;
423786872cb5SThomas Graf 
423886872cb5SThomas Graf 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
42391da177e4SLinus Torvalds 	}
424086872cb5SThomas Graf 
424186872cb5SThomas Graf 	if (tb[RTA_SRC]) {
424286872cb5SThomas Graf 		int plen = (rtm->rtm_src_len + 7) >> 3;
424386872cb5SThomas Graf 
424486872cb5SThomas Graf 		if (nla_len(tb[RTA_SRC]) < plen)
424586872cb5SThomas Graf 			goto errout;
424686872cb5SThomas Graf 
424786872cb5SThomas Graf 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
42481da177e4SLinus Torvalds 	}
424986872cb5SThomas Graf 
4250c3968a85SDaniel Walter 	if (tb[RTA_PREFSRC])
425167b61f6cSJiri Benc 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4252c3968a85SDaniel Walter 
425386872cb5SThomas Graf 	if (tb[RTA_OIF])
425486872cb5SThomas Graf 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
425586872cb5SThomas Graf 
425686872cb5SThomas Graf 	if (tb[RTA_PRIORITY])
425786872cb5SThomas Graf 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
425886872cb5SThomas Graf 
425986872cb5SThomas Graf 	if (tb[RTA_METRICS]) {
426086872cb5SThomas Graf 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
426186872cb5SThomas Graf 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
42621da177e4SLinus Torvalds 	}
426386872cb5SThomas Graf 
426486872cb5SThomas Graf 	if (tb[RTA_TABLE])
426586872cb5SThomas Graf 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
426686872cb5SThomas Graf 
426751ebd318SNicolas Dichtel 	if (tb[RTA_MULTIPATH]) {
426851ebd318SNicolas Dichtel 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
426951ebd318SNicolas Dichtel 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
42709ed59592SDavid Ahern 
42719ed59592SDavid Ahern 		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4272c255bd68SDavid Ahern 						     cfg->fc_mp_len, extack);
42739ed59592SDavid Ahern 		if (err < 0)
42749ed59592SDavid Ahern 			goto errout;
427551ebd318SNicolas Dichtel 	}
427651ebd318SNicolas Dichtel 
4277c78ba6d6SLubomir Rintel 	if (tb[RTA_PREF]) {
4278c78ba6d6SLubomir Rintel 		pref = nla_get_u8(tb[RTA_PREF]);
4279c78ba6d6SLubomir Rintel 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
4280c78ba6d6SLubomir Rintel 		    pref != ICMPV6_ROUTER_PREF_HIGH)
4281c78ba6d6SLubomir Rintel 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
4282c78ba6d6SLubomir Rintel 		cfg->fc_flags |= RTF_PREF(pref);
4283c78ba6d6SLubomir Rintel 	}
4284c78ba6d6SLubomir Rintel 
428519e42e45SRoopa Prabhu 	if (tb[RTA_ENCAP])
428619e42e45SRoopa Prabhu 		cfg->fc_encap = tb[RTA_ENCAP];
428719e42e45SRoopa Prabhu 
42889ed59592SDavid Ahern 	if (tb[RTA_ENCAP_TYPE]) {
428919e42e45SRoopa Prabhu 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
429019e42e45SRoopa Prabhu 
4291c255bd68SDavid Ahern 		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
42929ed59592SDavid Ahern 		if (err < 0)
42939ed59592SDavid Ahern 			goto errout;
42949ed59592SDavid Ahern 	}
42959ed59592SDavid Ahern 
429632bc201eSXin Long 	if (tb[RTA_EXPIRES]) {
429732bc201eSXin Long 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
429832bc201eSXin Long 
429932bc201eSXin Long 		if (addrconf_finite_timeout(timeout)) {
430032bc201eSXin Long 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
430132bc201eSXin Long 			cfg->fc_flags |= RTF_EXPIRES;
430232bc201eSXin Long 		}
430332bc201eSXin Long 	}
430432bc201eSXin Long 
430586872cb5SThomas Graf 	err = 0;
430686872cb5SThomas Graf errout:
430786872cb5SThomas Graf 	return err;
43081da177e4SLinus Torvalds }
43091da177e4SLinus Torvalds 
43106b9ea5a6SRoopa Prabhu struct rt6_nh {
43118d1c802bSDavid Ahern 	struct fib6_info *fib6_info;
43126b9ea5a6SRoopa Prabhu 	struct fib6_config r_cfg;
43136b9ea5a6SRoopa Prabhu 	struct list_head next;
43146b9ea5a6SRoopa Prabhu };
43156b9ea5a6SRoopa Prabhu 
4316d4ead6b3SDavid Ahern static int ip6_route_info_append(struct net *net,
4317d4ead6b3SDavid Ahern 				 struct list_head *rt6_nh_list,
43188d1c802bSDavid Ahern 				 struct fib6_info *rt,
43198d1c802bSDavid Ahern 				 struct fib6_config *r_cfg)
43206b9ea5a6SRoopa Prabhu {
43216b9ea5a6SRoopa Prabhu 	struct rt6_nh *nh;
43226b9ea5a6SRoopa Prabhu 	int err = -EEXIST;
43236b9ea5a6SRoopa Prabhu 
43246b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, rt6_nh_list, next) {
43258d1c802bSDavid Ahern 		/* check if fib6_info already exists */
43268d1c802bSDavid Ahern 		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
43276b9ea5a6SRoopa Prabhu 			return err;
43286b9ea5a6SRoopa Prabhu 	}
43296b9ea5a6SRoopa Prabhu 
43306b9ea5a6SRoopa Prabhu 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
43316b9ea5a6SRoopa Prabhu 	if (!nh)
43326b9ea5a6SRoopa Prabhu 		return -ENOMEM;
43338d1c802bSDavid Ahern 	nh->fib6_info = rt;
43346b9ea5a6SRoopa Prabhu 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
43356b9ea5a6SRoopa Prabhu 	list_add_tail(&nh->next, rt6_nh_list);
43366b9ea5a6SRoopa Prabhu 
43376b9ea5a6SRoopa Prabhu 	return 0;
43386b9ea5a6SRoopa Prabhu }
43396b9ea5a6SRoopa Prabhu 
43408d1c802bSDavid Ahern static void ip6_route_mpath_notify(struct fib6_info *rt,
43418d1c802bSDavid Ahern 				   struct fib6_info *rt_last,
43423b1137feSDavid Ahern 				   struct nl_info *info,
43433b1137feSDavid Ahern 				   __u16 nlflags)
43443b1137feSDavid Ahern {
43453b1137feSDavid Ahern 	/* if this is an APPEND route, then rt points to the first route
43463b1137feSDavid Ahern 	 * inserted and rt_last points to last route inserted. Userspace
43473b1137feSDavid Ahern 	 * wants a consistent dump of the route which starts at the first
43483b1137feSDavid Ahern 	 * nexthop. Since sibling routes are always added at the end of
43493b1137feSDavid Ahern 	 * the list, find the first sibling of the last route appended
43503b1137feSDavid Ahern 	 */
435193c2fb25SDavid Ahern 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
435293c2fb25SDavid Ahern 		rt = list_first_entry(&rt_last->fib6_siblings,
43538d1c802bSDavid Ahern 				      struct fib6_info,
435493c2fb25SDavid Ahern 				      fib6_siblings);
43553b1137feSDavid Ahern 	}
43563b1137feSDavid Ahern 
43573b1137feSDavid Ahern 	if (rt)
43583b1137feSDavid Ahern 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
43593b1137feSDavid Ahern }
43603b1137feSDavid Ahern 
4361333c4301SDavid Ahern static int ip6_route_multipath_add(struct fib6_config *cfg,
4362333c4301SDavid Ahern 				   struct netlink_ext_ack *extack)
436351ebd318SNicolas Dichtel {
43648d1c802bSDavid Ahern 	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
43653b1137feSDavid Ahern 	struct nl_info *info = &cfg->fc_nlinfo;
436651ebd318SNicolas Dichtel 	struct fib6_config r_cfg;
436751ebd318SNicolas Dichtel 	struct rtnexthop *rtnh;
43688d1c802bSDavid Ahern 	struct fib6_info *rt;
43696b9ea5a6SRoopa Prabhu 	struct rt6_nh *err_nh;
43706b9ea5a6SRoopa Prabhu 	struct rt6_nh *nh, *nh_safe;
43713b1137feSDavid Ahern 	__u16 nlflags;
437251ebd318SNicolas Dichtel 	int remaining;
437351ebd318SNicolas Dichtel 	int attrlen;
43746b9ea5a6SRoopa Prabhu 	int err = 1;
43756b9ea5a6SRoopa Prabhu 	int nhn = 0;
43766b9ea5a6SRoopa Prabhu 	int replace = (cfg->fc_nlinfo.nlh &&
43776b9ea5a6SRoopa Prabhu 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
43786b9ea5a6SRoopa Prabhu 	LIST_HEAD(rt6_nh_list);
437951ebd318SNicolas Dichtel 
43803b1137feSDavid Ahern 	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
43813b1137feSDavid Ahern 	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
43823b1137feSDavid Ahern 		nlflags |= NLM_F_APPEND;
43833b1137feSDavid Ahern 
438435f1b4e9SMichal Kubeček 	remaining = cfg->fc_mp_len;
438551ebd318SNicolas Dichtel 	rtnh = (struct rtnexthop *)cfg->fc_mp;
438651ebd318SNicolas Dichtel 
43876b9ea5a6SRoopa Prabhu 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
43888d1c802bSDavid Ahern 	 * fib6_info structs per nexthop
43896b9ea5a6SRoopa Prabhu 	 */
439051ebd318SNicolas Dichtel 	while (rtnh_ok(rtnh, remaining)) {
439151ebd318SNicolas Dichtel 		memcpy(&r_cfg, cfg, sizeof(*cfg));
439251ebd318SNicolas Dichtel 		if (rtnh->rtnh_ifindex)
439351ebd318SNicolas Dichtel 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
439451ebd318SNicolas Dichtel 
439551ebd318SNicolas Dichtel 		attrlen = rtnh_attrlen(rtnh);
439651ebd318SNicolas Dichtel 		if (attrlen > 0) {
439751ebd318SNicolas Dichtel 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
439851ebd318SNicolas Dichtel 
439951ebd318SNicolas Dichtel 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
440051ebd318SNicolas Dichtel 			if (nla) {
440167b61f6cSJiri Benc 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
440251ebd318SNicolas Dichtel 				r_cfg.fc_flags |= RTF_GATEWAY;
440351ebd318SNicolas Dichtel 			}
440419e42e45SRoopa Prabhu 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
440519e42e45SRoopa Prabhu 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
440619e42e45SRoopa Prabhu 			if (nla)
440719e42e45SRoopa Prabhu 				r_cfg.fc_encap_type = nla_get_u16(nla);
440851ebd318SNicolas Dichtel 		}
44096b9ea5a6SRoopa Prabhu 
441068e2ffdeSDavid Ahern 		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
4411acb54e3cSDavid Ahern 		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
44128c5b83f0SRoopa Prabhu 		if (IS_ERR(rt)) {
44138c5b83f0SRoopa Prabhu 			err = PTR_ERR(rt);
44148c5b83f0SRoopa Prabhu 			rt = NULL;
44156b9ea5a6SRoopa Prabhu 			goto cleanup;
44168c5b83f0SRoopa Prabhu 		}
4417b5d2d75eSDavid Ahern 		if (!rt6_qualify_for_ecmp(rt)) {
4418b5d2d75eSDavid Ahern 			err = -EINVAL;
4419b5d2d75eSDavid Ahern 			NL_SET_ERR_MSG(extack,
4420b5d2d75eSDavid Ahern 				       "Device only routes can not be added for IPv6 using the multipath API.");
4421b5d2d75eSDavid Ahern 			fib6_info_release(rt);
4422b5d2d75eSDavid Ahern 			goto cleanup;
4423b5d2d75eSDavid Ahern 		}
44246b9ea5a6SRoopa Prabhu 
4425ad1601aeSDavid Ahern 		rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
4426398958aeSIdo Schimmel 
4427d4ead6b3SDavid Ahern 		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4428d4ead6b3SDavid Ahern 					    rt, &r_cfg);
442951ebd318SNicolas Dichtel 		if (err) {
443093531c67SDavid Ahern 			fib6_info_release(rt);
44316b9ea5a6SRoopa Prabhu 			goto cleanup;
443251ebd318SNicolas Dichtel 		}
44336b9ea5a6SRoopa Prabhu 
44346b9ea5a6SRoopa Prabhu 		rtnh = rtnh_next(rtnh, &remaining);
443551ebd318SNicolas Dichtel 	}
44366b9ea5a6SRoopa Prabhu 
44373b1137feSDavid Ahern 	/* for add and replace send one notification with all nexthops.
44383b1137feSDavid Ahern 	 * Skip the notification in fib6_add_rt2node and send one with
44393b1137feSDavid Ahern 	 * the full route when done
44403b1137feSDavid Ahern 	 */
44413b1137feSDavid Ahern 	info->skip_notify = 1;
44423b1137feSDavid Ahern 
44436b9ea5a6SRoopa Prabhu 	err_nh = NULL;
44446b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, &rt6_nh_list, next) {
44458d1c802bSDavid Ahern 		err = __ip6_ins_rt(nh->fib6_info, info, extack);
44468d1c802bSDavid Ahern 		fib6_info_release(nh->fib6_info);
44473b1137feSDavid Ahern 
4448f7225172SDavid Ahern 		if (!err) {
4449f7225172SDavid Ahern 			/* save reference to last route successfully inserted */
4450f7225172SDavid Ahern 			rt_last = nh->fib6_info;
4451f7225172SDavid Ahern 
44526b9ea5a6SRoopa Prabhu 			/* save reference to first route for notification */
4453f7225172SDavid Ahern 			if (!rt_notif)
44548d1c802bSDavid Ahern 				rt_notif = nh->fib6_info;
4455f7225172SDavid Ahern 		}
44566b9ea5a6SRoopa Prabhu 
44578d1c802bSDavid Ahern 		/* nh->fib6_info is used or freed at this point, reset to NULL*/
44588d1c802bSDavid Ahern 		nh->fib6_info = NULL;
44596b9ea5a6SRoopa Prabhu 		if (err) {
44606b9ea5a6SRoopa Prabhu 			if (replace && nhn)
4461a5a82d84SJakub Kicinski 				NL_SET_ERR_MSG_MOD(extack,
4462a5a82d84SJakub Kicinski 						   "multipath route replace failed (check consistency of installed routes)");
44636b9ea5a6SRoopa Prabhu 			err_nh = nh;
44646b9ea5a6SRoopa Prabhu 			goto add_errout;
44656b9ea5a6SRoopa Prabhu 		}
44666b9ea5a6SRoopa Prabhu 
44671a72418bSNicolas Dichtel 		/* Because each route is added like a single route we remove
446827596472SMichal Kubeček 		 * these flags after the first nexthop: if there is a collision,
446927596472SMichal Kubeček 		 * we have already failed to add the first nexthop:
447027596472SMichal Kubeček 		 * fib6_add_rt2node() has rejected it; when replacing, old
447127596472SMichal Kubeček 		 * nexthops have been replaced by first new, the rest should
447227596472SMichal Kubeček 		 * be added to it.
44731a72418bSNicolas Dichtel 		 */
447427596472SMichal Kubeček 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
447527596472SMichal Kubeček 						     NLM_F_REPLACE);
44766b9ea5a6SRoopa Prabhu 		nhn++;
44776b9ea5a6SRoopa Prabhu 	}
44786b9ea5a6SRoopa Prabhu 
44793b1137feSDavid Ahern 	/* success ... tell user about new route */
44803b1137feSDavid Ahern 	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
44816b9ea5a6SRoopa Prabhu 	goto cleanup;
44826b9ea5a6SRoopa Prabhu 
44836b9ea5a6SRoopa Prabhu add_errout:
44843b1137feSDavid Ahern 	/* send notification for routes that were added so that
44853b1137feSDavid Ahern 	 * the delete notifications sent by ip6_route_del are
44863b1137feSDavid Ahern 	 * coherent
44873b1137feSDavid Ahern 	 */
44883b1137feSDavid Ahern 	if (rt_notif)
44893b1137feSDavid Ahern 		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
44903b1137feSDavid Ahern 
44916b9ea5a6SRoopa Prabhu 	/* Delete routes that were already added */
44926b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, &rt6_nh_list, next) {
44936b9ea5a6SRoopa Prabhu 		if (err_nh == nh)
44946b9ea5a6SRoopa Prabhu 			break;
4495333c4301SDavid Ahern 		ip6_route_del(&nh->r_cfg, extack);
44966b9ea5a6SRoopa Prabhu 	}
44976b9ea5a6SRoopa Prabhu 
44986b9ea5a6SRoopa Prabhu cleanup:
44996b9ea5a6SRoopa Prabhu 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
45008d1c802bSDavid Ahern 		if (nh->fib6_info)
45018d1c802bSDavid Ahern 			fib6_info_release(nh->fib6_info);
45026b9ea5a6SRoopa Prabhu 		list_del(&nh->next);
45036b9ea5a6SRoopa Prabhu 		kfree(nh);
45046b9ea5a6SRoopa Prabhu 	}
45056b9ea5a6SRoopa Prabhu 
45066b9ea5a6SRoopa Prabhu 	return err;
45076b9ea5a6SRoopa Prabhu }
45086b9ea5a6SRoopa Prabhu 
4509333c4301SDavid Ahern static int ip6_route_multipath_del(struct fib6_config *cfg,
4510333c4301SDavid Ahern 				   struct netlink_ext_ack *extack)
45116b9ea5a6SRoopa Prabhu {
45126b9ea5a6SRoopa Prabhu 	struct fib6_config r_cfg;
45136b9ea5a6SRoopa Prabhu 	struct rtnexthop *rtnh;
45146b9ea5a6SRoopa Prabhu 	int remaining;
45156b9ea5a6SRoopa Prabhu 	int attrlen;
45166b9ea5a6SRoopa Prabhu 	int err = 1, last_err = 0;
45176b9ea5a6SRoopa Prabhu 
45186b9ea5a6SRoopa Prabhu 	remaining = cfg->fc_mp_len;
45196b9ea5a6SRoopa Prabhu 	rtnh = (struct rtnexthop *)cfg->fc_mp;
45206b9ea5a6SRoopa Prabhu 
45216b9ea5a6SRoopa Prabhu 	/* Parse a Multipath Entry */
45226b9ea5a6SRoopa Prabhu 	while (rtnh_ok(rtnh, remaining)) {
45236b9ea5a6SRoopa Prabhu 		memcpy(&r_cfg, cfg, sizeof(*cfg));
45246b9ea5a6SRoopa Prabhu 		if (rtnh->rtnh_ifindex)
45256b9ea5a6SRoopa Prabhu 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
45266b9ea5a6SRoopa Prabhu 
45276b9ea5a6SRoopa Prabhu 		attrlen = rtnh_attrlen(rtnh);
45286b9ea5a6SRoopa Prabhu 		if (attrlen > 0) {
45296b9ea5a6SRoopa Prabhu 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
45306b9ea5a6SRoopa Prabhu 
45316b9ea5a6SRoopa Prabhu 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
45326b9ea5a6SRoopa Prabhu 			if (nla) {
45336b9ea5a6SRoopa Prabhu 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
45346b9ea5a6SRoopa Prabhu 				r_cfg.fc_flags |= RTF_GATEWAY;
45356b9ea5a6SRoopa Prabhu 			}
45366b9ea5a6SRoopa Prabhu 		}
4537333c4301SDavid Ahern 		err = ip6_route_del(&r_cfg, extack);
45386b9ea5a6SRoopa Prabhu 		if (err)
45396b9ea5a6SRoopa Prabhu 			last_err = err;
45406b9ea5a6SRoopa Prabhu 
454151ebd318SNicolas Dichtel 		rtnh = rtnh_next(rtnh, &remaining);
454251ebd318SNicolas Dichtel 	}
454351ebd318SNicolas Dichtel 
454451ebd318SNicolas Dichtel 	return last_err;
454551ebd318SNicolas Dichtel }
454651ebd318SNicolas Dichtel 
4547c21ef3e3SDavid Ahern static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4548c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
45491da177e4SLinus Torvalds {
455086872cb5SThomas Graf 	struct fib6_config cfg;
455186872cb5SThomas Graf 	int err;
45521da177e4SLinus Torvalds 
4553333c4301SDavid Ahern 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
455486872cb5SThomas Graf 	if (err < 0)
455586872cb5SThomas Graf 		return err;
455686872cb5SThomas Graf 
455751ebd318SNicolas Dichtel 	if (cfg.fc_mp)
4558333c4301SDavid Ahern 		return ip6_route_multipath_del(&cfg, extack);
45590ae81335SDavid Ahern 	else {
45600ae81335SDavid Ahern 		cfg.fc_delete_all_nh = 1;
4561333c4301SDavid Ahern 		return ip6_route_del(&cfg, extack);
45621da177e4SLinus Torvalds 	}
45630ae81335SDavid Ahern }
45641da177e4SLinus Torvalds 
4565c21ef3e3SDavid Ahern static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4566c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
45671da177e4SLinus Torvalds {
456886872cb5SThomas Graf 	struct fib6_config cfg;
456986872cb5SThomas Graf 	int err;
45701da177e4SLinus Torvalds 
4571333c4301SDavid Ahern 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
457286872cb5SThomas Graf 	if (err < 0)
457386872cb5SThomas Graf 		return err;
457486872cb5SThomas Graf 
457567f69513SDavid Ahern 	if (cfg.fc_metric == 0)
457667f69513SDavid Ahern 		cfg.fc_metric = IP6_RT_PRIO_USER;
457767f69513SDavid Ahern 
457851ebd318SNicolas Dichtel 	if (cfg.fc_mp)
4579333c4301SDavid Ahern 		return ip6_route_multipath_add(&cfg, extack);
458051ebd318SNicolas Dichtel 	else
4581acb54e3cSDavid Ahern 		return ip6_route_add(&cfg, GFP_KERNEL, extack);
45821da177e4SLinus Torvalds }
45831da177e4SLinus Torvalds 
45848d1c802bSDavid Ahern static size_t rt6_nlmsg_size(struct fib6_info *rt)
4585339bf98fSThomas Graf {
4586beb1afacSDavid Ahern 	int nexthop_len = 0;
4587beb1afacSDavid Ahern 
458893c2fb25SDavid Ahern 	if (rt->fib6_nsiblings) {
4589beb1afacSDavid Ahern 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
4590beb1afacSDavid Ahern 			    + NLA_ALIGN(sizeof(struct rtnexthop))
4591beb1afacSDavid Ahern 			    + nla_total_size(16) /* RTA_GATEWAY */
4592ad1601aeSDavid Ahern 			    + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
4593beb1afacSDavid Ahern 
459493c2fb25SDavid Ahern 		nexthop_len *= rt->fib6_nsiblings;
4595beb1afacSDavid Ahern 	}
4596beb1afacSDavid Ahern 
4597339bf98fSThomas Graf 	return NLMSG_ALIGN(sizeof(struct rtmsg))
4598339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_SRC */
4599339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_DST */
4600339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_GATEWAY */
4601339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_PREFSRC */
4602339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_TABLE */
4603339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_IIF */
4604339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_OIF */
4605339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_PRIORITY */
46066a2b9ce0SNoriaki TAKAMIYA 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
4607ea697639SDaniel Borkmann 	       + nla_total_size(sizeof(struct rta_cacheinfo))
4608c78ba6d6SLubomir Rintel 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
460919e42e45SRoopa Prabhu 	       + nla_total_size(1) /* RTA_PREF */
4610ad1601aeSDavid Ahern 	       + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
4611beb1afacSDavid Ahern 	       + nexthop_len;
4612beb1afacSDavid Ahern }
4613beb1afacSDavid Ahern 
4614d4ead6b3SDavid Ahern static int rt6_fill_node(struct net *net, struct sk_buff *skb,
46158d1c802bSDavid Ahern 			 struct fib6_info *rt, struct dst_entry *dst,
4616d4ead6b3SDavid Ahern 			 struct in6_addr *dest, struct in6_addr *src,
461715e47304SEric W. Biederman 			 int iif, int type, u32 portid, u32 seq,
4618f8cfe2ceSDavid Ahern 			 unsigned int flags)
46191da177e4SLinus Torvalds {
462022d0bd82SXin Long 	struct rt6_info *rt6 = (struct rt6_info *)dst;
462122d0bd82SXin Long 	struct rt6key *rt6_dst, *rt6_src;
462222d0bd82SXin Long 	u32 *pmetrics, table, rt6_flags;
46231da177e4SLinus Torvalds 	struct nlmsghdr *nlh;
462422d0bd82SXin Long 	struct rtmsg *rtm;
4625d4ead6b3SDavid Ahern 	long expires = 0;
46261da177e4SLinus Torvalds 
462715e47304SEric W. Biederman 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
462838308473SDavid S. Miller 	if (!nlh)
462926932566SPatrick McHardy 		return -EMSGSIZE;
46302d7202bfSThomas Graf 
463122d0bd82SXin Long 	if (rt6) {
463222d0bd82SXin Long 		rt6_dst = &rt6->rt6i_dst;
463322d0bd82SXin Long 		rt6_src = &rt6->rt6i_src;
463422d0bd82SXin Long 		rt6_flags = rt6->rt6i_flags;
463522d0bd82SXin Long 	} else {
463622d0bd82SXin Long 		rt6_dst = &rt->fib6_dst;
463722d0bd82SXin Long 		rt6_src = &rt->fib6_src;
463822d0bd82SXin Long 		rt6_flags = rt->fib6_flags;
463922d0bd82SXin Long 	}
464022d0bd82SXin Long 
46412d7202bfSThomas Graf 	rtm = nlmsg_data(nlh);
46421da177e4SLinus Torvalds 	rtm->rtm_family = AF_INET6;
464322d0bd82SXin Long 	rtm->rtm_dst_len = rt6_dst->plen;
464422d0bd82SXin Long 	rtm->rtm_src_len = rt6_src->plen;
46451da177e4SLinus Torvalds 	rtm->rtm_tos = 0;
464693c2fb25SDavid Ahern 	if (rt->fib6_table)
464793c2fb25SDavid Ahern 		table = rt->fib6_table->tb6_id;
4648c71099acSThomas Graf 	else
46499e762a4aSPatrick McHardy 		table = RT6_TABLE_UNSPEC;
465097f0082aSKalash Nainwal 	rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
4651c78679e8SDavid S. Miller 	if (nla_put_u32(skb, RTA_TABLE, table))
4652c78679e8SDavid S. Miller 		goto nla_put_failure;
4653e8478e80SDavid Ahern 
4654e8478e80SDavid Ahern 	rtm->rtm_type = rt->fib6_type;
46551da177e4SLinus Torvalds 	rtm->rtm_flags = 0;
46561da177e4SLinus Torvalds 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
465793c2fb25SDavid Ahern 	rtm->rtm_protocol = rt->fib6_protocol;
46581da177e4SLinus Torvalds 
465922d0bd82SXin Long 	if (rt6_flags & RTF_CACHE)
46601da177e4SLinus Torvalds 		rtm->rtm_flags |= RTM_F_CLONED;
46611da177e4SLinus Torvalds 
4662d4ead6b3SDavid Ahern 	if (dest) {
4663d4ead6b3SDavid Ahern 		if (nla_put_in6_addr(skb, RTA_DST, dest))
4664c78679e8SDavid S. Miller 			goto nla_put_failure;
46651da177e4SLinus Torvalds 		rtm->rtm_dst_len = 128;
46661da177e4SLinus Torvalds 	} else if (rtm->rtm_dst_len)
466722d0bd82SXin Long 		if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
4668c78679e8SDavid S. Miller 			goto nla_put_failure;
46691da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
46701da177e4SLinus Torvalds 	if (src) {
4671930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_SRC, src))
4672c78679e8SDavid S. Miller 			goto nla_put_failure;
46731da177e4SLinus Torvalds 		rtm->rtm_src_len = 128;
4674c78679e8SDavid S. Miller 	} else if (rtm->rtm_src_len &&
467522d0bd82SXin Long 		   nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
4676c78679e8SDavid S. Miller 		goto nla_put_failure;
46771da177e4SLinus Torvalds #endif
46787bc570c8SYOSHIFUJI Hideaki 	if (iif) {
46797bc570c8SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_MROUTE
468022d0bd82SXin Long 		if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
4681fd61c6baSDavid Ahern 			int err = ip6mr_get_route(net, skb, rtm, portid);
46822cf75070SNikolay Aleksandrov 
46837bc570c8SYOSHIFUJI Hideaki 			if (err == 0)
46847bc570c8SYOSHIFUJI Hideaki 				return 0;
4685fd61c6baSDavid Ahern 			if (err < 0)
46867bc570c8SYOSHIFUJI Hideaki 				goto nla_put_failure;
46877bc570c8SYOSHIFUJI Hideaki 		} else
46887bc570c8SYOSHIFUJI Hideaki #endif
4689c78679e8SDavid S. Miller 			if (nla_put_u32(skb, RTA_IIF, iif))
4690c78679e8SDavid S. Miller 				goto nla_put_failure;
4691d4ead6b3SDavid Ahern 	} else if (dest) {
46921da177e4SLinus Torvalds 		struct in6_addr saddr_buf;
4693d4ead6b3SDavid Ahern 		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
4694930345eaSJiri Benc 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4695c78679e8SDavid S. Miller 			goto nla_put_failure;
4696c3968a85SDaniel Walter 	}
4697c3968a85SDaniel Walter 
469893c2fb25SDavid Ahern 	if (rt->fib6_prefsrc.plen) {
4699c3968a85SDaniel Walter 		struct in6_addr saddr_buf;
470093c2fb25SDavid Ahern 		saddr_buf = rt->fib6_prefsrc.addr;
4701930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4702c78679e8SDavid S. Miller 			goto nla_put_failure;
47031da177e4SLinus Torvalds 	}
47042d7202bfSThomas Graf 
4705d4ead6b3SDavid Ahern 	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4706d4ead6b3SDavid Ahern 	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
47072d7202bfSThomas Graf 		goto nla_put_failure;
47082d7202bfSThomas Graf 
470993c2fb25SDavid Ahern 	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
4710beb1afacSDavid Ahern 		goto nla_put_failure;
4711beb1afacSDavid Ahern 
4712beb1afacSDavid Ahern 	/* For multipath routes, walk the siblings list and add
4713beb1afacSDavid Ahern 	 * each as a nexthop within RTA_MULTIPATH.
4714beb1afacSDavid Ahern 	 */
471522d0bd82SXin Long 	if (rt6) {
471622d0bd82SXin Long 		if (rt6_flags & RTF_GATEWAY &&
471722d0bd82SXin Long 		    nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
471822d0bd82SXin Long 			goto nla_put_failure;
471922d0bd82SXin Long 
472022d0bd82SXin Long 		if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
472122d0bd82SXin Long 			goto nla_put_failure;
472222d0bd82SXin Long 	} else if (rt->fib6_nsiblings) {
47238d1c802bSDavid Ahern 		struct fib6_info *sibling, *next_sibling;
4724beb1afacSDavid Ahern 		struct nlattr *mp;
4725beb1afacSDavid Ahern 
4726beb1afacSDavid Ahern 		mp = nla_nest_start(skb, RTA_MULTIPATH);
4727beb1afacSDavid Ahern 		if (!mp)
4728beb1afacSDavid Ahern 			goto nla_put_failure;
4729beb1afacSDavid Ahern 
4730c0a72077SDavid Ahern 		if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
4731c0a72077SDavid Ahern 				    rt->fib6_nh.fib_nh_weight) < 0)
4732beb1afacSDavid Ahern 			goto nla_put_failure;
4733beb1afacSDavid Ahern 
4734beb1afacSDavid Ahern 		list_for_each_entry_safe(sibling, next_sibling,
473593c2fb25SDavid Ahern 					 &rt->fib6_siblings, fib6_siblings) {
4736c0a72077SDavid Ahern 			if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
4737c0a72077SDavid Ahern 					    sibling->fib6_nh.fib_nh_weight) < 0)
473894f826b8SEric Dumazet 				goto nla_put_failure;
473994f826b8SEric Dumazet 		}
47402d7202bfSThomas Graf 
4741beb1afacSDavid Ahern 		nla_nest_end(skb, mp);
4742beb1afacSDavid Ahern 	} else {
4743c0a72077SDavid Ahern 		if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
4744c0a72077SDavid Ahern 				     &rtm->rtm_flags, false) < 0)
4745c78679e8SDavid S. Miller 			goto nla_put_failure;
4746beb1afacSDavid Ahern 	}
47478253947eSLi Wei 
474822d0bd82SXin Long 	if (rt6_flags & RTF_EXPIRES) {
474914895687SDavid Ahern 		expires = dst ? dst->expires : rt->expires;
475014895687SDavid Ahern 		expires -= jiffies;
475114895687SDavid Ahern 	}
475269cdf8f9SYOSHIFUJI Hideaki 
4753d4ead6b3SDavid Ahern 	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
4754e3703b3dSThomas Graf 		goto nla_put_failure;
47551da177e4SLinus Torvalds 
475622d0bd82SXin Long 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
4757c78ba6d6SLubomir Rintel 		goto nla_put_failure;
4758c78ba6d6SLubomir Rintel 
475919e42e45SRoopa Prabhu 
4760053c095aSJohannes Berg 	nlmsg_end(skb, nlh);
4761053c095aSJohannes Berg 	return 0;
47622d7202bfSThomas Graf 
47632d7202bfSThomas Graf nla_put_failure:
476426932566SPatrick McHardy 	nlmsg_cancel(skb, nlh);
476526932566SPatrick McHardy 	return -EMSGSIZE;
47661da177e4SLinus Torvalds }
47671da177e4SLinus Torvalds 
476813e38901SDavid Ahern static bool fib6_info_uses_dev(const struct fib6_info *f6i,
476913e38901SDavid Ahern 			       const struct net_device *dev)
477013e38901SDavid Ahern {
4771ad1601aeSDavid Ahern 	if (f6i->fib6_nh.fib_nh_dev == dev)
477213e38901SDavid Ahern 		return true;
477313e38901SDavid Ahern 
477413e38901SDavid Ahern 	if (f6i->fib6_nsiblings) {
477513e38901SDavid Ahern 		struct fib6_info *sibling, *next_sibling;
477613e38901SDavid Ahern 
477713e38901SDavid Ahern 		list_for_each_entry_safe(sibling, next_sibling,
477813e38901SDavid Ahern 					 &f6i->fib6_siblings, fib6_siblings) {
4779ad1601aeSDavid Ahern 			if (sibling->fib6_nh.fib_nh_dev == dev)
478013e38901SDavid Ahern 				return true;
478113e38901SDavid Ahern 		}
478213e38901SDavid Ahern 	}
478313e38901SDavid Ahern 
478413e38901SDavid Ahern 	return false;
478513e38901SDavid Ahern }
478613e38901SDavid Ahern 
47878d1c802bSDavid Ahern int rt6_dump_route(struct fib6_info *rt, void *p_arg)
47881da177e4SLinus Torvalds {
47891da177e4SLinus Torvalds 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
479013e38901SDavid Ahern 	struct fib_dump_filter *filter = &arg->filter;
479113e38901SDavid Ahern 	unsigned int flags = NLM_F_MULTI;
47921f17e2f2SDavid Ahern 	struct net *net = arg->net;
47931f17e2f2SDavid Ahern 
4794421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry)
47951f17e2f2SDavid Ahern 		return 0;
47961da177e4SLinus Torvalds 
479713e38901SDavid Ahern 	if ((filter->flags & RTM_F_PREFIX) &&
479893c2fb25SDavid Ahern 	    !(rt->fib6_flags & RTF_PREFIX_RT)) {
4799f8cfe2ceSDavid Ahern 		/* success since this is not a prefix route */
4800f8cfe2ceSDavid Ahern 		return 1;
4801f8cfe2ceSDavid Ahern 	}
480213e38901SDavid Ahern 	if (filter->filter_set) {
480313e38901SDavid Ahern 		if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
480413e38901SDavid Ahern 		    (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
480513e38901SDavid Ahern 		    (filter->protocol && rt->fib6_protocol != filter->protocol)) {
480613e38901SDavid Ahern 			return 1;
480713e38901SDavid Ahern 		}
480813e38901SDavid Ahern 		flags |= NLM_F_DUMP_FILTERED;
4809f8cfe2ceSDavid Ahern 	}
48101da177e4SLinus Torvalds 
4811d4ead6b3SDavid Ahern 	return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4812d4ead6b3SDavid Ahern 			     RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
481313e38901SDavid Ahern 			     arg->cb->nlh->nlmsg_seq, flags);
48141da177e4SLinus Torvalds }
48151da177e4SLinus Torvalds 
48160eff0a27SJakub Kicinski static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
48170eff0a27SJakub Kicinski 					const struct nlmsghdr *nlh,
48180eff0a27SJakub Kicinski 					struct nlattr **tb,
48190eff0a27SJakub Kicinski 					struct netlink_ext_ack *extack)
48200eff0a27SJakub Kicinski {
48210eff0a27SJakub Kicinski 	struct rtmsg *rtm;
48220eff0a27SJakub Kicinski 	int i, err;
48230eff0a27SJakub Kicinski 
48240eff0a27SJakub Kicinski 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
48250eff0a27SJakub Kicinski 		NL_SET_ERR_MSG_MOD(extack,
48260eff0a27SJakub Kicinski 				   "Invalid header for get route request");
48270eff0a27SJakub Kicinski 		return -EINVAL;
48280eff0a27SJakub Kicinski 	}
48290eff0a27SJakub Kicinski 
48300eff0a27SJakub Kicinski 	if (!netlink_strict_get_check(skb))
48310eff0a27SJakub Kicinski 		return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
48320eff0a27SJakub Kicinski 				   rtm_ipv6_policy, extack);
48330eff0a27SJakub Kicinski 
48340eff0a27SJakub Kicinski 	rtm = nlmsg_data(nlh);
48350eff0a27SJakub Kicinski 	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
48360eff0a27SJakub Kicinski 	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
48370eff0a27SJakub Kicinski 	    rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
48380eff0a27SJakub Kicinski 	    rtm->rtm_type) {
48390eff0a27SJakub Kicinski 		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
48400eff0a27SJakub Kicinski 		return -EINVAL;
48410eff0a27SJakub Kicinski 	}
48420eff0a27SJakub Kicinski 	if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
48430eff0a27SJakub Kicinski 		NL_SET_ERR_MSG_MOD(extack,
48440eff0a27SJakub Kicinski 				   "Invalid flags for get route request");
48450eff0a27SJakub Kicinski 		return -EINVAL;
48460eff0a27SJakub Kicinski 	}
48470eff0a27SJakub Kicinski 
48480eff0a27SJakub Kicinski 	err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
48490eff0a27SJakub Kicinski 				 rtm_ipv6_policy, extack);
48500eff0a27SJakub Kicinski 	if (err)
48510eff0a27SJakub Kicinski 		return err;
48520eff0a27SJakub Kicinski 
48530eff0a27SJakub Kicinski 	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
48540eff0a27SJakub Kicinski 	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
48550eff0a27SJakub Kicinski 		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
48560eff0a27SJakub Kicinski 		return -EINVAL;
48570eff0a27SJakub Kicinski 	}
48580eff0a27SJakub Kicinski 
48590eff0a27SJakub Kicinski 	for (i = 0; i <= RTA_MAX; i++) {
48600eff0a27SJakub Kicinski 		if (!tb[i])
48610eff0a27SJakub Kicinski 			continue;
48620eff0a27SJakub Kicinski 
48630eff0a27SJakub Kicinski 		switch (i) {
48640eff0a27SJakub Kicinski 		case RTA_SRC:
48650eff0a27SJakub Kicinski 		case RTA_DST:
48660eff0a27SJakub Kicinski 		case RTA_IIF:
48670eff0a27SJakub Kicinski 		case RTA_OIF:
48680eff0a27SJakub Kicinski 		case RTA_MARK:
48690eff0a27SJakub Kicinski 		case RTA_UID:
48700eff0a27SJakub Kicinski 		case RTA_SPORT:
48710eff0a27SJakub Kicinski 		case RTA_DPORT:
48720eff0a27SJakub Kicinski 		case RTA_IP_PROTO:
48730eff0a27SJakub Kicinski 			break;
48740eff0a27SJakub Kicinski 		default:
48750eff0a27SJakub Kicinski 			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
48760eff0a27SJakub Kicinski 			return -EINVAL;
48770eff0a27SJakub Kicinski 		}
48780eff0a27SJakub Kicinski 	}
48790eff0a27SJakub Kicinski 
48800eff0a27SJakub Kicinski 	return 0;
48810eff0a27SJakub Kicinski }
48820eff0a27SJakub Kicinski 
4883c21ef3e3SDavid Ahern static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4884c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
48851da177e4SLinus Torvalds {
48863b1e0a65SYOSHIFUJI Hideaki 	struct net *net = sock_net(in_skb->sk);
4887ab364a6fSThomas Graf 	struct nlattr *tb[RTA_MAX+1];
488818c3a61cSRoopa Prabhu 	int err, iif = 0, oif = 0;
4889a68886a6SDavid Ahern 	struct fib6_info *from;
489018c3a61cSRoopa Prabhu 	struct dst_entry *dst;
48911da177e4SLinus Torvalds 	struct rt6_info *rt;
4892ab364a6fSThomas Graf 	struct sk_buff *skb;
4893ab364a6fSThomas Graf 	struct rtmsg *rtm;
4894744486d4SMaciej Żenczykowski 	struct flowi6 fl6 = {};
489518c3a61cSRoopa Prabhu 	bool fibmatch;
4896ab364a6fSThomas Graf 
48970eff0a27SJakub Kicinski 	err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
4898ab364a6fSThomas Graf 	if (err < 0)
4899ab364a6fSThomas Graf 		goto errout;
4900ab364a6fSThomas Graf 
4901ab364a6fSThomas Graf 	err = -EINVAL;
490238b7097bSHannes Frederic Sowa 	rtm = nlmsg_data(nlh);
490338b7097bSHannes Frederic Sowa 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
490418c3a61cSRoopa Prabhu 	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4905ab364a6fSThomas Graf 
4906ab364a6fSThomas Graf 	if (tb[RTA_SRC]) {
4907ab364a6fSThomas Graf 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4908ab364a6fSThomas Graf 			goto errout;
4909ab364a6fSThomas Graf 
49104e3fd7a0SAlexey Dobriyan 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4911ab364a6fSThomas Graf 	}
4912ab364a6fSThomas Graf 
4913ab364a6fSThomas Graf 	if (tb[RTA_DST]) {
4914ab364a6fSThomas Graf 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4915ab364a6fSThomas Graf 			goto errout;
4916ab364a6fSThomas Graf 
49174e3fd7a0SAlexey Dobriyan 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4918ab364a6fSThomas Graf 	}
4919ab364a6fSThomas Graf 
4920ab364a6fSThomas Graf 	if (tb[RTA_IIF])
4921ab364a6fSThomas Graf 		iif = nla_get_u32(tb[RTA_IIF]);
4922ab364a6fSThomas Graf 
4923ab364a6fSThomas Graf 	if (tb[RTA_OIF])
492472331bc0SShmulik Ladkani 		oif = nla_get_u32(tb[RTA_OIF]);
4925ab364a6fSThomas Graf 
49262e47b291SLorenzo Colitti 	if (tb[RTA_MARK])
49272e47b291SLorenzo Colitti 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
49282e47b291SLorenzo Colitti 
4929622ec2c9SLorenzo Colitti 	if (tb[RTA_UID])
4930622ec2c9SLorenzo Colitti 		fl6.flowi6_uid = make_kuid(current_user_ns(),
4931622ec2c9SLorenzo Colitti 					   nla_get_u32(tb[RTA_UID]));
4932622ec2c9SLorenzo Colitti 	else
4933622ec2c9SLorenzo Colitti 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4934622ec2c9SLorenzo Colitti 
4935eacb9384SRoopa Prabhu 	if (tb[RTA_SPORT])
4936eacb9384SRoopa Prabhu 		fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4937eacb9384SRoopa Prabhu 
4938eacb9384SRoopa Prabhu 	if (tb[RTA_DPORT])
4939eacb9384SRoopa Prabhu 		fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4940eacb9384SRoopa Prabhu 
4941eacb9384SRoopa Prabhu 	if (tb[RTA_IP_PROTO]) {
4942eacb9384SRoopa Prabhu 		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
49435e1a99eaSHangbin Liu 						  &fl6.flowi6_proto, AF_INET6,
49445e1a99eaSHangbin Liu 						  extack);
4945eacb9384SRoopa Prabhu 		if (err)
4946eacb9384SRoopa Prabhu 			goto errout;
4947eacb9384SRoopa Prabhu 	}
4948eacb9384SRoopa Prabhu 
4949ab364a6fSThomas Graf 	if (iif) {
4950ab364a6fSThomas Graf 		struct net_device *dev;
495172331bc0SShmulik Ladkani 		int flags = 0;
495272331bc0SShmulik Ladkani 
4953121622dbSFlorian Westphal 		rcu_read_lock();
4954121622dbSFlorian Westphal 
4955121622dbSFlorian Westphal 		dev = dev_get_by_index_rcu(net, iif);
4956ab364a6fSThomas Graf 		if (!dev) {
4957121622dbSFlorian Westphal 			rcu_read_unlock();
4958ab364a6fSThomas Graf 			err = -ENODEV;
4959ab364a6fSThomas Graf 			goto errout;
4960ab364a6fSThomas Graf 		}
496172331bc0SShmulik Ladkani 
496272331bc0SShmulik Ladkani 		fl6.flowi6_iif = iif;
496372331bc0SShmulik Ladkani 
496472331bc0SShmulik Ladkani 		if (!ipv6_addr_any(&fl6.saddr))
496572331bc0SShmulik Ladkani 			flags |= RT6_LOOKUP_F_HAS_SADDR;
496672331bc0SShmulik Ladkani 
4967b75cc8f9SDavid Ahern 		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
4968121622dbSFlorian Westphal 
4969121622dbSFlorian Westphal 		rcu_read_unlock();
497072331bc0SShmulik Ladkani 	} else {
497172331bc0SShmulik Ladkani 		fl6.flowi6_oif = oif;
497272331bc0SShmulik Ladkani 
497318c3a61cSRoopa Prabhu 		dst = ip6_route_output(net, NULL, &fl6);
497418c3a61cSRoopa Prabhu 	}
497518c3a61cSRoopa Prabhu 
497618c3a61cSRoopa Prabhu 
497718c3a61cSRoopa Prabhu 	rt = container_of(dst, struct rt6_info, dst);
497818c3a61cSRoopa Prabhu 	if (rt->dst.error) {
497918c3a61cSRoopa Prabhu 		err = rt->dst.error;
498018c3a61cSRoopa Prabhu 		ip6_rt_put(rt);
498118c3a61cSRoopa Prabhu 		goto errout;
4982ab364a6fSThomas Graf 	}
49831da177e4SLinus Torvalds 
49849d6acb3bSWANG Cong 	if (rt == net->ipv6.ip6_null_entry) {
49859d6acb3bSWANG Cong 		err = rt->dst.error;
49869d6acb3bSWANG Cong 		ip6_rt_put(rt);
49879d6acb3bSWANG Cong 		goto errout;
49889d6acb3bSWANG Cong 	}
49899d6acb3bSWANG Cong 
49901da177e4SLinus Torvalds 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
499138308473SDavid S. Miller 	if (!skb) {
499294e187c0SAmerigo Wang 		ip6_rt_put(rt);
4993ab364a6fSThomas Graf 		err = -ENOBUFS;
4994ab364a6fSThomas Graf 		goto errout;
4995ab364a6fSThomas Graf 	}
49961da177e4SLinus Torvalds 
4997d8d1f30bSChangli Gao 	skb_dst_set(skb, &rt->dst);
4998a68886a6SDavid Ahern 
4999a68886a6SDavid Ahern 	rcu_read_lock();
5000a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
5001a68886a6SDavid Ahern 
500218c3a61cSRoopa Prabhu 	if (fibmatch)
5003a68886a6SDavid Ahern 		err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
500418c3a61cSRoopa Prabhu 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
500518c3a61cSRoopa Prabhu 				    nlh->nlmsg_seq, 0);
500618c3a61cSRoopa Prabhu 	else
5007a68886a6SDavid Ahern 		err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5008a68886a6SDavid Ahern 				    &fl6.saddr, iif, RTM_NEWROUTE,
5009d4ead6b3SDavid Ahern 				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
5010d4ead6b3SDavid Ahern 				    0);
5011a68886a6SDavid Ahern 	rcu_read_unlock();
5012a68886a6SDavid Ahern 
50131da177e4SLinus Torvalds 	if (err < 0) {
5014ab364a6fSThomas Graf 		kfree_skb(skb);
5015ab364a6fSThomas Graf 		goto errout;
50161da177e4SLinus Torvalds 	}
50171da177e4SLinus Torvalds 
501815e47304SEric W. Biederman 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
5019ab364a6fSThomas Graf errout:
50201da177e4SLinus Torvalds 	return err;
50211da177e4SLinus Torvalds }
50221da177e4SLinus Torvalds 
50238d1c802bSDavid Ahern void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
502437a1d361SRoopa Prabhu 		     unsigned int nlm_flags)
50251da177e4SLinus Torvalds {
50261da177e4SLinus Torvalds 	struct sk_buff *skb;
50275578689aSDaniel Lezcano 	struct net *net = info->nl_net;
5028528c4cebSDenis V. Lunev 	u32 seq;
5029528c4cebSDenis V. Lunev 	int err;
50300d51aa80SJamal Hadi Salim 
5031528c4cebSDenis V. Lunev 	err = -ENOBUFS;
503238308473SDavid S. Miller 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
503386872cb5SThomas Graf 
503419e42e45SRoopa Prabhu 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
503538308473SDavid S. Miller 	if (!skb)
503621713ebcSThomas Graf 		goto errout;
50371da177e4SLinus Torvalds 
5038d4ead6b3SDavid Ahern 	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5039f8cfe2ceSDavid Ahern 			    event, info->portid, seq, nlm_flags);
504026932566SPatrick McHardy 	if (err < 0) {
504126932566SPatrick McHardy 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
504226932566SPatrick McHardy 		WARN_ON(err == -EMSGSIZE);
504326932566SPatrick McHardy 		kfree_skb(skb);
504426932566SPatrick McHardy 		goto errout;
504526932566SPatrick McHardy 	}
504615e47304SEric W. Biederman 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
50475578689aSDaniel Lezcano 		    info->nlh, gfp_any());
50481ce85fe4SPablo Neira Ayuso 	return;
504921713ebcSThomas Graf errout:
505021713ebcSThomas Graf 	if (err < 0)
50515578689aSDaniel Lezcano 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
50521da177e4SLinus Torvalds }
50531da177e4SLinus Torvalds 
50548ed67789SDaniel Lezcano static int ip6_route_dev_notify(struct notifier_block *this,
5055351638e7SJiri Pirko 				unsigned long event, void *ptr)
50568ed67789SDaniel Lezcano {
5057351638e7SJiri Pirko 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
5058c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
50598ed67789SDaniel Lezcano 
5060242d3a49SWANG Cong 	if (!(dev->flags & IFF_LOOPBACK))
5061242d3a49SWANG Cong 		return NOTIFY_OK;
5062242d3a49SWANG Cong 
5063242d3a49SWANG Cong 	if (event == NETDEV_REGISTER) {
5064ad1601aeSDavid Ahern 		net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
5065d8d1f30bSChangli Gao 		net->ipv6.ip6_null_entry->dst.dev = dev;
50668ed67789SDaniel Lezcano 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
50678ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5068d8d1f30bSChangli Gao 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
50698ed67789SDaniel Lezcano 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
5070d8d1f30bSChangli Gao 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
50718ed67789SDaniel Lezcano 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
50728ed67789SDaniel Lezcano #endif
507376da0704SWANG Cong 	 } else if (event == NETDEV_UNREGISTER &&
507476da0704SWANG Cong 		    dev->reg_state != NETREG_UNREGISTERED) {
507576da0704SWANG Cong 		/* NETDEV_UNREGISTER could be fired for multiple times by
507676da0704SWANG Cong 		 * netdev_wait_allrefs(). Make sure we only call this once.
507776da0704SWANG Cong 		 */
507812d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
5079242d3a49SWANG Cong #ifdef CONFIG_IPV6_MULTIPLE_TABLES
508012d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
508112d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
5082242d3a49SWANG Cong #endif
50838ed67789SDaniel Lezcano 	}
50848ed67789SDaniel Lezcano 
50858ed67789SDaniel Lezcano 	return NOTIFY_OK;
50868ed67789SDaniel Lezcano }
50878ed67789SDaniel Lezcano 
50881da177e4SLinus Torvalds /*
50891da177e4SLinus Torvalds  *	/proc
50901da177e4SLinus Torvalds  */
50911da177e4SLinus Torvalds 
50921da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
50931da177e4SLinus Torvalds static int rt6_stats_seq_show(struct seq_file *seq, void *v)
50941da177e4SLinus Torvalds {
509569ddb805SDaniel Lezcano 	struct net *net = (struct net *)seq->private;
50961da177e4SLinus Torvalds 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
509769ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_nodes,
509869ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_route_nodes,
509981eb8447SWei Wang 		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
510069ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_entries,
510169ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_cache,
5102fc66f95cSEric Dumazet 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
510369ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_discarded_routes);
51041da177e4SLinus Torvalds 
51051da177e4SLinus Torvalds 	return 0;
51061da177e4SLinus Torvalds }
51071da177e4SLinus Torvalds #endif	/* CONFIG_PROC_FS */
51081da177e4SLinus Torvalds 
51091da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
51101da177e4SLinus Torvalds 
51111da177e4SLinus Torvalds static
5112fe2c6338SJoe Perches int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
51131da177e4SLinus Torvalds 			      void __user *buffer, size_t *lenp, loff_t *ppos)
51141da177e4SLinus Torvalds {
5115c486da34SLucian Adrian Grijincu 	struct net *net;
5116c486da34SLucian Adrian Grijincu 	int delay;
5117f0fb9b28SAditya Pakki 	int ret;
5118c486da34SLucian Adrian Grijincu 	if (!write)
5119c486da34SLucian Adrian Grijincu 		return -EINVAL;
5120c486da34SLucian Adrian Grijincu 
5121c486da34SLucian Adrian Grijincu 	net = (struct net *)ctl->extra1;
5122c486da34SLucian Adrian Grijincu 	delay = net->ipv6.sysctl.flush_delay;
5123f0fb9b28SAditya Pakki 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
5124f0fb9b28SAditya Pakki 	if (ret)
5125f0fb9b28SAditya Pakki 		return ret;
5126f0fb9b28SAditya Pakki 
51272ac3ac8fSMichal Kubeček 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
51281da177e4SLinus Torvalds 	return 0;
51291da177e4SLinus Torvalds }
51301da177e4SLinus Torvalds 
51317c6bb7d2SDavid Ahern static int zero;
51327c6bb7d2SDavid Ahern static int one = 1;
51337c6bb7d2SDavid Ahern 
5134ed792e28SDavid Ahern static struct ctl_table ipv6_route_table_template[] = {
51351da177e4SLinus Torvalds 	{
51361da177e4SLinus Torvalds 		.procname	=	"flush",
51374990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.flush_delay,
51381da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
513989c8b3a1SDave Jones 		.mode		=	0200,
51406d9f239aSAlexey Dobriyan 		.proc_handler	=	ipv6_sysctl_rtcache_flush
51411da177e4SLinus Torvalds 	},
51421da177e4SLinus Torvalds 	{
51431da177e4SLinus Torvalds 		.procname	=	"gc_thresh",
51449a7ec3a9SDaniel Lezcano 		.data		=	&ip6_dst_ops_template.gc_thresh,
51451da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51461da177e4SLinus Torvalds 		.mode		=	0644,
51476d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec,
51481da177e4SLinus Torvalds 	},
51491da177e4SLinus Torvalds 	{
51501da177e4SLinus Torvalds 		.procname	=	"max_size",
51514990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
51521da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51531da177e4SLinus Torvalds 		.mode		=	0644,
51546d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec,
51551da177e4SLinus Torvalds 	},
51561da177e4SLinus Torvalds 	{
51571da177e4SLinus Torvalds 		.procname	=	"gc_min_interval",
51584990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
51591da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51601da177e4SLinus Torvalds 		.mode		=	0644,
51616d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
51621da177e4SLinus Torvalds 	},
51631da177e4SLinus Torvalds 	{
51641da177e4SLinus Torvalds 		.procname	=	"gc_timeout",
51654990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
51661da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51671da177e4SLinus Torvalds 		.mode		=	0644,
51686d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
51691da177e4SLinus Torvalds 	},
51701da177e4SLinus Torvalds 	{
51711da177e4SLinus Torvalds 		.procname	=	"gc_interval",
51724990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
51731da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51741da177e4SLinus Torvalds 		.mode		=	0644,
51756d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
51761da177e4SLinus Torvalds 	},
51771da177e4SLinus Torvalds 	{
51781da177e4SLinus Torvalds 		.procname	=	"gc_elasticity",
51794990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
51801da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51811da177e4SLinus Torvalds 		.mode		=	0644,
5182f3d3f616SMin Zhang 		.proc_handler	=	proc_dointvec,
51831da177e4SLinus Torvalds 	},
51841da177e4SLinus Torvalds 	{
51851da177e4SLinus Torvalds 		.procname	=	"mtu_expires",
51864990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
51871da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51881da177e4SLinus Torvalds 		.mode		=	0644,
51896d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
51901da177e4SLinus Torvalds 	},
51911da177e4SLinus Torvalds 	{
51921da177e4SLinus Torvalds 		.procname	=	"min_adv_mss",
51934990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
51941da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51951da177e4SLinus Torvalds 		.mode		=	0644,
5196f3d3f616SMin Zhang 		.proc_handler	=	proc_dointvec,
51971da177e4SLinus Torvalds 	},
51981da177e4SLinus Torvalds 	{
51991da177e4SLinus Torvalds 		.procname	=	"gc_min_interval_ms",
52004990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
52011da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52021da177e4SLinus Torvalds 		.mode		=	0644,
52036d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_ms_jiffies,
52041da177e4SLinus Torvalds 	},
52057c6bb7d2SDavid Ahern 	{
52067c6bb7d2SDavid Ahern 		.procname	=	"skip_notify_on_dev_down",
52077c6bb7d2SDavid Ahern 		.data		=	&init_net.ipv6.sysctl.skip_notify_on_dev_down,
52087c6bb7d2SDavid Ahern 		.maxlen		=	sizeof(int),
52097c6bb7d2SDavid Ahern 		.mode		=	0644,
52107c6bb7d2SDavid Ahern 		.proc_handler	=	proc_dointvec,
52117c6bb7d2SDavid Ahern 		.extra1		=	&zero,
52127c6bb7d2SDavid Ahern 		.extra2		=	&one,
52137c6bb7d2SDavid Ahern 	},
5214f8572d8fSEric W. Biederman 	{ }
52151da177e4SLinus Torvalds };
52161da177e4SLinus Torvalds 
52172c8c1e72SAlexey Dobriyan struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
5218760f2d01SDaniel Lezcano {
5219760f2d01SDaniel Lezcano 	struct ctl_table *table;
5220760f2d01SDaniel Lezcano 
5221760f2d01SDaniel Lezcano 	table = kmemdup(ipv6_route_table_template,
5222760f2d01SDaniel Lezcano 			sizeof(ipv6_route_table_template),
5223760f2d01SDaniel Lezcano 			GFP_KERNEL);
52245ee09105SYOSHIFUJI Hideaki 
52255ee09105SYOSHIFUJI Hideaki 	if (table) {
52265ee09105SYOSHIFUJI Hideaki 		table[0].data = &net->ipv6.sysctl.flush_delay;
5227c486da34SLucian Adrian Grijincu 		table[0].extra1 = net;
522886393e52SAlexey Dobriyan 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
52295ee09105SYOSHIFUJI Hideaki 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
52305ee09105SYOSHIFUJI Hideaki 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
52315ee09105SYOSHIFUJI Hideaki 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
52325ee09105SYOSHIFUJI Hideaki 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
52335ee09105SYOSHIFUJI Hideaki 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
52345ee09105SYOSHIFUJI Hideaki 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
52355ee09105SYOSHIFUJI Hideaki 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
52369c69fabeSAlexey Dobriyan 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
52377c6bb7d2SDavid Ahern 		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
5238464dc801SEric W. Biederman 
5239464dc801SEric W. Biederman 		/* Don't export sysctls to unprivileged users */
5240464dc801SEric W. Biederman 		if (net->user_ns != &init_user_ns)
5241464dc801SEric W. Biederman 			table[0].procname = NULL;
52425ee09105SYOSHIFUJI Hideaki 	}
52435ee09105SYOSHIFUJI Hideaki 
5244760f2d01SDaniel Lezcano 	return table;
5245760f2d01SDaniel Lezcano }
52461da177e4SLinus Torvalds #endif
52471da177e4SLinus Torvalds 
52482c8c1e72SAlexey Dobriyan static int __net_init ip6_route_net_init(struct net *net)
5249cdb18761SDaniel Lezcano {
5250633d424bSPavel Emelyanov 	int ret = -ENOMEM;
52518ed67789SDaniel Lezcano 
525286393e52SAlexey Dobriyan 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
525386393e52SAlexey Dobriyan 	       sizeof(net->ipv6.ip6_dst_ops));
5254f2fc6a54SBenjamin Thery 
5255fc66f95cSEric Dumazet 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5256fc66f95cSEric Dumazet 		goto out_ip6_dst_ops;
5257fc66f95cSEric Dumazet 
5258421842edSDavid Ahern 	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5259421842edSDavid Ahern 					    sizeof(*net->ipv6.fib6_null_entry),
5260421842edSDavid Ahern 					    GFP_KERNEL);
5261421842edSDavid Ahern 	if (!net->ipv6.fib6_null_entry)
5262421842edSDavid Ahern 		goto out_ip6_dst_entries;
5263421842edSDavid Ahern 
52648ed67789SDaniel Lezcano 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
52658ed67789SDaniel Lezcano 					   sizeof(*net->ipv6.ip6_null_entry),
52668ed67789SDaniel Lezcano 					   GFP_KERNEL);
52678ed67789SDaniel Lezcano 	if (!net->ipv6.ip6_null_entry)
5268421842edSDavid Ahern 		goto out_fib6_null_entry;
5269d8d1f30bSChangli Gao 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
527062fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
527162fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
52728ed67789SDaniel Lezcano 
52738ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5274feca7d8cSVincent Bernat 	net->ipv6.fib6_has_custom_rules = false;
52758ed67789SDaniel Lezcano 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
52768ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_prohibit_entry),
52778ed67789SDaniel Lezcano 					       GFP_KERNEL);
527868fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_prohibit_entry)
527968fffc67SPeter Zijlstra 		goto out_ip6_null_entry;
5280d8d1f30bSChangli Gao 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
528162fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
528262fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
52838ed67789SDaniel Lezcano 
52848ed67789SDaniel Lezcano 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
52858ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
52868ed67789SDaniel Lezcano 					       GFP_KERNEL);
528768fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_blk_hole_entry)
528868fffc67SPeter Zijlstra 		goto out_ip6_prohibit_entry;
5289d8d1f30bSChangli Gao 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
529062fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
529162fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
52928ed67789SDaniel Lezcano #endif
52938ed67789SDaniel Lezcano 
5294b339a47cSPeter Zijlstra 	net->ipv6.sysctl.flush_delay = 0;
5295b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
5296b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5297b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5298b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5299b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5300b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5301b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
53027c6bb7d2SDavid Ahern 	net->ipv6.sysctl.skip_notify_on_dev_down = 0;
5303b339a47cSPeter Zijlstra 
53046891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
53056891a346SBenjamin Thery 
53068ed67789SDaniel Lezcano 	ret = 0;
53078ed67789SDaniel Lezcano out:
53088ed67789SDaniel Lezcano 	return ret;
5309f2fc6a54SBenjamin Thery 
531068fffc67SPeter Zijlstra #ifdef CONFIG_IPV6_MULTIPLE_TABLES
531168fffc67SPeter Zijlstra out_ip6_prohibit_entry:
531268fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_prohibit_entry);
531368fffc67SPeter Zijlstra out_ip6_null_entry:
531468fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_null_entry);
531568fffc67SPeter Zijlstra #endif
5316421842edSDavid Ahern out_fib6_null_entry:
5317421842edSDavid Ahern 	kfree(net->ipv6.fib6_null_entry);
5318fc66f95cSEric Dumazet out_ip6_dst_entries:
5319fc66f95cSEric Dumazet 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5320f2fc6a54SBenjamin Thery out_ip6_dst_ops:
5321f2fc6a54SBenjamin Thery 	goto out;
5322cdb18761SDaniel Lezcano }
5323cdb18761SDaniel Lezcano 
53242c8c1e72SAlexey Dobriyan static void __net_exit ip6_route_net_exit(struct net *net)
5325cdb18761SDaniel Lezcano {
5326421842edSDavid Ahern 	kfree(net->ipv6.fib6_null_entry);
53278ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_null_entry);
53288ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
53298ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_prohibit_entry);
53308ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_blk_hole_entry);
53318ed67789SDaniel Lezcano #endif
533241bb78b4SXiaotian Feng 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5333cdb18761SDaniel Lezcano }
5334cdb18761SDaniel Lezcano 
5335d189634eSThomas Graf static int __net_init ip6_route_net_init_late(struct net *net)
5336d189634eSThomas Graf {
5337d189634eSThomas Graf #ifdef CONFIG_PROC_FS
5338c3506372SChristoph Hellwig 	proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5339c3506372SChristoph Hellwig 			sizeof(struct ipv6_route_iter));
53403617d949SChristoph Hellwig 	proc_create_net_single("rt6_stats", 0444, net->proc_net,
53413617d949SChristoph Hellwig 			rt6_stats_seq_show, NULL);
5342d189634eSThomas Graf #endif
5343d189634eSThomas Graf 	return 0;
5344d189634eSThomas Graf }
5345d189634eSThomas Graf 
5346d189634eSThomas Graf static void __net_exit ip6_route_net_exit_late(struct net *net)
5347d189634eSThomas Graf {
5348d189634eSThomas Graf #ifdef CONFIG_PROC_FS
5349ece31ffdSGao feng 	remove_proc_entry("ipv6_route", net->proc_net);
5350ece31ffdSGao feng 	remove_proc_entry("rt6_stats", net->proc_net);
5351d189634eSThomas Graf #endif
5352d189634eSThomas Graf }
5353d189634eSThomas Graf 
5354cdb18761SDaniel Lezcano static struct pernet_operations ip6_route_net_ops = {
5355cdb18761SDaniel Lezcano 	.init = ip6_route_net_init,
5356cdb18761SDaniel Lezcano 	.exit = ip6_route_net_exit,
5357cdb18761SDaniel Lezcano };
5358cdb18761SDaniel Lezcano 
5359c3426b47SDavid S. Miller static int __net_init ipv6_inetpeer_init(struct net *net)
5360c3426b47SDavid S. Miller {
5361c3426b47SDavid S. Miller 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5362c3426b47SDavid S. Miller 
5363c3426b47SDavid S. Miller 	if (!bp)
5364c3426b47SDavid S. Miller 		return -ENOMEM;
5365c3426b47SDavid S. Miller 	inet_peer_base_init(bp);
5366c3426b47SDavid S. Miller 	net->ipv6.peers = bp;
5367c3426b47SDavid S. Miller 	return 0;
5368c3426b47SDavid S. Miller }
5369c3426b47SDavid S. Miller 
5370c3426b47SDavid S. Miller static void __net_exit ipv6_inetpeer_exit(struct net *net)
5371c3426b47SDavid S. Miller {
5372c3426b47SDavid S. Miller 	struct inet_peer_base *bp = net->ipv6.peers;
5373c3426b47SDavid S. Miller 
5374c3426b47SDavid S. Miller 	net->ipv6.peers = NULL;
537556a6b248SDavid S. Miller 	inetpeer_invalidate_tree(bp);
5376c3426b47SDavid S. Miller 	kfree(bp);
5377c3426b47SDavid S. Miller }
5378c3426b47SDavid S. Miller 
53792b823f72SDavid S. Miller static struct pernet_operations ipv6_inetpeer_ops = {
5380c3426b47SDavid S. Miller 	.init	=	ipv6_inetpeer_init,
5381c3426b47SDavid S. Miller 	.exit	=	ipv6_inetpeer_exit,
5382c3426b47SDavid S. Miller };
5383c3426b47SDavid S. Miller 
5384d189634eSThomas Graf static struct pernet_operations ip6_route_net_late_ops = {
5385d189634eSThomas Graf 	.init = ip6_route_net_init_late,
5386d189634eSThomas Graf 	.exit = ip6_route_net_exit_late,
5387d189634eSThomas Graf };
5388d189634eSThomas Graf 
53898ed67789SDaniel Lezcano static struct notifier_block ip6_route_dev_notifier = {
53908ed67789SDaniel Lezcano 	.notifier_call = ip6_route_dev_notify,
5391242d3a49SWANG Cong 	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
53928ed67789SDaniel Lezcano };
53938ed67789SDaniel Lezcano 
53942f460933SWANG Cong void __init ip6_route_init_special_entries(void)
53952f460933SWANG Cong {
53962f460933SWANG Cong 	/* Registering of the loopback is done before this portion of code,
53972f460933SWANG Cong 	 * the loopback reference in rt6_info will not be taken, do it
53982f460933SWANG Cong 	 * manually for init_net */
5399ad1601aeSDavid Ahern 	init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
54002f460933SWANG Cong 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
54012f460933SWANG Cong 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
54022f460933SWANG Cong   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
54032f460933SWANG Cong 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
54042f460933SWANG Cong 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
54052f460933SWANG Cong 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
54062f460933SWANG Cong 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
54072f460933SWANG Cong   #endif
54082f460933SWANG Cong }
54092f460933SWANG Cong 
5410433d49c3SDaniel Lezcano int __init ip6_route_init(void)
54111da177e4SLinus Torvalds {
5412433d49c3SDaniel Lezcano 	int ret;
54138d0b94afSMartin KaFai Lau 	int cpu;
5414433d49c3SDaniel Lezcano 
54159a7ec3a9SDaniel Lezcano 	ret = -ENOMEM;
54169a7ec3a9SDaniel Lezcano 	ip6_dst_ops_template.kmem_cachep =
54179a7ec3a9SDaniel Lezcano 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
54189a7ec3a9SDaniel Lezcano 				  SLAB_HWCACHE_ALIGN, NULL);
54199a7ec3a9SDaniel Lezcano 	if (!ip6_dst_ops_template.kmem_cachep)
5420c19a28e1SFernando Carrijo 		goto out;
542114e50e57SDavid S. Miller 
5422fc66f95cSEric Dumazet 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
54238ed67789SDaniel Lezcano 	if (ret)
5424bdb3289fSDaniel Lezcano 		goto out_kmem_cache;
5425bdb3289fSDaniel Lezcano 
5426c3426b47SDavid S. Miller 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5427c3426b47SDavid S. Miller 	if (ret)
5428e8803b6cSDavid S. Miller 		goto out_dst_entries;
54292a0c451aSThomas Graf 
54307e52b33bSDavid S. Miller 	ret = register_pernet_subsys(&ip6_route_net_ops);
54317e52b33bSDavid S. Miller 	if (ret)
54327e52b33bSDavid S. Miller 		goto out_register_inetpeer;
5433c3426b47SDavid S. Miller 
54345dc121e9SArnaud Ebalard 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
54355dc121e9SArnaud Ebalard 
5436e8803b6cSDavid S. Miller 	ret = fib6_init();
5437433d49c3SDaniel Lezcano 	if (ret)
54388ed67789SDaniel Lezcano 		goto out_register_subsys;
5439433d49c3SDaniel Lezcano 
5440433d49c3SDaniel Lezcano 	ret = xfrm6_init();
5441433d49c3SDaniel Lezcano 	if (ret)
5442e8803b6cSDavid S. Miller 		goto out_fib6_init;
5443c35b7e72SDaniel Lezcano 
5444433d49c3SDaniel Lezcano 	ret = fib6_rules_init();
5445433d49c3SDaniel Lezcano 	if (ret)
5446433d49c3SDaniel Lezcano 		goto xfrm6_init;
54477e5449c2SDaniel Lezcano 
5448d189634eSThomas Graf 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
5449d189634eSThomas Graf 	if (ret)
5450d189634eSThomas Graf 		goto fib6_rules_init;
5451d189634eSThomas Graf 
545216feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
545316feebcfSFlorian Westphal 				   inet6_rtm_newroute, NULL, 0);
545416feebcfSFlorian Westphal 	if (ret < 0)
545516feebcfSFlorian Westphal 		goto out_register_late_subsys;
545616feebcfSFlorian Westphal 
545716feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
545816feebcfSFlorian Westphal 				   inet6_rtm_delroute, NULL, 0);
545916feebcfSFlorian Westphal 	if (ret < 0)
546016feebcfSFlorian Westphal 		goto out_register_late_subsys;
546116feebcfSFlorian Westphal 
546216feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
546316feebcfSFlorian Westphal 				   inet6_rtm_getroute, NULL,
546416feebcfSFlorian Westphal 				   RTNL_FLAG_DOIT_UNLOCKED);
546516feebcfSFlorian Westphal 	if (ret < 0)
5466d189634eSThomas Graf 		goto out_register_late_subsys;
5467433d49c3SDaniel Lezcano 
54688ed67789SDaniel Lezcano 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
5469cdb18761SDaniel Lezcano 	if (ret)
5470d189634eSThomas Graf 		goto out_register_late_subsys;
54718ed67789SDaniel Lezcano 
54728d0b94afSMartin KaFai Lau 	for_each_possible_cpu(cpu) {
54738d0b94afSMartin KaFai Lau 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
54748d0b94afSMartin KaFai Lau 
54758d0b94afSMartin KaFai Lau 		INIT_LIST_HEAD(&ul->head);
54768d0b94afSMartin KaFai Lau 		spin_lock_init(&ul->lock);
54778d0b94afSMartin KaFai Lau 	}
54788d0b94afSMartin KaFai Lau 
5479433d49c3SDaniel Lezcano out:
5480433d49c3SDaniel Lezcano 	return ret;
5481433d49c3SDaniel Lezcano 
5482d189634eSThomas Graf out_register_late_subsys:
548316feebcfSFlorian Westphal 	rtnl_unregister_all(PF_INET6);
5484d189634eSThomas Graf 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5485433d49c3SDaniel Lezcano fib6_rules_init:
5486433d49c3SDaniel Lezcano 	fib6_rules_cleanup();
5487433d49c3SDaniel Lezcano xfrm6_init:
5488433d49c3SDaniel Lezcano 	xfrm6_fini();
54892a0c451aSThomas Graf out_fib6_init:
54902a0c451aSThomas Graf 	fib6_gc_cleanup();
54918ed67789SDaniel Lezcano out_register_subsys:
54928ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
54937e52b33bSDavid S. Miller out_register_inetpeer:
54947e52b33bSDavid S. Miller 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
5495fc66f95cSEric Dumazet out_dst_entries:
5496fc66f95cSEric Dumazet 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5497433d49c3SDaniel Lezcano out_kmem_cache:
5498f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5499433d49c3SDaniel Lezcano 	goto out;
55001da177e4SLinus Torvalds }
55011da177e4SLinus Torvalds 
55021da177e4SLinus Torvalds void ip6_route_cleanup(void)
55031da177e4SLinus Torvalds {
55048ed67789SDaniel Lezcano 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
5505d189634eSThomas Graf 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5506101367c2SThomas Graf 	fib6_rules_cleanup();
55071da177e4SLinus Torvalds 	xfrm6_fini();
55081da177e4SLinus Torvalds 	fib6_gc_cleanup();
5509c3426b47SDavid S. Miller 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
55108ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
551141bb78b4SXiaotian Feng 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5512f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
55131da177e4SLinus Torvalds }
5514