xref: /openbmc/linux/net/ipv6/route.c (revision 979e276ebebd537782797c439c9cb42b6d3aba27)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  *	Linux INET6 implementation
31da177e4SLinus Torvalds  *	FIB front-end.
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  *	Authors:
61da177e4SLinus Torvalds  *	Pedro Roque		<roque@di.fc.ul.pt>
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
91da177e4SLinus Torvalds  *      modify it under the terms of the GNU General Public License
101da177e4SLinus Torvalds  *      as published by the Free Software Foundation; either version
111da177e4SLinus Torvalds  *      2 of the License, or (at your option) any later version.
121da177e4SLinus Torvalds  */
131da177e4SLinus Torvalds 
141da177e4SLinus Torvalds /*	Changes:
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  *	YOSHIFUJI Hideaki @USAGI
171da177e4SLinus Torvalds  *		reworked default router selection.
181da177e4SLinus Torvalds  *		- respect outgoing interface
191da177e4SLinus Torvalds  *		- select from (probably) reachable routers (i.e.
201da177e4SLinus Torvalds  *		routers in REACHABLE, STALE, DELAY or PROBE states).
211da177e4SLinus Torvalds  *		- always select the same router if it is (probably)
221da177e4SLinus Torvalds  *		reachable.  otherwise, round-robin the list.
23c0bece9fSYOSHIFUJI Hideaki  *	Ville Nuorvala
24c0bece9fSYOSHIFUJI Hideaki  *		Fixed routing subtrees.
251da177e4SLinus Torvalds  */
261da177e4SLinus Torvalds 
27f3213831SJoe Perches #define pr_fmt(fmt) "IPv6: " fmt
28f3213831SJoe Perches 
294fc268d2SRandy Dunlap #include <linux/capability.h>
301da177e4SLinus Torvalds #include <linux/errno.h>
31bc3b2d7fSPaul Gortmaker #include <linux/export.h>
321da177e4SLinus Torvalds #include <linux/types.h>
331da177e4SLinus Torvalds #include <linux/times.h>
341da177e4SLinus Torvalds #include <linux/socket.h>
351da177e4SLinus Torvalds #include <linux/sockios.h>
361da177e4SLinus Torvalds #include <linux/net.h>
371da177e4SLinus Torvalds #include <linux/route.h>
381da177e4SLinus Torvalds #include <linux/netdevice.h>
391da177e4SLinus Torvalds #include <linux/in6.h>
407bc570c8SYOSHIFUJI Hideaki #include <linux/mroute6.h>
411da177e4SLinus Torvalds #include <linux/init.h>
421da177e4SLinus Torvalds #include <linux/if_arp.h>
431da177e4SLinus Torvalds #include <linux/proc_fs.h>
441da177e4SLinus Torvalds #include <linux/seq_file.h>
455b7c931dSDaniel Lezcano #include <linux/nsproxy.h>
465a0e3ad6STejun Heo #include <linux/slab.h>
4735732d01SWei Wang #include <linux/jhash.h>
48457c4cbcSEric W. Biederman #include <net/net_namespace.h>
491da177e4SLinus Torvalds #include <net/snmp.h>
501da177e4SLinus Torvalds #include <net/ipv6.h>
511da177e4SLinus Torvalds #include <net/ip6_fib.h>
521da177e4SLinus Torvalds #include <net/ip6_route.h>
531da177e4SLinus Torvalds #include <net/ndisc.h>
541da177e4SLinus Torvalds #include <net/addrconf.h>
551da177e4SLinus Torvalds #include <net/tcp.h>
561da177e4SLinus Torvalds #include <linux/rtnetlink.h>
571da177e4SLinus Torvalds #include <net/dst.h>
58904af04dSJiri Benc #include <net/dst_metadata.h>
591da177e4SLinus Torvalds #include <net/xfrm.h>
608d71740cSTom Tucker #include <net/netevent.h>
6121713ebcSThomas Graf #include <net/netlink.h>
6251ebd318SNicolas Dichtel #include <net/nexthop.h>
6319e42e45SRoopa Prabhu #include <net/lwtunnel.h>
64904af04dSJiri Benc #include <net/ip_tunnels.h>
65ca254490SDavid Ahern #include <net/l3mdev.h>
66eacb9384SRoopa Prabhu #include <net/ip.h>
677c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
681da177e4SLinus Torvalds 
691da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
701da177e4SLinus Torvalds #include <linux/sysctl.h>
711da177e4SLinus Torvalds #endif
721da177e4SLinus Torvalds 
7330d444d3SDavid Ahern static int ip6_rt_type_to_error(u8 fib6_type);
7430d444d3SDavid Ahern 
7530d444d3SDavid Ahern #define CREATE_TRACE_POINTS
7630d444d3SDavid Ahern #include <trace/events/fib6.h>
7730d444d3SDavid Ahern EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
7830d444d3SDavid Ahern #undef CREATE_TRACE_POINTS
7930d444d3SDavid Ahern 
/*
 * Outcome of a next-hop neighbour check.  Every failure code is negative
 * and the single success code is positive, so "< 0" means failure.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1,
};
86afc154e9SHannes Frederic Sowa 
871da177e4SLinus Torvalds static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
880dbaee3bSDavid S. Miller static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
89ebb762f2SSteffen Klassert static unsigned int	 ip6_mtu(const struct dst_entry *dst);
901da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *);
911da177e4SLinus Torvalds static void		ip6_dst_destroy(struct dst_entry *);
921da177e4SLinus Torvalds static void		ip6_dst_ifdown(struct dst_entry *,
931da177e4SLinus Torvalds 				       struct net_device *dev, int how);
94569d3645SDaniel Lezcano static int		 ip6_dst_gc(struct dst_ops *ops);
951da177e4SLinus Torvalds 
961da177e4SLinus Torvalds static int		ip6_pkt_discard(struct sk_buff *skb);
97ede2059dSEric W. Biederman static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
987150aedeSKamala R static int		ip6_pkt_prohibit(struct sk_buff *skb);
99ede2059dSEric W. Biederman static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1001da177e4SLinus Torvalds static void		ip6_link_failure(struct sk_buff *skb);
1016700c270SDavid S. Miller static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1026700c270SDavid S. Miller 					   struct sk_buff *skb, u32 mtu);
1036700c270SDavid S. Miller static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
1046700c270SDavid S. Miller 					struct sk_buff *skb);
1058d1c802bSDavid Ahern static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
1068d1c802bSDavid Ahern static size_t rt6_nlmsg_size(struct fib6_info *rt);
107d4ead6b3SDavid Ahern static int rt6_fill_node(struct net *net, struct sk_buff *skb,
1088d1c802bSDavid Ahern 			 struct fib6_info *rt, struct dst_entry *dst,
109d4ead6b3SDavid Ahern 			 struct in6_addr *dest, struct in6_addr *src,
11016a16cd3SDavid Ahern 			 int iif, int type, u32 portid, u32 seq,
11116a16cd3SDavid Ahern 			 unsigned int flags);
1128d1c802bSDavid Ahern static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
11335732d01SWei Wang 					   struct in6_addr *daddr,
11435732d01SWei Wang 					   struct in6_addr *saddr);
1151da177e4SLinus Torvalds 
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Route-information helpers; only declared when CONFIG_IPV6_ROUTE_INFO is set. */
static struct fib6_info *rt6_add_route_info(struct net *net,
					    const struct in6_addr *prefix,
					    int prefixlen,
					    const struct in6_addr *gwaddr,
					    struct net_device *dev,
					    unsigned int pref);
static struct fib6_info *rt6_get_route_info(struct net *net,
					    const struct in6_addr *prefix,
					    int prefixlen,
					    const struct in6_addr *gwaddr,
					    struct net_device *dev);
#endif
12770ceb4f5SYOSHIFUJI Hideaki 
1288d0b94afSMartin KaFai Lau struct uncached_list {
1298d0b94afSMartin KaFai Lau 	spinlock_t		lock;
1308d0b94afSMartin KaFai Lau 	struct list_head	head;
1318d0b94afSMartin KaFai Lau };
1328d0b94afSMartin KaFai Lau 
1338d0b94afSMartin KaFai Lau static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
1348d0b94afSMartin KaFai Lau 
135510c321bSXin Long void rt6_uncached_list_add(struct rt6_info *rt)
1368d0b94afSMartin KaFai Lau {
1378d0b94afSMartin KaFai Lau 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
1388d0b94afSMartin KaFai Lau 
1398d0b94afSMartin KaFai Lau 	rt->rt6i_uncached_list = ul;
1408d0b94afSMartin KaFai Lau 
1418d0b94afSMartin KaFai Lau 	spin_lock_bh(&ul->lock);
1428d0b94afSMartin KaFai Lau 	list_add_tail(&rt->rt6i_uncached, &ul->head);
1438d0b94afSMartin KaFai Lau 	spin_unlock_bh(&ul->lock);
1448d0b94afSMartin KaFai Lau }
1458d0b94afSMartin KaFai Lau 
146510c321bSXin Long void rt6_uncached_list_del(struct rt6_info *rt)
1478d0b94afSMartin KaFai Lau {
1488d0b94afSMartin KaFai Lau 	if (!list_empty(&rt->rt6i_uncached)) {
1498d0b94afSMartin KaFai Lau 		struct uncached_list *ul = rt->rt6i_uncached_list;
15081eb8447SWei Wang 		struct net *net = dev_net(rt->dst.dev);
1518d0b94afSMartin KaFai Lau 
1528d0b94afSMartin KaFai Lau 		spin_lock_bh(&ul->lock);
1538d0b94afSMartin KaFai Lau 		list_del(&rt->rt6i_uncached);
15481eb8447SWei Wang 		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
1558d0b94afSMartin KaFai Lau 		spin_unlock_bh(&ul->lock);
1568d0b94afSMartin KaFai Lau 	}
1578d0b94afSMartin KaFai Lau }
1588d0b94afSMartin KaFai Lau 
1598d0b94afSMartin KaFai Lau static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
1608d0b94afSMartin KaFai Lau {
1618d0b94afSMartin KaFai Lau 	struct net_device *loopback_dev = net->loopback_dev;
1628d0b94afSMartin KaFai Lau 	int cpu;
1638d0b94afSMartin KaFai Lau 
164e332bc67SEric W. Biederman 	if (dev == loopback_dev)
165e332bc67SEric W. Biederman 		return;
166e332bc67SEric W. Biederman 
1678d0b94afSMartin KaFai Lau 	for_each_possible_cpu(cpu) {
1688d0b94afSMartin KaFai Lau 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
1698d0b94afSMartin KaFai Lau 		struct rt6_info *rt;
1708d0b94afSMartin KaFai Lau 
1718d0b94afSMartin KaFai Lau 		spin_lock_bh(&ul->lock);
1728d0b94afSMartin KaFai Lau 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
1738d0b94afSMartin KaFai Lau 			struct inet6_dev *rt_idev = rt->rt6i_idev;
1748d0b94afSMartin KaFai Lau 			struct net_device *rt_dev = rt->dst.dev;
1758d0b94afSMartin KaFai Lau 
176e332bc67SEric W. Biederman 			if (rt_idev->dev == dev) {
1778d0b94afSMartin KaFai Lau 				rt->rt6i_idev = in6_dev_get(loopback_dev);
1788d0b94afSMartin KaFai Lau 				in6_dev_put(rt_idev);
1798d0b94afSMartin KaFai Lau 			}
1808d0b94afSMartin KaFai Lau 
181e332bc67SEric W. Biederman 			if (rt_dev == dev) {
1828d0b94afSMartin KaFai Lau 				rt->dst.dev = loopback_dev;
1838d0b94afSMartin KaFai Lau 				dev_hold(rt->dst.dev);
1848d0b94afSMartin KaFai Lau 				dev_put(rt_dev);
1858d0b94afSMartin KaFai Lau 			}
1868d0b94afSMartin KaFai Lau 		}
1878d0b94afSMartin KaFai Lau 		spin_unlock_bh(&ul->lock);
1888d0b94afSMartin KaFai Lau 	}
1898d0b94afSMartin KaFai Lau }
1908d0b94afSMartin KaFai Lau 
191f8a1b43bSDavid Ahern static inline const void *choose_neigh_daddr(const struct in6_addr *p,
192f894cbf8SDavid S. Miller 					     struct sk_buff *skb,
193f894cbf8SDavid S. Miller 					     const void *daddr)
19439232973SDavid S. Miller {
195a7563f34SDavid S. Miller 	if (!ipv6_addr_any(p))
19639232973SDavid S. Miller 		return (const void *) p;
197f894cbf8SDavid S. Miller 	else if (skb)
198f894cbf8SDavid S. Miller 		return &ipv6_hdr(skb)->daddr;
19939232973SDavid S. Miller 	return daddr;
20039232973SDavid S. Miller }
20139232973SDavid S. Miller 
202f8a1b43bSDavid Ahern struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
203f8a1b43bSDavid Ahern 				   struct net_device *dev,
204f894cbf8SDavid S. Miller 				   struct sk_buff *skb,
205f894cbf8SDavid S. Miller 				   const void *daddr)
206d3aaeb38SDavid S. Miller {
20739232973SDavid S. Miller 	struct neighbour *n;
20839232973SDavid S. Miller 
209f8a1b43bSDavid Ahern 	daddr = choose_neigh_daddr(gw, skb, daddr);
210f8a1b43bSDavid Ahern 	n = __ipv6_neigh_lookup(dev, daddr);
211f83c7790SDavid S. Miller 	if (n)
212f83c7790SDavid S. Miller 		return n;
2137adf3246SStefano Brivio 
2147adf3246SStefano Brivio 	n = neigh_create(&nd_tbl, daddr, dev);
2157adf3246SStefano Brivio 	return IS_ERR(n) ? NULL : n;
216f8a1b43bSDavid Ahern }
217f8a1b43bSDavid Ahern 
218f8a1b43bSDavid Ahern static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
219f8a1b43bSDavid Ahern 					      struct sk_buff *skb,
220f8a1b43bSDavid Ahern 					      const void *daddr)
221f8a1b43bSDavid Ahern {
222f8a1b43bSDavid Ahern 	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
223f8a1b43bSDavid Ahern 
224f8a1b43bSDavid Ahern 	return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
225f83c7790SDavid S. Miller }
226f83c7790SDavid S. Miller 
22763fca65dSJulian Anastasov static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
22863fca65dSJulian Anastasov {
22963fca65dSJulian Anastasov 	struct net_device *dev = dst->dev;
23063fca65dSJulian Anastasov 	struct rt6_info *rt = (struct rt6_info *)dst;
23163fca65dSJulian Anastasov 
232f8a1b43bSDavid Ahern 	daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
23363fca65dSJulian Anastasov 	if (!daddr)
23463fca65dSJulian Anastasov 		return;
23563fca65dSJulian Anastasov 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
23663fca65dSJulian Anastasov 		return;
23763fca65dSJulian Anastasov 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
23863fca65dSJulian Anastasov 		return;
23963fca65dSJulian Anastasov 	__ipv6_confirm_neigh(dev, daddr);
24063fca65dSJulian Anastasov }
24163fca65dSJulian Anastasov 
2429a7ec3a9SDaniel Lezcano static struct dst_ops ip6_dst_ops_template = {
2431da177e4SLinus Torvalds 	.family			=	AF_INET6,
2441da177e4SLinus Torvalds 	.gc			=	ip6_dst_gc,
2451da177e4SLinus Torvalds 	.gc_thresh		=	1024,
2461da177e4SLinus Torvalds 	.check			=	ip6_dst_check,
2470dbaee3bSDavid S. Miller 	.default_advmss		=	ip6_default_advmss,
248ebb762f2SSteffen Klassert 	.mtu			=	ip6_mtu,
249d4ead6b3SDavid Ahern 	.cow_metrics		=	dst_cow_metrics_generic,
2501da177e4SLinus Torvalds 	.destroy		=	ip6_dst_destroy,
2511da177e4SLinus Torvalds 	.ifdown			=	ip6_dst_ifdown,
2521da177e4SLinus Torvalds 	.negative_advice	=	ip6_negative_advice,
2531da177e4SLinus Torvalds 	.link_failure		=	ip6_link_failure,
2541da177e4SLinus Torvalds 	.update_pmtu		=	ip6_rt_update_pmtu,
2556e157b6aSDavid S. Miller 	.redirect		=	rt6_do_redirect,
2569f8955ccSEric W. Biederman 	.local_out		=	__ip6_local_out,
257f8a1b43bSDavid Ahern 	.neigh_lookup		=	ip6_dst_neigh_lookup,
25863fca65dSJulian Anastasov 	.confirm_neigh		=	ip6_confirm_neigh,
2591da177e4SLinus Torvalds };
2601da177e4SLinus Torvalds 
261ebb762f2SSteffen Klassert static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
262ec831ea7SRoland Dreier {
263618f9bc7SSteffen Klassert 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
264618f9bc7SSteffen Klassert 
265618f9bc7SSteffen Klassert 	return mtu ? : dst->dev->mtu;
266ec831ea7SRoland Dreier }
267ec831ea7SRoland Dreier 
2686700c270SDavid S. Miller static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2696700c270SDavid S. Miller 					 struct sk_buff *skb, u32 mtu)
27014e50e57SDavid S. Miller {
27114e50e57SDavid S. Miller }
27214e50e57SDavid S. Miller 
/* redirect hook for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
277b587ee3bSDavid S. Miller 
27814e50e57SDavid S. Miller static struct dst_ops ip6_dst_blackhole_ops = {
27914e50e57SDavid S. Miller 	.family			=	AF_INET6,
28014e50e57SDavid S. Miller 	.destroy		=	ip6_dst_destroy,
28114e50e57SDavid S. Miller 	.check			=	ip6_dst_check,
282ebb762f2SSteffen Klassert 	.mtu			=	ip6_blackhole_mtu,
283214f45c9SEric Dumazet 	.default_advmss		=	ip6_default_advmss,
28414e50e57SDavid S. Miller 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
285b587ee3bSDavid S. Miller 	.redirect		=	ip6_rt_blackhole_redirect,
2860a1f5962SMartin KaFai Lau 	.cow_metrics		=	dst_cow_metrics_generic,
287f8a1b43bSDavid Ahern 	.neigh_lookup		=	ip6_dst_neigh_lookup,
28814e50e57SDavid S. Miller };
28914e50e57SDavid S. Miller 
29062fa8a84SDavid S. Miller static const u32 ip6_template_metrics[RTAX_MAX] = {
29114edd87dSLi RongQing 	[RTAX_HOPLIMIT - 1] = 0,
29262fa8a84SDavid S. Miller };
29362fa8a84SDavid S. Miller 
2948d1c802bSDavid Ahern static const struct fib6_info fib6_null_entry_template = {
29593c2fb25SDavid Ahern 	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
29693c2fb25SDavid Ahern 	.fib6_protocol  = RTPROT_KERNEL,
29793c2fb25SDavid Ahern 	.fib6_metric	= ~(u32)0,
29893c2fb25SDavid Ahern 	.fib6_ref	= ATOMIC_INIT(1),
299421842edSDavid Ahern 	.fib6_type	= RTN_UNREACHABLE,
300421842edSDavid Ahern 	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
301421842edSDavid Ahern };
302421842edSDavid Ahern 
303fb0af4c7SEric Dumazet static const struct rt6_info ip6_null_entry_template = {
3041da177e4SLinus Torvalds 	.dst = {
3051da177e4SLinus Torvalds 		.__refcnt	= ATOMIC_INIT(1),
3061da177e4SLinus Torvalds 		.__use		= 1,
3072c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
3081da177e4SLinus Torvalds 		.error		= -ENETUNREACH,
3091da177e4SLinus Torvalds 		.input		= ip6_pkt_discard,
3101da177e4SLinus Torvalds 		.output		= ip6_pkt_discard_out,
3111da177e4SLinus Torvalds 	},
3121da177e4SLinus Torvalds 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
3131da177e4SLinus Torvalds };
3141da177e4SLinus Torvalds 
315101367c2SThomas Graf #ifdef CONFIG_IPV6_MULTIPLE_TABLES
316101367c2SThomas Graf 
317fb0af4c7SEric Dumazet static const struct rt6_info ip6_prohibit_entry_template = {
318101367c2SThomas Graf 	.dst = {
319101367c2SThomas Graf 		.__refcnt	= ATOMIC_INIT(1),
320101367c2SThomas Graf 		.__use		= 1,
3212c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
322101367c2SThomas Graf 		.error		= -EACCES,
3239ce8ade0SThomas Graf 		.input		= ip6_pkt_prohibit,
3249ce8ade0SThomas Graf 		.output		= ip6_pkt_prohibit_out,
325101367c2SThomas Graf 	},
326101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
327101367c2SThomas Graf };
328101367c2SThomas Graf 
329fb0af4c7SEric Dumazet static const struct rt6_info ip6_blk_hole_entry_template = {
330101367c2SThomas Graf 	.dst = {
331101367c2SThomas Graf 		.__refcnt	= ATOMIC_INIT(1),
332101367c2SThomas Graf 		.__use		= 1,
3332c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
334101367c2SThomas Graf 		.error		= -EINVAL,
335352e512cSHerbert Xu 		.input		= dst_discard,
336ede2059dSEric W. Biederman 		.output		= dst_discard_out,
337101367c2SThomas Graf 	},
338101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
339101367c2SThomas Graf };
340101367c2SThomas Graf 
341101367c2SThomas Graf #endif
342101367c2SThomas Graf 
343ebfa45f0SMartin KaFai Lau static void rt6_info_init(struct rt6_info *rt)
344ebfa45f0SMartin KaFai Lau {
345ebfa45f0SMartin KaFai Lau 	struct dst_entry *dst = &rt->dst;
346ebfa45f0SMartin KaFai Lau 
347ebfa45f0SMartin KaFai Lau 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
348ebfa45f0SMartin KaFai Lau 	INIT_LIST_HEAD(&rt->rt6i_uncached);
349ebfa45f0SMartin KaFai Lau }
350ebfa45f0SMartin KaFai Lau 
3511da177e4SLinus Torvalds /* allocate dst with ip6_dst_ops */
35293531c67SDavid Ahern struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
353ad706862SMartin KaFai Lau 			       int flags)
3541da177e4SLinus Torvalds {
35597bab73fSDavid S. Miller 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
356b2a9c0edSWei Wang 					1, DST_OBSOLETE_FORCE_CHK, flags);
357cf911662SDavid S. Miller 
35881eb8447SWei Wang 	if (rt) {
359ebfa45f0SMartin KaFai Lau 		rt6_info_init(rt);
36081eb8447SWei Wang 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
36181eb8447SWei Wang 	}
3628104891bSSteffen Klassert 
363cf911662SDavid S. Miller 	return rt;
3641da177e4SLinus Torvalds }
3659ab179d8SDavid Ahern EXPORT_SYMBOL(ip6_dst_alloc);
366d52d3997SMartin KaFai Lau 
3671da177e4SLinus Torvalds static void ip6_dst_destroy(struct dst_entry *dst)
3681da177e4SLinus Torvalds {
3691da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
370a68886a6SDavid Ahern 	struct fib6_info *from;
3718d0b94afSMartin KaFai Lau 	struct inet6_dev *idev;
3721da177e4SLinus Torvalds 
3731620a336SDavid Ahern 	ip_dst_metrics_put(dst);
3748d0b94afSMartin KaFai Lau 	rt6_uncached_list_del(rt);
3758d0b94afSMartin KaFai Lau 
3768d0b94afSMartin KaFai Lau 	idev = rt->rt6i_idev;
37738308473SDavid S. Miller 	if (idev) {
3781da177e4SLinus Torvalds 		rt->rt6i_idev = NULL;
3791da177e4SLinus Torvalds 		in6_dev_put(idev);
3801da177e4SLinus Torvalds 	}
3811716a961SGao feng 
382a68886a6SDavid Ahern 	rcu_read_lock();
383a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
384a68886a6SDavid Ahern 	rcu_assign_pointer(rt->from, NULL);
38593531c67SDavid Ahern 	fib6_info_release(from);
386a68886a6SDavid Ahern 	rcu_read_unlock();
387b3419363SDavid S. Miller }
388b3419363SDavid S. Miller 
3891da177e4SLinus Torvalds static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
3901da177e4SLinus Torvalds 			   int how)
3911da177e4SLinus Torvalds {
3921da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
3931da177e4SLinus Torvalds 	struct inet6_dev *idev = rt->rt6i_idev;
3945a3e55d6SDenis V. Lunev 	struct net_device *loopback_dev =
395c346dca1SYOSHIFUJI Hideaki 		dev_net(dev)->loopback_dev;
3961da177e4SLinus Torvalds 
397e5645f51SWei Wang 	if (idev && idev->dev != loopback_dev) {
398e5645f51SWei Wang 		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
39938308473SDavid S. Miller 		if (loopback_idev) {
4001da177e4SLinus Torvalds 			rt->rt6i_idev = loopback_idev;
4011da177e4SLinus Torvalds 			in6_dev_put(idev);
4021da177e4SLinus Torvalds 		}
4031da177e4SLinus Torvalds 	}
40497cac082SDavid S. Miller }
4051da177e4SLinus Torvalds 
4065973fb1eSMartin KaFai Lau static bool __rt6_check_expired(const struct rt6_info *rt)
4075973fb1eSMartin KaFai Lau {
4085973fb1eSMartin KaFai Lau 	if (rt->rt6i_flags & RTF_EXPIRES)
4095973fb1eSMartin KaFai Lau 		return time_after(jiffies, rt->dst.expires);
4105973fb1eSMartin KaFai Lau 	else
4115973fb1eSMartin KaFai Lau 		return false;
4125973fb1eSMartin KaFai Lau }
4135973fb1eSMartin KaFai Lau 
414a50feda5SEric Dumazet static bool rt6_check_expired(const struct rt6_info *rt)
4151da177e4SLinus Torvalds {
416a68886a6SDavid Ahern 	struct fib6_info *from;
417a68886a6SDavid Ahern 
418a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
419a68886a6SDavid Ahern 
4201716a961SGao feng 	if (rt->rt6i_flags & RTF_EXPIRES) {
4211716a961SGao feng 		if (time_after(jiffies, rt->dst.expires))
422a50feda5SEric Dumazet 			return true;
423a68886a6SDavid Ahern 	} else if (from) {
4241e2ea8adSXin Long 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
425a68886a6SDavid Ahern 			fib6_check_expired(from);
4261716a961SGao feng 	}
427a50feda5SEric Dumazet 	return false;
4281da177e4SLinus Torvalds }
4291da177e4SLinus Torvalds 
4303b290a31SDavid Ahern struct fib6_info *fib6_multipath_select(const struct net *net,
4318d1c802bSDavid Ahern 					struct fib6_info *match,
43252bd4c0cSNicolas Dichtel 					struct flowi6 *fl6, int oif,
433b75cc8f9SDavid Ahern 					const struct sk_buff *skb,
43452bd4c0cSNicolas Dichtel 					int strict)
43551ebd318SNicolas Dichtel {
4368d1c802bSDavid Ahern 	struct fib6_info *sibling, *next_sibling;
43751ebd318SNicolas Dichtel 
438b673d6ccSJakub Sitnicki 	/* We might have already computed the hash for ICMPv6 errors. In such
439b673d6ccSJakub Sitnicki 	 * case it will always be non-zero. Otherwise now is the time to do it.
440b673d6ccSJakub Sitnicki 	 */
441b673d6ccSJakub Sitnicki 	if (!fl6->mp_hash)
442b4bac172SDavid Ahern 		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
443b673d6ccSJakub Sitnicki 
444ad1601aeSDavid Ahern 	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
4453d709f69SIdo Schimmel 		return match;
446bbfcd776SIdo Schimmel 
44793c2fb25SDavid Ahern 	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
44893c2fb25SDavid Ahern 				 fib6_siblings) {
4495e670d84SDavid Ahern 		int nh_upper_bound;
4505e670d84SDavid Ahern 
451ad1601aeSDavid Ahern 		nh_upper_bound = atomic_read(&sibling->fib6_nh.fib_nh_upper_bound);
4525e670d84SDavid Ahern 		if (fl6->mp_hash > nh_upper_bound)
4533d709f69SIdo Schimmel 			continue;
45452bd4c0cSNicolas Dichtel 		if (rt6_score_route(sibling, oif, strict) < 0)
45552bd4c0cSNicolas Dichtel 			break;
45651ebd318SNicolas Dichtel 		match = sibling;
45751ebd318SNicolas Dichtel 		break;
45851ebd318SNicolas Dichtel 	}
4593d709f69SIdo Schimmel 
46051ebd318SNicolas Dichtel 	return match;
46151ebd318SNicolas Dichtel }
46251ebd318SNicolas Dichtel 
4631da177e4SLinus Torvalds /*
46466f5d6ceSWei Wang  *	Route lookup. rcu_read_lock() should be held.
4651da177e4SLinus Torvalds  */
4661da177e4SLinus Torvalds 
4678d1c802bSDavid Ahern static inline struct fib6_info *rt6_device_match(struct net *net,
4688d1c802bSDavid Ahern 						 struct fib6_info *rt,
469b71d1d42SEric Dumazet 						    const struct in6_addr *saddr,
4701da177e4SLinus Torvalds 						    int oif,
471d420895eSYOSHIFUJI Hideaki 						    int flags)
4721da177e4SLinus Torvalds {
4738d1c802bSDavid Ahern 	struct fib6_info *sprt;
4741da177e4SLinus Torvalds 
4755e670d84SDavid Ahern 	if (!oif && ipv6_addr_any(saddr) &&
476ad1601aeSDavid Ahern 	    !(rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD))
4778067bb8cSIdo Schimmel 		return rt;
478dd3abc4eSYOSHIFUJI Hideaki 
4798fb11a9aSDavid Ahern 	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
480ad1601aeSDavid Ahern 		const struct net_device *dev = sprt->fib6_nh.fib_nh_dev;
481dd3abc4eSYOSHIFUJI Hideaki 
482ad1601aeSDavid Ahern 		if (sprt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
4838067bb8cSIdo Schimmel 			continue;
4848067bb8cSIdo Schimmel 
485dd3abc4eSYOSHIFUJI Hideaki 		if (oif) {
4861da177e4SLinus Torvalds 			if (dev->ifindex == oif)
4871da177e4SLinus Torvalds 				return sprt;
488dd3abc4eSYOSHIFUJI Hideaki 		} else {
489dd3abc4eSYOSHIFUJI Hideaki 			if (ipv6_chk_addr(net, saddr, dev,
490dd3abc4eSYOSHIFUJI Hideaki 					  flags & RT6_LOOKUP_F_IFACE))
491dd3abc4eSYOSHIFUJI Hideaki 				return sprt;
492dd3abc4eSYOSHIFUJI Hideaki 		}
4931da177e4SLinus Torvalds 	}
4941da177e4SLinus Torvalds 
495eea68cd3SDavid Ahern 	if (oif && flags & RT6_LOOKUP_F_IFACE)
496421842edSDavid Ahern 		return net->ipv6.fib6_null_entry;
4971da177e4SLinus Torvalds 
498ad1601aeSDavid Ahern 	return rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
4991da177e4SLinus Torvalds }
5001da177e4SLinus Torvalds 
50127097255SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
502c2f17e82SHannes Frederic Sowa struct __rt6_probe_work {
503c2f17e82SHannes Frederic Sowa 	struct work_struct work;
504c2f17e82SHannes Frederic Sowa 	struct in6_addr target;
505c2f17e82SHannes Frederic Sowa 	struct net_device *dev;
506c2f17e82SHannes Frederic Sowa };
507c2f17e82SHannes Frederic Sowa 
508c2f17e82SHannes Frederic Sowa static void rt6_probe_deferred(struct work_struct *w)
509c2f17e82SHannes Frederic Sowa {
510c2f17e82SHannes Frederic Sowa 	struct in6_addr mcaddr;
511c2f17e82SHannes Frederic Sowa 	struct __rt6_probe_work *work =
512c2f17e82SHannes Frederic Sowa 		container_of(w, struct __rt6_probe_work, work);
513c2f17e82SHannes Frederic Sowa 
514c2f17e82SHannes Frederic Sowa 	addrconf_addr_solict_mult(&work->target, &mcaddr);
515adc176c5SErik Nordmark 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
516c2f17e82SHannes Frederic Sowa 	dev_put(work->dev);
517662f5533SMichael Büsch 	kfree(work);
518c2f17e82SHannes Frederic Sowa }
519c2f17e82SHannes Frederic Sowa 
5208d1c802bSDavid Ahern static void rt6_probe(struct fib6_info *rt)
52127097255SYOSHIFUJI Hideaki {
522f547fac6SSabrina Dubroca 	struct __rt6_probe_work *work = NULL;
5235e670d84SDavid Ahern 	const struct in6_addr *nh_gw;
524f2c31e32SEric Dumazet 	struct neighbour *neigh;
5255e670d84SDavid Ahern 	struct net_device *dev;
526f547fac6SSabrina Dubroca 	struct inet6_dev *idev;
5275e670d84SDavid Ahern 
52827097255SYOSHIFUJI Hideaki 	/*
52927097255SYOSHIFUJI Hideaki 	 * Okay, this does not seem to be appropriate
53027097255SYOSHIFUJI Hideaki 	 * for now, however, we need to check if it
53127097255SYOSHIFUJI Hideaki 	 * is really so; aka Router Reachability Probing.
53227097255SYOSHIFUJI Hideaki 	 *
53327097255SYOSHIFUJI Hideaki 	 * Router Reachability Probe MUST be rate-limited
53427097255SYOSHIFUJI Hideaki 	 * to no more than one per minute.
53527097255SYOSHIFUJI Hideaki 	 */
5362b2450caSDavid Ahern 	if (!rt || !rt->fib6_nh.fib_nh_has_gw)
537fdd6681dSAmerigo Wang 		return;
5385e670d84SDavid Ahern 
539ad1601aeSDavid Ahern 	nh_gw = &rt->fib6_nh.fib_nh_gw6;
540ad1601aeSDavid Ahern 	dev = rt->fib6_nh.fib_nh_dev;
5412152caeaSYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_lock_bh();
542f547fac6SSabrina Dubroca 	idev = __in6_dev_get(dev);
5435e670d84SDavid Ahern 	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
5442152caeaSYOSHIFUJI Hideaki / 吉藤英明 	if (neigh) {
5458d6c31bfSMartin KaFai Lau 		if (neigh->nud_state & NUD_VALID)
5468d6c31bfSMartin KaFai Lau 			goto out;
5478d6c31bfSMartin KaFai Lau 
5482152caeaSYOSHIFUJI Hideaki / 吉藤英明 		write_lock(&neigh->lock);
549990edb42SMartin KaFai Lau 		if (!(neigh->nud_state & NUD_VALID) &&
550990edb42SMartin KaFai Lau 		    time_after(jiffies,
551dcd1f572SDavid Ahern 			       neigh->updated + idev->cnf.rtr_probe_interval)) {
552c2f17e82SHannes Frederic Sowa 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
553990edb42SMartin KaFai Lau 			if (work)
5547e980569SJiri Benc 				__neigh_set_probe_once(neigh);
555990edb42SMartin KaFai Lau 		}
556c2f17e82SHannes Frederic Sowa 		write_unlock(&neigh->lock);
557f547fac6SSabrina Dubroca 	} else if (time_after(jiffies, rt->last_probe +
558f547fac6SSabrina Dubroca 				       idev->cnf.rtr_probe_interval)) {
559990edb42SMartin KaFai Lau 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
560990edb42SMartin KaFai Lau 	}
561c2f17e82SHannes Frederic Sowa 
562c2f17e82SHannes Frederic Sowa 	if (work) {
563f547fac6SSabrina Dubroca 		rt->last_probe = jiffies;
564c2f17e82SHannes Frederic Sowa 		INIT_WORK(&work->work, rt6_probe_deferred);
5655e670d84SDavid Ahern 		work->target = *nh_gw;
5665e670d84SDavid Ahern 		dev_hold(dev);
5675e670d84SDavid Ahern 		work->dev = dev;
568c2f17e82SHannes Frederic Sowa 		schedule_work(&work->work);
569c2f17e82SHannes Frederic Sowa 	}
570990edb42SMartin KaFai Lau 
5718d6c31bfSMartin KaFai Lau out:
5722152caeaSYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_unlock_bh();
573f2c31e32SEric Dumazet }
57427097255SYOSHIFUJI Hideaki #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_info *rt)
{
}
57827097255SYOSHIFUJI Hideaki #endif
57927097255SYOSHIFUJI Hideaki 
5801da177e4SLinus Torvalds /*
581554cfb7eSYOSHIFUJI Hideaki  * Default Router Selection (RFC 2461 6.3.6)
5821da177e4SLinus Torvalds  */
5838d1c802bSDavid Ahern static inline int rt6_check_dev(struct fib6_info *rt, int oif)
5841da177e4SLinus Torvalds {
585ad1601aeSDavid Ahern 	const struct net_device *dev = rt->fib6_nh.fib_nh_dev;
5865e670d84SDavid Ahern 
587161980f4SDavid S. Miller 	if (!oif || dev->ifindex == oif)
588554cfb7eSYOSHIFUJI Hideaki 		return 2;
589554cfb7eSYOSHIFUJI Hideaki 	return 0;
5901da177e4SLinus Torvalds }
5911da177e4SLinus Torvalds 
5928d1c802bSDavid Ahern static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
5931da177e4SLinus Torvalds {
594afc154e9SHannes Frederic Sowa 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5955e670d84SDavid Ahern 	struct neighbour *neigh;
596f2c31e32SEric Dumazet 
59793c2fb25SDavid Ahern 	if (rt->fib6_flags & RTF_NONEXTHOP ||
5982b2450caSDavid Ahern 	    !rt->fib6_nh.fib_nh_has_gw)
599afc154e9SHannes Frederic Sowa 		return RT6_NUD_SUCCEED;
600145a3621SYOSHIFUJI Hideaki / 吉藤英明 
601145a3621SYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_lock_bh();
602ad1601aeSDavid Ahern 	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.fib_nh_dev,
603ad1601aeSDavid Ahern 					  &rt->fib6_nh.fib_nh_gw6);
604145a3621SYOSHIFUJI Hideaki / 吉藤英明 	if (neigh) {
605145a3621SYOSHIFUJI Hideaki / 吉藤英明 		read_lock(&neigh->lock);
606554cfb7eSYOSHIFUJI Hideaki 		if (neigh->nud_state & NUD_VALID)
607afc154e9SHannes Frederic Sowa 			ret = RT6_NUD_SUCCEED;
608398bcbebSYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
609a5a81f0bSPaul Marks 		else if (!(neigh->nud_state & NUD_FAILED))
610afc154e9SHannes Frederic Sowa 			ret = RT6_NUD_SUCCEED;
6117e980569SJiri Benc 		else
6127e980569SJiri Benc 			ret = RT6_NUD_FAIL_PROBE;
613398bcbebSYOSHIFUJI Hideaki #endif
614145a3621SYOSHIFUJI Hideaki / 吉藤英明 		read_unlock(&neigh->lock);
615afc154e9SHannes Frederic Sowa 	} else {
616afc154e9SHannes Frederic Sowa 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
6177e980569SJiri Benc 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
618a5a81f0bSPaul Marks 	}
619145a3621SYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_unlock_bh();
620145a3621SYOSHIFUJI Hideaki / 吉藤英明 
621a5a81f0bSPaul Marks 	return ret;
6221da177e4SLinus Torvalds }
6231da177e4SLinus Torvalds 
6248d1c802bSDavid Ahern static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
625554cfb7eSYOSHIFUJI Hideaki {
626a5a81f0bSPaul Marks 	int m;
6274d0c5911SYOSHIFUJI Hideaki 
6284d0c5911SYOSHIFUJI Hideaki 	m = rt6_check_dev(rt, oif);
62977d16f45SYOSHIFUJI Hideaki 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
630afc154e9SHannes Frederic Sowa 		return RT6_NUD_FAIL_HARD;
631ebacaaa0SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
63293c2fb25SDavid Ahern 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
633ebacaaa0SYOSHIFUJI Hideaki #endif
634afc154e9SHannes Frederic Sowa 	if (strict & RT6_LOOKUP_F_REACHABLE) {
635afc154e9SHannes Frederic Sowa 		int n = rt6_check_neigh(rt);
636afc154e9SHannes Frederic Sowa 		if (n < 0)
637afc154e9SHannes Frederic Sowa 			return n;
638afc154e9SHannes Frederic Sowa 	}
639554cfb7eSYOSHIFUJI Hideaki 	return m;
640554cfb7eSYOSHIFUJI Hideaki }
641554cfb7eSYOSHIFUJI Hideaki 
6428d1c802bSDavid Ahern static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
6438d1c802bSDavid Ahern 				   int *mpri, struct fib6_info *match,
644afc154e9SHannes Frederic Sowa 				   bool *do_rr)
645554cfb7eSYOSHIFUJI Hideaki {
646554cfb7eSYOSHIFUJI Hideaki 	int m;
647afc154e9SHannes Frederic Sowa 	bool match_do_rr = false;
64835103d11SAndy Gospodarek 
649ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
6508067bb8cSIdo Schimmel 		goto out;
6518067bb8cSIdo Schimmel 
652ad1601aeSDavid Ahern 	if (ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev) &&
653ad1601aeSDavid Ahern 	    rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
654d5d32e4bSDavid Ahern 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
65535103d11SAndy Gospodarek 		goto out;
656554cfb7eSYOSHIFUJI Hideaki 
65714895687SDavid Ahern 	if (fib6_check_expired(rt))
658f11e6659SDavid S. Miller 		goto out;
659554cfb7eSYOSHIFUJI Hideaki 
660554cfb7eSYOSHIFUJI Hideaki 	m = rt6_score_route(rt, oif, strict);
6617e980569SJiri Benc 	if (m == RT6_NUD_FAIL_DO_RR) {
662afc154e9SHannes Frederic Sowa 		match_do_rr = true;
663afc154e9SHannes Frederic Sowa 		m = 0; /* lowest valid score */
6647e980569SJiri Benc 	} else if (m == RT6_NUD_FAIL_HARD) {
665f11e6659SDavid S. Miller 		goto out;
6661da177e4SLinus Torvalds 	}
667f11e6659SDavid S. Miller 
668afc154e9SHannes Frederic Sowa 	if (strict & RT6_LOOKUP_F_REACHABLE)
669afc154e9SHannes Frederic Sowa 		rt6_probe(rt);
670afc154e9SHannes Frederic Sowa 
6717e980569SJiri Benc 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
672afc154e9SHannes Frederic Sowa 	if (m > *mpri) {
673afc154e9SHannes Frederic Sowa 		*do_rr = match_do_rr;
674afc154e9SHannes Frederic Sowa 		*mpri = m;
675afc154e9SHannes Frederic Sowa 		match = rt;
676afc154e9SHannes Frederic Sowa 	}
677f11e6659SDavid S. Miller out:
678f11e6659SDavid S. Miller 	return match;
6791da177e4SLinus Torvalds }
6801da177e4SLinus Torvalds 
6818d1c802bSDavid Ahern static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
6828d1c802bSDavid Ahern 				     struct fib6_info *leaf,
6838d1c802bSDavid Ahern 				     struct fib6_info *rr_head,
684afc154e9SHannes Frederic Sowa 				     u32 metric, int oif, int strict,
685afc154e9SHannes Frederic Sowa 				     bool *do_rr)
686f11e6659SDavid S. Miller {
6878d1c802bSDavid Ahern 	struct fib6_info *rt, *match, *cont;
688f11e6659SDavid S. Miller 	int mpri = -1;
689f11e6659SDavid S. Miller 
690f11e6659SDavid S. Miller 	match = NULL;
6919fbdcfafSSteffen Klassert 	cont = NULL;
6928fb11a9aSDavid Ahern 	for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
69393c2fb25SDavid Ahern 		if (rt->fib6_metric != metric) {
6949fbdcfafSSteffen Klassert 			cont = rt;
6959fbdcfafSSteffen Klassert 			break;
6969fbdcfafSSteffen Klassert 		}
6979fbdcfafSSteffen Klassert 
698afc154e9SHannes Frederic Sowa 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
6999fbdcfafSSteffen Klassert 	}
7009fbdcfafSSteffen Klassert 
70166f5d6ceSWei Wang 	for (rt = leaf; rt && rt != rr_head;
7028fb11a9aSDavid Ahern 	     rt = rcu_dereference(rt->fib6_next)) {
70393c2fb25SDavid Ahern 		if (rt->fib6_metric != metric) {
7049fbdcfafSSteffen Klassert 			cont = rt;
7059fbdcfafSSteffen Klassert 			break;
7069fbdcfafSSteffen Klassert 		}
7079fbdcfafSSteffen Klassert 
7089fbdcfafSSteffen Klassert 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
7099fbdcfafSSteffen Klassert 	}
7109fbdcfafSSteffen Klassert 
7119fbdcfafSSteffen Klassert 	if (match || !cont)
7129fbdcfafSSteffen Klassert 		return match;
7139fbdcfafSSteffen Klassert 
7148fb11a9aSDavid Ahern 	for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
715afc154e9SHannes Frederic Sowa 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
716f11e6659SDavid S. Miller 
717f11e6659SDavid S. Miller 	return match;
718f11e6659SDavid S. Miller }
719f11e6659SDavid S. Miller 
/* Choose the route to use among the entries attached to @fn.
 *
 * The search starts at fn->rr_ptr (or fn->leaf when rr_ptr is unset) and is
 * delegated to find_rr_leaf().  When find_rr_leaf() sets do_rr, fn->rr_ptr
 * is advanced to the next entry of the same metric (wrapping to the leaf)
 * under the table lock.
 *
 * Returns the selected entry, or net->ipv6.fib6_null_entry when @fn has no
 * real leaf, when fn->fn_bit does not equal the prefix length of the
 * starting route, or when nothing matched.
 *
 * NOTE(review): uses rcu_dereference(), so callers presumably hold the RCU
 * read lock - confirm against callers.
 */
7208d1c802bSDavid Ahern static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
7218d1040e8SWei Wang 				   int oif, int strict)
722f11e6659SDavid S. Miller {
7238d1c802bSDavid Ahern 	struct fib6_info *leaf = rcu_dereference(fn->leaf);
7248d1c802bSDavid Ahern 	struct fib6_info *match, *rt0;
725afc154e9SHannes Frederic Sowa 	bool do_rr = false;
72617ecf590SWei Wang 	int key_plen;
727f11e6659SDavid S. Miller 
	/* nothing attached to this node, or only the null entry */
728421842edSDavid Ahern 	if (!leaf || leaf == net->ipv6.fib6_null_entry)
729421842edSDavid Ahern 		return net->ipv6.fib6_null_entry;
7308d1040e8SWei Wang 
	/* rt0 is the round-robin starting point; default to the first leaf */
73166f5d6ceSWei Wang 	rt0 = rcu_dereference(fn->rr_ptr);
732f11e6659SDavid S. Miller 	if (!rt0)
73366f5d6ceSWei Wang 		rt0 = leaf;
734f11e6659SDavid S. Miller 
73517ecf590SWei Wang 	/* Double check to make sure fn is not an intermediate node
	 * and that fn->leaf does not point to its child's leaf.
	 * (This might happen if all routes under fn are deleted from
	 * the tree and fib6_repair_tree() is called on the node.)
	 */
74093c2fb25SDavid Ahern 	key_plen = rt0->fib6_dst.plen;
74117ecf590SWei Wang #ifdef CONFIG_IPV6_SUBTREES
	/* a non-zero source prefix length takes precedence as the key length */
74293c2fb25SDavid Ahern 	if (rt0->fib6_src.plen)
74393c2fb25SDavid Ahern 		key_plen = rt0->fib6_src.plen;
74417ecf590SWei Wang #endif
74517ecf590SWei Wang 	if (fn->fn_bit != key_plen)
746421842edSDavid Ahern 		return net->ipv6.fib6_null_entry;
74717ecf590SWei Wang 
74893c2fb25SDavid Ahern 	match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
749afc154e9SHannes Frederic Sowa 			     &do_rr);
750f11e6659SDavid S. Miller 
751afc154e9SHannes Frederic Sowa 	if (do_rr) {
7528fb11a9aSDavid Ahern 		struct fib6_info *next = rcu_dereference(rt0->fib6_next);
753f11e6659SDavid S. Miller 
754554cfb7eSYOSHIFUJI Hideaki 		/* no entries matched; do round-robin */
		/* wrap to the leaf at the end of the list or of the metric group */
75593c2fb25SDavid Ahern 		if (!next || next->fib6_metric != rt0->fib6_metric)
7568d1040e8SWei Wang 			next = leaf;
757f11e6659SDavid S. Miller 
		/* rr_ptr is only rewritten when it would actually change */
75866f5d6ceSWei Wang 		if (next != rt0) {
75993c2fb25SDavid Ahern 			spin_lock_bh(&leaf->fib6_table->tb6_lock);
76066f5d6ceSWei Wang 			/* make sure next is not being deleted from the tree */
76193c2fb25SDavid Ahern 			if (next->fib6_node)
76266f5d6ceSWei Wang 				rcu_assign_pointer(fn->rr_ptr, next);
76393c2fb25SDavid Ahern 			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
76466f5d6ceSWei Wang 		}
765554cfb7eSYOSHIFUJI Hideaki 	}
766554cfb7eSYOSHIFUJI Hideaki 
767421842edSDavid Ahern 	return match ? match : net->ipv6.fib6_null_entry;
7681da177e4SLinus Torvalds }
7691da177e4SLinus Torvalds 
7708d1c802bSDavid Ahern static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
7718b9df265SMartin KaFai Lau {
7722b2450caSDavid Ahern 	return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_has_gw;
7738b9df265SMartin KaFai Lau }
7748b9df265SMartin KaFai Lau 
77570ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
77670ceb4f5SYOSHIFUJI Hideaki int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
777b71d1d42SEric Dumazet 		  const struct in6_addr *gwaddr)
77870ceb4f5SYOSHIFUJI Hideaki {
779c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
78070ceb4f5SYOSHIFUJI Hideaki 	struct route_info *rinfo = (struct route_info *) opt;
78170ceb4f5SYOSHIFUJI Hideaki 	struct in6_addr prefix_buf, *prefix;
78270ceb4f5SYOSHIFUJI Hideaki 	unsigned int pref;
7834bed72e4SYOSHIFUJI Hideaki 	unsigned long lifetime;
7848d1c802bSDavid Ahern 	struct fib6_info *rt;
78570ceb4f5SYOSHIFUJI Hideaki 
78670ceb4f5SYOSHIFUJI Hideaki 	if (len < sizeof(struct route_info)) {
78770ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
78870ceb4f5SYOSHIFUJI Hideaki 	}
78970ceb4f5SYOSHIFUJI Hideaki 
79070ceb4f5SYOSHIFUJI Hideaki 	/* Sanity check for prefix_len and length */
79170ceb4f5SYOSHIFUJI Hideaki 	if (rinfo->length > 3) {
79270ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
79370ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 128) {
79470ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
79570ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 64) {
79670ceb4f5SYOSHIFUJI Hideaki 		if (rinfo->length < 2) {
79770ceb4f5SYOSHIFUJI Hideaki 			return -EINVAL;
79870ceb4f5SYOSHIFUJI Hideaki 		}
79970ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 0) {
80070ceb4f5SYOSHIFUJI Hideaki 		if (rinfo->length < 1) {
80170ceb4f5SYOSHIFUJI Hideaki 			return -EINVAL;
80270ceb4f5SYOSHIFUJI Hideaki 		}
80370ceb4f5SYOSHIFUJI Hideaki 	}
80470ceb4f5SYOSHIFUJI Hideaki 
80570ceb4f5SYOSHIFUJI Hideaki 	pref = rinfo->route_pref;
80670ceb4f5SYOSHIFUJI Hideaki 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
8073933fc95SJens Rosenboom 		return -EINVAL;
80870ceb4f5SYOSHIFUJI Hideaki 
8094bed72e4SYOSHIFUJI Hideaki 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
81070ceb4f5SYOSHIFUJI Hideaki 
81170ceb4f5SYOSHIFUJI Hideaki 	if (rinfo->length == 3)
81270ceb4f5SYOSHIFUJI Hideaki 		prefix = (struct in6_addr *)rinfo->prefix;
81370ceb4f5SYOSHIFUJI Hideaki 	else {
81470ceb4f5SYOSHIFUJI Hideaki 		/* this function is safe */
81570ceb4f5SYOSHIFUJI Hideaki 		ipv6_addr_prefix(&prefix_buf,
81670ceb4f5SYOSHIFUJI Hideaki 				 (struct in6_addr *)rinfo->prefix,
81770ceb4f5SYOSHIFUJI Hideaki 				 rinfo->prefix_len);
81870ceb4f5SYOSHIFUJI Hideaki 		prefix = &prefix_buf;
81970ceb4f5SYOSHIFUJI Hideaki 	}
82070ceb4f5SYOSHIFUJI Hideaki 
821f104a567SDuan Jiong 	if (rinfo->prefix_len == 0)
822afb1d4b5SDavid Ahern 		rt = rt6_get_dflt_router(net, gwaddr, dev);
823f104a567SDuan Jiong 	else
824f104a567SDuan Jiong 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
825830218c1SDavid Ahern 					gwaddr, dev);
82670ceb4f5SYOSHIFUJI Hideaki 
82770ceb4f5SYOSHIFUJI Hideaki 	if (rt && !lifetime) {
828afb1d4b5SDavid Ahern 		ip6_del_rt(net, rt);
82970ceb4f5SYOSHIFUJI Hideaki 		rt = NULL;
83070ceb4f5SYOSHIFUJI Hideaki 	}
83170ceb4f5SYOSHIFUJI Hideaki 
83270ceb4f5SYOSHIFUJI Hideaki 	if (!rt && lifetime)
833830218c1SDavid Ahern 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
834830218c1SDavid Ahern 					dev, pref);
83570ceb4f5SYOSHIFUJI Hideaki 	else if (rt)
83693c2fb25SDavid Ahern 		rt->fib6_flags = RTF_ROUTEINFO |
83793c2fb25SDavid Ahern 				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
83870ceb4f5SYOSHIFUJI Hideaki 
83970ceb4f5SYOSHIFUJI Hideaki 	if (rt) {
8401716a961SGao feng 		if (!addrconf_finite_timeout(lifetime))
84114895687SDavid Ahern 			fib6_clean_expires(rt);
8421716a961SGao feng 		else
84314895687SDavid Ahern 			fib6_set_expires(rt, jiffies + HZ * lifetime);
8441716a961SGao feng 
84593531c67SDavid Ahern 		fib6_info_release(rt);
84670ceb4f5SYOSHIFUJI Hideaki 	}
84770ceb4f5SYOSHIFUJI Hideaki 	return 0;
84870ceb4f5SYOSHIFUJI Hideaki }
84970ceb4f5SYOSHIFUJI Hideaki #endif
85070ceb4f5SYOSHIFUJI Hideaki 
851ae90d867SDavid Ahern /*
852ae90d867SDavid Ahern  *	Misc support functions
853ae90d867SDavid Ahern  */
854ae90d867SDavid Ahern 
855ae90d867SDavid Ahern /* called with rcu_lock held */
8568d1c802bSDavid Ahern static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
857ae90d867SDavid Ahern {
858ad1601aeSDavid Ahern 	struct net_device *dev = rt->fib6_nh.fib_nh_dev;
859ae90d867SDavid Ahern 
86093c2fb25SDavid Ahern 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
861ae90d867SDavid Ahern 		/* for copies of local routes, dst->dev needs to be the
862ae90d867SDavid Ahern 		 * device if it is a master device, the master device if
863ae90d867SDavid Ahern 		 * device is enslaved, and the loopback as the default
864ae90d867SDavid Ahern 		 */
865ae90d867SDavid Ahern 		if (netif_is_l3_slave(dev) &&
86693c2fb25SDavid Ahern 		    !rt6_need_strict(&rt->fib6_dst.addr))
867ae90d867SDavid Ahern 			dev = l3mdev_master_dev_rcu(dev);
868ae90d867SDavid Ahern 		else if (!netif_is_l3_master(dev))
869ae90d867SDavid Ahern 			dev = dev_net(dev)->loopback_dev;
870ae90d867SDavid Ahern 		/* last case is netif_is_l3_master(dev) is true in which
871ae90d867SDavid Ahern 		 * case we want dev returned to be dev
872ae90d867SDavid Ahern 		 */
873ae90d867SDavid Ahern 	}
874ae90d867SDavid Ahern 
875ae90d867SDavid Ahern 	return dev;
876ae90d867SDavid Ahern }
877ae90d867SDavid Ahern 
8786edb3c96SDavid Ahern static const int fib6_prop[RTN_MAX + 1] = {
8796edb3c96SDavid Ahern 	[RTN_UNSPEC]	= 0,
8806edb3c96SDavid Ahern 	[RTN_UNICAST]	= 0,
8816edb3c96SDavid Ahern 	[RTN_LOCAL]	= 0,
8826edb3c96SDavid Ahern 	[RTN_BROADCAST]	= 0,
8836edb3c96SDavid Ahern 	[RTN_ANYCAST]	= 0,
8846edb3c96SDavid Ahern 	[RTN_MULTICAST]	= 0,
8856edb3c96SDavid Ahern 	[RTN_BLACKHOLE]	= -EINVAL,
8866edb3c96SDavid Ahern 	[RTN_UNREACHABLE] = -EHOSTUNREACH,
8876edb3c96SDavid Ahern 	[RTN_PROHIBIT]	= -EACCES,
8886edb3c96SDavid Ahern 	[RTN_THROW]	= -EAGAIN,
8896edb3c96SDavid Ahern 	[RTN_NAT]	= -EINVAL,
8906edb3c96SDavid Ahern 	[RTN_XRESOLVE]	= -EINVAL,
8916edb3c96SDavid Ahern };
8926edb3c96SDavid Ahern 
8936edb3c96SDavid Ahern static int ip6_rt_type_to_error(u8 fib6_type)
8946edb3c96SDavid Ahern {
8956edb3c96SDavid Ahern 	return fib6_prop[fib6_type];
8966edb3c96SDavid Ahern }
8976edb3c96SDavid Ahern 
8988d1c802bSDavid Ahern static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
8993b6761d1SDavid Ahern {
9003b6761d1SDavid Ahern 	unsigned short flags = 0;
9013b6761d1SDavid Ahern 
9023b6761d1SDavid Ahern 	if (rt->dst_nocount)
9033b6761d1SDavid Ahern 		flags |= DST_NOCOUNT;
9043b6761d1SDavid Ahern 	if (rt->dst_nopolicy)
9053b6761d1SDavid Ahern 		flags |= DST_NOPOLICY;
9063b6761d1SDavid Ahern 	if (rt->dst_host)
9073b6761d1SDavid Ahern 		flags |= DST_HOST;
9083b6761d1SDavid Ahern 
9093b6761d1SDavid Ahern 	return flags;
9103b6761d1SDavid Ahern }
9113b6761d1SDavid Ahern 
9128d1c802bSDavid Ahern static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
9136edb3c96SDavid Ahern {
9146edb3c96SDavid Ahern 	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
9156edb3c96SDavid Ahern 
9166edb3c96SDavid Ahern 	switch (ort->fib6_type) {
9176edb3c96SDavid Ahern 	case RTN_BLACKHOLE:
9186edb3c96SDavid Ahern 		rt->dst.output = dst_discard_out;
9196edb3c96SDavid Ahern 		rt->dst.input = dst_discard;
9206edb3c96SDavid Ahern 		break;
9216edb3c96SDavid Ahern 	case RTN_PROHIBIT:
9226edb3c96SDavid Ahern 		rt->dst.output = ip6_pkt_prohibit_out;
9236edb3c96SDavid Ahern 		rt->dst.input = ip6_pkt_prohibit;
9246edb3c96SDavid Ahern 		break;
9256edb3c96SDavid Ahern 	case RTN_THROW:
9266edb3c96SDavid Ahern 	case RTN_UNREACHABLE:
9276edb3c96SDavid Ahern 	default:
9286edb3c96SDavid Ahern 		rt->dst.output = ip6_pkt_discard_out;
9296edb3c96SDavid Ahern 		rt->dst.input = ip6_pkt_discard;
9306edb3c96SDavid Ahern 		break;
9316edb3c96SDavid Ahern 	}
9326edb3c96SDavid Ahern }
9336edb3c96SDavid Ahern 
9348d1c802bSDavid Ahern static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
9356edb3c96SDavid Ahern {
93693c2fb25SDavid Ahern 	if (ort->fib6_flags & RTF_REJECT) {
9376edb3c96SDavid Ahern 		ip6_rt_init_dst_reject(rt, ort);
9386edb3c96SDavid Ahern 		return;
9396edb3c96SDavid Ahern 	}
9406edb3c96SDavid Ahern 
9416edb3c96SDavid Ahern 	rt->dst.error = 0;
9426edb3c96SDavid Ahern 	rt->dst.output = ip6_output;
9436edb3c96SDavid Ahern 
944d23c4b63SHangbin Liu 	if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
9456edb3c96SDavid Ahern 		rt->dst.input = ip6_input;
94693c2fb25SDavid Ahern 	} else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
9476edb3c96SDavid Ahern 		rt->dst.input = ip6_mc_input;
9486edb3c96SDavid Ahern 	} else {
9496edb3c96SDavid Ahern 		rt->dst.input = ip6_forward;
9506edb3c96SDavid Ahern 	}
9516edb3c96SDavid Ahern 
952ad1601aeSDavid Ahern 	if (ort->fib6_nh.fib_nh_lws) {
953ad1601aeSDavid Ahern 		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.fib_nh_lws);
9546edb3c96SDavid Ahern 		lwtunnel_set_redirect(&rt->dst);
9556edb3c96SDavid Ahern 	}
9566edb3c96SDavid Ahern 
9576edb3c96SDavid Ahern 	rt->dst.lastuse = jiffies;
9586edb3c96SDavid Ahern }
9596edb3c96SDavid Ahern 
960e873e4b9SWei Wang /* Caller must already hold reference to @from */
9618d1c802bSDavid Ahern static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
962ae90d867SDavid Ahern {
963ae90d867SDavid Ahern 	rt->rt6i_flags &= ~RTF_EXPIRES;
964a68886a6SDavid Ahern 	rcu_assign_pointer(rt->from, from);
965e1255ed4SDavid Ahern 	ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
966ae90d867SDavid Ahern }
967ae90d867SDavid Ahern 
968e873e4b9SWei Wang /* Caller must already hold reference to @ort */
9698d1c802bSDavid Ahern static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
970ae90d867SDavid Ahern {
971dcd1f572SDavid Ahern 	struct net_device *dev = fib6_info_nh_dev(ort);
972dcd1f572SDavid Ahern 
9736edb3c96SDavid Ahern 	ip6_rt_init_dst(rt, ort);
9746edb3c96SDavid Ahern 
97593c2fb25SDavid Ahern 	rt->rt6i_dst = ort->fib6_dst;
976dcd1f572SDavid Ahern 	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
97793c2fb25SDavid Ahern 	rt->rt6i_flags = ort->fib6_flags;
9782b2450caSDavid Ahern 	if (ort->fib6_nh.fib_nh_has_gw) {
979ad1601aeSDavid Ahern 		rt->rt6i_gateway = ort->fib6_nh.fib_nh_gw6;
9802b2450caSDavid Ahern 		rt->rt6i_flags |= RTF_GATEWAY;
9812b2450caSDavid Ahern 	}
982ae90d867SDavid Ahern 	rt6_set_from(rt, ort);
983ae90d867SDavid Ahern #ifdef CONFIG_IPV6_SUBTREES
98493c2fb25SDavid Ahern 	rt->rt6i_src = ort->fib6_src;
985ae90d867SDavid Ahern #endif
986ae90d867SDavid Ahern }
987ae90d867SDavid Ahern 
988a3c00e46SMartin KaFai Lau static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
989a3c00e46SMartin KaFai Lau 					struct in6_addr *saddr)
990a3c00e46SMartin KaFai Lau {
99166f5d6ceSWei Wang 	struct fib6_node *pn, *sn;
992a3c00e46SMartin KaFai Lau 	while (1) {
993a3c00e46SMartin KaFai Lau 		if (fn->fn_flags & RTN_TL_ROOT)
994a3c00e46SMartin KaFai Lau 			return NULL;
99566f5d6ceSWei Wang 		pn = rcu_dereference(fn->parent);
99666f5d6ceSWei Wang 		sn = FIB6_SUBTREE(pn);
99766f5d6ceSWei Wang 		if (sn && sn != fn)
9986454743bSDavid Ahern 			fn = fib6_node_lookup(sn, NULL, saddr);
999a3c00e46SMartin KaFai Lau 		else
1000a3c00e46SMartin KaFai Lau 			fn = pn;
1001a3c00e46SMartin KaFai Lau 		if (fn->fn_flags & RTN_RTINFO)
1002a3c00e46SMartin KaFai Lau 			return fn;
1003a3c00e46SMartin KaFai Lau 	}
1004a3c00e46SMartin KaFai Lau }
1005c71099acSThomas Graf 
100610585b43SDavid Ahern static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
1007d3843fe5SWei Wang {
1008d3843fe5SWei Wang 	struct rt6_info *rt = *prt;
1009d3843fe5SWei Wang 
1010d3843fe5SWei Wang 	if (dst_hold_safe(&rt->dst))
1011d3843fe5SWei Wang 		return true;
101210585b43SDavid Ahern 	if (net) {
1013d3843fe5SWei Wang 		rt = net->ipv6.ip6_null_entry;
1014d3843fe5SWei Wang 		dst_hold(&rt->dst);
1015d3843fe5SWei Wang 	} else {
1016d3843fe5SWei Wang 		rt = NULL;
1017d3843fe5SWei Wang 	}
1018d3843fe5SWei Wang 	*prt = rt;
1019d3843fe5SWei Wang 	return false;
1020d3843fe5SWei Wang }
1021d3843fe5SWei Wang 
1022dec9b0e2SDavid Ahern /* called with rcu_lock held */
10238d1c802bSDavid Ahern static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
1024dec9b0e2SDavid Ahern {
10253b6761d1SDavid Ahern 	unsigned short flags = fib6_info_dst_flags(rt);
1026ad1601aeSDavid Ahern 	struct net_device *dev = rt->fib6_nh.fib_nh_dev;
1027dec9b0e2SDavid Ahern 	struct rt6_info *nrt;
1028dec9b0e2SDavid Ahern 
1029e873e4b9SWei Wang 	if (!fib6_info_hold_safe(rt))
10301c87e79aSXin Long 		goto fallback;
1031e873e4b9SWei Wang 
103293531c67SDavid Ahern 	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
10331c87e79aSXin Long 	if (!nrt) {
1034e873e4b9SWei Wang 		fib6_info_release(rt);
10351c87e79aSXin Long 		goto fallback;
10361c87e79aSXin Long 	}
1037dec9b0e2SDavid Ahern 
10381c87e79aSXin Long 	ip6_rt_copy_init(nrt, rt);
10391c87e79aSXin Long 	return nrt;
10401c87e79aSXin Long 
10411c87e79aSXin Long fallback:
10421c87e79aSXin Long 	nrt = dev_net(dev)->ipv6.ip6_null_entry;
10431c87e79aSXin Long 	dst_hold(&nrt->dst);
1044dec9b0e2SDavid Ahern 	return nrt;
1045dec9b0e2SDavid Ahern }
1046dec9b0e2SDavid Ahern 
/* Look up the flow @fl6 in @table and return a referenced rt6_info.
 *
 * The fib node for daddr/saddr is located, its leaf is filtered by
 * rt6_device_match() and, for multipath entries when no output interface
 * is given, by fib6_multipath_select().  While the result is the null
 * entry the search backtracks through fib6_backtrack().
 *
 * The returned dst is, in order of preference: the cached exception route
 * for the flow, the namespace's ip6_null_entry when nothing matched, or a
 * new rt6_info built by ip6_create_rt_rcu().  Each path takes a reference
 * before returning.
 *
 * RT6_LOOKUP_F_IFACE is dropped from @flags when the flow carries
 * FLOWI_FLAG_SKIP_NH_OIF.  The whole lookup runs under rcu_read_lock().
 */
10478ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_lookup(struct net *net,
10488ed67789SDaniel Lezcano 					     struct fib6_table *table,
1049b75cc8f9SDavid Ahern 					     struct flowi6 *fl6,
1050b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
1051b75cc8f9SDavid Ahern 					     int flags)
10521da177e4SLinus Torvalds {
10538d1c802bSDavid Ahern 	struct fib6_info *f6i;
10541da177e4SLinus Torvalds 	struct fib6_node *fn;
105523fb93a4SDavid Ahern 	struct rt6_info *rt;
10561da177e4SLinus Torvalds 
1057b6cdbc85SDavid Ahern 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1058b6cdbc85SDavid Ahern 		flags &= ~RT6_LOOKUP_F_IFACE;
1059b6cdbc85SDavid Ahern 
106066f5d6ceSWei Wang 	rcu_read_lock();
10616454743bSDavid Ahern 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	/* re-entered with fn set to each node fib6_backtrack() returns */
1062c71099acSThomas Graf restart:
106323fb93a4SDavid Ahern 	f6i = rcu_dereference(fn->leaf);
106423fb93a4SDavid Ahern 	if (!f6i) {
106523fb93a4SDavid Ahern 		f6i = net->ipv6.fib6_null_entry;
106666f5d6ceSWei Wang 	} else {
106723fb93a4SDavid Ahern 		f6i = rt6_device_match(net, f6i, &fl6->saddr,
106866f5d6ceSWei Wang 				      fl6->flowi6_oif, flags);
		/* multipath selection only when no output interface is given */
106993c2fb25SDavid Ahern 		if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
10703b290a31SDavid Ahern 			f6i = fib6_multipath_select(net, f6i, fl6,
10713b290a31SDavid Ahern 						    fl6->flowi6_oif, skb,
10723b290a31SDavid Ahern 						    flags);
107366f5d6ceSWei Wang 	}
	/* no usable route at this node: retry from the node backtracking finds */
107423fb93a4SDavid Ahern 	if (f6i == net->ipv6.fib6_null_entry) {
1075a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
1076a3c00e46SMartin KaFai Lau 		if (fn)
1077a3c00e46SMartin KaFai Lau 			goto restart;
1078a3c00e46SMartin KaFai Lau 	}
10792b760fcfSWei Wang 
1080d4bea421SDavid Ahern 	trace_fib6_table_lookup(net, f6i, table, fl6);
1081d4bea421SDavid Ahern 
10824c9483b2SDavid S. Miller 	/* Search through exception table */
108323fb93a4SDavid Ahern 	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
108423fb93a4SDavid Ahern 	if (rt) {
		/* on failure ip6_hold_safe() swaps rt for the held null entry */
108510585b43SDavid Ahern 		if (ip6_hold_safe(net, &rt))
1086d3843fe5SWei Wang 			dst_use_noref(&rt->dst, jiffies);
108723fb93a4SDavid Ahern 	} else if (f6i == net->ipv6.fib6_null_entry) {
1088dec9b0e2SDavid Ahern 		rt = net->ipv6.ip6_null_entry;
1089dec9b0e2SDavid Ahern 		dst_hold(&rt->dst);
109023fb93a4SDavid Ahern 	} else {
109123fb93a4SDavid Ahern 		rt = ip6_create_rt_rcu(f6i);
1092dec9b0e2SDavid Ahern 	}
1093d3843fe5SWei Wang 
109466f5d6ceSWei Wang 	rcu_read_unlock();
1095b811580dSDavid Ahern 
10961da177e4SLinus Torvalds 	return rt;
1097c71099acSThomas Graf }
1098c71099acSThomas Graf 
1099ea6e574eSFlorian Westphal struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1100b75cc8f9SDavid Ahern 				   const struct sk_buff *skb, int flags)
1101ea6e574eSFlorian Westphal {
1102b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1103ea6e574eSFlorian Westphal }
1104ea6e574eSFlorian Westphal EXPORT_SYMBOL_GPL(ip6_route_lookup);
1105ea6e574eSFlorian Westphal 
11069acd9f3aSYOSHIFUJI Hideaki struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1107b75cc8f9SDavid Ahern 			    const struct in6_addr *saddr, int oif,
1108b75cc8f9SDavid Ahern 			    const struct sk_buff *skb, int strict)
1109c71099acSThomas Graf {
11104c9483b2SDavid S. Miller 	struct flowi6 fl6 = {
11114c9483b2SDavid S. Miller 		.flowi6_oif = oif,
11124c9483b2SDavid S. Miller 		.daddr = *daddr,
1113c71099acSThomas Graf 	};
1114c71099acSThomas Graf 	struct dst_entry *dst;
111577d16f45SYOSHIFUJI Hideaki 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1116c71099acSThomas Graf 
1117adaa70bbSThomas Graf 	if (saddr) {
11184c9483b2SDavid S. Miller 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1119adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1120adaa70bbSThomas Graf 	}
1121adaa70bbSThomas Graf 
1122b75cc8f9SDavid Ahern 	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1123c71099acSThomas Graf 	if (dst->error == 0)
1124c71099acSThomas Graf 		return (struct rt6_info *) dst;
1125c71099acSThomas Graf 
1126c71099acSThomas Graf 	dst_release(dst);
1127c71099acSThomas Graf 
11281da177e4SLinus Torvalds 	return NULL;
11291da177e4SLinus Torvalds }
11307159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(rt6_lookup);
11317159039aSYOSHIFUJI Hideaki 
1132c71099acSThomas Graf /* ip6_ins_rt is called with FREE table->tb6_lock.
11331cfb71eeSWei Wang  * It takes new route entry, the addition fails by any reason the
11341cfb71eeSWei Wang  * route is released.
11351cfb71eeSWei Wang  * Caller must hold dst before calling it.
11361da177e4SLinus Torvalds  */
11371da177e4SLinus Torvalds 
11388d1c802bSDavid Ahern static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1139333c4301SDavid Ahern 			struct netlink_ext_ack *extack)
11401da177e4SLinus Torvalds {
11411da177e4SLinus Torvalds 	int err;
1142c71099acSThomas Graf 	struct fib6_table *table;
11431da177e4SLinus Torvalds 
114493c2fb25SDavid Ahern 	table = rt->fib6_table;
114566f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
1146d4ead6b3SDavid Ahern 	err = fib6_add(&table->tb6_root, rt, info, extack);
114766f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
11481da177e4SLinus Torvalds 
11491da177e4SLinus Torvalds 	return err;
11501da177e4SLinus Torvalds }
11511da177e4SLinus Torvalds 
11528d1c802bSDavid Ahern int ip6_ins_rt(struct net *net, struct fib6_info *rt)
115340e22e8fSThomas Graf {
1154afb1d4b5SDavid Ahern 	struct nl_info info = {	.nl_net = net, };
1155e715b6d3SFlorian Westphal 
1156d4ead6b3SDavid Ahern 	return __ip6_ins_rt(rt, &info, NULL);
115740e22e8fSThomas Graf }
115840e22e8fSThomas Graf 
11598d1c802bSDavid Ahern static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
116021efcfa0SEric Dumazet 					   const struct in6_addr *daddr,
1161b71d1d42SEric Dumazet 					   const struct in6_addr *saddr)
11621da177e4SLinus Torvalds {
11634832c30dSDavid Ahern 	struct net_device *dev;
11641da177e4SLinus Torvalds 	struct rt6_info *rt;
11651da177e4SLinus Torvalds 
11661da177e4SLinus Torvalds 	/*
11671da177e4SLinus Torvalds 	 *	Clone the route.
11681da177e4SLinus Torvalds 	 */
11691da177e4SLinus Torvalds 
1170e873e4b9SWei Wang 	if (!fib6_info_hold_safe(ort))
1171e873e4b9SWei Wang 		return NULL;
1172e873e4b9SWei Wang 
11734832c30dSDavid Ahern 	dev = ip6_rt_get_dev_rcu(ort);
117493531c67SDavid Ahern 	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1175e873e4b9SWei Wang 	if (!rt) {
1176e873e4b9SWei Wang 		fib6_info_release(ort);
117783a09abdSMartin KaFai Lau 		return NULL;
1178e873e4b9SWei Wang 	}
117983a09abdSMartin KaFai Lau 
118083a09abdSMartin KaFai Lau 	ip6_rt_copy_init(rt, ort);
11818b9df265SMartin KaFai Lau 	rt->rt6i_flags |= RTF_CACHE;
118283a09abdSMartin KaFai Lau 	rt->dst.flags |= DST_HOST;
118383a09abdSMartin KaFai Lau 	rt->rt6i_dst.addr = *daddr;
118483a09abdSMartin KaFai Lau 	rt->rt6i_dst.plen = 128;
11858b9df265SMartin KaFai Lau 
11868b9df265SMartin KaFai Lau 	if (!rt6_is_gw_or_nonexthop(ort)) {
118793c2fb25SDavid Ahern 		if (ort->fib6_dst.plen != 128 &&
118893c2fb25SDavid Ahern 		    ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
118958c4fb86SYOSHIFUJI Hideaki 			rt->rt6i_flags |= RTF_ANYCAST;
11901da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
11911da177e4SLinus Torvalds 		if (rt->rt6i_src.plen && saddr) {
11924e3fd7a0SAlexey Dobriyan 			rt->rt6i_src.addr = *saddr;
11931da177e4SLinus Torvalds 			rt->rt6i_src.plen = 128;
11941da177e4SLinus Torvalds 		}
11951da177e4SLinus Torvalds #endif
119695a9a5baSYOSHIFUJI Hideaki 	}
119795a9a5baSYOSHIFUJI Hideaki 
1198299d9939SYOSHIFUJI Hideaki 	return rt;
1199299d9939SYOSHIFUJI Hideaki }
1200299d9939SYOSHIFUJI Hideaki 
12018d1c802bSDavid Ahern static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
1202d52d3997SMartin KaFai Lau {
12033b6761d1SDavid Ahern 	unsigned short flags = fib6_info_dst_flags(rt);
12044832c30dSDavid Ahern 	struct net_device *dev;
1205d52d3997SMartin KaFai Lau 	struct rt6_info *pcpu_rt;
1206d52d3997SMartin KaFai Lau 
1207e873e4b9SWei Wang 	if (!fib6_info_hold_safe(rt))
1208e873e4b9SWei Wang 		return NULL;
1209e873e4b9SWei Wang 
12104832c30dSDavid Ahern 	rcu_read_lock();
12114832c30dSDavid Ahern 	dev = ip6_rt_get_dev_rcu(rt);
121293531c67SDavid Ahern 	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
12134832c30dSDavid Ahern 	rcu_read_unlock();
1214e873e4b9SWei Wang 	if (!pcpu_rt) {
1215e873e4b9SWei Wang 		fib6_info_release(rt);
1216d52d3997SMartin KaFai Lau 		return NULL;
1217e873e4b9SWei Wang 	}
1218d52d3997SMartin KaFai Lau 	ip6_rt_copy_init(pcpu_rt, rt);
1219d52d3997SMartin KaFai Lau 	pcpu_rt->rt6i_flags |= RTF_PCPU;
1220d52d3997SMartin KaFai Lau 	return pcpu_rt;
1221d52d3997SMartin KaFai Lau }
1222d52d3997SMartin KaFai Lau 
122366f5d6ceSWei Wang /* It should be called with rcu_read_lock() acquired */
12248d1c802bSDavid Ahern static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
1225d52d3997SMartin KaFai Lau {
1226a73e4195SMartin KaFai Lau 	struct rt6_info *pcpu_rt, **p;
1227d52d3997SMartin KaFai Lau 
1228d52d3997SMartin KaFai Lau 	p = this_cpu_ptr(rt->rt6i_pcpu);
1229d52d3997SMartin KaFai Lau 	pcpu_rt = *p;
1230d52d3997SMartin KaFai Lau 
1231d4ead6b3SDavid Ahern 	if (pcpu_rt)
123210585b43SDavid Ahern 		ip6_hold_safe(NULL, &pcpu_rt);
1233d3843fe5SWei Wang 
1234a73e4195SMartin KaFai Lau 	return pcpu_rt;
1235a73e4195SMartin KaFai Lau }
1236a73e4195SMartin KaFai Lau 
1237afb1d4b5SDavid Ahern static struct rt6_info *rt6_make_pcpu_route(struct net *net,
12388d1c802bSDavid Ahern 					    struct fib6_info *rt)
1239a73e4195SMartin KaFai Lau {
1240a73e4195SMartin KaFai Lau 	struct rt6_info *pcpu_rt, *prev, **p;
1241d52d3997SMartin KaFai Lau 
1242d52d3997SMartin KaFai Lau 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1243d52d3997SMartin KaFai Lau 	if (!pcpu_rt) {
12449c7370a1SMartin KaFai Lau 		dst_hold(&net->ipv6.ip6_null_entry->dst);
12459c7370a1SMartin KaFai Lau 		return net->ipv6.ip6_null_entry;
1246d52d3997SMartin KaFai Lau 	}
1247d52d3997SMartin KaFai Lau 
1248a94b9367SWei Wang 	dst_hold(&pcpu_rt->dst);
1249a73e4195SMartin KaFai Lau 	p = this_cpu_ptr(rt->rt6i_pcpu);
1250d52d3997SMartin KaFai Lau 	prev = cmpxchg(p, NULL, pcpu_rt);
1251951f788aSEric Dumazet 	BUG_ON(prev);
1252a94b9367SWei Wang 
1253d52d3997SMartin KaFai Lau 	return pcpu_rt;
1254d52d3997SMartin KaFai Lau }
1255d52d3997SMartin KaFai Lau 
125635732d01SWei Wang /* Exception hash table implementation.
 *
 * rt6_exception_lock is the spinlock that the exception-removal helpers
 * in this file require their caller to hold.
 */
125835732d01SWei Wang static DEFINE_SPINLOCK(rt6_exception_lock);
125935732d01SWei Wang 
126035732d01SWei Wang /* Remove rt6_ex from hash table and free the memory
126135732d01SWei Wang  * Caller must hold rt6_exception_lock
126235732d01SWei Wang  */
126335732d01SWei Wang static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
126435732d01SWei Wang 				 struct rt6_exception *rt6_ex)
126535732d01SWei Wang {
1266f5b51fe8SPaolo Abeni 	struct fib6_info *from;
1267b2427e67SColin Ian King 	struct net *net;
126881eb8447SWei Wang 
126935732d01SWei Wang 	if (!bucket || !rt6_ex)
127035732d01SWei Wang 		return;
1271b2427e67SColin Ian King 
1272b2427e67SColin Ian King 	net = dev_net(rt6_ex->rt6i->dst.dev);
1273f5b51fe8SPaolo Abeni 	net->ipv6.rt6_stats->fib_rt_cache--;
1274f5b51fe8SPaolo Abeni 
1275f5b51fe8SPaolo Abeni 	/* purge completely the exception to allow releasing the held resources:
1276f5b51fe8SPaolo Abeni 	 * some [sk] cache may keep the dst around for unlimited time
1277f5b51fe8SPaolo Abeni 	 */
1278f5b51fe8SPaolo Abeni 	from = rcu_dereference_protected(rt6_ex->rt6i->from,
1279f5b51fe8SPaolo Abeni 					 lockdep_is_held(&rt6_exception_lock));
1280f5b51fe8SPaolo Abeni 	rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
1281f5b51fe8SPaolo Abeni 	fib6_info_release(from);
1282f5b51fe8SPaolo Abeni 	dst_dev_put(&rt6_ex->rt6i->dst);
1283f5b51fe8SPaolo Abeni 
128435732d01SWei Wang 	hlist_del_rcu(&rt6_ex->hlist);
128577634cc6SDavid Ahern 	dst_release(&rt6_ex->rt6i->dst);
128635732d01SWei Wang 	kfree_rcu(rt6_ex, rcu);
128735732d01SWei Wang 	WARN_ON_ONCE(!bucket->depth);
128835732d01SWei Wang 	bucket->depth--;
128935732d01SWei Wang }
129035732d01SWei Wang 
129135732d01SWei Wang /* Remove oldest rt6_ex in bucket and free the memory
129235732d01SWei Wang  * Caller must hold rt6_exception_lock
129335732d01SWei Wang  */
129435732d01SWei Wang static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
129535732d01SWei Wang {
129635732d01SWei Wang 	struct rt6_exception *rt6_ex, *oldest = NULL;
129735732d01SWei Wang 
129835732d01SWei Wang 	if (!bucket)
129935732d01SWei Wang 		return;
130035732d01SWei Wang 
130135732d01SWei Wang 	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
130235732d01SWei Wang 		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
130335732d01SWei Wang 			oldest = rt6_ex;
130435732d01SWei Wang 	}
130535732d01SWei Wang 	rt6_remove_exception(bucket, oldest);
130635732d01SWei Wang }
130735732d01SWei Wang 
130835732d01SWei Wang static u32 rt6_exception_hash(const struct in6_addr *dst,
130935732d01SWei Wang 			      const struct in6_addr *src)
131035732d01SWei Wang {
131135732d01SWei Wang 	static u32 seed __read_mostly;
131235732d01SWei Wang 	u32 val;
131335732d01SWei Wang 
131435732d01SWei Wang 	net_get_random_once(&seed, sizeof(seed));
131535732d01SWei Wang 	val = jhash(dst, sizeof(*dst), seed);
131635732d01SWei Wang 
131735732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
131835732d01SWei Wang 	if (src)
131935732d01SWei Wang 		val = jhash(src, sizeof(*src), val);
132035732d01SWei Wang #endif
132135732d01SWei Wang 	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
132235732d01SWei Wang }
132335732d01SWei Wang 
132435732d01SWei Wang /* Helper function to find the cached rt in the hash table
132535732d01SWei Wang  * and update bucket pointer to point to the bucket for this
132635732d01SWei Wang  * (daddr, saddr) pair
132735732d01SWei Wang  * Caller must hold rt6_exception_lock
132835732d01SWei Wang  */
132935732d01SWei Wang static struct rt6_exception *
133035732d01SWei Wang __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
133135732d01SWei Wang 			      const struct in6_addr *daddr,
133235732d01SWei Wang 			      const struct in6_addr *saddr)
133335732d01SWei Wang {
133435732d01SWei Wang 	struct rt6_exception *rt6_ex;
133535732d01SWei Wang 	u32 hval;
133635732d01SWei Wang 
133735732d01SWei Wang 	if (!(*bucket) || !daddr)
133835732d01SWei Wang 		return NULL;
133935732d01SWei Wang 
134035732d01SWei Wang 	hval = rt6_exception_hash(daddr, saddr);
134135732d01SWei Wang 	*bucket += hval;
134235732d01SWei Wang 
134335732d01SWei Wang 	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
134435732d01SWei Wang 		struct rt6_info *rt6 = rt6_ex->rt6i;
134535732d01SWei Wang 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
134635732d01SWei Wang 
134735732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
134835732d01SWei Wang 		if (matched && saddr)
134935732d01SWei Wang 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
135035732d01SWei Wang #endif
135135732d01SWei Wang 		if (matched)
135235732d01SWei Wang 			return rt6_ex;
135335732d01SWei Wang 	}
135435732d01SWei Wang 	return NULL;
135535732d01SWei Wang }
135635732d01SWei Wang 
135735732d01SWei Wang /* Helper function to find the cached rt in the hash table
135835732d01SWei Wang  * and update bucket pointer to point to the bucket for this
135935732d01SWei Wang  * (daddr, saddr) pair
136035732d01SWei Wang  * Caller must hold rcu_read_lock()
136135732d01SWei Wang  */
136235732d01SWei Wang static struct rt6_exception *
136335732d01SWei Wang __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
136435732d01SWei Wang 			 const struct in6_addr *daddr,
136535732d01SWei Wang 			 const struct in6_addr *saddr)
136635732d01SWei Wang {
136735732d01SWei Wang 	struct rt6_exception *rt6_ex;
136835732d01SWei Wang 	u32 hval;
136935732d01SWei Wang 
137035732d01SWei Wang 	WARN_ON_ONCE(!rcu_read_lock_held());
137135732d01SWei Wang 
137235732d01SWei Wang 	if (!(*bucket) || !daddr)
137335732d01SWei Wang 		return NULL;
137435732d01SWei Wang 
137535732d01SWei Wang 	hval = rt6_exception_hash(daddr, saddr);
137635732d01SWei Wang 	*bucket += hval;
137735732d01SWei Wang 
137835732d01SWei Wang 	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
137935732d01SWei Wang 		struct rt6_info *rt6 = rt6_ex->rt6i;
138035732d01SWei Wang 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
138135732d01SWei Wang 
138235732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
138335732d01SWei Wang 		if (matched && saddr)
138435732d01SWei Wang 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
138535732d01SWei Wang #endif
138635732d01SWei Wang 		if (matched)
138735732d01SWei Wang 			return rt6_ex;
138835732d01SWei Wang 	}
138935732d01SWei Wang 	return NULL;
139035732d01SWei Wang }
139135732d01SWei Wang 
13928d1c802bSDavid Ahern static unsigned int fib6_mtu(const struct fib6_info *rt)
139335732d01SWei Wang {
1394d4ead6b3SDavid Ahern 	unsigned int mtu;
1395d4ead6b3SDavid Ahern 
1396dcd1f572SDavid Ahern 	if (rt->fib6_pmtu) {
1397dcd1f572SDavid Ahern 		mtu = rt->fib6_pmtu;
1398dcd1f572SDavid Ahern 	} else {
1399dcd1f572SDavid Ahern 		struct net_device *dev = fib6_info_nh_dev(rt);
1400dcd1f572SDavid Ahern 		struct inet6_dev *idev;
1401dcd1f572SDavid Ahern 
1402dcd1f572SDavid Ahern 		rcu_read_lock();
1403dcd1f572SDavid Ahern 		idev = __in6_dev_get(dev);
1404dcd1f572SDavid Ahern 		mtu = idev->cnf.mtu6;
1405dcd1f572SDavid Ahern 		rcu_read_unlock();
1406dcd1f572SDavid Ahern 	}
1407dcd1f572SDavid Ahern 
1408d4ead6b3SDavid Ahern 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1409d4ead6b3SDavid Ahern 
1410ad1601aeSDavid Ahern 	return mtu - lwtunnel_headroom(rt->fib6_nh.fib_nh_lws, mtu);
1411d4ead6b3SDavid Ahern }
1412d4ead6b3SDavid Ahern 
141335732d01SWei Wang static int rt6_insert_exception(struct rt6_info *nrt,
14148d1c802bSDavid Ahern 				struct fib6_info *ort)
141535732d01SWei Wang {
14165e670d84SDavid Ahern 	struct net *net = dev_net(nrt->dst.dev);
141735732d01SWei Wang 	struct rt6_exception_bucket *bucket;
141835732d01SWei Wang 	struct in6_addr *src_key = NULL;
141935732d01SWei Wang 	struct rt6_exception *rt6_ex;
142035732d01SWei Wang 	int err = 0;
142135732d01SWei Wang 
142235732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
142335732d01SWei Wang 
142435732d01SWei Wang 	if (ort->exception_bucket_flushed) {
142535732d01SWei Wang 		err = -EINVAL;
142635732d01SWei Wang 		goto out;
142735732d01SWei Wang 	}
142835732d01SWei Wang 
142935732d01SWei Wang 	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
143035732d01SWei Wang 					lockdep_is_held(&rt6_exception_lock));
143135732d01SWei Wang 	if (!bucket) {
143235732d01SWei Wang 		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
143335732d01SWei Wang 				 GFP_ATOMIC);
143435732d01SWei Wang 		if (!bucket) {
143535732d01SWei Wang 			err = -ENOMEM;
143635732d01SWei Wang 			goto out;
143735732d01SWei Wang 		}
143835732d01SWei Wang 		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
143935732d01SWei Wang 	}
144035732d01SWei Wang 
144135732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
144235732d01SWei Wang 	/* rt6i_src.plen != 0 indicates ort is in subtree
144335732d01SWei Wang 	 * and exception table is indexed by a hash of
144435732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
144535732d01SWei Wang 	 * Otherwise, the exception table is indexed by
144635732d01SWei Wang 	 * a hash of only rt6i_dst.
144735732d01SWei Wang 	 */
144893c2fb25SDavid Ahern 	if (ort->fib6_src.plen)
144935732d01SWei Wang 		src_key = &nrt->rt6i_src.addr;
145035732d01SWei Wang #endif
1451f5bbe7eeSWei Wang 	/* rt6_mtu_change() might lower mtu on ort.
1452f5bbe7eeSWei Wang 	 * Only insert this exception route if its mtu
1453f5bbe7eeSWei Wang 	 * is less than ort's mtu value.
1454f5bbe7eeSWei Wang 	 */
1455d4ead6b3SDavid Ahern 	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
1456f5bbe7eeSWei Wang 		err = -EINVAL;
1457f5bbe7eeSWei Wang 		goto out;
1458f5bbe7eeSWei Wang 	}
145960006a48SWei Wang 
146035732d01SWei Wang 	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
146135732d01SWei Wang 					       src_key);
146235732d01SWei Wang 	if (rt6_ex)
146335732d01SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
146435732d01SWei Wang 
146535732d01SWei Wang 	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
146635732d01SWei Wang 	if (!rt6_ex) {
146735732d01SWei Wang 		err = -ENOMEM;
146835732d01SWei Wang 		goto out;
146935732d01SWei Wang 	}
147035732d01SWei Wang 	rt6_ex->rt6i = nrt;
147135732d01SWei Wang 	rt6_ex->stamp = jiffies;
147235732d01SWei Wang 	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
147335732d01SWei Wang 	bucket->depth++;
147481eb8447SWei Wang 	net->ipv6.rt6_stats->fib_rt_cache++;
147535732d01SWei Wang 
147635732d01SWei Wang 	if (bucket->depth > FIB6_MAX_DEPTH)
147735732d01SWei Wang 		rt6_exception_remove_oldest(bucket);
147835732d01SWei Wang 
147935732d01SWei Wang out:
148035732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
148135732d01SWei Wang 
148235732d01SWei Wang 	/* Update fn->fn_sernum to invalidate all cached dst */
1483b886d5f2SPaolo Abeni 	if (!err) {
148493c2fb25SDavid Ahern 		spin_lock_bh(&ort->fib6_table->tb6_lock);
14857aef6859SDavid Ahern 		fib6_update_sernum(net, ort);
148693c2fb25SDavid Ahern 		spin_unlock_bh(&ort->fib6_table->tb6_lock);
1487b886d5f2SPaolo Abeni 		fib6_force_start_gc(net);
1488b886d5f2SPaolo Abeni 	}
148935732d01SWei Wang 
149035732d01SWei Wang 	return err;
149135732d01SWei Wang }
149235732d01SWei Wang 
14938d1c802bSDavid Ahern void rt6_flush_exceptions(struct fib6_info *rt)
149435732d01SWei Wang {
149535732d01SWei Wang 	struct rt6_exception_bucket *bucket;
149635732d01SWei Wang 	struct rt6_exception *rt6_ex;
149735732d01SWei Wang 	struct hlist_node *tmp;
149835732d01SWei Wang 	int i;
149935732d01SWei Wang 
150035732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
150135732d01SWei Wang 	/* Prevent rt6_insert_exception() to recreate the bucket list */
150235732d01SWei Wang 	rt->exception_bucket_flushed = 1;
150335732d01SWei Wang 
150435732d01SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
150535732d01SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
150635732d01SWei Wang 	if (!bucket)
150735732d01SWei Wang 		goto out;
150835732d01SWei Wang 
150935732d01SWei Wang 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
151035732d01SWei Wang 		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
151135732d01SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
151235732d01SWei Wang 		WARN_ON_ONCE(bucket->depth);
151335732d01SWei Wang 		bucket++;
151435732d01SWei Wang 	}
151535732d01SWei Wang 
151635732d01SWei Wang out:
151735732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
151835732d01SWei Wang }
151935732d01SWei Wang 
152035732d01SWei Wang /* Find cached rt in the hash table inside passed in rt
152135732d01SWei Wang  * Caller has to hold rcu_read_lock()
152235732d01SWei Wang  */
15238d1c802bSDavid Ahern static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
152435732d01SWei Wang 					   struct in6_addr *daddr,
152535732d01SWei Wang 					   struct in6_addr *saddr)
152635732d01SWei Wang {
152735732d01SWei Wang 	struct rt6_exception_bucket *bucket;
152835732d01SWei Wang 	struct in6_addr *src_key = NULL;
152935732d01SWei Wang 	struct rt6_exception *rt6_ex;
153035732d01SWei Wang 	struct rt6_info *res = NULL;
153135732d01SWei Wang 
153235732d01SWei Wang 	bucket = rcu_dereference(rt->rt6i_exception_bucket);
153335732d01SWei Wang 
153435732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
153535732d01SWei Wang 	/* rt6i_src.plen != 0 indicates rt is in subtree
153635732d01SWei Wang 	 * and exception table is indexed by a hash of
153735732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
153835732d01SWei Wang 	 * Otherwise, the exception table is indexed by
153935732d01SWei Wang 	 * a hash of only rt6i_dst.
154035732d01SWei Wang 	 */
154193c2fb25SDavid Ahern 	if (rt->fib6_src.plen)
154235732d01SWei Wang 		src_key = saddr;
154335732d01SWei Wang #endif
154435732d01SWei Wang 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
154535732d01SWei Wang 
154635732d01SWei Wang 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
154735732d01SWei Wang 		res = rt6_ex->rt6i;
154835732d01SWei Wang 
154935732d01SWei Wang 	return res;
155035732d01SWei Wang }
155135732d01SWei Wang 
155235732d01SWei Wang /* Remove the passed in cached rt from the hash table that contains it */
155323fb93a4SDavid Ahern static int rt6_remove_exception_rt(struct rt6_info *rt)
155435732d01SWei Wang {
155535732d01SWei Wang 	struct rt6_exception_bucket *bucket;
155635732d01SWei Wang 	struct in6_addr *src_key = NULL;
155735732d01SWei Wang 	struct rt6_exception *rt6_ex;
15588a14e46fSDavid Ahern 	struct fib6_info *from;
155935732d01SWei Wang 	int err;
156035732d01SWei Wang 
1561091311deSEric Dumazet 	from = rcu_dereference(rt->from);
156235732d01SWei Wang 	if (!from ||
1563442d713bSColin Ian King 	    !(rt->rt6i_flags & RTF_CACHE))
156435732d01SWei Wang 		return -EINVAL;
156535732d01SWei Wang 
156635732d01SWei Wang 	if (!rcu_access_pointer(from->rt6i_exception_bucket))
156735732d01SWei Wang 		return -ENOENT;
156835732d01SWei Wang 
156935732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
157035732d01SWei Wang 	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
157135732d01SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
157235732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
157335732d01SWei Wang 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
157435732d01SWei Wang 	 * and exception table is indexed by a hash of
157535732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
157635732d01SWei Wang 	 * Otherwise, the exception table is indexed by
157735732d01SWei Wang 	 * a hash of only rt6i_dst.
157835732d01SWei Wang 	 */
157993c2fb25SDavid Ahern 	if (from->fib6_src.plen)
158035732d01SWei Wang 		src_key = &rt->rt6i_src.addr;
158135732d01SWei Wang #endif
158235732d01SWei Wang 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
158335732d01SWei Wang 					       &rt->rt6i_dst.addr,
158435732d01SWei Wang 					       src_key);
158535732d01SWei Wang 	if (rt6_ex) {
158635732d01SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
158735732d01SWei Wang 		err = 0;
158835732d01SWei Wang 	} else {
158935732d01SWei Wang 		err = -ENOENT;
159035732d01SWei Wang 	}
159135732d01SWei Wang 
159235732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
159335732d01SWei Wang 	return err;
159435732d01SWei Wang }
159535732d01SWei Wang 
159635732d01SWei Wang /* Find rt6_ex which contains the passed in rt cache and
159735732d01SWei Wang  * refresh its stamp
159835732d01SWei Wang  */
159935732d01SWei Wang static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
160035732d01SWei Wang {
160135732d01SWei Wang 	struct rt6_exception_bucket *bucket;
160235732d01SWei Wang 	struct in6_addr *src_key = NULL;
160335732d01SWei Wang 	struct rt6_exception *rt6_ex;
1604193f3685SPaolo Abeni 	struct fib6_info *from;
160535732d01SWei Wang 
160635732d01SWei Wang 	rcu_read_lock();
1607193f3685SPaolo Abeni 	from = rcu_dereference(rt->from);
1608193f3685SPaolo Abeni 	if (!from || !(rt->rt6i_flags & RTF_CACHE))
1609193f3685SPaolo Abeni 		goto unlock;
1610193f3685SPaolo Abeni 
161135732d01SWei Wang 	bucket = rcu_dereference(from->rt6i_exception_bucket);
161235732d01SWei Wang 
161335732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
161435732d01SWei Wang 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
161535732d01SWei Wang 	 * and exception table is indexed by a hash of
161635732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
161735732d01SWei Wang 	 * Otherwise, the exception table is indexed by
161835732d01SWei Wang 	 * a hash of only rt6i_dst.
161935732d01SWei Wang 	 */
162093c2fb25SDavid Ahern 	if (from->fib6_src.plen)
162135732d01SWei Wang 		src_key = &rt->rt6i_src.addr;
162235732d01SWei Wang #endif
162335732d01SWei Wang 	rt6_ex = __rt6_find_exception_rcu(&bucket,
162435732d01SWei Wang 					  &rt->rt6i_dst.addr,
162535732d01SWei Wang 					  src_key);
162635732d01SWei Wang 	if (rt6_ex)
162735732d01SWei Wang 		rt6_ex->stamp = jiffies;
162835732d01SWei Wang 
1629193f3685SPaolo Abeni unlock:
163035732d01SWei Wang 	rcu_read_unlock();
163135732d01SWei Wang }
163235732d01SWei Wang 
1633e9fa1495SStefano Brivio static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1634e9fa1495SStefano Brivio 					 struct rt6_info *rt, int mtu)
1635e9fa1495SStefano Brivio {
1636e9fa1495SStefano Brivio 	/* If the new MTU is lower than the route PMTU, this new MTU will be the
1637e9fa1495SStefano Brivio 	 * lowest MTU in the path: always allow updating the route PMTU to
1638e9fa1495SStefano Brivio 	 * reflect PMTU decreases.
1639e9fa1495SStefano Brivio 	 *
1640e9fa1495SStefano Brivio 	 * If the new MTU is higher, and the route PMTU is equal to the local
1641e9fa1495SStefano Brivio 	 * MTU, this means the old MTU is the lowest in the path, so allow
1642e9fa1495SStefano Brivio 	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1643e9fa1495SStefano Brivio 	 * handle this.
1644e9fa1495SStefano Brivio 	 */
1645e9fa1495SStefano Brivio 
1646e9fa1495SStefano Brivio 	if (dst_mtu(&rt->dst) >= mtu)
1647e9fa1495SStefano Brivio 		return true;
1648e9fa1495SStefano Brivio 
1649e9fa1495SStefano Brivio 	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1650e9fa1495SStefano Brivio 		return true;
1651e9fa1495SStefano Brivio 
1652e9fa1495SStefano Brivio 	return false;
1653e9fa1495SStefano Brivio }
1654e9fa1495SStefano Brivio 
1655e9fa1495SStefano Brivio static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
16568d1c802bSDavid Ahern 				       struct fib6_info *rt, int mtu)
1657f5bbe7eeSWei Wang {
1658f5bbe7eeSWei Wang 	struct rt6_exception_bucket *bucket;
1659f5bbe7eeSWei Wang 	struct rt6_exception *rt6_ex;
1660f5bbe7eeSWei Wang 	int i;
1661f5bbe7eeSWei Wang 
1662f5bbe7eeSWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1663f5bbe7eeSWei Wang 					lockdep_is_held(&rt6_exception_lock));
1664f5bbe7eeSWei Wang 
1665e9fa1495SStefano Brivio 	if (!bucket)
1666e9fa1495SStefano Brivio 		return;
1667e9fa1495SStefano Brivio 
1668f5bbe7eeSWei Wang 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1669f5bbe7eeSWei Wang 		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1670f5bbe7eeSWei Wang 			struct rt6_info *entry = rt6_ex->rt6i;
1671e9fa1495SStefano Brivio 
1672e9fa1495SStefano Brivio 			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1673d4ead6b3SDavid Ahern 			 * route), the metrics of its rt->from have already
1674f5bbe7eeSWei Wang 			 * been updated.
1675f5bbe7eeSWei Wang 			 */
1676d4ead6b3SDavid Ahern 			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
1677e9fa1495SStefano Brivio 			    rt6_mtu_change_route_allowed(idev, entry, mtu))
1678d4ead6b3SDavid Ahern 				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
1679f5bbe7eeSWei Wang 		}
1680f5bbe7eeSWei Wang 		bucket++;
1681f5bbe7eeSWei Wang 	}
1682f5bbe7eeSWei Wang }
1683f5bbe7eeSWei Wang 
1684b16cb459SWei Wang #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
1685b16cb459SWei Wang 
16868d1c802bSDavid Ahern static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
1687b16cb459SWei Wang 					struct in6_addr *gateway)
1688b16cb459SWei Wang {
1689b16cb459SWei Wang 	struct rt6_exception_bucket *bucket;
1690b16cb459SWei Wang 	struct rt6_exception *rt6_ex;
1691b16cb459SWei Wang 	struct hlist_node *tmp;
1692b16cb459SWei Wang 	int i;
1693b16cb459SWei Wang 
1694b16cb459SWei Wang 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1695b16cb459SWei Wang 		return;
1696b16cb459SWei Wang 
1697b16cb459SWei Wang 	spin_lock_bh(&rt6_exception_lock);
1698b16cb459SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1699b16cb459SWei Wang 				     lockdep_is_held(&rt6_exception_lock));
1700b16cb459SWei Wang 
1701b16cb459SWei Wang 	if (bucket) {
1702b16cb459SWei Wang 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1703b16cb459SWei Wang 			hlist_for_each_entry_safe(rt6_ex, tmp,
1704b16cb459SWei Wang 						  &bucket->chain, hlist) {
1705b16cb459SWei Wang 				struct rt6_info *entry = rt6_ex->rt6i;
1706b16cb459SWei Wang 
1707b16cb459SWei Wang 				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1708b16cb459SWei Wang 				    RTF_CACHE_GATEWAY &&
1709b16cb459SWei Wang 				    ipv6_addr_equal(gateway,
1710b16cb459SWei Wang 						    &entry->rt6i_gateway)) {
1711b16cb459SWei Wang 					rt6_remove_exception(bucket, rt6_ex);
1712b16cb459SWei Wang 				}
1713b16cb459SWei Wang 			}
1714b16cb459SWei Wang 			bucket++;
1715b16cb459SWei Wang 		}
1716b16cb459SWei Wang 	}
1717b16cb459SWei Wang 
1718b16cb459SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
1719b16cb459SWei Wang }
1720b16cb459SWei Wang 
1721c757faa8SWei Wang static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1722c757faa8SWei Wang 				      struct rt6_exception *rt6_ex,
1723c757faa8SWei Wang 				      struct fib6_gc_args *gc_args,
1724c757faa8SWei Wang 				      unsigned long now)
1725c757faa8SWei Wang {
1726c757faa8SWei Wang 	struct rt6_info *rt = rt6_ex->rt6i;
1727c757faa8SWei Wang 
17281859bac0SPaolo Abeni 	/* we are pruning and obsoleting aged-out and non gateway exceptions
17291859bac0SPaolo Abeni 	 * even if others have still references to them, so that on next
17301859bac0SPaolo Abeni 	 * dst_check() such references can be dropped.
17311859bac0SPaolo Abeni 	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
17321859bac0SPaolo Abeni 	 * expired, independently from their aging, as per RFC 8201 section 4
17331859bac0SPaolo Abeni 	 */
173431afeb42SWei Wang 	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
173531afeb42SWei Wang 		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1736c757faa8SWei Wang 			RT6_TRACE("aging clone %p\n", rt);
1737c757faa8SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
1738c757faa8SWei Wang 			return;
173931afeb42SWei Wang 		}
174031afeb42SWei Wang 	} else if (time_after(jiffies, rt->dst.expires)) {
174131afeb42SWei Wang 		RT6_TRACE("purging expired route %p\n", rt);
174231afeb42SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
174331afeb42SWei Wang 		return;
174431afeb42SWei Wang 	}
174531afeb42SWei Wang 
174631afeb42SWei Wang 	if (rt->rt6i_flags & RTF_GATEWAY) {
1747c757faa8SWei Wang 		struct neighbour *neigh;
1748c757faa8SWei Wang 		__u8 neigh_flags = 0;
1749c757faa8SWei Wang 
17501bfa26ffSEric Dumazet 		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
17511bfa26ffSEric Dumazet 		if (neigh)
1752c757faa8SWei Wang 			neigh_flags = neigh->flags;
17531bfa26ffSEric Dumazet 
1754c757faa8SWei Wang 		if (!(neigh_flags & NTF_ROUTER)) {
1755c757faa8SWei Wang 			RT6_TRACE("purging route %p via non-router but gateway\n",
1756c757faa8SWei Wang 				  rt);
1757c757faa8SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
1758c757faa8SWei Wang 			return;
1759c757faa8SWei Wang 		}
1760c757faa8SWei Wang 	}
176131afeb42SWei Wang 
1762c757faa8SWei Wang 	gc_args->more++;
1763c757faa8SWei Wang }
1764c757faa8SWei Wang 
17658d1c802bSDavid Ahern void rt6_age_exceptions(struct fib6_info *rt,
1766c757faa8SWei Wang 			struct fib6_gc_args *gc_args,
1767c757faa8SWei Wang 			unsigned long now)
1768c757faa8SWei Wang {
1769c757faa8SWei Wang 	struct rt6_exception_bucket *bucket;
1770c757faa8SWei Wang 	struct rt6_exception *rt6_ex;
1771c757faa8SWei Wang 	struct hlist_node *tmp;
1772c757faa8SWei Wang 	int i;
1773c757faa8SWei Wang 
1774c757faa8SWei Wang 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1775c757faa8SWei Wang 		return;
1776c757faa8SWei Wang 
17771bfa26ffSEric Dumazet 	rcu_read_lock_bh();
17781bfa26ffSEric Dumazet 	spin_lock(&rt6_exception_lock);
1779c757faa8SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1780c757faa8SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
1781c757faa8SWei Wang 
1782c757faa8SWei Wang 	if (bucket) {
1783c757faa8SWei Wang 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1784c757faa8SWei Wang 			hlist_for_each_entry_safe(rt6_ex, tmp,
1785c757faa8SWei Wang 						  &bucket->chain, hlist) {
1786c757faa8SWei Wang 				rt6_age_examine_exception(bucket, rt6_ex,
1787c757faa8SWei Wang 							  gc_args, now);
1788c757faa8SWei Wang 			}
1789c757faa8SWei Wang 			bucket++;
1790c757faa8SWei Wang 		}
1791c757faa8SWei Wang 	}
17921bfa26ffSEric Dumazet 	spin_unlock(&rt6_exception_lock);
17931bfa26ffSEric Dumazet 	rcu_read_unlock_bh();
1794c757faa8SWei Wang }
1795c757faa8SWei Wang 
17961d053da9SDavid Ahern /* must be called with rcu lock held */
17971d053da9SDavid Ahern struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
17981d053da9SDavid Ahern 				    int oif, struct flowi6 *fl6, int strict)
17991da177e4SLinus Torvalds {
1800367efcb9SMartin KaFai Lau 	struct fib6_node *fn, *saved_fn;
18018d1c802bSDavid Ahern 	struct fib6_info *f6i;
18021da177e4SLinus Torvalds 
18036454743bSDavid Ahern 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1804367efcb9SMartin KaFai Lau 	saved_fn = fn;
18051da177e4SLinus Torvalds 
1806ca254490SDavid Ahern 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1807ca254490SDavid Ahern 		oif = 0;
1808ca254490SDavid Ahern 
1809a3c00e46SMartin KaFai Lau redo_rt6_select:
181023fb93a4SDavid Ahern 	f6i = rt6_select(net, fn, oif, strict);
181123fb93a4SDavid Ahern 	if (f6i == net->ipv6.fib6_null_entry) {
1812a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
1813a3c00e46SMartin KaFai Lau 		if (fn)
1814a3c00e46SMartin KaFai Lau 			goto redo_rt6_select;
1815367efcb9SMartin KaFai Lau 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1816367efcb9SMartin KaFai Lau 			/* also consider unreachable route */
1817367efcb9SMartin KaFai Lau 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1818367efcb9SMartin KaFai Lau 			fn = saved_fn;
1819367efcb9SMartin KaFai Lau 			goto redo_rt6_select;
1820367efcb9SMartin KaFai Lau 		}
1821a3c00e46SMartin KaFai Lau 	}
1822a3c00e46SMartin KaFai Lau 
1823d4bea421SDavid Ahern 	trace_fib6_table_lookup(net, f6i, table, fl6);
1824d52d3997SMartin KaFai Lau 
18251d053da9SDavid Ahern 	return f6i;
18261d053da9SDavid Ahern }
18271d053da9SDavid Ahern 
18281d053da9SDavid Ahern struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
18291d053da9SDavid Ahern 			       int oif, struct flowi6 *fl6,
18301d053da9SDavid Ahern 			       const struct sk_buff *skb, int flags)
18311d053da9SDavid Ahern {
18321d053da9SDavid Ahern 	struct fib6_info *f6i;
18331d053da9SDavid Ahern 	struct rt6_info *rt;
18341d053da9SDavid Ahern 	int strict = 0;
18351d053da9SDavid Ahern 
18361d053da9SDavid Ahern 	strict |= flags & RT6_LOOKUP_F_IFACE;
18371d053da9SDavid Ahern 	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
18381d053da9SDavid Ahern 	if (net->ipv6.devconf_all->forwarding == 0)
18391d053da9SDavid Ahern 		strict |= RT6_LOOKUP_F_REACHABLE;
18401d053da9SDavid Ahern 
18411d053da9SDavid Ahern 	rcu_read_lock();
18421d053da9SDavid Ahern 
18431d053da9SDavid Ahern 	f6i = fib6_table_lookup(net, table, oif, fl6, strict);
18441d053da9SDavid Ahern 	if (f6i->fib6_nsiblings)
18451d053da9SDavid Ahern 		f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
18461d053da9SDavid Ahern 
184723fb93a4SDavid Ahern 	if (f6i == net->ipv6.fib6_null_entry) {
1848421842edSDavid Ahern 		rt = net->ipv6.ip6_null_entry;
184966f5d6ceSWei Wang 		rcu_read_unlock();
1850d3843fe5SWei Wang 		dst_hold(&rt->dst);
1851d3843fe5SWei Wang 		return rt;
1852d3843fe5SWei Wang 	}
185323fb93a4SDavid Ahern 
185423fb93a4SDavid Ahern 	/*Search through exception table */
185523fb93a4SDavid Ahern 	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
185623fb93a4SDavid Ahern 	if (rt) {
185710585b43SDavid Ahern 		if (ip6_hold_safe(net, &rt))
18581da177e4SLinus Torvalds 			dst_use_noref(&rt->dst, jiffies);
1859d4ead6b3SDavid Ahern 
186066f5d6ceSWei Wang 		rcu_read_unlock();
1861d52d3997SMartin KaFai Lau 		return rt;
18623da59bd9SMartin KaFai Lau 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
18632b2450caSDavid Ahern 			    !f6i->fib6_nh.fib_nh_has_gw)) {
18643da59bd9SMartin KaFai Lau 		/* Create a RTF_CACHE clone which will not be
18653da59bd9SMartin KaFai Lau 		 * owned by the fib6 tree.  It is for the special case where
18663da59bd9SMartin KaFai Lau 		 * the daddr in the skb during the neighbor look-up is different
18673da59bd9SMartin KaFai Lau 		 * from the fl6->daddr used to look-up route here.
18683da59bd9SMartin KaFai Lau 		 */
18693da59bd9SMartin KaFai Lau 		struct rt6_info *uncached_rt;
18703da59bd9SMartin KaFai Lau 
187123fb93a4SDavid Ahern 		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
1872d52d3997SMartin KaFai Lau 
18734d85cd0cSDavid Ahern 		rcu_read_unlock();
18743da59bd9SMartin KaFai Lau 
18751cfb71eeSWei Wang 		if (uncached_rt) {
18761cfb71eeSWei Wang 			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
18771cfb71eeSWei Wang 			 * No need for another dst_hold()
18781cfb71eeSWei Wang 			 */
18798d0b94afSMartin KaFai Lau 			rt6_uncached_list_add(uncached_rt);
188081eb8447SWei Wang 			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
18811cfb71eeSWei Wang 		} else {
18823da59bd9SMartin KaFai Lau 			uncached_rt = net->ipv6.ip6_null_entry;
18833da59bd9SMartin KaFai Lau 			dst_hold(&uncached_rt->dst);
18841cfb71eeSWei Wang 		}
1885b811580dSDavid Ahern 
18863da59bd9SMartin KaFai Lau 		return uncached_rt;
1887d52d3997SMartin KaFai Lau 	} else {
1888d52d3997SMartin KaFai Lau 		/* Get a percpu copy */
1889d52d3997SMartin KaFai Lau 
1890d52d3997SMartin KaFai Lau 		struct rt6_info *pcpu_rt;
1891d52d3997SMartin KaFai Lau 
1892951f788aSEric Dumazet 		local_bh_disable();
189323fb93a4SDavid Ahern 		pcpu_rt = rt6_get_pcpu_route(f6i);
1894d52d3997SMartin KaFai Lau 
189593531c67SDavid Ahern 		if (!pcpu_rt)
189623fb93a4SDavid Ahern 			pcpu_rt = rt6_make_pcpu_route(net, f6i);
189793531c67SDavid Ahern 
1898951f788aSEric Dumazet 		local_bh_enable();
1899951f788aSEric Dumazet 		rcu_read_unlock();
1900d4bea421SDavid Ahern 
1901d52d3997SMartin KaFai Lau 		return pcpu_rt;
1902d52d3997SMartin KaFai Lau 	}
1903c71099acSThomas Graf }
19049ff74384SDavid Ahern EXPORT_SYMBOL_GPL(ip6_pol_route);
1905c71099acSThomas Graf 
1906b75cc8f9SDavid Ahern static struct rt6_info *ip6_pol_route_input(struct net *net,
1907b75cc8f9SDavid Ahern 					    struct fib6_table *table,
1908b75cc8f9SDavid Ahern 					    struct flowi6 *fl6,
1909b75cc8f9SDavid Ahern 					    const struct sk_buff *skb,
1910b75cc8f9SDavid Ahern 					    int flags)
19114acad72dSPavel Emelyanov {
1912b75cc8f9SDavid Ahern 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
19134acad72dSPavel Emelyanov }
19144acad72dSPavel Emelyanov 
1915d409b847SMahesh Bandewar struct dst_entry *ip6_route_input_lookup(struct net *net,
191672331bc0SShmulik Ladkani 					 struct net_device *dev,
1917b75cc8f9SDavid Ahern 					 struct flowi6 *fl6,
1918b75cc8f9SDavid Ahern 					 const struct sk_buff *skb,
1919b75cc8f9SDavid Ahern 					 int flags)
192072331bc0SShmulik Ladkani {
192172331bc0SShmulik Ladkani 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
192272331bc0SShmulik Ladkani 		flags |= RT6_LOOKUP_F_IFACE;
192372331bc0SShmulik Ladkani 
1924b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
192572331bc0SShmulik Ladkani }
1926d409b847SMahesh Bandewar EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
192772331bc0SShmulik Ladkani 
192823aebdacSJakub Sitnicki static void ip6_multipath_l3_keys(const struct sk_buff *skb,
19295e5d6fedSRoopa Prabhu 				  struct flow_keys *keys,
19305e5d6fedSRoopa Prabhu 				  struct flow_keys *flkeys)
193123aebdacSJakub Sitnicki {
193223aebdacSJakub Sitnicki 	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
193323aebdacSJakub Sitnicki 	const struct ipv6hdr *key_iph = outer_iph;
19345e5d6fedSRoopa Prabhu 	struct flow_keys *_flkeys = flkeys;
193523aebdacSJakub Sitnicki 	const struct ipv6hdr *inner_iph;
193623aebdacSJakub Sitnicki 	const struct icmp6hdr *icmph;
193723aebdacSJakub Sitnicki 	struct ipv6hdr _inner_iph;
1938cea67a2dSEric Dumazet 	struct icmp6hdr _icmph;
193923aebdacSJakub Sitnicki 
194023aebdacSJakub Sitnicki 	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
194123aebdacSJakub Sitnicki 		goto out;
194223aebdacSJakub Sitnicki 
1943cea67a2dSEric Dumazet 	icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1944cea67a2dSEric Dumazet 				   sizeof(_icmph), &_icmph);
1945cea67a2dSEric Dumazet 	if (!icmph)
1946cea67a2dSEric Dumazet 		goto out;
1947cea67a2dSEric Dumazet 
194823aebdacSJakub Sitnicki 	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
194923aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
195023aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
195123aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_PARAMPROB)
195223aebdacSJakub Sitnicki 		goto out;
195323aebdacSJakub Sitnicki 
195423aebdacSJakub Sitnicki 	inner_iph = skb_header_pointer(skb,
195523aebdacSJakub Sitnicki 				       skb_transport_offset(skb) + sizeof(*icmph),
195623aebdacSJakub Sitnicki 				       sizeof(_inner_iph), &_inner_iph);
195723aebdacSJakub Sitnicki 	if (!inner_iph)
195823aebdacSJakub Sitnicki 		goto out;
195923aebdacSJakub Sitnicki 
196023aebdacSJakub Sitnicki 	key_iph = inner_iph;
19615e5d6fedSRoopa Prabhu 	_flkeys = NULL;
196223aebdacSJakub Sitnicki out:
19635e5d6fedSRoopa Prabhu 	if (_flkeys) {
19645e5d6fedSRoopa Prabhu 		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
19655e5d6fedSRoopa Prabhu 		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
19665e5d6fedSRoopa Prabhu 		keys->tags.flow_label = _flkeys->tags.flow_label;
19675e5d6fedSRoopa Prabhu 		keys->basic.ip_proto = _flkeys->basic.ip_proto;
19685e5d6fedSRoopa Prabhu 	} else {
196923aebdacSJakub Sitnicki 		keys->addrs.v6addrs.src = key_iph->saddr;
197023aebdacSJakub Sitnicki 		keys->addrs.v6addrs.dst = key_iph->daddr;
1971fa1be7e0SMichal Kubecek 		keys->tags.flow_label = ip6_flowlabel(key_iph);
197223aebdacSJakub Sitnicki 		keys->basic.ip_proto = key_iph->nexthdr;
197323aebdacSJakub Sitnicki 	}
19745e5d6fedSRoopa Prabhu }
197523aebdacSJakub Sitnicki 
197623aebdacSJakub Sitnicki /* if skb is set it will be used and fl6 can be NULL */
1977b4bac172SDavid Ahern u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1978b4bac172SDavid Ahern 		       const struct sk_buff *skb, struct flow_keys *flkeys)
197923aebdacSJakub Sitnicki {
198023aebdacSJakub Sitnicki 	struct flow_keys hash_keys;
19819a2a537aSDavid Ahern 	u32 mhash;
198223aebdacSJakub Sitnicki 
1983bbfa047aSDavid S. Miller 	switch (ip6_multipath_hash_policy(net)) {
1984b4bac172SDavid Ahern 	case 0:
19856f74b6c2SDavid Ahern 		memset(&hash_keys, 0, sizeof(hash_keys));
19866f74b6c2SDavid Ahern 		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
19879a2a537aSDavid Ahern 		if (skb) {
19885e5d6fedSRoopa Prabhu 			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
19899a2a537aSDavid Ahern 		} else {
19909a2a537aSDavid Ahern 			hash_keys.addrs.v6addrs.src = fl6->saddr;
19919a2a537aSDavid Ahern 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
1992fa1be7e0SMichal Kubecek 			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
19939a2a537aSDavid Ahern 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
199423aebdacSJakub Sitnicki 		}
1995b4bac172SDavid Ahern 		break;
1996b4bac172SDavid Ahern 	case 1:
1997b4bac172SDavid Ahern 		if (skb) {
1998b4bac172SDavid Ahern 			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1999b4bac172SDavid Ahern 			struct flow_keys keys;
2000b4bac172SDavid Ahern 
2001b4bac172SDavid Ahern 			/* short-circuit if we already have L4 hash present */
2002b4bac172SDavid Ahern 			if (skb->l4_hash)
2003b4bac172SDavid Ahern 				return skb_get_hash_raw(skb) >> 1;
2004b4bac172SDavid Ahern 
2005b4bac172SDavid Ahern 			memset(&hash_keys, 0, sizeof(hash_keys));
2006b4bac172SDavid Ahern 
2007b4bac172SDavid Ahern                         if (!flkeys) {
2008b4bac172SDavid Ahern 				skb_flow_dissect_flow_keys(skb, &keys, flag);
2009b4bac172SDavid Ahern 				flkeys = &keys;
2010b4bac172SDavid Ahern 			}
2011b4bac172SDavid Ahern 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2012b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2013b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2014b4bac172SDavid Ahern 			hash_keys.ports.src = flkeys->ports.src;
2015b4bac172SDavid Ahern 			hash_keys.ports.dst = flkeys->ports.dst;
2016b4bac172SDavid Ahern 			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2017b4bac172SDavid Ahern 		} else {
2018b4bac172SDavid Ahern 			memset(&hash_keys, 0, sizeof(hash_keys));
2019b4bac172SDavid Ahern 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2020b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.src = fl6->saddr;
2021b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2022b4bac172SDavid Ahern 			hash_keys.ports.src = fl6->fl6_sport;
2023b4bac172SDavid Ahern 			hash_keys.ports.dst = fl6->fl6_dport;
2024b4bac172SDavid Ahern 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2025b4bac172SDavid Ahern 		}
2026b4bac172SDavid Ahern 		break;
2027b4bac172SDavid Ahern 	}
20289a2a537aSDavid Ahern 	mhash = flow_hash_from_keys(&hash_keys);
202923aebdacSJakub Sitnicki 
20309a2a537aSDavid Ahern 	return mhash >> 1;
203123aebdacSJakub Sitnicki }
203223aebdacSJakub Sitnicki 
2033c71099acSThomas Graf void ip6_route_input(struct sk_buff *skb)
2034c71099acSThomas Graf {
2035b71d1d42SEric Dumazet 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2036c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(skb->dev);
2037adaa70bbSThomas Graf 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2038904af04dSJiri Benc 	struct ip_tunnel_info *tun_info;
20394c9483b2SDavid S. Miller 	struct flowi6 fl6 = {
2040e0d56fddSDavid Ahern 		.flowi6_iif = skb->dev->ifindex,
20414c9483b2SDavid S. Miller 		.daddr = iph->daddr,
20424c9483b2SDavid S. Miller 		.saddr = iph->saddr,
20436502ca52SYOSHIFUJI Hideaki / 吉藤英明 		.flowlabel = ip6_flowinfo(iph),
20444c9483b2SDavid S. Miller 		.flowi6_mark = skb->mark,
20454c9483b2SDavid S. Miller 		.flowi6_proto = iph->nexthdr,
2046c71099acSThomas Graf 	};
20475e5d6fedSRoopa Prabhu 	struct flow_keys *flkeys = NULL, _flkeys;
2048adaa70bbSThomas Graf 
2049904af04dSJiri Benc 	tun_info = skb_tunnel_info(skb);
205046fa062aSJiri Benc 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2051904af04dSJiri Benc 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
20525e5d6fedSRoopa Prabhu 
20535e5d6fedSRoopa Prabhu 	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
20545e5d6fedSRoopa Prabhu 		flkeys = &_flkeys;
20555e5d6fedSRoopa Prabhu 
205623aebdacSJakub Sitnicki 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2057b4bac172SDavid Ahern 		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
205806e9d040SJiri Benc 	skb_dst_drop(skb);
2059b75cc8f9SDavid Ahern 	skb_dst_set(skb,
2060b75cc8f9SDavid Ahern 		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
2061c71099acSThomas Graf }
2062c71099acSThomas Graf 
2063b75cc8f9SDavid Ahern static struct rt6_info *ip6_pol_route_output(struct net *net,
2064b75cc8f9SDavid Ahern 					     struct fib6_table *table,
2065b75cc8f9SDavid Ahern 					     struct flowi6 *fl6,
2066b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
2067b75cc8f9SDavid Ahern 					     int flags)
2068c71099acSThomas Graf {
2069b75cc8f9SDavid Ahern 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2070c71099acSThomas Graf }
2071c71099acSThomas Graf 
20726f21c96aSPaolo Abeni struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
20736f21c96aSPaolo Abeni 					 struct flowi6 *fl6, int flags)
2074c71099acSThomas Graf {
2075d46a9d67SDavid Ahern 	bool any_src;
2076c71099acSThomas Graf 
20773ede0bbcSRobert Shearman 	if (ipv6_addr_type(&fl6->daddr) &
20783ede0bbcSRobert Shearman 	    (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
20794c1feac5SDavid Ahern 		struct dst_entry *dst;
20804c1feac5SDavid Ahern 
20814c1feac5SDavid Ahern 		dst = l3mdev_link_scope_lookup(net, fl6);
2082ca254490SDavid Ahern 		if (dst)
2083ca254490SDavid Ahern 			return dst;
20844c1feac5SDavid Ahern 	}
2085ca254490SDavid Ahern 
20861fb9489bSPavel Emelyanov 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
20874dc27d1cSDavid McCullough 
2088d46a9d67SDavid Ahern 	any_src = ipv6_addr_any(&fl6->saddr);
2089741a11d9SDavid Ahern 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2090d46a9d67SDavid Ahern 	    (fl6->flowi6_oif && any_src))
209177d16f45SYOSHIFUJI Hideaki 		flags |= RT6_LOOKUP_F_IFACE;
2092c71099acSThomas Graf 
2093d46a9d67SDavid Ahern 	if (!any_src)
2094adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
20950c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 	else if (sk)
20960c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2097adaa70bbSThomas Graf 
2098b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
20991da177e4SLinus Torvalds }
21006f21c96aSPaolo Abeni EXPORT_SYMBOL_GPL(ip6_route_output_flags);
21011da177e4SLinus Torvalds 
21022774c131SDavid S. Miller struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
210314e50e57SDavid S. Miller {
21045c1e6aa3SDavid S. Miller 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
21051dbe3252SWei Wang 	struct net_device *loopback_dev = net->loopback_dev;
210614e50e57SDavid S. Miller 	struct dst_entry *new = NULL;
210714e50e57SDavid S. Miller 
21081dbe3252SWei Wang 	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
210962cf27e5SSteffen Klassert 		       DST_OBSOLETE_DEAD, 0);
211014e50e57SDavid S. Miller 	if (rt) {
21110a1f5962SMartin KaFai Lau 		rt6_info_init(rt);
211281eb8447SWei Wang 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
21130a1f5962SMartin KaFai Lau 
2114d8d1f30bSChangli Gao 		new = &rt->dst;
211514e50e57SDavid S. Miller 		new->__use = 1;
2116352e512cSHerbert Xu 		new->input = dst_discard;
2117ede2059dSEric W. Biederman 		new->output = dst_discard_out;
211814e50e57SDavid S. Miller 
2119defb3519SDavid S. Miller 		dst_copy_metrics(new, &ort->dst);
212014e50e57SDavid S. Miller 
21211dbe3252SWei Wang 		rt->rt6i_idev = in6_dev_get(loopback_dev);
21224e3fd7a0SAlexey Dobriyan 		rt->rt6i_gateway = ort->rt6i_gateway;
21230a1f5962SMartin KaFai Lau 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
212414e50e57SDavid S. Miller 
212514e50e57SDavid S. Miller 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
212614e50e57SDavid S. Miller #ifdef CONFIG_IPV6_SUBTREES
212714e50e57SDavid S. Miller 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
212814e50e57SDavid S. Miller #endif
212914e50e57SDavid S. Miller 	}
213014e50e57SDavid S. Miller 
213169ead7afSDavid S. Miller 	dst_release(dst_orig);
213269ead7afSDavid S. Miller 	return new ? new : ERR_PTR(-ENOMEM);
213314e50e57SDavid S. Miller }
213414e50e57SDavid S. Miller 
21351da177e4SLinus Torvalds /*
21361da177e4SLinus Torvalds  *	Destination cache support functions
21371da177e4SLinus Torvalds  */
21381da177e4SLinus Torvalds 
21398d1c802bSDavid Ahern static bool fib6_check(struct fib6_info *f6i, u32 cookie)
21403da59bd9SMartin KaFai Lau {
214136143645SSteffen Klassert 	u32 rt_cookie = 0;
2142c5cff856SWei Wang 
21438ae86971SDavid Ahern 	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
214493531c67SDavid Ahern 		return false;
214593531c67SDavid Ahern 
214693531c67SDavid Ahern 	if (fib6_check_expired(f6i))
214793531c67SDavid Ahern 		return false;
214893531c67SDavid Ahern 
214993531c67SDavid Ahern 	return true;
215093531c67SDavid Ahern }
215193531c67SDavid Ahern 
2152a68886a6SDavid Ahern static struct dst_entry *rt6_check(struct rt6_info *rt,
2153a68886a6SDavid Ahern 				   struct fib6_info *from,
2154a68886a6SDavid Ahern 				   u32 cookie)
21553da59bd9SMartin KaFai Lau {
2156c5cff856SWei Wang 	u32 rt_cookie = 0;
2157c5cff856SWei Wang 
2158a68886a6SDavid Ahern 	if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
215993531c67SDavid Ahern 	    rt_cookie != cookie)
21603da59bd9SMartin KaFai Lau 		return NULL;
21613da59bd9SMartin KaFai Lau 
21623da59bd9SMartin KaFai Lau 	if (rt6_check_expired(rt))
21633da59bd9SMartin KaFai Lau 		return NULL;
21643da59bd9SMartin KaFai Lau 
21653da59bd9SMartin KaFai Lau 	return &rt->dst;
21663da59bd9SMartin KaFai Lau }
21673da59bd9SMartin KaFai Lau 
2168a68886a6SDavid Ahern static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2169a68886a6SDavid Ahern 					    struct fib6_info *from,
2170a68886a6SDavid Ahern 					    u32 cookie)
21713da59bd9SMartin KaFai Lau {
21725973fb1eSMartin KaFai Lau 	if (!__rt6_check_expired(rt) &&
21735973fb1eSMartin KaFai Lau 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2174a68886a6SDavid Ahern 	    fib6_check(from, cookie))
21753da59bd9SMartin KaFai Lau 		return &rt->dst;
21763da59bd9SMartin KaFai Lau 	else
21773da59bd9SMartin KaFai Lau 		return NULL;
21783da59bd9SMartin KaFai Lau }
21793da59bd9SMartin KaFai Lau 
21801da177e4SLinus Torvalds static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
21811da177e4SLinus Torvalds {
2182a87b7dc9SDavid Ahern 	struct dst_entry *dst_ret;
2183a68886a6SDavid Ahern 	struct fib6_info *from;
21841da177e4SLinus Torvalds 	struct rt6_info *rt;
21851da177e4SLinus Torvalds 
2186a87b7dc9SDavid Ahern 	rt = container_of(dst, struct rt6_info, dst);
2187a87b7dc9SDavid Ahern 
2188a87b7dc9SDavid Ahern 	rcu_read_lock();
21891da177e4SLinus Torvalds 
21906f3118b5SNicolas Dichtel 	/* All IPV6 dsts are created with ->obsolete set to the value
21916f3118b5SNicolas Dichtel 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
21926f3118b5SNicolas Dichtel 	 * into this function always.
21936f3118b5SNicolas Dichtel 	 */
2194e3bc10bdSHannes Frederic Sowa 
2195a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
21964b32b5adSMartin KaFai Lau 
2197a68886a6SDavid Ahern 	if (from && (rt->rt6i_flags & RTF_PCPU ||
2198a68886a6SDavid Ahern 	    unlikely(!list_empty(&rt->rt6i_uncached))))
2199a68886a6SDavid Ahern 		dst_ret = rt6_dst_from_check(rt, from, cookie);
22003da59bd9SMartin KaFai Lau 	else
2201a68886a6SDavid Ahern 		dst_ret = rt6_check(rt, from, cookie);
2202a87b7dc9SDavid Ahern 
2203a87b7dc9SDavid Ahern 	rcu_read_unlock();
2204a87b7dc9SDavid Ahern 
2205a87b7dc9SDavid Ahern 	return dst_ret;
22061da177e4SLinus Torvalds }
22071da177e4SLinus Torvalds 
22081da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
22091da177e4SLinus Torvalds {
22101da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *) dst;
22111da177e4SLinus Torvalds 
22121da177e4SLinus Torvalds 	if (rt) {
221354c1a859SYOSHIFUJI Hideaki / 吉藤英明 		if (rt->rt6i_flags & RTF_CACHE) {
2214c3c14da0SDavid Ahern 			rcu_read_lock();
221554c1a859SYOSHIFUJI Hideaki / 吉藤英明 			if (rt6_check_expired(rt)) {
221693531c67SDavid Ahern 				rt6_remove_exception_rt(rt);
221754c1a859SYOSHIFUJI Hideaki / 吉藤英明 				dst = NULL;
22181da177e4SLinus Torvalds 			}
2219c3c14da0SDavid Ahern 			rcu_read_unlock();
222054c1a859SYOSHIFUJI Hideaki / 吉藤英明 		} else {
222154c1a859SYOSHIFUJI Hideaki / 吉藤英明 			dst_release(dst);
222254c1a859SYOSHIFUJI Hideaki / 吉藤英明 			dst = NULL;
222354c1a859SYOSHIFUJI Hideaki / 吉藤英明 		}
222454c1a859SYOSHIFUJI Hideaki / 吉藤英明 	}
222554c1a859SYOSHIFUJI Hideaki / 吉藤英明 	return dst;
22261da177e4SLinus Torvalds }
22271da177e4SLinus Torvalds 
22281da177e4SLinus Torvalds static void ip6_link_failure(struct sk_buff *skb)
22291da177e4SLinus Torvalds {
22301da177e4SLinus Torvalds 	struct rt6_info *rt;
22311da177e4SLinus Torvalds 
22323ffe533cSAlexey Dobriyan 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
22331da177e4SLinus Torvalds 
2234adf30907SEric Dumazet 	rt = (struct rt6_info *) skb_dst(skb);
22351da177e4SLinus Torvalds 	if (rt) {
22368a14e46fSDavid Ahern 		rcu_read_lock();
22371eb4f758SHannes Frederic Sowa 		if (rt->rt6i_flags & RTF_CACHE) {
223893531c67SDavid Ahern 			rt6_remove_exception_rt(rt);
2239c5cff856SWei Wang 		} else {
2240a68886a6SDavid Ahern 			struct fib6_info *from;
2241c5cff856SWei Wang 			struct fib6_node *fn;
2242c5cff856SWei Wang 
2243a68886a6SDavid Ahern 			from = rcu_dereference(rt->from);
2244a68886a6SDavid Ahern 			if (from) {
2245a68886a6SDavid Ahern 				fn = rcu_dereference(from->fib6_node);
2246c5cff856SWei Wang 				if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2247c5cff856SWei Wang 					fn->fn_sernum = -1;
2248a68886a6SDavid Ahern 			}
22491da177e4SLinus Torvalds 		}
22501da177e4SLinus Torvalds 		rcu_read_unlock();
22511da177e4SLinus Torvalds 	}
22521da177e4SLinus Torvalds }
22531da177e4SLinus Torvalds 
22546a3e030fSDavid Ahern static void rt6_update_expires(struct rt6_info *rt0, int timeout)
22556a3e030fSDavid Ahern {
2256a68886a6SDavid Ahern 	if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2257a68886a6SDavid Ahern 		struct fib6_info *from;
2258a68886a6SDavid Ahern 
2259a68886a6SDavid Ahern 		rcu_read_lock();
2260a68886a6SDavid Ahern 		from = rcu_dereference(rt0->from);
2261a68886a6SDavid Ahern 		if (from)
2262a68886a6SDavid Ahern 			rt0->dst.expires = from->expires;
2263a68886a6SDavid Ahern 		rcu_read_unlock();
2264a68886a6SDavid Ahern 	}
22656a3e030fSDavid Ahern 
22666a3e030fSDavid Ahern 	dst_set_expires(&rt0->dst, timeout);
22676a3e030fSDavid Ahern 	rt0->rt6i_flags |= RTF_EXPIRES;
22686700c270SDavid S. Miller }
22691da177e4SLinus Torvalds 
227045e4fd26SMartin KaFai Lau static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
227145e4fd26SMartin KaFai Lau {
227245e4fd26SMartin KaFai Lau 	struct net *net = dev_net(rt->dst.dev);
227345e4fd26SMartin KaFai Lau 
2274d4ead6b3SDavid Ahern 	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
227545e4fd26SMartin KaFai Lau 	rt->rt6i_flags |= RTF_MODIFIED;
227645e4fd26SMartin KaFai Lau 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
227745e4fd26SMartin KaFai Lau }
227845e4fd26SMartin KaFai Lau 
22790d3f6d29SMartin KaFai Lau static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
22800d3f6d29SMartin KaFai Lau {
22810d3f6d29SMartin KaFai Lau 	return !(rt->rt6i_flags & RTF_CACHE) &&
22821490ed2aSPaolo Abeni 		(rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
22830d3f6d29SMartin KaFai Lau }
22840d3f6d29SMartin KaFai Lau 
228545e4fd26SMartin KaFai Lau static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
228645e4fd26SMartin KaFai Lau 				 const struct ipv6hdr *iph, u32 mtu)
22871da177e4SLinus Torvalds {
22880dec879fSJulian Anastasov 	const struct in6_addr *daddr, *saddr;
22891da177e4SLinus Torvalds 	struct rt6_info *rt6 = (struct rt6_info *)dst;
22901da177e4SLinus Torvalds 
229119bda36cSXin Long 	if (dst_metric_locked(dst, RTAX_MTU))
229219bda36cSXin Long 		return;
229319bda36cSXin Long 
229445e4fd26SMartin KaFai Lau 	if (iph) {
229545e4fd26SMartin KaFai Lau 		daddr = &iph->daddr;
229645e4fd26SMartin KaFai Lau 		saddr = &iph->saddr;
229745e4fd26SMartin KaFai Lau 	} else if (sk) {
229845e4fd26SMartin KaFai Lau 		daddr = &sk->sk_v6_daddr;
229945e4fd26SMartin KaFai Lau 		saddr = &inet6_sk(sk)->saddr;
230045e4fd26SMartin KaFai Lau 	} else {
23010dec879fSJulian Anastasov 		daddr = NULL;
23020dec879fSJulian Anastasov 		saddr = NULL;
23031da177e4SLinus Torvalds 	}
23040dec879fSJulian Anastasov 	dst_confirm_neigh(dst, daddr);
23050dec879fSJulian Anastasov 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
23060dec879fSJulian Anastasov 	if (mtu >= dst_mtu(dst))
23070dec879fSJulian Anastasov 		return;
23080dec879fSJulian Anastasov 
23090dec879fSJulian Anastasov 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
23100dec879fSJulian Anastasov 		rt6_do_update_pmtu(rt6, mtu);
23112b760fcfSWei Wang 		/* update rt6_ex->stamp for cache */
23122b760fcfSWei Wang 		if (rt6->rt6i_flags & RTF_CACHE)
23132b760fcfSWei Wang 			rt6_update_exception_stamp_rt(rt6);
23140dec879fSJulian Anastasov 	} else if (daddr) {
2315a68886a6SDavid Ahern 		struct fib6_info *from;
23160dec879fSJulian Anastasov 		struct rt6_info *nrt6;
23170dec879fSJulian Anastasov 
23184d85cd0cSDavid Ahern 		rcu_read_lock();
2319a68886a6SDavid Ahern 		from = rcu_dereference(rt6->from);
2320a68886a6SDavid Ahern 		nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
232145e4fd26SMartin KaFai Lau 		if (nrt6) {
232245e4fd26SMartin KaFai Lau 			rt6_do_update_pmtu(nrt6, mtu);
2323a68886a6SDavid Ahern 			if (rt6_insert_exception(nrt6, from))
23242b760fcfSWei Wang 				dst_release_immediate(&nrt6->dst);
232545e4fd26SMartin KaFai Lau 		}
2326a68886a6SDavid Ahern 		rcu_read_unlock();
232745e4fd26SMartin KaFai Lau 	}
232845e4fd26SMartin KaFai Lau }
232945e4fd26SMartin KaFai Lau 
233045e4fd26SMartin KaFai Lau static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
233145e4fd26SMartin KaFai Lau 			       struct sk_buff *skb, u32 mtu)
233245e4fd26SMartin KaFai Lau {
233345e4fd26SMartin KaFai Lau 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
23341da177e4SLinus Torvalds }
23351da177e4SLinus Torvalds 
233642ae66c8SDavid S. Miller void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2337e2d118a1SLorenzo Colitti 		     int oif, u32 mark, kuid_t uid)
233881aded24SDavid S. Miller {
233981aded24SDavid S. Miller 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
234081aded24SDavid S. Miller 	struct dst_entry *dst;
2341dc92095dSMaciej Żenczykowski 	struct flowi6 fl6 = {
2342dc92095dSMaciej Żenczykowski 		.flowi6_oif = oif,
2343dc92095dSMaciej Żenczykowski 		.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2344dc92095dSMaciej Żenczykowski 		.daddr = iph->daddr,
2345dc92095dSMaciej Żenczykowski 		.saddr = iph->saddr,
2346dc92095dSMaciej Żenczykowski 		.flowlabel = ip6_flowinfo(iph),
2347dc92095dSMaciej Żenczykowski 		.flowi6_uid = uid,
2348dc92095dSMaciej Żenczykowski 	};
234981aded24SDavid S. Miller 
235081aded24SDavid S. Miller 	dst = ip6_route_output(net, NULL, &fl6);
235181aded24SDavid S. Miller 	if (!dst->error)
235245e4fd26SMartin KaFai Lau 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
235381aded24SDavid S. Miller 	dst_release(dst);
235481aded24SDavid S. Miller }
235581aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_update_pmtu);
235681aded24SDavid S. Miller 
235781aded24SDavid S. Miller void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
235881aded24SDavid S. Miller {
23597ddacfa5SDavid Ahern 	int oif = sk->sk_bound_dev_if;
236033c162a9SMartin KaFai Lau 	struct dst_entry *dst;
236133c162a9SMartin KaFai Lau 
23627ddacfa5SDavid Ahern 	if (!oif && skb->dev)
23637ddacfa5SDavid Ahern 		oif = l3mdev_master_ifindex(skb->dev);
23647ddacfa5SDavid Ahern 
23657ddacfa5SDavid Ahern 	ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
236633c162a9SMartin KaFai Lau 
236733c162a9SMartin KaFai Lau 	dst = __sk_dst_get(sk);
236833c162a9SMartin KaFai Lau 	if (!dst || !dst->obsolete ||
236933c162a9SMartin KaFai Lau 	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
237033c162a9SMartin KaFai Lau 		return;
237133c162a9SMartin KaFai Lau 
237233c162a9SMartin KaFai Lau 	bh_lock_sock(sk);
237333c162a9SMartin KaFai Lau 	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
237433c162a9SMartin KaFai Lau 		ip6_datagram_dst_update(sk, false);
237533c162a9SMartin KaFai Lau 	bh_unlock_sock(sk);
237681aded24SDavid S. Miller }
237781aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
237881aded24SDavid S. Miller 
23797d6850f7SAlexey Kodanev void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
23807d6850f7SAlexey Kodanev 			   const struct flowi6 *fl6)
23817d6850f7SAlexey Kodanev {
23827d6850f7SAlexey Kodanev #ifdef CONFIG_IPV6_SUBTREES
23837d6850f7SAlexey Kodanev 	struct ipv6_pinfo *np = inet6_sk(sk);
23847d6850f7SAlexey Kodanev #endif
23857d6850f7SAlexey Kodanev 
23867d6850f7SAlexey Kodanev 	ip6_dst_store(sk, dst,
23877d6850f7SAlexey Kodanev 		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
23887d6850f7SAlexey Kodanev 		      &sk->sk_v6_daddr : NULL,
23897d6850f7SAlexey Kodanev #ifdef CONFIG_IPV6_SUBTREES
23907d6850f7SAlexey Kodanev 		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
23917d6850f7SAlexey Kodanev 		      &np->saddr :
23927d6850f7SAlexey Kodanev #endif
23937d6850f7SAlexey Kodanev 		      NULL);
23947d6850f7SAlexey Kodanev }
23957d6850f7SAlexey Kodanev 
2396b55b76b2SDuan Jiong /* Handle redirects */
2397b55b76b2SDuan Jiong struct ip6rd_flowi {
2398b55b76b2SDuan Jiong 	struct flowi6 fl6;
2399b55b76b2SDuan Jiong 	struct in6_addr gateway;
2400b55b76b2SDuan Jiong };
2401b55b76b2SDuan Jiong 
2402b55b76b2SDuan Jiong static struct rt6_info *__ip6_route_redirect(struct net *net,
2403b55b76b2SDuan Jiong 					     struct fib6_table *table,
2404b55b76b2SDuan Jiong 					     struct flowi6 *fl6,
2405b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
2406b55b76b2SDuan Jiong 					     int flags)
2407b55b76b2SDuan Jiong {
2408b55b76b2SDuan Jiong 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
240923fb93a4SDavid Ahern 	struct rt6_info *ret = NULL, *rt_cache;
24108d1c802bSDavid Ahern 	struct fib6_info *rt;
2411b55b76b2SDuan Jiong 	struct fib6_node *fn;
2412b55b76b2SDuan Jiong 
2413b55b76b2SDuan Jiong 	/* Get the "current" route for this destination and
241467c408cfSAlexander Alemayhu 	 * check if the redirect has come from appropriate router.
2415b55b76b2SDuan Jiong 	 *
2416b55b76b2SDuan Jiong 	 * RFC 4861 specifies that redirects should only be
2417b55b76b2SDuan Jiong 	 * accepted if they come from the nexthop to the target.
2418b55b76b2SDuan Jiong 	 * Due to the way the routes are chosen, this notion
2419b55b76b2SDuan Jiong 	 * is a bit fuzzy and one might need to check all possible
2420b55b76b2SDuan Jiong 	 * routes.
2421b55b76b2SDuan Jiong 	 */
2422b55b76b2SDuan Jiong 
242366f5d6ceSWei Wang 	rcu_read_lock();
24246454743bSDavid Ahern 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2425b55b76b2SDuan Jiong restart:
242666f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(fn) {
2427ad1601aeSDavid Ahern 		if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
24288067bb8cSIdo Schimmel 			continue;
242914895687SDavid Ahern 		if (fib6_check_expired(rt))
2430b55b76b2SDuan Jiong 			continue;
243193c2fb25SDavid Ahern 		if (rt->fib6_flags & RTF_REJECT)
2432b55b76b2SDuan Jiong 			break;
24332b2450caSDavid Ahern 		if (!rt->fib6_nh.fib_nh_has_gw)
2434b55b76b2SDuan Jiong 			continue;
2435ad1601aeSDavid Ahern 		if (fl6->flowi6_oif != rt->fib6_nh.fib_nh_dev->ifindex)
2436b55b76b2SDuan Jiong 			continue;
24372b760fcfSWei Wang 		/* rt_cache's gateway might be different from its 'parent'
24382b760fcfSWei Wang 		 * in the case of an ip redirect.
24392b760fcfSWei Wang 		 * So we keep searching in the exception table if the gateway
24402b760fcfSWei Wang 		 * is different.
24412b760fcfSWei Wang 		 */
2442ad1601aeSDavid Ahern 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.fib_nh_gw6)) {
24432b760fcfSWei Wang 			rt_cache = rt6_find_cached_rt(rt,
24442b760fcfSWei Wang 						      &fl6->daddr,
24452b760fcfSWei Wang 						      &fl6->saddr);
24462b760fcfSWei Wang 			if (rt_cache &&
24472b760fcfSWei Wang 			    ipv6_addr_equal(&rdfl->gateway,
24482b760fcfSWei Wang 					    &rt_cache->rt6i_gateway)) {
244923fb93a4SDavid Ahern 				ret = rt_cache;
24502b760fcfSWei Wang 				break;
24512b760fcfSWei Wang 			}
2452b55b76b2SDuan Jiong 			continue;
24532b760fcfSWei Wang 		}
2454b55b76b2SDuan Jiong 		break;
2455b55b76b2SDuan Jiong 	}
2456b55b76b2SDuan Jiong 
2457b55b76b2SDuan Jiong 	if (!rt)
2458421842edSDavid Ahern 		rt = net->ipv6.fib6_null_entry;
245993c2fb25SDavid Ahern 	else if (rt->fib6_flags & RTF_REJECT) {
246023fb93a4SDavid Ahern 		ret = net->ipv6.ip6_null_entry;
2461b0a1ba59SMartin KaFai Lau 		goto out;
2462b0a1ba59SMartin KaFai Lau 	}
2463b0a1ba59SMartin KaFai Lau 
2464421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry) {
2465a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
2466a3c00e46SMartin KaFai Lau 		if (fn)
2467a3c00e46SMartin KaFai Lau 			goto restart;
2468b55b76b2SDuan Jiong 	}
2469a3c00e46SMartin KaFai Lau 
2470b0a1ba59SMartin KaFai Lau out:
247123fb93a4SDavid Ahern 	if (ret)
247210585b43SDavid Ahern 		ip6_hold_safe(net, &ret);
247323fb93a4SDavid Ahern 	else
247423fb93a4SDavid Ahern 		ret = ip6_create_rt_rcu(rt);
2475b55b76b2SDuan Jiong 
247666f5d6ceSWei Wang 	rcu_read_unlock();
2477b55b76b2SDuan Jiong 
2478b65f164dSPaolo Abeni 	trace_fib6_table_lookup(net, rt, table, fl6);
247923fb93a4SDavid Ahern 	return ret;
2480b55b76b2SDuan Jiong };
2481b55b76b2SDuan Jiong 
2482b55b76b2SDuan Jiong static struct dst_entry *ip6_route_redirect(struct net *net,
2483b55b76b2SDuan Jiong 					    const struct flowi6 *fl6,
2484b75cc8f9SDavid Ahern 					    const struct sk_buff *skb,
2485b55b76b2SDuan Jiong 					    const struct in6_addr *gateway)
2486b55b76b2SDuan Jiong {
2487b55b76b2SDuan Jiong 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2488b55b76b2SDuan Jiong 	struct ip6rd_flowi rdfl;
2489b55b76b2SDuan Jiong 
2490b55b76b2SDuan Jiong 	rdfl.fl6 = *fl6;
2491b55b76b2SDuan Jiong 	rdfl.gateway = *gateway;
2492b55b76b2SDuan Jiong 
2493b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, &rdfl.fl6, skb,
2494b55b76b2SDuan Jiong 				flags, __ip6_route_redirect);
2495b55b76b2SDuan Jiong }
2496b55b76b2SDuan Jiong 
2497e2d118a1SLorenzo Colitti void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2498e2d118a1SLorenzo Colitti 		  kuid_t uid)
24993a5ad2eeSDavid S. Miller {
25003a5ad2eeSDavid S. Miller 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
25013a5ad2eeSDavid S. Miller 	struct dst_entry *dst;
25021f7f10acSMaciej Żenczykowski 	struct flowi6 fl6 = {
25031f7f10acSMaciej Żenczykowski 		.flowi6_iif = LOOPBACK_IFINDEX,
25041f7f10acSMaciej Żenczykowski 		.flowi6_oif = oif,
25051f7f10acSMaciej Żenczykowski 		.flowi6_mark = mark,
25061f7f10acSMaciej Żenczykowski 		.daddr = iph->daddr,
25071f7f10acSMaciej Żenczykowski 		.saddr = iph->saddr,
25081f7f10acSMaciej Żenczykowski 		.flowlabel = ip6_flowinfo(iph),
25091f7f10acSMaciej Żenczykowski 		.flowi6_uid = uid,
25101f7f10acSMaciej Żenczykowski 	};
25113a5ad2eeSDavid S. Miller 
2512b75cc8f9SDavid Ahern 	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
25136700c270SDavid S. Miller 	rt6_do_redirect(dst, NULL, skb);
25143a5ad2eeSDavid S. Miller 	dst_release(dst);
25153a5ad2eeSDavid S. Miller }
25163a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_redirect);
25173a5ad2eeSDavid S. Miller 
2518d456336dSMaciej Żenczykowski void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
2519c92a59ecSDuan Jiong {
2520c92a59ecSDuan Jiong 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2521c92a59ecSDuan Jiong 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2522c92a59ecSDuan Jiong 	struct dst_entry *dst;
25230b26fb17SMaciej Żenczykowski 	struct flowi6 fl6 = {
25240b26fb17SMaciej Żenczykowski 		.flowi6_iif = LOOPBACK_IFINDEX,
25250b26fb17SMaciej Żenczykowski 		.flowi6_oif = oif,
25260b26fb17SMaciej Żenczykowski 		.daddr = msg->dest,
25270b26fb17SMaciej Żenczykowski 		.saddr = iph->daddr,
25280b26fb17SMaciej Żenczykowski 		.flowi6_uid = sock_net_uid(net, NULL),
25290b26fb17SMaciej Żenczykowski 	};
2530c92a59ecSDuan Jiong 
2531b75cc8f9SDavid Ahern 	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2532c92a59ecSDuan Jiong 	rt6_do_redirect(dst, NULL, skb);
2533c92a59ecSDuan Jiong 	dst_release(dst);
2534c92a59ecSDuan Jiong }
2535c92a59ecSDuan Jiong 
25363a5ad2eeSDavid S. Miller void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
25373a5ad2eeSDavid S. Miller {
2538e2d118a1SLorenzo Colitti 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2539e2d118a1SLorenzo Colitti 		     sk->sk_uid);
25403a5ad2eeSDavid S. Miller }
25413a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_redirect);
25423a5ad2eeSDavid S. Miller 
25430dbaee3bSDavid S. Miller static unsigned int ip6_default_advmss(const struct dst_entry *dst)
25441da177e4SLinus Torvalds {
25450dbaee3bSDavid S. Miller 	struct net_device *dev = dst->dev;
25460dbaee3bSDavid S. Miller 	unsigned int mtu = dst_mtu(dst);
25470dbaee3bSDavid S. Miller 	struct net *net = dev_net(dev);
25480dbaee3bSDavid S. Miller 
25491da177e4SLinus Torvalds 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
25501da177e4SLinus Torvalds 
25515578689aSDaniel Lezcano 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
25525578689aSDaniel Lezcano 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
25531da177e4SLinus Torvalds 
25541da177e4SLinus Torvalds 	/*
25551da177e4SLinus Torvalds 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
25561da177e4SLinus Torvalds 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
25571da177e4SLinus Torvalds 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
25581da177e4SLinus Torvalds 	 * rely only on pmtu discovery"
25591da177e4SLinus Torvalds 	 */
25601da177e4SLinus Torvalds 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
25611da177e4SLinus Torvalds 		mtu = IPV6_MAXPLEN;
25621da177e4SLinus Torvalds 	return mtu;
25631da177e4SLinus Torvalds }
25641da177e4SLinus Torvalds 
2565ebb762f2SSteffen Klassert static unsigned int ip6_mtu(const struct dst_entry *dst)
2566d33e4553SDavid S. Miller {
2567d33e4553SDavid S. Miller 	struct inet6_dev *idev;
2568d4ead6b3SDavid Ahern 	unsigned int mtu;
2569618f9bc7SSteffen Klassert 
25704b32b5adSMartin KaFai Lau 	mtu = dst_metric_raw(dst, RTAX_MTU);
25714b32b5adSMartin KaFai Lau 	if (mtu)
25724b32b5adSMartin KaFai Lau 		goto out;
25734b32b5adSMartin KaFai Lau 
2574618f9bc7SSteffen Klassert 	mtu = IPV6_MIN_MTU;
2575d33e4553SDavid S. Miller 
2576d33e4553SDavid S. Miller 	rcu_read_lock();
2577d33e4553SDavid S. Miller 	idev = __in6_dev_get(dst->dev);
2578d33e4553SDavid S. Miller 	if (idev)
2579d33e4553SDavid S. Miller 		mtu = idev->cnf.mtu6;
2580d33e4553SDavid S. Miller 	rcu_read_unlock();
2581d33e4553SDavid S. Miller 
258230f78d8eSEric Dumazet out:
258314972cbdSRoopa Prabhu 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
258414972cbdSRoopa Prabhu 
258514972cbdSRoopa Prabhu 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2586d33e4553SDavid S. Miller }
2587d33e4553SDavid S. Miller 
2588901731b8SDavid Ahern /* MTU selection:
2589901731b8SDavid Ahern  * 1. mtu on route is locked - use it
2590901731b8SDavid Ahern  * 2. mtu from nexthop exception
2591901731b8SDavid Ahern  * 3. mtu from egress device
2592901731b8SDavid Ahern  *
2593901731b8SDavid Ahern  * based on ip6_dst_mtu_forward and exception logic of
2594901731b8SDavid Ahern  * rt6_find_cached_rt; called with rcu_read_lock
2595901731b8SDavid Ahern  */
2596901731b8SDavid Ahern u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2597901731b8SDavid Ahern 		      struct in6_addr *saddr)
2598901731b8SDavid Ahern {
2599901731b8SDavid Ahern 	struct rt6_exception_bucket *bucket;
2600901731b8SDavid Ahern 	struct rt6_exception *rt6_ex;
2601901731b8SDavid Ahern 	struct in6_addr *src_key;
2602901731b8SDavid Ahern 	struct inet6_dev *idev;
2603901731b8SDavid Ahern 	u32 mtu = 0;
2604901731b8SDavid Ahern 
2605901731b8SDavid Ahern 	if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2606901731b8SDavid Ahern 		mtu = f6i->fib6_pmtu;
2607901731b8SDavid Ahern 		if (mtu)
2608901731b8SDavid Ahern 			goto out;
2609901731b8SDavid Ahern 	}
2610901731b8SDavid Ahern 
2611901731b8SDavid Ahern 	src_key = NULL;
2612901731b8SDavid Ahern #ifdef CONFIG_IPV6_SUBTREES
2613901731b8SDavid Ahern 	if (f6i->fib6_src.plen)
2614901731b8SDavid Ahern 		src_key = saddr;
2615901731b8SDavid Ahern #endif
2616901731b8SDavid Ahern 
2617901731b8SDavid Ahern 	bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2618901731b8SDavid Ahern 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2619901731b8SDavid Ahern 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2620901731b8SDavid Ahern 		mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2621901731b8SDavid Ahern 
2622901731b8SDavid Ahern 	if (likely(!mtu)) {
2623901731b8SDavid Ahern 		struct net_device *dev = fib6_info_nh_dev(f6i);
2624901731b8SDavid Ahern 
2625901731b8SDavid Ahern 		mtu = IPV6_MIN_MTU;
2626901731b8SDavid Ahern 		idev = __in6_dev_get(dev);
2627901731b8SDavid Ahern 		if (idev && idev->cnf.mtu6 > mtu)
2628901731b8SDavid Ahern 			mtu = idev->cnf.mtu6;
2629901731b8SDavid Ahern 	}
2630901731b8SDavid Ahern 
2631901731b8SDavid Ahern 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2632901731b8SDavid Ahern out:
2633901731b8SDavid Ahern 	return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2634901731b8SDavid Ahern }
2635901731b8SDavid Ahern 
26363b00944cSYOSHIFUJI Hideaki struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
263787a11578SDavid S. Miller 				  struct flowi6 *fl6)
26381da177e4SLinus Torvalds {
263987a11578SDavid S. Miller 	struct dst_entry *dst;
26401da177e4SLinus Torvalds 	struct rt6_info *rt;
26411da177e4SLinus Torvalds 	struct inet6_dev *idev = in6_dev_get(dev);
2642c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
26431da177e4SLinus Torvalds 
264438308473SDavid S. Miller 	if (unlikely(!idev))
2645122bdf67SEric Dumazet 		return ERR_PTR(-ENODEV);
26461da177e4SLinus Torvalds 
2647ad706862SMartin KaFai Lau 	rt = ip6_dst_alloc(net, dev, 0);
264838308473SDavid S. Miller 	if (unlikely(!rt)) {
26491da177e4SLinus Torvalds 		in6_dev_put(idev);
265087a11578SDavid S. Miller 		dst = ERR_PTR(-ENOMEM);
26511da177e4SLinus Torvalds 		goto out;
26521da177e4SLinus Torvalds 	}
26531da177e4SLinus Torvalds 
26548e2ec639SYan, Zheng 	rt->dst.flags |= DST_HOST;
2655588753f1SBrendan McGrath 	rt->dst.input = ip6_input;
26568e2ec639SYan, Zheng 	rt->dst.output  = ip6_output;
2657550bab42SJulian Anastasov 	rt->rt6i_gateway  = fl6->daddr;
265887a11578SDavid S. Miller 	rt->rt6i_dst.addr = fl6->daddr;
26598e2ec639SYan, Zheng 	rt->rt6i_dst.plen = 128;
26608e2ec639SYan, Zheng 	rt->rt6i_idev     = idev;
266114edd87dSLi RongQing 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
26621da177e4SLinus Torvalds 
26634c981e28SIdo Schimmel 	/* Add this dst into uncached_list so that rt6_disable_ip() can
2664587fea74SWei Wang 	 * do proper release of the net_device
2665587fea74SWei Wang 	 */
2666587fea74SWei Wang 	rt6_uncached_list_add(rt);
266781eb8447SWei Wang 	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
26681da177e4SLinus Torvalds 
266987a11578SDavid S. Miller 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
267087a11578SDavid S. Miller 
26711da177e4SLinus Torvalds out:
267287a11578SDavid S. Miller 	return dst;
26731da177e4SLinus Torvalds }
26741da177e4SLinus Torvalds 
2675569d3645SDaniel Lezcano static int ip6_dst_gc(struct dst_ops *ops)
26761da177e4SLinus Torvalds {
267786393e52SAlexey Dobriyan 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
26787019b78eSDaniel Lezcano 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
26797019b78eSDaniel Lezcano 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
26807019b78eSDaniel Lezcano 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
26817019b78eSDaniel Lezcano 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
26827019b78eSDaniel Lezcano 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2683fc66f95cSEric Dumazet 	int entries;
26841da177e4SLinus Torvalds 
2685fc66f95cSEric Dumazet 	entries = dst_entries_get_fast(ops);
268649a18d86SMichal Kubeček 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2687fc66f95cSEric Dumazet 	    entries <= rt_max_size)
26881da177e4SLinus Torvalds 		goto out;
26891da177e4SLinus Torvalds 
26906891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire++;
269114956643SLi RongQing 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2692fc66f95cSEric Dumazet 	entries = dst_entries_get_slow(ops);
2693fc66f95cSEric Dumazet 	if (entries < ops->gc_thresh)
26947019b78eSDaniel Lezcano 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
26951da177e4SLinus Torvalds out:
26967019b78eSDaniel Lezcano 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2697fc66f95cSEric Dumazet 	return entries > rt_max_size;
26981da177e4SLinus Torvalds }
26991da177e4SLinus Torvalds 
27008c14586fSDavid Ahern static struct rt6_info *ip6_nh_lookup_table(struct net *net,
27018c14586fSDavid Ahern 					    struct fib6_config *cfg,
2702f4797b33SDavid Ahern 					    const struct in6_addr *gw_addr,
2703f4797b33SDavid Ahern 					    u32 tbid, int flags)
27048c14586fSDavid Ahern {
27058c14586fSDavid Ahern 	struct flowi6 fl6 = {
27068c14586fSDavid Ahern 		.flowi6_oif = cfg->fc_ifindex,
27078c14586fSDavid Ahern 		.daddr = *gw_addr,
27088c14586fSDavid Ahern 		.saddr = cfg->fc_prefsrc,
27098c14586fSDavid Ahern 	};
27108c14586fSDavid Ahern 	struct fib6_table *table;
27118c14586fSDavid Ahern 	struct rt6_info *rt;
27128c14586fSDavid Ahern 
2713f4797b33SDavid Ahern 	table = fib6_get_table(net, tbid);
27148c14586fSDavid Ahern 	if (!table)
27158c14586fSDavid Ahern 		return NULL;
27168c14586fSDavid Ahern 
27178c14586fSDavid Ahern 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
27188c14586fSDavid Ahern 		flags |= RT6_LOOKUP_F_HAS_SADDR;
27198c14586fSDavid Ahern 
2720f4797b33SDavid Ahern 	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2721b75cc8f9SDavid Ahern 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
27228c14586fSDavid Ahern 
27238c14586fSDavid Ahern 	/* if table lookup failed, fall back to full lookup */
27248c14586fSDavid Ahern 	if (rt == net->ipv6.ip6_null_entry) {
27258c14586fSDavid Ahern 		ip6_rt_put(rt);
27268c14586fSDavid Ahern 		rt = NULL;
27278c14586fSDavid Ahern 	}
27288c14586fSDavid Ahern 
27298c14586fSDavid Ahern 	return rt;
27308c14586fSDavid Ahern }
27318c14586fSDavid Ahern 
2732fc1e64e1SDavid Ahern static int ip6_route_check_nh_onlink(struct net *net,
2733fc1e64e1SDavid Ahern 				     struct fib6_config *cfg,
27349fbb704cSDavid Ahern 				     const struct net_device *dev,
2735fc1e64e1SDavid Ahern 				     struct netlink_ext_ack *extack)
2736fc1e64e1SDavid Ahern {
273744750f84SDavid Ahern 	u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2738fc1e64e1SDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
2739fc1e64e1SDavid Ahern 	u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2740bf1dc8baSPaolo Abeni 	struct fib6_info *from;
2741fc1e64e1SDavid Ahern 	struct rt6_info *grt;
2742fc1e64e1SDavid Ahern 	int err;
2743fc1e64e1SDavid Ahern 
2744fc1e64e1SDavid Ahern 	err = 0;
2745fc1e64e1SDavid Ahern 	grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2746fc1e64e1SDavid Ahern 	if (grt) {
2747bf1dc8baSPaolo Abeni 		rcu_read_lock();
2748bf1dc8baSPaolo Abeni 		from = rcu_dereference(grt->from);
274958e354c0SDavid Ahern 		if (!grt->dst.error &&
27504ed591c8SDavid Ahern 		    /* ignore match if it is the default route */
2751bf1dc8baSPaolo Abeni 		    from && !ipv6_addr_any(&from->fib6_dst.addr) &&
275258e354c0SDavid Ahern 		    (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
275344750f84SDavid Ahern 			NL_SET_ERR_MSG(extack,
275444750f84SDavid Ahern 				       "Nexthop has invalid gateway or device mismatch");
2755fc1e64e1SDavid Ahern 			err = -EINVAL;
2756fc1e64e1SDavid Ahern 		}
2757bf1dc8baSPaolo Abeni 		rcu_read_unlock();
2758fc1e64e1SDavid Ahern 
2759fc1e64e1SDavid Ahern 		ip6_rt_put(grt);
2760fc1e64e1SDavid Ahern 	}
2761fc1e64e1SDavid Ahern 
2762fc1e64e1SDavid Ahern 	return err;
2763fc1e64e1SDavid Ahern }
2764fc1e64e1SDavid Ahern 
27651edce99fSDavid Ahern static int ip6_route_check_nh(struct net *net,
27661edce99fSDavid Ahern 			      struct fib6_config *cfg,
27671edce99fSDavid Ahern 			      struct net_device **_dev,
27681edce99fSDavid Ahern 			      struct inet6_dev **idev)
27691edce99fSDavid Ahern {
27701edce99fSDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
27711edce99fSDavid Ahern 	struct net_device *dev = _dev ? *_dev : NULL;
27721edce99fSDavid Ahern 	struct rt6_info *grt = NULL;
27731edce99fSDavid Ahern 	int err = -EHOSTUNREACH;
27741edce99fSDavid Ahern 
27751edce99fSDavid Ahern 	if (cfg->fc_table) {
2776f4797b33SDavid Ahern 		int flags = RT6_LOOKUP_F_IFACE;
2777f4797b33SDavid Ahern 
2778f4797b33SDavid Ahern 		grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2779f4797b33SDavid Ahern 					  cfg->fc_table, flags);
27801edce99fSDavid Ahern 		if (grt) {
27811edce99fSDavid Ahern 			if (grt->rt6i_flags & RTF_GATEWAY ||
27821edce99fSDavid Ahern 			    (dev && dev != grt->dst.dev)) {
27831edce99fSDavid Ahern 				ip6_rt_put(grt);
27841edce99fSDavid Ahern 				grt = NULL;
27851edce99fSDavid Ahern 			}
27861edce99fSDavid Ahern 		}
27871edce99fSDavid Ahern 	}
27881edce99fSDavid Ahern 
27891edce99fSDavid Ahern 	if (!grt)
2790b75cc8f9SDavid Ahern 		grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
27911edce99fSDavid Ahern 
27921edce99fSDavid Ahern 	if (!grt)
27931edce99fSDavid Ahern 		goto out;
27941edce99fSDavid Ahern 
27951edce99fSDavid Ahern 	if (dev) {
27961edce99fSDavid Ahern 		if (dev != grt->dst.dev) {
27971edce99fSDavid Ahern 			ip6_rt_put(grt);
27981edce99fSDavid Ahern 			goto out;
27991edce99fSDavid Ahern 		}
28001edce99fSDavid Ahern 	} else {
28011edce99fSDavid Ahern 		*_dev = dev = grt->dst.dev;
28021edce99fSDavid Ahern 		*idev = grt->rt6i_idev;
28031edce99fSDavid Ahern 		dev_hold(dev);
28041edce99fSDavid Ahern 		in6_dev_hold(grt->rt6i_idev);
28051edce99fSDavid Ahern 	}
28061edce99fSDavid Ahern 
28071edce99fSDavid Ahern 	if (!(grt->rt6i_flags & RTF_GATEWAY))
28081edce99fSDavid Ahern 		err = 0;
28091edce99fSDavid Ahern 
28101edce99fSDavid Ahern 	ip6_rt_put(grt);
28111edce99fSDavid Ahern 
28121edce99fSDavid Ahern out:
28131edce99fSDavid Ahern 	return err;
28141edce99fSDavid Ahern }
28151edce99fSDavid Ahern 
28169fbb704cSDavid Ahern static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
28179fbb704cSDavid Ahern 			   struct net_device **_dev, struct inet6_dev **idev,
28189fbb704cSDavid Ahern 			   struct netlink_ext_ack *extack)
28199fbb704cSDavid Ahern {
28209fbb704cSDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
28219fbb704cSDavid Ahern 	int gwa_type = ipv6_addr_type(gw_addr);
2822232378e8SDavid Ahern 	bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
28239fbb704cSDavid Ahern 	const struct net_device *dev = *_dev;
2824232378e8SDavid Ahern 	bool need_addr_check = !dev;
28259fbb704cSDavid Ahern 	int err = -EINVAL;
28269fbb704cSDavid Ahern 
28279fbb704cSDavid Ahern 	/* if gw_addr is local we will fail to detect this in case
28289fbb704cSDavid Ahern 	 * address is still TENTATIVE (DAD in progress). rt6_lookup()
28299fbb704cSDavid Ahern 	 * will return already-added prefix route via interface that
28309fbb704cSDavid Ahern 	 * prefix route was assigned to, which might be non-loopback.
28319fbb704cSDavid Ahern 	 */
2832232378e8SDavid Ahern 	if (dev &&
2833232378e8SDavid Ahern 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2834232378e8SDavid Ahern 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
28359fbb704cSDavid Ahern 		goto out;
28369fbb704cSDavid Ahern 	}
28379fbb704cSDavid Ahern 
28389fbb704cSDavid Ahern 	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
28399fbb704cSDavid Ahern 		/* IPv6 strictly inhibits using not link-local
28409fbb704cSDavid Ahern 		 * addresses as nexthop address.
28419fbb704cSDavid Ahern 		 * Otherwise, router will not able to send redirects.
28429fbb704cSDavid Ahern 		 * It is very good, but in some (rare!) circumstances
28439fbb704cSDavid Ahern 		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
28449fbb704cSDavid Ahern 		 * some exceptions. --ANK
28459fbb704cSDavid Ahern 		 * We allow IPv4-mapped nexthops to support RFC4798-type
28469fbb704cSDavid Ahern 		 * addressing
28479fbb704cSDavid Ahern 		 */
28489fbb704cSDavid Ahern 		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
28499fbb704cSDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid gateway address");
28509fbb704cSDavid Ahern 			goto out;
28519fbb704cSDavid Ahern 		}
28529fbb704cSDavid Ahern 
28539fbb704cSDavid Ahern 		if (cfg->fc_flags & RTNH_F_ONLINK)
28549fbb704cSDavid Ahern 			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
28559fbb704cSDavid Ahern 		else
28569fbb704cSDavid Ahern 			err = ip6_route_check_nh(net, cfg, _dev, idev);
28579fbb704cSDavid Ahern 
28589fbb704cSDavid Ahern 		if (err)
28599fbb704cSDavid Ahern 			goto out;
28609fbb704cSDavid Ahern 	}
28619fbb704cSDavid Ahern 
28629fbb704cSDavid Ahern 	/* reload in case device was changed */
28639fbb704cSDavid Ahern 	dev = *_dev;
28649fbb704cSDavid Ahern 
28659fbb704cSDavid Ahern 	err = -EINVAL;
28669fbb704cSDavid Ahern 	if (!dev) {
28679fbb704cSDavid Ahern 		NL_SET_ERR_MSG(extack, "Egress device not specified");
28689fbb704cSDavid Ahern 		goto out;
28699fbb704cSDavid Ahern 	} else if (dev->flags & IFF_LOOPBACK) {
28709fbb704cSDavid Ahern 		NL_SET_ERR_MSG(extack,
28719fbb704cSDavid Ahern 			       "Egress device can not be loopback device for this route");
28729fbb704cSDavid Ahern 		goto out;
28739fbb704cSDavid Ahern 	}
2874232378e8SDavid Ahern 
2875232378e8SDavid Ahern 	/* if we did not check gw_addr above, do so now that the
2876232378e8SDavid Ahern 	 * egress device has been resolved.
2877232378e8SDavid Ahern 	 */
2878232378e8SDavid Ahern 	if (need_addr_check &&
2879232378e8SDavid Ahern 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2880232378e8SDavid Ahern 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2881232378e8SDavid Ahern 		goto out;
2882232378e8SDavid Ahern 	}
2883232378e8SDavid Ahern 
28849fbb704cSDavid Ahern 	err = 0;
28859fbb704cSDavid Ahern out:
28869fbb704cSDavid Ahern 	return err;
28879fbb704cSDavid Ahern }
28889fbb704cSDavid Ahern 
288983c44251SDavid Ahern static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
289083c44251SDavid Ahern {
289183c44251SDavid Ahern 	if ((flags & RTF_REJECT) ||
289283c44251SDavid Ahern 	    (dev && (dev->flags & IFF_LOOPBACK) &&
289383c44251SDavid Ahern 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
289483c44251SDavid Ahern 	     !(flags & RTF_LOCAL)))
289583c44251SDavid Ahern 		return true;
289683c44251SDavid Ahern 
289783c44251SDavid Ahern 	return false;
289883c44251SDavid Ahern }
289983c44251SDavid Ahern 
290083c44251SDavid Ahern int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
290183c44251SDavid Ahern 		 struct fib6_config *cfg, gfp_t gfp_flags,
290283c44251SDavid Ahern 		 struct netlink_ext_ack *extack)
290383c44251SDavid Ahern {
290483c44251SDavid Ahern 	struct net_device *dev = NULL;
290583c44251SDavid Ahern 	struct inet6_dev *idev = NULL;
290683c44251SDavid Ahern 	int addr_type;
290783c44251SDavid Ahern 	int err;
290883c44251SDavid Ahern 
2909f1741730SDavid Ahern 	fib6_nh->fib_nh_family = AF_INET6;
2910f1741730SDavid Ahern 
291183c44251SDavid Ahern 	err = -ENODEV;
291283c44251SDavid Ahern 	if (cfg->fc_ifindex) {
291383c44251SDavid Ahern 		dev = dev_get_by_index(net, cfg->fc_ifindex);
291483c44251SDavid Ahern 		if (!dev)
291583c44251SDavid Ahern 			goto out;
291683c44251SDavid Ahern 		idev = in6_dev_get(dev);
291783c44251SDavid Ahern 		if (!idev)
291883c44251SDavid Ahern 			goto out;
291983c44251SDavid Ahern 	}
292083c44251SDavid Ahern 
292183c44251SDavid Ahern 	if (cfg->fc_flags & RTNH_F_ONLINK) {
292283c44251SDavid Ahern 		if (!dev) {
292383c44251SDavid Ahern 			NL_SET_ERR_MSG(extack,
292483c44251SDavid Ahern 				       "Nexthop device required for onlink");
292583c44251SDavid Ahern 			goto out;
292683c44251SDavid Ahern 		}
292783c44251SDavid Ahern 
292883c44251SDavid Ahern 		if (!(dev->flags & IFF_UP)) {
292983c44251SDavid Ahern 			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
293083c44251SDavid Ahern 			err = -ENETDOWN;
293183c44251SDavid Ahern 			goto out;
293283c44251SDavid Ahern 		}
293383c44251SDavid Ahern 
2934ad1601aeSDavid Ahern 		fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
293583c44251SDavid Ahern 	}
293683c44251SDavid Ahern 
2937ad1601aeSDavid Ahern 	fib6_nh->fib_nh_weight = 1;
293883c44251SDavid Ahern 
293983c44251SDavid Ahern 	/* We cannot add true routes via loopback here,
294083c44251SDavid Ahern 	 * they would result in kernel looping; promote them to reject routes
294183c44251SDavid Ahern 	 */
294283c44251SDavid Ahern 	addr_type = ipv6_addr_type(&cfg->fc_dst);
294383c44251SDavid Ahern 	if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
294483c44251SDavid Ahern 		/* hold loopback dev/idev if we haven't done so. */
294583c44251SDavid Ahern 		if (dev != net->loopback_dev) {
294683c44251SDavid Ahern 			if (dev) {
294783c44251SDavid Ahern 				dev_put(dev);
294883c44251SDavid Ahern 				in6_dev_put(idev);
294983c44251SDavid Ahern 			}
295083c44251SDavid Ahern 			dev = net->loopback_dev;
295183c44251SDavid Ahern 			dev_hold(dev);
295283c44251SDavid Ahern 			idev = in6_dev_get(dev);
295383c44251SDavid Ahern 			if (!idev) {
295483c44251SDavid Ahern 				err = -ENODEV;
295583c44251SDavid Ahern 				goto out;
295683c44251SDavid Ahern 			}
295783c44251SDavid Ahern 		}
295883c44251SDavid Ahern 		goto set_dev;
295983c44251SDavid Ahern 	}
296083c44251SDavid Ahern 
296183c44251SDavid Ahern 	if (cfg->fc_flags & RTF_GATEWAY) {
296283c44251SDavid Ahern 		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
296383c44251SDavid Ahern 		if (err)
296483c44251SDavid Ahern 			goto out;
296583c44251SDavid Ahern 
2966ad1601aeSDavid Ahern 		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
29672b2450caSDavid Ahern 		fib6_nh->fib_nh_has_gw = 1;
296883c44251SDavid Ahern 	}
296983c44251SDavid Ahern 
297083c44251SDavid Ahern 	err = -ENODEV;
297183c44251SDavid Ahern 	if (!dev)
297283c44251SDavid Ahern 		goto out;
297383c44251SDavid Ahern 
297483c44251SDavid Ahern 	if (idev->cnf.disable_ipv6) {
297583c44251SDavid Ahern 		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
297683c44251SDavid Ahern 		err = -EACCES;
297783c44251SDavid Ahern 		goto out;
297883c44251SDavid Ahern 	}
297983c44251SDavid Ahern 
298083c44251SDavid Ahern 	if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
298183c44251SDavid Ahern 		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
298283c44251SDavid Ahern 		err = -ENETDOWN;
298383c44251SDavid Ahern 		goto out;
298483c44251SDavid Ahern 	}
298583c44251SDavid Ahern 
298683c44251SDavid Ahern 	if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
298783c44251SDavid Ahern 	    !netif_carrier_ok(dev))
2988ad1601aeSDavid Ahern 		fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
298983c44251SDavid Ahern 
2990*979e276eSDavid Ahern 	err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
2991*979e276eSDavid Ahern 				 cfg->fc_encap_type, cfg, gfp_flags, extack);
2992*979e276eSDavid Ahern 	if (err)
2993*979e276eSDavid Ahern 		goto out;
299483c44251SDavid Ahern set_dev:
2995ad1601aeSDavid Ahern 	fib6_nh->fib_nh_dev = dev;
2996f1741730SDavid Ahern 	fib6_nh->fib_nh_oif = dev->ifindex;
299783c44251SDavid Ahern 	err = 0;
299883c44251SDavid Ahern out:
299983c44251SDavid Ahern 	if (idev)
300083c44251SDavid Ahern 		in6_dev_put(idev);
300183c44251SDavid Ahern 
300283c44251SDavid Ahern 	if (err) {
3003ad1601aeSDavid Ahern 		lwtstate_put(fib6_nh->fib_nh_lws);
3004ad1601aeSDavid Ahern 		fib6_nh->fib_nh_lws = NULL;
300583c44251SDavid Ahern 		if (dev)
300683c44251SDavid Ahern 			dev_put(dev);
300783c44251SDavid Ahern 	}
300883c44251SDavid Ahern 
300983c44251SDavid Ahern 	return err;
301083c44251SDavid Ahern }
301183c44251SDavid Ahern 
3012dac7d0f2SDavid Ahern void fib6_nh_release(struct fib6_nh *fib6_nh)
3013dac7d0f2SDavid Ahern {
3014*979e276eSDavid Ahern 	fib_nh_common_release(&fib6_nh->nh_common);
3015dac7d0f2SDavid Ahern }
3016dac7d0f2SDavid Ahern 
30178d1c802bSDavid Ahern static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
3018acb54e3cSDavid Ahern 					      gfp_t gfp_flags,
3019333c4301SDavid Ahern 					      struct netlink_ext_ack *extack)
30201da177e4SLinus Torvalds {
30215578689aSDaniel Lezcano 	struct net *net = cfg->fc_nlinfo.nl_net;
30228d1c802bSDavid Ahern 	struct fib6_info *rt = NULL;
3023c71099acSThomas Graf 	struct fib6_table *table;
30248c5b83f0SRoopa Prabhu 	int err = -EINVAL;
302583c44251SDavid Ahern 	int addr_type;
30261da177e4SLinus Torvalds 
3027557c44beSDavid Ahern 	/* RTF_PCPU is an internal flag; can not be set by userspace */
3028d5d531cbSDavid Ahern 	if (cfg->fc_flags & RTF_PCPU) {
3029d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
3030557c44beSDavid Ahern 		goto out;
3031d5d531cbSDavid Ahern 	}
3032557c44beSDavid Ahern 
30332ea2352eSWei Wang 	/* RTF_CACHE is an internal flag; can not be set by userspace */
30342ea2352eSWei Wang 	if (cfg->fc_flags & RTF_CACHE) {
30352ea2352eSWei Wang 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
30362ea2352eSWei Wang 		goto out;
30372ea2352eSWei Wang 	}
30382ea2352eSWei Wang 
3039e8478e80SDavid Ahern 	if (cfg->fc_type > RTN_MAX) {
3040e8478e80SDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid route type");
3041e8478e80SDavid Ahern 		goto out;
3042e8478e80SDavid Ahern 	}
3043e8478e80SDavid Ahern 
3044d5d531cbSDavid Ahern 	if (cfg->fc_dst_len > 128) {
3045d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
30468c5b83f0SRoopa Prabhu 		goto out;
3047d5d531cbSDavid Ahern 	}
3048d5d531cbSDavid Ahern 	if (cfg->fc_src_len > 128) {
3049d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid source address length");
3050d5d531cbSDavid Ahern 		goto out;
3051d5d531cbSDavid Ahern 	}
30521da177e4SLinus Torvalds #ifndef CONFIG_IPV6_SUBTREES
3053d5d531cbSDavid Ahern 	if (cfg->fc_src_len) {
3054d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack,
3055d5d531cbSDavid Ahern 			       "Specifying source address requires IPV6_SUBTREES to be enabled");
30568c5b83f0SRoopa Prabhu 		goto out;
3057d5d531cbSDavid Ahern 	}
30581da177e4SLinus Torvalds #endif
3059fc1e64e1SDavid Ahern 
3060c71099acSThomas Graf 	err = -ENOBUFS;
306138308473SDavid S. Miller 	if (cfg->fc_nlinfo.nlh &&
3062d71314b4SMatti Vaittinen 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
3063d71314b4SMatti Vaittinen 		table = fib6_get_table(net, cfg->fc_table);
306438308473SDavid S. Miller 		if (!table) {
3065f3213831SJoe Perches 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
3066d71314b4SMatti Vaittinen 			table = fib6_new_table(net, cfg->fc_table);
3067d71314b4SMatti Vaittinen 		}
3068d71314b4SMatti Vaittinen 	} else {
3069d71314b4SMatti Vaittinen 		table = fib6_new_table(net, cfg->fc_table);
3070d71314b4SMatti Vaittinen 	}
307138308473SDavid S. Miller 
307238308473SDavid S. Miller 	if (!table)
3073c71099acSThomas Graf 		goto out;
3074c71099acSThomas Graf 
30751da177e4SLinus Torvalds 	err = -ENOMEM;
307693531c67SDavid Ahern 	rt = fib6_info_alloc(gfp_flags);
307793531c67SDavid Ahern 	if (!rt)
30781da177e4SLinus Torvalds 		goto out;
307993531c67SDavid Ahern 
3080d7e774f3SDavid Ahern 	rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3081d7e774f3SDavid Ahern 					       extack);
3082767a2217SDavid Ahern 	if (IS_ERR(rt->fib6_metrics)) {
3083767a2217SDavid Ahern 		err = PTR_ERR(rt->fib6_metrics);
3084fda21d46SEric Dumazet 		/* Do not leave garbage there. */
3085fda21d46SEric Dumazet 		rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
3086767a2217SDavid Ahern 		goto out;
3087767a2217SDavid Ahern 	}
3088767a2217SDavid Ahern 
308993531c67SDavid Ahern 	if (cfg->fc_flags & RTF_ADDRCONF)
309093531c67SDavid Ahern 		rt->dst_nocount = true;
30911da177e4SLinus Torvalds 
30921716a961SGao feng 	if (cfg->fc_flags & RTF_EXPIRES)
309314895687SDavid Ahern 		fib6_set_expires(rt, jiffies +
30941716a961SGao feng 				clock_t_to_jiffies(cfg->fc_expires));
30951716a961SGao feng 	else
309614895687SDavid Ahern 		fib6_clean_expires(rt);
30971da177e4SLinus Torvalds 
309886872cb5SThomas Graf 	if (cfg->fc_protocol == RTPROT_UNSPEC)
309986872cb5SThomas Graf 		cfg->fc_protocol = RTPROT_BOOT;
310093c2fb25SDavid Ahern 	rt->fib6_protocol = cfg->fc_protocol;
310186872cb5SThomas Graf 
310283c44251SDavid Ahern 	rt->fib6_table = table;
310383c44251SDavid Ahern 	rt->fib6_metric = cfg->fc_metric;
310483c44251SDavid Ahern 	rt->fib6_type = cfg->fc_type;
31052b2450caSDavid Ahern 	rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
310619e42e45SRoopa Prabhu 
310793c2fb25SDavid Ahern 	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
310893c2fb25SDavid Ahern 	rt->fib6_dst.plen = cfg->fc_dst_len;
310993c2fb25SDavid Ahern 	if (rt->fib6_dst.plen == 128)
31103b6761d1SDavid Ahern 		rt->dst_host = true;
31111da177e4SLinus Torvalds 
31121da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
311393c2fb25SDavid Ahern 	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
311493c2fb25SDavid Ahern 	rt->fib6_src.plen = cfg->fc_src_len;
31151da177e4SLinus Torvalds #endif
311683c44251SDavid Ahern 	err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
31171da177e4SLinus Torvalds 	if (err)
31181da177e4SLinus Torvalds 		goto out;
31199fbb704cSDavid Ahern 
312083c44251SDavid Ahern 	/* We cannot add true routes via loopback here,
312183c44251SDavid Ahern 	 * they would result in kernel looping; promote them to reject routes
312283c44251SDavid Ahern 	 */
312383c44251SDavid Ahern 	addr_type = ipv6_addr_type(&cfg->fc_dst);
3124ad1601aeSDavid Ahern 	if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
312583c44251SDavid Ahern 		rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
3126955ec4cbSDavid Ahern 
3127c3968a85SDaniel Walter 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
312883c44251SDavid Ahern 		struct net_device *dev = fib6_info_nh_dev(rt);
312983c44251SDavid Ahern 
3130c3968a85SDaniel Walter 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3131d5d531cbSDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid source address");
3132c3968a85SDaniel Walter 			err = -EINVAL;
3133c3968a85SDaniel Walter 			goto out;
3134c3968a85SDaniel Walter 		}
313593c2fb25SDavid Ahern 		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
313693c2fb25SDavid Ahern 		rt->fib6_prefsrc.plen = 128;
3137c3968a85SDaniel Walter 	} else
313893c2fb25SDavid Ahern 		rt->fib6_prefsrc.plen = 0;
3139c3968a85SDaniel Walter 
31408c5b83f0SRoopa Prabhu 	return rt;
31411da177e4SLinus Torvalds out:
314293531c67SDavid Ahern 	fib6_info_release(rt);
31438c5b83f0SRoopa Prabhu 	return ERR_PTR(err);
31446b9ea5a6SRoopa Prabhu }
31456b9ea5a6SRoopa Prabhu 
3146acb54e3cSDavid Ahern int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3147333c4301SDavid Ahern 		  struct netlink_ext_ack *extack)
31486b9ea5a6SRoopa Prabhu {
31498d1c802bSDavid Ahern 	struct fib6_info *rt;
31506b9ea5a6SRoopa Prabhu 	int err;
31516b9ea5a6SRoopa Prabhu 
3152acb54e3cSDavid Ahern 	rt = ip6_route_info_create(cfg, gfp_flags, extack);
3153d4ead6b3SDavid Ahern 	if (IS_ERR(rt))
3154d4ead6b3SDavid Ahern 		return PTR_ERR(rt);
31556b9ea5a6SRoopa Prabhu 
3156d4ead6b3SDavid Ahern 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
315793531c67SDavid Ahern 	fib6_info_release(rt);
31586b9ea5a6SRoopa Prabhu 
31591da177e4SLinus Torvalds 	return err;
31601da177e4SLinus Torvalds }
31611da177e4SLinus Torvalds 
31628d1c802bSDavid Ahern static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
31631da177e4SLinus Torvalds {
3164afb1d4b5SDavid Ahern 	struct net *net = info->nl_net;
3165c71099acSThomas Graf 	struct fib6_table *table;
3166afb1d4b5SDavid Ahern 	int err;
31671da177e4SLinus Torvalds 
3168421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry) {
31696825a26cSGao feng 		err = -ENOENT;
31706825a26cSGao feng 		goto out;
31716825a26cSGao feng 	}
31726c813a72SPatrick McHardy 
317393c2fb25SDavid Ahern 	table = rt->fib6_table;
317466f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
317586872cb5SThomas Graf 	err = fib6_del(rt, info);
317666f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
31771da177e4SLinus Torvalds 
31786825a26cSGao feng out:
317993531c67SDavid Ahern 	fib6_info_release(rt);
31801da177e4SLinus Torvalds 	return err;
31811da177e4SLinus Torvalds }
31821da177e4SLinus Torvalds 
31838d1c802bSDavid Ahern int ip6_del_rt(struct net *net, struct fib6_info *rt)
3184e0a1ad73SThomas Graf {
3185afb1d4b5SDavid Ahern 	struct nl_info info = { .nl_net = net };
3186afb1d4b5SDavid Ahern 
3187528c4cebSDenis V. Lunev 	return __ip6_del_rt(rt, &info);
3188e0a1ad73SThomas Graf }
3189e0a1ad73SThomas Graf 
31908d1c802bSDavid Ahern static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
31910ae81335SDavid Ahern {
31920ae81335SDavid Ahern 	struct nl_info *info = &cfg->fc_nlinfo;
3193e3330039SWANG Cong 	struct net *net = info->nl_net;
319416a16cd3SDavid Ahern 	struct sk_buff *skb = NULL;
31950ae81335SDavid Ahern 	struct fib6_table *table;
3196e3330039SWANG Cong 	int err = -ENOENT;
31970ae81335SDavid Ahern 
3198421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry)
3199e3330039SWANG Cong 		goto out_put;
320093c2fb25SDavid Ahern 	table = rt->fib6_table;
320166f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
32020ae81335SDavid Ahern 
320393c2fb25SDavid Ahern 	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
32048d1c802bSDavid Ahern 		struct fib6_info *sibling, *next_sibling;
32050ae81335SDavid Ahern 
320616a16cd3SDavid Ahern 		/* prefer to send a single notification with all hops */
320716a16cd3SDavid Ahern 		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
320816a16cd3SDavid Ahern 		if (skb) {
320916a16cd3SDavid Ahern 			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
321016a16cd3SDavid Ahern 
3211d4ead6b3SDavid Ahern 			if (rt6_fill_node(net, skb, rt, NULL,
321216a16cd3SDavid Ahern 					  NULL, NULL, 0, RTM_DELROUTE,
321316a16cd3SDavid Ahern 					  info->portid, seq, 0) < 0) {
321416a16cd3SDavid Ahern 				kfree_skb(skb);
321516a16cd3SDavid Ahern 				skb = NULL;
321616a16cd3SDavid Ahern 			} else
321716a16cd3SDavid Ahern 				info->skip_notify = 1;
321816a16cd3SDavid Ahern 		}
321916a16cd3SDavid Ahern 
32200ae81335SDavid Ahern 		list_for_each_entry_safe(sibling, next_sibling,
322193c2fb25SDavid Ahern 					 &rt->fib6_siblings,
322293c2fb25SDavid Ahern 					 fib6_siblings) {
32230ae81335SDavid Ahern 			err = fib6_del(sibling, info);
32240ae81335SDavid Ahern 			if (err)
3225e3330039SWANG Cong 				goto out_unlock;
32260ae81335SDavid Ahern 		}
32270ae81335SDavid Ahern 	}
32280ae81335SDavid Ahern 
32290ae81335SDavid Ahern 	err = fib6_del(rt, info);
3230e3330039SWANG Cong out_unlock:
323166f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
3232e3330039SWANG Cong out_put:
323393531c67SDavid Ahern 	fib6_info_release(rt);
323416a16cd3SDavid Ahern 
323516a16cd3SDavid Ahern 	if (skb) {
3236e3330039SWANG Cong 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
323716a16cd3SDavid Ahern 			    info->nlh, gfp_any());
323816a16cd3SDavid Ahern 	}
32390ae81335SDavid Ahern 	return err;
32400ae81335SDavid Ahern }
32410ae81335SDavid Ahern 
324223fb93a4SDavid Ahern static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
324323fb93a4SDavid Ahern {
324423fb93a4SDavid Ahern 	int rc = -ESRCH;
324523fb93a4SDavid Ahern 
324623fb93a4SDavid Ahern 	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
324723fb93a4SDavid Ahern 		goto out;
324823fb93a4SDavid Ahern 
324923fb93a4SDavid Ahern 	if (cfg->fc_flags & RTF_GATEWAY &&
325023fb93a4SDavid Ahern 	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
325123fb93a4SDavid Ahern 		goto out;
3252761f6026SXin Long 
325323fb93a4SDavid Ahern 	rc = rt6_remove_exception_rt(rt);
325423fb93a4SDavid Ahern out:
325523fb93a4SDavid Ahern 	return rc;
325623fb93a4SDavid Ahern }
325723fb93a4SDavid Ahern 
3258333c4301SDavid Ahern static int ip6_route_del(struct fib6_config *cfg,
3259333c4301SDavid Ahern 			 struct netlink_ext_ack *extack)
32601da177e4SLinus Torvalds {
32618d1c802bSDavid Ahern 	struct rt6_info *rt_cache;
3262c71099acSThomas Graf 	struct fib6_table *table;
32638d1c802bSDavid Ahern 	struct fib6_info *rt;
32641da177e4SLinus Torvalds 	struct fib6_node *fn;
32651da177e4SLinus Torvalds 	int err = -ESRCH;
32661da177e4SLinus Torvalds 
32675578689aSDaniel Lezcano 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3268d5d531cbSDavid Ahern 	if (!table) {
3269d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "FIB table does not exist");
3270c71099acSThomas Graf 		return err;
3271d5d531cbSDavid Ahern 	}
32721da177e4SLinus Torvalds 
327366f5d6ceSWei Wang 	rcu_read_lock();
3274c71099acSThomas Graf 
3275c71099acSThomas Graf 	fn = fib6_locate(&table->tb6_root,
327686872cb5SThomas Graf 			 &cfg->fc_dst, cfg->fc_dst_len,
327738fbeeeeSWei Wang 			 &cfg->fc_src, cfg->fc_src_len,
32782b760fcfSWei Wang 			 !(cfg->fc_flags & RTF_CACHE));
32791da177e4SLinus Torvalds 
32801da177e4SLinus Torvalds 	if (fn) {
328166f5d6ceSWei Wang 		for_each_fib6_node_rt_rcu(fn) {
3282ad1601aeSDavid Ahern 			struct fib6_nh *nh;
3283ad1601aeSDavid Ahern 
32842b760fcfSWei Wang 			if (cfg->fc_flags & RTF_CACHE) {
328523fb93a4SDavid Ahern 				int rc;
328623fb93a4SDavid Ahern 
32872b760fcfSWei Wang 				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
32882b760fcfSWei Wang 							      &cfg->fc_src);
328923fb93a4SDavid Ahern 				if (rt_cache) {
329023fb93a4SDavid Ahern 					rc = ip6_del_cached_rt(rt_cache, cfg);
32919e575010SEric Dumazet 					if (rc != -ESRCH) {
32929e575010SEric Dumazet 						rcu_read_unlock();
329323fb93a4SDavid Ahern 						return rc;
329423fb93a4SDavid Ahern 					}
32959e575010SEric Dumazet 				}
32961f56a01fSMartin KaFai Lau 				continue;
32972b760fcfSWei Wang 			}
3298ad1601aeSDavid Ahern 
3299ad1601aeSDavid Ahern 			nh = &rt->fib6_nh;
330086872cb5SThomas Graf 			if (cfg->fc_ifindex &&
3301ad1601aeSDavid Ahern 			    (!nh->fib_nh_dev ||
3302ad1601aeSDavid Ahern 			     nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
33031da177e4SLinus Torvalds 				continue;
330486872cb5SThomas Graf 			if (cfg->fc_flags & RTF_GATEWAY &&
3305ad1601aeSDavid Ahern 			    !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
33061da177e4SLinus Torvalds 				continue;
330793c2fb25SDavid Ahern 			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
33081da177e4SLinus Torvalds 				continue;
330993c2fb25SDavid Ahern 			if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
3310c2ed1880SMantas M 				continue;
3311e873e4b9SWei Wang 			if (!fib6_info_hold_safe(rt))
3312e873e4b9SWei Wang 				continue;
331366f5d6ceSWei Wang 			rcu_read_unlock();
33141da177e4SLinus Torvalds 
33150ae81335SDavid Ahern 			/* if gateway was specified only delete the one hop */
33160ae81335SDavid Ahern 			if (cfg->fc_flags & RTF_GATEWAY)
331786872cb5SThomas Graf 				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
33180ae81335SDavid Ahern 
33190ae81335SDavid Ahern 			return __ip6_del_rt_siblings(rt, cfg);
33201da177e4SLinus Torvalds 		}
33211da177e4SLinus Torvalds 	}
332266f5d6ceSWei Wang 	rcu_read_unlock();
33231da177e4SLinus Torvalds 
33241da177e4SLinus Torvalds 	return err;
33251da177e4SLinus Torvalds }
33261da177e4SLinus Torvalds 
33276700c270SDavid S. Miller static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3328a6279458SYOSHIFUJI Hideaki {
3329a6279458SYOSHIFUJI Hideaki 	struct netevent_redirect netevent;
3330e8599ff4SDavid S. Miller 	struct rt6_info *rt, *nrt = NULL;
3331e8599ff4SDavid S. Miller 	struct ndisc_options ndopts;
3332e8599ff4SDavid S. Miller 	struct inet6_dev *in6_dev;
3333e8599ff4SDavid S. Miller 	struct neighbour *neigh;
3334a68886a6SDavid Ahern 	struct fib6_info *from;
333571bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	struct rd_msg *msg;
33366e157b6aSDavid S. Miller 	int optlen, on_link;
33376e157b6aSDavid S. Miller 	u8 *lladdr;
3338e8599ff4SDavid S. Miller 
333929a3cad5SSimon Horman 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
334071bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	optlen -= sizeof(*msg);
3341e8599ff4SDavid S. Miller 
3342e8599ff4SDavid S. Miller 	if (optlen < 0) {
33436e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3344e8599ff4SDavid S. Miller 		return;
3345e8599ff4SDavid S. Miller 	}
3346e8599ff4SDavid S. Miller 
334771bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	msg = (struct rd_msg *)icmp6_hdr(skb);
3348e8599ff4SDavid S. Miller 
334971bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (ipv6_addr_is_multicast(&msg->dest)) {
33506e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3351e8599ff4SDavid S. Miller 		return;
3352e8599ff4SDavid S. Miller 	}
3353e8599ff4SDavid S. Miller 
33546e157b6aSDavid S. Miller 	on_link = 0;
335571bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3356e8599ff4SDavid S. Miller 		on_link = 1;
335771bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	} else if (ipv6_addr_type(&msg->target) !=
3358e8599ff4SDavid S. Miller 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
33596e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3360e8599ff4SDavid S. Miller 		return;
3361e8599ff4SDavid S. Miller 	}
3362e8599ff4SDavid S. Miller 
3363e8599ff4SDavid S. Miller 	in6_dev = __in6_dev_get(skb->dev);
3364e8599ff4SDavid S. Miller 	if (!in6_dev)
3365e8599ff4SDavid S. Miller 		return;
3366e8599ff4SDavid S. Miller 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3367e8599ff4SDavid S. Miller 		return;
3368e8599ff4SDavid S. Miller 
3369e8599ff4SDavid S. Miller 	/* RFC2461 8.1:
3370e8599ff4SDavid S. Miller 	 *	The IP source address of the Redirect MUST be the same as the current
3371e8599ff4SDavid S. Miller 	 *	first-hop router for the specified ICMP Destination Address.
3372e8599ff4SDavid S. Miller 	 */
3373e8599ff4SDavid S. Miller 
3374f997c55cSAlexander Aring 	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3375e8599ff4SDavid S. Miller 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3376e8599ff4SDavid S. Miller 		return;
3377e8599ff4SDavid S. Miller 	}
33786e157b6aSDavid S. Miller 
33796e157b6aSDavid S. Miller 	lladdr = NULL;
3380e8599ff4SDavid S. Miller 	if (ndopts.nd_opts_tgt_lladdr) {
3381e8599ff4SDavid S. Miller 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3382e8599ff4SDavid S. Miller 					     skb->dev);
3383e8599ff4SDavid S. Miller 		if (!lladdr) {
3384e8599ff4SDavid S. Miller 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3385e8599ff4SDavid S. Miller 			return;
3386e8599ff4SDavid S. Miller 		}
3387e8599ff4SDavid S. Miller 	}
3388e8599ff4SDavid S. Miller 
33896e157b6aSDavid S. Miller 	rt = (struct rt6_info *) dst;
3390ec13ad1dSMatthias Schiffer 	if (rt->rt6i_flags & RTF_REJECT) {
33916e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
33926e157b6aSDavid S. Miller 		return;
33936e157b6aSDavid S. Miller 	}
33946e157b6aSDavid S. Miller 
33956e157b6aSDavid S. Miller 	/* Redirect received -> path was valid.
33966e157b6aSDavid S. Miller 	 * Look, redirects are sent only in response to data packets,
33976e157b6aSDavid S. Miller 	 * so that this nexthop apparently is reachable. --ANK
33986e157b6aSDavid S. Miller 	 */
33990dec879fSJulian Anastasov 	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
34006e157b6aSDavid S. Miller 
340171bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3402e8599ff4SDavid S. Miller 	if (!neigh)
3403e8599ff4SDavid S. Miller 		return;
3404e8599ff4SDavid S. Miller 
34051da177e4SLinus Torvalds 	/*
34061da177e4SLinus Torvalds 	 *	We have finally decided to accept it.
34071da177e4SLinus Torvalds 	 */
34081da177e4SLinus Torvalds 
3409f997c55cSAlexander Aring 	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
34101da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
34111da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_OVERRIDE|
34121da177e4SLinus Torvalds 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3413f997c55cSAlexander Aring 				     NEIGH_UPDATE_F_ISROUTER)),
3414f997c55cSAlexander Aring 		     NDISC_REDIRECT, &ndopts);
34151da177e4SLinus Torvalds 
34164d85cd0cSDavid Ahern 	rcu_read_lock();
3417a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
3418e873e4b9SWei Wang 	/* This fib6_info_hold() is safe here because we hold reference to rt
3419e873e4b9SWei Wang 	 * and rt already holds reference to fib6_info.
3420e873e4b9SWei Wang 	 */
34218a14e46fSDavid Ahern 	fib6_info_hold(from);
34224d85cd0cSDavid Ahern 	rcu_read_unlock();
34238a14e46fSDavid Ahern 
34248a14e46fSDavid Ahern 	nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
342538308473SDavid S. Miller 	if (!nrt)
34261da177e4SLinus Torvalds 		goto out;
34271da177e4SLinus Torvalds 
34281da177e4SLinus Torvalds 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
34291da177e4SLinus Torvalds 	if (on_link)
34301da177e4SLinus Torvalds 		nrt->rt6i_flags &= ~RTF_GATEWAY;
34311da177e4SLinus Torvalds 
34324e3fd7a0SAlexey Dobriyan 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
34331da177e4SLinus Torvalds 
34342b760fcfSWei Wang 	/* No need to remove rt from the exception table if rt is
34352b760fcfSWei Wang 	 * a cached route because rt6_insert_exception() will
34362b760fcfSWei Wang 	 * takes care of it
34372b760fcfSWei Wang 	 */
34388a14e46fSDavid Ahern 	if (rt6_insert_exception(nrt, from)) {
34392b760fcfSWei Wang 		dst_release_immediate(&nrt->dst);
34402b760fcfSWei Wang 		goto out;
34412b760fcfSWei Wang 	}
34421da177e4SLinus Torvalds 
3443d8d1f30bSChangli Gao 	netevent.old = &rt->dst;
3444d8d1f30bSChangli Gao 	netevent.new = &nrt->dst;
344571bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	netevent.daddr = &msg->dest;
344660592833SYOSHIFUJI Hideaki / 吉藤英明 	netevent.neigh = neigh;
34478d71740cSTom Tucker 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
34488d71740cSTom Tucker 
34491da177e4SLinus Torvalds out:
34508a14e46fSDavid Ahern 	fib6_info_release(from);
3451e8599ff4SDavid S. Miller 	neigh_release(neigh);
34526e157b6aSDavid S. Miller }
34536e157b6aSDavid S. Miller 
345470ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
34558d1c802bSDavid Ahern static struct fib6_info *rt6_get_route_info(struct net *net,
3456b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
3457830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
3458830218c1SDavid Ahern 					   struct net_device *dev)
345970ceb4f5SYOSHIFUJI Hideaki {
3460830218c1SDavid Ahern 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3461830218c1SDavid Ahern 	int ifindex = dev->ifindex;
346270ceb4f5SYOSHIFUJI Hideaki 	struct fib6_node *fn;
34638d1c802bSDavid Ahern 	struct fib6_info *rt = NULL;
3464c71099acSThomas Graf 	struct fib6_table *table;
346570ceb4f5SYOSHIFUJI Hideaki 
3466830218c1SDavid Ahern 	table = fib6_get_table(net, tb_id);
346738308473SDavid S. Miller 	if (!table)
3468c71099acSThomas Graf 		return NULL;
3469c71099acSThomas Graf 
347066f5d6ceSWei Wang 	rcu_read_lock();
347138fbeeeeSWei Wang 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
347270ceb4f5SYOSHIFUJI Hideaki 	if (!fn)
347370ceb4f5SYOSHIFUJI Hideaki 		goto out;
347470ceb4f5SYOSHIFUJI Hideaki 
347566f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(fn) {
3476ad1601aeSDavid Ahern 		if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
347770ceb4f5SYOSHIFUJI Hideaki 			continue;
34782b2450caSDavid Ahern 		if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
34792b2450caSDavid Ahern 		    !rt->fib6_nh.fib_nh_has_gw)
348070ceb4f5SYOSHIFUJI Hideaki 			continue;
3481ad1601aeSDavid Ahern 		if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
348270ceb4f5SYOSHIFUJI Hideaki 			continue;
3483e873e4b9SWei Wang 		if (!fib6_info_hold_safe(rt))
3484e873e4b9SWei Wang 			continue;
348570ceb4f5SYOSHIFUJI Hideaki 		break;
348670ceb4f5SYOSHIFUJI Hideaki 	}
348770ceb4f5SYOSHIFUJI Hideaki out:
348866f5d6ceSWei Wang 	rcu_read_unlock();
348970ceb4f5SYOSHIFUJI Hideaki 	return rt;
349070ceb4f5SYOSHIFUJI Hideaki }
349170ceb4f5SYOSHIFUJI Hideaki 
34928d1c802bSDavid Ahern static struct fib6_info *rt6_add_route_info(struct net *net,
3493b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
3494830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
3495830218c1SDavid Ahern 					   struct net_device *dev,
349695c96174SEric Dumazet 					   unsigned int pref)
349770ceb4f5SYOSHIFUJI Hideaki {
349886872cb5SThomas Graf 	struct fib6_config cfg = {
3499238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
3500830218c1SDavid Ahern 		.fc_ifindex	= dev->ifindex,
350186872cb5SThomas Graf 		.fc_dst_len	= prefixlen,
350286872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
350386872cb5SThomas Graf 				  RTF_UP | RTF_PREF(pref),
3504b91d5329SXin Long 		.fc_protocol = RTPROT_RA,
3505e8478e80SDavid Ahern 		.fc_type = RTN_UNICAST,
350615e47304SEric W. Biederman 		.fc_nlinfo.portid = 0,
3507efa2cea0SDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
3508efa2cea0SDaniel Lezcano 		.fc_nlinfo.nl_net = net,
350986872cb5SThomas Graf 	};
351070ceb4f5SYOSHIFUJI Hideaki 
3511830218c1SDavid Ahern 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
35124e3fd7a0SAlexey Dobriyan 	cfg.fc_dst = *prefix;
35134e3fd7a0SAlexey Dobriyan 	cfg.fc_gateway = *gwaddr;
351486872cb5SThomas Graf 
3515e317da96SYOSHIFUJI Hideaki 	/* We should treat it as a default route if prefix length is 0. */
3516e317da96SYOSHIFUJI Hideaki 	if (!prefixlen)
351786872cb5SThomas Graf 		cfg.fc_flags |= RTF_DEFAULT;
351870ceb4f5SYOSHIFUJI Hideaki 
3519acb54e3cSDavid Ahern 	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
352070ceb4f5SYOSHIFUJI Hideaki 
3521830218c1SDavid Ahern 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
352270ceb4f5SYOSHIFUJI Hideaki }
352370ceb4f5SYOSHIFUJI Hideaki #endif
352470ceb4f5SYOSHIFUJI Hideaki 
35258d1c802bSDavid Ahern struct fib6_info *rt6_get_dflt_router(struct net *net,
3526afb1d4b5SDavid Ahern 				     const struct in6_addr *addr,
3527afb1d4b5SDavid Ahern 				     struct net_device *dev)
35281da177e4SLinus Torvalds {
3529830218c1SDavid Ahern 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
35308d1c802bSDavid Ahern 	struct fib6_info *rt;
3531c71099acSThomas Graf 	struct fib6_table *table;
35321da177e4SLinus Torvalds 
3533afb1d4b5SDavid Ahern 	table = fib6_get_table(net, tb_id);
353438308473SDavid S. Miller 	if (!table)
3535c71099acSThomas Graf 		return NULL;
35361da177e4SLinus Torvalds 
353766f5d6ceSWei Wang 	rcu_read_lock();
353866f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3539ad1601aeSDavid Ahern 		struct fib6_nh *nh = &rt->fib6_nh;
3540ad1601aeSDavid Ahern 
3541ad1601aeSDavid Ahern 		if (dev == nh->fib_nh_dev &&
354293c2fb25SDavid Ahern 		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
3543ad1601aeSDavid Ahern 		    ipv6_addr_equal(&nh->fib_nh_gw6, addr))
35441da177e4SLinus Torvalds 			break;
35451da177e4SLinus Torvalds 	}
3546e873e4b9SWei Wang 	if (rt && !fib6_info_hold_safe(rt))
3547e873e4b9SWei Wang 		rt = NULL;
354866f5d6ceSWei Wang 	rcu_read_unlock();
35491da177e4SLinus Torvalds 	return rt;
35501da177e4SLinus Torvalds }
35511da177e4SLinus Torvalds 
35528d1c802bSDavid Ahern struct fib6_info *rt6_add_dflt_router(struct net *net,
3553afb1d4b5SDavid Ahern 				     const struct in6_addr *gwaddr,
3554ebacaaa0SYOSHIFUJI Hideaki 				     struct net_device *dev,
3555ebacaaa0SYOSHIFUJI Hideaki 				     unsigned int pref)
35561da177e4SLinus Torvalds {
355786872cb5SThomas Graf 	struct fib6_config cfg = {
3558ca254490SDavid Ahern 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3559238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
356086872cb5SThomas Graf 		.fc_ifindex	= dev->ifindex,
356186872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
356286872cb5SThomas Graf 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3563b91d5329SXin Long 		.fc_protocol = RTPROT_RA,
3564e8478e80SDavid Ahern 		.fc_type = RTN_UNICAST,
356515e47304SEric W. Biederman 		.fc_nlinfo.portid = 0,
35665578689aSDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
3567afb1d4b5SDavid Ahern 		.fc_nlinfo.nl_net = net,
356886872cb5SThomas Graf 	};
35691da177e4SLinus Torvalds 
35704e3fd7a0SAlexey Dobriyan 	cfg.fc_gateway = *gwaddr;
35711da177e4SLinus Torvalds 
3572acb54e3cSDavid Ahern 	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
3573830218c1SDavid Ahern 		struct fib6_table *table;
3574830218c1SDavid Ahern 
3575830218c1SDavid Ahern 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
3576830218c1SDavid Ahern 		if (table)
3577830218c1SDavid Ahern 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3578830218c1SDavid Ahern 	}
35791da177e4SLinus Torvalds 
3580afb1d4b5SDavid Ahern 	return rt6_get_dflt_router(net, gwaddr, dev);
35811da177e4SLinus Torvalds }
35821da177e4SLinus Torvalds 
3583afb1d4b5SDavid Ahern static void __rt6_purge_dflt_routers(struct net *net,
3584afb1d4b5SDavid Ahern 				     struct fib6_table *table)
35851da177e4SLinus Torvalds {
35868d1c802bSDavid Ahern 	struct fib6_info *rt;
35871da177e4SLinus Torvalds 
35881da177e4SLinus Torvalds restart:
358966f5d6ceSWei Wang 	rcu_read_lock();
359066f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3591dcd1f572SDavid Ahern 		struct net_device *dev = fib6_info_nh_dev(rt);
3592dcd1f572SDavid Ahern 		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3593dcd1f572SDavid Ahern 
359493c2fb25SDavid Ahern 		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3595e873e4b9SWei Wang 		    (!idev || idev->cnf.accept_ra != 2) &&
3596e873e4b9SWei Wang 		    fib6_info_hold_safe(rt)) {
359766f5d6ceSWei Wang 			rcu_read_unlock();
3598afb1d4b5SDavid Ahern 			ip6_del_rt(net, rt);
35991da177e4SLinus Torvalds 			goto restart;
36001da177e4SLinus Torvalds 		}
36011da177e4SLinus Torvalds 	}
360266f5d6ceSWei Wang 	rcu_read_unlock();
3603830218c1SDavid Ahern 
3604830218c1SDavid Ahern 	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3605830218c1SDavid Ahern }
3606830218c1SDavid Ahern 
3607830218c1SDavid Ahern void rt6_purge_dflt_routers(struct net *net)
3608830218c1SDavid Ahern {
3609830218c1SDavid Ahern 	struct fib6_table *table;
3610830218c1SDavid Ahern 	struct hlist_head *head;
3611830218c1SDavid Ahern 	unsigned int h;
3612830218c1SDavid Ahern 
3613830218c1SDavid Ahern 	rcu_read_lock();
3614830218c1SDavid Ahern 
3615830218c1SDavid Ahern 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3616830218c1SDavid Ahern 		head = &net->ipv6.fib_table_hash[h];
3617830218c1SDavid Ahern 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3618830218c1SDavid Ahern 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3619afb1d4b5SDavid Ahern 				__rt6_purge_dflt_routers(net, table);
3620830218c1SDavid Ahern 		}
3621830218c1SDavid Ahern 	}
3622830218c1SDavid Ahern 
3623830218c1SDavid Ahern 	rcu_read_unlock();
36241da177e4SLinus Torvalds }
36251da177e4SLinus Torvalds 
36265578689aSDaniel Lezcano static void rtmsg_to_fib6_config(struct net *net,
36275578689aSDaniel Lezcano 				 struct in6_rtmsg *rtmsg,
362886872cb5SThomas Graf 				 struct fib6_config *cfg)
362986872cb5SThomas Graf {
36308823a3acSMaciej Żenczykowski 	*cfg = (struct fib6_config){
36318823a3acSMaciej Żenczykowski 		.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
36328823a3acSMaciej Żenczykowski 			 : RT6_TABLE_MAIN,
36338823a3acSMaciej Żenczykowski 		.fc_ifindex = rtmsg->rtmsg_ifindex,
363467f69513SDavid Ahern 		.fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
36358823a3acSMaciej Żenczykowski 		.fc_expires = rtmsg->rtmsg_info,
36368823a3acSMaciej Żenczykowski 		.fc_dst_len = rtmsg->rtmsg_dst_len,
36378823a3acSMaciej Żenczykowski 		.fc_src_len = rtmsg->rtmsg_src_len,
36388823a3acSMaciej Żenczykowski 		.fc_flags = rtmsg->rtmsg_flags,
36398823a3acSMaciej Żenczykowski 		.fc_type = rtmsg->rtmsg_type,
364086872cb5SThomas Graf 
36418823a3acSMaciej Żenczykowski 		.fc_nlinfo.nl_net = net,
364286872cb5SThomas Graf 
36438823a3acSMaciej Żenczykowski 		.fc_dst = rtmsg->rtmsg_dst,
36448823a3acSMaciej Żenczykowski 		.fc_src = rtmsg->rtmsg_src,
36458823a3acSMaciej Żenczykowski 		.fc_gateway = rtmsg->rtmsg_gateway,
36468823a3acSMaciej Żenczykowski 	};
364786872cb5SThomas Graf }
364886872cb5SThomas Graf 
36495578689aSDaniel Lezcano int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
36501da177e4SLinus Torvalds {
365186872cb5SThomas Graf 	struct fib6_config cfg;
36521da177e4SLinus Torvalds 	struct in6_rtmsg rtmsg;
36531da177e4SLinus Torvalds 	int err;
36541da177e4SLinus Torvalds 
36551da177e4SLinus Torvalds 	switch (cmd) {
36561da177e4SLinus Torvalds 	case SIOCADDRT:		/* Add a route */
36571da177e4SLinus Torvalds 	case SIOCDELRT:		/* Delete a route */
3658af31f412SEric W. Biederman 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
36591da177e4SLinus Torvalds 			return -EPERM;
36601da177e4SLinus Torvalds 		err = copy_from_user(&rtmsg, arg,
36611da177e4SLinus Torvalds 				     sizeof(struct in6_rtmsg));
36621da177e4SLinus Torvalds 		if (err)
36631da177e4SLinus Torvalds 			return -EFAULT;
36641da177e4SLinus Torvalds 
36655578689aSDaniel Lezcano 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
366686872cb5SThomas Graf 
36671da177e4SLinus Torvalds 		rtnl_lock();
36681da177e4SLinus Torvalds 		switch (cmd) {
36691da177e4SLinus Torvalds 		case SIOCADDRT:
3670acb54e3cSDavid Ahern 			err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
36711da177e4SLinus Torvalds 			break;
36721da177e4SLinus Torvalds 		case SIOCDELRT:
3673333c4301SDavid Ahern 			err = ip6_route_del(&cfg, NULL);
36741da177e4SLinus Torvalds 			break;
36751da177e4SLinus Torvalds 		default:
36761da177e4SLinus Torvalds 			err = -EINVAL;
36771da177e4SLinus Torvalds 		}
36781da177e4SLinus Torvalds 		rtnl_unlock();
36791da177e4SLinus Torvalds 
36801da177e4SLinus Torvalds 		return err;
36813ff50b79SStephen Hemminger 	}
36821da177e4SLinus Torvalds 
36831da177e4SLinus Torvalds 	return -EINVAL;
36841da177e4SLinus Torvalds }
36851da177e4SLinus Torvalds 
36861da177e4SLinus Torvalds /*
36871da177e4SLinus Torvalds  *	Drop the packet on the floor
36881da177e4SLinus Torvalds  */
36891da177e4SLinus Torvalds 
3690d5fdd6baSBrian Haley static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
36911da177e4SLinus Torvalds {
3692612f09e8SYOSHIFUJI Hideaki 	int type;
3693adf30907SEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
3694612f09e8SYOSHIFUJI Hideaki 	switch (ipstats_mib_noroutes) {
3695612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_INNOROUTES:
36960660e03fSArnaldo Carvalho de Melo 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
369745bb0060SUlrich Weber 		if (type == IPV6_ADDR_ANY) {
3698bdb7cc64SStephen Suryaputra 			IP6_INC_STATS(dev_net(dst->dev),
3699bdb7cc64SStephen Suryaputra 				      __in6_dev_get_safely(skb->dev),
37003bd653c8SDenis V. Lunev 				      IPSTATS_MIB_INADDRERRORS);
3701612f09e8SYOSHIFUJI Hideaki 			break;
3702612f09e8SYOSHIFUJI Hideaki 		}
3703612f09e8SYOSHIFUJI Hideaki 		/* FALLTHROUGH */
3704612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_OUTNOROUTES:
37053bd653c8SDenis V. Lunev 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
37063bd653c8SDenis V. Lunev 			      ipstats_mib_noroutes);
3707612f09e8SYOSHIFUJI Hideaki 		break;
3708612f09e8SYOSHIFUJI Hideaki 	}
37093ffe533cSAlexey Dobriyan 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
37101da177e4SLinus Torvalds 	kfree_skb(skb);
37111da177e4SLinus Torvalds 	return 0;
37121da177e4SLinus Torvalds }
37131da177e4SLinus Torvalds 
37149ce8ade0SThomas Graf static int ip6_pkt_discard(struct sk_buff *skb)
37159ce8ade0SThomas Graf {
3716612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
37179ce8ade0SThomas Graf }
37189ce8ade0SThomas Graf 
3719ede2059dSEric W. Biederman static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
37201da177e4SLinus Torvalds {
3721adf30907SEric Dumazet 	skb->dev = skb_dst(skb)->dev;
3722612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
37231da177e4SLinus Torvalds }
37241da177e4SLinus Torvalds 
37259ce8ade0SThomas Graf static int ip6_pkt_prohibit(struct sk_buff *skb)
37269ce8ade0SThomas Graf {
3727612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
37289ce8ade0SThomas Graf }
37299ce8ade0SThomas Graf 
3730ede2059dSEric W. Biederman static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
37319ce8ade0SThomas Graf {
3732adf30907SEric Dumazet 	skb->dev = skb_dst(skb)->dev;
3733612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
37349ce8ade0SThomas Graf }
37359ce8ade0SThomas Graf 
37361da177e4SLinus Torvalds /*
37371da177e4SLinus Torvalds  *	Allocate a dst for local (unicast / anycast) address.
37381da177e4SLinus Torvalds  */
37391da177e4SLinus Torvalds 
3740360a9887SDavid Ahern struct fib6_info *addrconf_f6i_alloc(struct net *net,
3741afb1d4b5SDavid Ahern 				     struct inet6_dev *idev,
37421da177e4SLinus Torvalds 				     const struct in6_addr *addr,
3743acb54e3cSDavid Ahern 				     bool anycast, gfp_t gfp_flags)
37441da177e4SLinus Torvalds {
3745c7a1ce39SDavid Ahern 	struct fib6_config cfg = {
3746c7a1ce39SDavid Ahern 		.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
3747c7a1ce39SDavid Ahern 		.fc_ifindex = idev->dev->ifindex,
3748c7a1ce39SDavid Ahern 		.fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
3749c7a1ce39SDavid Ahern 		.fc_dst = *addr,
3750c7a1ce39SDavid Ahern 		.fc_dst_len = 128,
3751c7a1ce39SDavid Ahern 		.fc_protocol = RTPROT_KERNEL,
3752c7a1ce39SDavid Ahern 		.fc_nlinfo.nl_net = net,
3753c7a1ce39SDavid Ahern 		.fc_ignore_dev_down = true,
3754c7a1ce39SDavid Ahern 	};
37555f02ce24SDavid Ahern 
3756e8478e80SDavid Ahern 	if (anycast) {
3757c7a1ce39SDavid Ahern 		cfg.fc_type = RTN_ANYCAST;
3758c7a1ce39SDavid Ahern 		cfg.fc_flags |= RTF_ANYCAST;
3759e8478e80SDavid Ahern 	} else {
3760c7a1ce39SDavid Ahern 		cfg.fc_type = RTN_LOCAL;
3761c7a1ce39SDavid Ahern 		cfg.fc_flags |= RTF_LOCAL;
3762e8478e80SDavid Ahern 	}
37631da177e4SLinus Torvalds 
3764c7a1ce39SDavid Ahern 	return ip6_route_info_create(&cfg, gfp_flags, NULL);
37651da177e4SLinus Torvalds }
37661da177e4SLinus Torvalds 
/* remove deleted ip from prefsrc entries
 *
 * Argument bundle handed to the fib6_remove_prefsrc() walker callback.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any */
	struct net *net;		/* namespace being walked */
	struct in6_addr *addr;		/* address being removed */
};
3773c3968a85SDaniel Walter 
/* Route-walk callback (passed to fib6_clean_all() by rt6_remove_prefsrc())
 * that drops a preferred source address from a route.
 *
 * @rt:  route being visited
 * @arg: struct arg_dev_net_ip describing what to match
 *
 * When @rt is not the namespace's null entry, its nexthop device matches
 * the requested device (or no device was requested) and its preferred
 * source equals the given address, the preferred-source prefix length is
 * reset to 0 while rt6_exception_lock is held.
 *
 * Always returns 0.
 */
37748d1c802bSDavid Ahern static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
3775c3968a85SDaniel Walter {
3776c3968a85SDaniel Walter 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3777c3968a85SDaniel Walter 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3778c3968a85SDaniel Walter 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3779c3968a85SDaniel Walter 
	/* a NULL dev acts as a wildcard for the device comparison */
3780ad1601aeSDavid Ahern 	if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
3781421842edSDavid Ahern 	    rt != net->ipv6.fib6_null_entry &&
378293c2fb25SDavid Ahern 	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
378360006a48SWei Wang 		spin_lock_bh(&rt6_exception_lock);
3784c3968a85SDaniel Walter 		/* remove prefsrc entry */
378593c2fb25SDavid Ahern 		rt->fib6_prefsrc.plen = 0;
378660006a48SWei Wang 		spin_unlock_bh(&rt6_exception_lock);
3787c3968a85SDaniel Walter 	}
3788c3968a85SDaniel Walter 	return 0;
3789c3968a85SDaniel Walter }
3790c3968a85SDaniel Walter 
/* Run fib6_remove_prefsrc() over every route in the namespace of @ifp's
 * device, using that device and &ifp->addr as the match criteria.
 */
3791c3968a85SDaniel Walter void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3792c3968a85SDaniel Walter {
3793c3968a85SDaniel Walter 	struct net *net = dev_net(ifp->idev->dev);
3794c3968a85SDaniel Walter 	struct arg_dev_net_ip adni = {
3795c3968a85SDaniel Walter 		.dev = ifp->idev->dev,
3796c3968a85SDaniel Walter 		.net = net,
3797c3968a85SDaniel Walter 		.addr = &ifp->addr,
3798c3968a85SDaniel Walter 	};
37990c3584d5SLi RongQing 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3800c3968a85SDaniel Walter }
3801c3968a85SDaniel Walter 
/* Flag combination tested by fib6_clean_tohost(): a route matches only
 * when both RTF_ADDRCONF and RTF_DEFAULT are set.
 */
38022b2450caSDavid Ahern #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT)
3803be7a010dSDuan Jiong 
3804be7a010dSDuan Jiong /* Remove routers and update dst entries when gateway turn into host. */
/* Route-walk callback (passed to fib6_clean_all() by rt6_clean_tohost()).
 *
 * @rt:  route being visited
 * @arg: struct in6_addr of the gateway
 *
 * Returns -1 for a route that has every RTF_RA_ROUTER flag set, has a
 * gateway, and whose gateway address equals @arg. For any other route the
 * exception table is processed by rt6_exceptions_clean_tohost() and 0 is
 * returned.
 */
38058d1c802bSDavid Ahern static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
3806be7a010dSDuan Jiong {
3807be7a010dSDuan Jiong 	struct in6_addr *gateway = (struct in6_addr *)arg;
3808be7a010dSDuan Jiong 
380993c2fb25SDavid Ahern 	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
38102b2450caSDavid Ahern 	    rt->fib6_nh.fib_nh_has_gw &&
3811ad1601aeSDavid Ahern 	    ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
3812be7a010dSDuan Jiong 		return -1;
3813be7a010dSDuan Jiong 	}
3814b16cb459SWei Wang 
3815b16cb459SWei Wang 	/* Further clean up cached routes in exception table.
3816b16cb459SWei Wang 	 * This is needed because cached route may have a different
3817b16cb459SWei Wang 	 * gateway than its 'parent' in the case of an ip redirect.
3818b16cb459SWei Wang 	 */
3819b16cb459SWei Wang 	rt6_exceptions_clean_tohost(rt, gateway);
3820b16cb459SWei Wang 
3821be7a010dSDuan Jiong 	return 0;
3822be7a010dSDuan Jiong }
3823be7a010dSDuan Jiong 
/* Run fib6_clean_tohost() over every route in @net, with @gateway as the
 * gateway address to match.
 */
3824be7a010dSDuan Jiong void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3825be7a010dSDuan Jiong {
3826be7a010dSDuan Jiong 	fib6_clean_all(net, fib6_clean_tohost, gateway);
3827be7a010dSDuan Jiong }
3828be7a010dSDuan Jiong 
/* Argument passed to the fib6_ifup() and fib6_ifdown() route walkers.
 *   dev      - device the event refers to
 *   nh_flags - read by fib6_ifup(): nexthop flags to clear
 *   event    - read by fib6_ifdown(): NETDEV_* event being handled
 * nh_flags and event share storage (anonymous union); each walker reads
 * only its own member.
 */
38292127d95aSIdo Schimmel struct arg_netdev_event {
38302127d95aSIdo Schimmel 	const struct net_device *dev;
38314c981e28SIdo Schimmel 	union {
38322127d95aSIdo Schimmel 		unsigned int nh_flags;
38334c981e28SIdo Schimmel 		unsigned long event;
38344c981e28SIdo Schimmel 	};
38352127d95aSIdo Schimmel };
38362127d95aSIdo Schimmel 
/* Find the first route on @rt's fib6 node that has the same metric as
 * @rt and qualifies for ECMP.
 *
 * The leaf list of rt->fib6_node is scanned from its head by following
 * fib6_next. Every pointer load is annotated with
 * lockdep_is_held(&rt->fib6_table->tb6_lock), i.e. the caller is expected
 * to hold that table lock.
 *
 * Returns the matching route, or NULL when the list contains none.
 */
38378d1c802bSDavid Ahern static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
3838d7dedee1SIdo Schimmel {
38398d1c802bSDavid Ahern 	struct fib6_info *iter;
3840d7dedee1SIdo Schimmel 	struct fib6_node *fn;
3841d7dedee1SIdo Schimmel 
384293c2fb25SDavid Ahern 	fn = rcu_dereference_protected(rt->fib6_node,
384393c2fb25SDavid Ahern 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3844d7dedee1SIdo Schimmel 	iter = rcu_dereference_protected(fn->leaf,
384593c2fb25SDavid Ahern 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3846d7dedee1SIdo Schimmel 	while (iter) {
384793c2fb25SDavid Ahern 		if (iter->fib6_metric == rt->fib6_metric &&
384833bd5ac5SDavid Ahern 		    rt6_qualify_for_ecmp(iter))
3849d7dedee1SIdo Schimmel 			return iter;
38508fb11a9aSDavid Ahern 		iter = rcu_dereference_protected(iter->fib6_next,
385193c2fb25SDavid Ahern 				lockdep_is_held(&rt->fib6_table->tb6_lock));
3852d7dedee1SIdo Schimmel 	}
3853d7dedee1SIdo Schimmel 
3854d7dedee1SIdo Schimmel 	return NULL;
3855d7dedee1SIdo Schimmel }
3856d7dedee1SIdo Schimmel 
/* Tell whether @rt's nexthop is to be treated as unusable: true when
 * RTNH_F_DEAD is set, or when RTNH_F_LINKDOWN is set and
 * ip6_ignore_linkdown() returns true for the nexthop device.
 */
38578d1c802bSDavid Ahern static bool rt6_is_dead(const struct fib6_info *rt)
3858d7dedee1SIdo Schimmel {
3859ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
3860ad1601aeSDavid Ahern 	    (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
3861ad1601aeSDavid Ahern 	     ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
3862d7dedee1SIdo Schimmel 		return true;
3863d7dedee1SIdo Schimmel 
3864d7dedee1SIdo Schimmel 	return false;
3865d7dedee1SIdo Schimmel }
3866d7dedee1SIdo Schimmel 
/* Sum fib_nh_weight over @rt and every route on its fib6_siblings list,
 * leaving out nexthops for which rt6_is_dead() is true.
 */
38678d1c802bSDavid Ahern static int rt6_multipath_total_weight(const struct fib6_info *rt)
3868d7dedee1SIdo Schimmel {
38698d1c802bSDavid Ahern 	struct fib6_info *iter;
3870d7dedee1SIdo Schimmel 	int total = 0;
3871d7dedee1SIdo Schimmel 
3872d7dedee1SIdo Schimmel 	if (!rt6_is_dead(rt))
3873ad1601aeSDavid Ahern 		total += rt->fib6_nh.fib_nh_weight;
3874d7dedee1SIdo Schimmel 
387593c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
3876d7dedee1SIdo Schimmel 		if (!rt6_is_dead(iter))
3877ad1601aeSDavid Ahern 			total += iter->fib6_nh.fib_nh_weight;
3878d7dedee1SIdo Schimmel 	}
3879d7dedee1SIdo Schimmel 
3880d7dedee1SIdo Schimmel 	return total;
3881d7dedee1SIdo Schimmel }
3882d7dedee1SIdo Schimmel 
/* Store the multipath upper bound of @rt's nexthop.
 *
 * @rt:     route whose fib_nh_upper_bound is written
 * @weight: running sum of the weights of the live nexthops handled so
 *          far; updated in place
 * @total:  divisor for the scaled running weight
 *
 * A live nexthop adds its weight to *@weight and is given
 * DIV_ROUND_CLOSEST_ULL(*@weight << 31, @total) - 1. A dead nexthop
 * (see rt6_is_dead()) leaves *@weight alone and is given -1. The result
 * is written with atomic_set().
 */
38838d1c802bSDavid Ahern static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
3884d7dedee1SIdo Schimmel {
3885d7dedee1SIdo Schimmel 	int upper_bound = -1;
3886d7dedee1SIdo Schimmel 
3887d7dedee1SIdo Schimmel 	if (!rt6_is_dead(rt)) {
3888ad1601aeSDavid Ahern 		*weight += rt->fib6_nh.fib_nh_weight;
3889d7dedee1SIdo Schimmel 		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3890d7dedee1SIdo Schimmel 						    total) - 1;
3891d7dedee1SIdo Schimmel 	}
3892ad1601aeSDavid Ahern 	atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
3893d7dedee1SIdo Schimmel }
3894d7dedee1SIdo Schimmel 
/* Assign upper bounds to @rt and then to each route on its fib6_siblings
 * list in list order, carrying the cumulative weight from one nexthop to
 * the next. @total is passed unchanged to rt6_upper_bound_set().
 */
38958d1c802bSDavid Ahern static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
3896d7dedee1SIdo Schimmel {
38978d1c802bSDavid Ahern 	struct fib6_info *iter;
3898d7dedee1SIdo Schimmel 	int weight = 0;
3899d7dedee1SIdo Schimmel 
3900d7dedee1SIdo Schimmel 	rt6_upper_bound_set(rt, &weight, total);
3901d7dedee1SIdo Schimmel 
390293c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3903d7dedee1SIdo Schimmel 		rt6_upper_bound_set(iter, &weight, total);
3904d7dedee1SIdo Schimmel }
3905d7dedee1SIdo Schimmel 
/* Recompute the nexthop upper bounds of the multipath route that @rt is
 * part of.
 *
 * Does nothing when @rt has no siblings or is marked should_flush. When
 * no first sibling can be found, WARN_ON_ONCE() fires and the function
 * returns without changing any bound. Otherwise the total weight is
 * computed from the first sibling and the bounds are reassigned starting
 * there.
 */
39068d1c802bSDavid Ahern void rt6_multipath_rebalance(struct fib6_info *rt)
3907d7dedee1SIdo Schimmel {
39088d1c802bSDavid Ahern 	struct fib6_info *first;
3909d7dedee1SIdo Schimmel 	int total;
3910d7dedee1SIdo Schimmel 
3911d7dedee1SIdo Schimmel 	/* In case the entire multipath route was marked for flushing,
3912d7dedee1SIdo Schimmel 	 * then there is no need to rebalance upon the removal of every
3913d7dedee1SIdo Schimmel 	 * sibling route.
3914d7dedee1SIdo Schimmel 	 */
391593c2fb25SDavid Ahern 	if (!rt->fib6_nsiblings || rt->should_flush)
3916d7dedee1SIdo Schimmel 		return;
3917d7dedee1SIdo Schimmel 
3918d7dedee1SIdo Schimmel 	/* During lookup routes are evaluated in order, so we need to
3919d7dedee1SIdo Schimmel 	 * make sure upper bounds are assigned from the first sibling
3920d7dedee1SIdo Schimmel 	 * onwards.
3921d7dedee1SIdo Schimmel 	 */
3922d7dedee1SIdo Schimmel 	first = rt6_multipath_first_sibling(rt);
3923d7dedee1SIdo Schimmel 	if (WARN_ON_ONCE(!first))
3924d7dedee1SIdo Schimmel 		return;
3925d7dedee1SIdo Schimmel 
3926d7dedee1SIdo Schimmel 	total = rt6_multipath_total_weight(first);
3927d7dedee1SIdo Schimmel 	rt6_multipath_upper_bound_set(first, total);
3928d7dedee1SIdo Schimmel }
3929d7dedee1SIdo Schimmel 
/* Route-walk callback used by rt6_sync_up().
 *
 * @rt:    route being visited
 * @p_arg: struct arg_netdev_event; ->dev is the device and ->nh_flags
 *         the nexthop flags to clear
 *
 * For a route other than the null entry whose nexthop device is
 * arg->dev: clears the flags, calls fib6_update_sernum_upto_root() and
 * rebalances the multipath route. Always returns 0.
 */
39308d1c802bSDavid Ahern static int fib6_ifup(struct fib6_info *rt, void *p_arg)
39312127d95aSIdo Schimmel {
39322127d95aSIdo Schimmel 	const struct arg_netdev_event *arg = p_arg;
39337aef6859SDavid Ahern 	struct net *net = dev_net(arg->dev);
39342127d95aSIdo Schimmel 
3935ad1601aeSDavid Ahern 	if (rt != net->ipv6.fib6_null_entry &&
3936ad1601aeSDavid Ahern 	    rt->fib6_nh.fib_nh_dev == arg->dev) {
3937ad1601aeSDavid Ahern 		rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
39387aef6859SDavid Ahern 		fib6_update_sernum_upto_root(net, rt);
3939d7dedee1SIdo Schimmel 		rt6_multipath_rebalance(rt);
39401de178edSIdo Schimmel 	}
39412127d95aSIdo Schimmel 
39422127d95aSIdo Schimmel 	return 0;
39432127d95aSIdo Schimmel }
39442127d95aSIdo Schimmel 
/* Clear @nh_flags on every route in @dev's namespace whose nexthop
 * device is @dev (see fib6_ifup()).
 *
 * When @nh_flags contains RTNH_F_DEAD and netif_carrier_ok(@dev) is
 * true, RTNH_F_LINKDOWN is cleared as well.
 */
39452127d95aSIdo Schimmel void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
39462127d95aSIdo Schimmel {
39472127d95aSIdo Schimmel 	struct arg_netdev_event arg = {
39482127d95aSIdo Schimmel 		.dev = dev,
39496802f3adSIdo Schimmel 		{
39502127d95aSIdo Schimmel 			.nh_flags = nh_flags,
39516802f3adSIdo Schimmel 		},
39522127d95aSIdo Schimmel 	};
39532127d95aSIdo Schimmel 
39542127d95aSIdo Schimmel 	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
39552127d95aSIdo Schimmel 		arg.nh_flags |= RTNH_F_LINKDOWN;
39562127d95aSIdo Schimmel 
39572127d95aSIdo Schimmel 	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
39582127d95aSIdo Schimmel }
39592127d95aSIdo Schimmel 
/* Return true when @rt or any route on its fib6_siblings list has @dev
 * as its nexthop device, false otherwise.
 */
39608d1c802bSDavid Ahern static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
39611de178edSIdo Schimmel 				   const struct net_device *dev)
39621de178edSIdo Schimmel {
39638d1c802bSDavid Ahern 	struct fib6_info *iter;
39641de178edSIdo Schimmel 
3965ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_dev == dev)
39661de178edSIdo Schimmel 		return true;
396793c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3968ad1601aeSDavid Ahern 		if (iter->fib6_nh.fib_nh_dev == dev)
39691de178edSIdo Schimmel 			return true;
39701de178edSIdo Schimmel 
39711de178edSIdo Schimmel 	return false;
39721de178edSIdo Schimmel }
39731de178edSIdo Schimmel 
/* Set should_flush on @rt and on every route on its fib6_siblings list. */
39748d1c802bSDavid Ahern static void rt6_multipath_flush(struct fib6_info *rt)
39751de178edSIdo Schimmel {
39768d1c802bSDavid Ahern 	struct fib6_info *iter;
39771de178edSIdo Schimmel 
39781de178edSIdo Schimmel 	rt->should_flush = 1;
397993c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
39801de178edSIdo Schimmel 		iter->should_flush = 1;
39811de178edSIdo Schimmel }
39821de178edSIdo Schimmel 
/* Count the nexthops among @rt and its siblings that either use
 * @down_dev or already have RTNH_F_DEAD set.
 */
39838d1c802bSDavid Ahern static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
39841de178edSIdo Schimmel 					     const struct net_device *down_dev)
39851de178edSIdo Schimmel {
39868d1c802bSDavid Ahern 	struct fib6_info *iter;
39871de178edSIdo Schimmel 	unsigned int dead = 0;
39881de178edSIdo Schimmel 
3989ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_dev == down_dev ||
3990ad1601aeSDavid Ahern 	    rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
39911de178edSIdo Schimmel 		dead++;
399293c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3993ad1601aeSDavid Ahern 		if (iter->fib6_nh.fib_nh_dev == down_dev ||
3994ad1601aeSDavid Ahern 		    iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
39951de178edSIdo Schimmel 			dead++;
39961de178edSIdo Schimmel 
39971de178edSIdo Schimmel 	return dead;
39981de178edSIdo Schimmel }
39991de178edSIdo Schimmel 
/* OR @nh_flags into fib_nh_flags of @rt and of each sibling, but only
 * for those whose nexthop device is @dev.
 */
40008d1c802bSDavid Ahern static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
40011de178edSIdo Schimmel 				       const struct net_device *dev,
40021de178edSIdo Schimmel 				       unsigned int nh_flags)
40031de178edSIdo Schimmel {
40048d1c802bSDavid Ahern 	struct fib6_info *iter;
40051de178edSIdo Schimmel 
4006ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_dev == dev)
4007ad1601aeSDavid Ahern 		rt->fib6_nh.fib_nh_flags |= nh_flags;
400893c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4009ad1601aeSDavid Ahern 		if (iter->fib6_nh.fib_nh_dev == dev)
4010ad1601aeSDavid Ahern 			iter->fib6_nh.fib_nh_flags |= nh_flags;
40111de178edSIdo Schimmel }
40121de178edSIdo Schimmel 
4013a1a22c12SDavid Ahern /* called with write lock held for table with rt */
/* Route-walk callback used by rt6_sync_down_dev().
 *
 * @rt:    route being visited
 * @p_arg: struct arg_netdev_event; ->dev is the device and ->event the
 *         NETDEV_* event being handled
 *
 * Return values produced here:
 *   null entry:         always 0.
 *   NETDEV_UNREGISTER:  -1 if @rt's nexthop device is the device, else 0.
 *   NETDEV_DOWN:        -1 if @rt is marked should_flush; for a route
 *                       without siblings -1/0 as for UNREGISTER; for a
 *                       multipath route -1 once every nexthop counts as
 *                       dead, otherwise -2.
 *   NETDEV_CHANGE and any other event: 0.
 *
 * NOTE(review): what the walker does with -1 and -2 is defined outside
 * this chunk - confirm against the fib6_clean_all() implementation.
 */
40148d1c802bSDavid Ahern static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
40151da177e4SLinus Torvalds {
40164c981e28SIdo Schimmel 	const struct arg_netdev_event *arg = p_arg;
40174c981e28SIdo Schimmel 	const struct net_device *dev = arg->dev;
40187aef6859SDavid Ahern 	struct net *net = dev_net(dev);
40198ed67789SDaniel Lezcano 
4020421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry)
402127c6fa73SIdo Schimmel 		return 0;
402227c6fa73SIdo Schimmel 
402327c6fa73SIdo Schimmel 	switch (arg->event) {
402427c6fa73SIdo Schimmel 	case NETDEV_UNREGISTER:
4025ad1601aeSDavid Ahern 		return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
402627c6fa73SIdo Schimmel 	case NETDEV_DOWN:
40271de178edSIdo Schimmel 		if (rt->should_flush)
402827c6fa73SIdo Schimmel 			return -1;
402993c2fb25SDavid Ahern 		if (!rt->fib6_nsiblings)
4030ad1601aeSDavid Ahern 			return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
40311de178edSIdo Schimmel 		if (rt6_multipath_uses_dev(rt, dev)) {
40321de178edSIdo Schimmel 			unsigned int count;
40331de178edSIdo Schimmel 
40341de178edSIdo Schimmel 			count = rt6_multipath_dead_count(rt, dev);
			/* @rt plus all of its siblings are dead or on dev */
403593c2fb25SDavid Ahern 			if (rt->fib6_nsiblings + 1 == count) {
40361de178edSIdo Schimmel 				rt6_multipath_flush(rt);
40371de178edSIdo Schimmel 				return -1;
40381de178edSIdo Schimmel 			}
40391de178edSIdo Schimmel 			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
40401de178edSIdo Schimmel 						   RTNH_F_LINKDOWN);
40417aef6859SDavid Ahern 			fib6_update_sernum(net, rt);
4042d7dedee1SIdo Schimmel 			rt6_multipath_rebalance(rt);
40431de178edSIdo Schimmel 		}
40441de178edSIdo Schimmel 		return -2;
404527c6fa73SIdo Schimmel 	case NETDEV_CHANGE:
		/* routes on other devices and local/anycast routes are
		 * left untouched
		 */
4046ad1601aeSDavid Ahern 		if (rt->fib6_nh.fib_nh_dev != dev ||
404793c2fb25SDavid Ahern 		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
404827c6fa73SIdo Schimmel 			break;
4049ad1601aeSDavid Ahern 		rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
4050d7dedee1SIdo Schimmel 		rt6_multipath_rebalance(rt);
405127c6fa73SIdo Schimmel 		break;
40522b241361SIdo Schimmel 	}
4053c159d30cSDavid S. Miller 
40541da177e4SLinus Torvalds 	return 0;
40551da177e4SLinus Torvalds }
40561da177e4SLinus Torvalds 
/* Apply fib6_ifdown() for @event to every route in @dev's namespace.
 * fib6_clean_all_skip_notify() is used when the namespace's
 * skip_notify_on_dev_down sysctl is non-zero, fib6_clean_all() otherwise.
 */
405727c6fa73SIdo Schimmel void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
40581da177e4SLinus Torvalds {
40594c981e28SIdo Schimmel 	struct arg_netdev_event arg = {
40608ed67789SDaniel Lezcano 		.dev = dev,
40616802f3adSIdo Schimmel 		{
40624c981e28SIdo Schimmel 			.event = event,
40636802f3adSIdo Schimmel 		},
40648ed67789SDaniel Lezcano 	};
40657c6bb7d2SDavid Ahern 	struct net *net = dev_net(dev);
40668ed67789SDaniel Lezcano 
40677c6bb7d2SDavid Ahern 	if (net->ipv6.sysctl.skip_notify_on_dev_down)
40687c6bb7d2SDavid Ahern 		fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
40697c6bb7d2SDavid Ahern 	else
40707c6bb7d2SDavid Ahern 		fib6_clean_all(net, fib6_ifdown, &arg);
40714c981e28SIdo Schimmel }
40724c981e28SIdo Schimmel 
/* For @dev, in this order: rt6_sync_down_dev() with @event,
 * rt6_uncached_list_flush_dev() for the device's namespace, and
 * neigh_ifdown() on nd_tbl.
 */
40734c981e28SIdo Schimmel void rt6_disable_ip(struct net_device *dev, unsigned long event)
40744c981e28SIdo Schimmel {
40754c981e28SIdo Schimmel 	rt6_sync_down_dev(dev, event);
40764c981e28SIdo Schimmel 	rt6_uncached_list_flush_dev(dev_net(dev), dev);
40774c981e28SIdo Schimmel 	neigh_ifdown(&nd_tbl, dev);
40781da177e4SLinus Torvalds }
40791da177e4SLinus Torvalds 
/* Argument for rt6_mtu_change_route(): the device whose MTU changed and
 * the new MTU value.
 */
408095c96174SEric Dumazet struct rt6_mtu_change_arg {
40811da177e4SLinus Torvalds 	struct net_device *dev;
408295c96174SEric Dumazet 	unsigned int mtu;
40831da177e4SLinus Torvalds };
40841da177e4SLinus Torvalds 
/* Route-walk callback used by rt6_mtu_change().
 *
 * @rt:    route being visited
 * @p_arg: struct rt6_mtu_change_arg (device and new MTU)
 *
 * Returns 0 at once when arg->dev has no inet6_dev. Otherwise, for a
 * route on arg->dev whose RTAX_MTU metric is not locked, RTAX_MTU is set
 * to arg->mtu when the route's current fib6_pmtu is >= arg->mtu, or when
 * it is smaller but equal to idev->cnf.mtu6. Whether or not the metric
 * was changed, rt6_exceptions_update_pmtu() is then called under
 * rt6_exception_lock. Always returns 0.
 */
40858d1c802bSDavid Ahern static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
40861da177e4SLinus Torvalds {
40871da177e4SLinus Torvalds 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
40881da177e4SLinus Torvalds 	struct inet6_dev *idev;
40891da177e4SLinus Torvalds 
40901da177e4SLinus Torvalds 	/* In IPv6 pmtu discovery is not optional,
40911da177e4SLinus Torvalds 	   so that RTAX_MTU lock cannot disable it.
40921da177e4SLinus Torvalds 	   We still use this lock to block changes
40931da177e4SLinus Torvalds 	   caused by addrconf/ndisc.
40941da177e4SLinus Torvalds 	*/
40951da177e4SLinus Torvalds 
40961da177e4SLinus Torvalds 	idev = __in6_dev_get(arg->dev);
409738308473SDavid S. Miller 	if (!idev)
40981da177e4SLinus Torvalds 		return 0;
40991da177e4SLinus Torvalds 
41001da177e4SLinus Torvalds 	/* For administrative MTU increase, there is no way to discover
41011da177e4SLinus Torvalds 	   IPv6 PMTU increase, so PMTU increase should be updated here.
41021da177e4SLinus Torvalds 	   Since RFC 1981 doesn't include administrative MTU increase
41031da177e4SLinus Torvalds 	   update PMTU increase is a MUST. (i.e. jumbo frame)
41041da177e4SLinus Torvalds 	 */
4105ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_dev == arg->dev &&
4106d4ead6b3SDavid Ahern 	    !fib6_metric_locked(rt, RTAX_MTU)) {
4107d4ead6b3SDavid Ahern 		u32 mtu = rt->fib6_pmtu;
4108d4ead6b3SDavid Ahern 
4109d4ead6b3SDavid Ahern 		if (mtu >= arg->mtu ||
4110d4ead6b3SDavid Ahern 		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4111d4ead6b3SDavid Ahern 			fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4112d4ead6b3SDavid Ahern 
4113f5bbe7eeSWei Wang 		spin_lock_bh(&rt6_exception_lock);
4114e9fa1495SStefano Brivio 		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
4115f5bbe7eeSWei Wang 		spin_unlock_bh(&rt6_exception_lock);
41164b32b5adSMartin KaFai Lau 	}
41171da177e4SLinus Torvalds 	return 0;
41181da177e4SLinus Torvalds }
41191da177e4SLinus Torvalds 
/* Run rt6_mtu_change_route() over every route in @dev's namespace with
 * @dev and the new @mtu as argument.
 */
412095c96174SEric Dumazet void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
41211da177e4SLinus Torvalds {
4122c71099acSThomas Graf 	struct rt6_mtu_change_arg arg = {
4123c71099acSThomas Graf 		.dev = dev,
4124c71099acSThomas Graf 		.mtu = mtu,
4125c71099acSThomas Graf 	};
41261da177e4SLinus Torvalds 
41270c3584d5SLi RongQing 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
41281da177e4SLinus Torvalds }
41291da177e4SLinus Torvalds 
/* Netlink attribute policy handed to nlmsg_parse() in
 * rtm_to_fib6_config(). Each entry gives either an attribute type or a
 * length. RTA_DST and RTA_SRC have no entry here; their payload length
 * is checked by hand in rtm_to_fib6_config().
 */
4130ef7c79edSPatrick McHardy static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
41315176f91eSThomas Graf 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
4132aa8f8778SEric Dumazet 	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
413386872cb5SThomas Graf 	[RTA_OIF]               = { .type = NLA_U32 },
4134ab364a6fSThomas Graf 	[RTA_IIF]		= { .type = NLA_U32 },
413586872cb5SThomas Graf 	[RTA_PRIORITY]          = { .type = NLA_U32 },
413686872cb5SThomas Graf 	[RTA_METRICS]           = { .type = NLA_NESTED },
413751ebd318SNicolas Dichtel 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
4138c78ba6d6SLubomir Rintel 	[RTA_PREF]              = { .type = NLA_U8 },
413919e42e45SRoopa Prabhu 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
414019e42e45SRoopa Prabhu 	[RTA_ENCAP]		= { .type = NLA_NESTED },
414132bc201eSXin Long 	[RTA_EXPIRES]		= { .type = NLA_U32 },
4142622ec2c9SLorenzo Colitti 	[RTA_UID]		= { .type = NLA_U32 },
41433b45a410SLiping Zhang 	[RTA_MARK]		= { .type = NLA_U32 },
4144aa8f8778SEric Dumazet 	[RTA_TABLE]		= { .type = NLA_U32 },
4145eacb9384SRoopa Prabhu 	[RTA_IP_PROTO]		= { .type = NLA_U8 },
4146eacb9384SRoopa Prabhu 	[RTA_SPORT]		= { .type = NLA_U16 },
4147eacb9384SRoopa Prabhu 	[RTA_DPORT]		= { .type = NLA_U16 },
414886872cb5SThomas Graf };
414986872cb5SThomas Graf 
/* Translate a route netlink message into a struct fib6_config.
 *
 * @skb:    message buffer; supplies the sender portid and, through its
 *          socket, the network namespace
 * @nlh:    netlink header of the request
 * @cfg:    output; overwritten from the rtmsg header before the
 *          individual attributes are applied
 * @extack: extended ack used for error messages
 *
 * Returns 0 on success. On failure returns the negative value reported
 * by nlmsg_parse() or by the lwtunnel encap-type validators, or -EINVAL
 * when RTA_VIA is present or an RTA_DST/RTA_SRC payload is shorter than
 * the header's prefix length requires. @cfg may already be partly filled
 * in when an error is returned.
 */
415086872cb5SThomas Graf static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4151333c4301SDavid Ahern 			      struct fib6_config *cfg,
4152333c4301SDavid Ahern 			      struct netlink_ext_ack *extack)
41531da177e4SLinus Torvalds {
415486872cb5SThomas Graf 	struct rtmsg *rtm;
415586872cb5SThomas Graf 	struct nlattr *tb[RTA_MAX+1];
4156c78ba6d6SLubomir Rintel 	unsigned int pref;
415786872cb5SThomas Graf 	int err;
41581da177e4SLinus Torvalds 
4159fceb6435SJohannes Berg 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4160dac9c979SDavid Ahern 			  extack);
416186872cb5SThomas Graf 	if (err < 0)
416286872cb5SThomas Graf 		goto errout;
41631da177e4SLinus Torvalds 
	/* default error for the bare 'goto errout' paths below */
416486872cb5SThomas Graf 	err = -EINVAL;
416586872cb5SThomas Graf 	rtm = nlmsg_data(nlh);
416686872cb5SThomas Graf 
416784db8407SMaciej Żenczykowski 	*cfg = (struct fib6_config){
416884db8407SMaciej Żenczykowski 		.fc_table = rtm->rtm_table,
416984db8407SMaciej Żenczykowski 		.fc_dst_len = rtm->rtm_dst_len,
417084db8407SMaciej Żenczykowski 		.fc_src_len = rtm->rtm_src_len,
417184db8407SMaciej Żenczykowski 		.fc_flags = RTF_UP,
417284db8407SMaciej Żenczykowski 		.fc_protocol = rtm->rtm_protocol,
417384db8407SMaciej Żenczykowski 		.fc_type = rtm->rtm_type,
417484db8407SMaciej Żenczykowski 
417584db8407SMaciej Żenczykowski 		.fc_nlinfo.portid = NETLINK_CB(skb).portid,
417684db8407SMaciej Żenczykowski 		.fc_nlinfo.nlh = nlh,
417784db8407SMaciej Żenczykowski 		.fc_nlinfo.nl_net = sock_net(skb->sk),
417884db8407SMaciej Żenczykowski 	};
417986872cb5SThomas Graf 
	/* these four route types are flagged as reject routes */
4180ef2c7d7bSNicolas Dichtel 	if (rtm->rtm_type == RTN_UNREACHABLE ||
4181ef2c7d7bSNicolas Dichtel 	    rtm->rtm_type == RTN_BLACKHOLE ||
4182b4949ab2SNicolas Dichtel 	    rtm->rtm_type == RTN_PROHIBIT ||
4183b4949ab2SNicolas Dichtel 	    rtm->rtm_type == RTN_THROW)
418486872cb5SThomas Graf 		cfg->fc_flags |= RTF_REJECT;
418586872cb5SThomas Graf 
4186ab79ad14SMaciej Żenczykowski 	if (rtm->rtm_type == RTN_LOCAL)
4187ab79ad14SMaciej Żenczykowski 		cfg->fc_flags |= RTF_LOCAL;
4188ab79ad14SMaciej Żenczykowski 
41891f56a01fSMartin KaFai Lau 	if (rtm->rtm_flags & RTM_F_CLONED)
41901f56a01fSMartin KaFai Lau 		cfg->fc_flags |= RTF_CACHE;
41911f56a01fSMartin KaFai Lau 
4192fc1e64e1SDavid Ahern 	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4193fc1e64e1SDavid Ahern 
419486872cb5SThomas Graf 	if (tb[RTA_GATEWAY]) {
419567b61f6cSJiri Benc 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
419686872cb5SThomas Graf 		cfg->fc_flags |= RTF_GATEWAY;
41971da177e4SLinus Torvalds 	}
4198e3818541SDavid Ahern 	if (tb[RTA_VIA]) {
4199e3818541SDavid Ahern 		NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4200e3818541SDavid Ahern 		goto errout;
4201e3818541SDavid Ahern 	}
420286872cb5SThomas Graf 
420386872cb5SThomas Graf 	if (tb[RTA_DST]) {
		/* prefix length in bits, rounded up to whole bytes */
420486872cb5SThomas Graf 		int plen = (rtm->rtm_dst_len + 7) >> 3;
420586872cb5SThomas Graf 
420686872cb5SThomas Graf 		if (nla_len(tb[RTA_DST]) < plen)
420786872cb5SThomas Graf 			goto errout;
420886872cb5SThomas Graf 
420986872cb5SThomas Graf 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
42101da177e4SLinus Torvalds 	}
421186872cb5SThomas Graf 
421286872cb5SThomas Graf 	if (tb[RTA_SRC]) {
		/* same rounding as for RTA_DST */
421386872cb5SThomas Graf 		int plen = (rtm->rtm_src_len + 7) >> 3;
421486872cb5SThomas Graf 
421586872cb5SThomas Graf 		if (nla_len(tb[RTA_SRC]) < plen)
421686872cb5SThomas Graf 			goto errout;
421786872cb5SThomas Graf 
421886872cb5SThomas Graf 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
42191da177e4SLinus Torvalds 	}
422086872cb5SThomas Graf 
4221c3968a85SDaniel Walter 	if (tb[RTA_PREFSRC])
422267b61f6cSJiri Benc 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4223c3968a85SDaniel Walter 
422486872cb5SThomas Graf 	if (tb[RTA_OIF])
422586872cb5SThomas Graf 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
422686872cb5SThomas Graf 
422786872cb5SThomas Graf 	if (tb[RTA_PRIORITY])
422886872cb5SThomas Graf 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
422986872cb5SThomas Graf 
423086872cb5SThomas Graf 	if (tb[RTA_METRICS]) {
423186872cb5SThomas Graf 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
423286872cb5SThomas Graf 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
42331da177e4SLinus Torvalds 	}
423486872cb5SThomas Graf 
	/* RTA_TABLE, when present, overrides rtm->rtm_table set above */
423586872cb5SThomas Graf 	if (tb[RTA_TABLE])
423686872cb5SThomas Graf 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
423786872cb5SThomas Graf 
423851ebd318SNicolas Dichtel 	if (tb[RTA_MULTIPATH]) {
423951ebd318SNicolas Dichtel 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
424051ebd318SNicolas Dichtel 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
42419ed59592SDavid Ahern 
42429ed59592SDavid Ahern 		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4243c255bd68SDavid Ahern 						     cfg->fc_mp_len, extack);
42449ed59592SDavid Ahern 		if (err < 0)
42459ed59592SDavid Ahern 			goto errout;
424651ebd318SNicolas Dichtel 	}
424751ebd318SNicolas Dichtel 
4248c78ba6d6SLubomir Rintel 	if (tb[RTA_PREF]) {
4249c78ba6d6SLubomir Rintel 		pref = nla_get_u8(tb[RTA_PREF]);
		/* anything other than LOW or HIGH is coerced to MEDIUM */
4250c78ba6d6SLubomir Rintel 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
4251c78ba6d6SLubomir Rintel 		    pref != ICMPV6_ROUTER_PREF_HIGH)
4252c78ba6d6SLubomir Rintel 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
4253c78ba6d6SLubomir Rintel 		cfg->fc_flags |= RTF_PREF(pref);
4254c78ba6d6SLubomir Rintel 	}
4255c78ba6d6SLubomir Rintel 
425619e42e45SRoopa Prabhu 	if (tb[RTA_ENCAP])
425719e42e45SRoopa Prabhu 		cfg->fc_encap = tb[RTA_ENCAP];
425819e42e45SRoopa Prabhu 
42599ed59592SDavid Ahern 	if (tb[RTA_ENCAP_TYPE]) {
426019e42e45SRoopa Prabhu 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
426119e42e45SRoopa Prabhu 
4262c255bd68SDavid Ahern 		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
42639ed59592SDavid Ahern 		if (err < 0)
42649ed59592SDavid Ahern 			goto errout;
42659ed59592SDavid Ahern 	}
42669ed59592SDavid Ahern 
426732bc201eSXin Long 	if (tb[RTA_EXPIRES]) {
426832bc201eSXin Long 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
426932bc201eSXin Long 
		/* RTF_EXPIRES is only set for a finite timeout */
427032bc201eSXin Long 		if (addrconf_finite_timeout(timeout)) {
427132bc201eSXin Long 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
427232bc201eSXin Long 			cfg->fc_flags |= RTF_EXPIRES;
427332bc201eSXin Long 		}
427432bc201eSXin Long 	}
427532bc201eSXin Long 
427686872cb5SThomas Graf 	err = 0;
427786872cb5SThomas Graf errout:
427886872cb5SThomas Graf 	return err;
42791da177e4SLinus Torvalds }
42801da177e4SLinus Torvalds 
/* One element of the nexthop list built by ip6_route_info_append(): the
 * route, a copy of the config that accompanied it, and the list linkage.
 */
42816b9ea5a6SRoopa Prabhu struct rt6_nh {
42828d1c802bSDavid Ahern 	struct fib6_info *fib6_info;
42836b9ea5a6SRoopa Prabhu 	struct fib6_config r_cfg;
42846b9ea5a6SRoopa Prabhu 	struct list_head next;
42856b9ea5a6SRoopa Prabhu };
42866b9ea5a6SRoopa Prabhu 
/* Append @rt to @rt6_nh_list unless an equal nexthop is already listed.
 *
 * @net:         not used by this function
 * @rt6_nh_list: list of struct rt6_nh to extend
 * @rt:          route to add; only the pointer is stored
 * @r_cfg:       config copied by value into the new list element
 *
 * Returns -EEXIST when rt6_duplicate_nexthop() matches an existing list
 * entry, -ENOMEM when the list element cannot be allocated, 0 on success.
 */
4287d4ead6b3SDavid Ahern static int ip6_route_info_append(struct net *net,
4288d4ead6b3SDavid Ahern 				 struct list_head *rt6_nh_list,
42898d1c802bSDavid Ahern 				 struct fib6_info *rt,
42908d1c802bSDavid Ahern 				 struct fib6_config *r_cfg)
42916b9ea5a6SRoopa Prabhu {
42926b9ea5a6SRoopa Prabhu 	struct rt6_nh *nh;
42936b9ea5a6SRoopa Prabhu 	int err = -EEXIST;
42946b9ea5a6SRoopa Prabhu 
42956b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, rt6_nh_list, next) {
42968d1c802bSDavid Ahern 		/* check if fib6_info already exists */
42978d1c802bSDavid Ahern 		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
42986b9ea5a6SRoopa Prabhu 			return err;
42996b9ea5a6SRoopa Prabhu 	}
43006b9ea5a6SRoopa Prabhu 
43016b9ea5a6SRoopa Prabhu 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
43026b9ea5a6SRoopa Prabhu 	if (!nh)
43036b9ea5a6SRoopa Prabhu 		return -ENOMEM;
43048d1c802bSDavid Ahern 	nh->fib6_info = rt;
43056b9ea5a6SRoopa Prabhu 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
43066b9ea5a6SRoopa Prabhu 	list_add_tail(&nh->next, rt6_nh_list);
43076b9ea5a6SRoopa Prabhu 
43086b9ea5a6SRoopa Prabhu 	return 0;
43096b9ea5a6SRoopa Prabhu }
43106b9ea5a6SRoopa Prabhu 
/* Send a single RTM_NEWROUTE notification for a multipath route.
 *
 * @rt:      route to notify about by default
 * @rt_last: last route inserted; may be NULL
 * @info:    netlink info passed through to inet6_rt_notify()
 * @nlflags: netlink flags passed through to inet6_rt_notify()
 *
 * With NLM_F_APPEND in @nlflags and an @rt_last that has siblings, the
 * route notified is the first entry of rt_last->fib6_siblings instead of
 * @rt. Nothing is sent when the chosen route is NULL.
 */
43118d1c802bSDavid Ahern static void ip6_route_mpath_notify(struct fib6_info *rt,
43128d1c802bSDavid Ahern 				   struct fib6_info *rt_last,
43133b1137feSDavid Ahern 				   struct nl_info *info,
43143b1137feSDavid Ahern 				   __u16 nlflags)
43153b1137feSDavid Ahern {
43163b1137feSDavid Ahern 	/* if this is an APPEND route, then rt points to the first route
43173b1137feSDavid Ahern 	 * inserted and rt_last points to last route inserted. Userspace
43183b1137feSDavid Ahern 	 * wants a consistent dump of the route which starts at the first
43193b1137feSDavid Ahern 	 * nexthop. Since sibling routes are always added at the end of
43203b1137feSDavid Ahern 	 * the list, find the first sibling of the last route appended
43213b1137feSDavid Ahern 	 */
432293c2fb25SDavid Ahern 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
432393c2fb25SDavid Ahern 		rt = list_first_entry(&rt_last->fib6_siblings,
43248d1c802bSDavid Ahern 				      struct fib6_info,
432593c2fb25SDavid Ahern 				      fib6_siblings);
43263b1137feSDavid Ahern 	}
43273b1137feSDavid Ahern 
43283b1137feSDavid Ahern 	if (rt)
43293b1137feSDavid Ahern 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
43303b1137feSDavid Ahern }
43313b1137feSDavid Ahern 
4332333c4301SDavid Ahern static int ip6_route_multipath_add(struct fib6_config *cfg,
4333333c4301SDavid Ahern 				   struct netlink_ext_ack *extack)
433451ebd318SNicolas Dichtel {
43358d1c802bSDavid Ahern 	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
43363b1137feSDavid Ahern 	struct nl_info *info = &cfg->fc_nlinfo;
433751ebd318SNicolas Dichtel 	struct fib6_config r_cfg;
433851ebd318SNicolas Dichtel 	struct rtnexthop *rtnh;
43398d1c802bSDavid Ahern 	struct fib6_info *rt;
43406b9ea5a6SRoopa Prabhu 	struct rt6_nh *err_nh;
43416b9ea5a6SRoopa Prabhu 	struct rt6_nh *nh, *nh_safe;
43423b1137feSDavid Ahern 	__u16 nlflags;
434351ebd318SNicolas Dichtel 	int remaining;
434451ebd318SNicolas Dichtel 	int attrlen;
43456b9ea5a6SRoopa Prabhu 	int err = 1;
43466b9ea5a6SRoopa Prabhu 	int nhn = 0;
43476b9ea5a6SRoopa Prabhu 	int replace = (cfg->fc_nlinfo.nlh &&
43486b9ea5a6SRoopa Prabhu 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
43496b9ea5a6SRoopa Prabhu 	LIST_HEAD(rt6_nh_list);
435051ebd318SNicolas Dichtel 
43513b1137feSDavid Ahern 	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
43523b1137feSDavid Ahern 	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
43533b1137feSDavid Ahern 		nlflags |= NLM_F_APPEND;
43543b1137feSDavid Ahern 
435535f1b4e9SMichal Kubeček 	remaining = cfg->fc_mp_len;
435651ebd318SNicolas Dichtel 	rtnh = (struct rtnexthop *)cfg->fc_mp;
435751ebd318SNicolas Dichtel 
43586b9ea5a6SRoopa Prabhu 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
43598d1c802bSDavid Ahern 	 * fib6_info structs per nexthop
43606b9ea5a6SRoopa Prabhu 	 */
436151ebd318SNicolas Dichtel 	while (rtnh_ok(rtnh, remaining)) {
436251ebd318SNicolas Dichtel 		memcpy(&r_cfg, cfg, sizeof(*cfg));
436351ebd318SNicolas Dichtel 		if (rtnh->rtnh_ifindex)
436451ebd318SNicolas Dichtel 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
436551ebd318SNicolas Dichtel 
436651ebd318SNicolas Dichtel 		attrlen = rtnh_attrlen(rtnh);
436751ebd318SNicolas Dichtel 		if (attrlen > 0) {
436851ebd318SNicolas Dichtel 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
436951ebd318SNicolas Dichtel 
437051ebd318SNicolas Dichtel 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
437151ebd318SNicolas Dichtel 			if (nla) {
437267b61f6cSJiri Benc 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
437351ebd318SNicolas Dichtel 				r_cfg.fc_flags |= RTF_GATEWAY;
437451ebd318SNicolas Dichtel 			}
437519e42e45SRoopa Prabhu 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
437619e42e45SRoopa Prabhu 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
437719e42e45SRoopa Prabhu 			if (nla)
437819e42e45SRoopa Prabhu 				r_cfg.fc_encap_type = nla_get_u16(nla);
437951ebd318SNicolas Dichtel 		}
43806b9ea5a6SRoopa Prabhu 
438168e2ffdeSDavid Ahern 		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
4382acb54e3cSDavid Ahern 		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
43838c5b83f0SRoopa Prabhu 		if (IS_ERR(rt)) {
43848c5b83f0SRoopa Prabhu 			err = PTR_ERR(rt);
43858c5b83f0SRoopa Prabhu 			rt = NULL;
43866b9ea5a6SRoopa Prabhu 			goto cleanup;
43878c5b83f0SRoopa Prabhu 		}
4388b5d2d75eSDavid Ahern 		if (!rt6_qualify_for_ecmp(rt)) {
4389b5d2d75eSDavid Ahern 			err = -EINVAL;
4390b5d2d75eSDavid Ahern 			NL_SET_ERR_MSG(extack,
4391b5d2d75eSDavid Ahern 				       "Device only routes can not be added for IPv6 using the multipath API.");
4392b5d2d75eSDavid Ahern 			fib6_info_release(rt);
4393b5d2d75eSDavid Ahern 			goto cleanup;
4394b5d2d75eSDavid Ahern 		}
43956b9ea5a6SRoopa Prabhu 
4396ad1601aeSDavid Ahern 		rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
4397398958aeSIdo Schimmel 
4398d4ead6b3SDavid Ahern 		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4399d4ead6b3SDavid Ahern 					    rt, &r_cfg);
440051ebd318SNicolas Dichtel 		if (err) {
440193531c67SDavid Ahern 			fib6_info_release(rt);
44026b9ea5a6SRoopa Prabhu 			goto cleanup;
440351ebd318SNicolas Dichtel 		}
44046b9ea5a6SRoopa Prabhu 
44056b9ea5a6SRoopa Prabhu 		rtnh = rtnh_next(rtnh, &remaining);
440651ebd318SNicolas Dichtel 	}
44076b9ea5a6SRoopa Prabhu 
44083b1137feSDavid Ahern 	/* for add and replace send one notification with all nexthops.
44093b1137feSDavid Ahern 	 * Skip the notification in fib6_add_rt2node and send one with
44103b1137feSDavid Ahern 	 * the full route when done
44113b1137feSDavid Ahern 	 */
44123b1137feSDavid Ahern 	info->skip_notify = 1;
44133b1137feSDavid Ahern 
44146b9ea5a6SRoopa Prabhu 	err_nh = NULL;
44156b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, &rt6_nh_list, next) {
44168d1c802bSDavid Ahern 		err = __ip6_ins_rt(nh->fib6_info, info, extack);
44178d1c802bSDavid Ahern 		fib6_info_release(nh->fib6_info);
44183b1137feSDavid Ahern 
4419f7225172SDavid Ahern 		if (!err) {
4420f7225172SDavid Ahern 			/* save reference to last route successfully inserted */
4421f7225172SDavid Ahern 			rt_last = nh->fib6_info;
4422f7225172SDavid Ahern 
44236b9ea5a6SRoopa Prabhu 			/* save reference to first route for notification */
4424f7225172SDavid Ahern 			if (!rt_notif)
44258d1c802bSDavid Ahern 				rt_notif = nh->fib6_info;
4426f7225172SDavid Ahern 		}
44276b9ea5a6SRoopa Prabhu 
44288d1c802bSDavid Ahern 		/* nh->fib6_info is used or freed at this point, reset to NULL*/
44298d1c802bSDavid Ahern 		nh->fib6_info = NULL;
44306b9ea5a6SRoopa Prabhu 		if (err) {
44316b9ea5a6SRoopa Prabhu 			if (replace && nhn)
4432a5a82d84SJakub Kicinski 				NL_SET_ERR_MSG_MOD(extack,
4433a5a82d84SJakub Kicinski 						   "multipath route replace failed (check consistency of installed routes)");
44346b9ea5a6SRoopa Prabhu 			err_nh = nh;
44356b9ea5a6SRoopa Prabhu 			goto add_errout;
44366b9ea5a6SRoopa Prabhu 		}
44376b9ea5a6SRoopa Prabhu 
44381a72418bSNicolas Dichtel 		/* Because each route is added like a single route we remove
443927596472SMichal Kubeček 		 * these flags after the first nexthop: if there is a collision,
444027596472SMichal Kubeček 		 * we have already failed to add the first nexthop:
444127596472SMichal Kubeček 		 * fib6_add_rt2node() has rejected it; when replacing, old
444227596472SMichal Kubeček 		 * nexthops have been replaced by first new, the rest should
444327596472SMichal Kubeček 		 * be added to it.
44441a72418bSNicolas Dichtel 		 */
444527596472SMichal Kubeček 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
444627596472SMichal Kubeček 						     NLM_F_REPLACE);
44476b9ea5a6SRoopa Prabhu 		nhn++;
44486b9ea5a6SRoopa Prabhu 	}
44496b9ea5a6SRoopa Prabhu 
44503b1137feSDavid Ahern 	/* success ... tell user about new route */
44513b1137feSDavid Ahern 	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
44526b9ea5a6SRoopa Prabhu 	goto cleanup;
44536b9ea5a6SRoopa Prabhu 
44546b9ea5a6SRoopa Prabhu add_errout:
44553b1137feSDavid Ahern 	/* send notification for routes that were added so that
44563b1137feSDavid Ahern 	 * the delete notifications sent by ip6_route_del are
44573b1137feSDavid Ahern 	 * coherent
44583b1137feSDavid Ahern 	 */
44593b1137feSDavid Ahern 	if (rt_notif)
44603b1137feSDavid Ahern 		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
44613b1137feSDavid Ahern 
44626b9ea5a6SRoopa Prabhu 	/* Delete routes that were already added */
44636b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, &rt6_nh_list, next) {
44646b9ea5a6SRoopa Prabhu 		if (err_nh == nh)
44656b9ea5a6SRoopa Prabhu 			break;
4466333c4301SDavid Ahern 		ip6_route_del(&nh->r_cfg, extack);
44676b9ea5a6SRoopa Prabhu 	}
44686b9ea5a6SRoopa Prabhu 
44696b9ea5a6SRoopa Prabhu cleanup:
44706b9ea5a6SRoopa Prabhu 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
44718d1c802bSDavid Ahern 		if (nh->fib6_info)
44728d1c802bSDavid Ahern 			fib6_info_release(nh->fib6_info);
44736b9ea5a6SRoopa Prabhu 		list_del(&nh->next);
44746b9ea5a6SRoopa Prabhu 		kfree(nh);
44756b9ea5a6SRoopa Prabhu 	}
44766b9ea5a6SRoopa Prabhu 
44776b9ea5a6SRoopa Prabhu 	return err;
44786b9ea5a6SRoopa Prabhu }
44796b9ea5a6SRoopa Prabhu 
4480333c4301SDavid Ahern static int ip6_route_multipath_del(struct fib6_config *cfg,
4481333c4301SDavid Ahern 				   struct netlink_ext_ack *extack)
44826b9ea5a6SRoopa Prabhu {
44836b9ea5a6SRoopa Prabhu 	struct fib6_config r_cfg;
44846b9ea5a6SRoopa Prabhu 	struct rtnexthop *rtnh;
44856b9ea5a6SRoopa Prabhu 	int remaining;
44866b9ea5a6SRoopa Prabhu 	int attrlen;
44876b9ea5a6SRoopa Prabhu 	int err = 1, last_err = 0;
44886b9ea5a6SRoopa Prabhu 
44896b9ea5a6SRoopa Prabhu 	remaining = cfg->fc_mp_len;
44906b9ea5a6SRoopa Prabhu 	rtnh = (struct rtnexthop *)cfg->fc_mp;
44916b9ea5a6SRoopa Prabhu 
44926b9ea5a6SRoopa Prabhu 	/* Parse a Multipath Entry */
44936b9ea5a6SRoopa Prabhu 	while (rtnh_ok(rtnh, remaining)) {
44946b9ea5a6SRoopa Prabhu 		memcpy(&r_cfg, cfg, sizeof(*cfg));
44956b9ea5a6SRoopa Prabhu 		if (rtnh->rtnh_ifindex)
44966b9ea5a6SRoopa Prabhu 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
44976b9ea5a6SRoopa Prabhu 
44986b9ea5a6SRoopa Prabhu 		attrlen = rtnh_attrlen(rtnh);
44996b9ea5a6SRoopa Prabhu 		if (attrlen > 0) {
45006b9ea5a6SRoopa Prabhu 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
45016b9ea5a6SRoopa Prabhu 
45026b9ea5a6SRoopa Prabhu 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
45036b9ea5a6SRoopa Prabhu 			if (nla) {
45046b9ea5a6SRoopa Prabhu 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
45056b9ea5a6SRoopa Prabhu 				r_cfg.fc_flags |= RTF_GATEWAY;
45066b9ea5a6SRoopa Prabhu 			}
45076b9ea5a6SRoopa Prabhu 		}
4508333c4301SDavid Ahern 		err = ip6_route_del(&r_cfg, extack);
45096b9ea5a6SRoopa Prabhu 		if (err)
45106b9ea5a6SRoopa Prabhu 			last_err = err;
45116b9ea5a6SRoopa Prabhu 
451251ebd318SNicolas Dichtel 		rtnh = rtnh_next(rtnh, &remaining);
451351ebd318SNicolas Dichtel 	}
451451ebd318SNicolas Dichtel 
451551ebd318SNicolas Dichtel 	return last_err;
451651ebd318SNicolas Dichtel }
451751ebd318SNicolas Dichtel 
4518c21ef3e3SDavid Ahern static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4519c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
45201da177e4SLinus Torvalds {
452186872cb5SThomas Graf 	struct fib6_config cfg;
452286872cb5SThomas Graf 	int err;
45231da177e4SLinus Torvalds 
4524333c4301SDavid Ahern 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
452586872cb5SThomas Graf 	if (err < 0)
452686872cb5SThomas Graf 		return err;
452786872cb5SThomas Graf 
452851ebd318SNicolas Dichtel 	if (cfg.fc_mp)
4529333c4301SDavid Ahern 		return ip6_route_multipath_del(&cfg, extack);
45300ae81335SDavid Ahern 	else {
45310ae81335SDavid Ahern 		cfg.fc_delete_all_nh = 1;
4532333c4301SDavid Ahern 		return ip6_route_del(&cfg, extack);
45331da177e4SLinus Torvalds 	}
45340ae81335SDavid Ahern }
45351da177e4SLinus Torvalds 
4536c21ef3e3SDavid Ahern static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4537c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
45381da177e4SLinus Torvalds {
453986872cb5SThomas Graf 	struct fib6_config cfg;
454086872cb5SThomas Graf 	int err;
45411da177e4SLinus Torvalds 
4542333c4301SDavid Ahern 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
454386872cb5SThomas Graf 	if (err < 0)
454486872cb5SThomas Graf 		return err;
454586872cb5SThomas Graf 
454667f69513SDavid Ahern 	if (cfg.fc_metric == 0)
454767f69513SDavid Ahern 		cfg.fc_metric = IP6_RT_PRIO_USER;
454867f69513SDavid Ahern 
454951ebd318SNicolas Dichtel 	if (cfg.fc_mp)
4550333c4301SDavid Ahern 		return ip6_route_multipath_add(&cfg, extack);
455151ebd318SNicolas Dichtel 	else
4552acb54e3cSDavid Ahern 		return ip6_route_add(&cfg, GFP_KERNEL, extack);
45531da177e4SLinus Torvalds }
45541da177e4SLinus Torvalds 
45558d1c802bSDavid Ahern static size_t rt6_nlmsg_size(struct fib6_info *rt)
4556339bf98fSThomas Graf {
4557beb1afacSDavid Ahern 	int nexthop_len = 0;
4558beb1afacSDavid Ahern 
455993c2fb25SDavid Ahern 	if (rt->fib6_nsiblings) {
4560beb1afacSDavid Ahern 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
4561beb1afacSDavid Ahern 			    + NLA_ALIGN(sizeof(struct rtnexthop))
4562beb1afacSDavid Ahern 			    + nla_total_size(16) /* RTA_GATEWAY */
4563ad1601aeSDavid Ahern 			    + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
4564beb1afacSDavid Ahern 
456593c2fb25SDavid Ahern 		nexthop_len *= rt->fib6_nsiblings;
4566beb1afacSDavid Ahern 	}
4567beb1afacSDavid Ahern 
4568339bf98fSThomas Graf 	return NLMSG_ALIGN(sizeof(struct rtmsg))
4569339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_SRC */
4570339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_DST */
4571339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_GATEWAY */
4572339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_PREFSRC */
4573339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_TABLE */
4574339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_IIF */
4575339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_OIF */
4576339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_PRIORITY */
45776a2b9ce0SNoriaki TAKAMIYA 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
4578ea697639SDaniel Borkmann 	       + nla_total_size(sizeof(struct rta_cacheinfo))
4579c78ba6d6SLubomir Rintel 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
458019e42e45SRoopa Prabhu 	       + nla_total_size(1) /* RTA_PREF */
4581ad1601aeSDavid Ahern 	       + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
4582beb1afacSDavid Ahern 	       + nexthop_len;
4583beb1afacSDavid Ahern }
4584beb1afacSDavid Ahern 
4585572bf4ddSDavid Ahern static int rt6_nexthop_info(struct sk_buff *skb, const struct fib6_nh *fib6_nh,
45865be083ceSDavid Ahern 			    unsigned int *flags, bool skip_oif)
4587beb1afacSDavid Ahern {
4588ad1601aeSDavid Ahern 	if (fib6_nh->fib_nh_flags & RTNH_F_DEAD)
4589f9d882eaSIdo Schimmel 		*flags |= RTNH_F_DEAD;
4590f9d882eaSIdo Schimmel 
4591ad1601aeSDavid Ahern 	if (fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
4592beb1afacSDavid Ahern 		*flags |= RTNH_F_LINKDOWN;
4593dcd1f572SDavid Ahern 
4594dcd1f572SDavid Ahern 		rcu_read_lock();
4595ad1601aeSDavid Ahern 		if (ip6_ignore_linkdown(fib6_nh->fib_nh_dev))
4596beb1afacSDavid Ahern 			*flags |= RTNH_F_DEAD;
4597dcd1f572SDavid Ahern 		rcu_read_unlock();
4598beb1afacSDavid Ahern 	}
4599beb1afacSDavid Ahern 
4600572bf4ddSDavid Ahern 	if (fib6_nh->fib_nh_has_gw) {
4601ad1601aeSDavid Ahern 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &fib6_nh->fib_nh_gw6) < 0)
4602beb1afacSDavid Ahern 			goto nla_put_failure;
4603beb1afacSDavid Ahern 	}
4604beb1afacSDavid Ahern 
4605ad1601aeSDavid Ahern 	*flags |= (fib6_nh->fib_nh_flags & RTNH_F_ONLINK);
4606ad1601aeSDavid Ahern 	if (fib6_nh->fib_nh_flags & RTNH_F_OFFLOAD)
460761e4d01eSIdo Schimmel 		*flags |= RTNH_F_OFFLOAD;
460861e4d01eSIdo Schimmel 
46095be083ceSDavid Ahern 	/* not needed for multipath encoding b/c it has a rtnexthop struct */
4610ad1601aeSDavid Ahern 	if (!skip_oif && fib6_nh->fib_nh_dev &&
4611ad1601aeSDavid Ahern 	    nla_put_u32(skb, RTA_OIF, fib6_nh->fib_nh_dev->ifindex))
4612beb1afacSDavid Ahern 		goto nla_put_failure;
4613beb1afacSDavid Ahern 
4614ad1601aeSDavid Ahern 	if (fib6_nh->fib_nh_lws &&
4615ad1601aeSDavid Ahern 	    lwtunnel_fill_encap(skb, fib6_nh->fib_nh_lws) < 0)
4616beb1afacSDavid Ahern 		goto nla_put_failure;
4617beb1afacSDavid Ahern 
4618beb1afacSDavid Ahern 	return 0;
4619beb1afacSDavid Ahern 
4620beb1afacSDavid Ahern nla_put_failure:
4621beb1afacSDavid Ahern 	return -EMSGSIZE;
4622beb1afacSDavid Ahern }
4623beb1afacSDavid Ahern 
46245be083ceSDavid Ahern /* add multipath next hop */
4625572bf4ddSDavid Ahern static int rt6_add_nexthop(struct sk_buff *skb, const struct fib6_nh *fib6_nh)
4626beb1afacSDavid Ahern {
4627ad1601aeSDavid Ahern 	const struct net_device *dev = fib6_nh->fib_nh_dev;
4628beb1afacSDavid Ahern 	struct rtnexthop *rtnh;
4629beb1afacSDavid Ahern 	unsigned int flags = 0;
4630beb1afacSDavid Ahern 
4631beb1afacSDavid Ahern 	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4632beb1afacSDavid Ahern 	if (!rtnh)
4633beb1afacSDavid Ahern 		goto nla_put_failure;
4634beb1afacSDavid Ahern 
4635ad1601aeSDavid Ahern 	rtnh->rtnh_hops = fib6_nh->fib_nh_weight - 1;
46365e670d84SDavid Ahern 	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
4637beb1afacSDavid Ahern 
4638572bf4ddSDavid Ahern 	if (rt6_nexthop_info(skb, fib6_nh, &flags, true) < 0)
4639beb1afacSDavid Ahern 		goto nla_put_failure;
4640beb1afacSDavid Ahern 
4641beb1afacSDavid Ahern 	rtnh->rtnh_flags = flags;
4642beb1afacSDavid Ahern 
4643beb1afacSDavid Ahern 	/* length of rtnetlink header + attributes */
4644beb1afacSDavid Ahern 	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4645beb1afacSDavid Ahern 
4646beb1afacSDavid Ahern 	return 0;
4647beb1afacSDavid Ahern 
4648beb1afacSDavid Ahern nla_put_failure:
4649beb1afacSDavid Ahern 	return -EMSGSIZE;
4650339bf98fSThomas Graf }
4651339bf98fSThomas Graf 
/*
 * rt6_fill_node - encode one route as a netlink route message in @skb
 * @net:    namespace, passed on to ip6mr_get_route() / ip6_route_get_saddr()
 * @skb:    buffer the message is appended to
 * @rt:     FIB entry; supplies table, type, protocol, prefsrc, metric and,
 *          when @dst is NULL, the keys, flags, metrics and nexthops.
 *          Dereferenced unconditionally, so it must not be NULL.
 * @dst:    optional dst entry; when non-NULL it is cast to struct rt6_info
 *          and supplies keys, flags, metrics, gateway, device and expiry
 * @dest:   optional destination address; emitted as a /128 RTA_DST
 * @src:    optional source address; emitted as a /128 RTA_SRC (only under
 *          CONFIG_IPV6_SUBTREES)
 * @iif:    input interface index, 0 if none
 * @type:   netlink message type for the header
 * @portid: netlink port id for the header
 * @seq:    netlink sequence number for the header
 * @flags:  netlink message flags for the header
 *
 * Returns 0 on success and -EMSGSIZE on any failure; once the header has
 * been placed, a failure also cancels the partially built message.
 */
4652d4ead6b3SDavid Ahern static int rt6_fill_node(struct net *net, struct sk_buff *skb,
46538d1c802bSDavid Ahern 			 struct fib6_info *rt, struct dst_entry *dst,
4654d4ead6b3SDavid Ahern 			 struct in6_addr *dest, struct in6_addr *src,
465515e47304SEric W. Biederman 			 int iif, int type, u32 portid, u32 seq,
4656f8cfe2ceSDavid Ahern 			 unsigned int flags)
46571da177e4SLinus Torvalds {
465822d0bd82SXin Long 	struct rt6_info *rt6 = (struct rt6_info *)dst;
465922d0bd82SXin Long 	struct rt6key *rt6_dst, *rt6_src;
466022d0bd82SXin Long 	u32 *pmetrics, table, rt6_flags;
46611da177e4SLinus Torvalds 	struct nlmsghdr *nlh;
466222d0bd82SXin Long 	struct rtmsg *rtm;
4663d4ead6b3SDavid Ahern 	long expires = 0;
46641da177e4SLinus Torvalds 
466515e47304SEric W. Biederman 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
466638308473SDavid S. Miller 	if (!nlh)
466726932566SPatrick McHardy 		return -EMSGSIZE;
46682d7202bfSThomas Graf 
	/* keys and flags come from the dst when one was given, else from
	 * the FIB entry
	 */
466922d0bd82SXin Long 	if (rt6) {
467022d0bd82SXin Long 		rt6_dst = &rt6->rt6i_dst;
467122d0bd82SXin Long 		rt6_src = &rt6->rt6i_src;
467222d0bd82SXin Long 		rt6_flags = rt6->rt6i_flags;
467322d0bd82SXin Long 	} else {
467422d0bd82SXin Long 		rt6_dst = &rt->fib6_dst;
467522d0bd82SXin Long 		rt6_src = &rt->fib6_src;
467622d0bd82SXin Long 		rt6_flags = rt->fib6_flags;
467722d0bd82SXin Long 	}
467822d0bd82SXin Long 
46792d7202bfSThomas Graf 	rtm = nlmsg_data(nlh);
46801da177e4SLinus Torvalds 	rtm->rtm_family = AF_INET6;
468122d0bd82SXin Long 	rtm->rtm_dst_len = rt6_dst->plen;
468222d0bd82SXin Long 	rtm->rtm_src_len = rt6_src->plen;
46831da177e4SLinus Torvalds 	rtm->rtm_tos = 0;
468493c2fb25SDavid Ahern 	if (rt->fib6_table)
468593c2fb25SDavid Ahern 		table = rt->fib6_table->tb6_id;
4686c71099acSThomas Graf 	else
46879e762a4aSPatrick McHardy 		table = RT6_TABLE_UNSPEC;
	/* the header field only carries ids below 256; the full id always
	 * goes out in RTA_TABLE
	 */
468897f0082aSKalash Nainwal 	rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
4689c78679e8SDavid S. Miller 	if (nla_put_u32(skb, RTA_TABLE, table))
4690c78679e8SDavid S. Miller 		goto nla_put_failure;
4691e8478e80SDavid Ahern 
4692e8478e80SDavid Ahern 	rtm->rtm_type = rt->fib6_type;
46931da177e4SLinus Torvalds 	rtm->rtm_flags = 0;
46941da177e4SLinus Torvalds 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
469593c2fb25SDavid Ahern 	rtm->rtm_protocol = rt->fib6_protocol;
46961da177e4SLinus Torvalds 
469722d0bd82SXin Long 	if (rt6_flags & RTF_CACHE)
46981da177e4SLinus Torvalds 		rtm->rtm_flags |= RTM_F_CLONED;
46991da177e4SLinus Torvalds 
	/* an explicit @dest overrides the route's own prefix and is
	 * reported as a host (/128) destination
	 */
4700d4ead6b3SDavid Ahern 	if (dest) {
4701d4ead6b3SDavid Ahern 		if (nla_put_in6_addr(skb, RTA_DST, dest))
4702c78679e8SDavid S. Miller 			goto nla_put_failure;
47031da177e4SLinus Torvalds 		rtm->rtm_dst_len = 128;
47041da177e4SLinus Torvalds 	} else if (rtm->rtm_dst_len)
470522d0bd82SXin Long 		if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
4706c78679e8SDavid S. Miller 			goto nla_put_failure;
47071da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
47081da177e4SLinus Torvalds 	if (src) {
4709930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_SRC, src))
4710c78679e8SDavid S. Miller 			goto nla_put_failure;
47111da177e4SLinus Torvalds 		rtm->rtm_src_len = 128;
4712c78679e8SDavid S. Miller 	} else if (rtm->rtm_src_len &&
471322d0bd82SXin Long 		   nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
4714c78679e8SDavid S. Miller 		goto nla_put_failure;
47151da177e4SLinus Torvalds #endif
47167bc570c8SYOSHIFUJI Hideaki 	if (iif) {
47177bc570c8SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_MROUTE
471822d0bd82SXin Long 		if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
4719fd61c6baSDavid Ahern 			int err = ip6mr_get_route(net, skb, rtm, portid);
47202cf75070SNikolay Aleksandrov 
			/* err == 0 returns right here, before any of the
			 * attributes below and before nlmsg_end();
			 * err > 0 falls through to the common path.
			 * NOTE(review): confirm against ip6mr_get_route()
			 * why the 0 case needs no nlmsg_end().
			 */
47217bc570c8SYOSHIFUJI Hideaki 			if (err == 0)
47227bc570c8SYOSHIFUJI Hideaki 				return 0;
4723fd61c6baSDavid Ahern 			if (err < 0)
47247bc570c8SYOSHIFUJI Hideaki 				goto nla_put_failure;
47257bc570c8SYOSHIFUJI Hideaki 		} else
47267bc570c8SYOSHIFUJI Hideaki #endif
4727c78679e8SDavid S. Miller 			if (nla_put_u32(skb, RTA_IIF, iif))
4728c78679e8SDavid S. Miller 				goto nla_put_failure;
4729d4ead6b3SDavid Ahern 	} else if (dest) {
47301da177e4SLinus Torvalds 		struct in6_addr saddr_buf;
4731d4ead6b3SDavid Ahern 		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
4732930345eaSJiri Benc 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4733c78679e8SDavid S. Miller 			goto nla_put_failure;
4734c3968a85SDaniel Walter 	}
4735c3968a85SDaniel Walter 
473693c2fb25SDavid Ahern 	if (rt->fib6_prefsrc.plen) {
4737c3968a85SDaniel Walter 		struct in6_addr saddr_buf;
473893c2fb25SDavid Ahern 		saddr_buf = rt->fib6_prefsrc.addr;
4739930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4740c78679e8SDavid S. Miller 			goto nla_put_failure;
47411da177e4SLinus Torvalds 	}
47422d7202bfSThomas Graf 
4743d4ead6b3SDavid Ahern 	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4744d4ead6b3SDavid Ahern 	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
47452d7202bfSThomas Graf 		goto nla_put_failure;
47462d7202bfSThomas Graf 
474793c2fb25SDavid Ahern 	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
4748beb1afacSDavid Ahern 		goto nla_put_failure;
4749beb1afacSDavid Ahern 
	/* Nexthop encoding has three cases: a dst reports its own gateway
	 * and device; a FIB entry with siblings is encoded as
	 * RTA_MULTIPATH; a FIB entry without siblings emits its single
	 * nexthop's attributes directly.
	 */
4750beb1afacSDavid Ahern 	/* For multipath routes, walk the siblings list and add
4751beb1afacSDavid Ahern 	 * each as a nexthop within RTA_MULTIPATH.
4752beb1afacSDavid Ahern 	 */
475322d0bd82SXin Long 	if (rt6) {
475422d0bd82SXin Long 		if (rt6_flags & RTF_GATEWAY &&
475522d0bd82SXin Long 		    nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
475622d0bd82SXin Long 			goto nla_put_failure;
475722d0bd82SXin Long 
475822d0bd82SXin Long 		if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
475922d0bd82SXin Long 			goto nla_put_failure;
476022d0bd82SXin Long 	} else if (rt->fib6_nsiblings) {
47618d1c802bSDavid Ahern 		struct fib6_info *sibling, *next_sibling;
4762beb1afacSDavid Ahern 		struct nlattr *mp;
4763beb1afacSDavid Ahern 
4764beb1afacSDavid Ahern 		mp = nla_nest_start(skb, RTA_MULTIPATH);
4765beb1afacSDavid Ahern 		if (!mp)
4766beb1afacSDavid Ahern 			goto nla_put_failure;
4767beb1afacSDavid Ahern 
		/* @rt's own nexthop goes first, then one per sibling */
4768572bf4ddSDavid Ahern 		if (rt6_add_nexthop(skb, &rt->fib6_nh) < 0)
4769beb1afacSDavid Ahern 			goto nla_put_failure;
4770beb1afacSDavid Ahern 
4771beb1afacSDavid Ahern 		list_for_each_entry_safe(sibling, next_sibling,
477293c2fb25SDavid Ahern 					 &rt->fib6_siblings, fib6_siblings) {
4773572bf4ddSDavid Ahern 			if (rt6_add_nexthop(skb, &sibling->fib6_nh) < 0)
477494f826b8SEric Dumazet 				goto nla_put_failure;
477594f826b8SEric Dumazet 		}
47762d7202bfSThomas Graf 
4777beb1afacSDavid Ahern 		nla_nest_end(skb, mp);
4778beb1afacSDavid Ahern 	} else {
4779572bf4ddSDavid Ahern 		if (rt6_nexthop_info(skb, &rt->fib6_nh, &rtm->rtm_flags,
4780572bf4ddSDavid Ahern 				     false) < 0)
4781c78679e8SDavid S. Miller 			goto nla_put_failure;
4782beb1afacSDavid Ahern 	}
47838253947eSLi Wei 
	/* expiry is reported relative to the current jiffies value */
478422d0bd82SXin Long 	if (rt6_flags & RTF_EXPIRES) {
478514895687SDavid Ahern 		expires = dst ? dst->expires : rt->expires;
478614895687SDavid Ahern 		expires -= jiffies;
478714895687SDavid Ahern 	}
478869cdf8f9SYOSHIFUJI Hideaki 
4789d4ead6b3SDavid Ahern 	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
4790e3703b3dSThomas Graf 		goto nla_put_failure;
47911da177e4SLinus Torvalds 
479222d0bd82SXin Long 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
4793c78ba6d6SLubomir Rintel 		goto nla_put_failure;
4794c78ba6d6SLubomir Rintel 
479519e42e45SRoopa Prabhu 
4796053c095aSJohannes Berg 	nlmsg_end(skb, nlh);
4797053c095aSJohannes Berg 	return 0;
47982d7202bfSThomas Graf 
	/* every failure after nlmsg_put(), including a negative return
	 * from ip6mr_get_route(), is reported as -EMSGSIZE
	 */
47992d7202bfSThomas Graf nla_put_failure:
480026932566SPatrick McHardy 	nlmsg_cancel(skb, nlh);
480126932566SPatrick McHardy 	return -EMSGSIZE;
48021da177e4SLinus Torvalds }
48031da177e4SLinus Torvalds 
480413e38901SDavid Ahern static bool fib6_info_uses_dev(const struct fib6_info *f6i,
480513e38901SDavid Ahern 			       const struct net_device *dev)
480613e38901SDavid Ahern {
4807ad1601aeSDavid Ahern 	if (f6i->fib6_nh.fib_nh_dev == dev)
480813e38901SDavid Ahern 		return true;
480913e38901SDavid Ahern 
481013e38901SDavid Ahern 	if (f6i->fib6_nsiblings) {
481113e38901SDavid Ahern 		struct fib6_info *sibling, *next_sibling;
481213e38901SDavid Ahern 
481313e38901SDavid Ahern 		list_for_each_entry_safe(sibling, next_sibling,
481413e38901SDavid Ahern 					 &f6i->fib6_siblings, fib6_siblings) {
4815ad1601aeSDavid Ahern 			if (sibling->fib6_nh.fib_nh_dev == dev)
481613e38901SDavid Ahern 				return true;
481713e38901SDavid Ahern 		}
481813e38901SDavid Ahern 	}
481913e38901SDavid Ahern 
482013e38901SDavid Ahern 	return false;
482113e38901SDavid Ahern }
482213e38901SDavid Ahern 
48238d1c802bSDavid Ahern int rt6_dump_route(struct fib6_info *rt, void *p_arg)
48241da177e4SLinus Torvalds {
48251da177e4SLinus Torvalds 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
482613e38901SDavid Ahern 	struct fib_dump_filter *filter = &arg->filter;
482713e38901SDavid Ahern 	unsigned int flags = NLM_F_MULTI;
48281f17e2f2SDavid Ahern 	struct net *net = arg->net;
48291f17e2f2SDavid Ahern 
4830421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry)
48311f17e2f2SDavid Ahern 		return 0;
48321da177e4SLinus Torvalds 
483313e38901SDavid Ahern 	if ((filter->flags & RTM_F_PREFIX) &&
483493c2fb25SDavid Ahern 	    !(rt->fib6_flags & RTF_PREFIX_RT)) {
4835f8cfe2ceSDavid Ahern 		/* success since this is not a prefix route */
4836f8cfe2ceSDavid Ahern 		return 1;
4837f8cfe2ceSDavid Ahern 	}
483813e38901SDavid Ahern 	if (filter->filter_set) {
483913e38901SDavid Ahern 		if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
484013e38901SDavid Ahern 		    (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
484113e38901SDavid Ahern 		    (filter->protocol && rt->fib6_protocol != filter->protocol)) {
484213e38901SDavid Ahern 			return 1;
484313e38901SDavid Ahern 		}
484413e38901SDavid Ahern 		flags |= NLM_F_DUMP_FILTERED;
4845f8cfe2ceSDavid Ahern 	}
48461da177e4SLinus Torvalds 
4847d4ead6b3SDavid Ahern 	return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4848d4ead6b3SDavid Ahern 			     RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
484913e38901SDavid Ahern 			     arg->cb->nlh->nlmsg_seq, flags);
48501da177e4SLinus Torvalds }
48511da177e4SLinus Torvalds 
48520eff0a27SJakub Kicinski static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
48530eff0a27SJakub Kicinski 					const struct nlmsghdr *nlh,
48540eff0a27SJakub Kicinski 					struct nlattr **tb,
48550eff0a27SJakub Kicinski 					struct netlink_ext_ack *extack)
48560eff0a27SJakub Kicinski {
48570eff0a27SJakub Kicinski 	struct rtmsg *rtm;
48580eff0a27SJakub Kicinski 	int i, err;
48590eff0a27SJakub Kicinski 
48600eff0a27SJakub Kicinski 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
48610eff0a27SJakub Kicinski 		NL_SET_ERR_MSG_MOD(extack,
48620eff0a27SJakub Kicinski 				   "Invalid header for get route request");
48630eff0a27SJakub Kicinski 		return -EINVAL;
48640eff0a27SJakub Kicinski 	}
48650eff0a27SJakub Kicinski 
48660eff0a27SJakub Kicinski 	if (!netlink_strict_get_check(skb))
48670eff0a27SJakub Kicinski 		return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
48680eff0a27SJakub Kicinski 				   rtm_ipv6_policy, extack);
48690eff0a27SJakub Kicinski 
48700eff0a27SJakub Kicinski 	rtm = nlmsg_data(nlh);
48710eff0a27SJakub Kicinski 	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
48720eff0a27SJakub Kicinski 	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
48730eff0a27SJakub Kicinski 	    rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
48740eff0a27SJakub Kicinski 	    rtm->rtm_type) {
48750eff0a27SJakub Kicinski 		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
48760eff0a27SJakub Kicinski 		return -EINVAL;
48770eff0a27SJakub Kicinski 	}
48780eff0a27SJakub Kicinski 	if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
48790eff0a27SJakub Kicinski 		NL_SET_ERR_MSG_MOD(extack,
48800eff0a27SJakub Kicinski 				   "Invalid flags for get route request");
48810eff0a27SJakub Kicinski 		return -EINVAL;
48820eff0a27SJakub Kicinski 	}
48830eff0a27SJakub Kicinski 
48840eff0a27SJakub Kicinski 	err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
48850eff0a27SJakub Kicinski 				 rtm_ipv6_policy, extack);
48860eff0a27SJakub Kicinski 	if (err)
48870eff0a27SJakub Kicinski 		return err;
48880eff0a27SJakub Kicinski 
48890eff0a27SJakub Kicinski 	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
48900eff0a27SJakub Kicinski 	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
48910eff0a27SJakub Kicinski 		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
48920eff0a27SJakub Kicinski 		return -EINVAL;
48930eff0a27SJakub Kicinski 	}
48940eff0a27SJakub Kicinski 
48950eff0a27SJakub Kicinski 	for (i = 0; i <= RTA_MAX; i++) {
48960eff0a27SJakub Kicinski 		if (!tb[i])
48970eff0a27SJakub Kicinski 			continue;
48980eff0a27SJakub Kicinski 
48990eff0a27SJakub Kicinski 		switch (i) {
49000eff0a27SJakub Kicinski 		case RTA_SRC:
49010eff0a27SJakub Kicinski 		case RTA_DST:
49020eff0a27SJakub Kicinski 		case RTA_IIF:
49030eff0a27SJakub Kicinski 		case RTA_OIF:
49040eff0a27SJakub Kicinski 		case RTA_MARK:
49050eff0a27SJakub Kicinski 		case RTA_UID:
49060eff0a27SJakub Kicinski 		case RTA_SPORT:
49070eff0a27SJakub Kicinski 		case RTA_DPORT:
49080eff0a27SJakub Kicinski 		case RTA_IP_PROTO:
49090eff0a27SJakub Kicinski 			break;
49100eff0a27SJakub Kicinski 		default:
49110eff0a27SJakub Kicinski 			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
49120eff0a27SJakub Kicinski 			return -EINVAL;
49130eff0a27SJakub Kicinski 		}
49140eff0a27SJakub Kicinski 	}
49150eff0a27SJakub Kicinski 
49160eff0a27SJakub Kicinski 	return 0;
49170eff0a27SJakub Kicinski }
49180eff0a27SJakub Kicinski 
4919c21ef3e3SDavid Ahern static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4920c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
49211da177e4SLinus Torvalds {
49223b1e0a65SYOSHIFUJI Hideaki 	struct net *net = sock_net(in_skb->sk);
4923ab364a6fSThomas Graf 	struct nlattr *tb[RTA_MAX+1];
492418c3a61cSRoopa Prabhu 	int err, iif = 0, oif = 0;
4925a68886a6SDavid Ahern 	struct fib6_info *from;
492618c3a61cSRoopa Prabhu 	struct dst_entry *dst;
49271da177e4SLinus Torvalds 	struct rt6_info *rt;
4928ab364a6fSThomas Graf 	struct sk_buff *skb;
4929ab364a6fSThomas Graf 	struct rtmsg *rtm;
4930744486d4SMaciej Żenczykowski 	struct flowi6 fl6 = {};
493118c3a61cSRoopa Prabhu 	bool fibmatch;
4932ab364a6fSThomas Graf 
49330eff0a27SJakub Kicinski 	err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
4934ab364a6fSThomas Graf 	if (err < 0)
4935ab364a6fSThomas Graf 		goto errout;
4936ab364a6fSThomas Graf 
4937ab364a6fSThomas Graf 	err = -EINVAL;
493838b7097bSHannes Frederic Sowa 	rtm = nlmsg_data(nlh);
493938b7097bSHannes Frederic Sowa 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
494018c3a61cSRoopa Prabhu 	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4941ab364a6fSThomas Graf 
4942ab364a6fSThomas Graf 	if (tb[RTA_SRC]) {
4943ab364a6fSThomas Graf 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4944ab364a6fSThomas Graf 			goto errout;
4945ab364a6fSThomas Graf 
49464e3fd7a0SAlexey Dobriyan 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4947ab364a6fSThomas Graf 	}
4948ab364a6fSThomas Graf 
4949ab364a6fSThomas Graf 	if (tb[RTA_DST]) {
4950ab364a6fSThomas Graf 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4951ab364a6fSThomas Graf 			goto errout;
4952ab364a6fSThomas Graf 
49534e3fd7a0SAlexey Dobriyan 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4954ab364a6fSThomas Graf 	}
4955ab364a6fSThomas Graf 
4956ab364a6fSThomas Graf 	if (tb[RTA_IIF])
4957ab364a6fSThomas Graf 		iif = nla_get_u32(tb[RTA_IIF]);
4958ab364a6fSThomas Graf 
4959ab364a6fSThomas Graf 	if (tb[RTA_OIF])
496072331bc0SShmulik Ladkani 		oif = nla_get_u32(tb[RTA_OIF]);
4961ab364a6fSThomas Graf 
49622e47b291SLorenzo Colitti 	if (tb[RTA_MARK])
49632e47b291SLorenzo Colitti 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
49642e47b291SLorenzo Colitti 
4965622ec2c9SLorenzo Colitti 	if (tb[RTA_UID])
4966622ec2c9SLorenzo Colitti 		fl6.flowi6_uid = make_kuid(current_user_ns(),
4967622ec2c9SLorenzo Colitti 					   nla_get_u32(tb[RTA_UID]));
4968622ec2c9SLorenzo Colitti 	else
4969622ec2c9SLorenzo Colitti 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4970622ec2c9SLorenzo Colitti 
4971eacb9384SRoopa Prabhu 	if (tb[RTA_SPORT])
4972eacb9384SRoopa Prabhu 		fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4973eacb9384SRoopa Prabhu 
4974eacb9384SRoopa Prabhu 	if (tb[RTA_DPORT])
4975eacb9384SRoopa Prabhu 		fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4976eacb9384SRoopa Prabhu 
4977eacb9384SRoopa Prabhu 	if (tb[RTA_IP_PROTO]) {
4978eacb9384SRoopa Prabhu 		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
49795e1a99eaSHangbin Liu 						  &fl6.flowi6_proto, AF_INET6,
49805e1a99eaSHangbin Liu 						  extack);
4981eacb9384SRoopa Prabhu 		if (err)
4982eacb9384SRoopa Prabhu 			goto errout;
4983eacb9384SRoopa Prabhu 	}
4984eacb9384SRoopa Prabhu 
4985ab364a6fSThomas Graf 	if (iif) {
4986ab364a6fSThomas Graf 		struct net_device *dev;
498772331bc0SShmulik Ladkani 		int flags = 0;
498872331bc0SShmulik Ladkani 
4989121622dbSFlorian Westphal 		rcu_read_lock();
4990121622dbSFlorian Westphal 
4991121622dbSFlorian Westphal 		dev = dev_get_by_index_rcu(net, iif);
4992ab364a6fSThomas Graf 		if (!dev) {
4993121622dbSFlorian Westphal 			rcu_read_unlock();
4994ab364a6fSThomas Graf 			err = -ENODEV;
4995ab364a6fSThomas Graf 			goto errout;
4996ab364a6fSThomas Graf 		}
499772331bc0SShmulik Ladkani 
499872331bc0SShmulik Ladkani 		fl6.flowi6_iif = iif;
499972331bc0SShmulik Ladkani 
500072331bc0SShmulik Ladkani 		if (!ipv6_addr_any(&fl6.saddr))
500172331bc0SShmulik Ladkani 			flags |= RT6_LOOKUP_F_HAS_SADDR;
500272331bc0SShmulik Ladkani 
5003b75cc8f9SDavid Ahern 		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
5004121622dbSFlorian Westphal 
5005121622dbSFlorian Westphal 		rcu_read_unlock();
500672331bc0SShmulik Ladkani 	} else {
500772331bc0SShmulik Ladkani 		fl6.flowi6_oif = oif;
500872331bc0SShmulik Ladkani 
500918c3a61cSRoopa Prabhu 		dst = ip6_route_output(net, NULL, &fl6);
501018c3a61cSRoopa Prabhu 	}
501118c3a61cSRoopa Prabhu 
501218c3a61cSRoopa Prabhu 
501318c3a61cSRoopa Prabhu 	rt = container_of(dst, struct rt6_info, dst);
501418c3a61cSRoopa Prabhu 	if (rt->dst.error) {
501518c3a61cSRoopa Prabhu 		err = rt->dst.error;
501618c3a61cSRoopa Prabhu 		ip6_rt_put(rt);
501718c3a61cSRoopa Prabhu 		goto errout;
5018ab364a6fSThomas Graf 	}
50191da177e4SLinus Torvalds 
50209d6acb3bSWANG Cong 	if (rt == net->ipv6.ip6_null_entry) {
50219d6acb3bSWANG Cong 		err = rt->dst.error;
50229d6acb3bSWANG Cong 		ip6_rt_put(rt);
50239d6acb3bSWANG Cong 		goto errout;
50249d6acb3bSWANG Cong 	}
50259d6acb3bSWANG Cong 
50261da177e4SLinus Torvalds 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
502738308473SDavid S. Miller 	if (!skb) {
502894e187c0SAmerigo Wang 		ip6_rt_put(rt);
5029ab364a6fSThomas Graf 		err = -ENOBUFS;
5030ab364a6fSThomas Graf 		goto errout;
5031ab364a6fSThomas Graf 	}
50321da177e4SLinus Torvalds 
5033d8d1f30bSChangli Gao 	skb_dst_set(skb, &rt->dst);
5034a68886a6SDavid Ahern 
5035a68886a6SDavid Ahern 	rcu_read_lock();
5036a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
5037a68886a6SDavid Ahern 
503818c3a61cSRoopa Prabhu 	if (fibmatch)
5039a68886a6SDavid Ahern 		err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
504018c3a61cSRoopa Prabhu 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
504118c3a61cSRoopa Prabhu 				    nlh->nlmsg_seq, 0);
504218c3a61cSRoopa Prabhu 	else
5043a68886a6SDavid Ahern 		err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5044a68886a6SDavid Ahern 				    &fl6.saddr, iif, RTM_NEWROUTE,
5045d4ead6b3SDavid Ahern 				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
5046d4ead6b3SDavid Ahern 				    0);
5047a68886a6SDavid Ahern 	rcu_read_unlock();
5048a68886a6SDavid Ahern 
50491da177e4SLinus Torvalds 	if (err < 0) {
5050ab364a6fSThomas Graf 		kfree_skb(skb);
5051ab364a6fSThomas Graf 		goto errout;
50521da177e4SLinus Torvalds 	}
50531da177e4SLinus Torvalds 
505415e47304SEric W. Biederman 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
5055ab364a6fSThomas Graf errout:
50561da177e4SLinus Torvalds 	return err;
50571da177e4SLinus Torvalds }
50581da177e4SLinus Torvalds 
50598d1c802bSDavid Ahern void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
506037a1d361SRoopa Prabhu 		     unsigned int nlm_flags)
50611da177e4SLinus Torvalds {
50621da177e4SLinus Torvalds 	struct sk_buff *skb;
50635578689aSDaniel Lezcano 	struct net *net = info->nl_net;
5064528c4cebSDenis V. Lunev 	u32 seq;
5065528c4cebSDenis V. Lunev 	int err;
50660d51aa80SJamal Hadi Salim 
5067528c4cebSDenis V. Lunev 	err = -ENOBUFS;
506838308473SDavid S. Miller 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
506986872cb5SThomas Graf 
507019e42e45SRoopa Prabhu 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
507138308473SDavid S. Miller 	if (!skb)
507221713ebcSThomas Graf 		goto errout;
50731da177e4SLinus Torvalds 
5074d4ead6b3SDavid Ahern 	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5075f8cfe2ceSDavid Ahern 			    event, info->portid, seq, nlm_flags);
507626932566SPatrick McHardy 	if (err < 0) {
507726932566SPatrick McHardy 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
507826932566SPatrick McHardy 		WARN_ON(err == -EMSGSIZE);
507926932566SPatrick McHardy 		kfree_skb(skb);
508026932566SPatrick McHardy 		goto errout;
508126932566SPatrick McHardy 	}
508215e47304SEric W. Biederman 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
50835578689aSDaniel Lezcano 		    info->nlh, gfp_any());
50841ce85fe4SPablo Neira Ayuso 	return;
508521713ebcSThomas Graf errout:
508621713ebcSThomas Graf 	if (err < 0)
50875578689aSDaniel Lezcano 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
50881da177e4SLinus Torvalds }
50891da177e4SLinus Torvalds 
50908ed67789SDaniel Lezcano static int ip6_route_dev_notify(struct notifier_block *this,
5091351638e7SJiri Pirko 				unsigned long event, void *ptr)
50928ed67789SDaniel Lezcano {
5093351638e7SJiri Pirko 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
5094c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
50958ed67789SDaniel Lezcano 
5096242d3a49SWANG Cong 	if (!(dev->flags & IFF_LOOPBACK))
5097242d3a49SWANG Cong 		return NOTIFY_OK;
5098242d3a49SWANG Cong 
5099242d3a49SWANG Cong 	if (event == NETDEV_REGISTER) {
5100ad1601aeSDavid Ahern 		net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
5101d8d1f30bSChangli Gao 		net->ipv6.ip6_null_entry->dst.dev = dev;
51028ed67789SDaniel Lezcano 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
51038ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5104d8d1f30bSChangli Gao 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
51058ed67789SDaniel Lezcano 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
5106d8d1f30bSChangli Gao 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
51078ed67789SDaniel Lezcano 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
51088ed67789SDaniel Lezcano #endif
510976da0704SWANG Cong 	 } else if (event == NETDEV_UNREGISTER &&
511076da0704SWANG Cong 		    dev->reg_state != NETREG_UNREGISTERED) {
511176da0704SWANG Cong 		/* NETDEV_UNREGISTER could be fired for multiple times by
511276da0704SWANG Cong 		 * netdev_wait_allrefs(). Make sure we only call this once.
511376da0704SWANG Cong 		 */
511412d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
5115242d3a49SWANG Cong #ifdef CONFIG_IPV6_MULTIPLE_TABLES
511612d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
511712d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
5118242d3a49SWANG Cong #endif
51198ed67789SDaniel Lezcano 	}
51208ed67789SDaniel Lezcano 
51218ed67789SDaniel Lezcano 	return NOTIFY_OK;
51228ed67789SDaniel Lezcano }
51238ed67789SDaniel Lezcano 
51241da177e4SLinus Torvalds /*
51251da177e4SLinus Torvalds  *	/proc
51261da177e4SLinus Torvalds  */
51271da177e4SLinus Torvalds 
51281da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
51291da177e4SLinus Torvalds static int rt6_stats_seq_show(struct seq_file *seq, void *v)
51301da177e4SLinus Torvalds {
513169ddb805SDaniel Lezcano 	struct net *net = (struct net *)seq->private;
51321da177e4SLinus Torvalds 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
513369ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_nodes,
513469ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_route_nodes,
513581eb8447SWei Wang 		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
513669ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_entries,
513769ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_cache,
5138fc66f95cSEric Dumazet 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
513969ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_discarded_routes);
51401da177e4SLinus Torvalds 
51411da177e4SLinus Torvalds 	return 0;
51421da177e4SLinus Torvalds }
51431da177e4SLinus Torvalds #endif	/* CONFIG_PROC_FS */
51441da177e4SLinus Torvalds 
51451da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
51461da177e4SLinus Torvalds 
51471da177e4SLinus Torvalds static
5148fe2c6338SJoe Perches int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
51491da177e4SLinus Torvalds 			      void __user *buffer, size_t *lenp, loff_t *ppos)
51501da177e4SLinus Torvalds {
5151c486da34SLucian Adrian Grijincu 	struct net *net;
5152c486da34SLucian Adrian Grijincu 	int delay;
5153f0fb9b28SAditya Pakki 	int ret;
5154c486da34SLucian Adrian Grijincu 	if (!write)
5155c486da34SLucian Adrian Grijincu 		return -EINVAL;
5156c486da34SLucian Adrian Grijincu 
5157c486da34SLucian Adrian Grijincu 	net = (struct net *)ctl->extra1;
5158c486da34SLucian Adrian Grijincu 	delay = net->ipv6.sysctl.flush_delay;
5159f0fb9b28SAditya Pakki 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
5160f0fb9b28SAditya Pakki 	if (ret)
5161f0fb9b28SAditya Pakki 		return ret;
5162f0fb9b28SAditya Pakki 
51632ac3ac8fSMichal Kubeček 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
51641da177e4SLinus Torvalds 	return 0;
51651da177e4SLinus Torvalds }
51661da177e4SLinus Torvalds 
51677c6bb7d2SDavid Ahern static int zero;
51687c6bb7d2SDavid Ahern static int one = 1;
51697c6bb7d2SDavid Ahern 
5170ed792e28SDavid Ahern static struct ctl_table ipv6_route_table_template[] = {
51711da177e4SLinus Torvalds 	{
51721da177e4SLinus Torvalds 		.procname	=	"flush",
51734990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.flush_delay,
51741da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
517589c8b3a1SDave Jones 		.mode		=	0200,
51766d9f239aSAlexey Dobriyan 		.proc_handler	=	ipv6_sysctl_rtcache_flush
51771da177e4SLinus Torvalds 	},
51781da177e4SLinus Torvalds 	{
51791da177e4SLinus Torvalds 		.procname	=	"gc_thresh",
51809a7ec3a9SDaniel Lezcano 		.data		=	&ip6_dst_ops_template.gc_thresh,
51811da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51821da177e4SLinus Torvalds 		.mode		=	0644,
51836d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec,
51841da177e4SLinus Torvalds 	},
51851da177e4SLinus Torvalds 	{
51861da177e4SLinus Torvalds 		.procname	=	"max_size",
51874990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
51881da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51891da177e4SLinus Torvalds 		.mode		=	0644,
51906d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec,
51911da177e4SLinus Torvalds 	},
51921da177e4SLinus Torvalds 	{
51931da177e4SLinus Torvalds 		.procname	=	"gc_min_interval",
51944990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
51951da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51961da177e4SLinus Torvalds 		.mode		=	0644,
51976d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
51981da177e4SLinus Torvalds 	},
51991da177e4SLinus Torvalds 	{
52001da177e4SLinus Torvalds 		.procname	=	"gc_timeout",
52014990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
52021da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52031da177e4SLinus Torvalds 		.mode		=	0644,
52046d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
52051da177e4SLinus Torvalds 	},
52061da177e4SLinus Torvalds 	{
52071da177e4SLinus Torvalds 		.procname	=	"gc_interval",
52084990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
52091da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52101da177e4SLinus Torvalds 		.mode		=	0644,
52116d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
52121da177e4SLinus Torvalds 	},
52131da177e4SLinus Torvalds 	{
52141da177e4SLinus Torvalds 		.procname	=	"gc_elasticity",
52154990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
52161da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52171da177e4SLinus Torvalds 		.mode		=	0644,
5218f3d3f616SMin Zhang 		.proc_handler	=	proc_dointvec,
52191da177e4SLinus Torvalds 	},
52201da177e4SLinus Torvalds 	{
52211da177e4SLinus Torvalds 		.procname	=	"mtu_expires",
52224990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
52231da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52241da177e4SLinus Torvalds 		.mode		=	0644,
52256d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
52261da177e4SLinus Torvalds 	},
52271da177e4SLinus Torvalds 	{
52281da177e4SLinus Torvalds 		.procname	=	"min_adv_mss",
52294990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
52301da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52311da177e4SLinus Torvalds 		.mode		=	0644,
5232f3d3f616SMin Zhang 		.proc_handler	=	proc_dointvec,
52331da177e4SLinus Torvalds 	},
52341da177e4SLinus Torvalds 	{
52351da177e4SLinus Torvalds 		.procname	=	"gc_min_interval_ms",
52364990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
52371da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52381da177e4SLinus Torvalds 		.mode		=	0644,
52396d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_ms_jiffies,
52401da177e4SLinus Torvalds 	},
52417c6bb7d2SDavid Ahern 	{
52427c6bb7d2SDavid Ahern 		.procname	=	"skip_notify_on_dev_down",
52437c6bb7d2SDavid Ahern 		.data		=	&init_net.ipv6.sysctl.skip_notify_on_dev_down,
52447c6bb7d2SDavid Ahern 		.maxlen		=	sizeof(int),
52457c6bb7d2SDavid Ahern 		.mode		=	0644,
52467c6bb7d2SDavid Ahern 		.proc_handler	=	proc_dointvec,
52477c6bb7d2SDavid Ahern 		.extra1		=	&zero,
52487c6bb7d2SDavid Ahern 		.extra2		=	&one,
52497c6bb7d2SDavid Ahern 	},
5250f8572d8fSEric W. Biederman 	{ }
52511da177e4SLinus Torvalds };
52521da177e4SLinus Torvalds 
52532c8c1e72SAlexey Dobriyan struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
5254760f2d01SDaniel Lezcano {
5255760f2d01SDaniel Lezcano 	struct ctl_table *table;
5256760f2d01SDaniel Lezcano 
5257760f2d01SDaniel Lezcano 	table = kmemdup(ipv6_route_table_template,
5258760f2d01SDaniel Lezcano 			sizeof(ipv6_route_table_template),
5259760f2d01SDaniel Lezcano 			GFP_KERNEL);
52605ee09105SYOSHIFUJI Hideaki 
52615ee09105SYOSHIFUJI Hideaki 	if (table) {
52625ee09105SYOSHIFUJI Hideaki 		table[0].data = &net->ipv6.sysctl.flush_delay;
5263c486da34SLucian Adrian Grijincu 		table[0].extra1 = net;
526486393e52SAlexey Dobriyan 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
52655ee09105SYOSHIFUJI Hideaki 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
52665ee09105SYOSHIFUJI Hideaki 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
52675ee09105SYOSHIFUJI Hideaki 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
52685ee09105SYOSHIFUJI Hideaki 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
52695ee09105SYOSHIFUJI Hideaki 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
52705ee09105SYOSHIFUJI Hideaki 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
52715ee09105SYOSHIFUJI Hideaki 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
52729c69fabeSAlexey Dobriyan 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
52737c6bb7d2SDavid Ahern 		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
5274464dc801SEric W. Biederman 
5275464dc801SEric W. Biederman 		/* Don't export sysctls to unprivileged users */
5276464dc801SEric W. Biederman 		if (net->user_ns != &init_user_ns)
5277464dc801SEric W. Biederman 			table[0].procname = NULL;
52785ee09105SYOSHIFUJI Hideaki 	}
52795ee09105SYOSHIFUJI Hideaki 
5280760f2d01SDaniel Lezcano 	return table;
5281760f2d01SDaniel Lezcano }
52821da177e4SLinus Torvalds #endif
52831da177e4SLinus Torvalds 
52842c8c1e72SAlexey Dobriyan static int __net_init ip6_route_net_init(struct net *net)
5285cdb18761SDaniel Lezcano {
5286633d424bSPavel Emelyanov 	int ret = -ENOMEM;
52878ed67789SDaniel Lezcano 
528886393e52SAlexey Dobriyan 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
528986393e52SAlexey Dobriyan 	       sizeof(net->ipv6.ip6_dst_ops));
5290f2fc6a54SBenjamin Thery 
5291fc66f95cSEric Dumazet 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5292fc66f95cSEric Dumazet 		goto out_ip6_dst_ops;
5293fc66f95cSEric Dumazet 
5294421842edSDavid Ahern 	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5295421842edSDavid Ahern 					    sizeof(*net->ipv6.fib6_null_entry),
5296421842edSDavid Ahern 					    GFP_KERNEL);
5297421842edSDavid Ahern 	if (!net->ipv6.fib6_null_entry)
5298421842edSDavid Ahern 		goto out_ip6_dst_entries;
5299421842edSDavid Ahern 
53008ed67789SDaniel Lezcano 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
53018ed67789SDaniel Lezcano 					   sizeof(*net->ipv6.ip6_null_entry),
53028ed67789SDaniel Lezcano 					   GFP_KERNEL);
53038ed67789SDaniel Lezcano 	if (!net->ipv6.ip6_null_entry)
5304421842edSDavid Ahern 		goto out_fib6_null_entry;
5305d8d1f30bSChangli Gao 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
530662fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
530762fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
53088ed67789SDaniel Lezcano 
53098ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5310feca7d8cSVincent Bernat 	net->ipv6.fib6_has_custom_rules = false;
53118ed67789SDaniel Lezcano 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
53128ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_prohibit_entry),
53138ed67789SDaniel Lezcano 					       GFP_KERNEL);
531468fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_prohibit_entry)
531568fffc67SPeter Zijlstra 		goto out_ip6_null_entry;
5316d8d1f30bSChangli Gao 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
531762fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
531862fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
53198ed67789SDaniel Lezcano 
53208ed67789SDaniel Lezcano 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
53218ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
53228ed67789SDaniel Lezcano 					       GFP_KERNEL);
532368fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_blk_hole_entry)
532468fffc67SPeter Zijlstra 		goto out_ip6_prohibit_entry;
5325d8d1f30bSChangli Gao 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
532662fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
532762fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
53288ed67789SDaniel Lezcano #endif
53298ed67789SDaniel Lezcano 
5330b339a47cSPeter Zijlstra 	net->ipv6.sysctl.flush_delay = 0;
5331b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
5332b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5333b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5334b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5335b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5336b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5337b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
53387c6bb7d2SDavid Ahern 	net->ipv6.sysctl.skip_notify_on_dev_down = 0;
5339b339a47cSPeter Zijlstra 
53406891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
53416891a346SBenjamin Thery 
53428ed67789SDaniel Lezcano 	ret = 0;
53438ed67789SDaniel Lezcano out:
53448ed67789SDaniel Lezcano 	return ret;
5345f2fc6a54SBenjamin Thery 
534668fffc67SPeter Zijlstra #ifdef CONFIG_IPV6_MULTIPLE_TABLES
534768fffc67SPeter Zijlstra out_ip6_prohibit_entry:
534868fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_prohibit_entry);
534968fffc67SPeter Zijlstra out_ip6_null_entry:
535068fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_null_entry);
535168fffc67SPeter Zijlstra #endif
5352421842edSDavid Ahern out_fib6_null_entry:
5353421842edSDavid Ahern 	kfree(net->ipv6.fib6_null_entry);
5354fc66f95cSEric Dumazet out_ip6_dst_entries:
5355fc66f95cSEric Dumazet 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5356f2fc6a54SBenjamin Thery out_ip6_dst_ops:
5357f2fc6a54SBenjamin Thery 	goto out;
5358cdb18761SDaniel Lezcano }
5359cdb18761SDaniel Lezcano 
53602c8c1e72SAlexey Dobriyan static void __net_exit ip6_route_net_exit(struct net *net)
5361cdb18761SDaniel Lezcano {
5362421842edSDavid Ahern 	kfree(net->ipv6.fib6_null_entry);
53638ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_null_entry);
53648ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
53658ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_prohibit_entry);
53668ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_blk_hole_entry);
53678ed67789SDaniel Lezcano #endif
536841bb78b4SXiaotian Feng 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5369cdb18761SDaniel Lezcano }
5370cdb18761SDaniel Lezcano 
5371d189634eSThomas Graf static int __net_init ip6_route_net_init_late(struct net *net)
5372d189634eSThomas Graf {
5373d189634eSThomas Graf #ifdef CONFIG_PROC_FS
5374c3506372SChristoph Hellwig 	proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5375c3506372SChristoph Hellwig 			sizeof(struct ipv6_route_iter));
53763617d949SChristoph Hellwig 	proc_create_net_single("rt6_stats", 0444, net->proc_net,
53773617d949SChristoph Hellwig 			rt6_stats_seq_show, NULL);
5378d189634eSThomas Graf #endif
5379d189634eSThomas Graf 	return 0;
5380d189634eSThomas Graf }
5381d189634eSThomas Graf 
5382d189634eSThomas Graf static void __net_exit ip6_route_net_exit_late(struct net *net)
5383d189634eSThomas Graf {
5384d189634eSThomas Graf #ifdef CONFIG_PROC_FS
5385ece31ffdSGao feng 	remove_proc_entry("ipv6_route", net->proc_net);
5386ece31ffdSGao feng 	remove_proc_entry("rt6_stats", net->proc_net);
5387d189634eSThomas Graf #endif
5388d189634eSThomas Graf }
5389d189634eSThomas Graf 
5390cdb18761SDaniel Lezcano static struct pernet_operations ip6_route_net_ops = {
5391cdb18761SDaniel Lezcano 	.init = ip6_route_net_init,
5392cdb18761SDaniel Lezcano 	.exit = ip6_route_net_exit,
5393cdb18761SDaniel Lezcano };
5394cdb18761SDaniel Lezcano 
5395c3426b47SDavid S. Miller static int __net_init ipv6_inetpeer_init(struct net *net)
5396c3426b47SDavid S. Miller {
5397c3426b47SDavid S. Miller 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5398c3426b47SDavid S. Miller 
5399c3426b47SDavid S. Miller 	if (!bp)
5400c3426b47SDavid S. Miller 		return -ENOMEM;
5401c3426b47SDavid S. Miller 	inet_peer_base_init(bp);
5402c3426b47SDavid S. Miller 	net->ipv6.peers = bp;
5403c3426b47SDavid S. Miller 	return 0;
5404c3426b47SDavid S. Miller }
5405c3426b47SDavid S. Miller 
5406c3426b47SDavid S. Miller static void __net_exit ipv6_inetpeer_exit(struct net *net)
5407c3426b47SDavid S. Miller {
5408c3426b47SDavid S. Miller 	struct inet_peer_base *bp = net->ipv6.peers;
5409c3426b47SDavid S. Miller 
5410c3426b47SDavid S. Miller 	net->ipv6.peers = NULL;
541156a6b248SDavid S. Miller 	inetpeer_invalidate_tree(bp);
5412c3426b47SDavid S. Miller 	kfree(bp);
5413c3426b47SDavid S. Miller }
5414c3426b47SDavid S. Miller 
54152b823f72SDavid S. Miller static struct pernet_operations ipv6_inetpeer_ops = {
5416c3426b47SDavid S. Miller 	.init	=	ipv6_inetpeer_init,
5417c3426b47SDavid S. Miller 	.exit	=	ipv6_inetpeer_exit,
5418c3426b47SDavid S. Miller };
5419c3426b47SDavid S. Miller 
5420d189634eSThomas Graf static struct pernet_operations ip6_route_net_late_ops = {
5421d189634eSThomas Graf 	.init = ip6_route_net_init_late,
5422d189634eSThomas Graf 	.exit = ip6_route_net_exit_late,
5423d189634eSThomas Graf };
5424d189634eSThomas Graf 
54258ed67789SDaniel Lezcano static struct notifier_block ip6_route_dev_notifier = {
54268ed67789SDaniel Lezcano 	.notifier_call = ip6_route_dev_notify,
5427242d3a49SWANG Cong 	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
54288ed67789SDaniel Lezcano };
54298ed67789SDaniel Lezcano 
54302f460933SWANG Cong void __init ip6_route_init_special_entries(void)
54312f460933SWANG Cong {
54322f460933SWANG Cong 	/* Registering of the loopback is done before this portion of code,
54332f460933SWANG Cong 	 * the loopback reference in rt6_info will not be taken, do it
54342f460933SWANG Cong 	 * manually for init_net */
5435ad1601aeSDavid Ahern 	init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
54362f460933SWANG Cong 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
54372f460933SWANG Cong 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
54382f460933SWANG Cong   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
54392f460933SWANG Cong 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
54402f460933SWANG Cong 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
54412f460933SWANG Cong 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
54422f460933SWANG Cong 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
54432f460933SWANG Cong   #endif
54442f460933SWANG Cong }
54452f460933SWANG Cong 
5446433d49c3SDaniel Lezcano int __init ip6_route_init(void)
54471da177e4SLinus Torvalds {
5448433d49c3SDaniel Lezcano 	int ret;
54498d0b94afSMartin KaFai Lau 	int cpu;
5450433d49c3SDaniel Lezcano 
54519a7ec3a9SDaniel Lezcano 	ret = -ENOMEM;
54529a7ec3a9SDaniel Lezcano 	ip6_dst_ops_template.kmem_cachep =
54539a7ec3a9SDaniel Lezcano 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
54549a7ec3a9SDaniel Lezcano 				  SLAB_HWCACHE_ALIGN, NULL);
54559a7ec3a9SDaniel Lezcano 	if (!ip6_dst_ops_template.kmem_cachep)
5456c19a28e1SFernando Carrijo 		goto out;
545714e50e57SDavid S. Miller 
5458fc66f95cSEric Dumazet 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
54598ed67789SDaniel Lezcano 	if (ret)
5460bdb3289fSDaniel Lezcano 		goto out_kmem_cache;
5461bdb3289fSDaniel Lezcano 
5462c3426b47SDavid S. Miller 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5463c3426b47SDavid S. Miller 	if (ret)
5464e8803b6cSDavid S. Miller 		goto out_dst_entries;
54652a0c451aSThomas Graf 
54667e52b33bSDavid S. Miller 	ret = register_pernet_subsys(&ip6_route_net_ops);
54677e52b33bSDavid S. Miller 	if (ret)
54687e52b33bSDavid S. Miller 		goto out_register_inetpeer;
5469c3426b47SDavid S. Miller 
54705dc121e9SArnaud Ebalard 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
54715dc121e9SArnaud Ebalard 
5472e8803b6cSDavid S. Miller 	ret = fib6_init();
5473433d49c3SDaniel Lezcano 	if (ret)
54748ed67789SDaniel Lezcano 		goto out_register_subsys;
5475433d49c3SDaniel Lezcano 
5476433d49c3SDaniel Lezcano 	ret = xfrm6_init();
5477433d49c3SDaniel Lezcano 	if (ret)
5478e8803b6cSDavid S. Miller 		goto out_fib6_init;
5479c35b7e72SDaniel Lezcano 
5480433d49c3SDaniel Lezcano 	ret = fib6_rules_init();
5481433d49c3SDaniel Lezcano 	if (ret)
5482433d49c3SDaniel Lezcano 		goto xfrm6_init;
54837e5449c2SDaniel Lezcano 
5484d189634eSThomas Graf 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
5485d189634eSThomas Graf 	if (ret)
5486d189634eSThomas Graf 		goto fib6_rules_init;
5487d189634eSThomas Graf 
548816feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
548916feebcfSFlorian Westphal 				   inet6_rtm_newroute, NULL, 0);
549016feebcfSFlorian Westphal 	if (ret < 0)
549116feebcfSFlorian Westphal 		goto out_register_late_subsys;
549216feebcfSFlorian Westphal 
549316feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
549416feebcfSFlorian Westphal 				   inet6_rtm_delroute, NULL, 0);
549516feebcfSFlorian Westphal 	if (ret < 0)
549616feebcfSFlorian Westphal 		goto out_register_late_subsys;
549716feebcfSFlorian Westphal 
549816feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
549916feebcfSFlorian Westphal 				   inet6_rtm_getroute, NULL,
550016feebcfSFlorian Westphal 				   RTNL_FLAG_DOIT_UNLOCKED);
550116feebcfSFlorian Westphal 	if (ret < 0)
5502d189634eSThomas Graf 		goto out_register_late_subsys;
5503433d49c3SDaniel Lezcano 
55048ed67789SDaniel Lezcano 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
5505cdb18761SDaniel Lezcano 	if (ret)
5506d189634eSThomas Graf 		goto out_register_late_subsys;
55078ed67789SDaniel Lezcano 
55088d0b94afSMartin KaFai Lau 	for_each_possible_cpu(cpu) {
55098d0b94afSMartin KaFai Lau 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
55108d0b94afSMartin KaFai Lau 
55118d0b94afSMartin KaFai Lau 		INIT_LIST_HEAD(&ul->head);
55128d0b94afSMartin KaFai Lau 		spin_lock_init(&ul->lock);
55138d0b94afSMartin KaFai Lau 	}
55148d0b94afSMartin KaFai Lau 
5515433d49c3SDaniel Lezcano out:
5516433d49c3SDaniel Lezcano 	return ret;
5517433d49c3SDaniel Lezcano 
5518d189634eSThomas Graf out_register_late_subsys:
551916feebcfSFlorian Westphal 	rtnl_unregister_all(PF_INET6);
5520d189634eSThomas Graf 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5521433d49c3SDaniel Lezcano fib6_rules_init:
5522433d49c3SDaniel Lezcano 	fib6_rules_cleanup();
5523433d49c3SDaniel Lezcano xfrm6_init:
5524433d49c3SDaniel Lezcano 	xfrm6_fini();
55252a0c451aSThomas Graf out_fib6_init:
55262a0c451aSThomas Graf 	fib6_gc_cleanup();
55278ed67789SDaniel Lezcano out_register_subsys:
55288ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
55297e52b33bSDavid S. Miller out_register_inetpeer:
55307e52b33bSDavid S. Miller 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
5531fc66f95cSEric Dumazet out_dst_entries:
5532fc66f95cSEric Dumazet 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5533433d49c3SDaniel Lezcano out_kmem_cache:
5534f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5535433d49c3SDaniel Lezcano 	goto out;
55361da177e4SLinus Torvalds }
55371da177e4SLinus Torvalds 
55381da177e4SLinus Torvalds void ip6_route_cleanup(void)
55391da177e4SLinus Torvalds {
55408ed67789SDaniel Lezcano 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
5541d189634eSThomas Graf 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5542101367c2SThomas Graf 	fib6_rules_cleanup();
55431da177e4SLinus Torvalds 	xfrm6_fini();
55441da177e4SLinus Torvalds 	fib6_gc_cleanup();
5545c3426b47SDavid S. Miller 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
55468ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
554741bb78b4SXiaotian Feng 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5548f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
55491da177e4SLinus Torvalds }
5550