xref: /openbmc/linux/net/ipv6/route.c (revision 7e4b5128757397132ffff1d7b1be9f992e9cd9f2)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  *	Linux INET6 implementation
31da177e4SLinus Torvalds  *	FIB front-end.
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  *	Authors:
61da177e4SLinus Torvalds  *	Pedro Roque		<roque@di.fc.ul.pt>
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
91da177e4SLinus Torvalds  *      modify it under the terms of the GNU General Public License
101da177e4SLinus Torvalds  *      as published by the Free Software Foundation; either version
111da177e4SLinus Torvalds  *      2 of the License, or (at your option) any later version.
121da177e4SLinus Torvalds  */
131da177e4SLinus Torvalds 
141da177e4SLinus Torvalds /*	Changes:
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  *	YOSHIFUJI Hideaki @USAGI
171da177e4SLinus Torvalds  *		reworked default router selection.
181da177e4SLinus Torvalds  *		- respect outgoing interface
191da177e4SLinus Torvalds  *		- select from (probably) reachable routers (i.e.
201da177e4SLinus Torvalds  *		routers in REACHABLE, STALE, DELAY or PROBE states).
211da177e4SLinus Torvalds  *		- always select the same router if it is (probably)
221da177e4SLinus Torvalds  *		reachable.  otherwise, round-robin the list.
23c0bece9fSYOSHIFUJI Hideaki  *	Ville Nuorvala
24c0bece9fSYOSHIFUJI Hideaki  *		Fixed routing subtrees.
251da177e4SLinus Torvalds  */
261da177e4SLinus Torvalds 
27f3213831SJoe Perches #define pr_fmt(fmt) "IPv6: " fmt
28f3213831SJoe Perches 
294fc268d2SRandy Dunlap #include <linux/capability.h>
301da177e4SLinus Torvalds #include <linux/errno.h>
31bc3b2d7fSPaul Gortmaker #include <linux/export.h>
321da177e4SLinus Torvalds #include <linux/types.h>
331da177e4SLinus Torvalds #include <linux/times.h>
341da177e4SLinus Torvalds #include <linux/socket.h>
351da177e4SLinus Torvalds #include <linux/sockios.h>
361da177e4SLinus Torvalds #include <linux/net.h>
371da177e4SLinus Torvalds #include <linux/route.h>
381da177e4SLinus Torvalds #include <linux/netdevice.h>
391da177e4SLinus Torvalds #include <linux/in6.h>
407bc570c8SYOSHIFUJI Hideaki #include <linux/mroute6.h>
411da177e4SLinus Torvalds #include <linux/init.h>
421da177e4SLinus Torvalds #include <linux/if_arp.h>
431da177e4SLinus Torvalds #include <linux/proc_fs.h>
441da177e4SLinus Torvalds #include <linux/seq_file.h>
455b7c931dSDaniel Lezcano #include <linux/nsproxy.h>
465a0e3ad6STejun Heo #include <linux/slab.h>
4735732d01SWei Wang #include <linux/jhash.h>
48457c4cbcSEric W. Biederman #include <net/net_namespace.h>
491da177e4SLinus Torvalds #include <net/snmp.h>
501da177e4SLinus Torvalds #include <net/ipv6.h>
511da177e4SLinus Torvalds #include <net/ip6_fib.h>
521da177e4SLinus Torvalds #include <net/ip6_route.h>
531da177e4SLinus Torvalds #include <net/ndisc.h>
541da177e4SLinus Torvalds #include <net/addrconf.h>
551da177e4SLinus Torvalds #include <net/tcp.h>
561da177e4SLinus Torvalds #include <linux/rtnetlink.h>
571da177e4SLinus Torvalds #include <net/dst.h>
58904af04dSJiri Benc #include <net/dst_metadata.h>
591da177e4SLinus Torvalds #include <net/xfrm.h>
608d71740cSTom Tucker #include <net/netevent.h>
6121713ebcSThomas Graf #include <net/netlink.h>
6251ebd318SNicolas Dichtel #include <net/nexthop.h>
6319e42e45SRoopa Prabhu #include <net/lwtunnel.h>
64904af04dSJiri Benc #include <net/ip_tunnels.h>
65ca254490SDavid Ahern #include <net/l3mdev.h>
66eacb9384SRoopa Prabhu #include <net/ip.h>
677c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
681da177e4SLinus Torvalds 
691da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
701da177e4SLinus Torvalds #include <linux/sysctl.h>
711da177e4SLinus Torvalds #endif
721da177e4SLinus Torvalds 
7330d444d3SDavid Ahern static int ip6_rt_type_to_error(u8 fib6_type);
7430d444d3SDavid Ahern 
7530d444d3SDavid Ahern #define CREATE_TRACE_POINTS
7630d444d3SDavid Ahern #include <trace/events/fib6.h>
7730d444d3SDavid Ahern EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
7830d444d3SDavid Ahern #undef CREATE_TRACE_POINTS
7930d444d3SDavid Ahern 
/*
 * Outcome of a next-hop neighbour reachability check.
 *
 * All failure codes are negative, so callers can test "< 0" and hand the
 * value back unchanged as a route score.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1
};
86afc154e9SHannes Frederic Sowa 
871da177e4SLinus Torvalds static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
880dbaee3bSDavid S. Miller static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
89ebb762f2SSteffen Klassert static unsigned int	 ip6_mtu(const struct dst_entry *dst);
901da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *);
911da177e4SLinus Torvalds static void		ip6_dst_destroy(struct dst_entry *);
921da177e4SLinus Torvalds static void		ip6_dst_ifdown(struct dst_entry *,
931da177e4SLinus Torvalds 				       struct net_device *dev, int how);
94569d3645SDaniel Lezcano static int		 ip6_dst_gc(struct dst_ops *ops);
951da177e4SLinus Torvalds 
961da177e4SLinus Torvalds static int		ip6_pkt_discard(struct sk_buff *skb);
97ede2059dSEric W. Biederman static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
987150aedeSKamala R static int		ip6_pkt_prohibit(struct sk_buff *skb);
99ede2059dSEric W. Biederman static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1001da177e4SLinus Torvalds static void		ip6_link_failure(struct sk_buff *skb);
1016700c270SDavid S. Miller static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1026700c270SDavid S. Miller 					   struct sk_buff *skb, u32 mtu);
1036700c270SDavid S. Miller static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
1046700c270SDavid S. Miller 					struct sk_buff *skb);
105702cea56SDavid Ahern static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
106702cea56SDavid Ahern 			   int strict);
1078d1c802bSDavid Ahern static size_t rt6_nlmsg_size(struct fib6_info *rt);
108d4ead6b3SDavid Ahern static int rt6_fill_node(struct net *net, struct sk_buff *skb,
1098d1c802bSDavid Ahern 			 struct fib6_info *rt, struct dst_entry *dst,
110d4ead6b3SDavid Ahern 			 struct in6_addr *dest, struct in6_addr *src,
11116a16cd3SDavid Ahern 			 int iif, int type, u32 portid, u32 seq,
11216a16cd3SDavid Ahern 			 unsigned int flags);
113*7e4b5128SDavid Ahern static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
11435732d01SWei Wang 					   struct in6_addr *daddr,
11535732d01SWei Wang 					   struct in6_addr *saddr);
1161da177e4SLinus Torvalds 
11770ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
1188d1c802bSDavid Ahern static struct fib6_info *rt6_add_route_info(struct net *net,
119b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
120830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
121830218c1SDavid Ahern 					   struct net_device *dev,
12295c96174SEric Dumazet 					   unsigned int pref);
1238d1c802bSDavid Ahern static struct fib6_info *rt6_get_route_info(struct net *net,
124b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
125830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
126830218c1SDavid Ahern 					   struct net_device *dev);
12770ceb4f5SYOSHIFUJI Hideaki #endif
12870ceb4f5SYOSHIFUJI Hideaki 
1298d0b94afSMartin KaFai Lau struct uncached_list {
1308d0b94afSMartin KaFai Lau 	spinlock_t		lock;
1318d0b94afSMartin KaFai Lau 	struct list_head	head;
1328d0b94afSMartin KaFai Lau };
1338d0b94afSMartin KaFai Lau 
1348d0b94afSMartin KaFai Lau static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
1358d0b94afSMartin KaFai Lau 
136510c321bSXin Long void rt6_uncached_list_add(struct rt6_info *rt)
1378d0b94afSMartin KaFai Lau {
1388d0b94afSMartin KaFai Lau 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
1398d0b94afSMartin KaFai Lau 
1408d0b94afSMartin KaFai Lau 	rt->rt6i_uncached_list = ul;
1418d0b94afSMartin KaFai Lau 
1428d0b94afSMartin KaFai Lau 	spin_lock_bh(&ul->lock);
1438d0b94afSMartin KaFai Lau 	list_add_tail(&rt->rt6i_uncached, &ul->head);
1448d0b94afSMartin KaFai Lau 	spin_unlock_bh(&ul->lock);
1458d0b94afSMartin KaFai Lau }
1468d0b94afSMartin KaFai Lau 
147510c321bSXin Long void rt6_uncached_list_del(struct rt6_info *rt)
1488d0b94afSMartin KaFai Lau {
1498d0b94afSMartin KaFai Lau 	if (!list_empty(&rt->rt6i_uncached)) {
1508d0b94afSMartin KaFai Lau 		struct uncached_list *ul = rt->rt6i_uncached_list;
15181eb8447SWei Wang 		struct net *net = dev_net(rt->dst.dev);
1528d0b94afSMartin KaFai Lau 
1538d0b94afSMartin KaFai Lau 		spin_lock_bh(&ul->lock);
1548d0b94afSMartin KaFai Lau 		list_del(&rt->rt6i_uncached);
15581eb8447SWei Wang 		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
1568d0b94afSMartin KaFai Lau 		spin_unlock_bh(&ul->lock);
1578d0b94afSMartin KaFai Lau 	}
1588d0b94afSMartin KaFai Lau }
1598d0b94afSMartin KaFai Lau 
1608d0b94afSMartin KaFai Lau static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
1618d0b94afSMartin KaFai Lau {
1628d0b94afSMartin KaFai Lau 	struct net_device *loopback_dev = net->loopback_dev;
1638d0b94afSMartin KaFai Lau 	int cpu;
1648d0b94afSMartin KaFai Lau 
165e332bc67SEric W. Biederman 	if (dev == loopback_dev)
166e332bc67SEric W. Biederman 		return;
167e332bc67SEric W. Biederman 
1688d0b94afSMartin KaFai Lau 	for_each_possible_cpu(cpu) {
1698d0b94afSMartin KaFai Lau 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
1708d0b94afSMartin KaFai Lau 		struct rt6_info *rt;
1718d0b94afSMartin KaFai Lau 
1728d0b94afSMartin KaFai Lau 		spin_lock_bh(&ul->lock);
1738d0b94afSMartin KaFai Lau 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
1748d0b94afSMartin KaFai Lau 			struct inet6_dev *rt_idev = rt->rt6i_idev;
1758d0b94afSMartin KaFai Lau 			struct net_device *rt_dev = rt->dst.dev;
1768d0b94afSMartin KaFai Lau 
177e332bc67SEric W. Biederman 			if (rt_idev->dev == dev) {
1788d0b94afSMartin KaFai Lau 				rt->rt6i_idev = in6_dev_get(loopback_dev);
1798d0b94afSMartin KaFai Lau 				in6_dev_put(rt_idev);
1808d0b94afSMartin KaFai Lau 			}
1818d0b94afSMartin KaFai Lau 
182e332bc67SEric W. Biederman 			if (rt_dev == dev) {
1838d0b94afSMartin KaFai Lau 				rt->dst.dev = loopback_dev;
1848d0b94afSMartin KaFai Lau 				dev_hold(rt->dst.dev);
1858d0b94afSMartin KaFai Lau 				dev_put(rt_dev);
1868d0b94afSMartin KaFai Lau 			}
1878d0b94afSMartin KaFai Lau 		}
1888d0b94afSMartin KaFai Lau 		spin_unlock_bh(&ul->lock);
1898d0b94afSMartin KaFai Lau 	}
1908d0b94afSMartin KaFai Lau }
1918d0b94afSMartin KaFai Lau 
192f8a1b43bSDavid Ahern static inline const void *choose_neigh_daddr(const struct in6_addr *p,
193f894cbf8SDavid S. Miller 					     struct sk_buff *skb,
194f894cbf8SDavid S. Miller 					     const void *daddr)
19539232973SDavid S. Miller {
196a7563f34SDavid S. Miller 	if (!ipv6_addr_any(p))
19739232973SDavid S. Miller 		return (const void *) p;
198f894cbf8SDavid S. Miller 	else if (skb)
199f894cbf8SDavid S. Miller 		return &ipv6_hdr(skb)->daddr;
20039232973SDavid S. Miller 	return daddr;
20139232973SDavid S. Miller }
20239232973SDavid S. Miller 
203f8a1b43bSDavid Ahern struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
204f8a1b43bSDavid Ahern 				   struct net_device *dev,
205f894cbf8SDavid S. Miller 				   struct sk_buff *skb,
206f894cbf8SDavid S. Miller 				   const void *daddr)
207d3aaeb38SDavid S. Miller {
20839232973SDavid S. Miller 	struct neighbour *n;
20939232973SDavid S. Miller 
210f8a1b43bSDavid Ahern 	daddr = choose_neigh_daddr(gw, skb, daddr);
211f8a1b43bSDavid Ahern 	n = __ipv6_neigh_lookup(dev, daddr);
212f83c7790SDavid S. Miller 	if (n)
213f83c7790SDavid S. Miller 		return n;
2147adf3246SStefano Brivio 
2157adf3246SStefano Brivio 	n = neigh_create(&nd_tbl, daddr, dev);
2167adf3246SStefano Brivio 	return IS_ERR(n) ? NULL : n;
217f8a1b43bSDavid Ahern }
218f8a1b43bSDavid Ahern 
219f8a1b43bSDavid Ahern static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
220f8a1b43bSDavid Ahern 					      struct sk_buff *skb,
221f8a1b43bSDavid Ahern 					      const void *daddr)
222f8a1b43bSDavid Ahern {
223f8a1b43bSDavid Ahern 	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
224f8a1b43bSDavid Ahern 
225f8a1b43bSDavid Ahern 	return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
226f83c7790SDavid S. Miller }
227f83c7790SDavid S. Miller 
22863fca65dSJulian Anastasov static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
22963fca65dSJulian Anastasov {
23063fca65dSJulian Anastasov 	struct net_device *dev = dst->dev;
23163fca65dSJulian Anastasov 	struct rt6_info *rt = (struct rt6_info *)dst;
23263fca65dSJulian Anastasov 
233f8a1b43bSDavid Ahern 	daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
23463fca65dSJulian Anastasov 	if (!daddr)
23563fca65dSJulian Anastasov 		return;
23663fca65dSJulian Anastasov 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
23763fca65dSJulian Anastasov 		return;
23863fca65dSJulian Anastasov 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
23963fca65dSJulian Anastasov 		return;
24063fca65dSJulian Anastasov 	__ipv6_confirm_neigh(dev, daddr);
24163fca65dSJulian Anastasov }
24263fca65dSJulian Anastasov 
2439a7ec3a9SDaniel Lezcano static struct dst_ops ip6_dst_ops_template = {
2441da177e4SLinus Torvalds 	.family			=	AF_INET6,
2451da177e4SLinus Torvalds 	.gc			=	ip6_dst_gc,
2461da177e4SLinus Torvalds 	.gc_thresh		=	1024,
2471da177e4SLinus Torvalds 	.check			=	ip6_dst_check,
2480dbaee3bSDavid S. Miller 	.default_advmss		=	ip6_default_advmss,
249ebb762f2SSteffen Klassert 	.mtu			=	ip6_mtu,
250d4ead6b3SDavid Ahern 	.cow_metrics		=	dst_cow_metrics_generic,
2511da177e4SLinus Torvalds 	.destroy		=	ip6_dst_destroy,
2521da177e4SLinus Torvalds 	.ifdown			=	ip6_dst_ifdown,
2531da177e4SLinus Torvalds 	.negative_advice	=	ip6_negative_advice,
2541da177e4SLinus Torvalds 	.link_failure		=	ip6_link_failure,
2551da177e4SLinus Torvalds 	.update_pmtu		=	ip6_rt_update_pmtu,
2566e157b6aSDavid S. Miller 	.redirect		=	rt6_do_redirect,
2579f8955ccSEric W. Biederman 	.local_out		=	__ip6_local_out,
258f8a1b43bSDavid Ahern 	.neigh_lookup		=	ip6_dst_neigh_lookup,
25963fca65dSJulian Anastasov 	.confirm_neigh		=	ip6_confirm_neigh,
2601da177e4SLinus Torvalds };
2611da177e4SLinus Torvalds 
262ebb762f2SSteffen Klassert static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
263ec831ea7SRoland Dreier {
264618f9bc7SSteffen Klassert 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
265618f9bc7SSteffen Klassert 
266618f9bc7SSteffen Klassert 	return mtu ? : dst->dev->mtu;
267ec831ea7SRoland Dreier }
268ec831ea7SRoland Dreier 
2696700c270SDavid S. Miller static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2706700c270SDavid S. Miller 					 struct sk_buff *skb, u32 mtu)
27114e50e57SDavid S. Miller {
27214e50e57SDavid S. Miller }
27314e50e57SDavid S. Miller 
/* dst_ops->redirect hook for blackhole dsts: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
278b587ee3bSDavid S. Miller 
27914e50e57SDavid S. Miller static struct dst_ops ip6_dst_blackhole_ops = {
28014e50e57SDavid S. Miller 	.family			=	AF_INET6,
28114e50e57SDavid S. Miller 	.destroy		=	ip6_dst_destroy,
28214e50e57SDavid S. Miller 	.check			=	ip6_dst_check,
283ebb762f2SSteffen Klassert 	.mtu			=	ip6_blackhole_mtu,
284214f45c9SEric Dumazet 	.default_advmss		=	ip6_default_advmss,
28514e50e57SDavid S. Miller 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
286b587ee3bSDavid S. Miller 	.redirect		=	ip6_rt_blackhole_redirect,
2870a1f5962SMartin KaFai Lau 	.cow_metrics		=	dst_cow_metrics_generic,
288f8a1b43bSDavid Ahern 	.neigh_lookup		=	ip6_dst_neigh_lookup,
28914e50e57SDavid S. Miller };
29014e50e57SDavid S. Miller 
29162fa8a84SDavid S. Miller static const u32 ip6_template_metrics[RTAX_MAX] = {
29214edd87dSLi RongQing 	[RTAX_HOPLIMIT - 1] = 0,
29362fa8a84SDavid S. Miller };
29462fa8a84SDavid S. Miller 
2958d1c802bSDavid Ahern static const struct fib6_info fib6_null_entry_template = {
29693c2fb25SDavid Ahern 	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
29793c2fb25SDavid Ahern 	.fib6_protocol  = RTPROT_KERNEL,
29893c2fb25SDavid Ahern 	.fib6_metric	= ~(u32)0,
29993c2fb25SDavid Ahern 	.fib6_ref	= ATOMIC_INIT(1),
300421842edSDavid Ahern 	.fib6_type	= RTN_UNREACHABLE,
301421842edSDavid Ahern 	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
302421842edSDavid Ahern };
303421842edSDavid Ahern 
304fb0af4c7SEric Dumazet static const struct rt6_info ip6_null_entry_template = {
3051da177e4SLinus Torvalds 	.dst = {
3061da177e4SLinus Torvalds 		.__refcnt	= ATOMIC_INIT(1),
3071da177e4SLinus Torvalds 		.__use		= 1,
3082c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
3091da177e4SLinus Torvalds 		.error		= -ENETUNREACH,
3101da177e4SLinus Torvalds 		.input		= ip6_pkt_discard,
3111da177e4SLinus Torvalds 		.output		= ip6_pkt_discard_out,
3121da177e4SLinus Torvalds 	},
3131da177e4SLinus Torvalds 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
3141da177e4SLinus Torvalds };
3151da177e4SLinus Torvalds 
316101367c2SThomas Graf #ifdef CONFIG_IPV6_MULTIPLE_TABLES
317101367c2SThomas Graf 
318fb0af4c7SEric Dumazet static const struct rt6_info ip6_prohibit_entry_template = {
319101367c2SThomas Graf 	.dst = {
320101367c2SThomas Graf 		.__refcnt	= ATOMIC_INIT(1),
321101367c2SThomas Graf 		.__use		= 1,
3222c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
323101367c2SThomas Graf 		.error		= -EACCES,
3249ce8ade0SThomas Graf 		.input		= ip6_pkt_prohibit,
3259ce8ade0SThomas Graf 		.output		= ip6_pkt_prohibit_out,
326101367c2SThomas Graf 	},
327101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
328101367c2SThomas Graf };
329101367c2SThomas Graf 
330fb0af4c7SEric Dumazet static const struct rt6_info ip6_blk_hole_entry_template = {
331101367c2SThomas Graf 	.dst = {
332101367c2SThomas Graf 		.__refcnt	= ATOMIC_INIT(1),
333101367c2SThomas Graf 		.__use		= 1,
3342c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
335101367c2SThomas Graf 		.error		= -EINVAL,
336352e512cSHerbert Xu 		.input		= dst_discard,
337ede2059dSEric W. Biederman 		.output		= dst_discard_out,
338101367c2SThomas Graf 	},
339101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
340101367c2SThomas Graf };
341101367c2SThomas Graf 
342101367c2SThomas Graf #endif
343101367c2SThomas Graf 
344ebfa45f0SMartin KaFai Lau static void rt6_info_init(struct rt6_info *rt)
345ebfa45f0SMartin KaFai Lau {
346ebfa45f0SMartin KaFai Lau 	struct dst_entry *dst = &rt->dst;
347ebfa45f0SMartin KaFai Lau 
348ebfa45f0SMartin KaFai Lau 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
349ebfa45f0SMartin KaFai Lau 	INIT_LIST_HEAD(&rt->rt6i_uncached);
350ebfa45f0SMartin KaFai Lau }
351ebfa45f0SMartin KaFai Lau 
3521da177e4SLinus Torvalds /* allocate dst with ip6_dst_ops */
35393531c67SDavid Ahern struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
354ad706862SMartin KaFai Lau 			       int flags)
3551da177e4SLinus Torvalds {
35697bab73fSDavid S. Miller 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
357b2a9c0edSWei Wang 					1, DST_OBSOLETE_FORCE_CHK, flags);
358cf911662SDavid S. Miller 
35981eb8447SWei Wang 	if (rt) {
360ebfa45f0SMartin KaFai Lau 		rt6_info_init(rt);
36181eb8447SWei Wang 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
36281eb8447SWei Wang 	}
3638104891bSSteffen Klassert 
364cf911662SDavid S. Miller 	return rt;
3651da177e4SLinus Torvalds }
3669ab179d8SDavid Ahern EXPORT_SYMBOL(ip6_dst_alloc);
367d52d3997SMartin KaFai Lau 
3681da177e4SLinus Torvalds static void ip6_dst_destroy(struct dst_entry *dst)
3691da177e4SLinus Torvalds {
3701da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
371a68886a6SDavid Ahern 	struct fib6_info *from;
3728d0b94afSMartin KaFai Lau 	struct inet6_dev *idev;
3731da177e4SLinus Torvalds 
3741620a336SDavid Ahern 	ip_dst_metrics_put(dst);
3758d0b94afSMartin KaFai Lau 	rt6_uncached_list_del(rt);
3768d0b94afSMartin KaFai Lau 
3778d0b94afSMartin KaFai Lau 	idev = rt->rt6i_idev;
37838308473SDavid S. Miller 	if (idev) {
3791da177e4SLinus Torvalds 		rt->rt6i_idev = NULL;
3801da177e4SLinus Torvalds 		in6_dev_put(idev);
3811da177e4SLinus Torvalds 	}
3821716a961SGao feng 
383a68886a6SDavid Ahern 	rcu_read_lock();
384a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
385a68886a6SDavid Ahern 	rcu_assign_pointer(rt->from, NULL);
38693531c67SDavid Ahern 	fib6_info_release(from);
387a68886a6SDavid Ahern 	rcu_read_unlock();
388b3419363SDavid S. Miller }
389b3419363SDavid S. Miller 
3901da177e4SLinus Torvalds static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
3911da177e4SLinus Torvalds 			   int how)
3921da177e4SLinus Torvalds {
3931da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
3941da177e4SLinus Torvalds 	struct inet6_dev *idev = rt->rt6i_idev;
3955a3e55d6SDenis V. Lunev 	struct net_device *loopback_dev =
396c346dca1SYOSHIFUJI Hideaki 		dev_net(dev)->loopback_dev;
3971da177e4SLinus Torvalds 
398e5645f51SWei Wang 	if (idev && idev->dev != loopback_dev) {
399e5645f51SWei Wang 		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
40038308473SDavid S. Miller 		if (loopback_idev) {
4011da177e4SLinus Torvalds 			rt->rt6i_idev = loopback_idev;
4021da177e4SLinus Torvalds 			in6_dev_put(idev);
4031da177e4SLinus Torvalds 		}
4041da177e4SLinus Torvalds 	}
40597cac082SDavid S. Miller }
4061da177e4SLinus Torvalds 
4075973fb1eSMartin KaFai Lau static bool __rt6_check_expired(const struct rt6_info *rt)
4085973fb1eSMartin KaFai Lau {
4095973fb1eSMartin KaFai Lau 	if (rt->rt6i_flags & RTF_EXPIRES)
4105973fb1eSMartin KaFai Lau 		return time_after(jiffies, rt->dst.expires);
4115973fb1eSMartin KaFai Lau 	else
4125973fb1eSMartin KaFai Lau 		return false;
4135973fb1eSMartin KaFai Lau }
4145973fb1eSMartin KaFai Lau 
415a50feda5SEric Dumazet static bool rt6_check_expired(const struct rt6_info *rt)
4161da177e4SLinus Torvalds {
417a68886a6SDavid Ahern 	struct fib6_info *from;
418a68886a6SDavid Ahern 
419a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
420a68886a6SDavid Ahern 
4211716a961SGao feng 	if (rt->rt6i_flags & RTF_EXPIRES) {
4221716a961SGao feng 		if (time_after(jiffies, rt->dst.expires))
423a50feda5SEric Dumazet 			return true;
424a68886a6SDavid Ahern 	} else if (from) {
4251e2ea8adSXin Long 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
426a68886a6SDavid Ahern 			fib6_check_expired(from);
4271716a961SGao feng 	}
428a50feda5SEric Dumazet 	return false;
4291da177e4SLinus Torvalds }
4301da177e4SLinus Torvalds 
431b1d40991SDavid Ahern void fib6_select_path(const struct net *net, struct fib6_result *res,
432b1d40991SDavid Ahern 		      struct flowi6 *fl6, int oif, bool have_oif_match,
433b1d40991SDavid Ahern 		      const struct sk_buff *skb, int strict)
43451ebd318SNicolas Dichtel {
4358d1c802bSDavid Ahern 	struct fib6_info *sibling, *next_sibling;
436b1d40991SDavid Ahern 	struct fib6_info *match = res->f6i;
437b1d40991SDavid Ahern 
438b1d40991SDavid Ahern 	if (!match->fib6_nsiblings || have_oif_match)
439b1d40991SDavid Ahern 		goto out;
44051ebd318SNicolas Dichtel 
441b673d6ccSJakub Sitnicki 	/* We might have already computed the hash for ICMPv6 errors. In such
442b673d6ccSJakub Sitnicki 	 * case it will always be non-zero. Otherwise now is the time to do it.
443b673d6ccSJakub Sitnicki 	 */
444b673d6ccSJakub Sitnicki 	if (!fl6->mp_hash)
445b4bac172SDavid Ahern 		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
446b673d6ccSJakub Sitnicki 
447ad1601aeSDavid Ahern 	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
448b1d40991SDavid Ahern 		goto out;
449bbfcd776SIdo Schimmel 
45093c2fb25SDavid Ahern 	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
45193c2fb25SDavid Ahern 				 fib6_siblings) {
452702cea56SDavid Ahern 		const struct fib6_nh *nh = &sibling->fib6_nh;
4535e670d84SDavid Ahern 		int nh_upper_bound;
4545e670d84SDavid Ahern 
455702cea56SDavid Ahern 		nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
4565e670d84SDavid Ahern 		if (fl6->mp_hash > nh_upper_bound)
4573d709f69SIdo Schimmel 			continue;
458702cea56SDavid Ahern 		if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
45952bd4c0cSNicolas Dichtel 			break;
46051ebd318SNicolas Dichtel 		match = sibling;
46151ebd318SNicolas Dichtel 		break;
46251ebd318SNicolas Dichtel 	}
4633d709f69SIdo Schimmel 
464b1d40991SDavid Ahern out:
465b1d40991SDavid Ahern 	res->f6i = match;
466b1d40991SDavid Ahern 	res->nh = &match->fib6_nh;
46751ebd318SNicolas Dichtel }
46851ebd318SNicolas Dichtel 
4691da177e4SLinus Torvalds /*
47066f5d6ceSWei Wang  *	Route lookup. rcu_read_lock() should be held.
4711da177e4SLinus Torvalds  */
4721da177e4SLinus Torvalds 
4730c59d006SDavid Ahern static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
4740c59d006SDavid Ahern 			       const struct in6_addr *saddr, int oif, int flags)
4750c59d006SDavid Ahern {
4760c59d006SDavid Ahern 	const struct net_device *dev;
4770c59d006SDavid Ahern 
4780c59d006SDavid Ahern 	if (nh->fib_nh_flags & RTNH_F_DEAD)
4790c59d006SDavid Ahern 		return false;
4800c59d006SDavid Ahern 
4810c59d006SDavid Ahern 	dev = nh->fib_nh_dev;
4820c59d006SDavid Ahern 	if (oif) {
4830c59d006SDavid Ahern 		if (dev->ifindex == oif)
4840c59d006SDavid Ahern 			return true;
4850c59d006SDavid Ahern 	} else {
4860c59d006SDavid Ahern 		if (ipv6_chk_addr(net, saddr, dev,
4870c59d006SDavid Ahern 				  flags & RT6_LOOKUP_F_IFACE))
4880c59d006SDavid Ahern 			return true;
4890c59d006SDavid Ahern 	}
4900c59d006SDavid Ahern 
4910c59d006SDavid Ahern 	return false;
4920c59d006SDavid Ahern }
4930c59d006SDavid Ahern 
4948d1c802bSDavid Ahern static inline struct fib6_info *rt6_device_match(struct net *net,
4958d1c802bSDavid Ahern 						 struct fib6_info *rt,
496b71d1d42SEric Dumazet 						    const struct in6_addr *saddr,
4971da177e4SLinus Torvalds 						    int oif,
498d420895eSYOSHIFUJI Hideaki 						    int flags)
4991da177e4SLinus Torvalds {
5000c59d006SDavid Ahern 	const struct fib6_nh *nh;
5018d1c802bSDavid Ahern 	struct fib6_info *sprt;
5021da177e4SLinus Torvalds 
5035e670d84SDavid Ahern 	if (!oif && ipv6_addr_any(saddr) &&
504ad1601aeSDavid Ahern 	    !(rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD))
5058067bb8cSIdo Schimmel 		return rt;
506dd3abc4eSYOSHIFUJI Hideaki 
5078fb11a9aSDavid Ahern 	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
5080c59d006SDavid Ahern 		nh = &sprt->fib6_nh;
5090c59d006SDavid Ahern 		if (__rt6_device_match(net, nh, saddr, oif, flags))
5101da177e4SLinus Torvalds 			return sprt;
5111da177e4SLinus Torvalds 	}
5121da177e4SLinus Torvalds 
513eea68cd3SDavid Ahern 	if (oif && flags & RT6_LOOKUP_F_IFACE)
514421842edSDavid Ahern 		return net->ipv6.fib6_null_entry;
5151da177e4SLinus Torvalds 
516ad1601aeSDavid Ahern 	return rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
5171da177e4SLinus Torvalds }
5181da177e4SLinus Torvalds 
51927097255SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
520c2f17e82SHannes Frederic Sowa struct __rt6_probe_work {
521c2f17e82SHannes Frederic Sowa 	struct work_struct work;
522c2f17e82SHannes Frederic Sowa 	struct in6_addr target;
523c2f17e82SHannes Frederic Sowa 	struct net_device *dev;
524c2f17e82SHannes Frederic Sowa };
525c2f17e82SHannes Frederic Sowa 
526c2f17e82SHannes Frederic Sowa static void rt6_probe_deferred(struct work_struct *w)
527c2f17e82SHannes Frederic Sowa {
528c2f17e82SHannes Frederic Sowa 	struct in6_addr mcaddr;
529c2f17e82SHannes Frederic Sowa 	struct __rt6_probe_work *work =
530c2f17e82SHannes Frederic Sowa 		container_of(w, struct __rt6_probe_work, work);
531c2f17e82SHannes Frederic Sowa 
532c2f17e82SHannes Frederic Sowa 	addrconf_addr_solict_mult(&work->target, &mcaddr);
533adc176c5SErik Nordmark 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
534c2f17e82SHannes Frederic Sowa 	dev_put(work->dev);
535662f5533SMichael Büsch 	kfree(work);
536c2f17e82SHannes Frederic Sowa }
537c2f17e82SHannes Frederic Sowa 
538cc3a86c8SDavid Ahern static void rt6_probe(struct fib6_nh *fib6_nh)
53927097255SYOSHIFUJI Hideaki {
540f547fac6SSabrina Dubroca 	struct __rt6_probe_work *work = NULL;
5415e670d84SDavid Ahern 	const struct in6_addr *nh_gw;
542f2c31e32SEric Dumazet 	struct neighbour *neigh;
5435e670d84SDavid Ahern 	struct net_device *dev;
544f547fac6SSabrina Dubroca 	struct inet6_dev *idev;
5455e670d84SDavid Ahern 
54627097255SYOSHIFUJI Hideaki 	/*
54727097255SYOSHIFUJI Hideaki 	 * Okay, this does not seem to be appropriate
54827097255SYOSHIFUJI Hideaki 	 * for now, however, we need to check if it
54927097255SYOSHIFUJI Hideaki 	 * is really so; aka Router Reachability Probing.
55027097255SYOSHIFUJI Hideaki 	 *
55127097255SYOSHIFUJI Hideaki 	 * Router Reachability Probe MUST be rate-limited
55227097255SYOSHIFUJI Hideaki 	 * to no more than one per minute.
55327097255SYOSHIFUJI Hideaki 	 */
554cc3a86c8SDavid Ahern 	if (fib6_nh->fib_nh_gw_family)
555fdd6681dSAmerigo Wang 		return;
5565e670d84SDavid Ahern 
557cc3a86c8SDavid Ahern 	nh_gw = &fib6_nh->fib_nh_gw6;
558cc3a86c8SDavid Ahern 	dev = fib6_nh->fib_nh_dev;
5592152caeaSYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_lock_bh();
560f547fac6SSabrina Dubroca 	idev = __in6_dev_get(dev);
5615e670d84SDavid Ahern 	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
5622152caeaSYOSHIFUJI Hideaki / 吉藤英明 	if (neigh) {
5638d6c31bfSMartin KaFai Lau 		if (neigh->nud_state & NUD_VALID)
5648d6c31bfSMartin KaFai Lau 			goto out;
5658d6c31bfSMartin KaFai Lau 
5662152caeaSYOSHIFUJI Hideaki / 吉藤英明 		write_lock(&neigh->lock);
567990edb42SMartin KaFai Lau 		if (!(neigh->nud_state & NUD_VALID) &&
568990edb42SMartin KaFai Lau 		    time_after(jiffies,
569dcd1f572SDavid Ahern 			       neigh->updated + idev->cnf.rtr_probe_interval)) {
570c2f17e82SHannes Frederic Sowa 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
571990edb42SMartin KaFai Lau 			if (work)
5727e980569SJiri Benc 				__neigh_set_probe_once(neigh);
573990edb42SMartin KaFai Lau 		}
574c2f17e82SHannes Frederic Sowa 		write_unlock(&neigh->lock);
575cc3a86c8SDavid Ahern 	} else if (time_after(jiffies, fib6_nh->last_probe +
576f547fac6SSabrina Dubroca 				       idev->cnf.rtr_probe_interval)) {
577990edb42SMartin KaFai Lau 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
578990edb42SMartin KaFai Lau 	}
579c2f17e82SHannes Frederic Sowa 
580c2f17e82SHannes Frederic Sowa 	if (work) {
581cc3a86c8SDavid Ahern 		fib6_nh->last_probe = jiffies;
582c2f17e82SHannes Frederic Sowa 		INIT_WORK(&work->work, rt6_probe_deferred);
5835e670d84SDavid Ahern 		work->target = *nh_gw;
5845e670d84SDavid Ahern 		dev_hold(dev);
5855e670d84SDavid Ahern 		work->dev = dev;
586c2f17e82SHannes Frederic Sowa 		schedule_work(&work->work);
587c2f17e82SHannes Frederic Sowa 	}
588990edb42SMartin KaFai Lau 
5898d6c31bfSMartin KaFai Lau out:
5902152caeaSYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_unlock_bh();
591f2c31e32SEric Dumazet }
59227097255SYOSHIFUJI Hideaki #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
	/* nothing to do */
}
59627097255SYOSHIFUJI Hideaki #endif
59727097255SYOSHIFUJI Hideaki 
5981da177e4SLinus Torvalds /*
599554cfb7eSYOSHIFUJI Hideaki  * Default Router Selection (RFC 2461 6.3.6)
6001da177e4SLinus Torvalds  */
6011ba9a895SDavid Ahern static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
6021da177e4SLinus Torvalds {
603afc154e9SHannes Frederic Sowa 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
6045e670d84SDavid Ahern 	struct neighbour *neigh;
605f2c31e32SEric Dumazet 
606145a3621SYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_lock_bh();
6071ba9a895SDavid Ahern 	neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
6081ba9a895SDavid Ahern 					  &fib6_nh->fib_nh_gw6);
609145a3621SYOSHIFUJI Hideaki / 吉藤英明 	if (neigh) {
610145a3621SYOSHIFUJI Hideaki / 吉藤英明 		read_lock(&neigh->lock);
611554cfb7eSYOSHIFUJI Hideaki 		if (neigh->nud_state & NUD_VALID)
612afc154e9SHannes Frederic Sowa 			ret = RT6_NUD_SUCCEED;
613398bcbebSYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
614a5a81f0bSPaul Marks 		else if (!(neigh->nud_state & NUD_FAILED))
615afc154e9SHannes Frederic Sowa 			ret = RT6_NUD_SUCCEED;
6167e980569SJiri Benc 		else
6177e980569SJiri Benc 			ret = RT6_NUD_FAIL_PROBE;
618398bcbebSYOSHIFUJI Hideaki #endif
619145a3621SYOSHIFUJI Hideaki / 吉藤英明 		read_unlock(&neigh->lock);
620afc154e9SHannes Frederic Sowa 	} else {
621afc154e9SHannes Frederic Sowa 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
6227e980569SJiri Benc 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
623a5a81f0bSPaul Marks 	}
624145a3621SYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_unlock_bh();
625145a3621SYOSHIFUJI Hideaki / 吉藤英明 
626a5a81f0bSPaul Marks 	return ret;
6271da177e4SLinus Torvalds }
6281da177e4SLinus Torvalds 
629702cea56SDavid Ahern static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
630702cea56SDavid Ahern 			   int strict)
631554cfb7eSYOSHIFUJI Hideaki {
6326e1809a5SDavid Ahern 	int m = 0;
6334d0c5911SYOSHIFUJI Hideaki 
6346e1809a5SDavid Ahern 	if (!oif || nh->fib_nh_dev->ifindex == oif)
6356e1809a5SDavid Ahern 		m = 2;
6366e1809a5SDavid Ahern 
63777d16f45SYOSHIFUJI Hideaki 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
638afc154e9SHannes Frederic Sowa 		return RT6_NUD_FAIL_HARD;
639ebacaaa0SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
640702cea56SDavid Ahern 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
641ebacaaa0SYOSHIFUJI Hideaki #endif
6421ba9a895SDavid Ahern 	if ((strict & RT6_LOOKUP_F_REACHABLE) &&
643702cea56SDavid Ahern 	    !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
6441ba9a895SDavid Ahern 		int n = rt6_check_neigh(nh);
645afc154e9SHannes Frederic Sowa 		if (n < 0)
646afc154e9SHannes Frederic Sowa 			return n;
647afc154e9SHannes Frederic Sowa 	}
648554cfb7eSYOSHIFUJI Hideaki 	return m;
649554cfb7eSYOSHIFUJI Hideaki }
650554cfb7eSYOSHIFUJI Hideaki 
65128679ed1SDavid Ahern static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
65228679ed1SDavid Ahern 		       int oif, int strict, int *mpri, bool *do_rr)
653554cfb7eSYOSHIFUJI Hideaki {
654afc154e9SHannes Frederic Sowa 	bool match_do_rr = false;
65528679ed1SDavid Ahern 	bool rc = false;
65628679ed1SDavid Ahern 	int m;
65735103d11SAndy Gospodarek 
65828679ed1SDavid Ahern 	if (nh->fib_nh_flags & RTNH_F_DEAD)
6598067bb8cSIdo Schimmel 		goto out;
6608067bb8cSIdo Schimmel 
66128679ed1SDavid Ahern 	if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
66228679ed1SDavid Ahern 	    nh->fib_nh_flags & RTNH_F_LINKDOWN &&
663d5d32e4bSDavid Ahern 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
66435103d11SAndy Gospodarek 		goto out;
665554cfb7eSYOSHIFUJI Hideaki 
66628679ed1SDavid Ahern 	m = rt6_score_route(nh, fib6_flags, oif, strict);
6677e980569SJiri Benc 	if (m == RT6_NUD_FAIL_DO_RR) {
668afc154e9SHannes Frederic Sowa 		match_do_rr = true;
669afc154e9SHannes Frederic Sowa 		m = 0; /* lowest valid score */
6707e980569SJiri Benc 	} else if (m == RT6_NUD_FAIL_HARD) {
671f11e6659SDavid S. Miller 		goto out;
6721da177e4SLinus Torvalds 	}
673f11e6659SDavid S. Miller 
674afc154e9SHannes Frederic Sowa 	if (strict & RT6_LOOKUP_F_REACHABLE)
67528679ed1SDavid Ahern 		rt6_probe(nh);
676afc154e9SHannes Frederic Sowa 
6777e980569SJiri Benc 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
678afc154e9SHannes Frederic Sowa 	if (m > *mpri) {
679afc154e9SHannes Frederic Sowa 		*do_rr = match_do_rr;
680afc154e9SHannes Frederic Sowa 		*mpri = m;
68128679ed1SDavid Ahern 		rc = true;
682afc154e9SHannes Frederic Sowa 	}
683f11e6659SDavid S. Miller out:
68428679ed1SDavid Ahern 	return rc;
6851da177e4SLinus Torvalds }
6861da177e4SLinus Torvalds 
68730c15f03SDavid Ahern static void __find_rr_leaf(struct fib6_info *rt_start,
68830c15f03SDavid Ahern 			   struct fib6_info *nomatch, u32 metric,
68930c15f03SDavid Ahern 			   struct fib6_info **match, struct fib6_info **cont,
69030c15f03SDavid Ahern 			   int oif, int strict, bool *do_rr, int *mpri)
69130c15f03SDavid Ahern {
69230c15f03SDavid Ahern 	struct fib6_info *rt;
69330c15f03SDavid Ahern 
69430c15f03SDavid Ahern 	for (rt = rt_start;
69530c15f03SDavid Ahern 	     rt && rt != nomatch;
69630c15f03SDavid Ahern 	     rt = rcu_dereference(rt->fib6_next)) {
69730c15f03SDavid Ahern 		struct fib6_nh *nh;
69830c15f03SDavid Ahern 
69930c15f03SDavid Ahern 		if (cont && rt->fib6_metric != metric) {
70030c15f03SDavid Ahern 			*cont = rt;
70130c15f03SDavid Ahern 			return;
70230c15f03SDavid Ahern 		}
70330c15f03SDavid Ahern 
70430c15f03SDavid Ahern 		if (fib6_check_expired(rt))
70530c15f03SDavid Ahern 			continue;
70630c15f03SDavid Ahern 
70730c15f03SDavid Ahern 		nh = &rt->fib6_nh;
70830c15f03SDavid Ahern 		if (find_match(nh, rt->fib6_flags, oif, strict, mpri, do_rr))
70930c15f03SDavid Ahern 			*match = rt;
71030c15f03SDavid Ahern 	}
71130c15f03SDavid Ahern }
71230c15f03SDavid Ahern 
7138d1c802bSDavid Ahern static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
7148d1c802bSDavid Ahern 				      struct fib6_info *leaf,
7158d1c802bSDavid Ahern 				      struct fib6_info *rr_head,
716afc154e9SHannes Frederic Sowa 				      u32 metric, int oif, int strict,
717afc154e9SHannes Frederic Sowa 				      bool *do_rr)
718f11e6659SDavid S. Miller {
71930c15f03SDavid Ahern 	struct fib6_info *match = NULL, *cont = NULL;
720f11e6659SDavid S. Miller 	int mpri = -1;
721f11e6659SDavid S. Miller 
72230c15f03SDavid Ahern 	__find_rr_leaf(rr_head, NULL, metric, &match, &cont,
72330c15f03SDavid Ahern 		       oif, strict, do_rr, &mpri);
7249fbdcfafSSteffen Klassert 
72530c15f03SDavid Ahern 	__find_rr_leaf(leaf, rr_head, metric, &match, &cont,
72630c15f03SDavid Ahern 		       oif, strict, do_rr, &mpri);
7279fbdcfafSSteffen Klassert 
7289fbdcfafSSteffen Klassert 	if (match || !cont)
7299fbdcfafSSteffen Klassert 		return match;
7309fbdcfafSSteffen Klassert 
73130c15f03SDavid Ahern 	__find_rr_leaf(cont, NULL, metric, &match, NULL,
73230c15f03SDavid Ahern 		       oif, strict, do_rr, &mpri);
733f11e6659SDavid S. Miller 
734f11e6659SDavid S. Miller 	return match;
735f11e6659SDavid S. Miller }
736f11e6659SDavid S. Miller 
/*
 * Select the route to use among the entries attached to @fn.
 *
 * The search starts at fn->rr_ptr (falling back to fn->leaf) and is done by
 * find_rr_leaf(). When that reports do_rr, fn->rr_ptr is advanced to the
 * next sibling with the same metric (wrapping to the leaf) so that the next
 * selection starts from a different entry.
 *
 * Returns net->ipv6.fib6_null_entry when @fn has no usable leaf, when @fn
 * looks like an intermediate node (fn_bit differs from the route's key
 * prefix length), or when nothing matched; otherwise the matching entry.
 *
 * NOTE(review): pointers are read with rcu_dereference(), so the caller is
 * presumably inside an RCU read-side section - confirm against callers.
 */
7378d1c802bSDavid Ahern static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
7388d1040e8SWei Wang 				   int oif, int strict)
739f11e6659SDavid S. Miller {
7408d1c802bSDavid Ahern 	struct fib6_info *leaf = rcu_dereference(fn->leaf);
7418d1c802bSDavid Ahern 	struct fib6_info *match, *rt0;
742afc154e9SHannes Frederic Sowa 	bool do_rr = false;
74317ecf590SWei Wang 	int key_plen;
744f11e6659SDavid S. Miller 
745421842edSDavid Ahern 	if (!leaf || leaf == net->ipv6.fib6_null_entry)
746421842edSDavid Ahern 		return net->ipv6.fib6_null_entry;
7478d1040e8SWei Wang 
74866f5d6ceSWei Wang 	rt0 = rcu_dereference(fn->rr_ptr);
749f11e6659SDavid S. Miller 	if (!rt0)
75066f5d6ceSWei Wang 		rt0 = leaf;
751f11e6659SDavid S. Miller 
75217ecf590SWei Wang 	/* Double check to make sure fn is not an intermediate node
75317ecf590SWei Wang 	 * and fn->leaf does not points to its child's leaf
75417ecf590SWei Wang 	 * (This might happen if all routes under fn are deleted from
75517ecf590SWei Wang 	 * the tree and fib6_repair_tree() is called on the node.)
75617ecf590SWei Wang 	 */
75793c2fb25SDavid Ahern 	key_plen = rt0->fib6_dst.plen;
75817ecf590SWei Wang #ifdef CONFIG_IPV6_SUBTREES
75993c2fb25SDavid Ahern 	if (rt0->fib6_src.plen)
76093c2fb25SDavid Ahern 		key_plen = rt0->fib6_src.plen;
76117ecf590SWei Wang #endif
76217ecf590SWei Wang 	if (fn->fn_bit != key_plen)
763421842edSDavid Ahern 		return net->ipv6.fib6_null_entry;
76417ecf590SWei Wang 
76593c2fb25SDavid Ahern 	match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
766afc154e9SHannes Frederic Sowa 			     &do_rr);
767f11e6659SDavid S. Miller 
768afc154e9SHannes Frederic Sowa 	if (do_rr) {
7698fb11a9aSDavid Ahern 		struct fib6_info *next = rcu_dereference(rt0->fib6_next);
770f11e6659SDavid S. Miller 
771554cfb7eSYOSHIFUJI Hideaki 		/* no entries matched; do round-robin */
77293c2fb25SDavid Ahern 		if (!next || next->fib6_metric != rt0->fib6_metric)
7738d1040e8SWei Wang 			next = leaf;
774f11e6659SDavid S. Miller 
		/* rr_ptr is only rewritten while holding the table lock */
77566f5d6ceSWei Wang 		if (next != rt0) {
77693c2fb25SDavid Ahern 			spin_lock_bh(&leaf->fib6_table->tb6_lock);
77766f5d6ceSWei Wang 			/* make sure next is not being deleted from the tree */
77893c2fb25SDavid Ahern 			if (next->fib6_node)
77966f5d6ceSWei Wang 				rcu_assign_pointer(fn->rr_ptr, next);
78093c2fb25SDavid Ahern 			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
78166f5d6ceSWei Wang 		}
782554cfb7eSYOSHIFUJI Hideaki 	}
783554cfb7eSYOSHIFUJI Hideaki 
784421842edSDavid Ahern 	return match ? match : net->ipv6.fib6_null_entry;
7851da177e4SLinus Torvalds }
7861da177e4SLinus Torvalds 
7878d1c802bSDavid Ahern static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
7888b9df265SMartin KaFai Lau {
789bdf00467SDavid Ahern 	return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_gw_family;
7908b9df265SMartin KaFai Lau }
7918b9df265SMartin KaFai Lau 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_route_rcv - process a received Route Information option
 * @dev:    device the option was received on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: gateway address for the route (presumably the sender of the
 *          advertisement - confirm against the caller)
 *
 * Validates the option, then adds, refreshes or deletes the matching route:
 * a zero lifetime deletes an existing route, a non-zero lifetime creates a
 * missing one or updates the preference of an existing one. A finite
 * lifetime is installed as the route's expiry; otherwise any expiry is
 * cleared.
 *
 * Returns 0 on success, -EINVAL when the option is malformed or carries an
 * invalid preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct fib6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length.
	 *
	 * The option length is in units of 8 octets and the first unit holds
	 * the fixed header, so the option carries (length - 1) * 8 octets of
	 * prefix. RFC 4191 section 2.3 therefore requires length 3 for a
	 * prefix longer than 64 bits and at least 2 for any non-empty
	 * prefix. Anything shorter would make the prefix copy below read
	 * beyond the end of the option.
	 */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 3)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 2)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* full 16-octet prefix is present, use it in place */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* copies only the octets covered by prefix_len, which the
		 * checks above guarantee are present in the option
		 */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(net, gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	/* zero lifetime withdraws the route */
	if (rt && !lifetime) {
		ip6_del_rt(net, rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->fib6_flags = RTF_ROUTEINFO |
				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			fib6_clean_expires(rt);
		else
			fib6_set_expires(rt, jiffies + HZ * lifetime);

		fib6_info_release(rt);
	}
	return 0;
}
#endif
86770ceb4f5SYOSHIFUJI Hideaki 
868ae90d867SDavid Ahern /*
869ae90d867SDavid Ahern  *	Misc support functions
870ae90d867SDavid Ahern  */
871ae90d867SDavid Ahern 
872ae90d867SDavid Ahern /* called with rcu_lock held */
8738d1c802bSDavid Ahern static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
874ae90d867SDavid Ahern {
875ad1601aeSDavid Ahern 	struct net_device *dev = rt->fib6_nh.fib_nh_dev;
876ae90d867SDavid Ahern 
87793c2fb25SDavid Ahern 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
878ae90d867SDavid Ahern 		/* for copies of local routes, dst->dev needs to be the
879ae90d867SDavid Ahern 		 * device if it is a master device, the master device if
880ae90d867SDavid Ahern 		 * device is enslaved, and the loopback as the default
881ae90d867SDavid Ahern 		 */
882ae90d867SDavid Ahern 		if (netif_is_l3_slave(dev) &&
88393c2fb25SDavid Ahern 		    !rt6_need_strict(&rt->fib6_dst.addr))
884ae90d867SDavid Ahern 			dev = l3mdev_master_dev_rcu(dev);
885ae90d867SDavid Ahern 		else if (!netif_is_l3_master(dev))
886ae90d867SDavid Ahern 			dev = dev_net(dev)->loopback_dev;
887ae90d867SDavid Ahern 		/* last case is netif_is_l3_master(dev) is true in which
888ae90d867SDavid Ahern 		 * case we want dev returned to be dev
889ae90d867SDavid Ahern 		 */
890ae90d867SDavid Ahern 	}
891ae90d867SDavid Ahern 
892ae90d867SDavid Ahern 	return dev;
893ae90d867SDavid Ahern }
894ae90d867SDavid Ahern 
/* Error value for each route type, indexed by RTN_* constant; 0 means the
 * type carries no error. Read through ip6_rt_type_to_error(), whose result
 * ip6_rt_init_dst_reject() stores in dst.error.
 */
8956edb3c96SDavid Ahern static const int fib6_prop[RTN_MAX + 1] = {
8966edb3c96SDavid Ahern 	[RTN_UNSPEC]	= 0,
8976edb3c96SDavid Ahern 	[RTN_UNICAST]	= 0,
8986edb3c96SDavid Ahern 	[RTN_LOCAL]	= 0,
8996edb3c96SDavid Ahern 	[RTN_BROADCAST]	= 0,
9006edb3c96SDavid Ahern 	[RTN_ANYCAST]	= 0,
9016edb3c96SDavid Ahern 	[RTN_MULTICAST]	= 0,
9026edb3c96SDavid Ahern 	[RTN_BLACKHOLE]	= -EINVAL,
9036edb3c96SDavid Ahern 	[RTN_UNREACHABLE] = -EHOSTUNREACH,
9046edb3c96SDavid Ahern 	[RTN_PROHIBIT]	= -EACCES,
9056edb3c96SDavid Ahern 	[RTN_THROW]	= -EAGAIN,
9066edb3c96SDavid Ahern 	[RTN_NAT]	= -EINVAL,
9076edb3c96SDavid Ahern 	[RTN_XRESOLVE]	= -EINVAL,
9086edb3c96SDavid Ahern };
9096edb3c96SDavid Ahern 
9106edb3c96SDavid Ahern static int ip6_rt_type_to_error(u8 fib6_type)
9116edb3c96SDavid Ahern {
9126edb3c96SDavid Ahern 	return fib6_prop[fib6_type];
9136edb3c96SDavid Ahern }
9146edb3c96SDavid Ahern 
9158d1c802bSDavid Ahern static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
9163b6761d1SDavid Ahern {
9173b6761d1SDavid Ahern 	unsigned short flags = 0;
9183b6761d1SDavid Ahern 
9193b6761d1SDavid Ahern 	if (rt->dst_nocount)
9203b6761d1SDavid Ahern 		flags |= DST_NOCOUNT;
9213b6761d1SDavid Ahern 	if (rt->dst_nopolicy)
9223b6761d1SDavid Ahern 		flags |= DST_NOPOLICY;
9233b6761d1SDavid Ahern 	if (rt->dst_host)
9243b6761d1SDavid Ahern 		flags |= DST_HOST;
9253b6761d1SDavid Ahern 
9263b6761d1SDavid Ahern 	return flags;
9273b6761d1SDavid Ahern }
9283b6761d1SDavid Ahern 
9298d1c802bSDavid Ahern static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
9306edb3c96SDavid Ahern {
9316edb3c96SDavid Ahern 	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
9326edb3c96SDavid Ahern 
9336edb3c96SDavid Ahern 	switch (ort->fib6_type) {
9346edb3c96SDavid Ahern 	case RTN_BLACKHOLE:
9356edb3c96SDavid Ahern 		rt->dst.output = dst_discard_out;
9366edb3c96SDavid Ahern 		rt->dst.input = dst_discard;
9376edb3c96SDavid Ahern 		break;
9386edb3c96SDavid Ahern 	case RTN_PROHIBIT:
9396edb3c96SDavid Ahern 		rt->dst.output = ip6_pkt_prohibit_out;
9406edb3c96SDavid Ahern 		rt->dst.input = ip6_pkt_prohibit;
9416edb3c96SDavid Ahern 		break;
9426edb3c96SDavid Ahern 	case RTN_THROW:
9436edb3c96SDavid Ahern 	case RTN_UNREACHABLE:
9446edb3c96SDavid Ahern 	default:
9456edb3c96SDavid Ahern 		rt->dst.output = ip6_pkt_discard_out;
9466edb3c96SDavid Ahern 		rt->dst.input = ip6_pkt_discard;
9476edb3c96SDavid Ahern 		break;
9486edb3c96SDavid Ahern 	}
9496edb3c96SDavid Ahern }
9506edb3c96SDavid Ahern 
9518d1c802bSDavid Ahern static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
9526edb3c96SDavid Ahern {
95393c2fb25SDavid Ahern 	if (ort->fib6_flags & RTF_REJECT) {
9546edb3c96SDavid Ahern 		ip6_rt_init_dst_reject(rt, ort);
9556edb3c96SDavid Ahern 		return;
9566edb3c96SDavid Ahern 	}
9576edb3c96SDavid Ahern 
9586edb3c96SDavid Ahern 	rt->dst.error = 0;
9596edb3c96SDavid Ahern 	rt->dst.output = ip6_output;
9606edb3c96SDavid Ahern 
961d23c4b63SHangbin Liu 	if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
9626edb3c96SDavid Ahern 		rt->dst.input = ip6_input;
96393c2fb25SDavid Ahern 	} else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
9646edb3c96SDavid Ahern 		rt->dst.input = ip6_mc_input;
9656edb3c96SDavid Ahern 	} else {
9666edb3c96SDavid Ahern 		rt->dst.input = ip6_forward;
9676edb3c96SDavid Ahern 	}
9686edb3c96SDavid Ahern 
969ad1601aeSDavid Ahern 	if (ort->fib6_nh.fib_nh_lws) {
970ad1601aeSDavid Ahern 		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.fib_nh_lws);
9716edb3c96SDavid Ahern 		lwtunnel_set_redirect(&rt->dst);
9726edb3c96SDavid Ahern 	}
9736edb3c96SDavid Ahern 
9746edb3c96SDavid Ahern 	rt->dst.lastuse = jiffies;
9756edb3c96SDavid Ahern }
9766edb3c96SDavid Ahern 
977e873e4b9SWei Wang /* Caller must already hold reference to @from */
9788d1c802bSDavid Ahern static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
979ae90d867SDavid Ahern {
980ae90d867SDavid Ahern 	rt->rt6i_flags &= ~RTF_EXPIRES;
981a68886a6SDavid Ahern 	rcu_assign_pointer(rt->from, from);
982e1255ed4SDavid Ahern 	ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
983ae90d867SDavid Ahern }
984ae90d867SDavid Ahern 
985e873e4b9SWei Wang /* Caller must already hold reference to @ort */
9868d1c802bSDavid Ahern static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
987ae90d867SDavid Ahern {
988dcd1f572SDavid Ahern 	struct net_device *dev = fib6_info_nh_dev(ort);
989dcd1f572SDavid Ahern 
9906edb3c96SDavid Ahern 	ip6_rt_init_dst(rt, ort);
9916edb3c96SDavid Ahern 
99293c2fb25SDavid Ahern 	rt->rt6i_dst = ort->fib6_dst;
993dcd1f572SDavid Ahern 	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
99493c2fb25SDavid Ahern 	rt->rt6i_flags = ort->fib6_flags;
995bdf00467SDavid Ahern 	if (ort->fib6_nh.fib_nh_gw_family) {
996ad1601aeSDavid Ahern 		rt->rt6i_gateway = ort->fib6_nh.fib_nh_gw6;
9972b2450caSDavid Ahern 		rt->rt6i_flags |= RTF_GATEWAY;
9982b2450caSDavid Ahern 	}
999ae90d867SDavid Ahern 	rt6_set_from(rt, ort);
1000ae90d867SDavid Ahern #ifdef CONFIG_IPV6_SUBTREES
100193c2fb25SDavid Ahern 	rt->rt6i_src = ort->fib6_src;
1002ae90d867SDavid Ahern #endif
1003ae90d867SDavid Ahern }
1004ae90d867SDavid Ahern 
1005a3c00e46SMartin KaFai Lau static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1006a3c00e46SMartin KaFai Lau 					struct in6_addr *saddr)
1007a3c00e46SMartin KaFai Lau {
100866f5d6ceSWei Wang 	struct fib6_node *pn, *sn;
1009a3c00e46SMartin KaFai Lau 	while (1) {
1010a3c00e46SMartin KaFai Lau 		if (fn->fn_flags & RTN_TL_ROOT)
1011a3c00e46SMartin KaFai Lau 			return NULL;
101266f5d6ceSWei Wang 		pn = rcu_dereference(fn->parent);
101366f5d6ceSWei Wang 		sn = FIB6_SUBTREE(pn);
101466f5d6ceSWei Wang 		if (sn && sn != fn)
10156454743bSDavid Ahern 			fn = fib6_node_lookup(sn, NULL, saddr);
1016a3c00e46SMartin KaFai Lau 		else
1017a3c00e46SMartin KaFai Lau 			fn = pn;
1018a3c00e46SMartin KaFai Lau 		if (fn->fn_flags & RTN_RTINFO)
1019a3c00e46SMartin KaFai Lau 			return fn;
1020a3c00e46SMartin KaFai Lau 	}
1021a3c00e46SMartin KaFai Lau }
1022c71099acSThomas Graf 
102310585b43SDavid Ahern static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
1024d3843fe5SWei Wang {
1025d3843fe5SWei Wang 	struct rt6_info *rt = *prt;
1026d3843fe5SWei Wang 
1027d3843fe5SWei Wang 	if (dst_hold_safe(&rt->dst))
1028d3843fe5SWei Wang 		return true;
102910585b43SDavid Ahern 	if (net) {
1030d3843fe5SWei Wang 		rt = net->ipv6.ip6_null_entry;
1031d3843fe5SWei Wang 		dst_hold(&rt->dst);
1032d3843fe5SWei Wang 	} else {
1033d3843fe5SWei Wang 		rt = NULL;
1034d3843fe5SWei Wang 	}
1035d3843fe5SWei Wang 	*prt = rt;
1036d3843fe5SWei Wang 	return false;
1037d3843fe5SWei Wang }
1038d3843fe5SWei Wang 
1039dec9b0e2SDavid Ahern /* called with rcu_lock held */
10408d1c802bSDavid Ahern static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
1041dec9b0e2SDavid Ahern {
10423b6761d1SDavid Ahern 	unsigned short flags = fib6_info_dst_flags(rt);
1043ad1601aeSDavid Ahern 	struct net_device *dev = rt->fib6_nh.fib_nh_dev;
1044dec9b0e2SDavid Ahern 	struct rt6_info *nrt;
1045dec9b0e2SDavid Ahern 
1046e873e4b9SWei Wang 	if (!fib6_info_hold_safe(rt))
10471c87e79aSXin Long 		goto fallback;
1048e873e4b9SWei Wang 
104993531c67SDavid Ahern 	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
10501c87e79aSXin Long 	if (!nrt) {
1051e873e4b9SWei Wang 		fib6_info_release(rt);
10521c87e79aSXin Long 		goto fallback;
10531c87e79aSXin Long 	}
1054dec9b0e2SDavid Ahern 
10551c87e79aSXin Long 	ip6_rt_copy_init(nrt, rt);
10561c87e79aSXin Long 	return nrt;
10571c87e79aSXin Long 
10581c87e79aSXin Long fallback:
10591c87e79aSXin Long 	nrt = dev_net(dev)->ipv6.ip6_null_entry;
10601c87e79aSXin Long 	dst_hold(&nrt->dst);
1061dec9b0e2SDavid Ahern 	return nrt;
1062dec9b0e2SDavid Ahern }
1063dec9b0e2SDavid Ahern 
/*
 * Look up the route for @fl6 in @table.
 *
 * The node matching fl6->daddr/fl6->saddr is located, its leaf is filtered
 * through rt6_device_match(), and while that yields the null FIB entry the
 * search backtracks via fib6_backtrack(). If backtracking is exhausted the
 * namespace's ip6_null_entry is returned with a reference held.
 *
 * Otherwise a path is chosen with fib6_select_path() and the exception
 * table is consulted: a cached entry is returned after ip6_hold_safe()
 * (which substitutes ip6_null_entry if the hold fails); with no cached
 * entry a dst is built by ip6_create_rt_rcu().
 *
 * RT6_LOOKUP_F_IFACE is dropped from @flags when the flow carries
 * FLOWI_FLAG_SKIP_NH_OIF. The whole lookup runs under rcu_read_lock().
 */
10648ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_lookup(struct net *net,
10658ed67789SDaniel Lezcano 					     struct fib6_table *table,
1066b75cc8f9SDavid Ahern 					     struct flowi6 *fl6,
1067b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
1068b75cc8f9SDavid Ahern 					     int flags)
10691da177e4SLinus Torvalds {
1070b1d40991SDavid Ahern 	struct fib6_result res = {};
10711da177e4SLinus Torvalds 	struct fib6_node *fn;
107223fb93a4SDavid Ahern 	struct rt6_info *rt;
10731da177e4SLinus Torvalds 
1074b6cdbc85SDavid Ahern 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1075b6cdbc85SDavid Ahern 		flags &= ~RT6_LOOKUP_F_IFACE;
1076b6cdbc85SDavid Ahern 
107766f5d6ceSWei Wang 	rcu_read_lock();
10786454743bSDavid Ahern 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1079c71099acSThomas Graf restart:
1080b1d40991SDavid Ahern 	res.f6i = rcu_dereference(fn->leaf);
1081b1d40991SDavid Ahern 	if (!res.f6i)
1082b1d40991SDavid Ahern 		res.f6i = net->ipv6.fib6_null_entry;
1083af52a52cSDavid Ahern 	else
1084b1d40991SDavid Ahern 		res.f6i = rt6_device_match(net, res.f6i, &fl6->saddr,
108566f5d6ceSWei Wang 					   fl6->flowi6_oif, flags);
1086af52a52cSDavid Ahern 
	/* nothing usable at this node: retry at the next node up the tree */
1087b1d40991SDavid Ahern 	if (res.f6i == net->ipv6.fib6_null_entry) {
1088a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
1089a3c00e46SMartin KaFai Lau 		if (fn)
1090a3c00e46SMartin KaFai Lau 			goto restart;
1091af52a52cSDavid Ahern 
1092af52a52cSDavid Ahern 		rt = net->ipv6.ip6_null_entry;
1093af52a52cSDavid Ahern 		dst_hold(&rt->dst);
1094af52a52cSDavid Ahern 		goto out;
1095a3c00e46SMartin KaFai Lau 	}
10962b760fcfSWei Wang 
1097b1d40991SDavid Ahern 	fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1098b1d40991SDavid Ahern 			 fl6->flowi6_oif != 0, skb, flags);
1099b1d40991SDavid Ahern 
11004c9483b2SDavid S. Miller 	/* Search through exception table */
1101*7e4b5128SDavid Ahern 	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
110223fb93a4SDavid Ahern 	if (rt) {
110310585b43SDavid Ahern 		if (ip6_hold_safe(net, &rt))
1104d3843fe5SWei Wang 			dst_use_noref(&rt->dst, jiffies);
110523fb93a4SDavid Ahern 	} else {
1106b1d40991SDavid Ahern 		rt = ip6_create_rt_rcu(res.f6i);
1107dec9b0e2SDavid Ahern 	}
1108d3843fe5SWei Wang 
1109af52a52cSDavid Ahern out:
1110b1d40991SDavid Ahern 	trace_fib6_table_lookup(net, res.f6i, table, fl6);
1111af52a52cSDavid Ahern 
111266f5d6ceSWei Wang 	rcu_read_unlock();
1113b811580dSDavid Ahern 
11141da177e4SLinus Torvalds 	return rt;
1115c71099acSThomas Graf }
1116c71099acSThomas Graf 
1117ea6e574eSFlorian Westphal struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1118b75cc8f9SDavid Ahern 				   const struct sk_buff *skb, int flags)
1119ea6e574eSFlorian Westphal {
1120b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1121ea6e574eSFlorian Westphal }
1122ea6e574eSFlorian Westphal EXPORT_SYMBOL_GPL(ip6_route_lookup);
1123ea6e574eSFlorian Westphal 
11249acd9f3aSYOSHIFUJI Hideaki struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1125b75cc8f9SDavid Ahern 			    const struct in6_addr *saddr, int oif,
1126b75cc8f9SDavid Ahern 			    const struct sk_buff *skb, int strict)
1127c71099acSThomas Graf {
11284c9483b2SDavid S. Miller 	struct flowi6 fl6 = {
11294c9483b2SDavid S. Miller 		.flowi6_oif = oif,
11304c9483b2SDavid S. Miller 		.daddr = *daddr,
1131c71099acSThomas Graf 	};
1132c71099acSThomas Graf 	struct dst_entry *dst;
113377d16f45SYOSHIFUJI Hideaki 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1134c71099acSThomas Graf 
1135adaa70bbSThomas Graf 	if (saddr) {
11364c9483b2SDavid S. Miller 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1137adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1138adaa70bbSThomas Graf 	}
1139adaa70bbSThomas Graf 
1140b75cc8f9SDavid Ahern 	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1141c71099acSThomas Graf 	if (dst->error == 0)
1142c71099acSThomas Graf 		return (struct rt6_info *) dst;
1143c71099acSThomas Graf 
1144c71099acSThomas Graf 	dst_release(dst);
1145c71099acSThomas Graf 
11461da177e4SLinus Torvalds 	return NULL;
11471da177e4SLinus Torvalds }
11487159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(rt6_lookup);
11497159039aSYOSHIFUJI Hideaki 
1150c71099acSThomas Graf /* ip6_ins_rt is called with FREE table->tb6_lock.
11511cfb71eeSWei Wang  * It takes new route entry, the addition fails by any reason the
11521cfb71eeSWei Wang  * route is released.
11531cfb71eeSWei Wang  * Caller must hold dst before calling it.
11541da177e4SLinus Torvalds  */
11551da177e4SLinus Torvalds 
11568d1c802bSDavid Ahern static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1157333c4301SDavid Ahern 			struct netlink_ext_ack *extack)
11581da177e4SLinus Torvalds {
11591da177e4SLinus Torvalds 	int err;
1160c71099acSThomas Graf 	struct fib6_table *table;
11611da177e4SLinus Torvalds 
116293c2fb25SDavid Ahern 	table = rt->fib6_table;
116366f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
1164d4ead6b3SDavid Ahern 	err = fib6_add(&table->tb6_root, rt, info, extack);
116566f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
11661da177e4SLinus Torvalds 
11671da177e4SLinus Torvalds 	return err;
11681da177e4SLinus Torvalds }
11691da177e4SLinus Torvalds 
11708d1c802bSDavid Ahern int ip6_ins_rt(struct net *net, struct fib6_info *rt)
117140e22e8fSThomas Graf {
1172afb1d4b5SDavid Ahern 	struct nl_info info = {	.nl_net = net, };
1173e715b6d3SFlorian Westphal 
1174d4ead6b3SDavid Ahern 	return __ip6_ins_rt(rt, &info, NULL);
117540e22e8fSThomas Graf }
117640e22e8fSThomas Graf 
11778d1c802bSDavid Ahern static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
117821efcfa0SEric Dumazet 					   const struct in6_addr *daddr,
1179b71d1d42SEric Dumazet 					   const struct in6_addr *saddr)
11801da177e4SLinus Torvalds {
11814832c30dSDavid Ahern 	struct net_device *dev;
11821da177e4SLinus Torvalds 	struct rt6_info *rt;
11831da177e4SLinus Torvalds 
11841da177e4SLinus Torvalds 	/*
11851da177e4SLinus Torvalds 	 *	Clone the route.
11861da177e4SLinus Torvalds 	 */
11871da177e4SLinus Torvalds 
1188e873e4b9SWei Wang 	if (!fib6_info_hold_safe(ort))
1189e873e4b9SWei Wang 		return NULL;
1190e873e4b9SWei Wang 
11914832c30dSDavid Ahern 	dev = ip6_rt_get_dev_rcu(ort);
119293531c67SDavid Ahern 	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1193e873e4b9SWei Wang 	if (!rt) {
1194e873e4b9SWei Wang 		fib6_info_release(ort);
119583a09abdSMartin KaFai Lau 		return NULL;
1196e873e4b9SWei Wang 	}
119783a09abdSMartin KaFai Lau 
119883a09abdSMartin KaFai Lau 	ip6_rt_copy_init(rt, ort);
11998b9df265SMartin KaFai Lau 	rt->rt6i_flags |= RTF_CACHE;
120083a09abdSMartin KaFai Lau 	rt->dst.flags |= DST_HOST;
120183a09abdSMartin KaFai Lau 	rt->rt6i_dst.addr = *daddr;
120283a09abdSMartin KaFai Lau 	rt->rt6i_dst.plen = 128;
12038b9df265SMartin KaFai Lau 
12048b9df265SMartin KaFai Lau 	if (!rt6_is_gw_or_nonexthop(ort)) {
120593c2fb25SDavid Ahern 		if (ort->fib6_dst.plen != 128 &&
120693c2fb25SDavid Ahern 		    ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
120758c4fb86SYOSHIFUJI Hideaki 			rt->rt6i_flags |= RTF_ANYCAST;
12081da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
12091da177e4SLinus Torvalds 		if (rt->rt6i_src.plen && saddr) {
12104e3fd7a0SAlexey Dobriyan 			rt->rt6i_src.addr = *saddr;
12111da177e4SLinus Torvalds 			rt->rt6i_src.plen = 128;
12121da177e4SLinus Torvalds 		}
12131da177e4SLinus Torvalds #endif
121495a9a5baSYOSHIFUJI Hideaki 	}
121595a9a5baSYOSHIFUJI Hideaki 
1216299d9939SYOSHIFUJI Hideaki 	return rt;
1217299d9939SYOSHIFUJI Hideaki }
1218299d9939SYOSHIFUJI Hideaki 
12198d1c802bSDavid Ahern static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
1220d52d3997SMartin KaFai Lau {
12213b6761d1SDavid Ahern 	unsigned short flags = fib6_info_dst_flags(rt);
12224832c30dSDavid Ahern 	struct net_device *dev;
1223d52d3997SMartin KaFai Lau 	struct rt6_info *pcpu_rt;
1224d52d3997SMartin KaFai Lau 
1225e873e4b9SWei Wang 	if (!fib6_info_hold_safe(rt))
1226e873e4b9SWei Wang 		return NULL;
1227e873e4b9SWei Wang 
12284832c30dSDavid Ahern 	rcu_read_lock();
12294832c30dSDavid Ahern 	dev = ip6_rt_get_dev_rcu(rt);
123093531c67SDavid Ahern 	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
12314832c30dSDavid Ahern 	rcu_read_unlock();
1232e873e4b9SWei Wang 	if (!pcpu_rt) {
1233e873e4b9SWei Wang 		fib6_info_release(rt);
1234d52d3997SMartin KaFai Lau 		return NULL;
1235e873e4b9SWei Wang 	}
1236d52d3997SMartin KaFai Lau 	ip6_rt_copy_init(pcpu_rt, rt);
1237d52d3997SMartin KaFai Lau 	pcpu_rt->rt6i_flags |= RTF_PCPU;
1238d52d3997SMartin KaFai Lau 	return pcpu_rt;
1239d52d3997SMartin KaFai Lau }
1240d52d3997SMartin KaFai Lau 
124166f5d6ceSWei Wang /* It should be called with rcu_read_lock() acquired */
12428d1c802bSDavid Ahern static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
1243d52d3997SMartin KaFai Lau {
1244a73e4195SMartin KaFai Lau 	struct rt6_info *pcpu_rt, **p;
1245d52d3997SMartin KaFai Lau 
1246d52d3997SMartin KaFai Lau 	p = this_cpu_ptr(rt->rt6i_pcpu);
1247d52d3997SMartin KaFai Lau 	pcpu_rt = *p;
1248d52d3997SMartin KaFai Lau 
1249d4ead6b3SDavid Ahern 	if (pcpu_rt)
125010585b43SDavid Ahern 		ip6_hold_safe(NULL, &pcpu_rt);
1251d3843fe5SWei Wang 
1252a73e4195SMartin KaFai Lau 	return pcpu_rt;
1253a73e4195SMartin KaFai Lau }
1254a73e4195SMartin KaFai Lau 
1255afb1d4b5SDavid Ahern static struct rt6_info *rt6_make_pcpu_route(struct net *net,
12568d1c802bSDavid Ahern 					    struct fib6_info *rt)
1257a73e4195SMartin KaFai Lau {
1258a73e4195SMartin KaFai Lau 	struct rt6_info *pcpu_rt, *prev, **p;
1259d52d3997SMartin KaFai Lau 
1260d52d3997SMartin KaFai Lau 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1261d52d3997SMartin KaFai Lau 	if (!pcpu_rt) {
12629c7370a1SMartin KaFai Lau 		dst_hold(&net->ipv6.ip6_null_entry->dst);
12639c7370a1SMartin KaFai Lau 		return net->ipv6.ip6_null_entry;
1264d52d3997SMartin KaFai Lau 	}
1265d52d3997SMartin KaFai Lau 
1266a94b9367SWei Wang 	dst_hold(&pcpu_rt->dst);
1267a73e4195SMartin KaFai Lau 	p = this_cpu_ptr(rt->rt6i_pcpu);
1268d52d3997SMartin KaFai Lau 	prev = cmpxchg(p, NULL, pcpu_rt);
1269951f788aSEric Dumazet 	BUG_ON(prev);
1270a94b9367SWei Wang 
1271d52d3997SMartin KaFai Lau 	return pcpu_rt;
1272d52d3997SMartin KaFai Lau }
1273d52d3997SMartin KaFai Lau 
/* Exception hash table implementation.
 * Functions below that modify the table document that their caller must
 * hold rt6_exception_lock.
 */
127635732d01SWei Wang static DEFINE_SPINLOCK(rt6_exception_lock);
127735732d01SWei Wang 
127835732d01SWei Wang /* Remove rt6_ex from hash table and free the memory
127935732d01SWei Wang  * Caller must hold rt6_exception_lock
128035732d01SWei Wang  */
128135732d01SWei Wang static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
128235732d01SWei Wang 				 struct rt6_exception *rt6_ex)
128335732d01SWei Wang {
1284f5b51fe8SPaolo Abeni 	struct fib6_info *from;
1285b2427e67SColin Ian King 	struct net *net;
128681eb8447SWei Wang 
128735732d01SWei Wang 	if (!bucket || !rt6_ex)
128835732d01SWei Wang 		return;
1289b2427e67SColin Ian King 
1290b2427e67SColin Ian King 	net = dev_net(rt6_ex->rt6i->dst.dev);
1291f5b51fe8SPaolo Abeni 	net->ipv6.rt6_stats->fib_rt_cache--;
1292f5b51fe8SPaolo Abeni 
1293f5b51fe8SPaolo Abeni 	/* purge completely the exception to allow releasing the held resources:
1294f5b51fe8SPaolo Abeni 	 * some [sk] cache may keep the dst around for unlimited time
1295f5b51fe8SPaolo Abeni 	 */
1296f5b51fe8SPaolo Abeni 	from = rcu_dereference_protected(rt6_ex->rt6i->from,
1297f5b51fe8SPaolo Abeni 					 lockdep_is_held(&rt6_exception_lock));
1298f5b51fe8SPaolo Abeni 	rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
1299f5b51fe8SPaolo Abeni 	fib6_info_release(from);
1300f5b51fe8SPaolo Abeni 	dst_dev_put(&rt6_ex->rt6i->dst);
1301f5b51fe8SPaolo Abeni 
130235732d01SWei Wang 	hlist_del_rcu(&rt6_ex->hlist);
130377634cc6SDavid Ahern 	dst_release(&rt6_ex->rt6i->dst);
130435732d01SWei Wang 	kfree_rcu(rt6_ex, rcu);
130535732d01SWei Wang 	WARN_ON_ONCE(!bucket->depth);
130635732d01SWei Wang 	bucket->depth--;
130735732d01SWei Wang }
130835732d01SWei Wang 
130935732d01SWei Wang /* Remove oldest rt6_ex in bucket and free the memory
131035732d01SWei Wang  * Caller must hold rt6_exception_lock
131135732d01SWei Wang  */
131235732d01SWei Wang static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
131335732d01SWei Wang {
131435732d01SWei Wang 	struct rt6_exception *rt6_ex, *oldest = NULL;
131535732d01SWei Wang 
131635732d01SWei Wang 	if (!bucket)
131735732d01SWei Wang 		return;
131835732d01SWei Wang 
131935732d01SWei Wang 	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
132035732d01SWei Wang 		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
132135732d01SWei Wang 			oldest = rt6_ex;
132235732d01SWei Wang 	}
132335732d01SWei Wang 	rt6_remove_exception(bucket, oldest);
132435732d01SWei Wang }
132535732d01SWei Wang 
132635732d01SWei Wang static u32 rt6_exception_hash(const struct in6_addr *dst,
132735732d01SWei Wang 			      const struct in6_addr *src)
132835732d01SWei Wang {
132935732d01SWei Wang 	static u32 seed __read_mostly;
133035732d01SWei Wang 	u32 val;
133135732d01SWei Wang 
133235732d01SWei Wang 	net_get_random_once(&seed, sizeof(seed));
133335732d01SWei Wang 	val = jhash(dst, sizeof(*dst), seed);
133435732d01SWei Wang 
133535732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
133635732d01SWei Wang 	if (src)
133735732d01SWei Wang 		val = jhash(src, sizeof(*src), val);
133835732d01SWei Wang #endif
133935732d01SWei Wang 	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
134035732d01SWei Wang }
134135732d01SWei Wang 
134235732d01SWei Wang /* Helper function to find the cached rt in the hash table
134335732d01SWei Wang  * and update bucket pointer to point to the bucket for this
134435732d01SWei Wang  * (daddr, saddr) pair
134535732d01SWei Wang  * Caller must hold rt6_exception_lock
134635732d01SWei Wang  */
134735732d01SWei Wang static struct rt6_exception *
134835732d01SWei Wang __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
134935732d01SWei Wang 			      const struct in6_addr *daddr,
135035732d01SWei Wang 			      const struct in6_addr *saddr)
135135732d01SWei Wang {
135235732d01SWei Wang 	struct rt6_exception *rt6_ex;
135335732d01SWei Wang 	u32 hval;
135435732d01SWei Wang 
135535732d01SWei Wang 	if (!(*bucket) || !daddr)
135635732d01SWei Wang 		return NULL;
135735732d01SWei Wang 
135835732d01SWei Wang 	hval = rt6_exception_hash(daddr, saddr);
135935732d01SWei Wang 	*bucket += hval;
136035732d01SWei Wang 
136135732d01SWei Wang 	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
136235732d01SWei Wang 		struct rt6_info *rt6 = rt6_ex->rt6i;
136335732d01SWei Wang 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
136435732d01SWei Wang 
136535732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
136635732d01SWei Wang 		if (matched && saddr)
136735732d01SWei Wang 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
136835732d01SWei Wang #endif
136935732d01SWei Wang 		if (matched)
137035732d01SWei Wang 			return rt6_ex;
137135732d01SWei Wang 	}
137235732d01SWei Wang 	return NULL;
137335732d01SWei Wang }
137435732d01SWei Wang 
137535732d01SWei Wang /* Helper function to find the cached rt in the hash table
137635732d01SWei Wang  * and update bucket pointer to point to the bucket for this
137735732d01SWei Wang  * (daddr, saddr) pair
137835732d01SWei Wang  * Caller must hold rcu_read_lock()
137935732d01SWei Wang  */
138035732d01SWei Wang static struct rt6_exception *
138135732d01SWei Wang __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
138235732d01SWei Wang 			 const struct in6_addr *daddr,
138335732d01SWei Wang 			 const struct in6_addr *saddr)
138435732d01SWei Wang {
138535732d01SWei Wang 	struct rt6_exception *rt6_ex;
138635732d01SWei Wang 	u32 hval;
138735732d01SWei Wang 
138835732d01SWei Wang 	WARN_ON_ONCE(!rcu_read_lock_held());
138935732d01SWei Wang 
139035732d01SWei Wang 	if (!(*bucket) || !daddr)
139135732d01SWei Wang 		return NULL;
139235732d01SWei Wang 
139335732d01SWei Wang 	hval = rt6_exception_hash(daddr, saddr);
139435732d01SWei Wang 	*bucket += hval;
139535732d01SWei Wang 
139635732d01SWei Wang 	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
139735732d01SWei Wang 		struct rt6_info *rt6 = rt6_ex->rt6i;
139835732d01SWei Wang 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
139935732d01SWei Wang 
140035732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
140135732d01SWei Wang 		if (matched && saddr)
140235732d01SWei Wang 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
140335732d01SWei Wang #endif
140435732d01SWei Wang 		if (matched)
140535732d01SWei Wang 			return rt6_ex;
140635732d01SWei Wang 	}
140735732d01SWei Wang 	return NULL;
140835732d01SWei Wang }
140935732d01SWei Wang 
14108d1c802bSDavid Ahern static unsigned int fib6_mtu(const struct fib6_info *rt)
141135732d01SWei Wang {
1412d4ead6b3SDavid Ahern 	unsigned int mtu;
1413d4ead6b3SDavid Ahern 
1414dcd1f572SDavid Ahern 	if (rt->fib6_pmtu) {
1415dcd1f572SDavid Ahern 		mtu = rt->fib6_pmtu;
1416dcd1f572SDavid Ahern 	} else {
1417dcd1f572SDavid Ahern 		struct net_device *dev = fib6_info_nh_dev(rt);
1418dcd1f572SDavid Ahern 		struct inet6_dev *idev;
1419dcd1f572SDavid Ahern 
1420dcd1f572SDavid Ahern 		rcu_read_lock();
1421dcd1f572SDavid Ahern 		idev = __in6_dev_get(dev);
1422dcd1f572SDavid Ahern 		mtu = idev->cnf.mtu6;
1423dcd1f572SDavid Ahern 		rcu_read_unlock();
1424dcd1f572SDavid Ahern 	}
1425dcd1f572SDavid Ahern 
1426d4ead6b3SDavid Ahern 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1427d4ead6b3SDavid Ahern 
1428ad1601aeSDavid Ahern 	return mtu - lwtunnel_headroom(rt->fib6_nh.fib_nh_lws, mtu);
1429d4ead6b3SDavid Ahern }
1430d4ead6b3SDavid Ahern 
143135732d01SWei Wang static int rt6_insert_exception(struct rt6_info *nrt,
14328d1c802bSDavid Ahern 				struct fib6_info *ort)
143335732d01SWei Wang {
14345e670d84SDavid Ahern 	struct net *net = dev_net(nrt->dst.dev);
143535732d01SWei Wang 	struct rt6_exception_bucket *bucket;
143635732d01SWei Wang 	struct in6_addr *src_key = NULL;
143735732d01SWei Wang 	struct rt6_exception *rt6_ex;
143835732d01SWei Wang 	int err = 0;
143935732d01SWei Wang 
144035732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
144135732d01SWei Wang 
144235732d01SWei Wang 	if (ort->exception_bucket_flushed) {
144335732d01SWei Wang 		err = -EINVAL;
144435732d01SWei Wang 		goto out;
144535732d01SWei Wang 	}
144635732d01SWei Wang 
144735732d01SWei Wang 	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
144835732d01SWei Wang 					lockdep_is_held(&rt6_exception_lock));
144935732d01SWei Wang 	if (!bucket) {
145035732d01SWei Wang 		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
145135732d01SWei Wang 				 GFP_ATOMIC);
145235732d01SWei Wang 		if (!bucket) {
145335732d01SWei Wang 			err = -ENOMEM;
145435732d01SWei Wang 			goto out;
145535732d01SWei Wang 		}
145635732d01SWei Wang 		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
145735732d01SWei Wang 	}
145835732d01SWei Wang 
145935732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
146035732d01SWei Wang 	/* rt6i_src.plen != 0 indicates ort is in subtree
146135732d01SWei Wang 	 * and exception table is indexed by a hash of
146235732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
146335732d01SWei Wang 	 * Otherwise, the exception table is indexed by
146435732d01SWei Wang 	 * a hash of only rt6i_dst.
146535732d01SWei Wang 	 */
146693c2fb25SDavid Ahern 	if (ort->fib6_src.plen)
146735732d01SWei Wang 		src_key = &nrt->rt6i_src.addr;
146835732d01SWei Wang #endif
1469f5bbe7eeSWei Wang 	/* rt6_mtu_change() might lower mtu on ort.
1470f5bbe7eeSWei Wang 	 * Only insert this exception route if its mtu
1471f5bbe7eeSWei Wang 	 * is less than ort's mtu value.
1472f5bbe7eeSWei Wang 	 */
1473d4ead6b3SDavid Ahern 	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
1474f5bbe7eeSWei Wang 		err = -EINVAL;
1475f5bbe7eeSWei Wang 		goto out;
1476f5bbe7eeSWei Wang 	}
147760006a48SWei Wang 
147835732d01SWei Wang 	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
147935732d01SWei Wang 					       src_key);
148035732d01SWei Wang 	if (rt6_ex)
148135732d01SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
148235732d01SWei Wang 
148335732d01SWei Wang 	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
148435732d01SWei Wang 	if (!rt6_ex) {
148535732d01SWei Wang 		err = -ENOMEM;
148635732d01SWei Wang 		goto out;
148735732d01SWei Wang 	}
148835732d01SWei Wang 	rt6_ex->rt6i = nrt;
148935732d01SWei Wang 	rt6_ex->stamp = jiffies;
149035732d01SWei Wang 	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
149135732d01SWei Wang 	bucket->depth++;
149281eb8447SWei Wang 	net->ipv6.rt6_stats->fib_rt_cache++;
149335732d01SWei Wang 
149435732d01SWei Wang 	if (bucket->depth > FIB6_MAX_DEPTH)
149535732d01SWei Wang 		rt6_exception_remove_oldest(bucket);
149635732d01SWei Wang 
149735732d01SWei Wang out:
149835732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
149935732d01SWei Wang 
150035732d01SWei Wang 	/* Update fn->fn_sernum to invalidate all cached dst */
1501b886d5f2SPaolo Abeni 	if (!err) {
150293c2fb25SDavid Ahern 		spin_lock_bh(&ort->fib6_table->tb6_lock);
15037aef6859SDavid Ahern 		fib6_update_sernum(net, ort);
150493c2fb25SDavid Ahern 		spin_unlock_bh(&ort->fib6_table->tb6_lock);
1505b886d5f2SPaolo Abeni 		fib6_force_start_gc(net);
1506b886d5f2SPaolo Abeni 	}
150735732d01SWei Wang 
150835732d01SWei Wang 	return err;
150935732d01SWei Wang }
151035732d01SWei Wang 
15118d1c802bSDavid Ahern void rt6_flush_exceptions(struct fib6_info *rt)
151235732d01SWei Wang {
151335732d01SWei Wang 	struct rt6_exception_bucket *bucket;
151435732d01SWei Wang 	struct rt6_exception *rt6_ex;
151535732d01SWei Wang 	struct hlist_node *tmp;
151635732d01SWei Wang 	int i;
151735732d01SWei Wang 
151835732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
151935732d01SWei Wang 	/* Prevent rt6_insert_exception() to recreate the bucket list */
152035732d01SWei Wang 	rt->exception_bucket_flushed = 1;
152135732d01SWei Wang 
152235732d01SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
152335732d01SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
152435732d01SWei Wang 	if (!bucket)
152535732d01SWei Wang 		goto out;
152635732d01SWei Wang 
152735732d01SWei Wang 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
152835732d01SWei Wang 		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
152935732d01SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
153035732d01SWei Wang 		WARN_ON_ONCE(bucket->depth);
153135732d01SWei Wang 		bucket++;
153235732d01SWei Wang 	}
153335732d01SWei Wang 
153435732d01SWei Wang out:
153535732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
153635732d01SWei Wang }
153735732d01SWei Wang 
153835732d01SWei Wang /* Find cached rt in the hash table inside passed in rt
153935732d01SWei Wang  * Caller has to hold rcu_read_lock()
154035732d01SWei Wang  */
1541*7e4b5128SDavid Ahern static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
154235732d01SWei Wang 					   struct in6_addr *daddr,
154335732d01SWei Wang 					   struct in6_addr *saddr)
154435732d01SWei Wang {
154535732d01SWei Wang 	struct rt6_exception_bucket *bucket;
154635732d01SWei Wang 	struct in6_addr *src_key = NULL;
154735732d01SWei Wang 	struct rt6_exception *rt6_ex;
1548*7e4b5128SDavid Ahern 	struct rt6_info *ret = NULL;
154935732d01SWei Wang 
1550*7e4b5128SDavid Ahern 	bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
155135732d01SWei Wang 
155235732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
1553*7e4b5128SDavid Ahern 	/* fib6i_src.plen != 0 indicates f6i is in subtree
155435732d01SWei Wang 	 * and exception table is indexed by a hash of
1555*7e4b5128SDavid Ahern 	 * both fib6_dst and fib6_src.
155635732d01SWei Wang 	 * Otherwise, the exception table is indexed by
1557*7e4b5128SDavid Ahern 	 * a hash of only fib6_dst.
155835732d01SWei Wang 	 */
1559*7e4b5128SDavid Ahern 	if (res->f6i->fib6_src.plen)
156035732d01SWei Wang 		src_key = saddr;
156135732d01SWei Wang #endif
156235732d01SWei Wang 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
156335732d01SWei Wang 
156435732d01SWei Wang 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1565*7e4b5128SDavid Ahern 		ret = rt6_ex->rt6i;
156635732d01SWei Wang 
1567*7e4b5128SDavid Ahern 	return ret;
156835732d01SWei Wang }
156935732d01SWei Wang 
157035732d01SWei Wang /* Remove the passed in cached rt from the hash table that contains it */
157123fb93a4SDavid Ahern static int rt6_remove_exception_rt(struct rt6_info *rt)
157235732d01SWei Wang {
157335732d01SWei Wang 	struct rt6_exception_bucket *bucket;
157435732d01SWei Wang 	struct in6_addr *src_key = NULL;
157535732d01SWei Wang 	struct rt6_exception *rt6_ex;
15768a14e46fSDavid Ahern 	struct fib6_info *from;
157735732d01SWei Wang 	int err;
157835732d01SWei Wang 
1579091311deSEric Dumazet 	from = rcu_dereference(rt->from);
158035732d01SWei Wang 	if (!from ||
1581442d713bSColin Ian King 	    !(rt->rt6i_flags & RTF_CACHE))
158235732d01SWei Wang 		return -EINVAL;
158335732d01SWei Wang 
158435732d01SWei Wang 	if (!rcu_access_pointer(from->rt6i_exception_bucket))
158535732d01SWei Wang 		return -ENOENT;
158635732d01SWei Wang 
158735732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
158835732d01SWei Wang 	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
158935732d01SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
159035732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
159135732d01SWei Wang 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
159235732d01SWei Wang 	 * and exception table is indexed by a hash of
159335732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
159435732d01SWei Wang 	 * Otherwise, the exception table is indexed by
159535732d01SWei Wang 	 * a hash of only rt6i_dst.
159635732d01SWei Wang 	 */
159793c2fb25SDavid Ahern 	if (from->fib6_src.plen)
159835732d01SWei Wang 		src_key = &rt->rt6i_src.addr;
159935732d01SWei Wang #endif
160035732d01SWei Wang 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
160135732d01SWei Wang 					       &rt->rt6i_dst.addr,
160235732d01SWei Wang 					       src_key);
160335732d01SWei Wang 	if (rt6_ex) {
160435732d01SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
160535732d01SWei Wang 		err = 0;
160635732d01SWei Wang 	} else {
160735732d01SWei Wang 		err = -ENOENT;
160835732d01SWei Wang 	}
160935732d01SWei Wang 
161035732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
161135732d01SWei Wang 	return err;
161235732d01SWei Wang }
161335732d01SWei Wang 
161435732d01SWei Wang /* Find rt6_ex which contains the passed in rt cache and
161535732d01SWei Wang  * refresh its stamp
161635732d01SWei Wang  */
161735732d01SWei Wang static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
161835732d01SWei Wang {
161935732d01SWei Wang 	struct rt6_exception_bucket *bucket;
162035732d01SWei Wang 	struct in6_addr *src_key = NULL;
162135732d01SWei Wang 	struct rt6_exception *rt6_ex;
1622193f3685SPaolo Abeni 	struct fib6_info *from;
162335732d01SWei Wang 
162435732d01SWei Wang 	rcu_read_lock();
1625193f3685SPaolo Abeni 	from = rcu_dereference(rt->from);
1626193f3685SPaolo Abeni 	if (!from || !(rt->rt6i_flags & RTF_CACHE))
1627193f3685SPaolo Abeni 		goto unlock;
1628193f3685SPaolo Abeni 
162935732d01SWei Wang 	bucket = rcu_dereference(from->rt6i_exception_bucket);
163035732d01SWei Wang 
163135732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
163235732d01SWei Wang 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
163335732d01SWei Wang 	 * and exception table is indexed by a hash of
163435732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
163535732d01SWei Wang 	 * Otherwise, the exception table is indexed by
163635732d01SWei Wang 	 * a hash of only rt6i_dst.
163735732d01SWei Wang 	 */
163893c2fb25SDavid Ahern 	if (from->fib6_src.plen)
163935732d01SWei Wang 		src_key = &rt->rt6i_src.addr;
164035732d01SWei Wang #endif
164135732d01SWei Wang 	rt6_ex = __rt6_find_exception_rcu(&bucket,
164235732d01SWei Wang 					  &rt->rt6i_dst.addr,
164335732d01SWei Wang 					  src_key);
164435732d01SWei Wang 	if (rt6_ex)
164535732d01SWei Wang 		rt6_ex->stamp = jiffies;
164635732d01SWei Wang 
1647193f3685SPaolo Abeni unlock:
164835732d01SWei Wang 	rcu_read_unlock();
164935732d01SWei Wang }
165035732d01SWei Wang 
1651e9fa1495SStefano Brivio static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1652e9fa1495SStefano Brivio 					 struct rt6_info *rt, int mtu)
1653e9fa1495SStefano Brivio {
1654e9fa1495SStefano Brivio 	/* If the new MTU is lower than the route PMTU, this new MTU will be the
1655e9fa1495SStefano Brivio 	 * lowest MTU in the path: always allow updating the route PMTU to
1656e9fa1495SStefano Brivio 	 * reflect PMTU decreases.
1657e9fa1495SStefano Brivio 	 *
1658e9fa1495SStefano Brivio 	 * If the new MTU is higher, and the route PMTU is equal to the local
1659e9fa1495SStefano Brivio 	 * MTU, this means the old MTU is the lowest in the path, so allow
1660e9fa1495SStefano Brivio 	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1661e9fa1495SStefano Brivio 	 * handle this.
1662e9fa1495SStefano Brivio 	 */
1663e9fa1495SStefano Brivio 
1664e9fa1495SStefano Brivio 	if (dst_mtu(&rt->dst) >= mtu)
1665e9fa1495SStefano Brivio 		return true;
1666e9fa1495SStefano Brivio 
1667e9fa1495SStefano Brivio 	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1668e9fa1495SStefano Brivio 		return true;
1669e9fa1495SStefano Brivio 
1670e9fa1495SStefano Brivio 	return false;
1671e9fa1495SStefano Brivio }
1672e9fa1495SStefano Brivio 
1673e9fa1495SStefano Brivio static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
16748d1c802bSDavid Ahern 				       struct fib6_info *rt, int mtu)
1675f5bbe7eeSWei Wang {
1676f5bbe7eeSWei Wang 	struct rt6_exception_bucket *bucket;
1677f5bbe7eeSWei Wang 	struct rt6_exception *rt6_ex;
1678f5bbe7eeSWei Wang 	int i;
1679f5bbe7eeSWei Wang 
1680f5bbe7eeSWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1681f5bbe7eeSWei Wang 					lockdep_is_held(&rt6_exception_lock));
1682f5bbe7eeSWei Wang 
1683e9fa1495SStefano Brivio 	if (!bucket)
1684e9fa1495SStefano Brivio 		return;
1685e9fa1495SStefano Brivio 
1686f5bbe7eeSWei Wang 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1687f5bbe7eeSWei Wang 		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1688f5bbe7eeSWei Wang 			struct rt6_info *entry = rt6_ex->rt6i;
1689e9fa1495SStefano Brivio 
1690e9fa1495SStefano Brivio 			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1691d4ead6b3SDavid Ahern 			 * route), the metrics of its rt->from have already
1692f5bbe7eeSWei Wang 			 * been updated.
1693f5bbe7eeSWei Wang 			 */
1694d4ead6b3SDavid Ahern 			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
1695e9fa1495SStefano Brivio 			    rt6_mtu_change_route_allowed(idev, entry, mtu))
1696d4ead6b3SDavid Ahern 				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
1697f5bbe7eeSWei Wang 		}
1698f5bbe7eeSWei Wang 		bucket++;
1699f5bbe7eeSWei Wang 	}
1700f5bbe7eeSWei Wang }
1701f5bbe7eeSWei Wang 
1702b16cb459SWei Wang #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
1703b16cb459SWei Wang 
17048d1c802bSDavid Ahern static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
1705b16cb459SWei Wang 					struct in6_addr *gateway)
1706b16cb459SWei Wang {
1707b16cb459SWei Wang 	struct rt6_exception_bucket *bucket;
1708b16cb459SWei Wang 	struct rt6_exception *rt6_ex;
1709b16cb459SWei Wang 	struct hlist_node *tmp;
1710b16cb459SWei Wang 	int i;
1711b16cb459SWei Wang 
1712b16cb459SWei Wang 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1713b16cb459SWei Wang 		return;
1714b16cb459SWei Wang 
1715b16cb459SWei Wang 	spin_lock_bh(&rt6_exception_lock);
1716b16cb459SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1717b16cb459SWei Wang 				     lockdep_is_held(&rt6_exception_lock));
1718b16cb459SWei Wang 
1719b16cb459SWei Wang 	if (bucket) {
1720b16cb459SWei Wang 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1721b16cb459SWei Wang 			hlist_for_each_entry_safe(rt6_ex, tmp,
1722b16cb459SWei Wang 						  &bucket->chain, hlist) {
1723b16cb459SWei Wang 				struct rt6_info *entry = rt6_ex->rt6i;
1724b16cb459SWei Wang 
1725b16cb459SWei Wang 				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1726b16cb459SWei Wang 				    RTF_CACHE_GATEWAY &&
1727b16cb459SWei Wang 				    ipv6_addr_equal(gateway,
1728b16cb459SWei Wang 						    &entry->rt6i_gateway)) {
1729b16cb459SWei Wang 					rt6_remove_exception(bucket, rt6_ex);
1730b16cb459SWei Wang 				}
1731b16cb459SWei Wang 			}
1732b16cb459SWei Wang 			bucket++;
1733b16cb459SWei Wang 		}
1734b16cb459SWei Wang 	}
1735b16cb459SWei Wang 
1736b16cb459SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
1737b16cb459SWei Wang }
1738b16cb459SWei Wang 
1739c757faa8SWei Wang static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1740c757faa8SWei Wang 				      struct rt6_exception *rt6_ex,
1741c757faa8SWei Wang 				      struct fib6_gc_args *gc_args,
1742c757faa8SWei Wang 				      unsigned long now)
1743c757faa8SWei Wang {
1744c757faa8SWei Wang 	struct rt6_info *rt = rt6_ex->rt6i;
1745c757faa8SWei Wang 
17461859bac0SPaolo Abeni 	/* we are pruning and obsoleting aged-out and non gateway exceptions
17471859bac0SPaolo Abeni 	 * even if others have still references to them, so that on next
17481859bac0SPaolo Abeni 	 * dst_check() such references can be dropped.
17491859bac0SPaolo Abeni 	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
17501859bac0SPaolo Abeni 	 * expired, independently from their aging, as per RFC 8201 section 4
17511859bac0SPaolo Abeni 	 */
175231afeb42SWei Wang 	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
175331afeb42SWei Wang 		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1754c757faa8SWei Wang 			RT6_TRACE("aging clone %p\n", rt);
1755c757faa8SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
1756c757faa8SWei Wang 			return;
175731afeb42SWei Wang 		}
175831afeb42SWei Wang 	} else if (time_after(jiffies, rt->dst.expires)) {
175931afeb42SWei Wang 		RT6_TRACE("purging expired route %p\n", rt);
176031afeb42SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
176131afeb42SWei Wang 		return;
176231afeb42SWei Wang 	}
176331afeb42SWei Wang 
176431afeb42SWei Wang 	if (rt->rt6i_flags & RTF_GATEWAY) {
1765c757faa8SWei Wang 		struct neighbour *neigh;
1766c757faa8SWei Wang 		__u8 neigh_flags = 0;
1767c757faa8SWei Wang 
17681bfa26ffSEric Dumazet 		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
17691bfa26ffSEric Dumazet 		if (neigh)
1770c757faa8SWei Wang 			neigh_flags = neigh->flags;
17711bfa26ffSEric Dumazet 
1772c757faa8SWei Wang 		if (!(neigh_flags & NTF_ROUTER)) {
1773c757faa8SWei Wang 			RT6_TRACE("purging route %p via non-router but gateway\n",
1774c757faa8SWei Wang 				  rt);
1775c757faa8SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
1776c757faa8SWei Wang 			return;
1777c757faa8SWei Wang 		}
1778c757faa8SWei Wang 	}
177931afeb42SWei Wang 
1780c757faa8SWei Wang 	gc_args->more++;
1781c757faa8SWei Wang }
1782c757faa8SWei Wang 
17838d1c802bSDavid Ahern void rt6_age_exceptions(struct fib6_info *rt,
1784c757faa8SWei Wang 			struct fib6_gc_args *gc_args,
1785c757faa8SWei Wang 			unsigned long now)
1786c757faa8SWei Wang {
1787c757faa8SWei Wang 	struct rt6_exception_bucket *bucket;
1788c757faa8SWei Wang 	struct rt6_exception *rt6_ex;
1789c757faa8SWei Wang 	struct hlist_node *tmp;
1790c757faa8SWei Wang 	int i;
1791c757faa8SWei Wang 
1792c757faa8SWei Wang 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1793c757faa8SWei Wang 		return;
1794c757faa8SWei Wang 
17951bfa26ffSEric Dumazet 	rcu_read_lock_bh();
17961bfa26ffSEric Dumazet 	spin_lock(&rt6_exception_lock);
1797c757faa8SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1798c757faa8SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
1799c757faa8SWei Wang 
1800c757faa8SWei Wang 	if (bucket) {
1801c757faa8SWei Wang 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1802c757faa8SWei Wang 			hlist_for_each_entry_safe(rt6_ex, tmp,
1803c757faa8SWei Wang 						  &bucket->chain, hlist) {
1804c757faa8SWei Wang 				rt6_age_examine_exception(bucket, rt6_ex,
1805c757faa8SWei Wang 							  gc_args, now);
1806c757faa8SWei Wang 			}
1807c757faa8SWei Wang 			bucket++;
1808c757faa8SWei Wang 		}
1809c757faa8SWei Wang 	}
18101bfa26ffSEric Dumazet 	spin_unlock(&rt6_exception_lock);
18111bfa26ffSEric Dumazet 	rcu_read_unlock_bh();
1812c757faa8SWei Wang }
1813c757faa8SWei Wang 
18141d053da9SDavid Ahern /* must be called with rcu lock held */
18151d053da9SDavid Ahern struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
18161d053da9SDavid Ahern 				    int oif, struct flowi6 *fl6, int strict)
18171da177e4SLinus Torvalds {
1818367efcb9SMartin KaFai Lau 	struct fib6_node *fn, *saved_fn;
18198d1c802bSDavid Ahern 	struct fib6_info *f6i;
18201da177e4SLinus Torvalds 
18216454743bSDavid Ahern 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1822367efcb9SMartin KaFai Lau 	saved_fn = fn;
18231da177e4SLinus Torvalds 
1824ca254490SDavid Ahern 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1825ca254490SDavid Ahern 		oif = 0;
1826ca254490SDavid Ahern 
1827a3c00e46SMartin KaFai Lau redo_rt6_select:
182823fb93a4SDavid Ahern 	f6i = rt6_select(net, fn, oif, strict);
182923fb93a4SDavid Ahern 	if (f6i == net->ipv6.fib6_null_entry) {
1830a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
1831a3c00e46SMartin KaFai Lau 		if (fn)
1832a3c00e46SMartin KaFai Lau 			goto redo_rt6_select;
1833367efcb9SMartin KaFai Lau 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1834367efcb9SMartin KaFai Lau 			/* also consider unreachable route */
1835367efcb9SMartin KaFai Lau 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1836367efcb9SMartin KaFai Lau 			fn = saved_fn;
1837367efcb9SMartin KaFai Lau 			goto redo_rt6_select;
1838367efcb9SMartin KaFai Lau 		}
1839a3c00e46SMartin KaFai Lau 	}
1840a3c00e46SMartin KaFai Lau 
1841d4bea421SDavid Ahern 	trace_fib6_table_lookup(net, f6i, table, fl6);
1842d52d3997SMartin KaFai Lau 
18431d053da9SDavid Ahern 	return f6i;
18441d053da9SDavid Ahern }
18451d053da9SDavid Ahern 
18461d053da9SDavid Ahern struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
18471d053da9SDavid Ahern 			       int oif, struct flowi6 *fl6,
18481d053da9SDavid Ahern 			       const struct sk_buff *skb, int flags)
18491d053da9SDavid Ahern {
1850b1d40991SDavid Ahern 	struct fib6_result res = {};
18511d053da9SDavid Ahern 	struct rt6_info *rt;
18521d053da9SDavid Ahern 	int strict = 0;
18531d053da9SDavid Ahern 
18541d053da9SDavid Ahern 	strict |= flags & RT6_LOOKUP_F_IFACE;
18551d053da9SDavid Ahern 	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
18561d053da9SDavid Ahern 	if (net->ipv6.devconf_all->forwarding == 0)
18571d053da9SDavid Ahern 		strict |= RT6_LOOKUP_F_REACHABLE;
18581d053da9SDavid Ahern 
18591d053da9SDavid Ahern 	rcu_read_lock();
18601d053da9SDavid Ahern 
1861b1d40991SDavid Ahern 	res.f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1862b1d40991SDavid Ahern 	if (res.f6i == net->ipv6.fib6_null_entry) {
1863421842edSDavid Ahern 		rt = net->ipv6.ip6_null_entry;
186466f5d6ceSWei Wang 		rcu_read_unlock();
1865d3843fe5SWei Wang 		dst_hold(&rt->dst);
1866d3843fe5SWei Wang 		return rt;
1867d3843fe5SWei Wang 	}
186823fb93a4SDavid Ahern 
1869b1d40991SDavid Ahern 	fib6_select_path(net, &res, fl6, oif, false, skb, strict);
1870d83009d4SDavid Ahern 
187123fb93a4SDavid Ahern 	/*Search through exception table */
1872*7e4b5128SDavid Ahern 	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
187323fb93a4SDavid Ahern 	if (rt) {
187410585b43SDavid Ahern 		if (ip6_hold_safe(net, &rt))
18751da177e4SLinus Torvalds 			dst_use_noref(&rt->dst, jiffies);
1876d4ead6b3SDavid Ahern 
187766f5d6ceSWei Wang 		rcu_read_unlock();
1878d52d3997SMartin KaFai Lau 		return rt;
18793da59bd9SMartin KaFai Lau 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1880b1d40991SDavid Ahern 			    !res.nh->fib_nh_gw_family)) {
18813da59bd9SMartin KaFai Lau 		/* Create a RTF_CACHE clone which will not be
18823da59bd9SMartin KaFai Lau 		 * owned by the fib6 tree.  It is for the special case where
18833da59bd9SMartin KaFai Lau 		 * the daddr in the skb during the neighbor look-up is different
18843da59bd9SMartin KaFai Lau 		 * from the fl6->daddr used to look-up route here.
18853da59bd9SMartin KaFai Lau 		 */
18863da59bd9SMartin KaFai Lau 		struct rt6_info *uncached_rt;
18873da59bd9SMartin KaFai Lau 
1888b1d40991SDavid Ahern 		uncached_rt = ip6_rt_cache_alloc(res.f6i, &fl6->daddr, NULL);
1889d52d3997SMartin KaFai Lau 
18904d85cd0cSDavid Ahern 		rcu_read_unlock();
18913da59bd9SMartin KaFai Lau 
18921cfb71eeSWei Wang 		if (uncached_rt) {
18931cfb71eeSWei Wang 			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
18941cfb71eeSWei Wang 			 * No need for another dst_hold()
18951cfb71eeSWei Wang 			 */
18968d0b94afSMartin KaFai Lau 			rt6_uncached_list_add(uncached_rt);
189781eb8447SWei Wang 			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
18981cfb71eeSWei Wang 		} else {
18993da59bd9SMartin KaFai Lau 			uncached_rt = net->ipv6.ip6_null_entry;
19003da59bd9SMartin KaFai Lau 			dst_hold(&uncached_rt->dst);
19011cfb71eeSWei Wang 		}
1902b811580dSDavid Ahern 
19033da59bd9SMartin KaFai Lau 		return uncached_rt;
1904d52d3997SMartin KaFai Lau 	} else {
1905d52d3997SMartin KaFai Lau 		/* Get a percpu copy */
1906d52d3997SMartin KaFai Lau 
1907d52d3997SMartin KaFai Lau 		struct rt6_info *pcpu_rt;
1908d52d3997SMartin KaFai Lau 
1909951f788aSEric Dumazet 		local_bh_disable();
1910b1d40991SDavid Ahern 		pcpu_rt = rt6_get_pcpu_route(res.f6i);
1911d52d3997SMartin KaFai Lau 
191293531c67SDavid Ahern 		if (!pcpu_rt)
1913b1d40991SDavid Ahern 			pcpu_rt = rt6_make_pcpu_route(net, res.f6i);
191493531c67SDavid Ahern 
1915951f788aSEric Dumazet 		local_bh_enable();
1916951f788aSEric Dumazet 		rcu_read_unlock();
1917d4bea421SDavid Ahern 
1918d52d3997SMartin KaFai Lau 		return pcpu_rt;
1919d52d3997SMartin KaFai Lau 	}
1920c71099acSThomas Graf }
19219ff74384SDavid Ahern EXPORT_SYMBOL_GPL(ip6_pol_route);
1922c71099acSThomas Graf 
1923b75cc8f9SDavid Ahern static struct rt6_info *ip6_pol_route_input(struct net *net,
1924b75cc8f9SDavid Ahern 					    struct fib6_table *table,
1925b75cc8f9SDavid Ahern 					    struct flowi6 *fl6,
1926b75cc8f9SDavid Ahern 					    const struct sk_buff *skb,
1927b75cc8f9SDavid Ahern 					    int flags)
19284acad72dSPavel Emelyanov {
1929b75cc8f9SDavid Ahern 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
19304acad72dSPavel Emelyanov }
19314acad72dSPavel Emelyanov 
1932d409b847SMahesh Bandewar struct dst_entry *ip6_route_input_lookup(struct net *net,
193372331bc0SShmulik Ladkani 					 struct net_device *dev,
1934b75cc8f9SDavid Ahern 					 struct flowi6 *fl6,
1935b75cc8f9SDavid Ahern 					 const struct sk_buff *skb,
1936b75cc8f9SDavid Ahern 					 int flags)
193772331bc0SShmulik Ladkani {
193872331bc0SShmulik Ladkani 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
193972331bc0SShmulik Ladkani 		flags |= RT6_LOOKUP_F_IFACE;
194072331bc0SShmulik Ladkani 
1941b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
194272331bc0SShmulik Ladkani }
1943d409b847SMahesh Bandewar EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
194472331bc0SShmulik Ladkani 
194523aebdacSJakub Sitnicki static void ip6_multipath_l3_keys(const struct sk_buff *skb,
19465e5d6fedSRoopa Prabhu 				  struct flow_keys *keys,
19475e5d6fedSRoopa Prabhu 				  struct flow_keys *flkeys)
194823aebdacSJakub Sitnicki {
194923aebdacSJakub Sitnicki 	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
195023aebdacSJakub Sitnicki 	const struct ipv6hdr *key_iph = outer_iph;
19515e5d6fedSRoopa Prabhu 	struct flow_keys *_flkeys = flkeys;
195223aebdacSJakub Sitnicki 	const struct ipv6hdr *inner_iph;
195323aebdacSJakub Sitnicki 	const struct icmp6hdr *icmph;
195423aebdacSJakub Sitnicki 	struct ipv6hdr _inner_iph;
1955cea67a2dSEric Dumazet 	struct icmp6hdr _icmph;
195623aebdacSJakub Sitnicki 
195723aebdacSJakub Sitnicki 	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
195823aebdacSJakub Sitnicki 		goto out;
195923aebdacSJakub Sitnicki 
1960cea67a2dSEric Dumazet 	icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1961cea67a2dSEric Dumazet 				   sizeof(_icmph), &_icmph);
1962cea67a2dSEric Dumazet 	if (!icmph)
1963cea67a2dSEric Dumazet 		goto out;
1964cea67a2dSEric Dumazet 
196523aebdacSJakub Sitnicki 	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
196623aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
196723aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
196823aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_PARAMPROB)
196923aebdacSJakub Sitnicki 		goto out;
197023aebdacSJakub Sitnicki 
197123aebdacSJakub Sitnicki 	inner_iph = skb_header_pointer(skb,
197223aebdacSJakub Sitnicki 				       skb_transport_offset(skb) + sizeof(*icmph),
197323aebdacSJakub Sitnicki 				       sizeof(_inner_iph), &_inner_iph);
197423aebdacSJakub Sitnicki 	if (!inner_iph)
197523aebdacSJakub Sitnicki 		goto out;
197623aebdacSJakub Sitnicki 
197723aebdacSJakub Sitnicki 	key_iph = inner_iph;
19785e5d6fedSRoopa Prabhu 	_flkeys = NULL;
197923aebdacSJakub Sitnicki out:
19805e5d6fedSRoopa Prabhu 	if (_flkeys) {
19815e5d6fedSRoopa Prabhu 		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
19825e5d6fedSRoopa Prabhu 		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
19835e5d6fedSRoopa Prabhu 		keys->tags.flow_label = _flkeys->tags.flow_label;
19845e5d6fedSRoopa Prabhu 		keys->basic.ip_proto = _flkeys->basic.ip_proto;
19855e5d6fedSRoopa Prabhu 	} else {
198623aebdacSJakub Sitnicki 		keys->addrs.v6addrs.src = key_iph->saddr;
198723aebdacSJakub Sitnicki 		keys->addrs.v6addrs.dst = key_iph->daddr;
1988fa1be7e0SMichal Kubecek 		keys->tags.flow_label = ip6_flowlabel(key_iph);
198923aebdacSJakub Sitnicki 		keys->basic.ip_proto = key_iph->nexthdr;
199023aebdacSJakub Sitnicki 	}
19915e5d6fedSRoopa Prabhu }
199223aebdacSJakub Sitnicki 
199323aebdacSJakub Sitnicki /* if skb is set it will be used and fl6 can be NULL */
1994b4bac172SDavid Ahern u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1995b4bac172SDavid Ahern 		       const struct sk_buff *skb, struct flow_keys *flkeys)
199623aebdacSJakub Sitnicki {
199723aebdacSJakub Sitnicki 	struct flow_keys hash_keys;
19989a2a537aSDavid Ahern 	u32 mhash;
199923aebdacSJakub Sitnicki 
2000bbfa047aSDavid S. Miller 	switch (ip6_multipath_hash_policy(net)) {
2001b4bac172SDavid Ahern 	case 0:
20026f74b6c2SDavid Ahern 		memset(&hash_keys, 0, sizeof(hash_keys));
20036f74b6c2SDavid Ahern 		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
20049a2a537aSDavid Ahern 		if (skb) {
20055e5d6fedSRoopa Prabhu 			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
20069a2a537aSDavid Ahern 		} else {
20079a2a537aSDavid Ahern 			hash_keys.addrs.v6addrs.src = fl6->saddr;
20089a2a537aSDavid Ahern 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2009fa1be7e0SMichal Kubecek 			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
20109a2a537aSDavid Ahern 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
201123aebdacSJakub Sitnicki 		}
2012b4bac172SDavid Ahern 		break;
2013b4bac172SDavid Ahern 	case 1:
2014b4bac172SDavid Ahern 		if (skb) {
2015b4bac172SDavid Ahern 			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2016b4bac172SDavid Ahern 			struct flow_keys keys;
2017b4bac172SDavid Ahern 
2018b4bac172SDavid Ahern 			/* short-circuit if we already have L4 hash present */
2019b4bac172SDavid Ahern 			if (skb->l4_hash)
2020b4bac172SDavid Ahern 				return skb_get_hash_raw(skb) >> 1;
2021b4bac172SDavid Ahern 
2022b4bac172SDavid Ahern 			memset(&hash_keys, 0, sizeof(hash_keys));
2023b4bac172SDavid Ahern 
2024b4bac172SDavid Ahern                         if (!flkeys) {
2025b4bac172SDavid Ahern 				skb_flow_dissect_flow_keys(skb, &keys, flag);
2026b4bac172SDavid Ahern 				flkeys = &keys;
2027b4bac172SDavid Ahern 			}
2028b4bac172SDavid Ahern 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2029b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2030b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2031b4bac172SDavid Ahern 			hash_keys.ports.src = flkeys->ports.src;
2032b4bac172SDavid Ahern 			hash_keys.ports.dst = flkeys->ports.dst;
2033b4bac172SDavid Ahern 			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2034b4bac172SDavid Ahern 		} else {
2035b4bac172SDavid Ahern 			memset(&hash_keys, 0, sizeof(hash_keys));
2036b4bac172SDavid Ahern 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2037b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.src = fl6->saddr;
2038b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2039b4bac172SDavid Ahern 			hash_keys.ports.src = fl6->fl6_sport;
2040b4bac172SDavid Ahern 			hash_keys.ports.dst = fl6->fl6_dport;
2041b4bac172SDavid Ahern 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2042b4bac172SDavid Ahern 		}
2043b4bac172SDavid Ahern 		break;
2044b4bac172SDavid Ahern 	}
20459a2a537aSDavid Ahern 	mhash = flow_hash_from_keys(&hash_keys);
204623aebdacSJakub Sitnicki 
20479a2a537aSDavid Ahern 	return mhash >> 1;
204823aebdacSJakub Sitnicki }
204923aebdacSJakub Sitnicki 
2050c71099acSThomas Graf void ip6_route_input(struct sk_buff *skb)
2051c71099acSThomas Graf {
2052b71d1d42SEric Dumazet 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2053c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(skb->dev);
2054adaa70bbSThomas Graf 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2055904af04dSJiri Benc 	struct ip_tunnel_info *tun_info;
20564c9483b2SDavid S. Miller 	struct flowi6 fl6 = {
2057e0d56fddSDavid Ahern 		.flowi6_iif = skb->dev->ifindex,
20584c9483b2SDavid S. Miller 		.daddr = iph->daddr,
20594c9483b2SDavid S. Miller 		.saddr = iph->saddr,
20606502ca52SYOSHIFUJI Hideaki / 吉藤英明 		.flowlabel = ip6_flowinfo(iph),
20614c9483b2SDavid S. Miller 		.flowi6_mark = skb->mark,
20624c9483b2SDavid S. Miller 		.flowi6_proto = iph->nexthdr,
2063c71099acSThomas Graf 	};
20645e5d6fedSRoopa Prabhu 	struct flow_keys *flkeys = NULL, _flkeys;
2065adaa70bbSThomas Graf 
2066904af04dSJiri Benc 	tun_info = skb_tunnel_info(skb);
206746fa062aSJiri Benc 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2068904af04dSJiri Benc 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
20695e5d6fedSRoopa Prabhu 
20705e5d6fedSRoopa Prabhu 	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
20715e5d6fedSRoopa Prabhu 		flkeys = &_flkeys;
20725e5d6fedSRoopa Prabhu 
207323aebdacSJakub Sitnicki 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2074b4bac172SDavid Ahern 		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
207506e9d040SJiri Benc 	skb_dst_drop(skb);
2076b75cc8f9SDavid Ahern 	skb_dst_set(skb,
2077b75cc8f9SDavid Ahern 		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
2078c71099acSThomas Graf }
2079c71099acSThomas Graf 
2080b75cc8f9SDavid Ahern static struct rt6_info *ip6_pol_route_output(struct net *net,
2081b75cc8f9SDavid Ahern 					     struct fib6_table *table,
2082b75cc8f9SDavid Ahern 					     struct flowi6 *fl6,
2083b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
2084b75cc8f9SDavid Ahern 					     int flags)
2085c71099acSThomas Graf {
2086b75cc8f9SDavid Ahern 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2087c71099acSThomas Graf }
2088c71099acSThomas Graf 
20896f21c96aSPaolo Abeni struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
20906f21c96aSPaolo Abeni 					 struct flowi6 *fl6, int flags)
2091c71099acSThomas Graf {
2092d46a9d67SDavid Ahern 	bool any_src;
2093c71099acSThomas Graf 
20943ede0bbcSRobert Shearman 	if (ipv6_addr_type(&fl6->daddr) &
20953ede0bbcSRobert Shearman 	    (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
20964c1feac5SDavid Ahern 		struct dst_entry *dst;
20974c1feac5SDavid Ahern 
20984c1feac5SDavid Ahern 		dst = l3mdev_link_scope_lookup(net, fl6);
2099ca254490SDavid Ahern 		if (dst)
2100ca254490SDavid Ahern 			return dst;
21014c1feac5SDavid Ahern 	}
2102ca254490SDavid Ahern 
21031fb9489bSPavel Emelyanov 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
21044dc27d1cSDavid McCullough 
2105d46a9d67SDavid Ahern 	any_src = ipv6_addr_any(&fl6->saddr);
2106741a11d9SDavid Ahern 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2107d46a9d67SDavid Ahern 	    (fl6->flowi6_oif && any_src))
210877d16f45SYOSHIFUJI Hideaki 		flags |= RT6_LOOKUP_F_IFACE;
2109c71099acSThomas Graf 
2110d46a9d67SDavid Ahern 	if (!any_src)
2111adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
21120c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 	else if (sk)
21130c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2114adaa70bbSThomas Graf 
2115b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
21161da177e4SLinus Torvalds }
21176f21c96aSPaolo Abeni EXPORT_SYMBOL_GPL(ip6_route_output_flags);
21181da177e4SLinus Torvalds 
21192774c131SDavid S. Miller struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
212014e50e57SDavid S. Miller {
21215c1e6aa3SDavid S. Miller 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
21221dbe3252SWei Wang 	struct net_device *loopback_dev = net->loopback_dev;
212314e50e57SDavid S. Miller 	struct dst_entry *new = NULL;
212414e50e57SDavid S. Miller 
21251dbe3252SWei Wang 	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
212662cf27e5SSteffen Klassert 		       DST_OBSOLETE_DEAD, 0);
212714e50e57SDavid S. Miller 	if (rt) {
21280a1f5962SMartin KaFai Lau 		rt6_info_init(rt);
212981eb8447SWei Wang 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
21300a1f5962SMartin KaFai Lau 
2131d8d1f30bSChangli Gao 		new = &rt->dst;
213214e50e57SDavid S. Miller 		new->__use = 1;
2133352e512cSHerbert Xu 		new->input = dst_discard;
2134ede2059dSEric W. Biederman 		new->output = dst_discard_out;
213514e50e57SDavid S. Miller 
2136defb3519SDavid S. Miller 		dst_copy_metrics(new, &ort->dst);
213714e50e57SDavid S. Miller 
21381dbe3252SWei Wang 		rt->rt6i_idev = in6_dev_get(loopback_dev);
21394e3fd7a0SAlexey Dobriyan 		rt->rt6i_gateway = ort->rt6i_gateway;
21400a1f5962SMartin KaFai Lau 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
214114e50e57SDavid S. Miller 
214214e50e57SDavid S. Miller 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
214314e50e57SDavid S. Miller #ifdef CONFIG_IPV6_SUBTREES
214414e50e57SDavid S. Miller 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
214514e50e57SDavid S. Miller #endif
214614e50e57SDavid S. Miller 	}
214714e50e57SDavid S. Miller 
214869ead7afSDavid S. Miller 	dst_release(dst_orig);
214969ead7afSDavid S. Miller 	return new ? new : ERR_PTR(-ENOMEM);
215014e50e57SDavid S. Miller }
215114e50e57SDavid S. Miller 
21521da177e4SLinus Torvalds /*
21531da177e4SLinus Torvalds  *	Destination cache support functions
21541da177e4SLinus Torvalds  */
21551da177e4SLinus Torvalds 
21568d1c802bSDavid Ahern static bool fib6_check(struct fib6_info *f6i, u32 cookie)
21573da59bd9SMartin KaFai Lau {
215836143645SSteffen Klassert 	u32 rt_cookie = 0;
2159c5cff856SWei Wang 
21608ae86971SDavid Ahern 	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
216193531c67SDavid Ahern 		return false;
216293531c67SDavid Ahern 
216393531c67SDavid Ahern 	if (fib6_check_expired(f6i))
216493531c67SDavid Ahern 		return false;
216593531c67SDavid Ahern 
216693531c67SDavid Ahern 	return true;
216793531c67SDavid Ahern }
216893531c67SDavid Ahern 
2169a68886a6SDavid Ahern static struct dst_entry *rt6_check(struct rt6_info *rt,
2170a68886a6SDavid Ahern 				   struct fib6_info *from,
2171a68886a6SDavid Ahern 				   u32 cookie)
21723da59bd9SMartin KaFai Lau {
2173c5cff856SWei Wang 	u32 rt_cookie = 0;
2174c5cff856SWei Wang 
2175a68886a6SDavid Ahern 	if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
217693531c67SDavid Ahern 	    rt_cookie != cookie)
21773da59bd9SMartin KaFai Lau 		return NULL;
21783da59bd9SMartin KaFai Lau 
21793da59bd9SMartin KaFai Lau 	if (rt6_check_expired(rt))
21803da59bd9SMartin KaFai Lau 		return NULL;
21813da59bd9SMartin KaFai Lau 
21823da59bd9SMartin KaFai Lau 	return &rt->dst;
21833da59bd9SMartin KaFai Lau }
21843da59bd9SMartin KaFai Lau 
2185a68886a6SDavid Ahern static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2186a68886a6SDavid Ahern 					    struct fib6_info *from,
2187a68886a6SDavid Ahern 					    u32 cookie)
21883da59bd9SMartin KaFai Lau {
21895973fb1eSMartin KaFai Lau 	if (!__rt6_check_expired(rt) &&
21905973fb1eSMartin KaFai Lau 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2191a68886a6SDavid Ahern 	    fib6_check(from, cookie))
21923da59bd9SMartin KaFai Lau 		return &rt->dst;
21933da59bd9SMartin KaFai Lau 	else
21943da59bd9SMartin KaFai Lau 		return NULL;
21953da59bd9SMartin KaFai Lau }
21963da59bd9SMartin KaFai Lau 
21971da177e4SLinus Torvalds static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
21981da177e4SLinus Torvalds {
2199a87b7dc9SDavid Ahern 	struct dst_entry *dst_ret;
2200a68886a6SDavid Ahern 	struct fib6_info *from;
22011da177e4SLinus Torvalds 	struct rt6_info *rt;
22021da177e4SLinus Torvalds 
2203a87b7dc9SDavid Ahern 	rt = container_of(dst, struct rt6_info, dst);
2204a87b7dc9SDavid Ahern 
2205a87b7dc9SDavid Ahern 	rcu_read_lock();
22061da177e4SLinus Torvalds 
22076f3118b5SNicolas Dichtel 	/* All IPV6 dsts are created with ->obsolete set to the value
22086f3118b5SNicolas Dichtel 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
22096f3118b5SNicolas Dichtel 	 * into this function always.
22106f3118b5SNicolas Dichtel 	 */
2211e3bc10bdSHannes Frederic Sowa 
2212a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
22134b32b5adSMartin KaFai Lau 
2214a68886a6SDavid Ahern 	if (from && (rt->rt6i_flags & RTF_PCPU ||
2215a68886a6SDavid Ahern 	    unlikely(!list_empty(&rt->rt6i_uncached))))
2216a68886a6SDavid Ahern 		dst_ret = rt6_dst_from_check(rt, from, cookie);
22173da59bd9SMartin KaFai Lau 	else
2218a68886a6SDavid Ahern 		dst_ret = rt6_check(rt, from, cookie);
2219a87b7dc9SDavid Ahern 
2220a87b7dc9SDavid Ahern 	rcu_read_unlock();
2221a87b7dc9SDavid Ahern 
2222a87b7dc9SDavid Ahern 	return dst_ret;
22231da177e4SLinus Torvalds }
22241da177e4SLinus Torvalds 
22251da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
22261da177e4SLinus Torvalds {
22271da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *) dst;
22281da177e4SLinus Torvalds 
22291da177e4SLinus Torvalds 	if (rt) {
223054c1a859SYOSHIFUJI Hideaki / 吉藤英明 		if (rt->rt6i_flags & RTF_CACHE) {
2231c3c14da0SDavid Ahern 			rcu_read_lock();
223254c1a859SYOSHIFUJI Hideaki / 吉藤英明 			if (rt6_check_expired(rt)) {
223393531c67SDavid Ahern 				rt6_remove_exception_rt(rt);
223454c1a859SYOSHIFUJI Hideaki / 吉藤英明 				dst = NULL;
22351da177e4SLinus Torvalds 			}
2236c3c14da0SDavid Ahern 			rcu_read_unlock();
223754c1a859SYOSHIFUJI Hideaki / 吉藤英明 		} else {
223854c1a859SYOSHIFUJI Hideaki / 吉藤英明 			dst_release(dst);
223954c1a859SYOSHIFUJI Hideaki / 吉藤英明 			dst = NULL;
224054c1a859SYOSHIFUJI Hideaki / 吉藤英明 		}
224154c1a859SYOSHIFUJI Hideaki / 吉藤英明 	}
224254c1a859SYOSHIFUJI Hideaki / 吉藤英明 	return dst;
22431da177e4SLinus Torvalds }
22441da177e4SLinus Torvalds 
22451da177e4SLinus Torvalds static void ip6_link_failure(struct sk_buff *skb)
22461da177e4SLinus Torvalds {
22471da177e4SLinus Torvalds 	struct rt6_info *rt;
22481da177e4SLinus Torvalds 
22493ffe533cSAlexey Dobriyan 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
22501da177e4SLinus Torvalds 
2251adf30907SEric Dumazet 	rt = (struct rt6_info *) skb_dst(skb);
22521da177e4SLinus Torvalds 	if (rt) {
22538a14e46fSDavid Ahern 		rcu_read_lock();
22541eb4f758SHannes Frederic Sowa 		if (rt->rt6i_flags & RTF_CACHE) {
225593531c67SDavid Ahern 			rt6_remove_exception_rt(rt);
2256c5cff856SWei Wang 		} else {
2257a68886a6SDavid Ahern 			struct fib6_info *from;
2258c5cff856SWei Wang 			struct fib6_node *fn;
2259c5cff856SWei Wang 
2260a68886a6SDavid Ahern 			from = rcu_dereference(rt->from);
2261a68886a6SDavid Ahern 			if (from) {
2262a68886a6SDavid Ahern 				fn = rcu_dereference(from->fib6_node);
2263c5cff856SWei Wang 				if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2264c5cff856SWei Wang 					fn->fn_sernum = -1;
2265a68886a6SDavid Ahern 			}
22661da177e4SLinus Torvalds 		}
22671da177e4SLinus Torvalds 		rcu_read_unlock();
22681da177e4SLinus Torvalds 	}
22691da177e4SLinus Torvalds }
22701da177e4SLinus Torvalds 
22716a3e030fSDavid Ahern static void rt6_update_expires(struct rt6_info *rt0, int timeout)
22726a3e030fSDavid Ahern {
2273a68886a6SDavid Ahern 	if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2274a68886a6SDavid Ahern 		struct fib6_info *from;
2275a68886a6SDavid Ahern 
2276a68886a6SDavid Ahern 		rcu_read_lock();
2277a68886a6SDavid Ahern 		from = rcu_dereference(rt0->from);
2278a68886a6SDavid Ahern 		if (from)
2279a68886a6SDavid Ahern 			rt0->dst.expires = from->expires;
2280a68886a6SDavid Ahern 		rcu_read_unlock();
2281a68886a6SDavid Ahern 	}
22826a3e030fSDavid Ahern 
22836a3e030fSDavid Ahern 	dst_set_expires(&rt0->dst, timeout);
22846a3e030fSDavid Ahern 	rt0->rt6i_flags |= RTF_EXPIRES;
22856700c270SDavid S. Miller }
22861da177e4SLinus Torvalds 
228745e4fd26SMartin KaFai Lau static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
228845e4fd26SMartin KaFai Lau {
228945e4fd26SMartin KaFai Lau 	struct net *net = dev_net(rt->dst.dev);
229045e4fd26SMartin KaFai Lau 
2291d4ead6b3SDavid Ahern 	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
229245e4fd26SMartin KaFai Lau 	rt->rt6i_flags |= RTF_MODIFIED;
229345e4fd26SMartin KaFai Lau 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
229445e4fd26SMartin KaFai Lau }
229545e4fd26SMartin KaFai Lau 
22960d3f6d29SMartin KaFai Lau static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
22970d3f6d29SMartin KaFai Lau {
22980d3f6d29SMartin KaFai Lau 	return !(rt->rt6i_flags & RTF_CACHE) &&
22991490ed2aSPaolo Abeni 		(rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
23000d3f6d29SMartin KaFai Lau }
23010d3f6d29SMartin KaFai Lau 
230245e4fd26SMartin KaFai Lau static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
230345e4fd26SMartin KaFai Lau 				 const struct ipv6hdr *iph, u32 mtu)
23041da177e4SLinus Torvalds {
23050dec879fSJulian Anastasov 	const struct in6_addr *daddr, *saddr;
23061da177e4SLinus Torvalds 	struct rt6_info *rt6 = (struct rt6_info *)dst;
23071da177e4SLinus Torvalds 
230819bda36cSXin Long 	if (dst_metric_locked(dst, RTAX_MTU))
230919bda36cSXin Long 		return;
231019bda36cSXin Long 
231145e4fd26SMartin KaFai Lau 	if (iph) {
231245e4fd26SMartin KaFai Lau 		daddr = &iph->daddr;
231345e4fd26SMartin KaFai Lau 		saddr = &iph->saddr;
231445e4fd26SMartin KaFai Lau 	} else if (sk) {
231545e4fd26SMartin KaFai Lau 		daddr = &sk->sk_v6_daddr;
231645e4fd26SMartin KaFai Lau 		saddr = &inet6_sk(sk)->saddr;
231745e4fd26SMartin KaFai Lau 	} else {
23180dec879fSJulian Anastasov 		daddr = NULL;
23190dec879fSJulian Anastasov 		saddr = NULL;
23201da177e4SLinus Torvalds 	}
23210dec879fSJulian Anastasov 	dst_confirm_neigh(dst, daddr);
23220dec879fSJulian Anastasov 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
23230dec879fSJulian Anastasov 	if (mtu >= dst_mtu(dst))
23240dec879fSJulian Anastasov 		return;
23250dec879fSJulian Anastasov 
23260dec879fSJulian Anastasov 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
23270dec879fSJulian Anastasov 		rt6_do_update_pmtu(rt6, mtu);
23282b760fcfSWei Wang 		/* update rt6_ex->stamp for cache */
23292b760fcfSWei Wang 		if (rt6->rt6i_flags & RTF_CACHE)
23302b760fcfSWei Wang 			rt6_update_exception_stamp_rt(rt6);
23310dec879fSJulian Anastasov 	} else if (daddr) {
2332a68886a6SDavid Ahern 		struct fib6_info *from;
23330dec879fSJulian Anastasov 		struct rt6_info *nrt6;
23340dec879fSJulian Anastasov 
23354d85cd0cSDavid Ahern 		rcu_read_lock();
2336a68886a6SDavid Ahern 		from = rcu_dereference(rt6->from);
23379c69a132SJonathan Lemon 		if (!from) {
23389c69a132SJonathan Lemon 			rcu_read_unlock();
23399c69a132SJonathan Lemon 			return;
23409c69a132SJonathan Lemon 		}
2341a68886a6SDavid Ahern 		nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
234245e4fd26SMartin KaFai Lau 		if (nrt6) {
234345e4fd26SMartin KaFai Lau 			rt6_do_update_pmtu(nrt6, mtu);
2344a68886a6SDavid Ahern 			if (rt6_insert_exception(nrt6, from))
23452b760fcfSWei Wang 				dst_release_immediate(&nrt6->dst);
234645e4fd26SMartin KaFai Lau 		}
2347a68886a6SDavid Ahern 		rcu_read_unlock();
234845e4fd26SMartin KaFai Lau 	}
234945e4fd26SMartin KaFai Lau }
235045e4fd26SMartin KaFai Lau 
235145e4fd26SMartin KaFai Lau static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
235245e4fd26SMartin KaFai Lau 			       struct sk_buff *skb, u32 mtu)
235345e4fd26SMartin KaFai Lau {
235445e4fd26SMartin KaFai Lau 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
23551da177e4SLinus Torvalds }
23561da177e4SLinus Torvalds 
235742ae66c8SDavid S. Miller void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2358e2d118a1SLorenzo Colitti 		     int oif, u32 mark, kuid_t uid)
235981aded24SDavid S. Miller {
236081aded24SDavid S. Miller 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
236181aded24SDavid S. Miller 	struct dst_entry *dst;
2362dc92095dSMaciej Żenczykowski 	struct flowi6 fl6 = {
2363dc92095dSMaciej Żenczykowski 		.flowi6_oif = oif,
2364dc92095dSMaciej Żenczykowski 		.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2365dc92095dSMaciej Żenczykowski 		.daddr = iph->daddr,
2366dc92095dSMaciej Żenczykowski 		.saddr = iph->saddr,
2367dc92095dSMaciej Żenczykowski 		.flowlabel = ip6_flowinfo(iph),
2368dc92095dSMaciej Żenczykowski 		.flowi6_uid = uid,
2369dc92095dSMaciej Żenczykowski 	};
237081aded24SDavid S. Miller 
237181aded24SDavid S. Miller 	dst = ip6_route_output(net, NULL, &fl6);
237281aded24SDavid S. Miller 	if (!dst->error)
237345e4fd26SMartin KaFai Lau 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
237481aded24SDavid S. Miller 	dst_release(dst);
237581aded24SDavid S. Miller }
237681aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_update_pmtu);
237781aded24SDavid S. Miller 
237881aded24SDavid S. Miller void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
237981aded24SDavid S. Miller {
23807ddacfa5SDavid Ahern 	int oif = sk->sk_bound_dev_if;
238133c162a9SMartin KaFai Lau 	struct dst_entry *dst;
238233c162a9SMartin KaFai Lau 
23837ddacfa5SDavid Ahern 	if (!oif && skb->dev)
23847ddacfa5SDavid Ahern 		oif = l3mdev_master_ifindex(skb->dev);
23857ddacfa5SDavid Ahern 
23867ddacfa5SDavid Ahern 	ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
238733c162a9SMartin KaFai Lau 
238833c162a9SMartin KaFai Lau 	dst = __sk_dst_get(sk);
238933c162a9SMartin KaFai Lau 	if (!dst || !dst->obsolete ||
239033c162a9SMartin KaFai Lau 	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
239133c162a9SMartin KaFai Lau 		return;
239233c162a9SMartin KaFai Lau 
239333c162a9SMartin KaFai Lau 	bh_lock_sock(sk);
239433c162a9SMartin KaFai Lau 	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
239533c162a9SMartin KaFai Lau 		ip6_datagram_dst_update(sk, false);
239633c162a9SMartin KaFai Lau 	bh_unlock_sock(sk);
239781aded24SDavid S. Miller }
239881aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
239981aded24SDavid S. Miller 
24007d6850f7SAlexey Kodanev void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
24017d6850f7SAlexey Kodanev 			   const struct flowi6 *fl6)
24027d6850f7SAlexey Kodanev {
24037d6850f7SAlexey Kodanev #ifdef CONFIG_IPV6_SUBTREES
24047d6850f7SAlexey Kodanev 	struct ipv6_pinfo *np = inet6_sk(sk);
24057d6850f7SAlexey Kodanev #endif
24067d6850f7SAlexey Kodanev 
24077d6850f7SAlexey Kodanev 	ip6_dst_store(sk, dst,
24087d6850f7SAlexey Kodanev 		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
24097d6850f7SAlexey Kodanev 		      &sk->sk_v6_daddr : NULL,
24107d6850f7SAlexey Kodanev #ifdef CONFIG_IPV6_SUBTREES
24117d6850f7SAlexey Kodanev 		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
24127d6850f7SAlexey Kodanev 		      &np->saddr :
24137d6850f7SAlexey Kodanev #endif
24147d6850f7SAlexey Kodanev 		      NULL);
24157d6850f7SAlexey Kodanev }
24167d6850f7SAlexey Kodanev 
24170b34eb00SDavid Ahern static bool ip6_redirect_nh_match(struct fib6_info *f6i,
24180b34eb00SDavid Ahern 				  struct fib6_nh *nh,
24190b34eb00SDavid Ahern 				  struct flowi6 *fl6,
24200b34eb00SDavid Ahern 				  const struct in6_addr *gw,
24210b34eb00SDavid Ahern 				  struct rt6_info **ret)
24220b34eb00SDavid Ahern {
24230b34eb00SDavid Ahern 	if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
24240b34eb00SDavid Ahern 	    fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
24250b34eb00SDavid Ahern 		return false;
24260b34eb00SDavid Ahern 
24270b34eb00SDavid Ahern 	/* rt_cache's gateway might be different from its 'parent'
24280b34eb00SDavid Ahern 	 * in the case of an ip redirect.
24290b34eb00SDavid Ahern 	 * So we keep searching in the exception table if the gateway
24300b34eb00SDavid Ahern 	 * is different.
24310b34eb00SDavid Ahern 	 */
24320b34eb00SDavid Ahern 	if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
2433*7e4b5128SDavid Ahern 		struct fib6_result res = {
2434*7e4b5128SDavid Ahern 			.f6i = f6i,
2435*7e4b5128SDavid Ahern 		};
24360b34eb00SDavid Ahern 		struct rt6_info *rt_cache;
24370b34eb00SDavid Ahern 
2438*7e4b5128SDavid Ahern 		rt_cache = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
24390b34eb00SDavid Ahern 		if (rt_cache &&
24400b34eb00SDavid Ahern 		    ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
24410b34eb00SDavid Ahern 			*ret = rt_cache;
24420b34eb00SDavid Ahern 			return true;
24430b34eb00SDavid Ahern 		}
24440b34eb00SDavid Ahern 		return false;
24450b34eb00SDavid Ahern 	}
24460b34eb00SDavid Ahern 	return true;
24470b34eb00SDavid Ahern }
24480b34eb00SDavid Ahern 
2449b55b76b2SDuan Jiong /* Handle redirects */
2450b55b76b2SDuan Jiong struct ip6rd_flowi {
2451b55b76b2SDuan Jiong 	struct flowi6 fl6;
2452b55b76b2SDuan Jiong 	struct in6_addr gateway;
2453b55b76b2SDuan Jiong };
2454b55b76b2SDuan Jiong 
2455b55b76b2SDuan Jiong static struct rt6_info *__ip6_route_redirect(struct net *net,
2456b55b76b2SDuan Jiong 					     struct fib6_table *table,
2457b55b76b2SDuan Jiong 					     struct flowi6 *fl6,
2458b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
2459b55b76b2SDuan Jiong 					     int flags)
2460b55b76b2SDuan Jiong {
2461b55b76b2SDuan Jiong 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
24620b34eb00SDavid Ahern 	struct rt6_info *ret = NULL;
24638d1c802bSDavid Ahern 	struct fib6_info *rt;
2464b55b76b2SDuan Jiong 	struct fib6_node *fn;
2465b55b76b2SDuan Jiong 
2466b55b76b2SDuan Jiong 	/* Get the "current" route for this destination and
246767c408cfSAlexander Alemayhu 	 * check if the redirect has come from appropriate router.
2468b55b76b2SDuan Jiong 	 *
2469b55b76b2SDuan Jiong 	 * RFC 4861 specifies that redirects should only be
2470b55b76b2SDuan Jiong 	 * accepted if they come from the nexthop to the target.
2471b55b76b2SDuan Jiong 	 * Due to the way the routes are chosen, this notion
2472b55b76b2SDuan Jiong 	 * is a bit fuzzy and one might need to check all possible
2473b55b76b2SDuan Jiong 	 * routes.
2474b55b76b2SDuan Jiong 	 */
2475b55b76b2SDuan Jiong 
247666f5d6ceSWei Wang 	rcu_read_lock();
24776454743bSDavid Ahern 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2478b55b76b2SDuan Jiong restart:
247966f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(fn) {
248014895687SDavid Ahern 		if (fib6_check_expired(rt))
2481b55b76b2SDuan Jiong 			continue;
248293c2fb25SDavid Ahern 		if (rt->fib6_flags & RTF_REJECT)
2483b55b76b2SDuan Jiong 			break;
24840b34eb00SDavid Ahern 		if (ip6_redirect_nh_match(rt, &rt->fib6_nh, fl6,
24850b34eb00SDavid Ahern 					  &rdfl->gateway, &ret))
24860b34eb00SDavid Ahern 			goto out;
2487b55b76b2SDuan Jiong 	}
2488b55b76b2SDuan Jiong 
2489b55b76b2SDuan Jiong 	if (!rt)
2490421842edSDavid Ahern 		rt = net->ipv6.fib6_null_entry;
249193c2fb25SDavid Ahern 	else if (rt->fib6_flags & RTF_REJECT) {
249223fb93a4SDavid Ahern 		ret = net->ipv6.ip6_null_entry;
2493b0a1ba59SMartin KaFai Lau 		goto out;
2494b0a1ba59SMartin KaFai Lau 	}
2495b0a1ba59SMartin KaFai Lau 
2496421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry) {
2497a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
2498a3c00e46SMartin KaFai Lau 		if (fn)
2499a3c00e46SMartin KaFai Lau 			goto restart;
2500b55b76b2SDuan Jiong 	}
2501a3c00e46SMartin KaFai Lau 
2502b0a1ba59SMartin KaFai Lau out:
250323fb93a4SDavid Ahern 	if (ret)
250410585b43SDavid Ahern 		ip6_hold_safe(net, &ret);
250523fb93a4SDavid Ahern 	else
250623fb93a4SDavid Ahern 		ret = ip6_create_rt_rcu(rt);
2507b55b76b2SDuan Jiong 
250866f5d6ceSWei Wang 	rcu_read_unlock();
2509b55b76b2SDuan Jiong 
2510b65f164dSPaolo Abeni 	trace_fib6_table_lookup(net, rt, table, fl6);
251123fb93a4SDavid Ahern 	return ret;
2512b55b76b2SDuan Jiong };
2513b55b76b2SDuan Jiong 
2514b55b76b2SDuan Jiong static struct dst_entry *ip6_route_redirect(struct net *net,
2515b55b76b2SDuan Jiong 					    const struct flowi6 *fl6,
2516b75cc8f9SDavid Ahern 					    const struct sk_buff *skb,
2517b55b76b2SDuan Jiong 					    const struct in6_addr *gateway)
2518b55b76b2SDuan Jiong {
2519b55b76b2SDuan Jiong 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2520b55b76b2SDuan Jiong 	struct ip6rd_flowi rdfl;
2521b55b76b2SDuan Jiong 
2522b55b76b2SDuan Jiong 	rdfl.fl6 = *fl6;
2523b55b76b2SDuan Jiong 	rdfl.gateway = *gateway;
2524b55b76b2SDuan Jiong 
2525b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, &rdfl.fl6, skb,
2526b55b76b2SDuan Jiong 				flags, __ip6_route_redirect);
2527b55b76b2SDuan Jiong }
2528b55b76b2SDuan Jiong 
2529e2d118a1SLorenzo Colitti void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2530e2d118a1SLorenzo Colitti 		  kuid_t uid)
25313a5ad2eeSDavid S. Miller {
25323a5ad2eeSDavid S. Miller 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
25333a5ad2eeSDavid S. Miller 	struct dst_entry *dst;
25341f7f10acSMaciej Żenczykowski 	struct flowi6 fl6 = {
25351f7f10acSMaciej Żenczykowski 		.flowi6_iif = LOOPBACK_IFINDEX,
25361f7f10acSMaciej Żenczykowski 		.flowi6_oif = oif,
25371f7f10acSMaciej Żenczykowski 		.flowi6_mark = mark,
25381f7f10acSMaciej Żenczykowski 		.daddr = iph->daddr,
25391f7f10acSMaciej Żenczykowski 		.saddr = iph->saddr,
25401f7f10acSMaciej Żenczykowski 		.flowlabel = ip6_flowinfo(iph),
25411f7f10acSMaciej Żenczykowski 		.flowi6_uid = uid,
25421f7f10acSMaciej Żenczykowski 	};
25433a5ad2eeSDavid S. Miller 
2544b75cc8f9SDavid Ahern 	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
25456700c270SDavid S. Miller 	rt6_do_redirect(dst, NULL, skb);
25463a5ad2eeSDavid S. Miller 	dst_release(dst);
25473a5ad2eeSDavid S. Miller }
25483a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_redirect);
25493a5ad2eeSDavid S. Miller 
2550d456336dSMaciej Żenczykowski void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
2551c92a59ecSDuan Jiong {
2552c92a59ecSDuan Jiong 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2553c92a59ecSDuan Jiong 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2554c92a59ecSDuan Jiong 	struct dst_entry *dst;
25550b26fb17SMaciej Żenczykowski 	struct flowi6 fl6 = {
25560b26fb17SMaciej Żenczykowski 		.flowi6_iif = LOOPBACK_IFINDEX,
25570b26fb17SMaciej Żenczykowski 		.flowi6_oif = oif,
25580b26fb17SMaciej Żenczykowski 		.daddr = msg->dest,
25590b26fb17SMaciej Żenczykowski 		.saddr = iph->daddr,
25600b26fb17SMaciej Żenczykowski 		.flowi6_uid = sock_net_uid(net, NULL),
25610b26fb17SMaciej Żenczykowski 	};
2562c92a59ecSDuan Jiong 
2563b75cc8f9SDavid Ahern 	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2564c92a59ecSDuan Jiong 	rt6_do_redirect(dst, NULL, skb);
2565c92a59ecSDuan Jiong 	dst_release(dst);
2566c92a59ecSDuan Jiong }
2567c92a59ecSDuan Jiong 
25683a5ad2eeSDavid S. Miller void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
25693a5ad2eeSDavid S. Miller {
2570e2d118a1SLorenzo Colitti 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2571e2d118a1SLorenzo Colitti 		     sk->sk_uid);
25723a5ad2eeSDavid S. Miller }
25733a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_redirect);
25743a5ad2eeSDavid S. Miller 
25750dbaee3bSDavid S. Miller static unsigned int ip6_default_advmss(const struct dst_entry *dst)
25761da177e4SLinus Torvalds {
25770dbaee3bSDavid S. Miller 	struct net_device *dev = dst->dev;
25780dbaee3bSDavid S. Miller 	unsigned int mtu = dst_mtu(dst);
25790dbaee3bSDavid S. Miller 	struct net *net = dev_net(dev);
25800dbaee3bSDavid S. Miller 
25811da177e4SLinus Torvalds 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
25821da177e4SLinus Torvalds 
25835578689aSDaniel Lezcano 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
25845578689aSDaniel Lezcano 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
25851da177e4SLinus Torvalds 
25861da177e4SLinus Torvalds 	/*
25871da177e4SLinus Torvalds 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
25881da177e4SLinus Torvalds 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
25891da177e4SLinus Torvalds 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
25901da177e4SLinus Torvalds 	 * rely only on pmtu discovery"
25911da177e4SLinus Torvalds 	 */
25921da177e4SLinus Torvalds 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
25931da177e4SLinus Torvalds 		mtu = IPV6_MAXPLEN;
25941da177e4SLinus Torvalds 	return mtu;
25951da177e4SLinus Torvalds }
25961da177e4SLinus Torvalds 
2597ebb762f2SSteffen Klassert static unsigned int ip6_mtu(const struct dst_entry *dst)
2598d33e4553SDavid S. Miller {
2599d33e4553SDavid S. Miller 	struct inet6_dev *idev;
2600d4ead6b3SDavid Ahern 	unsigned int mtu;
2601618f9bc7SSteffen Klassert 
26024b32b5adSMartin KaFai Lau 	mtu = dst_metric_raw(dst, RTAX_MTU);
26034b32b5adSMartin KaFai Lau 	if (mtu)
26044b32b5adSMartin KaFai Lau 		goto out;
26054b32b5adSMartin KaFai Lau 
2606618f9bc7SSteffen Klassert 	mtu = IPV6_MIN_MTU;
2607d33e4553SDavid S. Miller 
2608d33e4553SDavid S. Miller 	rcu_read_lock();
2609d33e4553SDavid S. Miller 	idev = __in6_dev_get(dst->dev);
2610d33e4553SDavid S. Miller 	if (idev)
2611d33e4553SDavid S. Miller 		mtu = idev->cnf.mtu6;
2612d33e4553SDavid S. Miller 	rcu_read_unlock();
2613d33e4553SDavid S. Miller 
261430f78d8eSEric Dumazet out:
261514972cbdSRoopa Prabhu 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
261614972cbdSRoopa Prabhu 
261714972cbdSRoopa Prabhu 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2618d33e4553SDavid S. Miller }
2619d33e4553SDavid S. Miller 
2620901731b8SDavid Ahern /* MTU selection:
2621901731b8SDavid Ahern  * 1. mtu on route is locked - use it
2622901731b8SDavid Ahern  * 2. mtu from nexthop exception
2623901731b8SDavid Ahern  * 3. mtu from egress device
2624901731b8SDavid Ahern  *
2625901731b8SDavid Ahern  * based on ip6_dst_mtu_forward and exception logic of
2626901731b8SDavid Ahern  * rt6_find_cached_rt; called with rcu_read_lock
2627901731b8SDavid Ahern  */
2628901731b8SDavid Ahern u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2629901731b8SDavid Ahern 		      struct in6_addr *saddr)
2630901731b8SDavid Ahern {
2631901731b8SDavid Ahern 	struct rt6_exception_bucket *bucket;
2632901731b8SDavid Ahern 	struct rt6_exception *rt6_ex;
2633901731b8SDavid Ahern 	struct in6_addr *src_key;
2634901731b8SDavid Ahern 	struct inet6_dev *idev;
2635901731b8SDavid Ahern 	u32 mtu = 0;
2636901731b8SDavid Ahern 
2637901731b8SDavid Ahern 	if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2638901731b8SDavid Ahern 		mtu = f6i->fib6_pmtu;
2639901731b8SDavid Ahern 		if (mtu)
2640901731b8SDavid Ahern 			goto out;
2641901731b8SDavid Ahern 	}
2642901731b8SDavid Ahern 
2643901731b8SDavid Ahern 	src_key = NULL;
2644901731b8SDavid Ahern #ifdef CONFIG_IPV6_SUBTREES
2645901731b8SDavid Ahern 	if (f6i->fib6_src.plen)
2646901731b8SDavid Ahern 		src_key = saddr;
2647901731b8SDavid Ahern #endif
2648901731b8SDavid Ahern 
2649901731b8SDavid Ahern 	bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2650901731b8SDavid Ahern 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2651901731b8SDavid Ahern 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2652901731b8SDavid Ahern 		mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2653901731b8SDavid Ahern 
2654901731b8SDavid Ahern 	if (likely(!mtu)) {
2655901731b8SDavid Ahern 		struct net_device *dev = fib6_info_nh_dev(f6i);
2656901731b8SDavid Ahern 
2657901731b8SDavid Ahern 		mtu = IPV6_MIN_MTU;
2658901731b8SDavid Ahern 		idev = __in6_dev_get(dev);
2659901731b8SDavid Ahern 		if (idev && idev->cnf.mtu6 > mtu)
2660901731b8SDavid Ahern 			mtu = idev->cnf.mtu6;
2661901731b8SDavid Ahern 	}
2662901731b8SDavid Ahern 
2663901731b8SDavid Ahern 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2664901731b8SDavid Ahern out:
2665901731b8SDavid Ahern 	return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2666901731b8SDavid Ahern }
2667901731b8SDavid Ahern 
26683b00944cSYOSHIFUJI Hideaki struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
266987a11578SDavid S. Miller 				  struct flowi6 *fl6)
26701da177e4SLinus Torvalds {
267187a11578SDavid S. Miller 	struct dst_entry *dst;
26721da177e4SLinus Torvalds 	struct rt6_info *rt;
26731da177e4SLinus Torvalds 	struct inet6_dev *idev = in6_dev_get(dev);
2674c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
26751da177e4SLinus Torvalds 
267638308473SDavid S. Miller 	if (unlikely(!idev))
2677122bdf67SEric Dumazet 		return ERR_PTR(-ENODEV);
26781da177e4SLinus Torvalds 
2679ad706862SMartin KaFai Lau 	rt = ip6_dst_alloc(net, dev, 0);
268038308473SDavid S. Miller 	if (unlikely(!rt)) {
26811da177e4SLinus Torvalds 		in6_dev_put(idev);
268287a11578SDavid S. Miller 		dst = ERR_PTR(-ENOMEM);
26831da177e4SLinus Torvalds 		goto out;
26841da177e4SLinus Torvalds 	}
26851da177e4SLinus Torvalds 
26868e2ec639SYan, Zheng 	rt->dst.flags |= DST_HOST;
2687588753f1SBrendan McGrath 	rt->dst.input = ip6_input;
26888e2ec639SYan, Zheng 	rt->dst.output  = ip6_output;
2689550bab42SJulian Anastasov 	rt->rt6i_gateway  = fl6->daddr;
269087a11578SDavid S. Miller 	rt->rt6i_dst.addr = fl6->daddr;
26918e2ec639SYan, Zheng 	rt->rt6i_dst.plen = 128;
26928e2ec639SYan, Zheng 	rt->rt6i_idev     = idev;
269314edd87dSLi RongQing 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
26941da177e4SLinus Torvalds 
26954c981e28SIdo Schimmel 	/* Add this dst into uncached_list so that rt6_disable_ip() can
2696587fea74SWei Wang 	 * do proper release of the net_device
2697587fea74SWei Wang 	 */
2698587fea74SWei Wang 	rt6_uncached_list_add(rt);
269981eb8447SWei Wang 	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
27001da177e4SLinus Torvalds 
270187a11578SDavid S. Miller 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
270287a11578SDavid S. Miller 
27031da177e4SLinus Torvalds out:
270487a11578SDavid S. Miller 	return dst;
27051da177e4SLinus Torvalds }
27061da177e4SLinus Torvalds 
2707569d3645SDaniel Lezcano static int ip6_dst_gc(struct dst_ops *ops)
27081da177e4SLinus Torvalds {
270986393e52SAlexey Dobriyan 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
27107019b78eSDaniel Lezcano 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
27117019b78eSDaniel Lezcano 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
27127019b78eSDaniel Lezcano 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
27137019b78eSDaniel Lezcano 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
27147019b78eSDaniel Lezcano 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2715fc66f95cSEric Dumazet 	int entries;
27161da177e4SLinus Torvalds 
2717fc66f95cSEric Dumazet 	entries = dst_entries_get_fast(ops);
271849a18d86SMichal Kubeček 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2719fc66f95cSEric Dumazet 	    entries <= rt_max_size)
27201da177e4SLinus Torvalds 		goto out;
27211da177e4SLinus Torvalds 
27226891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire++;
272314956643SLi RongQing 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2724fc66f95cSEric Dumazet 	entries = dst_entries_get_slow(ops);
2725fc66f95cSEric Dumazet 	if (entries < ops->gc_thresh)
27267019b78eSDaniel Lezcano 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
27271da177e4SLinus Torvalds out:
27287019b78eSDaniel Lezcano 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2729fc66f95cSEric Dumazet 	return entries > rt_max_size;
27301da177e4SLinus Torvalds }
27311da177e4SLinus Torvalds 
27328c14586fSDavid Ahern static struct rt6_info *ip6_nh_lookup_table(struct net *net,
27338c14586fSDavid Ahern 					    struct fib6_config *cfg,
2734f4797b33SDavid Ahern 					    const struct in6_addr *gw_addr,
2735f4797b33SDavid Ahern 					    u32 tbid, int flags)
27368c14586fSDavid Ahern {
27378c14586fSDavid Ahern 	struct flowi6 fl6 = {
27388c14586fSDavid Ahern 		.flowi6_oif = cfg->fc_ifindex,
27398c14586fSDavid Ahern 		.daddr = *gw_addr,
27408c14586fSDavid Ahern 		.saddr = cfg->fc_prefsrc,
27418c14586fSDavid Ahern 	};
27428c14586fSDavid Ahern 	struct fib6_table *table;
27438c14586fSDavid Ahern 	struct rt6_info *rt;
27448c14586fSDavid Ahern 
2745f4797b33SDavid Ahern 	table = fib6_get_table(net, tbid);
27468c14586fSDavid Ahern 	if (!table)
27478c14586fSDavid Ahern 		return NULL;
27488c14586fSDavid Ahern 
27498c14586fSDavid Ahern 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
27508c14586fSDavid Ahern 		flags |= RT6_LOOKUP_F_HAS_SADDR;
27518c14586fSDavid Ahern 
2752f4797b33SDavid Ahern 	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2753b75cc8f9SDavid Ahern 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
27548c14586fSDavid Ahern 
27558c14586fSDavid Ahern 	/* if table lookup failed, fall back to full lookup */
27568c14586fSDavid Ahern 	if (rt == net->ipv6.ip6_null_entry) {
27578c14586fSDavid Ahern 		ip6_rt_put(rt);
27588c14586fSDavid Ahern 		rt = NULL;
27598c14586fSDavid Ahern 	}
27608c14586fSDavid Ahern 
27618c14586fSDavid Ahern 	return rt;
27628c14586fSDavid Ahern }
27638c14586fSDavid Ahern 
2764fc1e64e1SDavid Ahern static int ip6_route_check_nh_onlink(struct net *net,
2765fc1e64e1SDavid Ahern 				     struct fib6_config *cfg,
27669fbb704cSDavid Ahern 				     const struct net_device *dev,
2767fc1e64e1SDavid Ahern 				     struct netlink_ext_ack *extack)
2768fc1e64e1SDavid Ahern {
276944750f84SDavid Ahern 	u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2770fc1e64e1SDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
2771fc1e64e1SDavid Ahern 	u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2772bf1dc8baSPaolo Abeni 	struct fib6_info *from;
2773fc1e64e1SDavid Ahern 	struct rt6_info *grt;
2774fc1e64e1SDavid Ahern 	int err;
2775fc1e64e1SDavid Ahern 
2776fc1e64e1SDavid Ahern 	err = 0;
2777fc1e64e1SDavid Ahern 	grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2778fc1e64e1SDavid Ahern 	if (grt) {
2779bf1dc8baSPaolo Abeni 		rcu_read_lock();
2780bf1dc8baSPaolo Abeni 		from = rcu_dereference(grt->from);
278158e354c0SDavid Ahern 		if (!grt->dst.error &&
27824ed591c8SDavid Ahern 		    /* ignore match if it is the default route */
2783bf1dc8baSPaolo Abeni 		    from && !ipv6_addr_any(&from->fib6_dst.addr) &&
278458e354c0SDavid Ahern 		    (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
278544750f84SDavid Ahern 			NL_SET_ERR_MSG(extack,
278644750f84SDavid Ahern 				       "Nexthop has invalid gateway or device mismatch");
2787fc1e64e1SDavid Ahern 			err = -EINVAL;
2788fc1e64e1SDavid Ahern 		}
2789bf1dc8baSPaolo Abeni 		rcu_read_unlock();
2790fc1e64e1SDavid Ahern 
2791fc1e64e1SDavid Ahern 		ip6_rt_put(grt);
2792fc1e64e1SDavid Ahern 	}
2793fc1e64e1SDavid Ahern 
2794fc1e64e1SDavid Ahern 	return err;
2795fc1e64e1SDavid Ahern }
2796fc1e64e1SDavid Ahern 
27971edce99fSDavid Ahern static int ip6_route_check_nh(struct net *net,
27981edce99fSDavid Ahern 			      struct fib6_config *cfg,
27991edce99fSDavid Ahern 			      struct net_device **_dev,
28001edce99fSDavid Ahern 			      struct inet6_dev **idev)
28011edce99fSDavid Ahern {
28021edce99fSDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
28031edce99fSDavid Ahern 	struct net_device *dev = _dev ? *_dev : NULL;
28041edce99fSDavid Ahern 	struct rt6_info *grt = NULL;
28051edce99fSDavid Ahern 	int err = -EHOSTUNREACH;
28061edce99fSDavid Ahern 
28071edce99fSDavid Ahern 	if (cfg->fc_table) {
2808f4797b33SDavid Ahern 		int flags = RT6_LOOKUP_F_IFACE;
2809f4797b33SDavid Ahern 
2810f4797b33SDavid Ahern 		grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2811f4797b33SDavid Ahern 					  cfg->fc_table, flags);
28121edce99fSDavid Ahern 		if (grt) {
28131edce99fSDavid Ahern 			if (grt->rt6i_flags & RTF_GATEWAY ||
28141edce99fSDavid Ahern 			    (dev && dev != grt->dst.dev)) {
28151edce99fSDavid Ahern 				ip6_rt_put(grt);
28161edce99fSDavid Ahern 				grt = NULL;
28171edce99fSDavid Ahern 			}
28181edce99fSDavid Ahern 		}
28191edce99fSDavid Ahern 	}
28201edce99fSDavid Ahern 
28211edce99fSDavid Ahern 	if (!grt)
2822b75cc8f9SDavid Ahern 		grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
28231edce99fSDavid Ahern 
28241edce99fSDavid Ahern 	if (!grt)
28251edce99fSDavid Ahern 		goto out;
28261edce99fSDavid Ahern 
28271edce99fSDavid Ahern 	if (dev) {
28281edce99fSDavid Ahern 		if (dev != grt->dst.dev) {
28291edce99fSDavid Ahern 			ip6_rt_put(grt);
28301edce99fSDavid Ahern 			goto out;
28311edce99fSDavid Ahern 		}
28321edce99fSDavid Ahern 	} else {
28331edce99fSDavid Ahern 		*_dev = dev = grt->dst.dev;
28341edce99fSDavid Ahern 		*idev = grt->rt6i_idev;
28351edce99fSDavid Ahern 		dev_hold(dev);
28361edce99fSDavid Ahern 		in6_dev_hold(grt->rt6i_idev);
28371edce99fSDavid Ahern 	}
28381edce99fSDavid Ahern 
28391edce99fSDavid Ahern 	if (!(grt->rt6i_flags & RTF_GATEWAY))
28401edce99fSDavid Ahern 		err = 0;
28411edce99fSDavid Ahern 
28421edce99fSDavid Ahern 	ip6_rt_put(grt);
28431edce99fSDavid Ahern 
28441edce99fSDavid Ahern out:
28451edce99fSDavid Ahern 	return err;
28461edce99fSDavid Ahern }
28471edce99fSDavid Ahern 
28489fbb704cSDavid Ahern static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
28499fbb704cSDavid Ahern 			   struct net_device **_dev, struct inet6_dev **idev,
28509fbb704cSDavid Ahern 			   struct netlink_ext_ack *extack)
28519fbb704cSDavid Ahern {
28529fbb704cSDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
28539fbb704cSDavid Ahern 	int gwa_type = ipv6_addr_type(gw_addr);
2854232378e8SDavid Ahern 	bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
28559fbb704cSDavid Ahern 	const struct net_device *dev = *_dev;
2856232378e8SDavid Ahern 	bool need_addr_check = !dev;
28579fbb704cSDavid Ahern 	int err = -EINVAL;
28589fbb704cSDavid Ahern 
28599fbb704cSDavid Ahern 	/* if gw_addr is local we will fail to detect this in case
28609fbb704cSDavid Ahern 	 * address is still TENTATIVE (DAD in progress). rt6_lookup()
28619fbb704cSDavid Ahern 	 * will return already-added prefix route via interface that
28629fbb704cSDavid Ahern 	 * prefix route was assigned to, which might be non-loopback.
28639fbb704cSDavid Ahern 	 */
2864232378e8SDavid Ahern 	if (dev &&
2865232378e8SDavid Ahern 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2866232378e8SDavid Ahern 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
28679fbb704cSDavid Ahern 		goto out;
28689fbb704cSDavid Ahern 	}
28699fbb704cSDavid Ahern 
28709fbb704cSDavid Ahern 	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
28719fbb704cSDavid Ahern 		/* IPv6 strictly inhibits using not link-local
28729fbb704cSDavid Ahern 		 * addresses as nexthop address.
28739fbb704cSDavid Ahern 		 * Otherwise, router will not able to send redirects.
28749fbb704cSDavid Ahern 		 * It is very good, but in some (rare!) circumstances
28759fbb704cSDavid Ahern 		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
28769fbb704cSDavid Ahern 		 * some exceptions. --ANK
28779fbb704cSDavid Ahern 		 * We allow IPv4-mapped nexthops to support RFC4798-type
28789fbb704cSDavid Ahern 		 * addressing
28799fbb704cSDavid Ahern 		 */
28809fbb704cSDavid Ahern 		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
28819fbb704cSDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid gateway address");
28829fbb704cSDavid Ahern 			goto out;
28839fbb704cSDavid Ahern 		}
28849fbb704cSDavid Ahern 
28859fbb704cSDavid Ahern 		if (cfg->fc_flags & RTNH_F_ONLINK)
28869fbb704cSDavid Ahern 			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
28879fbb704cSDavid Ahern 		else
28889fbb704cSDavid Ahern 			err = ip6_route_check_nh(net, cfg, _dev, idev);
28899fbb704cSDavid Ahern 
28909fbb704cSDavid Ahern 		if (err)
28919fbb704cSDavid Ahern 			goto out;
28929fbb704cSDavid Ahern 	}
28939fbb704cSDavid Ahern 
28949fbb704cSDavid Ahern 	/* reload in case device was changed */
28959fbb704cSDavid Ahern 	dev = *_dev;
28969fbb704cSDavid Ahern 
28979fbb704cSDavid Ahern 	err = -EINVAL;
28989fbb704cSDavid Ahern 	if (!dev) {
28999fbb704cSDavid Ahern 		NL_SET_ERR_MSG(extack, "Egress device not specified");
29009fbb704cSDavid Ahern 		goto out;
29019fbb704cSDavid Ahern 	} else if (dev->flags & IFF_LOOPBACK) {
29029fbb704cSDavid Ahern 		NL_SET_ERR_MSG(extack,
29039fbb704cSDavid Ahern 			       "Egress device can not be loopback device for this route");
29049fbb704cSDavid Ahern 		goto out;
29059fbb704cSDavid Ahern 	}
2906232378e8SDavid Ahern 
2907232378e8SDavid Ahern 	/* if we did not check gw_addr above, do so now that the
2908232378e8SDavid Ahern 	 * egress device has been resolved.
2909232378e8SDavid Ahern 	 */
2910232378e8SDavid Ahern 	if (need_addr_check &&
2911232378e8SDavid Ahern 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2912232378e8SDavid Ahern 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2913232378e8SDavid Ahern 		goto out;
2914232378e8SDavid Ahern 	}
2915232378e8SDavid Ahern 
29169fbb704cSDavid Ahern 	err = 0;
29179fbb704cSDavid Ahern out:
29189fbb704cSDavid Ahern 	return err;
29199fbb704cSDavid Ahern }
29209fbb704cSDavid Ahern 
292183c44251SDavid Ahern static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
292283c44251SDavid Ahern {
292383c44251SDavid Ahern 	if ((flags & RTF_REJECT) ||
292483c44251SDavid Ahern 	    (dev && (dev->flags & IFF_LOOPBACK) &&
292583c44251SDavid Ahern 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
292683c44251SDavid Ahern 	     !(flags & RTF_LOCAL)))
292783c44251SDavid Ahern 		return true;
292883c44251SDavid Ahern 
292983c44251SDavid Ahern 	return false;
293083c44251SDavid Ahern }
293183c44251SDavid Ahern 
293283c44251SDavid Ahern int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
293383c44251SDavid Ahern 		 struct fib6_config *cfg, gfp_t gfp_flags,
293483c44251SDavid Ahern 		 struct netlink_ext_ack *extack)
293583c44251SDavid Ahern {
293683c44251SDavid Ahern 	struct net_device *dev = NULL;
293783c44251SDavid Ahern 	struct inet6_dev *idev = NULL;
293883c44251SDavid Ahern 	int addr_type;
293983c44251SDavid Ahern 	int err;
294083c44251SDavid Ahern 
2941f1741730SDavid Ahern 	fib6_nh->fib_nh_family = AF_INET6;
2942f1741730SDavid Ahern 
294383c44251SDavid Ahern 	err = -ENODEV;
294483c44251SDavid Ahern 	if (cfg->fc_ifindex) {
294583c44251SDavid Ahern 		dev = dev_get_by_index(net, cfg->fc_ifindex);
294683c44251SDavid Ahern 		if (!dev)
294783c44251SDavid Ahern 			goto out;
294883c44251SDavid Ahern 		idev = in6_dev_get(dev);
294983c44251SDavid Ahern 		if (!idev)
295083c44251SDavid Ahern 			goto out;
295183c44251SDavid Ahern 	}
295283c44251SDavid Ahern 
295383c44251SDavid Ahern 	if (cfg->fc_flags & RTNH_F_ONLINK) {
295483c44251SDavid Ahern 		if (!dev) {
295583c44251SDavid Ahern 			NL_SET_ERR_MSG(extack,
295683c44251SDavid Ahern 				       "Nexthop device required for onlink");
295783c44251SDavid Ahern 			goto out;
295883c44251SDavid Ahern 		}
295983c44251SDavid Ahern 
296083c44251SDavid Ahern 		if (!(dev->flags & IFF_UP)) {
296183c44251SDavid Ahern 			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
296283c44251SDavid Ahern 			err = -ENETDOWN;
296383c44251SDavid Ahern 			goto out;
296483c44251SDavid Ahern 		}
296583c44251SDavid Ahern 
2966ad1601aeSDavid Ahern 		fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
296783c44251SDavid Ahern 	}
296883c44251SDavid Ahern 
2969ad1601aeSDavid Ahern 	fib6_nh->fib_nh_weight = 1;
297083c44251SDavid Ahern 
297183c44251SDavid Ahern 	/* We cannot add true routes via loopback here,
297283c44251SDavid Ahern 	 * they would result in kernel looping; promote them to reject routes
297383c44251SDavid Ahern 	 */
297483c44251SDavid Ahern 	addr_type = ipv6_addr_type(&cfg->fc_dst);
297583c44251SDavid Ahern 	if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
297683c44251SDavid Ahern 		/* hold loopback dev/idev if we haven't done so. */
297783c44251SDavid Ahern 		if (dev != net->loopback_dev) {
297883c44251SDavid Ahern 			if (dev) {
297983c44251SDavid Ahern 				dev_put(dev);
298083c44251SDavid Ahern 				in6_dev_put(idev);
298183c44251SDavid Ahern 			}
298283c44251SDavid Ahern 			dev = net->loopback_dev;
298383c44251SDavid Ahern 			dev_hold(dev);
298483c44251SDavid Ahern 			idev = in6_dev_get(dev);
298583c44251SDavid Ahern 			if (!idev) {
298683c44251SDavid Ahern 				err = -ENODEV;
298783c44251SDavid Ahern 				goto out;
298883c44251SDavid Ahern 			}
298983c44251SDavid Ahern 		}
299083c44251SDavid Ahern 		goto set_dev;
299183c44251SDavid Ahern 	}
299283c44251SDavid Ahern 
299383c44251SDavid Ahern 	if (cfg->fc_flags & RTF_GATEWAY) {
299483c44251SDavid Ahern 		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
299583c44251SDavid Ahern 		if (err)
299683c44251SDavid Ahern 			goto out;
299783c44251SDavid Ahern 
2998ad1601aeSDavid Ahern 		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
2999bdf00467SDavid Ahern 		fib6_nh->fib_nh_gw_family = AF_INET6;
300083c44251SDavid Ahern 	}
300183c44251SDavid Ahern 
300283c44251SDavid Ahern 	err = -ENODEV;
300383c44251SDavid Ahern 	if (!dev)
300483c44251SDavid Ahern 		goto out;
300583c44251SDavid Ahern 
300683c44251SDavid Ahern 	if (idev->cnf.disable_ipv6) {
300783c44251SDavid Ahern 		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
300883c44251SDavid Ahern 		err = -EACCES;
300983c44251SDavid Ahern 		goto out;
301083c44251SDavid Ahern 	}
301183c44251SDavid Ahern 
301283c44251SDavid Ahern 	if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
301383c44251SDavid Ahern 		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
301483c44251SDavid Ahern 		err = -ENETDOWN;
301583c44251SDavid Ahern 		goto out;
301683c44251SDavid Ahern 	}
301783c44251SDavid Ahern 
301883c44251SDavid Ahern 	if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
301983c44251SDavid Ahern 	    !netif_carrier_ok(dev))
3020ad1601aeSDavid Ahern 		fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
302183c44251SDavid Ahern 
3022979e276eSDavid Ahern 	err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
3023979e276eSDavid Ahern 				 cfg->fc_encap_type, cfg, gfp_flags, extack);
3024979e276eSDavid Ahern 	if (err)
3025979e276eSDavid Ahern 		goto out;
302683c44251SDavid Ahern set_dev:
3027ad1601aeSDavid Ahern 	fib6_nh->fib_nh_dev = dev;
3028f1741730SDavid Ahern 	fib6_nh->fib_nh_oif = dev->ifindex;
302983c44251SDavid Ahern 	err = 0;
303083c44251SDavid Ahern out:
303183c44251SDavid Ahern 	if (idev)
303283c44251SDavid Ahern 		in6_dev_put(idev);
303383c44251SDavid Ahern 
303483c44251SDavid Ahern 	if (err) {
3035ad1601aeSDavid Ahern 		lwtstate_put(fib6_nh->fib_nh_lws);
3036ad1601aeSDavid Ahern 		fib6_nh->fib_nh_lws = NULL;
303783c44251SDavid Ahern 		if (dev)
303883c44251SDavid Ahern 			dev_put(dev);
303983c44251SDavid Ahern 	}
304083c44251SDavid Ahern 
304183c44251SDavid Ahern 	return err;
304283c44251SDavid Ahern }
304383c44251SDavid Ahern 
3044dac7d0f2SDavid Ahern void fib6_nh_release(struct fib6_nh *fib6_nh)
3045dac7d0f2SDavid Ahern {
3046979e276eSDavid Ahern 	fib_nh_common_release(&fib6_nh->nh_common);
3047dac7d0f2SDavid Ahern }
3048dac7d0f2SDavid Ahern 
30498d1c802bSDavid Ahern static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
3050acb54e3cSDavid Ahern 					      gfp_t gfp_flags,
3051333c4301SDavid Ahern 					      struct netlink_ext_ack *extack)
30521da177e4SLinus Torvalds {
30535578689aSDaniel Lezcano 	struct net *net = cfg->fc_nlinfo.nl_net;
30548d1c802bSDavid Ahern 	struct fib6_info *rt = NULL;
3055c71099acSThomas Graf 	struct fib6_table *table;
30568c5b83f0SRoopa Prabhu 	int err = -EINVAL;
305783c44251SDavid Ahern 	int addr_type;
30581da177e4SLinus Torvalds 
3059557c44beSDavid Ahern 	/* RTF_PCPU is an internal flag; can not be set by userspace */
3060d5d531cbSDavid Ahern 	if (cfg->fc_flags & RTF_PCPU) {
3061d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
3062557c44beSDavid Ahern 		goto out;
3063d5d531cbSDavid Ahern 	}
3064557c44beSDavid Ahern 
30652ea2352eSWei Wang 	/* RTF_CACHE is an internal flag; can not be set by userspace */
30662ea2352eSWei Wang 	if (cfg->fc_flags & RTF_CACHE) {
30672ea2352eSWei Wang 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
30682ea2352eSWei Wang 		goto out;
30692ea2352eSWei Wang 	}
30702ea2352eSWei Wang 
3071e8478e80SDavid Ahern 	if (cfg->fc_type > RTN_MAX) {
3072e8478e80SDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid route type");
3073e8478e80SDavid Ahern 		goto out;
3074e8478e80SDavid Ahern 	}
3075e8478e80SDavid Ahern 
3076d5d531cbSDavid Ahern 	if (cfg->fc_dst_len > 128) {
3077d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
30788c5b83f0SRoopa Prabhu 		goto out;
3079d5d531cbSDavid Ahern 	}
3080d5d531cbSDavid Ahern 	if (cfg->fc_src_len > 128) {
3081d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid source address length");
3082d5d531cbSDavid Ahern 		goto out;
3083d5d531cbSDavid Ahern 	}
30841da177e4SLinus Torvalds #ifndef CONFIG_IPV6_SUBTREES
3085d5d531cbSDavid Ahern 	if (cfg->fc_src_len) {
3086d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack,
3087d5d531cbSDavid Ahern 			       "Specifying source address requires IPV6_SUBTREES to be enabled");
30888c5b83f0SRoopa Prabhu 		goto out;
3089d5d531cbSDavid Ahern 	}
30901da177e4SLinus Torvalds #endif
3091fc1e64e1SDavid Ahern 
3092c71099acSThomas Graf 	err = -ENOBUFS;
309338308473SDavid S. Miller 	if (cfg->fc_nlinfo.nlh &&
3094d71314b4SMatti Vaittinen 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
3095d71314b4SMatti Vaittinen 		table = fib6_get_table(net, cfg->fc_table);
309638308473SDavid S. Miller 		if (!table) {
3097f3213831SJoe Perches 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
3098d71314b4SMatti Vaittinen 			table = fib6_new_table(net, cfg->fc_table);
3099d71314b4SMatti Vaittinen 		}
3100d71314b4SMatti Vaittinen 	} else {
3101d71314b4SMatti Vaittinen 		table = fib6_new_table(net, cfg->fc_table);
3102d71314b4SMatti Vaittinen 	}
310338308473SDavid S. Miller 
310438308473SDavid S. Miller 	if (!table)
3105c71099acSThomas Graf 		goto out;
3106c71099acSThomas Graf 
31071da177e4SLinus Torvalds 	err = -ENOMEM;
310893531c67SDavid Ahern 	rt = fib6_info_alloc(gfp_flags);
310993531c67SDavid Ahern 	if (!rt)
31101da177e4SLinus Torvalds 		goto out;
311193531c67SDavid Ahern 
3112d7e774f3SDavid Ahern 	rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3113d7e774f3SDavid Ahern 					       extack);
3114767a2217SDavid Ahern 	if (IS_ERR(rt->fib6_metrics)) {
3115767a2217SDavid Ahern 		err = PTR_ERR(rt->fib6_metrics);
3116fda21d46SEric Dumazet 		/* Do not leave garbage there. */
3117fda21d46SEric Dumazet 		rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
3118767a2217SDavid Ahern 		goto out;
3119767a2217SDavid Ahern 	}
3120767a2217SDavid Ahern 
312193531c67SDavid Ahern 	if (cfg->fc_flags & RTF_ADDRCONF)
312293531c67SDavid Ahern 		rt->dst_nocount = true;
31231da177e4SLinus Torvalds 
31241716a961SGao feng 	if (cfg->fc_flags & RTF_EXPIRES)
312514895687SDavid Ahern 		fib6_set_expires(rt, jiffies +
31261716a961SGao feng 				clock_t_to_jiffies(cfg->fc_expires));
31271716a961SGao feng 	else
312814895687SDavid Ahern 		fib6_clean_expires(rt);
31291da177e4SLinus Torvalds 
313086872cb5SThomas Graf 	if (cfg->fc_protocol == RTPROT_UNSPEC)
313186872cb5SThomas Graf 		cfg->fc_protocol = RTPROT_BOOT;
313293c2fb25SDavid Ahern 	rt->fib6_protocol = cfg->fc_protocol;
313386872cb5SThomas Graf 
313483c44251SDavid Ahern 	rt->fib6_table = table;
313583c44251SDavid Ahern 	rt->fib6_metric = cfg->fc_metric;
313683c44251SDavid Ahern 	rt->fib6_type = cfg->fc_type;
31372b2450caSDavid Ahern 	rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
313819e42e45SRoopa Prabhu 
313993c2fb25SDavid Ahern 	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
314093c2fb25SDavid Ahern 	rt->fib6_dst.plen = cfg->fc_dst_len;
314193c2fb25SDavid Ahern 	if (rt->fib6_dst.plen == 128)
31423b6761d1SDavid Ahern 		rt->dst_host = true;
31431da177e4SLinus Torvalds 
31441da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
314593c2fb25SDavid Ahern 	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
314693c2fb25SDavid Ahern 	rt->fib6_src.plen = cfg->fc_src_len;
31471da177e4SLinus Torvalds #endif
314883c44251SDavid Ahern 	err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
31491da177e4SLinus Torvalds 	if (err)
31501da177e4SLinus Torvalds 		goto out;
31519fbb704cSDavid Ahern 
315283c44251SDavid Ahern 	/* We cannot add true routes via loopback here,
315383c44251SDavid Ahern 	 * they would result in kernel looping; promote them to reject routes
315483c44251SDavid Ahern 	 */
315583c44251SDavid Ahern 	addr_type = ipv6_addr_type(&cfg->fc_dst);
3156ad1601aeSDavid Ahern 	if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
315783c44251SDavid Ahern 		rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
3158955ec4cbSDavid Ahern 
3159c3968a85SDaniel Walter 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
316083c44251SDavid Ahern 		struct net_device *dev = fib6_info_nh_dev(rt);
316183c44251SDavid Ahern 
3162c3968a85SDaniel Walter 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3163d5d531cbSDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid source address");
3164c3968a85SDaniel Walter 			err = -EINVAL;
3165c3968a85SDaniel Walter 			goto out;
3166c3968a85SDaniel Walter 		}
316793c2fb25SDavid Ahern 		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
316893c2fb25SDavid Ahern 		rt->fib6_prefsrc.plen = 128;
3169c3968a85SDaniel Walter 	} else
317093c2fb25SDavid Ahern 		rt->fib6_prefsrc.plen = 0;
3171c3968a85SDaniel Walter 
31728c5b83f0SRoopa Prabhu 	return rt;
31731da177e4SLinus Torvalds out:
317493531c67SDavid Ahern 	fib6_info_release(rt);
31758c5b83f0SRoopa Prabhu 	return ERR_PTR(err);
31766b9ea5a6SRoopa Prabhu }
31776b9ea5a6SRoopa Prabhu 
3178acb54e3cSDavid Ahern int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3179333c4301SDavid Ahern 		  struct netlink_ext_ack *extack)
31806b9ea5a6SRoopa Prabhu {
31818d1c802bSDavid Ahern 	struct fib6_info *rt;
31826b9ea5a6SRoopa Prabhu 	int err;
31836b9ea5a6SRoopa Prabhu 
3184acb54e3cSDavid Ahern 	rt = ip6_route_info_create(cfg, gfp_flags, extack);
3185d4ead6b3SDavid Ahern 	if (IS_ERR(rt))
3186d4ead6b3SDavid Ahern 		return PTR_ERR(rt);
31876b9ea5a6SRoopa Prabhu 
3188d4ead6b3SDavid Ahern 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
318993531c67SDavid Ahern 	fib6_info_release(rt);
31906b9ea5a6SRoopa Prabhu 
31911da177e4SLinus Torvalds 	return err;
31921da177e4SLinus Torvalds }
31931da177e4SLinus Torvalds 
31948d1c802bSDavid Ahern static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
31951da177e4SLinus Torvalds {
3196afb1d4b5SDavid Ahern 	struct net *net = info->nl_net;
3197c71099acSThomas Graf 	struct fib6_table *table;
3198afb1d4b5SDavid Ahern 	int err;
31991da177e4SLinus Torvalds 
3200421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry) {
32016825a26cSGao feng 		err = -ENOENT;
32026825a26cSGao feng 		goto out;
32036825a26cSGao feng 	}
32046c813a72SPatrick McHardy 
320593c2fb25SDavid Ahern 	table = rt->fib6_table;
320666f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
320786872cb5SThomas Graf 	err = fib6_del(rt, info);
320866f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
32091da177e4SLinus Torvalds 
32106825a26cSGao feng out:
321193531c67SDavid Ahern 	fib6_info_release(rt);
32121da177e4SLinus Torvalds 	return err;
32131da177e4SLinus Torvalds }
32141da177e4SLinus Torvalds 
32158d1c802bSDavid Ahern int ip6_del_rt(struct net *net, struct fib6_info *rt)
3216e0a1ad73SThomas Graf {
3217afb1d4b5SDavid Ahern 	struct nl_info info = { .nl_net = net };
3218afb1d4b5SDavid Ahern 
3219528c4cebSDenis V. Lunev 	return __ip6_del_rt(rt, &info);
3220e0a1ad73SThomas Graf }
3221e0a1ad73SThomas Graf 
32228d1c802bSDavid Ahern static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
32230ae81335SDavid Ahern {
32240ae81335SDavid Ahern 	struct nl_info *info = &cfg->fc_nlinfo;
3225e3330039SWANG Cong 	struct net *net = info->nl_net;
322616a16cd3SDavid Ahern 	struct sk_buff *skb = NULL;
32270ae81335SDavid Ahern 	struct fib6_table *table;
3228e3330039SWANG Cong 	int err = -ENOENT;
32290ae81335SDavid Ahern 
3230421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry)
3231e3330039SWANG Cong 		goto out_put;
323293c2fb25SDavid Ahern 	table = rt->fib6_table;
323366f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
32340ae81335SDavid Ahern 
323593c2fb25SDavid Ahern 	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
32368d1c802bSDavid Ahern 		struct fib6_info *sibling, *next_sibling;
32370ae81335SDavid Ahern 
323816a16cd3SDavid Ahern 		/* prefer to send a single notification with all hops */
323916a16cd3SDavid Ahern 		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
324016a16cd3SDavid Ahern 		if (skb) {
324116a16cd3SDavid Ahern 			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
324216a16cd3SDavid Ahern 
3243d4ead6b3SDavid Ahern 			if (rt6_fill_node(net, skb, rt, NULL,
324416a16cd3SDavid Ahern 					  NULL, NULL, 0, RTM_DELROUTE,
324516a16cd3SDavid Ahern 					  info->portid, seq, 0) < 0) {
324616a16cd3SDavid Ahern 				kfree_skb(skb);
324716a16cd3SDavid Ahern 				skb = NULL;
324816a16cd3SDavid Ahern 			} else
324916a16cd3SDavid Ahern 				info->skip_notify = 1;
325016a16cd3SDavid Ahern 		}
325116a16cd3SDavid Ahern 
32520ae81335SDavid Ahern 		list_for_each_entry_safe(sibling, next_sibling,
325393c2fb25SDavid Ahern 					 &rt->fib6_siblings,
325493c2fb25SDavid Ahern 					 fib6_siblings) {
32550ae81335SDavid Ahern 			err = fib6_del(sibling, info);
32560ae81335SDavid Ahern 			if (err)
3257e3330039SWANG Cong 				goto out_unlock;
32580ae81335SDavid Ahern 		}
32590ae81335SDavid Ahern 	}
32600ae81335SDavid Ahern 
32610ae81335SDavid Ahern 	err = fib6_del(rt, info);
3262e3330039SWANG Cong out_unlock:
326366f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
3264e3330039SWANG Cong out_put:
326593531c67SDavid Ahern 	fib6_info_release(rt);
326616a16cd3SDavid Ahern 
326716a16cd3SDavid Ahern 	if (skb) {
3268e3330039SWANG Cong 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
326916a16cd3SDavid Ahern 			    info->nlh, gfp_any());
327016a16cd3SDavid Ahern 	}
32710ae81335SDavid Ahern 	return err;
32720ae81335SDavid Ahern }
32730ae81335SDavid Ahern 
327423fb93a4SDavid Ahern static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
327523fb93a4SDavid Ahern {
327623fb93a4SDavid Ahern 	int rc = -ESRCH;
327723fb93a4SDavid Ahern 
327823fb93a4SDavid Ahern 	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
327923fb93a4SDavid Ahern 		goto out;
328023fb93a4SDavid Ahern 
328123fb93a4SDavid Ahern 	if (cfg->fc_flags & RTF_GATEWAY &&
328223fb93a4SDavid Ahern 	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
328323fb93a4SDavid Ahern 		goto out;
3284761f6026SXin Long 
328523fb93a4SDavid Ahern 	rc = rt6_remove_exception_rt(rt);
328623fb93a4SDavid Ahern out:
328723fb93a4SDavid Ahern 	return rc;
328823fb93a4SDavid Ahern }
328923fb93a4SDavid Ahern 
3290333c4301SDavid Ahern static int ip6_route_del(struct fib6_config *cfg,
3291333c4301SDavid Ahern 			 struct netlink_ext_ack *extack)
32921da177e4SLinus Torvalds {
32938d1c802bSDavid Ahern 	struct rt6_info *rt_cache;
3294c71099acSThomas Graf 	struct fib6_table *table;
32958d1c802bSDavid Ahern 	struct fib6_info *rt;
32961da177e4SLinus Torvalds 	struct fib6_node *fn;
32971da177e4SLinus Torvalds 	int err = -ESRCH;
32981da177e4SLinus Torvalds 
32995578689aSDaniel Lezcano 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3300d5d531cbSDavid Ahern 	if (!table) {
3301d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "FIB table does not exist");
3302c71099acSThomas Graf 		return err;
3303d5d531cbSDavid Ahern 	}
33041da177e4SLinus Torvalds 
330566f5d6ceSWei Wang 	rcu_read_lock();
3306c71099acSThomas Graf 
3307c71099acSThomas Graf 	fn = fib6_locate(&table->tb6_root,
330886872cb5SThomas Graf 			 &cfg->fc_dst, cfg->fc_dst_len,
330938fbeeeeSWei Wang 			 &cfg->fc_src, cfg->fc_src_len,
33102b760fcfSWei Wang 			 !(cfg->fc_flags & RTF_CACHE));
33111da177e4SLinus Torvalds 
33121da177e4SLinus Torvalds 	if (fn) {
331366f5d6ceSWei Wang 		for_each_fib6_node_rt_rcu(fn) {
3314ad1601aeSDavid Ahern 			struct fib6_nh *nh;
3315ad1601aeSDavid Ahern 
33162b760fcfSWei Wang 			if (cfg->fc_flags & RTF_CACHE) {
3317*7e4b5128SDavid Ahern 				struct fib6_result res = {
3318*7e4b5128SDavid Ahern 					.f6i = rt,
3319*7e4b5128SDavid Ahern 				};
332023fb93a4SDavid Ahern 				int rc;
332123fb93a4SDavid Ahern 
3322*7e4b5128SDavid Ahern 				rt_cache = rt6_find_cached_rt(&res,
3323*7e4b5128SDavid Ahern 							      &cfg->fc_dst,
33242b760fcfSWei Wang 							      &cfg->fc_src);
332523fb93a4SDavid Ahern 				if (rt_cache) {
332623fb93a4SDavid Ahern 					rc = ip6_del_cached_rt(rt_cache, cfg);
33279e575010SEric Dumazet 					if (rc != -ESRCH) {
33289e575010SEric Dumazet 						rcu_read_unlock();
332923fb93a4SDavid Ahern 						return rc;
333023fb93a4SDavid Ahern 					}
33319e575010SEric Dumazet 				}
33321f56a01fSMartin KaFai Lau 				continue;
33332b760fcfSWei Wang 			}
3334ad1601aeSDavid Ahern 
3335ad1601aeSDavid Ahern 			nh = &rt->fib6_nh;
333686872cb5SThomas Graf 			if (cfg->fc_ifindex &&
3337ad1601aeSDavid Ahern 			    (!nh->fib_nh_dev ||
3338ad1601aeSDavid Ahern 			     nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
33391da177e4SLinus Torvalds 				continue;
334086872cb5SThomas Graf 			if (cfg->fc_flags & RTF_GATEWAY &&
3341ad1601aeSDavid Ahern 			    !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
33421da177e4SLinus Torvalds 				continue;
334393c2fb25SDavid Ahern 			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
33441da177e4SLinus Torvalds 				continue;
334593c2fb25SDavid Ahern 			if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
3346c2ed1880SMantas M 				continue;
3347e873e4b9SWei Wang 			if (!fib6_info_hold_safe(rt))
3348e873e4b9SWei Wang 				continue;
334966f5d6ceSWei Wang 			rcu_read_unlock();
33501da177e4SLinus Torvalds 
33510ae81335SDavid Ahern 			/* if gateway was specified only delete the one hop */
33520ae81335SDavid Ahern 			if (cfg->fc_flags & RTF_GATEWAY)
335386872cb5SThomas Graf 				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
33540ae81335SDavid Ahern 
33550ae81335SDavid Ahern 			return __ip6_del_rt_siblings(rt, cfg);
33561da177e4SLinus Torvalds 		}
33571da177e4SLinus Torvalds 	}
335866f5d6ceSWei Wang 	rcu_read_unlock();
33591da177e4SLinus Torvalds 
33601da177e4SLinus Torvalds 	return err;
33611da177e4SLinus Torvalds }
33621da177e4SLinus Torvalds 
33636700c270SDavid S. Miller static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3364a6279458SYOSHIFUJI Hideaki {
3365a6279458SYOSHIFUJI Hideaki 	struct netevent_redirect netevent;
3366e8599ff4SDavid S. Miller 	struct rt6_info *rt, *nrt = NULL;
3367e8599ff4SDavid S. Miller 	struct ndisc_options ndopts;
3368e8599ff4SDavid S. Miller 	struct inet6_dev *in6_dev;
3369e8599ff4SDavid S. Miller 	struct neighbour *neigh;
3370a68886a6SDavid Ahern 	struct fib6_info *from;
337171bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	struct rd_msg *msg;
33726e157b6aSDavid S. Miller 	int optlen, on_link;
33736e157b6aSDavid S. Miller 	u8 *lladdr;
3374e8599ff4SDavid S. Miller 
337529a3cad5SSimon Horman 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
337671bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	optlen -= sizeof(*msg);
3377e8599ff4SDavid S. Miller 
3378e8599ff4SDavid S. Miller 	if (optlen < 0) {
33796e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3380e8599ff4SDavid S. Miller 		return;
3381e8599ff4SDavid S. Miller 	}
3382e8599ff4SDavid S. Miller 
338371bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	msg = (struct rd_msg *)icmp6_hdr(skb);
3384e8599ff4SDavid S. Miller 
338571bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (ipv6_addr_is_multicast(&msg->dest)) {
33866e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3387e8599ff4SDavid S. Miller 		return;
3388e8599ff4SDavid S. Miller 	}
3389e8599ff4SDavid S. Miller 
33906e157b6aSDavid S. Miller 	on_link = 0;
339171bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3392e8599ff4SDavid S. Miller 		on_link = 1;
339371bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	} else if (ipv6_addr_type(&msg->target) !=
3394e8599ff4SDavid S. Miller 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
33956e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3396e8599ff4SDavid S. Miller 		return;
3397e8599ff4SDavid S. Miller 	}
3398e8599ff4SDavid S. Miller 
3399e8599ff4SDavid S. Miller 	in6_dev = __in6_dev_get(skb->dev);
3400e8599ff4SDavid S. Miller 	if (!in6_dev)
3401e8599ff4SDavid S. Miller 		return;
3402e8599ff4SDavid S. Miller 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3403e8599ff4SDavid S. Miller 		return;
3404e8599ff4SDavid S. Miller 
3405e8599ff4SDavid S. Miller 	/* RFC2461 8.1:
3406e8599ff4SDavid S. Miller 	 *	The IP source address of the Redirect MUST be the same as the current
3407e8599ff4SDavid S. Miller 	 *	first-hop router for the specified ICMP Destination Address.
3408e8599ff4SDavid S. Miller 	 */
3409e8599ff4SDavid S. Miller 
3410f997c55cSAlexander Aring 	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3411e8599ff4SDavid S. Miller 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3412e8599ff4SDavid S. Miller 		return;
3413e8599ff4SDavid S. Miller 	}
34146e157b6aSDavid S. Miller 
34156e157b6aSDavid S. Miller 	lladdr = NULL;
3416e8599ff4SDavid S. Miller 	if (ndopts.nd_opts_tgt_lladdr) {
3417e8599ff4SDavid S. Miller 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3418e8599ff4SDavid S. Miller 					     skb->dev);
3419e8599ff4SDavid S. Miller 		if (!lladdr) {
3420e8599ff4SDavid S. Miller 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3421e8599ff4SDavid S. Miller 			return;
3422e8599ff4SDavid S. Miller 		}
3423e8599ff4SDavid S. Miller 	}
3424e8599ff4SDavid S. Miller 
34256e157b6aSDavid S. Miller 	rt = (struct rt6_info *) dst;
3426ec13ad1dSMatthias Schiffer 	if (rt->rt6i_flags & RTF_REJECT) {
34276e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
34286e157b6aSDavid S. Miller 		return;
34296e157b6aSDavid S. Miller 	}
34306e157b6aSDavid S. Miller 
34316e157b6aSDavid S. Miller 	/* Redirect received -> path was valid.
34326e157b6aSDavid S. Miller 	 * Look, redirects are sent only in response to data packets,
34336e157b6aSDavid S. Miller 	 * so that this nexthop apparently is reachable. --ANK
34346e157b6aSDavid S. Miller 	 */
34350dec879fSJulian Anastasov 	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
34366e157b6aSDavid S. Miller 
343771bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3438e8599ff4SDavid S. Miller 	if (!neigh)
3439e8599ff4SDavid S. Miller 		return;
3440e8599ff4SDavid S. Miller 
34411da177e4SLinus Torvalds 	/*
34421da177e4SLinus Torvalds 	 *	We have finally decided to accept it.
34431da177e4SLinus Torvalds 	 */
34441da177e4SLinus Torvalds 
3445f997c55cSAlexander Aring 	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
34461da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
34471da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_OVERRIDE|
34481da177e4SLinus Torvalds 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3449f997c55cSAlexander Aring 				     NEIGH_UPDATE_F_ISROUTER)),
3450f997c55cSAlexander Aring 		     NDISC_REDIRECT, &ndopts);
34511da177e4SLinus Torvalds 
34524d85cd0cSDavid Ahern 	rcu_read_lock();
3453a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
3454e873e4b9SWei Wang 	/* This fib6_info_hold() is safe here because we hold reference to rt
3455e873e4b9SWei Wang 	 * and rt already holds reference to fib6_info.
3456e873e4b9SWei Wang 	 */
34578a14e46fSDavid Ahern 	fib6_info_hold(from);
34584d85cd0cSDavid Ahern 	rcu_read_unlock();
34598a14e46fSDavid Ahern 
34608a14e46fSDavid Ahern 	nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
346138308473SDavid S. Miller 	if (!nrt)
34621da177e4SLinus Torvalds 		goto out;
34631da177e4SLinus Torvalds 
34641da177e4SLinus Torvalds 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
34651da177e4SLinus Torvalds 	if (on_link)
34661da177e4SLinus Torvalds 		nrt->rt6i_flags &= ~RTF_GATEWAY;
34671da177e4SLinus Torvalds 
34684e3fd7a0SAlexey Dobriyan 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
34691da177e4SLinus Torvalds 
34702b760fcfSWei Wang 	/* No need to remove rt from the exception table if rt is
34712b760fcfSWei Wang 	 * a cached route because rt6_insert_exception() will
34722b760fcfSWei Wang 	 * takes care of it
34732b760fcfSWei Wang 	 */
34748a14e46fSDavid Ahern 	if (rt6_insert_exception(nrt, from)) {
34752b760fcfSWei Wang 		dst_release_immediate(&nrt->dst);
34762b760fcfSWei Wang 		goto out;
34772b760fcfSWei Wang 	}
34781da177e4SLinus Torvalds 
3479d8d1f30bSChangli Gao 	netevent.old = &rt->dst;
3480d8d1f30bSChangli Gao 	netevent.new = &nrt->dst;
348171bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	netevent.daddr = &msg->dest;
348260592833SYOSHIFUJI Hideaki / 吉藤英明 	netevent.neigh = neigh;
34838d71740cSTom Tucker 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
34848d71740cSTom Tucker 
34851da177e4SLinus Torvalds out:
34868a14e46fSDavid Ahern 	fib6_info_release(from);
3487e8599ff4SDavid S. Miller 	neigh_release(neigh);
34886e157b6aSDavid S. Miller }
34896e157b6aSDavid S. Miller 
349070ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
34918d1c802bSDavid Ahern static struct fib6_info *rt6_get_route_info(struct net *net,
3492b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
3493830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
3494830218c1SDavid Ahern 					   struct net_device *dev)
349570ceb4f5SYOSHIFUJI Hideaki {
3496830218c1SDavid Ahern 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3497830218c1SDavid Ahern 	int ifindex = dev->ifindex;
349870ceb4f5SYOSHIFUJI Hideaki 	struct fib6_node *fn;
34998d1c802bSDavid Ahern 	struct fib6_info *rt = NULL;
3500c71099acSThomas Graf 	struct fib6_table *table;
350170ceb4f5SYOSHIFUJI Hideaki 
3502830218c1SDavid Ahern 	table = fib6_get_table(net, tb_id);
350338308473SDavid S. Miller 	if (!table)
3504c71099acSThomas Graf 		return NULL;
3505c71099acSThomas Graf 
350666f5d6ceSWei Wang 	rcu_read_lock();
350738fbeeeeSWei Wang 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
350870ceb4f5SYOSHIFUJI Hideaki 	if (!fn)
350970ceb4f5SYOSHIFUJI Hideaki 		goto out;
351070ceb4f5SYOSHIFUJI Hideaki 
351166f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(fn) {
3512ad1601aeSDavid Ahern 		if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
351370ceb4f5SYOSHIFUJI Hideaki 			continue;
35142b2450caSDavid Ahern 		if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
3515bdf00467SDavid Ahern 		    !rt->fib6_nh.fib_nh_gw_family)
351670ceb4f5SYOSHIFUJI Hideaki 			continue;
3517ad1601aeSDavid Ahern 		if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
351870ceb4f5SYOSHIFUJI Hideaki 			continue;
3519e873e4b9SWei Wang 		if (!fib6_info_hold_safe(rt))
3520e873e4b9SWei Wang 			continue;
352170ceb4f5SYOSHIFUJI Hideaki 		break;
352270ceb4f5SYOSHIFUJI Hideaki 	}
352370ceb4f5SYOSHIFUJI Hideaki out:
352466f5d6ceSWei Wang 	rcu_read_unlock();
352570ceb4f5SYOSHIFUJI Hideaki 	return rt;
352670ceb4f5SYOSHIFUJI Hideaki }
352770ceb4f5SYOSHIFUJI Hideaki 
35288d1c802bSDavid Ahern static struct fib6_info *rt6_add_route_info(struct net *net,
3529b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
3530830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
3531830218c1SDavid Ahern 					   struct net_device *dev,
353295c96174SEric Dumazet 					   unsigned int pref)
353370ceb4f5SYOSHIFUJI Hideaki {
353486872cb5SThomas Graf 	struct fib6_config cfg = {
3535238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
3536830218c1SDavid Ahern 		.fc_ifindex	= dev->ifindex,
353786872cb5SThomas Graf 		.fc_dst_len	= prefixlen,
353886872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
353986872cb5SThomas Graf 				  RTF_UP | RTF_PREF(pref),
3540b91d5329SXin Long 		.fc_protocol = RTPROT_RA,
3541e8478e80SDavid Ahern 		.fc_type = RTN_UNICAST,
354215e47304SEric W. Biederman 		.fc_nlinfo.portid = 0,
3543efa2cea0SDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
3544efa2cea0SDaniel Lezcano 		.fc_nlinfo.nl_net = net,
354586872cb5SThomas Graf 	};
354670ceb4f5SYOSHIFUJI Hideaki 
3547830218c1SDavid Ahern 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
35484e3fd7a0SAlexey Dobriyan 	cfg.fc_dst = *prefix;
35494e3fd7a0SAlexey Dobriyan 	cfg.fc_gateway = *gwaddr;
355086872cb5SThomas Graf 
3551e317da96SYOSHIFUJI Hideaki 	/* We should treat it as a default route if prefix length is 0. */
3552e317da96SYOSHIFUJI Hideaki 	if (!prefixlen)
355386872cb5SThomas Graf 		cfg.fc_flags |= RTF_DEFAULT;
355470ceb4f5SYOSHIFUJI Hideaki 
3555acb54e3cSDavid Ahern 	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
355670ceb4f5SYOSHIFUJI Hideaki 
3557830218c1SDavid Ahern 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
355870ceb4f5SYOSHIFUJI Hideaki }
355970ceb4f5SYOSHIFUJI Hideaki #endif
356070ceb4f5SYOSHIFUJI Hideaki 
35618d1c802bSDavid Ahern struct fib6_info *rt6_get_dflt_router(struct net *net,
3562afb1d4b5SDavid Ahern 				     const struct in6_addr *addr,
3563afb1d4b5SDavid Ahern 				     struct net_device *dev)
35641da177e4SLinus Torvalds {
3565830218c1SDavid Ahern 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
35668d1c802bSDavid Ahern 	struct fib6_info *rt;
3567c71099acSThomas Graf 	struct fib6_table *table;
35681da177e4SLinus Torvalds 
3569afb1d4b5SDavid Ahern 	table = fib6_get_table(net, tb_id);
357038308473SDavid S. Miller 	if (!table)
3571c71099acSThomas Graf 		return NULL;
35721da177e4SLinus Torvalds 
357366f5d6ceSWei Wang 	rcu_read_lock();
357466f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3575ad1601aeSDavid Ahern 		struct fib6_nh *nh = &rt->fib6_nh;
3576ad1601aeSDavid Ahern 
3577ad1601aeSDavid Ahern 		if (dev == nh->fib_nh_dev &&
357893c2fb25SDavid Ahern 		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
3579ad1601aeSDavid Ahern 		    ipv6_addr_equal(&nh->fib_nh_gw6, addr))
35801da177e4SLinus Torvalds 			break;
35811da177e4SLinus Torvalds 	}
3582e873e4b9SWei Wang 	if (rt && !fib6_info_hold_safe(rt))
3583e873e4b9SWei Wang 		rt = NULL;
358466f5d6ceSWei Wang 	rcu_read_unlock();
35851da177e4SLinus Torvalds 	return rt;
35861da177e4SLinus Torvalds }
35871da177e4SLinus Torvalds 
35888d1c802bSDavid Ahern struct fib6_info *rt6_add_dflt_router(struct net *net,
3589afb1d4b5SDavid Ahern 				     const struct in6_addr *gwaddr,
3590ebacaaa0SYOSHIFUJI Hideaki 				     struct net_device *dev,
3591ebacaaa0SYOSHIFUJI Hideaki 				     unsigned int pref)
35921da177e4SLinus Torvalds {
359386872cb5SThomas Graf 	struct fib6_config cfg = {
3594ca254490SDavid Ahern 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3595238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
359686872cb5SThomas Graf 		.fc_ifindex	= dev->ifindex,
359786872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
359886872cb5SThomas Graf 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3599b91d5329SXin Long 		.fc_protocol = RTPROT_RA,
3600e8478e80SDavid Ahern 		.fc_type = RTN_UNICAST,
360115e47304SEric W. Biederman 		.fc_nlinfo.portid = 0,
36025578689aSDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
3603afb1d4b5SDavid Ahern 		.fc_nlinfo.nl_net = net,
360486872cb5SThomas Graf 	};
36051da177e4SLinus Torvalds 
36064e3fd7a0SAlexey Dobriyan 	cfg.fc_gateway = *gwaddr;
36071da177e4SLinus Torvalds 
3608acb54e3cSDavid Ahern 	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
3609830218c1SDavid Ahern 		struct fib6_table *table;
3610830218c1SDavid Ahern 
3611830218c1SDavid Ahern 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
3612830218c1SDavid Ahern 		if (table)
3613830218c1SDavid Ahern 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3614830218c1SDavid Ahern 	}
36151da177e4SLinus Torvalds 
3616afb1d4b5SDavid Ahern 	return rt6_get_dflt_router(net, gwaddr, dev);
36171da177e4SLinus Torvalds }
36181da177e4SLinus Torvalds 
3619afb1d4b5SDavid Ahern static void __rt6_purge_dflt_routers(struct net *net,
3620afb1d4b5SDavid Ahern 				     struct fib6_table *table)
36211da177e4SLinus Torvalds {
36228d1c802bSDavid Ahern 	struct fib6_info *rt;
36231da177e4SLinus Torvalds 
36241da177e4SLinus Torvalds restart:
362566f5d6ceSWei Wang 	rcu_read_lock();
362666f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3627dcd1f572SDavid Ahern 		struct net_device *dev = fib6_info_nh_dev(rt);
3628dcd1f572SDavid Ahern 		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3629dcd1f572SDavid Ahern 
363093c2fb25SDavid Ahern 		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3631e873e4b9SWei Wang 		    (!idev || idev->cnf.accept_ra != 2) &&
3632e873e4b9SWei Wang 		    fib6_info_hold_safe(rt)) {
363366f5d6ceSWei Wang 			rcu_read_unlock();
3634afb1d4b5SDavid Ahern 			ip6_del_rt(net, rt);
36351da177e4SLinus Torvalds 			goto restart;
36361da177e4SLinus Torvalds 		}
36371da177e4SLinus Torvalds 	}
363866f5d6ceSWei Wang 	rcu_read_unlock();
3639830218c1SDavid Ahern 
3640830218c1SDavid Ahern 	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3641830218c1SDavid Ahern }
3642830218c1SDavid Ahern 
3643830218c1SDavid Ahern void rt6_purge_dflt_routers(struct net *net)
3644830218c1SDavid Ahern {
3645830218c1SDavid Ahern 	struct fib6_table *table;
3646830218c1SDavid Ahern 	struct hlist_head *head;
3647830218c1SDavid Ahern 	unsigned int h;
3648830218c1SDavid Ahern 
3649830218c1SDavid Ahern 	rcu_read_lock();
3650830218c1SDavid Ahern 
3651830218c1SDavid Ahern 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3652830218c1SDavid Ahern 		head = &net->ipv6.fib_table_hash[h];
3653830218c1SDavid Ahern 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3654830218c1SDavid Ahern 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3655afb1d4b5SDavid Ahern 				__rt6_purge_dflt_routers(net, table);
3656830218c1SDavid Ahern 		}
3657830218c1SDavid Ahern 	}
3658830218c1SDavid Ahern 
3659830218c1SDavid Ahern 	rcu_read_unlock();
36601da177e4SLinus Torvalds }
36611da177e4SLinus Torvalds 
36625578689aSDaniel Lezcano static void rtmsg_to_fib6_config(struct net *net,
36635578689aSDaniel Lezcano 				 struct in6_rtmsg *rtmsg,
366486872cb5SThomas Graf 				 struct fib6_config *cfg)
366586872cb5SThomas Graf {
36668823a3acSMaciej Żenczykowski 	*cfg = (struct fib6_config){
36678823a3acSMaciej Żenczykowski 		.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
36688823a3acSMaciej Żenczykowski 			 : RT6_TABLE_MAIN,
36698823a3acSMaciej Żenczykowski 		.fc_ifindex = rtmsg->rtmsg_ifindex,
367067f69513SDavid Ahern 		.fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
36718823a3acSMaciej Żenczykowski 		.fc_expires = rtmsg->rtmsg_info,
36728823a3acSMaciej Żenczykowski 		.fc_dst_len = rtmsg->rtmsg_dst_len,
36738823a3acSMaciej Żenczykowski 		.fc_src_len = rtmsg->rtmsg_src_len,
36748823a3acSMaciej Żenczykowski 		.fc_flags = rtmsg->rtmsg_flags,
36758823a3acSMaciej Żenczykowski 		.fc_type = rtmsg->rtmsg_type,
367686872cb5SThomas Graf 
36778823a3acSMaciej Żenczykowski 		.fc_nlinfo.nl_net = net,
367886872cb5SThomas Graf 
36798823a3acSMaciej Żenczykowski 		.fc_dst = rtmsg->rtmsg_dst,
36808823a3acSMaciej Żenczykowski 		.fc_src = rtmsg->rtmsg_src,
36818823a3acSMaciej Żenczykowski 		.fc_gateway = rtmsg->rtmsg_gateway,
36828823a3acSMaciej Żenczykowski 	};
368386872cb5SThomas Graf }
368486872cb5SThomas Graf 
36855578689aSDaniel Lezcano int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
36861da177e4SLinus Torvalds {
368786872cb5SThomas Graf 	struct fib6_config cfg;
36881da177e4SLinus Torvalds 	struct in6_rtmsg rtmsg;
36891da177e4SLinus Torvalds 	int err;
36901da177e4SLinus Torvalds 
36911da177e4SLinus Torvalds 	switch (cmd) {
36921da177e4SLinus Torvalds 	case SIOCADDRT:		/* Add a route */
36931da177e4SLinus Torvalds 	case SIOCDELRT:		/* Delete a route */
3694af31f412SEric W. Biederman 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
36951da177e4SLinus Torvalds 			return -EPERM;
36961da177e4SLinus Torvalds 		err = copy_from_user(&rtmsg, arg,
36971da177e4SLinus Torvalds 				     sizeof(struct in6_rtmsg));
36981da177e4SLinus Torvalds 		if (err)
36991da177e4SLinus Torvalds 			return -EFAULT;
37001da177e4SLinus Torvalds 
37015578689aSDaniel Lezcano 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
370286872cb5SThomas Graf 
37031da177e4SLinus Torvalds 		rtnl_lock();
37041da177e4SLinus Torvalds 		switch (cmd) {
37051da177e4SLinus Torvalds 		case SIOCADDRT:
3706acb54e3cSDavid Ahern 			err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
37071da177e4SLinus Torvalds 			break;
37081da177e4SLinus Torvalds 		case SIOCDELRT:
3709333c4301SDavid Ahern 			err = ip6_route_del(&cfg, NULL);
37101da177e4SLinus Torvalds 			break;
37111da177e4SLinus Torvalds 		default:
37121da177e4SLinus Torvalds 			err = -EINVAL;
37131da177e4SLinus Torvalds 		}
37141da177e4SLinus Torvalds 		rtnl_unlock();
37151da177e4SLinus Torvalds 
37161da177e4SLinus Torvalds 		return err;
37173ff50b79SStephen Hemminger 	}
37181da177e4SLinus Torvalds 
37191da177e4SLinus Torvalds 	return -EINVAL;
37201da177e4SLinus Torvalds }
37211da177e4SLinus Torvalds 
37221da177e4SLinus Torvalds /*
37231da177e4SLinus Torvalds  *	Drop the packet on the floor
37241da177e4SLinus Torvalds  */
37251da177e4SLinus Torvalds 
3726d5fdd6baSBrian Haley static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
37271da177e4SLinus Torvalds {
3728612f09e8SYOSHIFUJI Hideaki 	int type;
3729adf30907SEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
3730612f09e8SYOSHIFUJI Hideaki 	switch (ipstats_mib_noroutes) {
3731612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_INNOROUTES:
37320660e03fSArnaldo Carvalho de Melo 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
373345bb0060SUlrich Weber 		if (type == IPV6_ADDR_ANY) {
3734bdb7cc64SStephen Suryaputra 			IP6_INC_STATS(dev_net(dst->dev),
3735bdb7cc64SStephen Suryaputra 				      __in6_dev_get_safely(skb->dev),
37363bd653c8SDenis V. Lunev 				      IPSTATS_MIB_INADDRERRORS);
3737612f09e8SYOSHIFUJI Hideaki 			break;
3738612f09e8SYOSHIFUJI Hideaki 		}
3739612f09e8SYOSHIFUJI Hideaki 		/* FALLTHROUGH */
3740612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_OUTNOROUTES:
37413bd653c8SDenis V. Lunev 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
37423bd653c8SDenis V. Lunev 			      ipstats_mib_noroutes);
3743612f09e8SYOSHIFUJI Hideaki 		break;
3744612f09e8SYOSHIFUJI Hideaki 	}
37453ffe533cSAlexey Dobriyan 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
37461da177e4SLinus Torvalds 	kfree_skb(skb);
37471da177e4SLinus Torvalds 	return 0;
37481da177e4SLinus Torvalds }
37491da177e4SLinus Torvalds 
37509ce8ade0SThomas Graf static int ip6_pkt_discard(struct sk_buff *skb)
37519ce8ade0SThomas Graf {
3752612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
37539ce8ade0SThomas Graf }
37549ce8ade0SThomas Graf 
3755ede2059dSEric W. Biederman static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
37561da177e4SLinus Torvalds {
3757adf30907SEric Dumazet 	skb->dev = skb_dst(skb)->dev;
3758612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
37591da177e4SLinus Torvalds }
37601da177e4SLinus Torvalds 
37619ce8ade0SThomas Graf static int ip6_pkt_prohibit(struct sk_buff *skb)
37629ce8ade0SThomas Graf {
3763612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
37649ce8ade0SThomas Graf }
37659ce8ade0SThomas Graf 
3766ede2059dSEric W. Biederman static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
37679ce8ade0SThomas Graf {
3768adf30907SEric Dumazet 	skb->dev = skb_dst(skb)->dev;
3769612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
37709ce8ade0SThomas Graf }
37719ce8ade0SThomas Graf 
37721da177e4SLinus Torvalds /*
37731da177e4SLinus Torvalds  *	Allocate a dst for local (unicast / anycast) address.
37741da177e4SLinus Torvalds  */
37751da177e4SLinus Torvalds 
3776360a9887SDavid Ahern struct fib6_info *addrconf_f6i_alloc(struct net *net,
3777afb1d4b5SDavid Ahern 				     struct inet6_dev *idev,
37781da177e4SLinus Torvalds 				     const struct in6_addr *addr,
3779acb54e3cSDavid Ahern 				     bool anycast, gfp_t gfp_flags)
37801da177e4SLinus Torvalds {
3781c7a1ce39SDavid Ahern 	struct fib6_config cfg = {
3782c7a1ce39SDavid Ahern 		.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
3783c7a1ce39SDavid Ahern 		.fc_ifindex = idev->dev->ifindex,
3784c7a1ce39SDavid Ahern 		.fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
3785c7a1ce39SDavid Ahern 		.fc_dst = *addr,
3786c7a1ce39SDavid Ahern 		.fc_dst_len = 128,
3787c7a1ce39SDavid Ahern 		.fc_protocol = RTPROT_KERNEL,
3788c7a1ce39SDavid Ahern 		.fc_nlinfo.nl_net = net,
3789c7a1ce39SDavid Ahern 		.fc_ignore_dev_down = true,
3790c7a1ce39SDavid Ahern 	};
37915f02ce24SDavid Ahern 
3792e8478e80SDavid Ahern 	if (anycast) {
3793c7a1ce39SDavid Ahern 		cfg.fc_type = RTN_ANYCAST;
3794c7a1ce39SDavid Ahern 		cfg.fc_flags |= RTF_ANYCAST;
3795e8478e80SDavid Ahern 	} else {
3796c7a1ce39SDavid Ahern 		cfg.fc_type = RTN_LOCAL;
3797c7a1ce39SDavid Ahern 		cfg.fc_flags |= RTF_LOCAL;
3798e8478e80SDavid Ahern 	}
37991da177e4SLinus Torvalds 
3800c7a1ce39SDavid Ahern 	return ip6_route_info_create(&cfg, gfp_flags, NULL);
38011da177e4SLinus Torvalds }
38021da177e4SLinus Torvalds 
/* Remove a deleted address from the prefsrc entries of routes.
 *
 * Argument bundle handed to fib6_remove_prefsrc() through the
 * fib6_clean_all() walk.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
3809c3968a85SDaniel Walter 
38108d1c802bSDavid Ahern static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
3811c3968a85SDaniel Walter {
3812c3968a85SDaniel Walter 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3813c3968a85SDaniel Walter 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3814c3968a85SDaniel Walter 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3815c3968a85SDaniel Walter 
3816ad1601aeSDavid Ahern 	if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
3817421842edSDavid Ahern 	    rt != net->ipv6.fib6_null_entry &&
381893c2fb25SDavid Ahern 	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
381960006a48SWei Wang 		spin_lock_bh(&rt6_exception_lock);
3820c3968a85SDaniel Walter 		/* remove prefsrc entry */
382193c2fb25SDavid Ahern 		rt->fib6_prefsrc.plen = 0;
382260006a48SWei Wang 		spin_unlock_bh(&rt6_exception_lock);
3823c3968a85SDaniel Walter 	}
3824c3968a85SDaniel Walter 	return 0;
3825c3968a85SDaniel Walter }
3826c3968a85SDaniel Walter 
3827c3968a85SDaniel Walter void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3828c3968a85SDaniel Walter {
3829c3968a85SDaniel Walter 	struct net *net = dev_net(ifp->idev->dev);
3830c3968a85SDaniel Walter 	struct arg_dev_net_ip adni = {
3831c3968a85SDaniel Walter 		.dev = ifp->idev->dev,
3832c3968a85SDaniel Walter 		.net = net,
3833c3968a85SDaniel Walter 		.addr = &ifp->addr,
3834c3968a85SDaniel Walter 	};
38350c3584d5SLi RongQing 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3836c3968a85SDaniel Walter }
3837c3968a85SDaniel Walter 
/* Both flags must be set for fib6_clean_tohost() to treat a route as a
 * candidate for removal.
 */
#define RTF_RA_ROUTER		(RTF_DEFAULT | RTF_ADDRCONF)
3839be7a010dSDuan Jiong 
3840be7a010dSDuan Jiong /* Remove routers and update dst entries when gateway turn into host. */
38418d1c802bSDavid Ahern static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
3842be7a010dSDuan Jiong {
3843be7a010dSDuan Jiong 	struct in6_addr *gateway = (struct in6_addr *)arg;
3844be7a010dSDuan Jiong 
384593c2fb25SDavid Ahern 	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3846bdf00467SDavid Ahern 	    rt->fib6_nh.fib_nh_gw_family &&
3847ad1601aeSDavid Ahern 	    ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
3848be7a010dSDuan Jiong 		return -1;
3849be7a010dSDuan Jiong 	}
3850b16cb459SWei Wang 
3851b16cb459SWei Wang 	/* Further clean up cached routes in exception table.
3852b16cb459SWei Wang 	 * This is needed because cached route may have a different
3853b16cb459SWei Wang 	 * gateway than its 'parent' in the case of an ip redirect.
3854b16cb459SWei Wang 	 */
3855b16cb459SWei Wang 	rt6_exceptions_clean_tohost(rt, gateway);
3856b16cb459SWei Wang 
3857be7a010dSDuan Jiong 	return 0;
3858be7a010dSDuan Jiong }
3859be7a010dSDuan Jiong 
3860be7a010dSDuan Jiong void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3861be7a010dSDuan Jiong {
3862be7a010dSDuan Jiong 	fib6_clean_all(net, fib6_clean_tohost, gateway);
3863be7a010dSDuan Jiong }
3864be7a010dSDuan Jiong 
/* Argument bundle for the fib6_ifup() / fib6_ifdown() table walks.
 *
 * The union is initialized positionally by rt6_sync_up() and
 * rt6_sync_down_dev(), so the member layout must not be reordered.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event refers to */
	union {
		unsigned int nh_flags;	/* fib6_ifup(): nexthop flags to clear */
		unsigned long event;	/* fib6_ifdown(): NETDEV_* event */
	};
};
38722127d95aSIdo Schimmel 
38738d1c802bSDavid Ahern static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
3874d7dedee1SIdo Schimmel {
38758d1c802bSDavid Ahern 	struct fib6_info *iter;
3876d7dedee1SIdo Schimmel 	struct fib6_node *fn;
3877d7dedee1SIdo Schimmel 
387893c2fb25SDavid Ahern 	fn = rcu_dereference_protected(rt->fib6_node,
387993c2fb25SDavid Ahern 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3880d7dedee1SIdo Schimmel 	iter = rcu_dereference_protected(fn->leaf,
388193c2fb25SDavid Ahern 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3882d7dedee1SIdo Schimmel 	while (iter) {
388393c2fb25SDavid Ahern 		if (iter->fib6_metric == rt->fib6_metric &&
388433bd5ac5SDavid Ahern 		    rt6_qualify_for_ecmp(iter))
3885d7dedee1SIdo Schimmel 			return iter;
38868fb11a9aSDavid Ahern 		iter = rcu_dereference_protected(iter->fib6_next,
388793c2fb25SDavid Ahern 				lockdep_is_held(&rt->fib6_table->tb6_lock));
3888d7dedee1SIdo Schimmel 	}
3889d7dedee1SIdo Schimmel 
3890d7dedee1SIdo Schimmel 	return NULL;
3891d7dedee1SIdo Schimmel }
3892d7dedee1SIdo Schimmel 
38938d1c802bSDavid Ahern static bool rt6_is_dead(const struct fib6_info *rt)
3894d7dedee1SIdo Schimmel {
3895ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
3896ad1601aeSDavid Ahern 	    (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
3897ad1601aeSDavid Ahern 	     ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
3898d7dedee1SIdo Schimmel 		return true;
3899d7dedee1SIdo Schimmel 
3900d7dedee1SIdo Schimmel 	return false;
3901d7dedee1SIdo Schimmel }
3902d7dedee1SIdo Schimmel 
39038d1c802bSDavid Ahern static int rt6_multipath_total_weight(const struct fib6_info *rt)
3904d7dedee1SIdo Schimmel {
39058d1c802bSDavid Ahern 	struct fib6_info *iter;
3906d7dedee1SIdo Schimmel 	int total = 0;
3907d7dedee1SIdo Schimmel 
3908d7dedee1SIdo Schimmel 	if (!rt6_is_dead(rt))
3909ad1601aeSDavid Ahern 		total += rt->fib6_nh.fib_nh_weight;
3910d7dedee1SIdo Schimmel 
391193c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
3912d7dedee1SIdo Schimmel 		if (!rt6_is_dead(iter))
3913ad1601aeSDavid Ahern 			total += iter->fib6_nh.fib_nh_weight;
3914d7dedee1SIdo Schimmel 	}
3915d7dedee1SIdo Schimmel 
3916d7dedee1SIdo Schimmel 	return total;
3917d7dedee1SIdo Schimmel }
3918d7dedee1SIdo Schimmel 
39198d1c802bSDavid Ahern static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
3920d7dedee1SIdo Schimmel {
3921d7dedee1SIdo Schimmel 	int upper_bound = -1;
3922d7dedee1SIdo Schimmel 
3923d7dedee1SIdo Schimmel 	if (!rt6_is_dead(rt)) {
3924ad1601aeSDavid Ahern 		*weight += rt->fib6_nh.fib_nh_weight;
3925d7dedee1SIdo Schimmel 		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3926d7dedee1SIdo Schimmel 						    total) - 1;
3927d7dedee1SIdo Schimmel 	}
3928ad1601aeSDavid Ahern 	atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
3929d7dedee1SIdo Schimmel }
3930d7dedee1SIdo Schimmel 
39318d1c802bSDavid Ahern static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
3932d7dedee1SIdo Schimmel {
39338d1c802bSDavid Ahern 	struct fib6_info *iter;
3934d7dedee1SIdo Schimmel 	int weight = 0;
3935d7dedee1SIdo Schimmel 
3936d7dedee1SIdo Schimmel 	rt6_upper_bound_set(rt, &weight, total);
3937d7dedee1SIdo Schimmel 
393893c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3939d7dedee1SIdo Schimmel 		rt6_upper_bound_set(iter, &weight, total);
3940d7dedee1SIdo Schimmel }
3941d7dedee1SIdo Schimmel 
39428d1c802bSDavid Ahern void rt6_multipath_rebalance(struct fib6_info *rt)
3943d7dedee1SIdo Schimmel {
39448d1c802bSDavid Ahern 	struct fib6_info *first;
3945d7dedee1SIdo Schimmel 	int total;
3946d7dedee1SIdo Schimmel 
3947d7dedee1SIdo Schimmel 	/* In case the entire multipath route was marked for flushing,
3948d7dedee1SIdo Schimmel 	 * then there is no need to rebalance upon the removal of every
3949d7dedee1SIdo Schimmel 	 * sibling route.
3950d7dedee1SIdo Schimmel 	 */
395193c2fb25SDavid Ahern 	if (!rt->fib6_nsiblings || rt->should_flush)
3952d7dedee1SIdo Schimmel 		return;
3953d7dedee1SIdo Schimmel 
3954d7dedee1SIdo Schimmel 	/* During lookup routes are evaluated in order, so we need to
3955d7dedee1SIdo Schimmel 	 * make sure upper bounds are assigned from the first sibling
3956d7dedee1SIdo Schimmel 	 * onwards.
3957d7dedee1SIdo Schimmel 	 */
3958d7dedee1SIdo Schimmel 	first = rt6_multipath_first_sibling(rt);
3959d7dedee1SIdo Schimmel 	if (WARN_ON_ONCE(!first))
3960d7dedee1SIdo Schimmel 		return;
3961d7dedee1SIdo Schimmel 
3962d7dedee1SIdo Schimmel 	total = rt6_multipath_total_weight(first);
3963d7dedee1SIdo Schimmel 	rt6_multipath_upper_bound_set(first, total);
3964d7dedee1SIdo Schimmel }
3965d7dedee1SIdo Schimmel 
39668d1c802bSDavid Ahern static int fib6_ifup(struct fib6_info *rt, void *p_arg)
39672127d95aSIdo Schimmel {
39682127d95aSIdo Schimmel 	const struct arg_netdev_event *arg = p_arg;
39697aef6859SDavid Ahern 	struct net *net = dev_net(arg->dev);
39702127d95aSIdo Schimmel 
3971ad1601aeSDavid Ahern 	if (rt != net->ipv6.fib6_null_entry &&
3972ad1601aeSDavid Ahern 	    rt->fib6_nh.fib_nh_dev == arg->dev) {
3973ad1601aeSDavid Ahern 		rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
39747aef6859SDavid Ahern 		fib6_update_sernum_upto_root(net, rt);
3975d7dedee1SIdo Schimmel 		rt6_multipath_rebalance(rt);
39761de178edSIdo Schimmel 	}
39772127d95aSIdo Schimmel 
39782127d95aSIdo Schimmel 	return 0;
39792127d95aSIdo Schimmel }
39802127d95aSIdo Schimmel 
39812127d95aSIdo Schimmel void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
39822127d95aSIdo Schimmel {
39832127d95aSIdo Schimmel 	struct arg_netdev_event arg = {
39842127d95aSIdo Schimmel 		.dev = dev,
39856802f3adSIdo Schimmel 		{
39862127d95aSIdo Schimmel 			.nh_flags = nh_flags,
39876802f3adSIdo Schimmel 		},
39882127d95aSIdo Schimmel 	};
39892127d95aSIdo Schimmel 
39902127d95aSIdo Schimmel 	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
39912127d95aSIdo Schimmel 		arg.nh_flags |= RTNH_F_LINKDOWN;
39922127d95aSIdo Schimmel 
39932127d95aSIdo Schimmel 	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
39942127d95aSIdo Schimmel }
39952127d95aSIdo Schimmel 
39968d1c802bSDavid Ahern static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
39971de178edSIdo Schimmel 				   const struct net_device *dev)
39981de178edSIdo Schimmel {
39998d1c802bSDavid Ahern 	struct fib6_info *iter;
40001de178edSIdo Schimmel 
4001ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_dev == dev)
40021de178edSIdo Schimmel 		return true;
400393c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4004ad1601aeSDavid Ahern 		if (iter->fib6_nh.fib_nh_dev == dev)
40051de178edSIdo Schimmel 			return true;
40061de178edSIdo Schimmel 
40071de178edSIdo Schimmel 	return false;
40081de178edSIdo Schimmel }
40091de178edSIdo Schimmel 
40108d1c802bSDavid Ahern static void rt6_multipath_flush(struct fib6_info *rt)
40111de178edSIdo Schimmel {
40128d1c802bSDavid Ahern 	struct fib6_info *iter;
40131de178edSIdo Schimmel 
40141de178edSIdo Schimmel 	rt->should_flush = 1;
401593c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
40161de178edSIdo Schimmel 		iter->should_flush = 1;
40171de178edSIdo Schimmel }
40181de178edSIdo Schimmel 
40198d1c802bSDavid Ahern static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
40201de178edSIdo Schimmel 					     const struct net_device *down_dev)
40211de178edSIdo Schimmel {
40228d1c802bSDavid Ahern 	struct fib6_info *iter;
40231de178edSIdo Schimmel 	unsigned int dead = 0;
40241de178edSIdo Schimmel 
4025ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_dev == down_dev ||
4026ad1601aeSDavid Ahern 	    rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
40271de178edSIdo Schimmel 		dead++;
402893c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4029ad1601aeSDavid Ahern 		if (iter->fib6_nh.fib_nh_dev == down_dev ||
4030ad1601aeSDavid Ahern 		    iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
40311de178edSIdo Schimmel 			dead++;
40321de178edSIdo Schimmel 
40331de178edSIdo Schimmel 	return dead;
40341de178edSIdo Schimmel }
40351de178edSIdo Schimmel 
40368d1c802bSDavid Ahern static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
40371de178edSIdo Schimmel 				       const struct net_device *dev,
40381de178edSIdo Schimmel 				       unsigned int nh_flags)
40391de178edSIdo Schimmel {
40408d1c802bSDavid Ahern 	struct fib6_info *iter;
40411de178edSIdo Schimmel 
4042ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_dev == dev)
4043ad1601aeSDavid Ahern 		rt->fib6_nh.fib_nh_flags |= nh_flags;
404493c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4045ad1601aeSDavid Ahern 		if (iter->fib6_nh.fib_nh_dev == dev)
4046ad1601aeSDavid Ahern 			iter->fib6_nh.fib_nh_flags |= nh_flags;
40471de178edSIdo Schimmel }
40481de178edSIdo Schimmel 
4049a1a22c12SDavid Ahern /* called with write lock held for table with rt */
40508d1c802bSDavid Ahern static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
40511da177e4SLinus Torvalds {
40524c981e28SIdo Schimmel 	const struct arg_netdev_event *arg = p_arg;
40534c981e28SIdo Schimmel 	const struct net_device *dev = arg->dev;
40547aef6859SDavid Ahern 	struct net *net = dev_net(dev);
40558ed67789SDaniel Lezcano 
4056421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry)
405727c6fa73SIdo Schimmel 		return 0;
405827c6fa73SIdo Schimmel 
405927c6fa73SIdo Schimmel 	switch (arg->event) {
406027c6fa73SIdo Schimmel 	case NETDEV_UNREGISTER:
4061ad1601aeSDavid Ahern 		return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
406227c6fa73SIdo Schimmel 	case NETDEV_DOWN:
40631de178edSIdo Schimmel 		if (rt->should_flush)
406427c6fa73SIdo Schimmel 			return -1;
406593c2fb25SDavid Ahern 		if (!rt->fib6_nsiblings)
4066ad1601aeSDavid Ahern 			return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
40671de178edSIdo Schimmel 		if (rt6_multipath_uses_dev(rt, dev)) {
40681de178edSIdo Schimmel 			unsigned int count;
40691de178edSIdo Schimmel 
40701de178edSIdo Schimmel 			count = rt6_multipath_dead_count(rt, dev);
407193c2fb25SDavid Ahern 			if (rt->fib6_nsiblings + 1 == count) {
40721de178edSIdo Schimmel 				rt6_multipath_flush(rt);
40731de178edSIdo Schimmel 				return -1;
40741de178edSIdo Schimmel 			}
40751de178edSIdo Schimmel 			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
40761de178edSIdo Schimmel 						   RTNH_F_LINKDOWN);
40777aef6859SDavid Ahern 			fib6_update_sernum(net, rt);
4078d7dedee1SIdo Schimmel 			rt6_multipath_rebalance(rt);
40791de178edSIdo Schimmel 		}
40801de178edSIdo Schimmel 		return -2;
408127c6fa73SIdo Schimmel 	case NETDEV_CHANGE:
4082ad1601aeSDavid Ahern 		if (rt->fib6_nh.fib_nh_dev != dev ||
408393c2fb25SDavid Ahern 		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
408427c6fa73SIdo Schimmel 			break;
4085ad1601aeSDavid Ahern 		rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
4086d7dedee1SIdo Schimmel 		rt6_multipath_rebalance(rt);
408727c6fa73SIdo Schimmel 		break;
40882b241361SIdo Schimmel 	}
4089c159d30cSDavid S. Miller 
40901da177e4SLinus Torvalds 	return 0;
40911da177e4SLinus Torvalds }
40921da177e4SLinus Torvalds 
409327c6fa73SIdo Schimmel void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
40941da177e4SLinus Torvalds {
40954c981e28SIdo Schimmel 	struct arg_netdev_event arg = {
40968ed67789SDaniel Lezcano 		.dev = dev,
40976802f3adSIdo Schimmel 		{
40984c981e28SIdo Schimmel 			.event = event,
40996802f3adSIdo Schimmel 		},
41008ed67789SDaniel Lezcano 	};
41017c6bb7d2SDavid Ahern 	struct net *net = dev_net(dev);
41028ed67789SDaniel Lezcano 
41037c6bb7d2SDavid Ahern 	if (net->ipv6.sysctl.skip_notify_on_dev_down)
41047c6bb7d2SDavid Ahern 		fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
41057c6bb7d2SDavid Ahern 	else
41067c6bb7d2SDavid Ahern 		fib6_clean_all(net, fib6_ifdown, &arg);
41074c981e28SIdo Schimmel }
41084c981e28SIdo Schimmel 
41094c981e28SIdo Schimmel void rt6_disable_ip(struct net_device *dev, unsigned long event)
41104c981e28SIdo Schimmel {
41114c981e28SIdo Schimmel 	rt6_sync_down_dev(dev, event);
41124c981e28SIdo Schimmel 	rt6_uncached_list_flush_dev(dev_net(dev), dev);
41134c981e28SIdo Schimmel 	neigh_ifdown(&nd_tbl, dev);
41141da177e4SLinus Torvalds }
41151da177e4SLinus Torvalds 
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU value */
};
41201da177e4SLinus Torvalds 
41218d1c802bSDavid Ahern static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
41221da177e4SLinus Torvalds {
41231da177e4SLinus Torvalds 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
41241da177e4SLinus Torvalds 	struct inet6_dev *idev;
41251da177e4SLinus Torvalds 
41261da177e4SLinus Torvalds 	/* In IPv6 pmtu discovery is not optional,
41271da177e4SLinus Torvalds 	   so that RTAX_MTU lock cannot disable it.
41281da177e4SLinus Torvalds 	   We still use this lock to block changes
41291da177e4SLinus Torvalds 	   caused by addrconf/ndisc.
41301da177e4SLinus Torvalds 	*/
41311da177e4SLinus Torvalds 
41321da177e4SLinus Torvalds 	idev = __in6_dev_get(arg->dev);
413338308473SDavid S. Miller 	if (!idev)
41341da177e4SLinus Torvalds 		return 0;
41351da177e4SLinus Torvalds 
41361da177e4SLinus Torvalds 	/* For administrative MTU increase, there is no way to discover
41371da177e4SLinus Torvalds 	   IPv6 PMTU increase, so PMTU increase should be updated here.
41381da177e4SLinus Torvalds 	   Since RFC 1981 doesn't include administrative MTU increase
41391da177e4SLinus Torvalds 	   update PMTU increase is a MUST. (i.e. jumbo frame)
41401da177e4SLinus Torvalds 	 */
4141ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_dev == arg->dev &&
4142d4ead6b3SDavid Ahern 	    !fib6_metric_locked(rt, RTAX_MTU)) {
4143d4ead6b3SDavid Ahern 		u32 mtu = rt->fib6_pmtu;
4144d4ead6b3SDavid Ahern 
4145d4ead6b3SDavid Ahern 		if (mtu >= arg->mtu ||
4146d4ead6b3SDavid Ahern 		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4147d4ead6b3SDavid Ahern 			fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4148d4ead6b3SDavid Ahern 
4149f5bbe7eeSWei Wang 		spin_lock_bh(&rt6_exception_lock);
4150e9fa1495SStefano Brivio 		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
4151f5bbe7eeSWei Wang 		spin_unlock_bh(&rt6_exception_lock);
41524b32b5adSMartin KaFai Lau 	}
41531da177e4SLinus Torvalds 	return 0;
41541da177e4SLinus Torvalds }
41551da177e4SLinus Torvalds 
415695c96174SEric Dumazet void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
41571da177e4SLinus Torvalds {
4158c71099acSThomas Graf 	struct rt6_mtu_change_arg arg = {
4159c71099acSThomas Graf 		.dev = dev,
4160c71099acSThomas Graf 		.mtu = mtu,
4161c71099acSThomas Graf 	};
41621da177e4SLinus Torvalds 
41630c3584d5SLi RongQing 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
41641da177e4SLinus Torvalds }
41651da177e4SLinus Torvalds 
4166ef7c79edSPatrick McHardy static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
41675176f91eSThomas Graf 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
4168aa8f8778SEric Dumazet 	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
416986872cb5SThomas Graf 	[RTA_OIF]               = { .type = NLA_U32 },
4170ab364a6fSThomas Graf 	[RTA_IIF]		= { .type = NLA_U32 },
417186872cb5SThomas Graf 	[RTA_PRIORITY]          = { .type = NLA_U32 },
417286872cb5SThomas Graf 	[RTA_METRICS]           = { .type = NLA_NESTED },
417351ebd318SNicolas Dichtel 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
4174c78ba6d6SLubomir Rintel 	[RTA_PREF]              = { .type = NLA_U8 },
417519e42e45SRoopa Prabhu 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
417619e42e45SRoopa Prabhu 	[RTA_ENCAP]		= { .type = NLA_NESTED },
417732bc201eSXin Long 	[RTA_EXPIRES]		= { .type = NLA_U32 },
4178622ec2c9SLorenzo Colitti 	[RTA_UID]		= { .type = NLA_U32 },
41793b45a410SLiping Zhang 	[RTA_MARK]		= { .type = NLA_U32 },
4180aa8f8778SEric Dumazet 	[RTA_TABLE]		= { .type = NLA_U32 },
4181eacb9384SRoopa Prabhu 	[RTA_IP_PROTO]		= { .type = NLA_U8 },
4182eacb9384SRoopa Prabhu 	[RTA_SPORT]		= { .type = NLA_U16 },
4183eacb9384SRoopa Prabhu 	[RTA_DPORT]		= { .type = NLA_U16 },
418486872cb5SThomas Graf };
418586872cb5SThomas Graf 
418686872cb5SThomas Graf static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4187333c4301SDavid Ahern 			      struct fib6_config *cfg,
4188333c4301SDavid Ahern 			      struct netlink_ext_ack *extack)
41891da177e4SLinus Torvalds {
419086872cb5SThomas Graf 	struct rtmsg *rtm;
419186872cb5SThomas Graf 	struct nlattr *tb[RTA_MAX+1];
4192c78ba6d6SLubomir Rintel 	unsigned int pref;
419386872cb5SThomas Graf 	int err;
41941da177e4SLinus Torvalds 
4195fceb6435SJohannes Berg 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4196dac9c979SDavid Ahern 			  extack);
419786872cb5SThomas Graf 	if (err < 0)
419886872cb5SThomas Graf 		goto errout;
41991da177e4SLinus Torvalds 
420086872cb5SThomas Graf 	err = -EINVAL;
420186872cb5SThomas Graf 	rtm = nlmsg_data(nlh);
420286872cb5SThomas Graf 
420384db8407SMaciej Żenczykowski 	*cfg = (struct fib6_config){
420484db8407SMaciej Żenczykowski 		.fc_table = rtm->rtm_table,
420584db8407SMaciej Żenczykowski 		.fc_dst_len = rtm->rtm_dst_len,
420684db8407SMaciej Żenczykowski 		.fc_src_len = rtm->rtm_src_len,
420784db8407SMaciej Żenczykowski 		.fc_flags = RTF_UP,
420884db8407SMaciej Żenczykowski 		.fc_protocol = rtm->rtm_protocol,
420984db8407SMaciej Żenczykowski 		.fc_type = rtm->rtm_type,
421084db8407SMaciej Żenczykowski 
421184db8407SMaciej Żenczykowski 		.fc_nlinfo.portid = NETLINK_CB(skb).portid,
421284db8407SMaciej Żenczykowski 		.fc_nlinfo.nlh = nlh,
421384db8407SMaciej Żenczykowski 		.fc_nlinfo.nl_net = sock_net(skb->sk),
421484db8407SMaciej Żenczykowski 	};
421586872cb5SThomas Graf 
4216ef2c7d7bSNicolas Dichtel 	if (rtm->rtm_type == RTN_UNREACHABLE ||
4217ef2c7d7bSNicolas Dichtel 	    rtm->rtm_type == RTN_BLACKHOLE ||
4218b4949ab2SNicolas Dichtel 	    rtm->rtm_type == RTN_PROHIBIT ||
4219b4949ab2SNicolas Dichtel 	    rtm->rtm_type == RTN_THROW)
422086872cb5SThomas Graf 		cfg->fc_flags |= RTF_REJECT;
422186872cb5SThomas Graf 
4222ab79ad14SMaciej Żenczykowski 	if (rtm->rtm_type == RTN_LOCAL)
4223ab79ad14SMaciej Żenczykowski 		cfg->fc_flags |= RTF_LOCAL;
4224ab79ad14SMaciej Żenczykowski 
42251f56a01fSMartin KaFai Lau 	if (rtm->rtm_flags & RTM_F_CLONED)
42261f56a01fSMartin KaFai Lau 		cfg->fc_flags |= RTF_CACHE;
42271f56a01fSMartin KaFai Lau 
4228fc1e64e1SDavid Ahern 	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4229fc1e64e1SDavid Ahern 
423086872cb5SThomas Graf 	if (tb[RTA_GATEWAY]) {
423167b61f6cSJiri Benc 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
423286872cb5SThomas Graf 		cfg->fc_flags |= RTF_GATEWAY;
42331da177e4SLinus Torvalds 	}
4234e3818541SDavid Ahern 	if (tb[RTA_VIA]) {
4235e3818541SDavid Ahern 		NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4236e3818541SDavid Ahern 		goto errout;
4237e3818541SDavid Ahern 	}
423886872cb5SThomas Graf 
423986872cb5SThomas Graf 	if (tb[RTA_DST]) {
424086872cb5SThomas Graf 		int plen = (rtm->rtm_dst_len + 7) >> 3;
424186872cb5SThomas Graf 
424286872cb5SThomas Graf 		if (nla_len(tb[RTA_DST]) < plen)
424386872cb5SThomas Graf 			goto errout;
424486872cb5SThomas Graf 
424586872cb5SThomas Graf 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
42461da177e4SLinus Torvalds 	}
424786872cb5SThomas Graf 
424886872cb5SThomas Graf 	if (tb[RTA_SRC]) {
424986872cb5SThomas Graf 		int plen = (rtm->rtm_src_len + 7) >> 3;
425086872cb5SThomas Graf 
425186872cb5SThomas Graf 		if (nla_len(tb[RTA_SRC]) < plen)
425286872cb5SThomas Graf 			goto errout;
425386872cb5SThomas Graf 
425486872cb5SThomas Graf 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
42551da177e4SLinus Torvalds 	}
425686872cb5SThomas Graf 
4257c3968a85SDaniel Walter 	if (tb[RTA_PREFSRC])
425867b61f6cSJiri Benc 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4259c3968a85SDaniel Walter 
426086872cb5SThomas Graf 	if (tb[RTA_OIF])
426186872cb5SThomas Graf 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
426286872cb5SThomas Graf 
426386872cb5SThomas Graf 	if (tb[RTA_PRIORITY])
426486872cb5SThomas Graf 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
426586872cb5SThomas Graf 
426686872cb5SThomas Graf 	if (tb[RTA_METRICS]) {
426786872cb5SThomas Graf 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
426886872cb5SThomas Graf 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
42691da177e4SLinus Torvalds 	}
427086872cb5SThomas Graf 
427186872cb5SThomas Graf 	if (tb[RTA_TABLE])
427286872cb5SThomas Graf 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
427386872cb5SThomas Graf 
427451ebd318SNicolas Dichtel 	if (tb[RTA_MULTIPATH]) {
427551ebd318SNicolas Dichtel 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
427651ebd318SNicolas Dichtel 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
42779ed59592SDavid Ahern 
42789ed59592SDavid Ahern 		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4279c255bd68SDavid Ahern 						     cfg->fc_mp_len, extack);
42809ed59592SDavid Ahern 		if (err < 0)
42819ed59592SDavid Ahern 			goto errout;
428251ebd318SNicolas Dichtel 	}
428351ebd318SNicolas Dichtel 
4284c78ba6d6SLubomir Rintel 	if (tb[RTA_PREF]) {
4285c78ba6d6SLubomir Rintel 		pref = nla_get_u8(tb[RTA_PREF]);
4286c78ba6d6SLubomir Rintel 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
4287c78ba6d6SLubomir Rintel 		    pref != ICMPV6_ROUTER_PREF_HIGH)
4288c78ba6d6SLubomir Rintel 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
4289c78ba6d6SLubomir Rintel 		cfg->fc_flags |= RTF_PREF(pref);
4290c78ba6d6SLubomir Rintel 	}
4291c78ba6d6SLubomir Rintel 
429219e42e45SRoopa Prabhu 	if (tb[RTA_ENCAP])
429319e42e45SRoopa Prabhu 		cfg->fc_encap = tb[RTA_ENCAP];
429419e42e45SRoopa Prabhu 
42959ed59592SDavid Ahern 	if (tb[RTA_ENCAP_TYPE]) {
429619e42e45SRoopa Prabhu 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
429719e42e45SRoopa Prabhu 
4298c255bd68SDavid Ahern 		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
42999ed59592SDavid Ahern 		if (err < 0)
43009ed59592SDavid Ahern 			goto errout;
43019ed59592SDavid Ahern 	}
43029ed59592SDavid Ahern 
430332bc201eSXin Long 	if (tb[RTA_EXPIRES]) {
430432bc201eSXin Long 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
430532bc201eSXin Long 
430632bc201eSXin Long 		if (addrconf_finite_timeout(timeout)) {
430732bc201eSXin Long 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
430832bc201eSXin Long 			cfg->fc_flags |= RTF_EXPIRES;
430932bc201eSXin Long 		}
431032bc201eSXin Long 	}
431132bc201eSXin Long 
431286872cb5SThomas Graf 	err = 0;
431386872cb5SThomas Graf errout:
431486872cb5SThomas Graf 	return err;
43151da177e4SLinus Torvalds }
43161da177e4SLinus Torvalds 
43176b9ea5a6SRoopa Prabhu struct rt6_nh {
43188d1c802bSDavid Ahern 	struct fib6_info *fib6_info;
43196b9ea5a6SRoopa Prabhu 	struct fib6_config r_cfg;
43206b9ea5a6SRoopa Prabhu 	struct list_head next;
43216b9ea5a6SRoopa Prabhu };
43226b9ea5a6SRoopa Prabhu 
4323d4ead6b3SDavid Ahern static int ip6_route_info_append(struct net *net,
4324d4ead6b3SDavid Ahern 				 struct list_head *rt6_nh_list,
43258d1c802bSDavid Ahern 				 struct fib6_info *rt,
43268d1c802bSDavid Ahern 				 struct fib6_config *r_cfg)
43276b9ea5a6SRoopa Prabhu {
43286b9ea5a6SRoopa Prabhu 	struct rt6_nh *nh;
43296b9ea5a6SRoopa Prabhu 	int err = -EEXIST;
43306b9ea5a6SRoopa Prabhu 
43316b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, rt6_nh_list, next) {
43328d1c802bSDavid Ahern 		/* check if fib6_info already exists */
43338d1c802bSDavid Ahern 		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
43346b9ea5a6SRoopa Prabhu 			return err;
43356b9ea5a6SRoopa Prabhu 	}
43366b9ea5a6SRoopa Prabhu 
43376b9ea5a6SRoopa Prabhu 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
43386b9ea5a6SRoopa Prabhu 	if (!nh)
43396b9ea5a6SRoopa Prabhu 		return -ENOMEM;
43408d1c802bSDavid Ahern 	nh->fib6_info = rt;
43416b9ea5a6SRoopa Prabhu 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
43426b9ea5a6SRoopa Prabhu 	list_add_tail(&nh->next, rt6_nh_list);
43436b9ea5a6SRoopa Prabhu 
43446b9ea5a6SRoopa Prabhu 	return 0;
43456b9ea5a6SRoopa Prabhu }
43466b9ea5a6SRoopa Prabhu 
43478d1c802bSDavid Ahern static void ip6_route_mpath_notify(struct fib6_info *rt,
43488d1c802bSDavid Ahern 				   struct fib6_info *rt_last,
43493b1137feSDavid Ahern 				   struct nl_info *info,
43503b1137feSDavid Ahern 				   __u16 nlflags)
43513b1137feSDavid Ahern {
43523b1137feSDavid Ahern 	/* if this is an APPEND route, then rt points to the first route
43533b1137feSDavid Ahern 	 * inserted and rt_last points to last route inserted. Userspace
43543b1137feSDavid Ahern 	 * wants a consistent dump of the route which starts at the first
43553b1137feSDavid Ahern 	 * nexthop. Since sibling routes are always added at the end of
43563b1137feSDavid Ahern 	 * the list, find the first sibling of the last route appended
43573b1137feSDavid Ahern 	 */
435893c2fb25SDavid Ahern 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
435993c2fb25SDavid Ahern 		rt = list_first_entry(&rt_last->fib6_siblings,
43608d1c802bSDavid Ahern 				      struct fib6_info,
436193c2fb25SDavid Ahern 				      fib6_siblings);
43623b1137feSDavid Ahern 	}
43633b1137feSDavid Ahern 
43643b1137feSDavid Ahern 	if (rt)
43653b1137feSDavid Ahern 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
43663b1137feSDavid Ahern }
43673b1137feSDavid Ahern 
4368333c4301SDavid Ahern static int ip6_route_multipath_add(struct fib6_config *cfg,
4369333c4301SDavid Ahern 				   struct netlink_ext_ack *extack)
437051ebd318SNicolas Dichtel {
43718d1c802bSDavid Ahern 	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
43723b1137feSDavid Ahern 	struct nl_info *info = &cfg->fc_nlinfo;
437351ebd318SNicolas Dichtel 	struct fib6_config r_cfg;
437451ebd318SNicolas Dichtel 	struct rtnexthop *rtnh;
43758d1c802bSDavid Ahern 	struct fib6_info *rt;
43766b9ea5a6SRoopa Prabhu 	struct rt6_nh *err_nh;
43776b9ea5a6SRoopa Prabhu 	struct rt6_nh *nh, *nh_safe;
43783b1137feSDavid Ahern 	__u16 nlflags;
437951ebd318SNicolas Dichtel 	int remaining;
438051ebd318SNicolas Dichtel 	int attrlen;
43816b9ea5a6SRoopa Prabhu 	int err = 1;
43826b9ea5a6SRoopa Prabhu 	int nhn = 0;
43836b9ea5a6SRoopa Prabhu 	int replace = (cfg->fc_nlinfo.nlh &&
43846b9ea5a6SRoopa Prabhu 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
43856b9ea5a6SRoopa Prabhu 	LIST_HEAD(rt6_nh_list);
438651ebd318SNicolas Dichtel 
43873b1137feSDavid Ahern 	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
43883b1137feSDavid Ahern 	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
43893b1137feSDavid Ahern 		nlflags |= NLM_F_APPEND;
43903b1137feSDavid Ahern 
439135f1b4e9SMichal Kubeček 	remaining = cfg->fc_mp_len;
439251ebd318SNicolas Dichtel 	rtnh = (struct rtnexthop *)cfg->fc_mp;
439351ebd318SNicolas Dichtel 
43946b9ea5a6SRoopa Prabhu 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
43958d1c802bSDavid Ahern 	 * fib6_info structs per nexthop
43966b9ea5a6SRoopa Prabhu 	 */
439751ebd318SNicolas Dichtel 	while (rtnh_ok(rtnh, remaining)) {
439851ebd318SNicolas Dichtel 		memcpy(&r_cfg, cfg, sizeof(*cfg));
439951ebd318SNicolas Dichtel 		if (rtnh->rtnh_ifindex)
440051ebd318SNicolas Dichtel 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
440151ebd318SNicolas Dichtel 
440251ebd318SNicolas Dichtel 		attrlen = rtnh_attrlen(rtnh);
440351ebd318SNicolas Dichtel 		if (attrlen > 0) {
440451ebd318SNicolas Dichtel 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
440551ebd318SNicolas Dichtel 
440651ebd318SNicolas Dichtel 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
440751ebd318SNicolas Dichtel 			if (nla) {
440867b61f6cSJiri Benc 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
440951ebd318SNicolas Dichtel 				r_cfg.fc_flags |= RTF_GATEWAY;
441051ebd318SNicolas Dichtel 			}
441119e42e45SRoopa Prabhu 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
441219e42e45SRoopa Prabhu 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
441319e42e45SRoopa Prabhu 			if (nla)
441419e42e45SRoopa Prabhu 				r_cfg.fc_encap_type = nla_get_u16(nla);
441551ebd318SNicolas Dichtel 		}
44166b9ea5a6SRoopa Prabhu 
441768e2ffdeSDavid Ahern 		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
4418acb54e3cSDavid Ahern 		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
44198c5b83f0SRoopa Prabhu 		if (IS_ERR(rt)) {
44208c5b83f0SRoopa Prabhu 			err = PTR_ERR(rt);
44218c5b83f0SRoopa Prabhu 			rt = NULL;
44226b9ea5a6SRoopa Prabhu 			goto cleanup;
44238c5b83f0SRoopa Prabhu 		}
4424b5d2d75eSDavid Ahern 		if (!rt6_qualify_for_ecmp(rt)) {
4425b5d2d75eSDavid Ahern 			err = -EINVAL;
4426b5d2d75eSDavid Ahern 			NL_SET_ERR_MSG(extack,
4427b5d2d75eSDavid Ahern 				       "Device only routes can not be added for IPv6 using the multipath API.");
4428b5d2d75eSDavid Ahern 			fib6_info_release(rt);
4429b5d2d75eSDavid Ahern 			goto cleanup;
4430b5d2d75eSDavid Ahern 		}
44316b9ea5a6SRoopa Prabhu 
4432ad1601aeSDavid Ahern 		rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
4433398958aeSIdo Schimmel 
4434d4ead6b3SDavid Ahern 		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4435d4ead6b3SDavid Ahern 					    rt, &r_cfg);
443651ebd318SNicolas Dichtel 		if (err) {
443793531c67SDavid Ahern 			fib6_info_release(rt);
44386b9ea5a6SRoopa Prabhu 			goto cleanup;
443951ebd318SNicolas Dichtel 		}
44406b9ea5a6SRoopa Prabhu 
44416b9ea5a6SRoopa Prabhu 		rtnh = rtnh_next(rtnh, &remaining);
444251ebd318SNicolas Dichtel 	}
44436b9ea5a6SRoopa Prabhu 
44443b1137feSDavid Ahern 	/* for add and replace send one notification with all nexthops.
44453b1137feSDavid Ahern 	 * Skip the notification in fib6_add_rt2node and send one with
44463b1137feSDavid Ahern 	 * the full route when done
44473b1137feSDavid Ahern 	 */
44483b1137feSDavid Ahern 	info->skip_notify = 1;
44493b1137feSDavid Ahern 
44506b9ea5a6SRoopa Prabhu 	err_nh = NULL;
44516b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, &rt6_nh_list, next) {
44528d1c802bSDavid Ahern 		err = __ip6_ins_rt(nh->fib6_info, info, extack);
44538d1c802bSDavid Ahern 		fib6_info_release(nh->fib6_info);
44543b1137feSDavid Ahern 
4455f7225172SDavid Ahern 		if (!err) {
4456f7225172SDavid Ahern 			/* save reference to last route successfully inserted */
4457f7225172SDavid Ahern 			rt_last = nh->fib6_info;
4458f7225172SDavid Ahern 
44596b9ea5a6SRoopa Prabhu 			/* save reference to first route for notification */
4460f7225172SDavid Ahern 			if (!rt_notif)
44618d1c802bSDavid Ahern 				rt_notif = nh->fib6_info;
4462f7225172SDavid Ahern 		}
44636b9ea5a6SRoopa Prabhu 
44648d1c802bSDavid Ahern 		/* nh->fib6_info is used or freed at this point, reset to NULL*/
44658d1c802bSDavid Ahern 		nh->fib6_info = NULL;
44666b9ea5a6SRoopa Prabhu 		if (err) {
44676b9ea5a6SRoopa Prabhu 			if (replace && nhn)
4468a5a82d84SJakub Kicinski 				NL_SET_ERR_MSG_MOD(extack,
4469a5a82d84SJakub Kicinski 						   "multipath route replace failed (check consistency of installed routes)");
44706b9ea5a6SRoopa Prabhu 			err_nh = nh;
44716b9ea5a6SRoopa Prabhu 			goto add_errout;
44726b9ea5a6SRoopa Prabhu 		}
44736b9ea5a6SRoopa Prabhu 
44741a72418bSNicolas Dichtel 		/* Because each route is added like a single route we remove
447527596472SMichal Kubeček 		 * these flags after the first nexthop: if there is a collision,
447627596472SMichal Kubeček 		 * we have already failed to add the first nexthop:
447727596472SMichal Kubeček 		 * fib6_add_rt2node() has rejected it; when replacing, old
447827596472SMichal Kubeček 		 * nexthops have been replaced by first new, the rest should
447927596472SMichal Kubeček 		 * be added to it.
44801a72418bSNicolas Dichtel 		 */
448127596472SMichal Kubeček 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
448227596472SMichal Kubeček 						     NLM_F_REPLACE);
44836b9ea5a6SRoopa Prabhu 		nhn++;
44846b9ea5a6SRoopa Prabhu 	}
44856b9ea5a6SRoopa Prabhu 
44863b1137feSDavid Ahern 	/* success ... tell user about new route */
44873b1137feSDavid Ahern 	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
44886b9ea5a6SRoopa Prabhu 	goto cleanup;
44896b9ea5a6SRoopa Prabhu 
44906b9ea5a6SRoopa Prabhu add_errout:
44913b1137feSDavid Ahern 	/* send notification for routes that were added so that
44923b1137feSDavid Ahern 	 * the delete notifications sent by ip6_route_del are
44933b1137feSDavid Ahern 	 * coherent
44943b1137feSDavid Ahern 	 */
44953b1137feSDavid Ahern 	if (rt_notif)
44963b1137feSDavid Ahern 		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
44973b1137feSDavid Ahern 
44986b9ea5a6SRoopa Prabhu 	/* Delete routes that were already added */
44996b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, &rt6_nh_list, next) {
45006b9ea5a6SRoopa Prabhu 		if (err_nh == nh)
45016b9ea5a6SRoopa Prabhu 			break;
4502333c4301SDavid Ahern 		ip6_route_del(&nh->r_cfg, extack);
45036b9ea5a6SRoopa Prabhu 	}
45046b9ea5a6SRoopa Prabhu 
45056b9ea5a6SRoopa Prabhu cleanup:
45066b9ea5a6SRoopa Prabhu 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
45078d1c802bSDavid Ahern 		if (nh->fib6_info)
45088d1c802bSDavid Ahern 			fib6_info_release(nh->fib6_info);
45096b9ea5a6SRoopa Prabhu 		list_del(&nh->next);
45106b9ea5a6SRoopa Prabhu 		kfree(nh);
45116b9ea5a6SRoopa Prabhu 	}
45126b9ea5a6SRoopa Prabhu 
45136b9ea5a6SRoopa Prabhu 	return err;
45146b9ea5a6SRoopa Prabhu }
45156b9ea5a6SRoopa Prabhu 
4516333c4301SDavid Ahern static int ip6_route_multipath_del(struct fib6_config *cfg,
4517333c4301SDavid Ahern 				   struct netlink_ext_ack *extack)
45186b9ea5a6SRoopa Prabhu {
45196b9ea5a6SRoopa Prabhu 	struct fib6_config r_cfg;
45206b9ea5a6SRoopa Prabhu 	struct rtnexthop *rtnh;
45216b9ea5a6SRoopa Prabhu 	int remaining;
45226b9ea5a6SRoopa Prabhu 	int attrlen;
45236b9ea5a6SRoopa Prabhu 	int err = 1, last_err = 0;
45246b9ea5a6SRoopa Prabhu 
45256b9ea5a6SRoopa Prabhu 	remaining = cfg->fc_mp_len;
45266b9ea5a6SRoopa Prabhu 	rtnh = (struct rtnexthop *)cfg->fc_mp;
45276b9ea5a6SRoopa Prabhu 
45286b9ea5a6SRoopa Prabhu 	/* Parse a Multipath Entry */
45296b9ea5a6SRoopa Prabhu 	while (rtnh_ok(rtnh, remaining)) {
45306b9ea5a6SRoopa Prabhu 		memcpy(&r_cfg, cfg, sizeof(*cfg));
45316b9ea5a6SRoopa Prabhu 		if (rtnh->rtnh_ifindex)
45326b9ea5a6SRoopa Prabhu 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
45336b9ea5a6SRoopa Prabhu 
45346b9ea5a6SRoopa Prabhu 		attrlen = rtnh_attrlen(rtnh);
45356b9ea5a6SRoopa Prabhu 		if (attrlen > 0) {
45366b9ea5a6SRoopa Prabhu 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
45376b9ea5a6SRoopa Prabhu 
45386b9ea5a6SRoopa Prabhu 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
45396b9ea5a6SRoopa Prabhu 			if (nla) {
45406b9ea5a6SRoopa Prabhu 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
45416b9ea5a6SRoopa Prabhu 				r_cfg.fc_flags |= RTF_GATEWAY;
45426b9ea5a6SRoopa Prabhu 			}
45436b9ea5a6SRoopa Prabhu 		}
4544333c4301SDavid Ahern 		err = ip6_route_del(&r_cfg, extack);
45456b9ea5a6SRoopa Prabhu 		if (err)
45466b9ea5a6SRoopa Prabhu 			last_err = err;
45476b9ea5a6SRoopa Prabhu 
454851ebd318SNicolas Dichtel 		rtnh = rtnh_next(rtnh, &remaining);
454951ebd318SNicolas Dichtel 	}
455051ebd318SNicolas Dichtel 
455151ebd318SNicolas Dichtel 	return last_err;
455251ebd318SNicolas Dichtel }
455351ebd318SNicolas Dichtel 
4554c21ef3e3SDavid Ahern static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4555c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
45561da177e4SLinus Torvalds {
455786872cb5SThomas Graf 	struct fib6_config cfg;
455886872cb5SThomas Graf 	int err;
45591da177e4SLinus Torvalds 
4560333c4301SDavid Ahern 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
456186872cb5SThomas Graf 	if (err < 0)
456286872cb5SThomas Graf 		return err;
456386872cb5SThomas Graf 
456451ebd318SNicolas Dichtel 	if (cfg.fc_mp)
4565333c4301SDavid Ahern 		return ip6_route_multipath_del(&cfg, extack);
45660ae81335SDavid Ahern 	else {
45670ae81335SDavid Ahern 		cfg.fc_delete_all_nh = 1;
4568333c4301SDavid Ahern 		return ip6_route_del(&cfg, extack);
45691da177e4SLinus Torvalds 	}
45700ae81335SDavid Ahern }
45711da177e4SLinus Torvalds 
4572c21ef3e3SDavid Ahern static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4573c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
45741da177e4SLinus Torvalds {
457586872cb5SThomas Graf 	struct fib6_config cfg;
457686872cb5SThomas Graf 	int err;
45771da177e4SLinus Torvalds 
4578333c4301SDavid Ahern 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
457986872cb5SThomas Graf 	if (err < 0)
458086872cb5SThomas Graf 		return err;
458186872cb5SThomas Graf 
458267f69513SDavid Ahern 	if (cfg.fc_metric == 0)
458367f69513SDavid Ahern 		cfg.fc_metric = IP6_RT_PRIO_USER;
458467f69513SDavid Ahern 
458551ebd318SNicolas Dichtel 	if (cfg.fc_mp)
4586333c4301SDavid Ahern 		return ip6_route_multipath_add(&cfg, extack);
458751ebd318SNicolas Dichtel 	else
4588acb54e3cSDavid Ahern 		return ip6_route_add(&cfg, GFP_KERNEL, extack);
45891da177e4SLinus Torvalds }
45901da177e4SLinus Torvalds 
45918d1c802bSDavid Ahern static size_t rt6_nlmsg_size(struct fib6_info *rt)
4592339bf98fSThomas Graf {
4593beb1afacSDavid Ahern 	int nexthop_len = 0;
4594beb1afacSDavid Ahern 
459593c2fb25SDavid Ahern 	if (rt->fib6_nsiblings) {
4596beb1afacSDavid Ahern 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
4597beb1afacSDavid Ahern 			    + NLA_ALIGN(sizeof(struct rtnexthop))
4598beb1afacSDavid Ahern 			    + nla_total_size(16) /* RTA_GATEWAY */
4599ad1601aeSDavid Ahern 			    + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
4600beb1afacSDavid Ahern 
460193c2fb25SDavid Ahern 		nexthop_len *= rt->fib6_nsiblings;
4602beb1afacSDavid Ahern 	}
4603beb1afacSDavid Ahern 
4604339bf98fSThomas Graf 	return NLMSG_ALIGN(sizeof(struct rtmsg))
4605339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_SRC */
4606339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_DST */
4607339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_GATEWAY */
4608339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_PREFSRC */
4609339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_TABLE */
4610339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_IIF */
4611339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_OIF */
4612339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_PRIORITY */
46136a2b9ce0SNoriaki TAKAMIYA 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
4614ea697639SDaniel Borkmann 	       + nla_total_size(sizeof(struct rta_cacheinfo))
4615c78ba6d6SLubomir Rintel 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
461619e42e45SRoopa Prabhu 	       + nla_total_size(1) /* RTA_PREF */
4617ad1601aeSDavid Ahern 	       + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
4618beb1afacSDavid Ahern 	       + nexthop_len;
4619beb1afacSDavid Ahern }
4620beb1afacSDavid Ahern 
4621d4ead6b3SDavid Ahern static int rt6_fill_node(struct net *net, struct sk_buff *skb,
46228d1c802bSDavid Ahern 			 struct fib6_info *rt, struct dst_entry *dst,
4623d4ead6b3SDavid Ahern 			 struct in6_addr *dest, struct in6_addr *src,
462415e47304SEric W. Biederman 			 int iif, int type, u32 portid, u32 seq,
4625f8cfe2ceSDavid Ahern 			 unsigned int flags)
46261da177e4SLinus Torvalds {
462722d0bd82SXin Long 	struct rt6_info *rt6 = (struct rt6_info *)dst;
462822d0bd82SXin Long 	struct rt6key *rt6_dst, *rt6_src;
462922d0bd82SXin Long 	u32 *pmetrics, table, rt6_flags;
46301da177e4SLinus Torvalds 	struct nlmsghdr *nlh;
463122d0bd82SXin Long 	struct rtmsg *rtm;
4632d4ead6b3SDavid Ahern 	long expires = 0;
46331da177e4SLinus Torvalds 
463415e47304SEric W. Biederman 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
463538308473SDavid S. Miller 	if (!nlh)
463626932566SPatrick McHardy 		return -EMSGSIZE;
46372d7202bfSThomas Graf 
463822d0bd82SXin Long 	if (rt6) {
463922d0bd82SXin Long 		rt6_dst = &rt6->rt6i_dst;
464022d0bd82SXin Long 		rt6_src = &rt6->rt6i_src;
464122d0bd82SXin Long 		rt6_flags = rt6->rt6i_flags;
464222d0bd82SXin Long 	} else {
464322d0bd82SXin Long 		rt6_dst = &rt->fib6_dst;
464422d0bd82SXin Long 		rt6_src = &rt->fib6_src;
464522d0bd82SXin Long 		rt6_flags = rt->fib6_flags;
464622d0bd82SXin Long 	}
464722d0bd82SXin Long 
46482d7202bfSThomas Graf 	rtm = nlmsg_data(nlh);
46491da177e4SLinus Torvalds 	rtm->rtm_family = AF_INET6;
465022d0bd82SXin Long 	rtm->rtm_dst_len = rt6_dst->plen;
465122d0bd82SXin Long 	rtm->rtm_src_len = rt6_src->plen;
46521da177e4SLinus Torvalds 	rtm->rtm_tos = 0;
465393c2fb25SDavid Ahern 	if (rt->fib6_table)
465493c2fb25SDavid Ahern 		table = rt->fib6_table->tb6_id;
4655c71099acSThomas Graf 	else
46569e762a4aSPatrick McHardy 		table = RT6_TABLE_UNSPEC;
465797f0082aSKalash Nainwal 	rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
4658c78679e8SDavid S. Miller 	if (nla_put_u32(skb, RTA_TABLE, table))
4659c78679e8SDavid S. Miller 		goto nla_put_failure;
4660e8478e80SDavid Ahern 
4661e8478e80SDavid Ahern 	rtm->rtm_type = rt->fib6_type;
46621da177e4SLinus Torvalds 	rtm->rtm_flags = 0;
46631da177e4SLinus Torvalds 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
466493c2fb25SDavid Ahern 	rtm->rtm_protocol = rt->fib6_protocol;
46651da177e4SLinus Torvalds 
466622d0bd82SXin Long 	if (rt6_flags & RTF_CACHE)
46671da177e4SLinus Torvalds 		rtm->rtm_flags |= RTM_F_CLONED;
46681da177e4SLinus Torvalds 
4669d4ead6b3SDavid Ahern 	if (dest) {
4670d4ead6b3SDavid Ahern 		if (nla_put_in6_addr(skb, RTA_DST, dest))
4671c78679e8SDavid S. Miller 			goto nla_put_failure;
46721da177e4SLinus Torvalds 		rtm->rtm_dst_len = 128;
46731da177e4SLinus Torvalds 	} else if (rtm->rtm_dst_len)
467422d0bd82SXin Long 		if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
4675c78679e8SDavid S. Miller 			goto nla_put_failure;
46761da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
46771da177e4SLinus Torvalds 	if (src) {
4678930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_SRC, src))
4679c78679e8SDavid S. Miller 			goto nla_put_failure;
46801da177e4SLinus Torvalds 		rtm->rtm_src_len = 128;
4681c78679e8SDavid S. Miller 	} else if (rtm->rtm_src_len &&
468222d0bd82SXin Long 		   nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
4683c78679e8SDavid S. Miller 		goto nla_put_failure;
46841da177e4SLinus Torvalds #endif
46857bc570c8SYOSHIFUJI Hideaki 	if (iif) {
46867bc570c8SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_MROUTE
468722d0bd82SXin Long 		if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
4688fd61c6baSDavid Ahern 			int err = ip6mr_get_route(net, skb, rtm, portid);
46892cf75070SNikolay Aleksandrov 
46907bc570c8SYOSHIFUJI Hideaki 			if (err == 0)
46917bc570c8SYOSHIFUJI Hideaki 				return 0;
4692fd61c6baSDavid Ahern 			if (err < 0)
46937bc570c8SYOSHIFUJI Hideaki 				goto nla_put_failure;
46947bc570c8SYOSHIFUJI Hideaki 		} else
46957bc570c8SYOSHIFUJI Hideaki #endif
4696c78679e8SDavid S. Miller 			if (nla_put_u32(skb, RTA_IIF, iif))
4697c78679e8SDavid S. Miller 				goto nla_put_failure;
4698d4ead6b3SDavid Ahern 	} else if (dest) {
46991da177e4SLinus Torvalds 		struct in6_addr saddr_buf;
4700d4ead6b3SDavid Ahern 		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
4701930345eaSJiri Benc 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4702c78679e8SDavid S. Miller 			goto nla_put_failure;
4703c3968a85SDaniel Walter 	}
4704c3968a85SDaniel Walter 
470593c2fb25SDavid Ahern 	if (rt->fib6_prefsrc.plen) {
4706c3968a85SDaniel Walter 		struct in6_addr saddr_buf;
470793c2fb25SDavid Ahern 		saddr_buf = rt->fib6_prefsrc.addr;
4708930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4709c78679e8SDavid S. Miller 			goto nla_put_failure;
47101da177e4SLinus Torvalds 	}
47112d7202bfSThomas Graf 
4712d4ead6b3SDavid Ahern 	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4713d4ead6b3SDavid Ahern 	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
47142d7202bfSThomas Graf 		goto nla_put_failure;
47152d7202bfSThomas Graf 
471693c2fb25SDavid Ahern 	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
4717beb1afacSDavid Ahern 		goto nla_put_failure;
4718beb1afacSDavid Ahern 
4719beb1afacSDavid Ahern 	/* For multipath routes, walk the siblings list and add
4720beb1afacSDavid Ahern 	 * each as a nexthop within RTA_MULTIPATH.
4721beb1afacSDavid Ahern 	 */
472222d0bd82SXin Long 	if (rt6) {
472322d0bd82SXin Long 		if (rt6_flags & RTF_GATEWAY &&
472422d0bd82SXin Long 		    nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
472522d0bd82SXin Long 			goto nla_put_failure;
472622d0bd82SXin Long 
472722d0bd82SXin Long 		if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
472822d0bd82SXin Long 			goto nla_put_failure;
472922d0bd82SXin Long 	} else if (rt->fib6_nsiblings) {
47308d1c802bSDavid Ahern 		struct fib6_info *sibling, *next_sibling;
4731beb1afacSDavid Ahern 		struct nlattr *mp;
4732beb1afacSDavid Ahern 
4733beb1afacSDavid Ahern 		mp = nla_nest_start(skb, RTA_MULTIPATH);
4734beb1afacSDavid Ahern 		if (!mp)
4735beb1afacSDavid Ahern 			goto nla_put_failure;
4736beb1afacSDavid Ahern 
4737c0a72077SDavid Ahern 		if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
4738c0a72077SDavid Ahern 				    rt->fib6_nh.fib_nh_weight) < 0)
4739beb1afacSDavid Ahern 			goto nla_put_failure;
4740beb1afacSDavid Ahern 
4741beb1afacSDavid Ahern 		list_for_each_entry_safe(sibling, next_sibling,
474293c2fb25SDavid Ahern 					 &rt->fib6_siblings, fib6_siblings) {
4743c0a72077SDavid Ahern 			if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
4744c0a72077SDavid Ahern 					    sibling->fib6_nh.fib_nh_weight) < 0)
474594f826b8SEric Dumazet 				goto nla_put_failure;
474694f826b8SEric Dumazet 		}
47472d7202bfSThomas Graf 
4748beb1afacSDavid Ahern 		nla_nest_end(skb, mp);
4749beb1afacSDavid Ahern 	} else {
4750c0a72077SDavid Ahern 		if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
4751c0a72077SDavid Ahern 				     &rtm->rtm_flags, false) < 0)
4752c78679e8SDavid S. Miller 			goto nla_put_failure;
4753beb1afacSDavid Ahern 	}
47548253947eSLi Wei 
475522d0bd82SXin Long 	if (rt6_flags & RTF_EXPIRES) {
475614895687SDavid Ahern 		expires = dst ? dst->expires : rt->expires;
475714895687SDavid Ahern 		expires -= jiffies;
475814895687SDavid Ahern 	}
475969cdf8f9SYOSHIFUJI Hideaki 
4760d4ead6b3SDavid Ahern 	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
4761e3703b3dSThomas Graf 		goto nla_put_failure;
47621da177e4SLinus Torvalds 
476322d0bd82SXin Long 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
4764c78ba6d6SLubomir Rintel 		goto nla_put_failure;
4765c78ba6d6SLubomir Rintel 
476619e42e45SRoopa Prabhu 
4767053c095aSJohannes Berg 	nlmsg_end(skb, nlh);
4768053c095aSJohannes Berg 	return 0;
47692d7202bfSThomas Graf 
47702d7202bfSThomas Graf nla_put_failure:
477126932566SPatrick McHardy 	nlmsg_cancel(skb, nlh);
477226932566SPatrick McHardy 	return -EMSGSIZE;
47731da177e4SLinus Torvalds }
47741da177e4SLinus Torvalds 
477513e38901SDavid Ahern static bool fib6_info_uses_dev(const struct fib6_info *f6i,
477613e38901SDavid Ahern 			       const struct net_device *dev)
477713e38901SDavid Ahern {
4778ad1601aeSDavid Ahern 	if (f6i->fib6_nh.fib_nh_dev == dev)
477913e38901SDavid Ahern 		return true;
478013e38901SDavid Ahern 
478113e38901SDavid Ahern 	if (f6i->fib6_nsiblings) {
478213e38901SDavid Ahern 		struct fib6_info *sibling, *next_sibling;
478313e38901SDavid Ahern 
478413e38901SDavid Ahern 		list_for_each_entry_safe(sibling, next_sibling,
478513e38901SDavid Ahern 					 &f6i->fib6_siblings, fib6_siblings) {
4786ad1601aeSDavid Ahern 			if (sibling->fib6_nh.fib_nh_dev == dev)
478713e38901SDavid Ahern 				return true;
478813e38901SDavid Ahern 		}
478913e38901SDavid Ahern 	}
479013e38901SDavid Ahern 
479113e38901SDavid Ahern 	return false;
479213e38901SDavid Ahern }
479313e38901SDavid Ahern 
47948d1c802bSDavid Ahern int rt6_dump_route(struct fib6_info *rt, void *p_arg)
47951da177e4SLinus Torvalds {
47961da177e4SLinus Torvalds 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
479713e38901SDavid Ahern 	struct fib_dump_filter *filter = &arg->filter;
479813e38901SDavid Ahern 	unsigned int flags = NLM_F_MULTI;
47991f17e2f2SDavid Ahern 	struct net *net = arg->net;
48001f17e2f2SDavid Ahern 
4801421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry)
48021f17e2f2SDavid Ahern 		return 0;
48031da177e4SLinus Torvalds 
480413e38901SDavid Ahern 	if ((filter->flags & RTM_F_PREFIX) &&
480593c2fb25SDavid Ahern 	    !(rt->fib6_flags & RTF_PREFIX_RT)) {
4806f8cfe2ceSDavid Ahern 		/* success since this is not a prefix route */
4807f8cfe2ceSDavid Ahern 		return 1;
4808f8cfe2ceSDavid Ahern 	}
480913e38901SDavid Ahern 	if (filter->filter_set) {
481013e38901SDavid Ahern 		if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
481113e38901SDavid Ahern 		    (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
481213e38901SDavid Ahern 		    (filter->protocol && rt->fib6_protocol != filter->protocol)) {
481313e38901SDavid Ahern 			return 1;
481413e38901SDavid Ahern 		}
481513e38901SDavid Ahern 		flags |= NLM_F_DUMP_FILTERED;
4816f8cfe2ceSDavid Ahern 	}
48171da177e4SLinus Torvalds 
4818d4ead6b3SDavid Ahern 	return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4819d4ead6b3SDavid Ahern 			     RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
482013e38901SDavid Ahern 			     arg->cb->nlh->nlmsg_seq, flags);
48211da177e4SLinus Torvalds }
48221da177e4SLinus Torvalds 
48230eff0a27SJakub Kicinski static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
48240eff0a27SJakub Kicinski 					const struct nlmsghdr *nlh,
48250eff0a27SJakub Kicinski 					struct nlattr **tb,
48260eff0a27SJakub Kicinski 					struct netlink_ext_ack *extack)
48270eff0a27SJakub Kicinski {
48280eff0a27SJakub Kicinski 	struct rtmsg *rtm;
48290eff0a27SJakub Kicinski 	int i, err;
48300eff0a27SJakub Kicinski 
48310eff0a27SJakub Kicinski 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
48320eff0a27SJakub Kicinski 		NL_SET_ERR_MSG_MOD(extack,
48330eff0a27SJakub Kicinski 				   "Invalid header for get route request");
48340eff0a27SJakub Kicinski 		return -EINVAL;
48350eff0a27SJakub Kicinski 	}
48360eff0a27SJakub Kicinski 
48370eff0a27SJakub Kicinski 	if (!netlink_strict_get_check(skb))
48380eff0a27SJakub Kicinski 		return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
48390eff0a27SJakub Kicinski 				   rtm_ipv6_policy, extack);
48400eff0a27SJakub Kicinski 
48410eff0a27SJakub Kicinski 	rtm = nlmsg_data(nlh);
48420eff0a27SJakub Kicinski 	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
48430eff0a27SJakub Kicinski 	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
48440eff0a27SJakub Kicinski 	    rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
48450eff0a27SJakub Kicinski 	    rtm->rtm_type) {
48460eff0a27SJakub Kicinski 		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
48470eff0a27SJakub Kicinski 		return -EINVAL;
48480eff0a27SJakub Kicinski 	}
48490eff0a27SJakub Kicinski 	if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
48500eff0a27SJakub Kicinski 		NL_SET_ERR_MSG_MOD(extack,
48510eff0a27SJakub Kicinski 				   "Invalid flags for get route request");
48520eff0a27SJakub Kicinski 		return -EINVAL;
48530eff0a27SJakub Kicinski 	}
48540eff0a27SJakub Kicinski 
48550eff0a27SJakub Kicinski 	err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
48560eff0a27SJakub Kicinski 				 rtm_ipv6_policy, extack);
48570eff0a27SJakub Kicinski 	if (err)
48580eff0a27SJakub Kicinski 		return err;
48590eff0a27SJakub Kicinski 
48600eff0a27SJakub Kicinski 	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
48610eff0a27SJakub Kicinski 	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
48620eff0a27SJakub Kicinski 		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
48630eff0a27SJakub Kicinski 		return -EINVAL;
48640eff0a27SJakub Kicinski 	}
48650eff0a27SJakub Kicinski 
48660eff0a27SJakub Kicinski 	for (i = 0; i <= RTA_MAX; i++) {
48670eff0a27SJakub Kicinski 		if (!tb[i])
48680eff0a27SJakub Kicinski 			continue;
48690eff0a27SJakub Kicinski 
48700eff0a27SJakub Kicinski 		switch (i) {
48710eff0a27SJakub Kicinski 		case RTA_SRC:
48720eff0a27SJakub Kicinski 		case RTA_DST:
48730eff0a27SJakub Kicinski 		case RTA_IIF:
48740eff0a27SJakub Kicinski 		case RTA_OIF:
48750eff0a27SJakub Kicinski 		case RTA_MARK:
48760eff0a27SJakub Kicinski 		case RTA_UID:
48770eff0a27SJakub Kicinski 		case RTA_SPORT:
48780eff0a27SJakub Kicinski 		case RTA_DPORT:
48790eff0a27SJakub Kicinski 		case RTA_IP_PROTO:
48800eff0a27SJakub Kicinski 			break;
48810eff0a27SJakub Kicinski 		default:
48820eff0a27SJakub Kicinski 			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
48830eff0a27SJakub Kicinski 			return -EINVAL;
48840eff0a27SJakub Kicinski 		}
48850eff0a27SJakub Kicinski 	}
48860eff0a27SJakub Kicinski 
48870eff0a27SJakub Kicinski 	return 0;
48880eff0a27SJakub Kicinski }
48890eff0a27SJakub Kicinski 
4890c21ef3e3SDavid Ahern static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4891c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
48921da177e4SLinus Torvalds {
48933b1e0a65SYOSHIFUJI Hideaki 	struct net *net = sock_net(in_skb->sk);
4894ab364a6fSThomas Graf 	struct nlattr *tb[RTA_MAX+1];
489518c3a61cSRoopa Prabhu 	int err, iif = 0, oif = 0;
4896a68886a6SDavid Ahern 	struct fib6_info *from;
489718c3a61cSRoopa Prabhu 	struct dst_entry *dst;
48981da177e4SLinus Torvalds 	struct rt6_info *rt;
4899ab364a6fSThomas Graf 	struct sk_buff *skb;
4900ab364a6fSThomas Graf 	struct rtmsg *rtm;
4901744486d4SMaciej Żenczykowski 	struct flowi6 fl6 = {};
490218c3a61cSRoopa Prabhu 	bool fibmatch;
4903ab364a6fSThomas Graf 
49040eff0a27SJakub Kicinski 	err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
4905ab364a6fSThomas Graf 	if (err < 0)
4906ab364a6fSThomas Graf 		goto errout;
4907ab364a6fSThomas Graf 
4908ab364a6fSThomas Graf 	err = -EINVAL;
490938b7097bSHannes Frederic Sowa 	rtm = nlmsg_data(nlh);
491038b7097bSHannes Frederic Sowa 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
491118c3a61cSRoopa Prabhu 	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4912ab364a6fSThomas Graf 
4913ab364a6fSThomas Graf 	if (tb[RTA_SRC]) {
4914ab364a6fSThomas Graf 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4915ab364a6fSThomas Graf 			goto errout;
4916ab364a6fSThomas Graf 
49174e3fd7a0SAlexey Dobriyan 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4918ab364a6fSThomas Graf 	}
4919ab364a6fSThomas Graf 
4920ab364a6fSThomas Graf 	if (tb[RTA_DST]) {
4921ab364a6fSThomas Graf 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4922ab364a6fSThomas Graf 			goto errout;
4923ab364a6fSThomas Graf 
49244e3fd7a0SAlexey Dobriyan 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4925ab364a6fSThomas Graf 	}
4926ab364a6fSThomas Graf 
4927ab364a6fSThomas Graf 	if (tb[RTA_IIF])
4928ab364a6fSThomas Graf 		iif = nla_get_u32(tb[RTA_IIF]);
4929ab364a6fSThomas Graf 
4930ab364a6fSThomas Graf 	if (tb[RTA_OIF])
493172331bc0SShmulik Ladkani 		oif = nla_get_u32(tb[RTA_OIF]);
4932ab364a6fSThomas Graf 
49332e47b291SLorenzo Colitti 	if (tb[RTA_MARK])
49342e47b291SLorenzo Colitti 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
49352e47b291SLorenzo Colitti 
4936622ec2c9SLorenzo Colitti 	if (tb[RTA_UID])
4937622ec2c9SLorenzo Colitti 		fl6.flowi6_uid = make_kuid(current_user_ns(),
4938622ec2c9SLorenzo Colitti 					   nla_get_u32(tb[RTA_UID]));
4939622ec2c9SLorenzo Colitti 	else
4940622ec2c9SLorenzo Colitti 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4941622ec2c9SLorenzo Colitti 
4942eacb9384SRoopa Prabhu 	if (tb[RTA_SPORT])
4943eacb9384SRoopa Prabhu 		fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4944eacb9384SRoopa Prabhu 
4945eacb9384SRoopa Prabhu 	if (tb[RTA_DPORT])
4946eacb9384SRoopa Prabhu 		fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4947eacb9384SRoopa Prabhu 
4948eacb9384SRoopa Prabhu 	if (tb[RTA_IP_PROTO]) {
4949eacb9384SRoopa Prabhu 		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
49505e1a99eaSHangbin Liu 						  &fl6.flowi6_proto, AF_INET6,
49515e1a99eaSHangbin Liu 						  extack);
4952eacb9384SRoopa Prabhu 		if (err)
4953eacb9384SRoopa Prabhu 			goto errout;
4954eacb9384SRoopa Prabhu 	}
4955eacb9384SRoopa Prabhu 
4956ab364a6fSThomas Graf 	if (iif) {
4957ab364a6fSThomas Graf 		struct net_device *dev;
495872331bc0SShmulik Ladkani 		int flags = 0;
495972331bc0SShmulik Ladkani 
4960121622dbSFlorian Westphal 		rcu_read_lock();
4961121622dbSFlorian Westphal 
4962121622dbSFlorian Westphal 		dev = dev_get_by_index_rcu(net, iif);
4963ab364a6fSThomas Graf 		if (!dev) {
4964121622dbSFlorian Westphal 			rcu_read_unlock();
4965ab364a6fSThomas Graf 			err = -ENODEV;
4966ab364a6fSThomas Graf 			goto errout;
4967ab364a6fSThomas Graf 		}
496872331bc0SShmulik Ladkani 
496972331bc0SShmulik Ladkani 		fl6.flowi6_iif = iif;
497072331bc0SShmulik Ladkani 
497172331bc0SShmulik Ladkani 		if (!ipv6_addr_any(&fl6.saddr))
497272331bc0SShmulik Ladkani 			flags |= RT6_LOOKUP_F_HAS_SADDR;
497372331bc0SShmulik Ladkani 
4974b75cc8f9SDavid Ahern 		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
4975121622dbSFlorian Westphal 
4976121622dbSFlorian Westphal 		rcu_read_unlock();
497772331bc0SShmulik Ladkani 	} else {
497872331bc0SShmulik Ladkani 		fl6.flowi6_oif = oif;
497972331bc0SShmulik Ladkani 
498018c3a61cSRoopa Prabhu 		dst = ip6_route_output(net, NULL, &fl6);
498118c3a61cSRoopa Prabhu 	}
498218c3a61cSRoopa Prabhu 
498318c3a61cSRoopa Prabhu 
498418c3a61cSRoopa Prabhu 	rt = container_of(dst, struct rt6_info, dst);
498518c3a61cSRoopa Prabhu 	if (rt->dst.error) {
498618c3a61cSRoopa Prabhu 		err = rt->dst.error;
498718c3a61cSRoopa Prabhu 		ip6_rt_put(rt);
498818c3a61cSRoopa Prabhu 		goto errout;
4989ab364a6fSThomas Graf 	}
49901da177e4SLinus Torvalds 
49919d6acb3bSWANG Cong 	if (rt == net->ipv6.ip6_null_entry) {
49929d6acb3bSWANG Cong 		err = rt->dst.error;
49939d6acb3bSWANG Cong 		ip6_rt_put(rt);
49949d6acb3bSWANG Cong 		goto errout;
49959d6acb3bSWANG Cong 	}
49969d6acb3bSWANG Cong 
49971da177e4SLinus Torvalds 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
499838308473SDavid S. Miller 	if (!skb) {
499994e187c0SAmerigo Wang 		ip6_rt_put(rt);
5000ab364a6fSThomas Graf 		err = -ENOBUFS;
5001ab364a6fSThomas Graf 		goto errout;
5002ab364a6fSThomas Graf 	}
50031da177e4SLinus Torvalds 
5004d8d1f30bSChangli Gao 	skb_dst_set(skb, &rt->dst);
5005a68886a6SDavid Ahern 
5006a68886a6SDavid Ahern 	rcu_read_lock();
5007a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
5008a68886a6SDavid Ahern 
500918c3a61cSRoopa Prabhu 	if (fibmatch)
5010a68886a6SDavid Ahern 		err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
501118c3a61cSRoopa Prabhu 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
501218c3a61cSRoopa Prabhu 				    nlh->nlmsg_seq, 0);
501318c3a61cSRoopa Prabhu 	else
5014a68886a6SDavid Ahern 		err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5015a68886a6SDavid Ahern 				    &fl6.saddr, iif, RTM_NEWROUTE,
5016d4ead6b3SDavid Ahern 				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
5017d4ead6b3SDavid Ahern 				    0);
5018a68886a6SDavid Ahern 	rcu_read_unlock();
5019a68886a6SDavid Ahern 
50201da177e4SLinus Torvalds 	if (err < 0) {
5021ab364a6fSThomas Graf 		kfree_skb(skb);
5022ab364a6fSThomas Graf 		goto errout;
50231da177e4SLinus Torvalds 	}
50241da177e4SLinus Torvalds 
502515e47304SEric W. Biederman 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
5026ab364a6fSThomas Graf errout:
50271da177e4SLinus Torvalds 	return err;
50281da177e4SLinus Torvalds }
50291da177e4SLinus Torvalds 
50308d1c802bSDavid Ahern void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
503137a1d361SRoopa Prabhu 		     unsigned int nlm_flags)
50321da177e4SLinus Torvalds {
50331da177e4SLinus Torvalds 	struct sk_buff *skb;
50345578689aSDaniel Lezcano 	struct net *net = info->nl_net;
5035528c4cebSDenis V. Lunev 	u32 seq;
5036528c4cebSDenis V. Lunev 	int err;
50370d51aa80SJamal Hadi Salim 
5038528c4cebSDenis V. Lunev 	err = -ENOBUFS;
503938308473SDavid S. Miller 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
504086872cb5SThomas Graf 
504119e42e45SRoopa Prabhu 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
504238308473SDavid S. Miller 	if (!skb)
504321713ebcSThomas Graf 		goto errout;
50441da177e4SLinus Torvalds 
5045d4ead6b3SDavid Ahern 	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5046f8cfe2ceSDavid Ahern 			    event, info->portid, seq, nlm_flags);
504726932566SPatrick McHardy 	if (err < 0) {
504826932566SPatrick McHardy 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
504926932566SPatrick McHardy 		WARN_ON(err == -EMSGSIZE);
505026932566SPatrick McHardy 		kfree_skb(skb);
505126932566SPatrick McHardy 		goto errout;
505226932566SPatrick McHardy 	}
505315e47304SEric W. Biederman 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
50545578689aSDaniel Lezcano 		    info->nlh, gfp_any());
50551ce85fe4SPablo Neira Ayuso 	return;
505621713ebcSThomas Graf errout:
505721713ebcSThomas Graf 	if (err < 0)
50585578689aSDaniel Lezcano 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
50591da177e4SLinus Torvalds }
50601da177e4SLinus Torvalds 
50618ed67789SDaniel Lezcano static int ip6_route_dev_notify(struct notifier_block *this,
5062351638e7SJiri Pirko 				unsigned long event, void *ptr)
50638ed67789SDaniel Lezcano {
5064351638e7SJiri Pirko 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
5065c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
50668ed67789SDaniel Lezcano 
5067242d3a49SWANG Cong 	if (!(dev->flags & IFF_LOOPBACK))
5068242d3a49SWANG Cong 		return NOTIFY_OK;
5069242d3a49SWANG Cong 
5070242d3a49SWANG Cong 	if (event == NETDEV_REGISTER) {
5071ad1601aeSDavid Ahern 		net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
5072d8d1f30bSChangli Gao 		net->ipv6.ip6_null_entry->dst.dev = dev;
50738ed67789SDaniel Lezcano 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
50748ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5075d8d1f30bSChangli Gao 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
50768ed67789SDaniel Lezcano 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
5077d8d1f30bSChangli Gao 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
50788ed67789SDaniel Lezcano 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
50798ed67789SDaniel Lezcano #endif
508076da0704SWANG Cong 	 } else if (event == NETDEV_UNREGISTER &&
508176da0704SWANG Cong 		    dev->reg_state != NETREG_UNREGISTERED) {
508276da0704SWANG Cong 		/* NETDEV_UNREGISTER could be fired for multiple times by
508376da0704SWANG Cong 		 * netdev_wait_allrefs(). Make sure we only call this once.
508476da0704SWANG Cong 		 */
508512d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
5086242d3a49SWANG Cong #ifdef CONFIG_IPV6_MULTIPLE_TABLES
508712d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
508812d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
5089242d3a49SWANG Cong #endif
50908ed67789SDaniel Lezcano 	}
50918ed67789SDaniel Lezcano 
50928ed67789SDaniel Lezcano 	return NOTIFY_OK;
50938ed67789SDaniel Lezcano }
50948ed67789SDaniel Lezcano 
50951da177e4SLinus Torvalds /*
50961da177e4SLinus Torvalds  *	/proc
50971da177e4SLinus Torvalds  */
50981da177e4SLinus Torvalds 
50991da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
51001da177e4SLinus Torvalds static int rt6_stats_seq_show(struct seq_file *seq, void *v)
51011da177e4SLinus Torvalds {
510269ddb805SDaniel Lezcano 	struct net *net = (struct net *)seq->private;
51031da177e4SLinus Torvalds 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
510469ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_nodes,
510569ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_route_nodes,
510681eb8447SWei Wang 		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
510769ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_entries,
510869ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_cache,
5109fc66f95cSEric Dumazet 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
511069ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_discarded_routes);
51111da177e4SLinus Torvalds 
51121da177e4SLinus Torvalds 	return 0;
51131da177e4SLinus Torvalds }
51141da177e4SLinus Torvalds #endif	/* CONFIG_PROC_FS */
51151da177e4SLinus Torvalds 
51161da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
51171da177e4SLinus Torvalds 
51181da177e4SLinus Torvalds static
5119fe2c6338SJoe Perches int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
51201da177e4SLinus Torvalds 			      void __user *buffer, size_t *lenp, loff_t *ppos)
51211da177e4SLinus Torvalds {
5122c486da34SLucian Adrian Grijincu 	struct net *net;
5123c486da34SLucian Adrian Grijincu 	int delay;
5124f0fb9b28SAditya Pakki 	int ret;
5125c486da34SLucian Adrian Grijincu 	if (!write)
5126c486da34SLucian Adrian Grijincu 		return -EINVAL;
5127c486da34SLucian Adrian Grijincu 
5128c486da34SLucian Adrian Grijincu 	net = (struct net *)ctl->extra1;
5129c486da34SLucian Adrian Grijincu 	delay = net->ipv6.sysctl.flush_delay;
5130f0fb9b28SAditya Pakki 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
5131f0fb9b28SAditya Pakki 	if (ret)
5132f0fb9b28SAditya Pakki 		return ret;
5133f0fb9b28SAditya Pakki 
51342ac3ac8fSMichal Kubeček 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
51351da177e4SLinus Torvalds 	return 0;
51361da177e4SLinus Torvalds }
51371da177e4SLinus Torvalds 
51387c6bb7d2SDavid Ahern static int zero;
51397c6bb7d2SDavid Ahern static int one = 1;
51407c6bb7d2SDavid Ahern 
5141ed792e28SDavid Ahern static struct ctl_table ipv6_route_table_template[] = {
51421da177e4SLinus Torvalds 	{
51431da177e4SLinus Torvalds 		.procname	=	"flush",
51444990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.flush_delay,
51451da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
514689c8b3a1SDave Jones 		.mode		=	0200,
51476d9f239aSAlexey Dobriyan 		.proc_handler	=	ipv6_sysctl_rtcache_flush
51481da177e4SLinus Torvalds 	},
51491da177e4SLinus Torvalds 	{
51501da177e4SLinus Torvalds 		.procname	=	"gc_thresh",
51519a7ec3a9SDaniel Lezcano 		.data		=	&ip6_dst_ops_template.gc_thresh,
51521da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51531da177e4SLinus Torvalds 		.mode		=	0644,
51546d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec,
51551da177e4SLinus Torvalds 	},
51561da177e4SLinus Torvalds 	{
51571da177e4SLinus Torvalds 		.procname	=	"max_size",
51584990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
51591da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51601da177e4SLinus Torvalds 		.mode		=	0644,
51616d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec,
51621da177e4SLinus Torvalds 	},
51631da177e4SLinus Torvalds 	{
51641da177e4SLinus Torvalds 		.procname	=	"gc_min_interval",
51654990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
51661da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51671da177e4SLinus Torvalds 		.mode		=	0644,
51686d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
51691da177e4SLinus Torvalds 	},
51701da177e4SLinus Torvalds 	{
51711da177e4SLinus Torvalds 		.procname	=	"gc_timeout",
51724990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
51731da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51741da177e4SLinus Torvalds 		.mode		=	0644,
51756d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
51761da177e4SLinus Torvalds 	},
51771da177e4SLinus Torvalds 	{
51781da177e4SLinus Torvalds 		.procname	=	"gc_interval",
51794990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
51801da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51811da177e4SLinus Torvalds 		.mode		=	0644,
51826d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
51831da177e4SLinus Torvalds 	},
51841da177e4SLinus Torvalds 	{
51851da177e4SLinus Torvalds 		.procname	=	"gc_elasticity",
51864990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
51871da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51881da177e4SLinus Torvalds 		.mode		=	0644,
5189f3d3f616SMin Zhang 		.proc_handler	=	proc_dointvec,
51901da177e4SLinus Torvalds 	},
51911da177e4SLinus Torvalds 	{
51921da177e4SLinus Torvalds 		.procname	=	"mtu_expires",
51934990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
51941da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
51951da177e4SLinus Torvalds 		.mode		=	0644,
51966d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
51971da177e4SLinus Torvalds 	},
51981da177e4SLinus Torvalds 	{
51991da177e4SLinus Torvalds 		.procname	=	"min_adv_mss",
52004990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
52011da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52021da177e4SLinus Torvalds 		.mode		=	0644,
5203f3d3f616SMin Zhang 		.proc_handler	=	proc_dointvec,
52041da177e4SLinus Torvalds 	},
52051da177e4SLinus Torvalds 	{
52061da177e4SLinus Torvalds 		.procname	=	"gc_min_interval_ms",
52074990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
52081da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52091da177e4SLinus Torvalds 		.mode		=	0644,
52106d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_ms_jiffies,
52111da177e4SLinus Torvalds 	},
52127c6bb7d2SDavid Ahern 	{
52137c6bb7d2SDavid Ahern 		.procname	=	"skip_notify_on_dev_down",
52147c6bb7d2SDavid Ahern 		.data		=	&init_net.ipv6.sysctl.skip_notify_on_dev_down,
52157c6bb7d2SDavid Ahern 		.maxlen		=	sizeof(int),
52167c6bb7d2SDavid Ahern 		.mode		=	0644,
52177c6bb7d2SDavid Ahern 		.proc_handler	=	proc_dointvec,
52187c6bb7d2SDavid Ahern 		.extra1		=	&zero,
52197c6bb7d2SDavid Ahern 		.extra2		=	&one,
52207c6bb7d2SDavid Ahern 	},
5221f8572d8fSEric W. Biederman 	{ }
52221da177e4SLinus Torvalds };
52231da177e4SLinus Torvalds 
52242c8c1e72SAlexey Dobriyan struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
5225760f2d01SDaniel Lezcano {
5226760f2d01SDaniel Lezcano 	struct ctl_table *table;
5227760f2d01SDaniel Lezcano 
5228760f2d01SDaniel Lezcano 	table = kmemdup(ipv6_route_table_template,
5229760f2d01SDaniel Lezcano 			sizeof(ipv6_route_table_template),
5230760f2d01SDaniel Lezcano 			GFP_KERNEL);
52315ee09105SYOSHIFUJI Hideaki 
52325ee09105SYOSHIFUJI Hideaki 	if (table) {
52335ee09105SYOSHIFUJI Hideaki 		table[0].data = &net->ipv6.sysctl.flush_delay;
5234c486da34SLucian Adrian Grijincu 		table[0].extra1 = net;
523586393e52SAlexey Dobriyan 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
52365ee09105SYOSHIFUJI Hideaki 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
52375ee09105SYOSHIFUJI Hideaki 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
52385ee09105SYOSHIFUJI Hideaki 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
52395ee09105SYOSHIFUJI Hideaki 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
52405ee09105SYOSHIFUJI Hideaki 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
52415ee09105SYOSHIFUJI Hideaki 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
52425ee09105SYOSHIFUJI Hideaki 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
52439c69fabeSAlexey Dobriyan 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
52447c6bb7d2SDavid Ahern 		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
5245464dc801SEric W. Biederman 
5246464dc801SEric W. Biederman 		/* Don't export sysctls to unprivileged users */
5247464dc801SEric W. Biederman 		if (net->user_ns != &init_user_ns)
5248464dc801SEric W. Biederman 			table[0].procname = NULL;
52495ee09105SYOSHIFUJI Hideaki 	}
52505ee09105SYOSHIFUJI Hideaki 
5251760f2d01SDaniel Lezcano 	return table;
5252760f2d01SDaniel Lezcano }
52531da177e4SLinus Torvalds #endif
52541da177e4SLinus Torvalds 
52552c8c1e72SAlexey Dobriyan static int __net_init ip6_route_net_init(struct net *net)
5256cdb18761SDaniel Lezcano {
5257633d424bSPavel Emelyanov 	int ret = -ENOMEM;
52588ed67789SDaniel Lezcano 
525986393e52SAlexey Dobriyan 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
526086393e52SAlexey Dobriyan 	       sizeof(net->ipv6.ip6_dst_ops));
5261f2fc6a54SBenjamin Thery 
5262fc66f95cSEric Dumazet 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5263fc66f95cSEric Dumazet 		goto out_ip6_dst_ops;
5264fc66f95cSEric Dumazet 
5265421842edSDavid Ahern 	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5266421842edSDavid Ahern 					    sizeof(*net->ipv6.fib6_null_entry),
5267421842edSDavid Ahern 					    GFP_KERNEL);
5268421842edSDavid Ahern 	if (!net->ipv6.fib6_null_entry)
5269421842edSDavid Ahern 		goto out_ip6_dst_entries;
5270421842edSDavid Ahern 
52718ed67789SDaniel Lezcano 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
52728ed67789SDaniel Lezcano 					   sizeof(*net->ipv6.ip6_null_entry),
52738ed67789SDaniel Lezcano 					   GFP_KERNEL);
52748ed67789SDaniel Lezcano 	if (!net->ipv6.ip6_null_entry)
5275421842edSDavid Ahern 		goto out_fib6_null_entry;
5276d8d1f30bSChangli Gao 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
527762fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
527862fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
52798ed67789SDaniel Lezcano 
52808ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5281feca7d8cSVincent Bernat 	net->ipv6.fib6_has_custom_rules = false;
52828ed67789SDaniel Lezcano 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
52838ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_prohibit_entry),
52848ed67789SDaniel Lezcano 					       GFP_KERNEL);
528568fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_prohibit_entry)
528668fffc67SPeter Zijlstra 		goto out_ip6_null_entry;
5287d8d1f30bSChangli Gao 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
528862fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
528962fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
52908ed67789SDaniel Lezcano 
52918ed67789SDaniel Lezcano 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
52928ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
52938ed67789SDaniel Lezcano 					       GFP_KERNEL);
529468fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_blk_hole_entry)
529568fffc67SPeter Zijlstra 		goto out_ip6_prohibit_entry;
5296d8d1f30bSChangli Gao 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
529762fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
529862fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
52998ed67789SDaniel Lezcano #endif
53008ed67789SDaniel Lezcano 
5301b339a47cSPeter Zijlstra 	net->ipv6.sysctl.flush_delay = 0;
5302b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
5303b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5304b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5305b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5306b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5307b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5308b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
53097c6bb7d2SDavid Ahern 	net->ipv6.sysctl.skip_notify_on_dev_down = 0;
5310b339a47cSPeter Zijlstra 
53116891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
53126891a346SBenjamin Thery 
53138ed67789SDaniel Lezcano 	ret = 0;
53148ed67789SDaniel Lezcano out:
53158ed67789SDaniel Lezcano 	return ret;
5316f2fc6a54SBenjamin Thery 
531768fffc67SPeter Zijlstra #ifdef CONFIG_IPV6_MULTIPLE_TABLES
531868fffc67SPeter Zijlstra out_ip6_prohibit_entry:
531968fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_prohibit_entry);
532068fffc67SPeter Zijlstra out_ip6_null_entry:
532168fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_null_entry);
532268fffc67SPeter Zijlstra #endif
5323421842edSDavid Ahern out_fib6_null_entry:
5324421842edSDavid Ahern 	kfree(net->ipv6.fib6_null_entry);
5325fc66f95cSEric Dumazet out_ip6_dst_entries:
5326fc66f95cSEric Dumazet 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5327f2fc6a54SBenjamin Thery out_ip6_dst_ops:
5328f2fc6a54SBenjamin Thery 	goto out;
5329cdb18761SDaniel Lezcano }
5330cdb18761SDaniel Lezcano 
53312c8c1e72SAlexey Dobriyan static void __net_exit ip6_route_net_exit(struct net *net)
5332cdb18761SDaniel Lezcano {
5333421842edSDavid Ahern 	kfree(net->ipv6.fib6_null_entry);
53348ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_null_entry);
53358ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
53368ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_prohibit_entry);
53378ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_blk_hole_entry);
53388ed67789SDaniel Lezcano #endif
533941bb78b4SXiaotian Feng 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5340cdb18761SDaniel Lezcano }
5341cdb18761SDaniel Lezcano 
5342d189634eSThomas Graf static int __net_init ip6_route_net_init_late(struct net *net)
5343d189634eSThomas Graf {
5344d189634eSThomas Graf #ifdef CONFIG_PROC_FS
5345c3506372SChristoph Hellwig 	proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5346c3506372SChristoph Hellwig 			sizeof(struct ipv6_route_iter));
53473617d949SChristoph Hellwig 	proc_create_net_single("rt6_stats", 0444, net->proc_net,
53483617d949SChristoph Hellwig 			rt6_stats_seq_show, NULL);
5349d189634eSThomas Graf #endif
5350d189634eSThomas Graf 	return 0;
5351d189634eSThomas Graf }
5352d189634eSThomas Graf 
5353d189634eSThomas Graf static void __net_exit ip6_route_net_exit_late(struct net *net)
5354d189634eSThomas Graf {
5355d189634eSThomas Graf #ifdef CONFIG_PROC_FS
5356ece31ffdSGao feng 	remove_proc_entry("ipv6_route", net->proc_net);
5357ece31ffdSGao feng 	remove_proc_entry("rt6_stats", net->proc_net);
5358d189634eSThomas Graf #endif
5359d189634eSThomas Graf }
5360d189634eSThomas Graf 
5361cdb18761SDaniel Lezcano static struct pernet_operations ip6_route_net_ops = {
5362cdb18761SDaniel Lezcano 	.init = ip6_route_net_init,
5363cdb18761SDaniel Lezcano 	.exit = ip6_route_net_exit,
5364cdb18761SDaniel Lezcano };
5365cdb18761SDaniel Lezcano 
5366c3426b47SDavid S. Miller static int __net_init ipv6_inetpeer_init(struct net *net)
5367c3426b47SDavid S. Miller {
5368c3426b47SDavid S. Miller 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5369c3426b47SDavid S. Miller 
5370c3426b47SDavid S. Miller 	if (!bp)
5371c3426b47SDavid S. Miller 		return -ENOMEM;
5372c3426b47SDavid S. Miller 	inet_peer_base_init(bp);
5373c3426b47SDavid S. Miller 	net->ipv6.peers = bp;
5374c3426b47SDavid S. Miller 	return 0;
5375c3426b47SDavid S. Miller }
5376c3426b47SDavid S. Miller 
5377c3426b47SDavid S. Miller static void __net_exit ipv6_inetpeer_exit(struct net *net)
5378c3426b47SDavid S. Miller {
5379c3426b47SDavid S. Miller 	struct inet_peer_base *bp = net->ipv6.peers;
5380c3426b47SDavid S. Miller 
5381c3426b47SDavid S. Miller 	net->ipv6.peers = NULL;
538256a6b248SDavid S. Miller 	inetpeer_invalidate_tree(bp);
5383c3426b47SDavid S. Miller 	kfree(bp);
5384c3426b47SDavid S. Miller }
5385c3426b47SDavid S. Miller 
53862b823f72SDavid S. Miller static struct pernet_operations ipv6_inetpeer_ops = {
5387c3426b47SDavid S. Miller 	.init	=	ipv6_inetpeer_init,
5388c3426b47SDavid S. Miller 	.exit	=	ipv6_inetpeer_exit,
5389c3426b47SDavid S. Miller };
5390c3426b47SDavid S. Miller 
5391d189634eSThomas Graf static struct pernet_operations ip6_route_net_late_ops = {
5392d189634eSThomas Graf 	.init = ip6_route_net_init_late,
5393d189634eSThomas Graf 	.exit = ip6_route_net_exit_late,
5394d189634eSThomas Graf };
5395d189634eSThomas Graf 
53968ed67789SDaniel Lezcano static struct notifier_block ip6_route_dev_notifier = {
53978ed67789SDaniel Lezcano 	.notifier_call = ip6_route_dev_notify,
5398242d3a49SWANG Cong 	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
53998ed67789SDaniel Lezcano };
54008ed67789SDaniel Lezcano 
54012f460933SWANG Cong void __init ip6_route_init_special_entries(void)
54022f460933SWANG Cong {
54032f460933SWANG Cong 	/* Registering of the loopback is done before this portion of code,
54042f460933SWANG Cong 	 * the loopback reference in rt6_info will not be taken, do it
54052f460933SWANG Cong 	 * manually for init_net */
5406ad1601aeSDavid Ahern 	init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
54072f460933SWANG Cong 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
54082f460933SWANG Cong 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
54092f460933SWANG Cong   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
54102f460933SWANG Cong 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
54112f460933SWANG Cong 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
54122f460933SWANG Cong 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
54132f460933SWANG Cong 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
54142f460933SWANG Cong   #endif
54152f460933SWANG Cong }
54162f460933SWANG Cong 
5417433d49c3SDaniel Lezcano int __init ip6_route_init(void)
54181da177e4SLinus Torvalds {
5419433d49c3SDaniel Lezcano 	int ret;
54208d0b94afSMartin KaFai Lau 	int cpu;
5421433d49c3SDaniel Lezcano 
54229a7ec3a9SDaniel Lezcano 	ret = -ENOMEM;
54239a7ec3a9SDaniel Lezcano 	ip6_dst_ops_template.kmem_cachep =
54249a7ec3a9SDaniel Lezcano 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
54259a7ec3a9SDaniel Lezcano 				  SLAB_HWCACHE_ALIGN, NULL);
54269a7ec3a9SDaniel Lezcano 	if (!ip6_dst_ops_template.kmem_cachep)
5427c19a28e1SFernando Carrijo 		goto out;
542814e50e57SDavid S. Miller 
5429fc66f95cSEric Dumazet 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
54308ed67789SDaniel Lezcano 	if (ret)
5431bdb3289fSDaniel Lezcano 		goto out_kmem_cache;
5432bdb3289fSDaniel Lezcano 
5433c3426b47SDavid S. Miller 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5434c3426b47SDavid S. Miller 	if (ret)
5435e8803b6cSDavid S. Miller 		goto out_dst_entries;
54362a0c451aSThomas Graf 
54377e52b33bSDavid S. Miller 	ret = register_pernet_subsys(&ip6_route_net_ops);
54387e52b33bSDavid S. Miller 	if (ret)
54397e52b33bSDavid S. Miller 		goto out_register_inetpeer;
5440c3426b47SDavid S. Miller 
54415dc121e9SArnaud Ebalard 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
54425dc121e9SArnaud Ebalard 
5443e8803b6cSDavid S. Miller 	ret = fib6_init();
5444433d49c3SDaniel Lezcano 	if (ret)
54458ed67789SDaniel Lezcano 		goto out_register_subsys;
5446433d49c3SDaniel Lezcano 
5447433d49c3SDaniel Lezcano 	ret = xfrm6_init();
5448433d49c3SDaniel Lezcano 	if (ret)
5449e8803b6cSDavid S. Miller 		goto out_fib6_init;
5450c35b7e72SDaniel Lezcano 
5451433d49c3SDaniel Lezcano 	ret = fib6_rules_init();
5452433d49c3SDaniel Lezcano 	if (ret)
5453433d49c3SDaniel Lezcano 		goto xfrm6_init;
54547e5449c2SDaniel Lezcano 
5455d189634eSThomas Graf 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
5456d189634eSThomas Graf 	if (ret)
5457d189634eSThomas Graf 		goto fib6_rules_init;
5458d189634eSThomas Graf 
545916feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
546016feebcfSFlorian Westphal 				   inet6_rtm_newroute, NULL, 0);
546116feebcfSFlorian Westphal 	if (ret < 0)
546216feebcfSFlorian Westphal 		goto out_register_late_subsys;
546316feebcfSFlorian Westphal 
546416feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
546516feebcfSFlorian Westphal 				   inet6_rtm_delroute, NULL, 0);
546616feebcfSFlorian Westphal 	if (ret < 0)
546716feebcfSFlorian Westphal 		goto out_register_late_subsys;
546816feebcfSFlorian Westphal 
546916feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
547016feebcfSFlorian Westphal 				   inet6_rtm_getroute, NULL,
547116feebcfSFlorian Westphal 				   RTNL_FLAG_DOIT_UNLOCKED);
547216feebcfSFlorian Westphal 	if (ret < 0)
5473d189634eSThomas Graf 		goto out_register_late_subsys;
5474433d49c3SDaniel Lezcano 
54758ed67789SDaniel Lezcano 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
5476cdb18761SDaniel Lezcano 	if (ret)
5477d189634eSThomas Graf 		goto out_register_late_subsys;
54788ed67789SDaniel Lezcano 
54798d0b94afSMartin KaFai Lau 	for_each_possible_cpu(cpu) {
54808d0b94afSMartin KaFai Lau 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
54818d0b94afSMartin KaFai Lau 
54828d0b94afSMartin KaFai Lau 		INIT_LIST_HEAD(&ul->head);
54838d0b94afSMartin KaFai Lau 		spin_lock_init(&ul->lock);
54848d0b94afSMartin KaFai Lau 	}
54858d0b94afSMartin KaFai Lau 
5486433d49c3SDaniel Lezcano out:
5487433d49c3SDaniel Lezcano 	return ret;
5488433d49c3SDaniel Lezcano 
5489d189634eSThomas Graf out_register_late_subsys:
549016feebcfSFlorian Westphal 	rtnl_unregister_all(PF_INET6);
5491d189634eSThomas Graf 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5492433d49c3SDaniel Lezcano fib6_rules_init:
5493433d49c3SDaniel Lezcano 	fib6_rules_cleanup();
5494433d49c3SDaniel Lezcano xfrm6_init:
5495433d49c3SDaniel Lezcano 	xfrm6_fini();
54962a0c451aSThomas Graf out_fib6_init:
54972a0c451aSThomas Graf 	fib6_gc_cleanup();
54988ed67789SDaniel Lezcano out_register_subsys:
54998ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
55007e52b33bSDavid S. Miller out_register_inetpeer:
55017e52b33bSDavid S. Miller 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
5502fc66f95cSEric Dumazet out_dst_entries:
5503fc66f95cSEric Dumazet 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5504433d49c3SDaniel Lezcano out_kmem_cache:
5505f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5506433d49c3SDaniel Lezcano 	goto out;
55071da177e4SLinus Torvalds }
55081da177e4SLinus Torvalds 
55091da177e4SLinus Torvalds void ip6_route_cleanup(void)
55101da177e4SLinus Torvalds {
55118ed67789SDaniel Lezcano 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
5512d189634eSThomas Graf 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5513101367c2SThomas Graf 	fib6_rules_cleanup();
55141da177e4SLinus Torvalds 	xfrm6_fini();
55151da177e4SLinus Torvalds 	fib6_gc_cleanup();
5516c3426b47SDavid S. Miller 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
55178ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
551841bb78b4SXiaotian Feng 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5519f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
55201da177e4SLinus Torvalds }
5521