xref: /openbmc/linux/net/ipv6/route.c (revision eea68cd371a8fa7d8dbfcf75bc076e8379526119)
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */
131da177e4SLinus Torvalds 
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
261da177e4SLinus Torvalds 
27f3213831SJoe Perches #define pr_fmt(fmt) "IPv6: " fmt
28f3213831SJoe Perches 
294fc268d2SRandy Dunlap #include <linux/capability.h>
301da177e4SLinus Torvalds #include <linux/errno.h>
31bc3b2d7fSPaul Gortmaker #include <linux/export.h>
321da177e4SLinus Torvalds #include <linux/types.h>
331da177e4SLinus Torvalds #include <linux/times.h>
341da177e4SLinus Torvalds #include <linux/socket.h>
351da177e4SLinus Torvalds #include <linux/sockios.h>
361da177e4SLinus Torvalds #include <linux/net.h>
371da177e4SLinus Torvalds #include <linux/route.h>
381da177e4SLinus Torvalds #include <linux/netdevice.h>
391da177e4SLinus Torvalds #include <linux/in6.h>
407bc570c8SYOSHIFUJI Hideaki #include <linux/mroute6.h>
411da177e4SLinus Torvalds #include <linux/init.h>
421da177e4SLinus Torvalds #include <linux/if_arp.h>
431da177e4SLinus Torvalds #include <linux/proc_fs.h>
441da177e4SLinus Torvalds #include <linux/seq_file.h>
455b7c931dSDaniel Lezcano #include <linux/nsproxy.h>
465a0e3ad6STejun Heo #include <linux/slab.h>
4735732d01SWei Wang #include <linux/jhash.h>
48457c4cbcSEric W. Biederman #include <net/net_namespace.h>
491da177e4SLinus Torvalds #include <net/snmp.h>
501da177e4SLinus Torvalds #include <net/ipv6.h>
511da177e4SLinus Torvalds #include <net/ip6_fib.h>
521da177e4SLinus Torvalds #include <net/ip6_route.h>
531da177e4SLinus Torvalds #include <net/ndisc.h>
541da177e4SLinus Torvalds #include <net/addrconf.h>
551da177e4SLinus Torvalds #include <net/tcp.h>
561da177e4SLinus Torvalds #include <linux/rtnetlink.h>
571da177e4SLinus Torvalds #include <net/dst.h>
58904af04dSJiri Benc #include <net/dst_metadata.h>
591da177e4SLinus Torvalds #include <net/xfrm.h>
608d71740cSTom Tucker #include <net/netevent.h>
6121713ebcSThomas Graf #include <net/netlink.h>
6251ebd318SNicolas Dichtel #include <net/nexthop.h>
6319e42e45SRoopa Prabhu #include <net/lwtunnel.h>
64904af04dSJiri Benc #include <net/ip_tunnels.h>
65ca254490SDavid Ahern #include <net/l3mdev.h>
66b811580dSDavid Ahern #include <trace/events/fib6.h>
671da177e4SLinus Torvalds 
687c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
691da177e4SLinus Torvalds 
701da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
711da177e4SLinus Torvalds #include <linux/sysctl.h>
721da177e4SLinus Torvalds #endif
731da177e4SLinus Torvalds 
/*
 * Result codes of the neighbour reachability check done for a route.
 *
 * All failure codes are negative: rt6_score_route() returns a negative
 * result of rt6_check_neigh() unchanged, so callers can tell a failure
 * from a (non-negative) score by sign alone.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* initial value in rt6_check_neigh(); also an interface mismatch */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour entry is in NUD_FAILED (router-pref builds) */
	RT6_NUD_FAIL_DO_RR	= -1,	/* no neighbour entry (non router-pref builds) */
	RT6_NUD_SUCCEED		= 1
};
80afc154e9SHannes Frederic Sowa 
811da177e4SLinus Torvalds static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
820dbaee3bSDavid S. Miller static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
83ebb762f2SSteffen Klassert static unsigned int	 ip6_mtu(const struct dst_entry *dst);
841da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *);
851da177e4SLinus Torvalds static void		ip6_dst_destroy(struct dst_entry *);
861da177e4SLinus Torvalds static void		ip6_dst_ifdown(struct dst_entry *,
871da177e4SLinus Torvalds 				       struct net_device *dev, int how);
88569d3645SDaniel Lezcano static int		 ip6_dst_gc(struct dst_ops *ops);
891da177e4SLinus Torvalds 
901da177e4SLinus Torvalds static int		ip6_pkt_discard(struct sk_buff *skb);
91ede2059dSEric W. Biederman static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
927150aedeSKamala R static int		ip6_pkt_prohibit(struct sk_buff *skb);
93ede2059dSEric W. Biederman static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
941da177e4SLinus Torvalds static void		ip6_link_failure(struct sk_buff *skb);
956700c270SDavid S. Miller static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
966700c270SDavid S. Miller 					   struct sk_buff *skb, u32 mtu);
976700c270SDavid S. Miller static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
986700c270SDavid S. Miller 					struct sk_buff *skb);
998d1c802bSDavid Ahern static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
1008d1c802bSDavid Ahern static size_t rt6_nlmsg_size(struct fib6_info *rt);
101d4ead6b3SDavid Ahern static int rt6_fill_node(struct net *net, struct sk_buff *skb,
1028d1c802bSDavid Ahern 			 struct fib6_info *rt, struct dst_entry *dst,
103d4ead6b3SDavid Ahern 			 struct in6_addr *dest, struct in6_addr *src,
10416a16cd3SDavid Ahern 			 int iif, int type, u32 portid, u32 seq,
10516a16cd3SDavid Ahern 			 unsigned int flags);
1068d1c802bSDavid Ahern static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
10735732d01SWei Wang 					   struct in6_addr *daddr,
10835732d01SWei Wang 					   struct in6_addr *saddr);
1091da177e4SLinus Torvalds 
11070ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Route-information helpers, declared only when CONFIG_IPV6_ROUTE_INFO
 * is set (see the surrounding #ifdef).  Both identify a route by prefix,
 * prefix length, gateway address and device.
 */
static struct fib6_info *rt6_add_route_info(struct net *net,
					    const struct in6_addr *prefix,
					    int prefixlen,
					    const struct in6_addr *gwaddr,
					    struct net_device *dev,
					    unsigned int pref);
static struct fib6_info *rt6_get_route_info(struct net *net,
					    const struct in6_addr *prefix,
					    int prefixlen,
					    const struct in6_addr *gwaddr,
					    struct net_device *dev);
12070ceb4f5SYOSHIFUJI Hideaki #endif
12170ceb4f5SYOSHIFUJI Hideaki 
1228d0b94afSMartin KaFai Lau struct uncached_list {
1238d0b94afSMartin KaFai Lau 	spinlock_t		lock;
1248d0b94afSMartin KaFai Lau 	struct list_head	head;
1258d0b94afSMartin KaFai Lau };
1268d0b94afSMartin KaFai Lau 
1278d0b94afSMartin KaFai Lau static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
1288d0b94afSMartin KaFai Lau 
129510c321bSXin Long void rt6_uncached_list_add(struct rt6_info *rt)
1308d0b94afSMartin KaFai Lau {
1318d0b94afSMartin KaFai Lau 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
1328d0b94afSMartin KaFai Lau 
1338d0b94afSMartin KaFai Lau 	rt->rt6i_uncached_list = ul;
1348d0b94afSMartin KaFai Lau 
1358d0b94afSMartin KaFai Lau 	spin_lock_bh(&ul->lock);
1368d0b94afSMartin KaFai Lau 	list_add_tail(&rt->rt6i_uncached, &ul->head);
1378d0b94afSMartin KaFai Lau 	spin_unlock_bh(&ul->lock);
1388d0b94afSMartin KaFai Lau }
1398d0b94afSMartin KaFai Lau 
140510c321bSXin Long void rt6_uncached_list_del(struct rt6_info *rt)
1418d0b94afSMartin KaFai Lau {
1428d0b94afSMartin KaFai Lau 	if (!list_empty(&rt->rt6i_uncached)) {
1438d0b94afSMartin KaFai Lau 		struct uncached_list *ul = rt->rt6i_uncached_list;
14481eb8447SWei Wang 		struct net *net = dev_net(rt->dst.dev);
1458d0b94afSMartin KaFai Lau 
1468d0b94afSMartin KaFai Lau 		spin_lock_bh(&ul->lock);
1478d0b94afSMartin KaFai Lau 		list_del(&rt->rt6i_uncached);
14881eb8447SWei Wang 		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
1498d0b94afSMartin KaFai Lau 		spin_unlock_bh(&ul->lock);
1508d0b94afSMartin KaFai Lau 	}
1518d0b94afSMartin KaFai Lau }
1528d0b94afSMartin KaFai Lau 
1538d0b94afSMartin KaFai Lau static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
1548d0b94afSMartin KaFai Lau {
1558d0b94afSMartin KaFai Lau 	struct net_device *loopback_dev = net->loopback_dev;
1568d0b94afSMartin KaFai Lau 	int cpu;
1578d0b94afSMartin KaFai Lau 
158e332bc67SEric W. Biederman 	if (dev == loopback_dev)
159e332bc67SEric W. Biederman 		return;
160e332bc67SEric W. Biederman 
1618d0b94afSMartin KaFai Lau 	for_each_possible_cpu(cpu) {
1628d0b94afSMartin KaFai Lau 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
1638d0b94afSMartin KaFai Lau 		struct rt6_info *rt;
1648d0b94afSMartin KaFai Lau 
1658d0b94afSMartin KaFai Lau 		spin_lock_bh(&ul->lock);
1668d0b94afSMartin KaFai Lau 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
1678d0b94afSMartin KaFai Lau 			struct inet6_dev *rt_idev = rt->rt6i_idev;
1688d0b94afSMartin KaFai Lau 			struct net_device *rt_dev = rt->dst.dev;
1698d0b94afSMartin KaFai Lau 
170e332bc67SEric W. Biederman 			if (rt_idev->dev == dev) {
1718d0b94afSMartin KaFai Lau 				rt->rt6i_idev = in6_dev_get(loopback_dev);
1728d0b94afSMartin KaFai Lau 				in6_dev_put(rt_idev);
1738d0b94afSMartin KaFai Lau 			}
1748d0b94afSMartin KaFai Lau 
175e332bc67SEric W. Biederman 			if (rt_dev == dev) {
1768d0b94afSMartin KaFai Lau 				rt->dst.dev = loopback_dev;
1778d0b94afSMartin KaFai Lau 				dev_hold(rt->dst.dev);
1788d0b94afSMartin KaFai Lau 				dev_put(rt_dev);
1798d0b94afSMartin KaFai Lau 			}
1808d0b94afSMartin KaFai Lau 		}
1818d0b94afSMartin KaFai Lau 		spin_unlock_bh(&ul->lock);
1828d0b94afSMartin KaFai Lau 	}
1838d0b94afSMartin KaFai Lau }
1848d0b94afSMartin KaFai Lau 
185f8a1b43bSDavid Ahern static inline const void *choose_neigh_daddr(const struct in6_addr *p,
186f894cbf8SDavid S. Miller 					     struct sk_buff *skb,
187f894cbf8SDavid S. Miller 					     const void *daddr)
18839232973SDavid S. Miller {
189a7563f34SDavid S. Miller 	if (!ipv6_addr_any(p))
19039232973SDavid S. Miller 		return (const void *) p;
191f894cbf8SDavid S. Miller 	else if (skb)
192f894cbf8SDavid S. Miller 		return &ipv6_hdr(skb)->daddr;
19339232973SDavid S. Miller 	return daddr;
19439232973SDavid S. Miller }
19539232973SDavid S. Miller 
196f8a1b43bSDavid Ahern struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
197f8a1b43bSDavid Ahern 				   struct net_device *dev,
198f894cbf8SDavid S. Miller 				   struct sk_buff *skb,
199f894cbf8SDavid S. Miller 				   const void *daddr)
200d3aaeb38SDavid S. Miller {
20139232973SDavid S. Miller 	struct neighbour *n;
20239232973SDavid S. Miller 
203f8a1b43bSDavid Ahern 	daddr = choose_neigh_daddr(gw, skb, daddr);
204f8a1b43bSDavid Ahern 	n = __ipv6_neigh_lookup(dev, daddr);
205f83c7790SDavid S. Miller 	if (n)
206f83c7790SDavid S. Miller 		return n;
207f8a1b43bSDavid Ahern 	return neigh_create(&nd_tbl, daddr, dev);
208f8a1b43bSDavid Ahern }
209f8a1b43bSDavid Ahern 
210f8a1b43bSDavid Ahern static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
211f8a1b43bSDavid Ahern 					      struct sk_buff *skb,
212f8a1b43bSDavid Ahern 					      const void *daddr)
213f8a1b43bSDavid Ahern {
214f8a1b43bSDavid Ahern 	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
215f8a1b43bSDavid Ahern 
216f8a1b43bSDavid Ahern 	return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
217f83c7790SDavid S. Miller }
218f83c7790SDavid S. Miller 
21963fca65dSJulian Anastasov static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
22063fca65dSJulian Anastasov {
22163fca65dSJulian Anastasov 	struct net_device *dev = dst->dev;
22263fca65dSJulian Anastasov 	struct rt6_info *rt = (struct rt6_info *)dst;
22363fca65dSJulian Anastasov 
224f8a1b43bSDavid Ahern 	daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
22563fca65dSJulian Anastasov 	if (!daddr)
22663fca65dSJulian Anastasov 		return;
22763fca65dSJulian Anastasov 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
22863fca65dSJulian Anastasov 		return;
22963fca65dSJulian Anastasov 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
23063fca65dSJulian Anastasov 		return;
23163fca65dSJulian Anastasov 	__ipv6_confirm_neigh(dev, daddr);
23263fca65dSJulian Anastasov }
23363fca65dSJulian Anastasov 
2349a7ec3a9SDaniel Lezcano static struct dst_ops ip6_dst_ops_template = {
2351da177e4SLinus Torvalds 	.family			=	AF_INET6,
2361da177e4SLinus Torvalds 	.gc			=	ip6_dst_gc,
2371da177e4SLinus Torvalds 	.gc_thresh		=	1024,
2381da177e4SLinus Torvalds 	.check			=	ip6_dst_check,
2390dbaee3bSDavid S. Miller 	.default_advmss		=	ip6_default_advmss,
240ebb762f2SSteffen Klassert 	.mtu			=	ip6_mtu,
241d4ead6b3SDavid Ahern 	.cow_metrics		=	dst_cow_metrics_generic,
2421da177e4SLinus Torvalds 	.destroy		=	ip6_dst_destroy,
2431da177e4SLinus Torvalds 	.ifdown			=	ip6_dst_ifdown,
2441da177e4SLinus Torvalds 	.negative_advice	=	ip6_negative_advice,
2451da177e4SLinus Torvalds 	.link_failure		=	ip6_link_failure,
2461da177e4SLinus Torvalds 	.update_pmtu		=	ip6_rt_update_pmtu,
2476e157b6aSDavid S. Miller 	.redirect		=	rt6_do_redirect,
2489f8955ccSEric W. Biederman 	.local_out		=	__ip6_local_out,
249f8a1b43bSDavid Ahern 	.neigh_lookup		=	ip6_dst_neigh_lookup,
25063fca65dSJulian Anastasov 	.confirm_neigh		=	ip6_confirm_neigh,
2511da177e4SLinus Torvalds };
2521da177e4SLinus Torvalds 
253ebb762f2SSteffen Klassert static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
254ec831ea7SRoland Dreier {
255618f9bc7SSteffen Klassert 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
256618f9bc7SSteffen Klassert 
257618f9bc7SSteffen Klassert 	return mtu ? : dst->dev->mtu;
258ec831ea7SRoland Dreier }
259ec831ea7SRoland Dreier 
2606700c270SDavid S. Miller static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2616700c270SDavid S. Miller 					 struct sk_buff *skb, u32 mtu)
26214e50e57SDavid S. Miller {
26314e50e57SDavid S. Miller }
26414e50e57SDavid S. Miller 
/* .redirect handler of the blackhole dst_ops: deliberately a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
269b587ee3bSDavid S. Miller 
27014e50e57SDavid S. Miller static struct dst_ops ip6_dst_blackhole_ops = {
27114e50e57SDavid S. Miller 	.family			=	AF_INET6,
27214e50e57SDavid S. Miller 	.destroy		=	ip6_dst_destroy,
27314e50e57SDavid S. Miller 	.check			=	ip6_dst_check,
274ebb762f2SSteffen Klassert 	.mtu			=	ip6_blackhole_mtu,
275214f45c9SEric Dumazet 	.default_advmss		=	ip6_default_advmss,
27614e50e57SDavid S. Miller 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
277b587ee3bSDavid S. Miller 	.redirect		=	ip6_rt_blackhole_redirect,
2780a1f5962SMartin KaFai Lau 	.cow_metrics		=	dst_cow_metrics_generic,
279f8a1b43bSDavid Ahern 	.neigh_lookup		=	ip6_dst_neigh_lookup,
28014e50e57SDavid S. Miller };
28114e50e57SDavid S. Miller 
28262fa8a84SDavid S. Miller static const u32 ip6_template_metrics[RTAX_MAX] = {
28314edd87dSLi RongQing 	[RTAX_HOPLIMIT - 1] = 0,
28462fa8a84SDavid S. Miller };
28562fa8a84SDavid S. Miller 
2868d1c802bSDavid Ahern static const struct fib6_info fib6_null_entry_template = {
28793c2fb25SDavid Ahern 	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
28893c2fb25SDavid Ahern 	.fib6_protocol  = RTPROT_KERNEL,
28993c2fb25SDavid Ahern 	.fib6_metric	= ~(u32)0,
29093c2fb25SDavid Ahern 	.fib6_ref	= ATOMIC_INIT(1),
291421842edSDavid Ahern 	.fib6_type	= RTN_UNREACHABLE,
292421842edSDavid Ahern 	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
293421842edSDavid Ahern };
294421842edSDavid Ahern 
295fb0af4c7SEric Dumazet static const struct rt6_info ip6_null_entry_template = {
2961da177e4SLinus Torvalds 	.dst = {
2971da177e4SLinus Torvalds 		.__refcnt	= ATOMIC_INIT(1),
2981da177e4SLinus Torvalds 		.__use		= 1,
2992c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
3001da177e4SLinus Torvalds 		.error		= -ENETUNREACH,
3011da177e4SLinus Torvalds 		.input		= ip6_pkt_discard,
3021da177e4SLinus Torvalds 		.output		= ip6_pkt_discard_out,
3031da177e4SLinus Torvalds 	},
3041da177e4SLinus Torvalds 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
3051da177e4SLinus Torvalds };
3061da177e4SLinus Torvalds 
307101367c2SThomas Graf #ifdef CONFIG_IPV6_MULTIPLE_TABLES
308101367c2SThomas Graf 
309fb0af4c7SEric Dumazet static const struct rt6_info ip6_prohibit_entry_template = {
310101367c2SThomas Graf 	.dst = {
311101367c2SThomas Graf 		.__refcnt	= ATOMIC_INIT(1),
312101367c2SThomas Graf 		.__use		= 1,
3132c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
314101367c2SThomas Graf 		.error		= -EACCES,
3159ce8ade0SThomas Graf 		.input		= ip6_pkt_prohibit,
3169ce8ade0SThomas Graf 		.output		= ip6_pkt_prohibit_out,
317101367c2SThomas Graf 	},
318101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
319101367c2SThomas Graf };
320101367c2SThomas Graf 
321fb0af4c7SEric Dumazet static const struct rt6_info ip6_blk_hole_entry_template = {
322101367c2SThomas Graf 	.dst = {
323101367c2SThomas Graf 		.__refcnt	= ATOMIC_INIT(1),
324101367c2SThomas Graf 		.__use		= 1,
3252c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
326101367c2SThomas Graf 		.error		= -EINVAL,
327352e512cSHerbert Xu 		.input		= dst_discard,
328ede2059dSEric W. Biederman 		.output		= dst_discard_out,
329101367c2SThomas Graf 	},
330101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
331101367c2SThomas Graf };
332101367c2SThomas Graf 
333101367c2SThomas Graf #endif
334101367c2SThomas Graf 
335ebfa45f0SMartin KaFai Lau static void rt6_info_init(struct rt6_info *rt)
336ebfa45f0SMartin KaFai Lau {
337ebfa45f0SMartin KaFai Lau 	struct dst_entry *dst = &rt->dst;
338ebfa45f0SMartin KaFai Lau 
339ebfa45f0SMartin KaFai Lau 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
340ebfa45f0SMartin KaFai Lau 	INIT_LIST_HEAD(&rt->rt6i_uncached);
341ebfa45f0SMartin KaFai Lau }
342ebfa45f0SMartin KaFai Lau 
3431da177e4SLinus Torvalds /* allocate dst with ip6_dst_ops */
34493531c67SDavid Ahern struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
345ad706862SMartin KaFai Lau 			       int flags)
3461da177e4SLinus Torvalds {
34797bab73fSDavid S. Miller 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
348b2a9c0edSWei Wang 					1, DST_OBSOLETE_FORCE_CHK, flags);
349cf911662SDavid S. Miller 
35081eb8447SWei Wang 	if (rt) {
351ebfa45f0SMartin KaFai Lau 		rt6_info_init(rt);
35281eb8447SWei Wang 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
35381eb8447SWei Wang 	}
3548104891bSSteffen Klassert 
355cf911662SDavid S. Miller 	return rt;
3561da177e4SLinus Torvalds }
3579ab179d8SDavid Ahern EXPORT_SYMBOL(ip6_dst_alloc);
358d52d3997SMartin KaFai Lau 
3591da177e4SLinus Torvalds static void ip6_dst_destroy(struct dst_entry *dst)
3601da177e4SLinus Torvalds {
3611da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
3628d1c802bSDavid Ahern 	struct fib6_info *from = rt->from;
3638d0b94afSMartin KaFai Lau 	struct inet6_dev *idev;
3641da177e4SLinus Torvalds 
3658e2ec639SYan, Zheng 	dst_destroy_metrics_generic(dst);
3668d0b94afSMartin KaFai Lau 	rt6_uncached_list_del(rt);
3678d0b94afSMartin KaFai Lau 
3688d0b94afSMartin KaFai Lau 	idev = rt->rt6i_idev;
36938308473SDavid S. Miller 	if (idev) {
3701da177e4SLinus Torvalds 		rt->rt6i_idev = NULL;
3711da177e4SLinus Torvalds 		in6_dev_put(idev);
3721da177e4SLinus Torvalds 	}
373d4ead6b3SDavid Ahern 
3743a2232e9SDavid Miller 	rt->from = NULL;
37593531c67SDavid Ahern 	fib6_info_release(from);
376b3419363SDavid S. Miller }
377b3419363SDavid S. Miller 
3781da177e4SLinus Torvalds static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
3791da177e4SLinus Torvalds 			   int how)
3801da177e4SLinus Torvalds {
3811da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
3821da177e4SLinus Torvalds 	struct inet6_dev *idev = rt->rt6i_idev;
3835a3e55d6SDenis V. Lunev 	struct net_device *loopback_dev =
384c346dca1SYOSHIFUJI Hideaki 		dev_net(dev)->loopback_dev;
3851da177e4SLinus Torvalds 
386e5645f51SWei Wang 	if (idev && idev->dev != loopback_dev) {
387e5645f51SWei Wang 		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
38838308473SDavid S. Miller 		if (loopback_idev) {
3891da177e4SLinus Torvalds 			rt->rt6i_idev = loopback_idev;
3901da177e4SLinus Torvalds 			in6_dev_put(idev);
3911da177e4SLinus Torvalds 		}
3921da177e4SLinus Torvalds 	}
39397cac082SDavid S. Miller }
3941da177e4SLinus Torvalds 
3955973fb1eSMartin KaFai Lau static bool __rt6_check_expired(const struct rt6_info *rt)
3965973fb1eSMartin KaFai Lau {
3975973fb1eSMartin KaFai Lau 	if (rt->rt6i_flags & RTF_EXPIRES)
3985973fb1eSMartin KaFai Lau 		return time_after(jiffies, rt->dst.expires);
3995973fb1eSMartin KaFai Lau 	else
4005973fb1eSMartin KaFai Lau 		return false;
4015973fb1eSMartin KaFai Lau }
4025973fb1eSMartin KaFai Lau 
403a50feda5SEric Dumazet static bool rt6_check_expired(const struct rt6_info *rt)
4041da177e4SLinus Torvalds {
4051716a961SGao feng 	if (rt->rt6i_flags & RTF_EXPIRES) {
4061716a961SGao feng 		if (time_after(jiffies, rt->dst.expires))
407a50feda5SEric Dumazet 			return true;
4083a2232e9SDavid Miller 	} else if (rt->from) {
4091e2ea8adSXin Long 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
41014895687SDavid Ahern 			fib6_check_expired(rt->from);
4111716a961SGao feng 	}
412a50feda5SEric Dumazet 	return false;
4131da177e4SLinus Torvalds }
4141da177e4SLinus Torvalds 
4158d1c802bSDavid Ahern static struct fib6_info *rt6_multipath_select(const struct net *net,
4168d1c802bSDavid Ahern 					      struct fib6_info *match,
41752bd4c0cSNicolas Dichtel 					     struct flowi6 *fl6, int oif,
418b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
41952bd4c0cSNicolas Dichtel 					     int strict)
42051ebd318SNicolas Dichtel {
4218d1c802bSDavid Ahern 	struct fib6_info *sibling, *next_sibling;
42251ebd318SNicolas Dichtel 
423b673d6ccSJakub Sitnicki 	/* We might have already computed the hash for ICMPv6 errors. In such
424b673d6ccSJakub Sitnicki 	 * case it will always be non-zero. Otherwise now is the time to do it.
425b673d6ccSJakub Sitnicki 	 */
426b673d6ccSJakub Sitnicki 	if (!fl6->mp_hash)
427b4bac172SDavid Ahern 		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
428b673d6ccSJakub Sitnicki 
4295e670d84SDavid Ahern 	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
4303d709f69SIdo Schimmel 		return match;
431bbfcd776SIdo Schimmel 
43293c2fb25SDavid Ahern 	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
43393c2fb25SDavid Ahern 				 fib6_siblings) {
4345e670d84SDavid Ahern 		int nh_upper_bound;
4355e670d84SDavid Ahern 
4365e670d84SDavid Ahern 		nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
4375e670d84SDavid Ahern 		if (fl6->mp_hash > nh_upper_bound)
4383d709f69SIdo Schimmel 			continue;
43952bd4c0cSNicolas Dichtel 		if (rt6_score_route(sibling, oif, strict) < 0)
44052bd4c0cSNicolas Dichtel 			break;
44151ebd318SNicolas Dichtel 		match = sibling;
44251ebd318SNicolas Dichtel 		break;
44351ebd318SNicolas Dichtel 	}
4443d709f69SIdo Schimmel 
44551ebd318SNicolas Dichtel 	return match;
44651ebd318SNicolas Dichtel }
44751ebd318SNicolas Dichtel 
4481da177e4SLinus Torvalds /*
44966f5d6ceSWei Wang  *	Route lookup. rcu_read_lock() should be held.
4501da177e4SLinus Torvalds  */
4511da177e4SLinus Torvalds 
4528d1c802bSDavid Ahern static inline struct fib6_info *rt6_device_match(struct net *net,
4538d1c802bSDavid Ahern 						 struct fib6_info *rt,
454b71d1d42SEric Dumazet 						    const struct in6_addr *saddr,
4551da177e4SLinus Torvalds 						    int oif,
456d420895eSYOSHIFUJI Hideaki 						    int flags)
4571da177e4SLinus Torvalds {
4588d1c802bSDavid Ahern 	struct fib6_info *sprt;
4591da177e4SLinus Torvalds 
4605e670d84SDavid Ahern 	if (!oif && ipv6_addr_any(saddr) &&
4615e670d84SDavid Ahern 	    !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
4628067bb8cSIdo Schimmel 		return rt;
463dd3abc4eSYOSHIFUJI Hideaki 
464071fb37eSDavid Miller 	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
4655e670d84SDavid Ahern 		const struct net_device *dev = sprt->fib6_nh.nh_dev;
466dd3abc4eSYOSHIFUJI Hideaki 
4675e670d84SDavid Ahern 		if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
4688067bb8cSIdo Schimmel 			continue;
4698067bb8cSIdo Schimmel 
470dd3abc4eSYOSHIFUJI Hideaki 		if (oif) {
4711da177e4SLinus Torvalds 			if (dev->ifindex == oif)
4721da177e4SLinus Torvalds 				return sprt;
473dd3abc4eSYOSHIFUJI Hideaki 		} else {
474dd3abc4eSYOSHIFUJI Hideaki 			if (ipv6_chk_addr(net, saddr, dev,
475dd3abc4eSYOSHIFUJI Hideaki 					  flags & RT6_LOOKUP_F_IFACE))
476dd3abc4eSYOSHIFUJI Hideaki 				return sprt;
477dd3abc4eSYOSHIFUJI Hideaki 		}
4781da177e4SLinus Torvalds 	}
4791da177e4SLinus Torvalds 
480*eea68cd3SDavid Ahern 	if (oif && flags & RT6_LOOKUP_F_IFACE)
481421842edSDavid Ahern 		return net->ipv6.fib6_null_entry;
4828067bb8cSIdo Schimmel 
483421842edSDavid Ahern 	return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
4841da177e4SLinus Torvalds }
4851da177e4SLinus Torvalds 
48627097255SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
487c2f17e82SHannes Frederic Sowa struct __rt6_probe_work {
488c2f17e82SHannes Frederic Sowa 	struct work_struct work;
489c2f17e82SHannes Frederic Sowa 	struct in6_addr target;
490c2f17e82SHannes Frederic Sowa 	struct net_device *dev;
491c2f17e82SHannes Frederic Sowa };
492c2f17e82SHannes Frederic Sowa 
493c2f17e82SHannes Frederic Sowa static void rt6_probe_deferred(struct work_struct *w)
494c2f17e82SHannes Frederic Sowa {
495c2f17e82SHannes Frederic Sowa 	struct in6_addr mcaddr;
496c2f17e82SHannes Frederic Sowa 	struct __rt6_probe_work *work =
497c2f17e82SHannes Frederic Sowa 		container_of(w, struct __rt6_probe_work, work);
498c2f17e82SHannes Frederic Sowa 
499c2f17e82SHannes Frederic Sowa 	addrconf_addr_solict_mult(&work->target, &mcaddr);
500adc176c5SErik Nordmark 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
501c2f17e82SHannes Frederic Sowa 	dev_put(work->dev);
502662f5533SMichael Büsch 	kfree(work);
503c2f17e82SHannes Frederic Sowa }
504c2f17e82SHannes Frederic Sowa 
5058d1c802bSDavid Ahern static void rt6_probe(struct fib6_info *rt)
50627097255SYOSHIFUJI Hideaki {
507990edb42SMartin KaFai Lau 	struct __rt6_probe_work *work;
5085e670d84SDavid Ahern 	const struct in6_addr *nh_gw;
509f2c31e32SEric Dumazet 	struct neighbour *neigh;
5105e670d84SDavid Ahern 	struct net_device *dev;
5115e670d84SDavid Ahern 
51227097255SYOSHIFUJI Hideaki 	/*
51327097255SYOSHIFUJI Hideaki 	 * Okay, this does not seem to be appropriate
51427097255SYOSHIFUJI Hideaki 	 * for now, however, we need to check if it
51527097255SYOSHIFUJI Hideaki 	 * is really so; aka Router Reachability Probing.
51627097255SYOSHIFUJI Hideaki 	 *
51727097255SYOSHIFUJI Hideaki 	 * Router Reachability Probe MUST be rate-limited
51827097255SYOSHIFUJI Hideaki 	 * to no more than one per minute.
51927097255SYOSHIFUJI Hideaki 	 */
52093c2fb25SDavid Ahern 	if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
521fdd6681dSAmerigo Wang 		return;
5225e670d84SDavid Ahern 
5235e670d84SDavid Ahern 	nh_gw = &rt->fib6_nh.nh_gw;
5245e670d84SDavid Ahern 	dev = rt->fib6_nh.nh_dev;
5252152caeaSYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_lock_bh();
5265e670d84SDavid Ahern 	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
5272152caeaSYOSHIFUJI Hideaki / 吉藤英明 	if (neigh) {
5288d6c31bfSMartin KaFai Lau 		if (neigh->nud_state & NUD_VALID)
5298d6c31bfSMartin KaFai Lau 			goto out;
5308d6c31bfSMartin KaFai Lau 
531990edb42SMartin KaFai Lau 		work = NULL;
5322152caeaSYOSHIFUJI Hideaki / 吉藤英明 		write_lock(&neigh->lock);
533990edb42SMartin KaFai Lau 		if (!(neigh->nud_state & NUD_VALID) &&
534990edb42SMartin KaFai Lau 		    time_after(jiffies,
535990edb42SMartin KaFai Lau 			       neigh->updated +
53693c2fb25SDavid Ahern 			       rt->fib6_idev->cnf.rtr_probe_interval)) {
537c2f17e82SHannes Frederic Sowa 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
538990edb42SMartin KaFai Lau 			if (work)
5397e980569SJiri Benc 				__neigh_set_probe_once(neigh);
540990edb42SMartin KaFai Lau 		}
541c2f17e82SHannes Frederic Sowa 		write_unlock(&neigh->lock);
542990edb42SMartin KaFai Lau 	} else {
543990edb42SMartin KaFai Lau 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
544990edb42SMartin KaFai Lau 	}
545c2f17e82SHannes Frederic Sowa 
546c2f17e82SHannes Frederic Sowa 	if (work) {
547c2f17e82SHannes Frederic Sowa 		INIT_WORK(&work->work, rt6_probe_deferred);
5485e670d84SDavid Ahern 		work->target = *nh_gw;
5495e670d84SDavid Ahern 		dev_hold(dev);
5505e670d84SDavid Ahern 		work->dev = dev;
551c2f17e82SHannes Frederic Sowa 		schedule_work(&work->work);
552c2f17e82SHannes Frederic Sowa 	}
553990edb42SMartin KaFai Lau 
5548d6c31bfSMartin KaFai Lau out:
5552152caeaSYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_unlock_bh();
556f2c31e32SEric Dumazet }
55727097255SYOSHIFUJI Hideaki #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_info *rt)
{
	/* nothing to do */
}
56127097255SYOSHIFUJI Hideaki #endif
56227097255SYOSHIFUJI Hideaki 
5631da177e4SLinus Torvalds /*
564554cfb7eSYOSHIFUJI Hideaki  * Default Router Selection (RFC 2461 6.3.6)
5651da177e4SLinus Torvalds  */
5668d1c802bSDavid Ahern static inline int rt6_check_dev(struct fib6_info *rt, int oif)
5671da177e4SLinus Torvalds {
5685e670d84SDavid Ahern 	const struct net_device *dev = rt->fib6_nh.nh_dev;
5695e670d84SDavid Ahern 
570161980f4SDavid S. Miller 	if (!oif || dev->ifindex == oif)
571554cfb7eSYOSHIFUJI Hideaki 		return 2;
572554cfb7eSYOSHIFUJI Hideaki 	return 0;
5731da177e4SLinus Torvalds }
5741da177e4SLinus Torvalds 
5758d1c802bSDavid Ahern static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
5761da177e4SLinus Torvalds {
577afc154e9SHannes Frederic Sowa 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5785e670d84SDavid Ahern 	struct neighbour *neigh;
579f2c31e32SEric Dumazet 
58093c2fb25SDavid Ahern 	if (rt->fib6_flags & RTF_NONEXTHOP ||
58193c2fb25SDavid Ahern 	    !(rt->fib6_flags & RTF_GATEWAY))
582afc154e9SHannes Frederic Sowa 		return RT6_NUD_SUCCEED;
583145a3621SYOSHIFUJI Hideaki / 吉藤英明 
584145a3621SYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_lock_bh();
5855e670d84SDavid Ahern 	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
5865e670d84SDavid Ahern 					  &rt->fib6_nh.nh_gw);
587145a3621SYOSHIFUJI Hideaki / 吉藤英明 	if (neigh) {
588145a3621SYOSHIFUJI Hideaki / 吉藤英明 		read_lock(&neigh->lock);
589554cfb7eSYOSHIFUJI Hideaki 		if (neigh->nud_state & NUD_VALID)
590afc154e9SHannes Frederic Sowa 			ret = RT6_NUD_SUCCEED;
591398bcbebSYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
592a5a81f0bSPaul Marks 		else if (!(neigh->nud_state & NUD_FAILED))
593afc154e9SHannes Frederic Sowa 			ret = RT6_NUD_SUCCEED;
5947e980569SJiri Benc 		else
5957e980569SJiri Benc 			ret = RT6_NUD_FAIL_PROBE;
596398bcbebSYOSHIFUJI Hideaki #endif
597145a3621SYOSHIFUJI Hideaki / 吉藤英明 		read_unlock(&neigh->lock);
598afc154e9SHannes Frederic Sowa 	} else {
599afc154e9SHannes Frederic Sowa 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
6007e980569SJiri Benc 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
601a5a81f0bSPaul Marks 	}
602145a3621SYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_unlock_bh();
603145a3621SYOSHIFUJI Hideaki / 吉藤英明 
604a5a81f0bSPaul Marks 	return ret;
6051da177e4SLinus Torvalds }
6061da177e4SLinus Torvalds 
6078d1c802bSDavid Ahern static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
608554cfb7eSYOSHIFUJI Hideaki {
609a5a81f0bSPaul Marks 	int m;
6104d0c5911SYOSHIFUJI Hideaki 
6114d0c5911SYOSHIFUJI Hideaki 	m = rt6_check_dev(rt, oif);
61277d16f45SYOSHIFUJI Hideaki 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
613afc154e9SHannes Frederic Sowa 		return RT6_NUD_FAIL_HARD;
614ebacaaa0SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
61593c2fb25SDavid Ahern 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
616ebacaaa0SYOSHIFUJI Hideaki #endif
617afc154e9SHannes Frederic Sowa 	if (strict & RT6_LOOKUP_F_REACHABLE) {
618afc154e9SHannes Frederic Sowa 		int n = rt6_check_neigh(rt);
619afc154e9SHannes Frederic Sowa 		if (n < 0)
620afc154e9SHannes Frederic Sowa 			return n;
621afc154e9SHannes Frederic Sowa 	}
622554cfb7eSYOSHIFUJI Hideaki 	return m;
623554cfb7eSYOSHIFUJI Hideaki }
624554cfb7eSYOSHIFUJI Hideaki 
6258d1c802bSDavid Ahern static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
6268d1c802bSDavid Ahern 				   int *mpri, struct fib6_info *match,
627afc154e9SHannes Frederic Sowa 				   bool *do_rr)
628554cfb7eSYOSHIFUJI Hideaki {
629554cfb7eSYOSHIFUJI Hideaki 	int m;
630afc154e9SHannes Frederic Sowa 	bool match_do_rr = false;
63193c2fb25SDavid Ahern 	struct inet6_dev *idev = rt->fib6_idev;
63235103d11SAndy Gospodarek 
6335e670d84SDavid Ahern 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
6348067bb8cSIdo Schimmel 		goto out;
6358067bb8cSIdo Schimmel 
63614c5206cSIdo Schimmel 	if (idev->cnf.ignore_routes_with_linkdown &&
6375e670d84SDavid Ahern 	    rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
638d5d32e4bSDavid Ahern 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
63935103d11SAndy Gospodarek 		goto out;
640554cfb7eSYOSHIFUJI Hideaki 
64114895687SDavid Ahern 	if (fib6_check_expired(rt))
642f11e6659SDavid S. Miller 		goto out;
643554cfb7eSYOSHIFUJI Hideaki 
644554cfb7eSYOSHIFUJI Hideaki 	m = rt6_score_route(rt, oif, strict);
6457e980569SJiri Benc 	if (m == RT6_NUD_FAIL_DO_RR) {
646afc154e9SHannes Frederic Sowa 		match_do_rr = true;
647afc154e9SHannes Frederic Sowa 		m = 0; /* lowest valid score */
6487e980569SJiri Benc 	} else if (m == RT6_NUD_FAIL_HARD) {
649f11e6659SDavid S. Miller 		goto out;
6501da177e4SLinus Torvalds 	}
651f11e6659SDavid S. Miller 
652afc154e9SHannes Frederic Sowa 	if (strict & RT6_LOOKUP_F_REACHABLE)
653afc154e9SHannes Frederic Sowa 		rt6_probe(rt);
654afc154e9SHannes Frederic Sowa 
6557e980569SJiri Benc 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
656afc154e9SHannes Frederic Sowa 	if (m > *mpri) {
657afc154e9SHannes Frederic Sowa 		*do_rr = match_do_rr;
658afc154e9SHannes Frederic Sowa 		*mpri = m;
659afc154e9SHannes Frederic Sowa 		match = rt;
660afc154e9SHannes Frederic Sowa 	}
661f11e6659SDavid S. Miller out:
662f11e6659SDavid S. Miller 	return match;
6631da177e4SLinus Torvalds }
6641da177e4SLinus Torvalds 
6658d1c802bSDavid Ahern static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
6668d1c802bSDavid Ahern 				     struct fib6_info *leaf,
6678d1c802bSDavid Ahern 				     struct fib6_info *rr_head,
668afc154e9SHannes Frederic Sowa 				     u32 metric, int oif, int strict,
669afc154e9SHannes Frederic Sowa 				     bool *do_rr)
670f11e6659SDavid S. Miller {
6718d1c802bSDavid Ahern 	struct fib6_info *rt, *match, *cont;
672f11e6659SDavid S. Miller 	int mpri = -1;
673f11e6659SDavid S. Miller 
674f11e6659SDavid S. Miller 	match = NULL;
6759fbdcfafSSteffen Klassert 	cont = NULL;
676071fb37eSDavid Miller 	for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
67793c2fb25SDavid Ahern 		if (rt->fib6_metric != metric) {
6789fbdcfafSSteffen Klassert 			cont = rt;
6799fbdcfafSSteffen Klassert 			break;
6809fbdcfafSSteffen Klassert 		}
6819fbdcfafSSteffen Klassert 
682afc154e9SHannes Frederic Sowa 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
6839fbdcfafSSteffen Klassert 	}
6849fbdcfafSSteffen Klassert 
68566f5d6ceSWei Wang 	for (rt = leaf; rt && rt != rr_head;
686071fb37eSDavid Miller 	     rt = rcu_dereference(rt->rt6_next)) {
68793c2fb25SDavid Ahern 		if (rt->fib6_metric != metric) {
6889fbdcfafSSteffen Klassert 			cont = rt;
6899fbdcfafSSteffen Klassert 			break;
6909fbdcfafSSteffen Klassert 		}
6919fbdcfafSSteffen Klassert 
6929fbdcfafSSteffen Klassert 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
6939fbdcfafSSteffen Klassert 	}
6949fbdcfafSSteffen Klassert 
6959fbdcfafSSteffen Klassert 	if (match || !cont)
6969fbdcfafSSteffen Klassert 		return match;
6979fbdcfafSSteffen Klassert 
698071fb37eSDavid Miller 	for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
699afc154e9SHannes Frederic Sowa 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
700f11e6659SDavid S. Miller 
701f11e6659SDavid S. Miller 	return match;
702f11e6659SDavid S. Miller }
703f11e6659SDavid S. Miller 
7048d1c802bSDavid Ahern static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
7058d1040e8SWei Wang 				   int oif, int strict)
706f11e6659SDavid S. Miller {
7078d1c802bSDavid Ahern 	struct fib6_info *leaf = rcu_dereference(fn->leaf);
7088d1c802bSDavid Ahern 	struct fib6_info *match, *rt0;
709afc154e9SHannes Frederic Sowa 	bool do_rr = false;
71017ecf590SWei Wang 	int key_plen;
711f11e6659SDavid S. Miller 
712421842edSDavid Ahern 	if (!leaf || leaf == net->ipv6.fib6_null_entry)
713421842edSDavid Ahern 		return net->ipv6.fib6_null_entry;
7148d1040e8SWei Wang 
71566f5d6ceSWei Wang 	rt0 = rcu_dereference(fn->rr_ptr);
716f11e6659SDavid S. Miller 	if (!rt0)
71766f5d6ceSWei Wang 		rt0 = leaf;
718f11e6659SDavid S. Miller 
71917ecf590SWei Wang 	/* Double check to make sure fn is not an intermediate node
72017ecf590SWei Wang 	 * and fn->leaf does not points to its child's leaf
72117ecf590SWei Wang 	 * (This might happen if all routes under fn are deleted from
72217ecf590SWei Wang 	 * the tree and fib6_repair_tree() is called on the node.)
72317ecf590SWei Wang 	 */
72493c2fb25SDavid Ahern 	key_plen = rt0->fib6_dst.plen;
72517ecf590SWei Wang #ifdef CONFIG_IPV6_SUBTREES
72693c2fb25SDavid Ahern 	if (rt0->fib6_src.plen)
72793c2fb25SDavid Ahern 		key_plen = rt0->fib6_src.plen;
72817ecf590SWei Wang #endif
72917ecf590SWei Wang 	if (fn->fn_bit != key_plen)
730421842edSDavid Ahern 		return net->ipv6.fib6_null_entry;
73117ecf590SWei Wang 
73293c2fb25SDavid Ahern 	match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
733afc154e9SHannes Frederic Sowa 			     &do_rr);
734f11e6659SDavid S. Miller 
735afc154e9SHannes Frederic Sowa 	if (do_rr) {
7368d1c802bSDavid Ahern 		struct fib6_info *next = rcu_dereference(rt0->rt6_next);
737f11e6659SDavid S. Miller 
738554cfb7eSYOSHIFUJI Hideaki 		/* no entries matched; do round-robin */
73993c2fb25SDavid Ahern 		if (!next || next->fib6_metric != rt0->fib6_metric)
7408d1040e8SWei Wang 			next = leaf;
741f11e6659SDavid S. Miller 
74266f5d6ceSWei Wang 		if (next != rt0) {
74393c2fb25SDavid Ahern 			spin_lock_bh(&leaf->fib6_table->tb6_lock);
74466f5d6ceSWei Wang 			/* make sure next is not being deleted from the tree */
74593c2fb25SDavid Ahern 			if (next->fib6_node)
74666f5d6ceSWei Wang 				rcu_assign_pointer(fn->rr_ptr, next);
74793c2fb25SDavid Ahern 			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
74866f5d6ceSWei Wang 		}
749554cfb7eSYOSHIFUJI Hideaki 	}
750554cfb7eSYOSHIFUJI Hideaki 
751421842edSDavid Ahern 	return match ? match : net->ipv6.fib6_null_entry;
7521da177e4SLinus Torvalds }
7531da177e4SLinus Torvalds 
7548d1c802bSDavid Ahern static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
7558b9df265SMartin KaFai Lau {
75693c2fb25SDavid Ahern 	return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
7578b9df265SMartin KaFai Lau }
7588b9df265SMartin KaFai Lau 
75970ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
76070ceb4f5SYOSHIFUJI Hideaki int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
761b71d1d42SEric Dumazet 		  const struct in6_addr *gwaddr)
76270ceb4f5SYOSHIFUJI Hideaki {
763c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
76470ceb4f5SYOSHIFUJI Hideaki 	struct route_info *rinfo = (struct route_info *) opt;
76570ceb4f5SYOSHIFUJI Hideaki 	struct in6_addr prefix_buf, *prefix;
76670ceb4f5SYOSHIFUJI Hideaki 	unsigned int pref;
7674bed72e4SYOSHIFUJI Hideaki 	unsigned long lifetime;
7688d1c802bSDavid Ahern 	struct fib6_info *rt;
76970ceb4f5SYOSHIFUJI Hideaki 
77070ceb4f5SYOSHIFUJI Hideaki 	if (len < sizeof(struct route_info)) {
77170ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
77270ceb4f5SYOSHIFUJI Hideaki 	}
77370ceb4f5SYOSHIFUJI Hideaki 
77470ceb4f5SYOSHIFUJI Hideaki 	/* Sanity check for prefix_len and length */
77570ceb4f5SYOSHIFUJI Hideaki 	if (rinfo->length > 3) {
77670ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
77770ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 128) {
77870ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
77970ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 64) {
78070ceb4f5SYOSHIFUJI Hideaki 		if (rinfo->length < 2) {
78170ceb4f5SYOSHIFUJI Hideaki 			return -EINVAL;
78270ceb4f5SYOSHIFUJI Hideaki 		}
78370ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 0) {
78470ceb4f5SYOSHIFUJI Hideaki 		if (rinfo->length < 1) {
78570ceb4f5SYOSHIFUJI Hideaki 			return -EINVAL;
78670ceb4f5SYOSHIFUJI Hideaki 		}
78770ceb4f5SYOSHIFUJI Hideaki 	}
78870ceb4f5SYOSHIFUJI Hideaki 
78970ceb4f5SYOSHIFUJI Hideaki 	pref = rinfo->route_pref;
79070ceb4f5SYOSHIFUJI Hideaki 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
7913933fc95SJens Rosenboom 		return -EINVAL;
79270ceb4f5SYOSHIFUJI Hideaki 
7934bed72e4SYOSHIFUJI Hideaki 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
79470ceb4f5SYOSHIFUJI Hideaki 
79570ceb4f5SYOSHIFUJI Hideaki 	if (rinfo->length == 3)
79670ceb4f5SYOSHIFUJI Hideaki 		prefix = (struct in6_addr *)rinfo->prefix;
79770ceb4f5SYOSHIFUJI Hideaki 	else {
79870ceb4f5SYOSHIFUJI Hideaki 		/* this function is safe */
79970ceb4f5SYOSHIFUJI Hideaki 		ipv6_addr_prefix(&prefix_buf,
80070ceb4f5SYOSHIFUJI Hideaki 				 (struct in6_addr *)rinfo->prefix,
80170ceb4f5SYOSHIFUJI Hideaki 				 rinfo->prefix_len);
80270ceb4f5SYOSHIFUJI Hideaki 		prefix = &prefix_buf;
80370ceb4f5SYOSHIFUJI Hideaki 	}
80470ceb4f5SYOSHIFUJI Hideaki 
805f104a567SDuan Jiong 	if (rinfo->prefix_len == 0)
806afb1d4b5SDavid Ahern 		rt = rt6_get_dflt_router(net, gwaddr, dev);
807f104a567SDuan Jiong 	else
808f104a567SDuan Jiong 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
809830218c1SDavid Ahern 					gwaddr, dev);
81070ceb4f5SYOSHIFUJI Hideaki 
81170ceb4f5SYOSHIFUJI Hideaki 	if (rt && !lifetime) {
812afb1d4b5SDavid Ahern 		ip6_del_rt(net, rt);
81370ceb4f5SYOSHIFUJI Hideaki 		rt = NULL;
81470ceb4f5SYOSHIFUJI Hideaki 	}
81570ceb4f5SYOSHIFUJI Hideaki 
81670ceb4f5SYOSHIFUJI Hideaki 	if (!rt && lifetime)
817830218c1SDavid Ahern 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
818830218c1SDavid Ahern 					dev, pref);
81970ceb4f5SYOSHIFUJI Hideaki 	else if (rt)
82093c2fb25SDavid Ahern 		rt->fib6_flags = RTF_ROUTEINFO |
82193c2fb25SDavid Ahern 				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
82270ceb4f5SYOSHIFUJI Hideaki 
82370ceb4f5SYOSHIFUJI Hideaki 	if (rt) {
8241716a961SGao feng 		if (!addrconf_finite_timeout(lifetime))
82514895687SDavid Ahern 			fib6_clean_expires(rt);
8261716a961SGao feng 		else
82714895687SDavid Ahern 			fib6_set_expires(rt, jiffies + HZ * lifetime);
8281716a961SGao feng 
82993531c67SDavid Ahern 		fib6_info_release(rt);
83070ceb4f5SYOSHIFUJI Hideaki 	}
83170ceb4f5SYOSHIFUJI Hideaki 	return 0;
83270ceb4f5SYOSHIFUJI Hideaki }
83370ceb4f5SYOSHIFUJI Hideaki #endif
83470ceb4f5SYOSHIFUJI Hideaki 
835ae90d867SDavid Ahern /*
836ae90d867SDavid Ahern  *	Misc support functions
837ae90d867SDavid Ahern  */
838ae90d867SDavid Ahern 
839ae90d867SDavid Ahern /* called with rcu_lock held */
8408d1c802bSDavid Ahern static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
841ae90d867SDavid Ahern {
8425e670d84SDavid Ahern 	struct net_device *dev = rt->fib6_nh.nh_dev;
843ae90d867SDavid Ahern 
84493c2fb25SDavid Ahern 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
845ae90d867SDavid Ahern 		/* for copies of local routes, dst->dev needs to be the
846ae90d867SDavid Ahern 		 * device if it is a master device, the master device if
847ae90d867SDavid Ahern 		 * device is enslaved, and the loopback as the default
848ae90d867SDavid Ahern 		 */
849ae90d867SDavid Ahern 		if (netif_is_l3_slave(dev) &&
85093c2fb25SDavid Ahern 		    !rt6_need_strict(&rt->fib6_dst.addr))
851ae90d867SDavid Ahern 			dev = l3mdev_master_dev_rcu(dev);
852ae90d867SDavid Ahern 		else if (!netif_is_l3_master(dev))
853ae90d867SDavid Ahern 			dev = dev_net(dev)->loopback_dev;
854ae90d867SDavid Ahern 		/* last case is netif_is_l3_master(dev) is true in which
855ae90d867SDavid Ahern 		 * case we want dev returned to be dev
856ae90d867SDavid Ahern 		 */
857ae90d867SDavid Ahern 	}
858ae90d867SDavid Ahern 
859ae90d867SDavid Ahern 	return dev;
860ae90d867SDavid Ahern }
861ae90d867SDavid Ahern 
8626edb3c96SDavid Ahern static const int fib6_prop[RTN_MAX + 1] = {
8636edb3c96SDavid Ahern 	[RTN_UNSPEC]	= 0,
8646edb3c96SDavid Ahern 	[RTN_UNICAST]	= 0,
8656edb3c96SDavid Ahern 	[RTN_LOCAL]	= 0,
8666edb3c96SDavid Ahern 	[RTN_BROADCAST]	= 0,
8676edb3c96SDavid Ahern 	[RTN_ANYCAST]	= 0,
8686edb3c96SDavid Ahern 	[RTN_MULTICAST]	= 0,
8696edb3c96SDavid Ahern 	[RTN_BLACKHOLE]	= -EINVAL,
8706edb3c96SDavid Ahern 	[RTN_UNREACHABLE] = -EHOSTUNREACH,
8716edb3c96SDavid Ahern 	[RTN_PROHIBIT]	= -EACCES,
8726edb3c96SDavid Ahern 	[RTN_THROW]	= -EAGAIN,
8736edb3c96SDavid Ahern 	[RTN_NAT]	= -EINVAL,
8746edb3c96SDavid Ahern 	[RTN_XRESOLVE]	= -EINVAL,
8756edb3c96SDavid Ahern };
8766edb3c96SDavid Ahern 
8776edb3c96SDavid Ahern static int ip6_rt_type_to_error(u8 fib6_type)
8786edb3c96SDavid Ahern {
8796edb3c96SDavid Ahern 	return fib6_prop[fib6_type];
8806edb3c96SDavid Ahern }
8816edb3c96SDavid Ahern 
8828d1c802bSDavid Ahern static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
8833b6761d1SDavid Ahern {
8843b6761d1SDavid Ahern 	unsigned short flags = 0;
8853b6761d1SDavid Ahern 
8863b6761d1SDavid Ahern 	if (rt->dst_nocount)
8873b6761d1SDavid Ahern 		flags |= DST_NOCOUNT;
8883b6761d1SDavid Ahern 	if (rt->dst_nopolicy)
8893b6761d1SDavid Ahern 		flags |= DST_NOPOLICY;
8903b6761d1SDavid Ahern 	if (rt->dst_host)
8913b6761d1SDavid Ahern 		flags |= DST_HOST;
8923b6761d1SDavid Ahern 
8933b6761d1SDavid Ahern 	return flags;
8943b6761d1SDavid Ahern }
8953b6761d1SDavid Ahern 
8968d1c802bSDavid Ahern static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
8976edb3c96SDavid Ahern {
8986edb3c96SDavid Ahern 	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
8996edb3c96SDavid Ahern 
9006edb3c96SDavid Ahern 	switch (ort->fib6_type) {
9016edb3c96SDavid Ahern 	case RTN_BLACKHOLE:
9026edb3c96SDavid Ahern 		rt->dst.output = dst_discard_out;
9036edb3c96SDavid Ahern 		rt->dst.input = dst_discard;
9046edb3c96SDavid Ahern 		break;
9056edb3c96SDavid Ahern 	case RTN_PROHIBIT:
9066edb3c96SDavid Ahern 		rt->dst.output = ip6_pkt_prohibit_out;
9076edb3c96SDavid Ahern 		rt->dst.input = ip6_pkt_prohibit;
9086edb3c96SDavid Ahern 		break;
9096edb3c96SDavid Ahern 	case RTN_THROW:
9106edb3c96SDavid Ahern 	case RTN_UNREACHABLE:
9116edb3c96SDavid Ahern 	default:
9126edb3c96SDavid Ahern 		rt->dst.output = ip6_pkt_discard_out;
9136edb3c96SDavid Ahern 		rt->dst.input = ip6_pkt_discard;
9146edb3c96SDavid Ahern 		break;
9156edb3c96SDavid Ahern 	}
9166edb3c96SDavid Ahern }
9176edb3c96SDavid Ahern 
9188d1c802bSDavid Ahern static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
9196edb3c96SDavid Ahern {
9203b6761d1SDavid Ahern 	rt->dst.flags |= fib6_info_dst_flags(ort);
9213b6761d1SDavid Ahern 
92293c2fb25SDavid Ahern 	if (ort->fib6_flags & RTF_REJECT) {
9236edb3c96SDavid Ahern 		ip6_rt_init_dst_reject(rt, ort);
9246edb3c96SDavid Ahern 		return;
9256edb3c96SDavid Ahern 	}
9266edb3c96SDavid Ahern 
9276edb3c96SDavid Ahern 	rt->dst.error = 0;
9286edb3c96SDavid Ahern 	rt->dst.output = ip6_output;
9296edb3c96SDavid Ahern 
9306edb3c96SDavid Ahern 	if (ort->fib6_type == RTN_LOCAL) {
9316edb3c96SDavid Ahern 		rt->dst.input = ip6_input;
93293c2fb25SDavid Ahern 	} else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
9336edb3c96SDavid Ahern 		rt->dst.input = ip6_mc_input;
9346edb3c96SDavid Ahern 	} else {
9356edb3c96SDavid Ahern 		rt->dst.input = ip6_forward;
9366edb3c96SDavid Ahern 	}
9376edb3c96SDavid Ahern 
9386edb3c96SDavid Ahern 	if (ort->fib6_nh.nh_lwtstate) {
9396edb3c96SDavid Ahern 		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
9406edb3c96SDavid Ahern 		lwtunnel_set_redirect(&rt->dst);
9416edb3c96SDavid Ahern 	}
9426edb3c96SDavid Ahern 
9436edb3c96SDavid Ahern 	rt->dst.lastuse = jiffies;
9446edb3c96SDavid Ahern }
9456edb3c96SDavid Ahern 
9468d1c802bSDavid Ahern static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
947ae90d867SDavid Ahern {
948ae90d867SDavid Ahern 	rt->rt6i_flags &= ~RTF_EXPIRES;
94993531c67SDavid Ahern 	fib6_info_hold(from);
950ae90d867SDavid Ahern 	rt->from = from;
951d4ead6b3SDavid Ahern 	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
952d4ead6b3SDavid Ahern 	if (from->fib6_metrics != &dst_default_metrics) {
953d4ead6b3SDavid Ahern 		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
954d4ead6b3SDavid Ahern 		refcount_inc(&from->fib6_metrics->refcnt);
955d4ead6b3SDavid Ahern 	}
956ae90d867SDavid Ahern }
957ae90d867SDavid Ahern 
9588d1c802bSDavid Ahern static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
959ae90d867SDavid Ahern {
9606edb3c96SDavid Ahern 	ip6_rt_init_dst(rt, ort);
9616edb3c96SDavid Ahern 
96293c2fb25SDavid Ahern 	rt->rt6i_dst = ort->fib6_dst;
96393c2fb25SDavid Ahern 	rt->rt6i_idev = ort->fib6_idev;
964ae90d867SDavid Ahern 	if (rt->rt6i_idev)
965ae90d867SDavid Ahern 		in6_dev_hold(rt->rt6i_idev);
9665e670d84SDavid Ahern 	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
96793c2fb25SDavid Ahern 	rt->rt6i_flags = ort->fib6_flags;
968ae90d867SDavid Ahern 	rt6_set_from(rt, ort);
969ae90d867SDavid Ahern #ifdef CONFIG_IPV6_SUBTREES
97093c2fb25SDavid Ahern 	rt->rt6i_src = ort->fib6_src;
971ae90d867SDavid Ahern #endif
97293c2fb25SDavid Ahern 	rt->rt6i_prefsrc = ort->fib6_prefsrc;
9735e670d84SDavid Ahern 	rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
974ae90d867SDavid Ahern }
975ae90d867SDavid Ahern 
976a3c00e46SMartin KaFai Lau static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
977a3c00e46SMartin KaFai Lau 					struct in6_addr *saddr)
978a3c00e46SMartin KaFai Lau {
97966f5d6ceSWei Wang 	struct fib6_node *pn, *sn;
980a3c00e46SMartin KaFai Lau 	while (1) {
981a3c00e46SMartin KaFai Lau 		if (fn->fn_flags & RTN_TL_ROOT)
982a3c00e46SMartin KaFai Lau 			return NULL;
98366f5d6ceSWei Wang 		pn = rcu_dereference(fn->parent);
98466f5d6ceSWei Wang 		sn = FIB6_SUBTREE(pn);
98566f5d6ceSWei Wang 		if (sn && sn != fn)
98666f5d6ceSWei Wang 			fn = fib6_lookup(sn, NULL, saddr);
987a3c00e46SMartin KaFai Lau 		else
988a3c00e46SMartin KaFai Lau 			fn = pn;
989a3c00e46SMartin KaFai Lau 		if (fn->fn_flags & RTN_RTINFO)
990a3c00e46SMartin KaFai Lau 			return fn;
991a3c00e46SMartin KaFai Lau 	}
992a3c00e46SMartin KaFai Lau }
993c71099acSThomas Graf 
994d3843fe5SWei Wang static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
995d3843fe5SWei Wang 			  bool null_fallback)
996d3843fe5SWei Wang {
997d3843fe5SWei Wang 	struct rt6_info *rt = *prt;
998d3843fe5SWei Wang 
999d3843fe5SWei Wang 	if (dst_hold_safe(&rt->dst))
1000d3843fe5SWei Wang 		return true;
1001d3843fe5SWei Wang 	if (null_fallback) {
1002d3843fe5SWei Wang 		rt = net->ipv6.ip6_null_entry;
1003d3843fe5SWei Wang 		dst_hold(&rt->dst);
1004d3843fe5SWei Wang 	} else {
1005d3843fe5SWei Wang 		rt = NULL;
1006d3843fe5SWei Wang 	}
1007d3843fe5SWei Wang 	*prt = rt;
1008d3843fe5SWei Wang 	return false;
1009d3843fe5SWei Wang }
1010d3843fe5SWei Wang 
1011dec9b0e2SDavid Ahern /* called with rcu_lock held */
10128d1c802bSDavid Ahern static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
1013dec9b0e2SDavid Ahern {
10143b6761d1SDavid Ahern 	unsigned short flags = fib6_info_dst_flags(rt);
1015dec9b0e2SDavid Ahern 	struct net_device *dev = rt->fib6_nh.nh_dev;
1016dec9b0e2SDavid Ahern 	struct rt6_info *nrt;
1017dec9b0e2SDavid Ahern 
101893531c67SDavid Ahern 	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
1019dec9b0e2SDavid Ahern 	if (nrt)
1020dec9b0e2SDavid Ahern 		ip6_rt_copy_init(nrt, rt);
1021dec9b0e2SDavid Ahern 
1022dec9b0e2SDavid Ahern 	return nrt;
1023dec9b0e2SDavid Ahern }
1024dec9b0e2SDavid Ahern 
10258ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_lookup(struct net *net,
10268ed67789SDaniel Lezcano 					     struct fib6_table *table,
1027b75cc8f9SDavid Ahern 					     struct flowi6 *fl6,
1028b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
1029b75cc8f9SDavid Ahern 					     int flags)
10301da177e4SLinus Torvalds {
10318d1c802bSDavid Ahern 	struct fib6_info *f6i;
10321da177e4SLinus Torvalds 	struct fib6_node *fn;
103323fb93a4SDavid Ahern 	struct rt6_info *rt;
10341da177e4SLinus Torvalds 
1035b6cdbc85SDavid Ahern 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1036b6cdbc85SDavid Ahern 		flags &= ~RT6_LOOKUP_F_IFACE;
1037b6cdbc85SDavid Ahern 
103866f5d6ceSWei Wang 	rcu_read_lock();
10394c9483b2SDavid S. Miller 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1040c71099acSThomas Graf restart:
104123fb93a4SDavid Ahern 	f6i = rcu_dereference(fn->leaf);
104223fb93a4SDavid Ahern 	if (!f6i) {
104323fb93a4SDavid Ahern 		f6i = net->ipv6.fib6_null_entry;
104466f5d6ceSWei Wang 	} else {
104523fb93a4SDavid Ahern 		f6i = rt6_device_match(net, f6i, &fl6->saddr,
104666f5d6ceSWei Wang 				      fl6->flowi6_oif, flags);
104793c2fb25SDavid Ahern 		if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
104823fb93a4SDavid Ahern 			f6i = rt6_multipath_select(net, f6i, fl6,
104923fb93a4SDavid Ahern 						   fl6->flowi6_oif, skb, flags);
105066f5d6ceSWei Wang 	}
105123fb93a4SDavid Ahern 	if (f6i == net->ipv6.fib6_null_entry) {
1052a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
1053a3c00e46SMartin KaFai Lau 		if (fn)
1054a3c00e46SMartin KaFai Lau 			goto restart;
1055a3c00e46SMartin KaFai Lau 	}
105623fb93a4SDavid Ahern 
10572b760fcfSWei Wang 	/* Search through exception table */
105823fb93a4SDavid Ahern 	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
105923fb93a4SDavid Ahern 	if (rt) {
1060d3843fe5SWei Wang 		if (ip6_hold_safe(net, &rt, true))
1061d3843fe5SWei Wang 			dst_use_noref(&rt->dst, jiffies);
106223fb93a4SDavid Ahern 	} else if (f6i == net->ipv6.fib6_null_entry) {
1063dec9b0e2SDavid Ahern 		rt = net->ipv6.ip6_null_entry;
1064dec9b0e2SDavid Ahern 		dst_hold(&rt->dst);
106523fb93a4SDavid Ahern 	} else {
106623fb93a4SDavid Ahern 		rt = ip6_create_rt_rcu(f6i);
106723fb93a4SDavid Ahern 		if (!rt) {
106823fb93a4SDavid Ahern 			rt = net->ipv6.ip6_null_entry;
106923fb93a4SDavid Ahern 			dst_hold(&rt->dst);
107023fb93a4SDavid Ahern 		}
1071dec9b0e2SDavid Ahern 	}
1072d3843fe5SWei Wang 
107366f5d6ceSWei Wang 	rcu_read_unlock();
1074b811580dSDavid Ahern 
1075b65f164dSPaolo Abeni 	trace_fib6_table_lookup(net, rt, table, fl6);
1076b811580dSDavid Ahern 
10771da177e4SLinus Torvalds 	return rt;
1078c71099acSThomas Graf }
1079c71099acSThomas Graf 
1080ea6e574eSFlorian Westphal struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1081b75cc8f9SDavid Ahern 				   const struct sk_buff *skb, int flags)
1082ea6e574eSFlorian Westphal {
1083b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1084ea6e574eSFlorian Westphal }
1085ea6e574eSFlorian Westphal EXPORT_SYMBOL_GPL(ip6_route_lookup);
1086ea6e574eSFlorian Westphal 
10879acd9f3aSYOSHIFUJI Hideaki struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1088b75cc8f9SDavid Ahern 			    const struct in6_addr *saddr, int oif,
1089b75cc8f9SDavid Ahern 			    const struct sk_buff *skb, int strict)
1090c71099acSThomas Graf {
10914c9483b2SDavid S. Miller 	struct flowi6 fl6 = {
10924c9483b2SDavid S. Miller 		.flowi6_oif = oif,
10934c9483b2SDavid S. Miller 		.daddr = *daddr,
1094c71099acSThomas Graf 	};
1095c71099acSThomas Graf 	struct dst_entry *dst;
109677d16f45SYOSHIFUJI Hideaki 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1097c71099acSThomas Graf 
1098adaa70bbSThomas Graf 	if (saddr) {
10994c9483b2SDavid S. Miller 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1100adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1101adaa70bbSThomas Graf 	}
1102adaa70bbSThomas Graf 
1103b75cc8f9SDavid Ahern 	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1104c71099acSThomas Graf 	if (dst->error == 0)
1105c71099acSThomas Graf 		return (struct rt6_info *) dst;
1106c71099acSThomas Graf 
1107c71099acSThomas Graf 	dst_release(dst);
1108c71099acSThomas Graf 
11091da177e4SLinus Torvalds 	return NULL;
11101da177e4SLinus Torvalds }
11117159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(rt6_lookup);
11127159039aSYOSHIFUJI Hideaki 
1113c71099acSThomas Graf /* ip6_ins_rt is called with FREE table->tb6_lock.
11141cfb71eeSWei Wang  * It takes new route entry, the addition fails by any reason the
11151cfb71eeSWei Wang  * route is released.
11161cfb71eeSWei Wang  * Caller must hold dst before calling it.
11171da177e4SLinus Torvalds  */
11181da177e4SLinus Torvalds 
11198d1c802bSDavid Ahern static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1120333c4301SDavid Ahern 			struct netlink_ext_ack *extack)
11211da177e4SLinus Torvalds {
11221da177e4SLinus Torvalds 	int err;
1123c71099acSThomas Graf 	struct fib6_table *table;
11241da177e4SLinus Torvalds 
112593c2fb25SDavid Ahern 	table = rt->fib6_table;
112666f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
1127d4ead6b3SDavid Ahern 	err = fib6_add(&table->tb6_root, rt, info, extack);
112866f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
11291da177e4SLinus Torvalds 
11301da177e4SLinus Torvalds 	return err;
11311da177e4SLinus Torvalds }
11321da177e4SLinus Torvalds 
11338d1c802bSDavid Ahern int ip6_ins_rt(struct net *net, struct fib6_info *rt)
113440e22e8fSThomas Graf {
1135afb1d4b5SDavid Ahern 	struct nl_info info = {	.nl_net = net, };
1136e715b6d3SFlorian Westphal 
1137d4ead6b3SDavid Ahern 	return __ip6_ins_rt(rt, &info, NULL);
113840e22e8fSThomas Graf }
113940e22e8fSThomas Graf 
11408d1c802bSDavid Ahern static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
114121efcfa0SEric Dumazet 					   const struct in6_addr *daddr,
1142b71d1d42SEric Dumazet 					   const struct in6_addr *saddr)
11431da177e4SLinus Torvalds {
11444832c30dSDavid Ahern 	struct net_device *dev;
11451da177e4SLinus Torvalds 	struct rt6_info *rt;
11461da177e4SLinus Torvalds 
11471da177e4SLinus Torvalds 	/*
11481da177e4SLinus Torvalds 	 *	Clone the route.
11491da177e4SLinus Torvalds 	 */
11501da177e4SLinus Torvalds 
11514832c30dSDavid Ahern 	rcu_read_lock();
11524832c30dSDavid Ahern 	dev = ip6_rt_get_dev_rcu(ort);
115393531c67SDavid Ahern 	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
11544832c30dSDavid Ahern 	rcu_read_unlock();
115583a09abdSMartin KaFai Lau 	if (!rt)
115683a09abdSMartin KaFai Lau 		return NULL;
115783a09abdSMartin KaFai Lau 
115883a09abdSMartin KaFai Lau 	ip6_rt_copy_init(rt, ort);
11598b9df265SMartin KaFai Lau 	rt->rt6i_flags |= RTF_CACHE;
116083a09abdSMartin KaFai Lau 	rt->dst.flags |= DST_HOST;
116183a09abdSMartin KaFai Lau 	rt->rt6i_dst.addr = *daddr;
116283a09abdSMartin KaFai Lau 	rt->rt6i_dst.plen = 128;
11638b9df265SMartin KaFai Lau 
11648b9df265SMartin KaFai Lau 	if (!rt6_is_gw_or_nonexthop(ort)) {
116593c2fb25SDavid Ahern 		if (ort->fib6_dst.plen != 128 &&
116693c2fb25SDavid Ahern 		    ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
116758c4fb86SYOSHIFUJI Hideaki 			rt->rt6i_flags |= RTF_ANYCAST;
11681da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
11691da177e4SLinus Torvalds 		if (rt->rt6i_src.plen && saddr) {
11704e3fd7a0SAlexey Dobriyan 			rt->rt6i_src.addr = *saddr;
11711da177e4SLinus Torvalds 			rt->rt6i_src.plen = 128;
11721da177e4SLinus Torvalds 		}
11731da177e4SLinus Torvalds #endif
117495a9a5baSYOSHIFUJI Hideaki 	}
117595a9a5baSYOSHIFUJI Hideaki 
1176299d9939SYOSHIFUJI Hideaki 	return rt;
1177299d9939SYOSHIFUJI Hideaki }
1178299d9939SYOSHIFUJI Hideaki 
11798d1c802bSDavid Ahern static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
1180d52d3997SMartin KaFai Lau {
11813b6761d1SDavid Ahern 	unsigned short flags = fib6_info_dst_flags(rt);
11824832c30dSDavid Ahern 	struct net_device *dev;
1183d52d3997SMartin KaFai Lau 	struct rt6_info *pcpu_rt;
1184d52d3997SMartin KaFai Lau 
11854832c30dSDavid Ahern 	rcu_read_lock();
11864832c30dSDavid Ahern 	dev = ip6_rt_get_dev_rcu(rt);
118793531c67SDavid Ahern 	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
11884832c30dSDavid Ahern 	rcu_read_unlock();
1189d52d3997SMartin KaFai Lau 	if (!pcpu_rt)
1190d52d3997SMartin KaFai Lau 		return NULL;
1191d52d3997SMartin KaFai Lau 	ip6_rt_copy_init(pcpu_rt, rt);
1192d52d3997SMartin KaFai Lau 	pcpu_rt->rt6i_flags |= RTF_PCPU;
1193d52d3997SMartin KaFai Lau 	return pcpu_rt;
1194d52d3997SMartin KaFai Lau }
1195d52d3997SMartin KaFai Lau 
119666f5d6ceSWei Wang /* It should be called with rcu_read_lock() acquired */
11978d1c802bSDavid Ahern static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
1198d52d3997SMartin KaFai Lau {
1199a73e4195SMartin KaFai Lau 	struct rt6_info *pcpu_rt, **p;
1200d52d3997SMartin KaFai Lau 
1201d52d3997SMartin KaFai Lau 	p = this_cpu_ptr(rt->rt6i_pcpu);
1202d52d3997SMartin KaFai Lau 	pcpu_rt = *p;
1203d52d3997SMartin KaFai Lau 
1204d4ead6b3SDavid Ahern 	if (pcpu_rt)
1205d4ead6b3SDavid Ahern 		ip6_hold_safe(NULL, &pcpu_rt, false);
1206d3843fe5SWei Wang 
1207a73e4195SMartin KaFai Lau 	return pcpu_rt;
1208a73e4195SMartin KaFai Lau }
1209a73e4195SMartin KaFai Lau 
1210afb1d4b5SDavid Ahern static struct rt6_info *rt6_make_pcpu_route(struct net *net,
12118d1c802bSDavid Ahern 					    struct fib6_info *rt)
1212a73e4195SMartin KaFai Lau {
1213a73e4195SMartin KaFai Lau 	struct rt6_info *pcpu_rt, *prev, **p;
1214d52d3997SMartin KaFai Lau 
1215d52d3997SMartin KaFai Lau 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1216d52d3997SMartin KaFai Lau 	if (!pcpu_rt) {
12179c7370a1SMartin KaFai Lau 		dst_hold(&net->ipv6.ip6_null_entry->dst);
12189c7370a1SMartin KaFai Lau 		return net->ipv6.ip6_null_entry;
1219d52d3997SMartin KaFai Lau 	}
1220d52d3997SMartin KaFai Lau 
1221a94b9367SWei Wang 	dst_hold(&pcpu_rt->dst);
1222a73e4195SMartin KaFai Lau 	p = this_cpu_ptr(rt->rt6i_pcpu);
1223d52d3997SMartin KaFai Lau 	prev = cmpxchg(p, NULL, pcpu_rt);
1224951f788aSEric Dumazet 	BUG_ON(prev);
1225a94b9367SWei Wang 
1226d52d3997SMartin KaFai Lau 	return pcpu_rt;
1227d52d3997SMartin KaFai Lau }
1228d52d3997SMartin KaFai Lau 
/* Exception hash table implementation.
 *
 * rt6_exception_lock must be held by callers of the helpers below that
 * remove entries from, or search, an exception bucket.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
123235732d01SWei Wang 
123335732d01SWei Wang /* Remove rt6_ex from hash table and free the memory
123435732d01SWei Wang  * Caller must hold rt6_exception_lock
123535732d01SWei Wang  */
123635732d01SWei Wang static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
123735732d01SWei Wang 				 struct rt6_exception *rt6_ex)
123835732d01SWei Wang {
1239b2427e67SColin Ian King 	struct net *net;
124081eb8447SWei Wang 
124135732d01SWei Wang 	if (!bucket || !rt6_ex)
124235732d01SWei Wang 		return;
1243b2427e67SColin Ian King 
1244b2427e67SColin Ian King 	net = dev_net(rt6_ex->rt6i->dst.dev);
124535732d01SWei Wang 	hlist_del_rcu(&rt6_ex->hlist);
124677634cc6SDavid Ahern 	dst_release(&rt6_ex->rt6i->dst);
124735732d01SWei Wang 	kfree_rcu(rt6_ex, rcu);
124835732d01SWei Wang 	WARN_ON_ONCE(!bucket->depth);
124935732d01SWei Wang 	bucket->depth--;
125081eb8447SWei Wang 	net->ipv6.rt6_stats->fib_rt_cache--;
125135732d01SWei Wang }
125235732d01SWei Wang 
125335732d01SWei Wang /* Remove oldest rt6_ex in bucket and free the memory
125435732d01SWei Wang  * Caller must hold rt6_exception_lock
125535732d01SWei Wang  */
125635732d01SWei Wang static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
125735732d01SWei Wang {
125835732d01SWei Wang 	struct rt6_exception *rt6_ex, *oldest = NULL;
125935732d01SWei Wang 
126035732d01SWei Wang 	if (!bucket)
126135732d01SWei Wang 		return;
126235732d01SWei Wang 
126335732d01SWei Wang 	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
126435732d01SWei Wang 		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
126535732d01SWei Wang 			oldest = rt6_ex;
126635732d01SWei Wang 	}
126735732d01SWei Wang 	rt6_remove_exception(bucket, oldest);
126835732d01SWei Wang }
126935732d01SWei Wang 
127035732d01SWei Wang static u32 rt6_exception_hash(const struct in6_addr *dst,
127135732d01SWei Wang 			      const struct in6_addr *src)
127235732d01SWei Wang {
127335732d01SWei Wang 	static u32 seed __read_mostly;
127435732d01SWei Wang 	u32 val;
127535732d01SWei Wang 
127635732d01SWei Wang 	net_get_random_once(&seed, sizeof(seed));
127735732d01SWei Wang 	val = jhash(dst, sizeof(*dst), seed);
127835732d01SWei Wang 
127935732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
128035732d01SWei Wang 	if (src)
128135732d01SWei Wang 		val = jhash(src, sizeof(*src), val);
128235732d01SWei Wang #endif
128335732d01SWei Wang 	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
128435732d01SWei Wang }
128535732d01SWei Wang 
128635732d01SWei Wang /* Helper function to find the cached rt in the hash table
128735732d01SWei Wang  * and update bucket pointer to point to the bucket for this
128835732d01SWei Wang  * (daddr, saddr) pair
128935732d01SWei Wang  * Caller must hold rt6_exception_lock
129035732d01SWei Wang  */
129135732d01SWei Wang static struct rt6_exception *
129235732d01SWei Wang __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
129335732d01SWei Wang 			      const struct in6_addr *daddr,
129435732d01SWei Wang 			      const struct in6_addr *saddr)
129535732d01SWei Wang {
129635732d01SWei Wang 	struct rt6_exception *rt6_ex;
129735732d01SWei Wang 	u32 hval;
129835732d01SWei Wang 
129935732d01SWei Wang 	if (!(*bucket) || !daddr)
130035732d01SWei Wang 		return NULL;
130135732d01SWei Wang 
130235732d01SWei Wang 	hval = rt6_exception_hash(daddr, saddr);
130335732d01SWei Wang 	*bucket += hval;
130435732d01SWei Wang 
130535732d01SWei Wang 	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
130635732d01SWei Wang 		struct rt6_info *rt6 = rt6_ex->rt6i;
130735732d01SWei Wang 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
130835732d01SWei Wang 
130935732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
131035732d01SWei Wang 		if (matched && saddr)
131135732d01SWei Wang 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
131235732d01SWei Wang #endif
131335732d01SWei Wang 		if (matched)
131435732d01SWei Wang 			return rt6_ex;
131535732d01SWei Wang 	}
131635732d01SWei Wang 	return NULL;
131735732d01SWei Wang }
131835732d01SWei Wang 
131935732d01SWei Wang /* Helper function to find the cached rt in the hash table
132035732d01SWei Wang  * and update bucket pointer to point to the bucket for this
132135732d01SWei Wang  * (daddr, saddr) pair
132235732d01SWei Wang  * Caller must hold rcu_read_lock()
132335732d01SWei Wang  */
132435732d01SWei Wang static struct rt6_exception *
132535732d01SWei Wang __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
132635732d01SWei Wang 			 const struct in6_addr *daddr,
132735732d01SWei Wang 			 const struct in6_addr *saddr)
132835732d01SWei Wang {
132935732d01SWei Wang 	struct rt6_exception *rt6_ex;
133035732d01SWei Wang 	u32 hval;
133135732d01SWei Wang 
133235732d01SWei Wang 	WARN_ON_ONCE(!rcu_read_lock_held());
133335732d01SWei Wang 
133435732d01SWei Wang 	if (!(*bucket) || !daddr)
133535732d01SWei Wang 		return NULL;
133635732d01SWei Wang 
133735732d01SWei Wang 	hval = rt6_exception_hash(daddr, saddr);
133835732d01SWei Wang 	*bucket += hval;
133935732d01SWei Wang 
134035732d01SWei Wang 	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
134135732d01SWei Wang 		struct rt6_info *rt6 = rt6_ex->rt6i;
134235732d01SWei Wang 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
134335732d01SWei Wang 
134435732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
134535732d01SWei Wang 		if (matched && saddr)
134635732d01SWei Wang 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
134735732d01SWei Wang #endif
134835732d01SWei Wang 		if (matched)
134935732d01SWei Wang 			return rt6_ex;
135035732d01SWei Wang 	}
135135732d01SWei Wang 	return NULL;
135235732d01SWei Wang }
135335732d01SWei Wang 
13548d1c802bSDavid Ahern static unsigned int fib6_mtu(const struct fib6_info *rt)
1355d4ead6b3SDavid Ahern {
1356d4ead6b3SDavid Ahern 	unsigned int mtu;
1357d4ead6b3SDavid Ahern 
135893c2fb25SDavid Ahern 	mtu = rt->fib6_pmtu ? : rt->fib6_idev->cnf.mtu6;
1359d4ead6b3SDavid Ahern 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1360d4ead6b3SDavid Ahern 
1361d4ead6b3SDavid Ahern 	return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1362d4ead6b3SDavid Ahern }
1363d4ead6b3SDavid Ahern 
136435732d01SWei Wang static int rt6_insert_exception(struct rt6_info *nrt,
13658d1c802bSDavid Ahern 				struct fib6_info *ort)
136635732d01SWei Wang {
13675e670d84SDavid Ahern 	struct net *net = dev_net(nrt->dst.dev);
136835732d01SWei Wang 	struct rt6_exception_bucket *bucket;
136935732d01SWei Wang 	struct in6_addr *src_key = NULL;
137035732d01SWei Wang 	struct rt6_exception *rt6_ex;
137135732d01SWei Wang 	int err = 0;
137235732d01SWei Wang 
137335732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
137435732d01SWei Wang 
137535732d01SWei Wang 	if (ort->exception_bucket_flushed) {
137635732d01SWei Wang 		err = -EINVAL;
137735732d01SWei Wang 		goto out;
137835732d01SWei Wang 	}
137935732d01SWei Wang 
138035732d01SWei Wang 	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
138135732d01SWei Wang 					lockdep_is_held(&rt6_exception_lock));
138235732d01SWei Wang 	if (!bucket) {
138335732d01SWei Wang 		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
138435732d01SWei Wang 				 GFP_ATOMIC);
138535732d01SWei Wang 		if (!bucket) {
138635732d01SWei Wang 			err = -ENOMEM;
138735732d01SWei Wang 			goto out;
138835732d01SWei Wang 		}
138935732d01SWei Wang 		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
139035732d01SWei Wang 	}
139135732d01SWei Wang 
139235732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
139335732d01SWei Wang 	/* rt6i_src.plen != 0 indicates ort is in subtree
139435732d01SWei Wang 	 * and exception table is indexed by a hash of
139535732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
139635732d01SWei Wang 	 * Otherwise, the exception table is indexed by
139735732d01SWei Wang 	 * a hash of only rt6i_dst.
139835732d01SWei Wang 	 */
139993c2fb25SDavid Ahern 	if (ort->fib6_src.plen)
140035732d01SWei Wang 		src_key = &nrt->rt6i_src.addr;
140135732d01SWei Wang #endif
140260006a48SWei Wang 
140360006a48SWei Wang 	/* Update rt6i_prefsrc as it could be changed
140460006a48SWei Wang 	 * in rt6_remove_prefsrc()
140560006a48SWei Wang 	 */
140693c2fb25SDavid Ahern 	nrt->rt6i_prefsrc = ort->fib6_prefsrc;
1407f5bbe7eeSWei Wang 	/* rt6_mtu_change() might lower mtu on ort.
1408f5bbe7eeSWei Wang 	 * Only insert this exception route if its mtu
1409f5bbe7eeSWei Wang 	 * is less than ort's mtu value.
1410f5bbe7eeSWei Wang 	 */
1411d4ead6b3SDavid Ahern 	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
1412f5bbe7eeSWei Wang 		err = -EINVAL;
1413f5bbe7eeSWei Wang 		goto out;
1414f5bbe7eeSWei Wang 	}
141560006a48SWei Wang 
141635732d01SWei Wang 	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
141735732d01SWei Wang 					       src_key);
141835732d01SWei Wang 	if (rt6_ex)
141935732d01SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
142035732d01SWei Wang 
142135732d01SWei Wang 	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
142235732d01SWei Wang 	if (!rt6_ex) {
142335732d01SWei Wang 		err = -ENOMEM;
142435732d01SWei Wang 		goto out;
142535732d01SWei Wang 	}
142635732d01SWei Wang 	rt6_ex->rt6i = nrt;
142735732d01SWei Wang 	rt6_ex->stamp = jiffies;
142835732d01SWei Wang 	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
142935732d01SWei Wang 	bucket->depth++;
143081eb8447SWei Wang 	net->ipv6.rt6_stats->fib_rt_cache++;
143135732d01SWei Wang 
143235732d01SWei Wang 	if (bucket->depth > FIB6_MAX_DEPTH)
143335732d01SWei Wang 		rt6_exception_remove_oldest(bucket);
143435732d01SWei Wang 
143535732d01SWei Wang out:
143635732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
143735732d01SWei Wang 
143835732d01SWei Wang 	/* Update fn->fn_sernum to invalidate all cached dst */
1439b886d5f2SPaolo Abeni 	if (!err) {
144093c2fb25SDavid Ahern 		spin_lock_bh(&ort->fib6_table->tb6_lock);
14417aef6859SDavid Ahern 		fib6_update_sernum(net, ort);
144293c2fb25SDavid Ahern 		spin_unlock_bh(&ort->fib6_table->tb6_lock);
1443b886d5f2SPaolo Abeni 		fib6_force_start_gc(net);
1444b886d5f2SPaolo Abeni 	}
144535732d01SWei Wang 
144635732d01SWei Wang 	return err;
144735732d01SWei Wang }
144835732d01SWei Wang 
14498d1c802bSDavid Ahern void rt6_flush_exceptions(struct fib6_info *rt)
145035732d01SWei Wang {
145135732d01SWei Wang 	struct rt6_exception_bucket *bucket;
145235732d01SWei Wang 	struct rt6_exception *rt6_ex;
145335732d01SWei Wang 	struct hlist_node *tmp;
145435732d01SWei Wang 	int i;
145535732d01SWei Wang 
145635732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
145735732d01SWei Wang 	/* Prevent rt6_insert_exception() to recreate the bucket list */
145835732d01SWei Wang 	rt->exception_bucket_flushed = 1;
145935732d01SWei Wang 
146035732d01SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
146135732d01SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
146235732d01SWei Wang 	if (!bucket)
146335732d01SWei Wang 		goto out;
146435732d01SWei Wang 
146535732d01SWei Wang 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
146635732d01SWei Wang 		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
146735732d01SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
146835732d01SWei Wang 		WARN_ON_ONCE(bucket->depth);
146935732d01SWei Wang 		bucket++;
147035732d01SWei Wang 	}
147135732d01SWei Wang 
147235732d01SWei Wang out:
147335732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
147435732d01SWei Wang }
147535732d01SWei Wang 
147635732d01SWei Wang /* Find cached rt in the hash table inside passed in rt
147735732d01SWei Wang  * Caller has to hold rcu_read_lock()
147835732d01SWei Wang  */
14798d1c802bSDavid Ahern static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
148035732d01SWei Wang 					   struct in6_addr *daddr,
148135732d01SWei Wang 					   struct in6_addr *saddr)
148235732d01SWei Wang {
148335732d01SWei Wang 	struct rt6_exception_bucket *bucket;
148435732d01SWei Wang 	struct in6_addr *src_key = NULL;
148535732d01SWei Wang 	struct rt6_exception *rt6_ex;
148635732d01SWei Wang 	struct rt6_info *res = NULL;
148735732d01SWei Wang 
148835732d01SWei Wang 	bucket = rcu_dereference(rt->rt6i_exception_bucket);
148935732d01SWei Wang 
149035732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
149135732d01SWei Wang 	/* rt6i_src.plen != 0 indicates rt is in subtree
149235732d01SWei Wang 	 * and exception table is indexed by a hash of
149335732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
149435732d01SWei Wang 	 * Otherwise, the exception table is indexed by
149535732d01SWei Wang 	 * a hash of only rt6i_dst.
149635732d01SWei Wang 	 */
149793c2fb25SDavid Ahern 	if (rt->fib6_src.plen)
149835732d01SWei Wang 		src_key = saddr;
149935732d01SWei Wang #endif
150035732d01SWei Wang 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
150135732d01SWei Wang 
150235732d01SWei Wang 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
150335732d01SWei Wang 		res = rt6_ex->rt6i;
150435732d01SWei Wang 
150535732d01SWei Wang 	return res;
150635732d01SWei Wang }
150735732d01SWei Wang 
150835732d01SWei Wang /* Remove the passed in cached rt from the hash table that contains it */
150923fb93a4SDavid Ahern static int rt6_remove_exception_rt(struct rt6_info *rt)
151035732d01SWei Wang {
151135732d01SWei Wang 	struct rt6_exception_bucket *bucket;
15128d1c802bSDavid Ahern 	struct fib6_info *from = rt->from;
151335732d01SWei Wang 	struct in6_addr *src_key = NULL;
151435732d01SWei Wang 	struct rt6_exception *rt6_ex;
151535732d01SWei Wang 	int err;
151635732d01SWei Wang 
151735732d01SWei Wang 	if (!from ||
1518442d713bSColin Ian King 	    !(rt->rt6i_flags & RTF_CACHE))
151935732d01SWei Wang 		return -EINVAL;
152035732d01SWei Wang 
152135732d01SWei Wang 	if (!rcu_access_pointer(from->rt6i_exception_bucket))
152235732d01SWei Wang 		return -ENOENT;
152335732d01SWei Wang 
152435732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
152535732d01SWei Wang 	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
152635732d01SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
152735732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
152835732d01SWei Wang 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
152935732d01SWei Wang 	 * and exception table is indexed by a hash of
153035732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
153135732d01SWei Wang 	 * Otherwise, the exception table is indexed by
153235732d01SWei Wang 	 * a hash of only rt6i_dst.
153335732d01SWei Wang 	 */
153493c2fb25SDavid Ahern 	if (from->fib6_src.plen)
153535732d01SWei Wang 		src_key = &rt->rt6i_src.addr;
153635732d01SWei Wang #endif
153735732d01SWei Wang 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
153835732d01SWei Wang 					       &rt->rt6i_dst.addr,
153935732d01SWei Wang 					       src_key);
154035732d01SWei Wang 	if (rt6_ex) {
154135732d01SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
154235732d01SWei Wang 		err = 0;
154335732d01SWei Wang 	} else {
154435732d01SWei Wang 		err = -ENOENT;
154535732d01SWei Wang 	}
154635732d01SWei Wang 
154735732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
154835732d01SWei Wang 	return err;
154935732d01SWei Wang }
155035732d01SWei Wang 
155135732d01SWei Wang /* Find rt6_ex which contains the passed in rt cache and
155235732d01SWei Wang  * refresh its stamp
155335732d01SWei Wang  */
155435732d01SWei Wang static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
155535732d01SWei Wang {
155635732d01SWei Wang 	struct rt6_exception_bucket *bucket;
15578d1c802bSDavid Ahern 	struct fib6_info *from = rt->from;
155835732d01SWei Wang 	struct in6_addr *src_key = NULL;
155935732d01SWei Wang 	struct rt6_exception *rt6_ex;
156035732d01SWei Wang 
156135732d01SWei Wang 	if (!from ||
1562442d713bSColin Ian King 	    !(rt->rt6i_flags & RTF_CACHE))
156335732d01SWei Wang 		return;
156435732d01SWei Wang 
156535732d01SWei Wang 	rcu_read_lock();
156635732d01SWei Wang 	bucket = rcu_dereference(from->rt6i_exception_bucket);
156735732d01SWei Wang 
156835732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
156935732d01SWei Wang 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
157035732d01SWei Wang 	 * and exception table is indexed by a hash of
157135732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
157235732d01SWei Wang 	 * Otherwise, the exception table is indexed by
157335732d01SWei Wang 	 * a hash of only rt6i_dst.
157435732d01SWei Wang 	 */
157593c2fb25SDavid Ahern 	if (from->fib6_src.plen)
157635732d01SWei Wang 		src_key = &rt->rt6i_src.addr;
157735732d01SWei Wang #endif
157835732d01SWei Wang 	rt6_ex = __rt6_find_exception_rcu(&bucket,
157935732d01SWei Wang 					  &rt->rt6i_dst.addr,
158035732d01SWei Wang 					  src_key);
158135732d01SWei Wang 	if (rt6_ex)
158235732d01SWei Wang 		rt6_ex->stamp = jiffies;
158335732d01SWei Wang 
158435732d01SWei Wang 	rcu_read_unlock();
158535732d01SWei Wang }
158635732d01SWei Wang 
15878d1c802bSDavid Ahern static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
158860006a48SWei Wang {
158960006a48SWei Wang 	struct rt6_exception_bucket *bucket;
159060006a48SWei Wang 	struct rt6_exception *rt6_ex;
159160006a48SWei Wang 	int i;
159260006a48SWei Wang 
159360006a48SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
159460006a48SWei Wang 					lockdep_is_held(&rt6_exception_lock));
159560006a48SWei Wang 
159660006a48SWei Wang 	if (bucket) {
159760006a48SWei Wang 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
159860006a48SWei Wang 			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
159960006a48SWei Wang 				rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
160060006a48SWei Wang 			}
160160006a48SWei Wang 			bucket++;
160260006a48SWei Wang 		}
160360006a48SWei Wang 	}
160460006a48SWei Wang }
160560006a48SWei Wang 
1606e9fa1495SStefano Brivio static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1607e9fa1495SStefano Brivio 					 struct rt6_info *rt, int mtu)
1608e9fa1495SStefano Brivio {
1609e9fa1495SStefano Brivio 	/* If the new MTU is lower than the route PMTU, this new MTU will be the
1610e9fa1495SStefano Brivio 	 * lowest MTU in the path: always allow updating the route PMTU to
1611e9fa1495SStefano Brivio 	 * reflect PMTU decreases.
1612e9fa1495SStefano Brivio 	 *
1613e9fa1495SStefano Brivio 	 * If the new MTU is higher, and the route PMTU is equal to the local
1614e9fa1495SStefano Brivio 	 * MTU, this means the old MTU is the lowest in the path, so allow
1615e9fa1495SStefano Brivio 	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1616e9fa1495SStefano Brivio 	 * handle this.
1617e9fa1495SStefano Brivio 	 */
1618e9fa1495SStefano Brivio 
1619e9fa1495SStefano Brivio 	if (dst_mtu(&rt->dst) >= mtu)
1620e9fa1495SStefano Brivio 		return true;
1621e9fa1495SStefano Brivio 
1622e9fa1495SStefano Brivio 	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1623e9fa1495SStefano Brivio 		return true;
1624e9fa1495SStefano Brivio 
1625e9fa1495SStefano Brivio 	return false;
1626e9fa1495SStefano Brivio }
1627e9fa1495SStefano Brivio 
1628e9fa1495SStefano Brivio static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
16298d1c802bSDavid Ahern 				       struct fib6_info *rt, int mtu)
1630f5bbe7eeSWei Wang {
1631f5bbe7eeSWei Wang 	struct rt6_exception_bucket *bucket;
1632f5bbe7eeSWei Wang 	struct rt6_exception *rt6_ex;
1633f5bbe7eeSWei Wang 	int i;
1634f5bbe7eeSWei Wang 
1635f5bbe7eeSWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1636f5bbe7eeSWei Wang 					lockdep_is_held(&rt6_exception_lock));
1637f5bbe7eeSWei Wang 
1638e9fa1495SStefano Brivio 	if (!bucket)
1639e9fa1495SStefano Brivio 		return;
1640e9fa1495SStefano Brivio 
1641f5bbe7eeSWei Wang 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1642f5bbe7eeSWei Wang 		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1643f5bbe7eeSWei Wang 			struct rt6_info *entry = rt6_ex->rt6i;
1644e9fa1495SStefano Brivio 
1645e9fa1495SStefano Brivio 			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1646d4ead6b3SDavid Ahern 			 * route), the metrics of its rt->from have already
1647f5bbe7eeSWei Wang 			 * been updated.
1648f5bbe7eeSWei Wang 			 */
1649d4ead6b3SDavid Ahern 			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
1650e9fa1495SStefano Brivio 			    rt6_mtu_change_route_allowed(idev, entry, mtu))
1651d4ead6b3SDavid Ahern 				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
1652f5bbe7eeSWei Wang 		}
1653f5bbe7eeSWei Wang 		bucket++;
1654f5bbe7eeSWei Wang 	}
1655f5bbe7eeSWei Wang }
1656f5bbe7eeSWei Wang 
1657b16cb459SWei Wang #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
1658b16cb459SWei Wang 
16598d1c802bSDavid Ahern static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
1660b16cb459SWei Wang 					struct in6_addr *gateway)
1661b16cb459SWei Wang {
1662b16cb459SWei Wang 	struct rt6_exception_bucket *bucket;
1663b16cb459SWei Wang 	struct rt6_exception *rt6_ex;
1664b16cb459SWei Wang 	struct hlist_node *tmp;
1665b16cb459SWei Wang 	int i;
1666b16cb459SWei Wang 
1667b16cb459SWei Wang 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1668b16cb459SWei Wang 		return;
1669b16cb459SWei Wang 
1670b16cb459SWei Wang 	spin_lock_bh(&rt6_exception_lock);
1671b16cb459SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1672b16cb459SWei Wang 				     lockdep_is_held(&rt6_exception_lock));
1673b16cb459SWei Wang 
1674b16cb459SWei Wang 	if (bucket) {
1675b16cb459SWei Wang 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1676b16cb459SWei Wang 			hlist_for_each_entry_safe(rt6_ex, tmp,
1677b16cb459SWei Wang 						  &bucket->chain, hlist) {
1678b16cb459SWei Wang 				struct rt6_info *entry = rt6_ex->rt6i;
1679b16cb459SWei Wang 
1680b16cb459SWei Wang 				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1681b16cb459SWei Wang 				    RTF_CACHE_GATEWAY &&
1682b16cb459SWei Wang 				    ipv6_addr_equal(gateway,
1683b16cb459SWei Wang 						    &entry->rt6i_gateway)) {
1684b16cb459SWei Wang 					rt6_remove_exception(bucket, rt6_ex);
1685b16cb459SWei Wang 				}
1686b16cb459SWei Wang 			}
1687b16cb459SWei Wang 			bucket++;
1688b16cb459SWei Wang 		}
1689b16cb459SWei Wang 	}
1690b16cb459SWei Wang 
1691b16cb459SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
1692b16cb459SWei Wang }
1693b16cb459SWei Wang 
1694c757faa8SWei Wang static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1695c757faa8SWei Wang 				      struct rt6_exception *rt6_ex,
1696c757faa8SWei Wang 				      struct fib6_gc_args *gc_args,
1697c757faa8SWei Wang 				      unsigned long now)
1698c757faa8SWei Wang {
1699c757faa8SWei Wang 	struct rt6_info *rt = rt6_ex->rt6i;
1700c757faa8SWei Wang 
17011859bac0SPaolo Abeni 	/* we are pruning and obsoleting aged-out and non gateway exceptions
17021859bac0SPaolo Abeni 	 * even if others have still references to them, so that on next
17031859bac0SPaolo Abeni 	 * dst_check() such references can be dropped.
17041859bac0SPaolo Abeni 	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
17051859bac0SPaolo Abeni 	 * expired, independently from their aging, as per RFC 8201 section 4
17061859bac0SPaolo Abeni 	 */
170731afeb42SWei Wang 	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
170831afeb42SWei Wang 		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1709c757faa8SWei Wang 			RT6_TRACE("aging clone %p\n", rt);
1710c757faa8SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
1711c757faa8SWei Wang 			return;
171231afeb42SWei Wang 		}
171331afeb42SWei Wang 	} else if (time_after(jiffies, rt->dst.expires)) {
171431afeb42SWei Wang 		RT6_TRACE("purging expired route %p\n", rt);
171531afeb42SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
171631afeb42SWei Wang 		return;
171731afeb42SWei Wang 	}
171831afeb42SWei Wang 
171931afeb42SWei Wang 	if (rt->rt6i_flags & RTF_GATEWAY) {
1720c757faa8SWei Wang 		struct neighbour *neigh;
1721c757faa8SWei Wang 		__u8 neigh_flags = 0;
1722c757faa8SWei Wang 
17231bfa26ffSEric Dumazet 		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
17241bfa26ffSEric Dumazet 		if (neigh)
1725c757faa8SWei Wang 			neigh_flags = neigh->flags;
17261bfa26ffSEric Dumazet 
1727c757faa8SWei Wang 		if (!(neigh_flags & NTF_ROUTER)) {
1728c757faa8SWei Wang 			RT6_TRACE("purging route %p via non-router but gateway\n",
1729c757faa8SWei Wang 				  rt);
1730c757faa8SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
1731c757faa8SWei Wang 			return;
1732c757faa8SWei Wang 		}
1733c757faa8SWei Wang 	}
173431afeb42SWei Wang 
1735c757faa8SWei Wang 	gc_args->more++;
1736c757faa8SWei Wang }
1737c757faa8SWei Wang 
17388d1c802bSDavid Ahern void rt6_age_exceptions(struct fib6_info *rt,
1739c757faa8SWei Wang 			struct fib6_gc_args *gc_args,
1740c757faa8SWei Wang 			unsigned long now)
1741c757faa8SWei Wang {
1742c757faa8SWei Wang 	struct rt6_exception_bucket *bucket;
1743c757faa8SWei Wang 	struct rt6_exception *rt6_ex;
1744c757faa8SWei Wang 	struct hlist_node *tmp;
1745c757faa8SWei Wang 	int i;
1746c757faa8SWei Wang 
1747c757faa8SWei Wang 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1748c757faa8SWei Wang 		return;
1749c757faa8SWei Wang 
17501bfa26ffSEric Dumazet 	rcu_read_lock_bh();
17511bfa26ffSEric Dumazet 	spin_lock(&rt6_exception_lock);
1752c757faa8SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1753c757faa8SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
1754c757faa8SWei Wang 
1755c757faa8SWei Wang 	if (bucket) {
1756c757faa8SWei Wang 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1757c757faa8SWei Wang 			hlist_for_each_entry_safe(rt6_ex, tmp,
1758c757faa8SWei Wang 						  &bucket->chain, hlist) {
1759c757faa8SWei Wang 				rt6_age_examine_exception(bucket, rt6_ex,
1760c757faa8SWei Wang 							  gc_args, now);
1761c757faa8SWei Wang 			}
1762c757faa8SWei Wang 			bucket++;
1763c757faa8SWei Wang 		}
1764c757faa8SWei Wang 	}
17651bfa26ffSEric Dumazet 	spin_unlock(&rt6_exception_lock);
17661bfa26ffSEric Dumazet 	rcu_read_unlock_bh();
1767c757faa8SWei Wang }
1768c757faa8SWei Wang 
17699ff74384SDavid Ahern struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1770b75cc8f9SDavid Ahern 			       int oif, struct flowi6 *fl6,
1771b75cc8f9SDavid Ahern 			       const struct sk_buff *skb, int flags)
17721da177e4SLinus Torvalds {
1773367efcb9SMartin KaFai Lau 	struct fib6_node *fn, *saved_fn;
17748d1c802bSDavid Ahern 	struct fib6_info *f6i;
177523fb93a4SDavid Ahern 	struct rt6_info *rt;
1776c71099acSThomas Graf 	int strict = 0;
17771da177e4SLinus Torvalds 
177877d16f45SYOSHIFUJI Hideaki 	strict |= flags & RT6_LOOKUP_F_IFACE;
1779d5d32e4bSDavid Ahern 	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1780367efcb9SMartin KaFai Lau 	if (net->ipv6.devconf_all->forwarding == 0)
1781367efcb9SMartin KaFai Lau 		strict |= RT6_LOOKUP_F_REACHABLE;
17821da177e4SLinus Torvalds 
178366f5d6ceSWei Wang 	rcu_read_lock();
17841da177e4SLinus Torvalds 
17854c9483b2SDavid S. Miller 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1786367efcb9SMartin KaFai Lau 	saved_fn = fn;
17871da177e4SLinus Torvalds 
1788ca254490SDavid Ahern 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1789ca254490SDavid Ahern 		oif = 0;
1790ca254490SDavid Ahern 
1791a3c00e46SMartin KaFai Lau redo_rt6_select:
179223fb93a4SDavid Ahern 	f6i = rt6_select(net, fn, oif, strict);
179393c2fb25SDavid Ahern 	if (f6i->fib6_nsiblings)
179423fb93a4SDavid Ahern 		f6i = rt6_multipath_select(net, f6i, fl6, oif, skb, strict);
179523fb93a4SDavid Ahern 	if (f6i == net->ipv6.fib6_null_entry) {
1796a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
1797a3c00e46SMartin KaFai Lau 		if (fn)
1798a3c00e46SMartin KaFai Lau 			goto redo_rt6_select;
1799367efcb9SMartin KaFai Lau 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1800367efcb9SMartin KaFai Lau 			/* also consider unreachable route */
1801367efcb9SMartin KaFai Lau 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1802367efcb9SMartin KaFai Lau 			fn = saved_fn;
1803367efcb9SMartin KaFai Lau 			goto redo_rt6_select;
1804367efcb9SMartin KaFai Lau 		}
1805a3c00e46SMartin KaFai Lau 	}
1806a3c00e46SMartin KaFai Lau 
180723fb93a4SDavid Ahern 	if (f6i == net->ipv6.fib6_null_entry) {
1808421842edSDavid Ahern 		rt = net->ipv6.ip6_null_entry;
180966f5d6ceSWei Wang 		rcu_read_unlock();
1810d3843fe5SWei Wang 		dst_hold(&rt->dst);
1811b65f164dSPaolo Abeni 		trace_fib6_table_lookup(net, rt, table, fl6);
1812d3843fe5SWei Wang 		return rt;
181323fb93a4SDavid Ahern 	}
181423fb93a4SDavid Ahern 
181523fb93a4SDavid Ahern 	/*Search through exception table */
181623fb93a4SDavid Ahern 	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
181723fb93a4SDavid Ahern 	if (rt) {
1818d4ead6b3SDavid Ahern 		if (ip6_hold_safe(net, &rt, true))
1819d3843fe5SWei Wang 			dst_use_noref(&rt->dst, jiffies);
1820d4ead6b3SDavid Ahern 
182166f5d6ceSWei Wang 		rcu_read_unlock();
1822b65f164dSPaolo Abeni 		trace_fib6_table_lookup(net, rt, table, fl6);
1823d52d3997SMartin KaFai Lau 		return rt;
18243da59bd9SMartin KaFai Lau 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
182593c2fb25SDavid Ahern 			    !(f6i->fib6_flags & RTF_GATEWAY))) {
18263da59bd9SMartin KaFai Lau 		/* Create a RTF_CACHE clone which will not be
18273da59bd9SMartin KaFai Lau 		 * owned by the fib6 tree.  It is for the special case where
18283da59bd9SMartin KaFai Lau 		 * the daddr in the skb during the neighbor look-up is different
18293da59bd9SMartin KaFai Lau 		 * from the fl6->daddr used to look-up route here.
18303da59bd9SMartin KaFai Lau 		 */
1831c71099acSThomas Graf 
18323da59bd9SMartin KaFai Lau 		struct rt6_info *uncached_rt;
18333da59bd9SMartin KaFai Lau 
183493531c67SDavid Ahern 		fib6_info_hold(f6i);
183566f5d6ceSWei Wang 		rcu_read_unlock();
1836d52d3997SMartin KaFai Lau 
183723fb93a4SDavid Ahern 		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
183893531c67SDavid Ahern 		fib6_info_release(f6i);
18393da59bd9SMartin KaFai Lau 
18401cfb71eeSWei Wang 		if (uncached_rt) {
18411cfb71eeSWei Wang 			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
18421cfb71eeSWei Wang 			 * No need for another dst_hold()
18431cfb71eeSWei Wang 			 */
18448d0b94afSMartin KaFai Lau 			rt6_uncached_list_add(uncached_rt);
184581eb8447SWei Wang 			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
18461cfb71eeSWei Wang 		} else {
18473da59bd9SMartin KaFai Lau 			uncached_rt = net->ipv6.ip6_null_entry;
18483da59bd9SMartin KaFai Lau 			dst_hold(&uncached_rt->dst);
18491cfb71eeSWei Wang 		}
1850b811580dSDavid Ahern 
1851b65f164dSPaolo Abeni 		trace_fib6_table_lookup(net, uncached_rt, table, fl6);
18523da59bd9SMartin KaFai Lau 		return uncached_rt;
18533da59bd9SMartin KaFai Lau 
1854d52d3997SMartin KaFai Lau 	} else {
1855d52d3997SMartin KaFai Lau 		/* Get a percpu copy */
1856d52d3997SMartin KaFai Lau 
1857d52d3997SMartin KaFai Lau 		struct rt6_info *pcpu_rt;
1858d52d3997SMartin KaFai Lau 
1859951f788aSEric Dumazet 		local_bh_disable();
186023fb93a4SDavid Ahern 		pcpu_rt = rt6_get_pcpu_route(f6i);
1861d52d3997SMartin KaFai Lau 
186293531c67SDavid Ahern 		if (!pcpu_rt)
186323fb93a4SDavid Ahern 			pcpu_rt = rt6_make_pcpu_route(net, f6i);
186493531c67SDavid Ahern 
1865951f788aSEric Dumazet 		local_bh_enable();
1866951f788aSEric Dumazet 		rcu_read_unlock();
1867b65f164dSPaolo Abeni 		trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
1868d52d3997SMartin KaFai Lau 		return pcpu_rt;
1869d52d3997SMartin KaFai Lau 	}
1870c71099acSThomas Graf }
18719ff74384SDavid Ahern EXPORT_SYMBOL_GPL(ip6_pol_route);
1872c71099acSThomas Graf 
1873b75cc8f9SDavid Ahern static struct rt6_info *ip6_pol_route_input(struct net *net,
1874b75cc8f9SDavid Ahern 					    struct fib6_table *table,
1875b75cc8f9SDavid Ahern 					    struct flowi6 *fl6,
1876b75cc8f9SDavid Ahern 					    const struct sk_buff *skb,
1877b75cc8f9SDavid Ahern 					    int flags)
18784acad72dSPavel Emelyanov {
1879b75cc8f9SDavid Ahern 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
18804acad72dSPavel Emelyanov }
18814acad72dSPavel Emelyanov 
1882d409b847SMahesh Bandewar struct dst_entry *ip6_route_input_lookup(struct net *net,
188372331bc0SShmulik Ladkani 					 struct net_device *dev,
1884b75cc8f9SDavid Ahern 					 struct flowi6 *fl6,
1885b75cc8f9SDavid Ahern 					 const struct sk_buff *skb,
1886b75cc8f9SDavid Ahern 					 int flags)
188772331bc0SShmulik Ladkani {
188872331bc0SShmulik Ladkani 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
188972331bc0SShmulik Ladkani 		flags |= RT6_LOOKUP_F_IFACE;
189072331bc0SShmulik Ladkani 
1891b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
189272331bc0SShmulik Ladkani }
1893d409b847SMahesh Bandewar EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
189472331bc0SShmulik Ladkani 
189523aebdacSJakub Sitnicki static void ip6_multipath_l3_keys(const struct sk_buff *skb,
18965e5d6fedSRoopa Prabhu 				  struct flow_keys *keys,
18975e5d6fedSRoopa Prabhu 				  struct flow_keys *flkeys)
189823aebdacSJakub Sitnicki {
189923aebdacSJakub Sitnicki 	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
190023aebdacSJakub Sitnicki 	const struct ipv6hdr *key_iph = outer_iph;
19015e5d6fedSRoopa Prabhu 	struct flow_keys *_flkeys = flkeys;
190223aebdacSJakub Sitnicki 	const struct ipv6hdr *inner_iph;
190323aebdacSJakub Sitnicki 	const struct icmp6hdr *icmph;
190423aebdacSJakub Sitnicki 	struct ipv6hdr _inner_iph;
190523aebdacSJakub Sitnicki 
190623aebdacSJakub Sitnicki 	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
190723aebdacSJakub Sitnicki 		goto out;
190823aebdacSJakub Sitnicki 
190923aebdacSJakub Sitnicki 	icmph = icmp6_hdr(skb);
191023aebdacSJakub Sitnicki 	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
191123aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
191223aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
191323aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_PARAMPROB)
191423aebdacSJakub Sitnicki 		goto out;
191523aebdacSJakub Sitnicki 
191623aebdacSJakub Sitnicki 	inner_iph = skb_header_pointer(skb,
191723aebdacSJakub Sitnicki 				       skb_transport_offset(skb) + sizeof(*icmph),
191823aebdacSJakub Sitnicki 				       sizeof(_inner_iph), &_inner_iph);
191923aebdacSJakub Sitnicki 	if (!inner_iph)
192023aebdacSJakub Sitnicki 		goto out;
192123aebdacSJakub Sitnicki 
192223aebdacSJakub Sitnicki 	key_iph = inner_iph;
19235e5d6fedSRoopa Prabhu 	_flkeys = NULL;
192423aebdacSJakub Sitnicki out:
19255e5d6fedSRoopa Prabhu 	if (_flkeys) {
19265e5d6fedSRoopa Prabhu 		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
19275e5d6fedSRoopa Prabhu 		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
19285e5d6fedSRoopa Prabhu 		keys->tags.flow_label = _flkeys->tags.flow_label;
19295e5d6fedSRoopa Prabhu 		keys->basic.ip_proto = _flkeys->basic.ip_proto;
19305e5d6fedSRoopa Prabhu 	} else {
193123aebdacSJakub Sitnicki 		keys->addrs.v6addrs.src = key_iph->saddr;
193223aebdacSJakub Sitnicki 		keys->addrs.v6addrs.dst = key_iph->daddr;
193323aebdacSJakub Sitnicki 		keys->tags.flow_label = ip6_flowinfo(key_iph);
193423aebdacSJakub Sitnicki 		keys->basic.ip_proto = key_iph->nexthdr;
193523aebdacSJakub Sitnicki 	}
19365e5d6fedSRoopa Prabhu }
193723aebdacSJakub Sitnicki 
193823aebdacSJakub Sitnicki /* if skb is set it will be used and fl6 can be NULL */
1939b4bac172SDavid Ahern u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1940b4bac172SDavid Ahern 		       const struct sk_buff *skb, struct flow_keys *flkeys)
194123aebdacSJakub Sitnicki {
194223aebdacSJakub Sitnicki 	struct flow_keys hash_keys;
19439a2a537aSDavid Ahern 	u32 mhash;
194423aebdacSJakub Sitnicki 
1945bbfa047aSDavid S. Miller 	switch (ip6_multipath_hash_policy(net)) {
1946b4bac172SDavid Ahern 	case 0:
19476f74b6c2SDavid Ahern 		memset(&hash_keys, 0, sizeof(hash_keys));
19486f74b6c2SDavid Ahern 		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
19499a2a537aSDavid Ahern 		if (skb) {
19505e5d6fedSRoopa Prabhu 			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
19519a2a537aSDavid Ahern 		} else {
19529a2a537aSDavid Ahern 			hash_keys.addrs.v6addrs.src = fl6->saddr;
19539a2a537aSDavid Ahern 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
19549a2a537aSDavid Ahern 			hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
19559a2a537aSDavid Ahern 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
195623aebdacSJakub Sitnicki 		}
1957b4bac172SDavid Ahern 		break;
1958b4bac172SDavid Ahern 	case 1:
1959b4bac172SDavid Ahern 		if (skb) {
1960b4bac172SDavid Ahern 			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1961b4bac172SDavid Ahern 			struct flow_keys keys;
1962b4bac172SDavid Ahern 
1963b4bac172SDavid Ahern 			/* short-circuit if we already have L4 hash present */
1964b4bac172SDavid Ahern 			if (skb->l4_hash)
1965b4bac172SDavid Ahern 				return skb_get_hash_raw(skb) >> 1;
1966b4bac172SDavid Ahern 
1967b4bac172SDavid Ahern 			memset(&hash_keys, 0, sizeof(hash_keys));
1968b4bac172SDavid Ahern 
1969b4bac172SDavid Ahern                         if (!flkeys) {
1970b4bac172SDavid Ahern 				skb_flow_dissect_flow_keys(skb, &keys, flag);
1971b4bac172SDavid Ahern 				flkeys = &keys;
1972b4bac172SDavid Ahern 			}
1973b4bac172SDavid Ahern 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1974b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
1975b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
1976b4bac172SDavid Ahern 			hash_keys.ports.src = flkeys->ports.src;
1977b4bac172SDavid Ahern 			hash_keys.ports.dst = flkeys->ports.dst;
1978b4bac172SDavid Ahern 			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
1979b4bac172SDavid Ahern 		} else {
1980b4bac172SDavid Ahern 			memset(&hash_keys, 0, sizeof(hash_keys));
1981b4bac172SDavid Ahern 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1982b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.src = fl6->saddr;
1983b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
1984b4bac172SDavid Ahern 			hash_keys.ports.src = fl6->fl6_sport;
1985b4bac172SDavid Ahern 			hash_keys.ports.dst = fl6->fl6_dport;
1986b4bac172SDavid Ahern 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
1987b4bac172SDavid Ahern 		}
1988b4bac172SDavid Ahern 		break;
1989b4bac172SDavid Ahern 	}
19909a2a537aSDavid Ahern 	mhash = flow_hash_from_keys(&hash_keys);
199123aebdacSJakub Sitnicki 
19929a2a537aSDavid Ahern 	return mhash >> 1;
199323aebdacSJakub Sitnicki }
199423aebdacSJakub Sitnicki 
1995c71099acSThomas Graf void ip6_route_input(struct sk_buff *skb)
1996c71099acSThomas Graf {
1997b71d1d42SEric Dumazet 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1998c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(skb->dev);
1999adaa70bbSThomas Graf 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2000904af04dSJiri Benc 	struct ip_tunnel_info *tun_info;
20014c9483b2SDavid S. Miller 	struct flowi6 fl6 = {
2002e0d56fddSDavid Ahern 		.flowi6_iif = skb->dev->ifindex,
20034c9483b2SDavid S. Miller 		.daddr = iph->daddr,
20044c9483b2SDavid S. Miller 		.saddr = iph->saddr,
20056502ca52SYOSHIFUJI Hideaki / 吉藤英明 		.flowlabel = ip6_flowinfo(iph),
20064c9483b2SDavid S. Miller 		.flowi6_mark = skb->mark,
20074c9483b2SDavid S. Miller 		.flowi6_proto = iph->nexthdr,
2008c71099acSThomas Graf 	};
20095e5d6fedSRoopa Prabhu 	struct flow_keys *flkeys = NULL, _flkeys;
2010adaa70bbSThomas Graf 
2011904af04dSJiri Benc 	tun_info = skb_tunnel_info(skb);
201246fa062aSJiri Benc 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2013904af04dSJiri Benc 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
20145e5d6fedSRoopa Prabhu 
20155e5d6fedSRoopa Prabhu 	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
20165e5d6fedSRoopa Prabhu 		flkeys = &_flkeys;
20175e5d6fedSRoopa Prabhu 
201823aebdacSJakub Sitnicki 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2019b4bac172SDavid Ahern 		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
202006e9d040SJiri Benc 	skb_dst_drop(skb);
2021b75cc8f9SDavid Ahern 	skb_dst_set(skb,
2022b75cc8f9SDavid Ahern 		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
2023c71099acSThomas Graf }
2024c71099acSThomas Graf 
2025b75cc8f9SDavid Ahern static struct rt6_info *ip6_pol_route_output(struct net *net,
2026b75cc8f9SDavid Ahern 					     struct fib6_table *table,
2027b75cc8f9SDavid Ahern 					     struct flowi6 *fl6,
2028b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
2029b75cc8f9SDavid Ahern 					     int flags)
2030c71099acSThomas Graf {
2031b75cc8f9SDavid Ahern 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2032c71099acSThomas Graf }
2033c71099acSThomas Graf 
20346f21c96aSPaolo Abeni struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
20356f21c96aSPaolo Abeni 					 struct flowi6 *fl6, int flags)
2036c71099acSThomas Graf {
2037d46a9d67SDavid Ahern 	bool any_src;
2038c71099acSThomas Graf 
20394c1feac5SDavid Ahern 	if (rt6_need_strict(&fl6->daddr)) {
20404c1feac5SDavid Ahern 		struct dst_entry *dst;
20414c1feac5SDavid Ahern 
20424c1feac5SDavid Ahern 		dst = l3mdev_link_scope_lookup(net, fl6);
2043ca254490SDavid Ahern 		if (dst)
2044ca254490SDavid Ahern 			return dst;
20454c1feac5SDavid Ahern 	}
2046ca254490SDavid Ahern 
20471fb9489bSPavel Emelyanov 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
20484dc27d1cSDavid McCullough 
2049d46a9d67SDavid Ahern 	any_src = ipv6_addr_any(&fl6->saddr);
2050741a11d9SDavid Ahern 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2051d46a9d67SDavid Ahern 	    (fl6->flowi6_oif && any_src))
205277d16f45SYOSHIFUJI Hideaki 		flags |= RT6_LOOKUP_F_IFACE;
2053c71099acSThomas Graf 
2054d46a9d67SDavid Ahern 	if (!any_src)
2055adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
20560c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 	else if (sk)
20570c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2058adaa70bbSThomas Graf 
2059b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
20601da177e4SLinus Torvalds }
20616f21c96aSPaolo Abeni EXPORT_SYMBOL_GPL(ip6_route_output_flags);
20621da177e4SLinus Torvalds 
20632774c131SDavid S. Miller struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
206414e50e57SDavid S. Miller {
20655c1e6aa3SDavid S. Miller 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
20661dbe3252SWei Wang 	struct net_device *loopback_dev = net->loopback_dev;
206714e50e57SDavid S. Miller 	struct dst_entry *new = NULL;
206814e50e57SDavid S. Miller 
20691dbe3252SWei Wang 	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
207062cf27e5SSteffen Klassert 		       DST_OBSOLETE_DEAD, 0);
207114e50e57SDavid S. Miller 	if (rt) {
20720a1f5962SMartin KaFai Lau 		rt6_info_init(rt);
207381eb8447SWei Wang 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
20740a1f5962SMartin KaFai Lau 
2075d8d1f30bSChangli Gao 		new = &rt->dst;
207614e50e57SDavid S. Miller 		new->__use = 1;
2077352e512cSHerbert Xu 		new->input = dst_discard;
2078ede2059dSEric W. Biederman 		new->output = dst_discard_out;
207914e50e57SDavid S. Miller 
2080defb3519SDavid S. Miller 		dst_copy_metrics(new, &ort->dst);
208114e50e57SDavid S. Miller 
20821dbe3252SWei Wang 		rt->rt6i_idev = in6_dev_get(loopback_dev);
20834e3fd7a0SAlexey Dobriyan 		rt->rt6i_gateway = ort->rt6i_gateway;
20840a1f5962SMartin KaFai Lau 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
208514e50e57SDavid S. Miller 
208614e50e57SDavid S. Miller 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
208714e50e57SDavid S. Miller #ifdef CONFIG_IPV6_SUBTREES
208814e50e57SDavid S. Miller 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
208914e50e57SDavid S. Miller #endif
209014e50e57SDavid S. Miller 	}
209114e50e57SDavid S. Miller 
209269ead7afSDavid S. Miller 	dst_release(dst_orig);
209369ead7afSDavid S. Miller 	return new ? new : ERR_PTR(-ENOMEM);
209414e50e57SDavid S. Miller }
209514e50e57SDavid S. Miller 
20961da177e4SLinus Torvalds /*
20971da177e4SLinus Torvalds  *	Destination cache support functions
20981da177e4SLinus Torvalds  */
20991da177e4SLinus Torvalds 
21008d1c802bSDavid Ahern static bool fib6_check(struct fib6_info *f6i, u32 cookie)
210193531c67SDavid Ahern {
210293531c67SDavid Ahern 	u32 rt_cookie = 0;
210393531c67SDavid Ahern 
210493531c67SDavid Ahern 	if ((f6i && !rt6_get_cookie_safe(f6i, &rt_cookie)) ||
210593531c67SDavid Ahern 	     rt_cookie != cookie)
210693531c67SDavid Ahern 		return false;
210793531c67SDavid Ahern 
210893531c67SDavid Ahern 	if (fib6_check_expired(f6i))
210993531c67SDavid Ahern 		return false;
211093531c67SDavid Ahern 
211193531c67SDavid Ahern 	return true;
211293531c67SDavid Ahern }
211393531c67SDavid Ahern 
21143da59bd9SMartin KaFai Lau static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
21153da59bd9SMartin KaFai Lau {
211636143645SSteffen Klassert 	u32 rt_cookie = 0;
2117c5cff856SWei Wang 
211893531c67SDavid Ahern 	if ((rt->from && !rt6_get_cookie_safe(rt->from, &rt_cookie)) ||
211993531c67SDavid Ahern 	    rt_cookie != cookie)
21203da59bd9SMartin KaFai Lau 		return NULL;
21213da59bd9SMartin KaFai Lau 
21223da59bd9SMartin KaFai Lau 	if (rt6_check_expired(rt))
21233da59bd9SMartin KaFai Lau 		return NULL;
21243da59bd9SMartin KaFai Lau 
21253da59bd9SMartin KaFai Lau 	return &rt->dst;
21263da59bd9SMartin KaFai Lau }
21273da59bd9SMartin KaFai Lau 
21283da59bd9SMartin KaFai Lau static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
21293da59bd9SMartin KaFai Lau {
21305973fb1eSMartin KaFai Lau 	if (!__rt6_check_expired(rt) &&
21315973fb1eSMartin KaFai Lau 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
213293531c67SDavid Ahern 	    fib6_check(rt->from, cookie))
21333da59bd9SMartin KaFai Lau 		return &rt->dst;
21343da59bd9SMartin KaFai Lau 	else
21353da59bd9SMartin KaFai Lau 		return NULL;
21363da59bd9SMartin KaFai Lau }
21373da59bd9SMartin KaFai Lau 
21381da177e4SLinus Torvalds static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
21391da177e4SLinus Torvalds {
21401da177e4SLinus Torvalds 	struct rt6_info *rt;
21411da177e4SLinus Torvalds 
21421da177e4SLinus Torvalds 	rt = (struct rt6_info *) dst;
21431da177e4SLinus Torvalds 
21446f3118b5SNicolas Dichtel 	/* All IPV6 dsts are created with ->obsolete set to the value
21456f3118b5SNicolas Dichtel 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
21466f3118b5SNicolas Dichtel 	 * into this function always.
21476f3118b5SNicolas Dichtel 	 */
2148e3bc10bdSHannes Frederic Sowa 
214902bcf4e0SMartin KaFai Lau 	if (rt->rt6i_flags & RTF_PCPU ||
21503a2232e9SDavid Miller 	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
21513da59bd9SMartin KaFai Lau 		return rt6_dst_from_check(rt, cookie);
21523da59bd9SMartin KaFai Lau 	else
21533da59bd9SMartin KaFai Lau 		return rt6_check(rt, cookie);
21541da177e4SLinus Torvalds }
21551da177e4SLinus Torvalds 
21561da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
21571da177e4SLinus Torvalds {
21581da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *) dst;
21591da177e4SLinus Torvalds 
21601da177e4SLinus Torvalds 	if (rt) {
216154c1a859SYOSHIFUJI Hideaki / 吉藤英明 		if (rt->rt6i_flags & RTF_CACHE) {
216254c1a859SYOSHIFUJI Hideaki / 吉藤英明 			if (rt6_check_expired(rt)) {
216393531c67SDavid Ahern 				rt6_remove_exception_rt(rt);
216454c1a859SYOSHIFUJI Hideaki / 吉藤英明 				dst = NULL;
21651da177e4SLinus Torvalds 			}
216654c1a859SYOSHIFUJI Hideaki / 吉藤英明 		} else {
216754c1a859SYOSHIFUJI Hideaki / 吉藤英明 			dst_release(dst);
216854c1a859SYOSHIFUJI Hideaki / 吉藤英明 			dst = NULL;
216954c1a859SYOSHIFUJI Hideaki / 吉藤英明 		}
217054c1a859SYOSHIFUJI Hideaki / 吉藤英明 	}
217154c1a859SYOSHIFUJI Hideaki / 吉藤英明 	return dst;
21721da177e4SLinus Torvalds }
21731da177e4SLinus Torvalds 
21741da177e4SLinus Torvalds static void ip6_link_failure(struct sk_buff *skb)
21751da177e4SLinus Torvalds {
21761da177e4SLinus Torvalds 	struct rt6_info *rt;
21771da177e4SLinus Torvalds 
21783ffe533cSAlexey Dobriyan 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
21791da177e4SLinus Torvalds 
2180adf30907SEric Dumazet 	rt = (struct rt6_info *) skb_dst(skb);
21811da177e4SLinus Torvalds 	if (rt) {
21821eb4f758SHannes Frederic Sowa 		if (rt->rt6i_flags & RTF_CACHE) {
2183ad65a2f0SWei Wang 			if (dst_hold_safe(&rt->dst))
218493531c67SDavid Ahern 				rt6_remove_exception_rt(rt);
218593531c67SDavid Ahern 		} else if (rt->from) {
2186c5cff856SWei Wang 			struct fib6_node *fn;
2187c5cff856SWei Wang 
2188c5cff856SWei Wang 			rcu_read_lock();
218993c2fb25SDavid Ahern 			fn = rcu_dereference(rt->from->fib6_node);
2190c5cff856SWei Wang 			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2191c5cff856SWei Wang 				fn->fn_sernum = -1;
2192c5cff856SWei Wang 			rcu_read_unlock();
21931da177e4SLinus Torvalds 		}
21941da177e4SLinus Torvalds 	}
21951eb4f758SHannes Frederic Sowa }
21961da177e4SLinus Torvalds 
219745e4fd26SMartin KaFai Lau static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
219845e4fd26SMartin KaFai Lau {
219945e4fd26SMartin KaFai Lau 	struct net *net = dev_net(rt->dst.dev);
220045e4fd26SMartin KaFai Lau 
2201d4ead6b3SDavid Ahern 	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
220245e4fd26SMartin KaFai Lau 	rt->rt6i_flags |= RTF_MODIFIED;
220345e4fd26SMartin KaFai Lau 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
220445e4fd26SMartin KaFai Lau }
220545e4fd26SMartin KaFai Lau 
22060d3f6d29SMartin KaFai Lau static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
22070d3f6d29SMartin KaFai Lau {
22080d3f6d29SMartin KaFai Lau 	return !(rt->rt6i_flags & RTF_CACHE) &&
220977634cc6SDavid Ahern 		(rt->rt6i_flags & RTF_PCPU || rt->from);
22100d3f6d29SMartin KaFai Lau }
22110d3f6d29SMartin KaFai Lau 
221245e4fd26SMartin KaFai Lau static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
221345e4fd26SMartin KaFai Lau 				 const struct ipv6hdr *iph, u32 mtu)
22141da177e4SLinus Torvalds {
22150dec879fSJulian Anastasov 	const struct in6_addr *daddr, *saddr;
22161da177e4SLinus Torvalds 	struct rt6_info *rt6 = (struct rt6_info *)dst;
22171da177e4SLinus Torvalds 
221845e4fd26SMartin KaFai Lau 	if (rt6->rt6i_flags & RTF_LOCAL)
221945e4fd26SMartin KaFai Lau 		return;
222045e4fd26SMartin KaFai Lau 
222119bda36cSXin Long 	if (dst_metric_locked(dst, RTAX_MTU))
222219bda36cSXin Long 		return;
222319bda36cSXin Long 
222445e4fd26SMartin KaFai Lau 	if (iph) {
222545e4fd26SMartin KaFai Lau 		daddr = &iph->daddr;
222645e4fd26SMartin KaFai Lau 		saddr = &iph->saddr;
222745e4fd26SMartin KaFai Lau 	} else if (sk) {
222845e4fd26SMartin KaFai Lau 		daddr = &sk->sk_v6_daddr;
222945e4fd26SMartin KaFai Lau 		saddr = &inet6_sk(sk)->saddr;
223045e4fd26SMartin KaFai Lau 	} else {
22310dec879fSJulian Anastasov 		daddr = NULL;
22320dec879fSJulian Anastasov 		saddr = NULL;
22331da177e4SLinus Torvalds 	}
22340dec879fSJulian Anastasov 	dst_confirm_neigh(dst, daddr);
22350dec879fSJulian Anastasov 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
22360dec879fSJulian Anastasov 	if (mtu >= dst_mtu(dst))
22370dec879fSJulian Anastasov 		return;
22380dec879fSJulian Anastasov 
22390dec879fSJulian Anastasov 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
22400dec879fSJulian Anastasov 		rt6_do_update_pmtu(rt6, mtu);
22412b760fcfSWei Wang 		/* update rt6_ex->stamp for cache */
22422b760fcfSWei Wang 		if (rt6->rt6i_flags & RTF_CACHE)
22432b760fcfSWei Wang 			rt6_update_exception_stamp_rt(rt6);
22440dec879fSJulian Anastasov 	} else if (daddr) {
22450dec879fSJulian Anastasov 		struct rt6_info *nrt6;
22460dec879fSJulian Anastasov 
2247d4ead6b3SDavid Ahern 		nrt6 = ip6_rt_cache_alloc(rt6->from, daddr, saddr);
224845e4fd26SMartin KaFai Lau 		if (nrt6) {
224945e4fd26SMartin KaFai Lau 			rt6_do_update_pmtu(nrt6, mtu);
2250d4ead6b3SDavid Ahern 			if (rt6_insert_exception(nrt6, rt6->from))
22512b760fcfSWei Wang 				dst_release_immediate(&nrt6->dst);
225245e4fd26SMartin KaFai Lau 		}
225345e4fd26SMartin KaFai Lau 	}
225445e4fd26SMartin KaFai Lau }
225545e4fd26SMartin KaFai Lau 
225645e4fd26SMartin KaFai Lau static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
225745e4fd26SMartin KaFai Lau 			       struct sk_buff *skb, u32 mtu)
225845e4fd26SMartin KaFai Lau {
225945e4fd26SMartin KaFai Lau 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
22601da177e4SLinus Torvalds }
22611da177e4SLinus Torvalds 
226242ae66c8SDavid S. Miller void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2263e2d118a1SLorenzo Colitti 		     int oif, u32 mark, kuid_t uid)
226481aded24SDavid S. Miller {
226581aded24SDavid S. Miller 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
226681aded24SDavid S. Miller 	struct dst_entry *dst;
226781aded24SDavid S. Miller 	struct flowi6 fl6;
226881aded24SDavid S. Miller 
226981aded24SDavid S. Miller 	memset(&fl6, 0, sizeof(fl6));
227081aded24SDavid S. Miller 	fl6.flowi6_oif = oif;
22711b3c61dcSLorenzo Colitti 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
227281aded24SDavid S. Miller 	fl6.daddr = iph->daddr;
227381aded24SDavid S. Miller 	fl6.saddr = iph->saddr;
22746502ca52SYOSHIFUJI Hideaki / 吉藤英明 	fl6.flowlabel = ip6_flowinfo(iph);
2275e2d118a1SLorenzo Colitti 	fl6.flowi6_uid = uid;
227681aded24SDavid S. Miller 
227781aded24SDavid S. Miller 	dst = ip6_route_output(net, NULL, &fl6);
227881aded24SDavid S. Miller 	if (!dst->error)
227945e4fd26SMartin KaFai Lau 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
228081aded24SDavid S. Miller 	dst_release(dst);
228181aded24SDavid S. Miller }
228281aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_update_pmtu);
228381aded24SDavid S. Miller 
228481aded24SDavid S. Miller void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
228581aded24SDavid S. Miller {
228633c162a9SMartin KaFai Lau 	struct dst_entry *dst;
228733c162a9SMartin KaFai Lau 
228881aded24SDavid S. Miller 	ip6_update_pmtu(skb, sock_net(sk), mtu,
2289e2d118a1SLorenzo Colitti 			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
229033c162a9SMartin KaFai Lau 
229133c162a9SMartin KaFai Lau 	dst = __sk_dst_get(sk);
229233c162a9SMartin KaFai Lau 	if (!dst || !dst->obsolete ||
229333c162a9SMartin KaFai Lau 	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
229433c162a9SMartin KaFai Lau 		return;
229533c162a9SMartin KaFai Lau 
229633c162a9SMartin KaFai Lau 	bh_lock_sock(sk);
229733c162a9SMartin KaFai Lau 	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
229833c162a9SMartin KaFai Lau 		ip6_datagram_dst_update(sk, false);
229933c162a9SMartin KaFai Lau 	bh_unlock_sock(sk);
230081aded24SDavid S. Miller }
230181aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
230281aded24SDavid S. Miller 
23037d6850f7SAlexey Kodanev void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
23047d6850f7SAlexey Kodanev 			   const struct flowi6 *fl6)
23057d6850f7SAlexey Kodanev {
23067d6850f7SAlexey Kodanev #ifdef CONFIG_IPV6_SUBTREES
23077d6850f7SAlexey Kodanev 	struct ipv6_pinfo *np = inet6_sk(sk);
23087d6850f7SAlexey Kodanev #endif
23097d6850f7SAlexey Kodanev 
23107d6850f7SAlexey Kodanev 	ip6_dst_store(sk, dst,
23117d6850f7SAlexey Kodanev 		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
23127d6850f7SAlexey Kodanev 		      &sk->sk_v6_daddr : NULL,
23137d6850f7SAlexey Kodanev #ifdef CONFIG_IPV6_SUBTREES
23147d6850f7SAlexey Kodanev 		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
23157d6850f7SAlexey Kodanev 		      &np->saddr :
23167d6850f7SAlexey Kodanev #endif
23177d6850f7SAlexey Kodanev 		      NULL);
23187d6850f7SAlexey Kodanev }
23197d6850f7SAlexey Kodanev 
2320b55b76b2SDuan Jiong /* Handle redirects */
2321b55b76b2SDuan Jiong struct ip6rd_flowi {
2322b55b76b2SDuan Jiong 	struct flowi6 fl6;
2323b55b76b2SDuan Jiong 	struct in6_addr gateway;
2324b55b76b2SDuan Jiong };
2325b55b76b2SDuan Jiong 
2326b55b76b2SDuan Jiong static struct rt6_info *__ip6_route_redirect(struct net *net,
2327b55b76b2SDuan Jiong 					     struct fib6_table *table,
2328b55b76b2SDuan Jiong 					     struct flowi6 *fl6,
2329b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
2330b55b76b2SDuan Jiong 					     int flags)
2331b55b76b2SDuan Jiong {
2332b55b76b2SDuan Jiong 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
233323fb93a4SDavid Ahern 	struct rt6_info *ret = NULL, *rt_cache;
23348d1c802bSDavid Ahern 	struct fib6_info *rt;
2335b55b76b2SDuan Jiong 	struct fib6_node *fn;
2336b55b76b2SDuan Jiong 
2337b55b76b2SDuan Jiong 	/* Get the "current" route for this destination and
233867c408cfSAlexander Alemayhu 	 * check if the redirect has come from appropriate router.
2339b55b76b2SDuan Jiong 	 *
2340b55b76b2SDuan Jiong 	 * RFC 4861 specifies that redirects should only be
2341b55b76b2SDuan Jiong 	 * accepted if they come from the nexthop to the target.
2342b55b76b2SDuan Jiong 	 * Due to the way the routes are chosen, this notion
2343b55b76b2SDuan Jiong 	 * is a bit fuzzy and one might need to check all possible
2344b55b76b2SDuan Jiong 	 * routes.
2345b55b76b2SDuan Jiong 	 */
2346b55b76b2SDuan Jiong 
234766f5d6ceSWei Wang 	rcu_read_lock();
2348b55b76b2SDuan Jiong 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2349b55b76b2SDuan Jiong restart:
235066f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(fn) {
23515e670d84SDavid Ahern 		if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
23528067bb8cSIdo Schimmel 			continue;
235314895687SDavid Ahern 		if (fib6_check_expired(rt))
2354b55b76b2SDuan Jiong 			continue;
235593c2fb25SDavid Ahern 		if (rt->fib6_flags & RTF_REJECT)
2356b55b76b2SDuan Jiong 			break;
235793c2fb25SDavid Ahern 		if (!(rt->fib6_flags & RTF_GATEWAY))
2358b55b76b2SDuan Jiong 			continue;
23595e670d84SDavid Ahern 		if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
2360b55b76b2SDuan Jiong 			continue;
23612b760fcfSWei Wang 		/* rt_cache's gateway might be different from its 'parent'
23622b760fcfSWei Wang 		 * in the case of an ip redirect.
23632b760fcfSWei Wang 		 * So we keep searching in the exception table if the gateway
23642b760fcfSWei Wang 		 * is different.
23652b760fcfSWei Wang 		 */
23665e670d84SDavid Ahern 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
23672b760fcfSWei Wang 			rt_cache = rt6_find_cached_rt(rt,
23682b760fcfSWei Wang 						      &fl6->daddr,
23692b760fcfSWei Wang 						      &fl6->saddr);
23702b760fcfSWei Wang 			if (rt_cache &&
23712b760fcfSWei Wang 			    ipv6_addr_equal(&rdfl->gateway,
23722b760fcfSWei Wang 					    &rt_cache->rt6i_gateway)) {
237323fb93a4SDavid Ahern 				ret = rt_cache;
23742b760fcfSWei Wang 				break;
23752b760fcfSWei Wang 			}
2376b55b76b2SDuan Jiong 			continue;
23772b760fcfSWei Wang 		}
2378b55b76b2SDuan Jiong 		break;
2379b55b76b2SDuan Jiong 	}
2380b55b76b2SDuan Jiong 
2381b55b76b2SDuan Jiong 	if (!rt)
2382421842edSDavid Ahern 		rt = net->ipv6.fib6_null_entry;
238393c2fb25SDavid Ahern 	else if (rt->fib6_flags & RTF_REJECT) {
238423fb93a4SDavid Ahern 		ret = net->ipv6.ip6_null_entry;
2385b0a1ba59SMartin KaFai Lau 		goto out;
2386b0a1ba59SMartin KaFai Lau 	}
2387b0a1ba59SMartin KaFai Lau 
2388421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry) {
2389a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
2390a3c00e46SMartin KaFai Lau 		if (fn)
2391a3c00e46SMartin KaFai Lau 			goto restart;
2392b55b76b2SDuan Jiong 	}
2393a3c00e46SMartin KaFai Lau 
2394b0a1ba59SMartin KaFai Lau out:
239523fb93a4SDavid Ahern 	if (ret)
239623fb93a4SDavid Ahern 		dst_hold(&ret->dst);
239723fb93a4SDavid Ahern 	else
239823fb93a4SDavid Ahern 		ret = ip6_create_rt_rcu(rt);
2399b55b76b2SDuan Jiong 
240066f5d6ceSWei Wang 	rcu_read_unlock();
2401b55b76b2SDuan Jiong 
240223fb93a4SDavid Ahern 	trace_fib6_table_lookup(net, ret, table, fl6);
240323fb93a4SDavid Ahern 	return ret;
2404b55b76b2SDuan Jiong };
2405b55b76b2SDuan Jiong 
2406b55b76b2SDuan Jiong static struct dst_entry *ip6_route_redirect(struct net *net,
2407b55b76b2SDuan Jiong 					    const struct flowi6 *fl6,
2408b75cc8f9SDavid Ahern 					    const struct sk_buff *skb,
2409b55b76b2SDuan Jiong 					    const struct in6_addr *gateway)
2410b55b76b2SDuan Jiong {
2411b55b76b2SDuan Jiong 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2412b55b76b2SDuan Jiong 	struct ip6rd_flowi rdfl;
2413b55b76b2SDuan Jiong 
2414b55b76b2SDuan Jiong 	rdfl.fl6 = *fl6;
2415b55b76b2SDuan Jiong 	rdfl.gateway = *gateway;
2416b55b76b2SDuan Jiong 
2417b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, &rdfl.fl6, skb,
2418b55b76b2SDuan Jiong 				flags, __ip6_route_redirect);
2419b55b76b2SDuan Jiong }
2420b55b76b2SDuan Jiong 
2421e2d118a1SLorenzo Colitti void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2422e2d118a1SLorenzo Colitti 		  kuid_t uid)
24233a5ad2eeSDavid S. Miller {
24243a5ad2eeSDavid S. Miller 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
24253a5ad2eeSDavid S. Miller 	struct dst_entry *dst;
24263a5ad2eeSDavid S. Miller 	struct flowi6 fl6;
24273a5ad2eeSDavid S. Miller 
24283a5ad2eeSDavid S. Miller 	memset(&fl6, 0, sizeof(fl6));
2429e374c618SJulian Anastasov 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
24303a5ad2eeSDavid S. Miller 	fl6.flowi6_oif = oif;
24313a5ad2eeSDavid S. Miller 	fl6.flowi6_mark = mark;
24323a5ad2eeSDavid S. Miller 	fl6.daddr = iph->daddr;
24333a5ad2eeSDavid S. Miller 	fl6.saddr = iph->saddr;
24346502ca52SYOSHIFUJI Hideaki / 吉藤英明 	fl6.flowlabel = ip6_flowinfo(iph);
2435e2d118a1SLorenzo Colitti 	fl6.flowi6_uid = uid;
24363a5ad2eeSDavid S. Miller 
2437b75cc8f9SDavid Ahern 	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
24386700c270SDavid S. Miller 	rt6_do_redirect(dst, NULL, skb);
24393a5ad2eeSDavid S. Miller 	dst_release(dst);
24403a5ad2eeSDavid S. Miller }
24413a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_redirect);
24423a5ad2eeSDavid S. Miller 
2443c92a59ecSDuan Jiong void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2444c92a59ecSDuan Jiong 			    u32 mark)
2445c92a59ecSDuan Jiong {
2446c92a59ecSDuan Jiong 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2447c92a59ecSDuan Jiong 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2448c92a59ecSDuan Jiong 	struct dst_entry *dst;
2449c92a59ecSDuan Jiong 	struct flowi6 fl6;
2450c92a59ecSDuan Jiong 
2451c92a59ecSDuan Jiong 	memset(&fl6, 0, sizeof(fl6));
2452e374c618SJulian Anastasov 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
2453c92a59ecSDuan Jiong 	fl6.flowi6_oif = oif;
2454c92a59ecSDuan Jiong 	fl6.flowi6_mark = mark;
2455c92a59ecSDuan Jiong 	fl6.daddr = msg->dest;
2456c92a59ecSDuan Jiong 	fl6.saddr = iph->daddr;
2457e2d118a1SLorenzo Colitti 	fl6.flowi6_uid = sock_net_uid(net, NULL);
2458c92a59ecSDuan Jiong 
2459b75cc8f9SDavid Ahern 	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2460c92a59ecSDuan Jiong 	rt6_do_redirect(dst, NULL, skb);
2461c92a59ecSDuan Jiong 	dst_release(dst);
2462c92a59ecSDuan Jiong }
2463c92a59ecSDuan Jiong 
24643a5ad2eeSDavid S. Miller void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
24653a5ad2eeSDavid S. Miller {
2466e2d118a1SLorenzo Colitti 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2467e2d118a1SLorenzo Colitti 		     sk->sk_uid);
24683a5ad2eeSDavid S. Miller }
24693a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_redirect);
24703a5ad2eeSDavid S. Miller 
24710dbaee3bSDavid S. Miller static unsigned int ip6_default_advmss(const struct dst_entry *dst)
24721da177e4SLinus Torvalds {
24730dbaee3bSDavid S. Miller 	struct net_device *dev = dst->dev;
24740dbaee3bSDavid S. Miller 	unsigned int mtu = dst_mtu(dst);
24750dbaee3bSDavid S. Miller 	struct net *net = dev_net(dev);
24760dbaee3bSDavid S. Miller 
24771da177e4SLinus Torvalds 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
24781da177e4SLinus Torvalds 
24795578689aSDaniel Lezcano 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
24805578689aSDaniel Lezcano 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
24811da177e4SLinus Torvalds 
24821da177e4SLinus Torvalds 	/*
24831da177e4SLinus Torvalds 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
24841da177e4SLinus Torvalds 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
24851da177e4SLinus Torvalds 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
24861da177e4SLinus Torvalds 	 * rely only on pmtu discovery"
24871da177e4SLinus Torvalds 	 */
24881da177e4SLinus Torvalds 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
24891da177e4SLinus Torvalds 		mtu = IPV6_MAXPLEN;
24901da177e4SLinus Torvalds 	return mtu;
24911da177e4SLinus Torvalds }
24921da177e4SLinus Torvalds 
2493ebb762f2SSteffen Klassert static unsigned int ip6_mtu(const struct dst_entry *dst)
2494d33e4553SDavid S. Miller {
2495d33e4553SDavid S. Miller 	struct inet6_dev *idev;
2496d4ead6b3SDavid Ahern 	unsigned int mtu;
2497618f9bc7SSteffen Klassert 
24984b32b5adSMartin KaFai Lau 	mtu = dst_metric_raw(dst, RTAX_MTU);
24994b32b5adSMartin KaFai Lau 	if (mtu)
25004b32b5adSMartin KaFai Lau 		goto out;
25014b32b5adSMartin KaFai Lau 
2502618f9bc7SSteffen Klassert 	mtu = IPV6_MIN_MTU;
2503d33e4553SDavid S. Miller 
2504d33e4553SDavid S. Miller 	rcu_read_lock();
2505d33e4553SDavid S. Miller 	idev = __in6_dev_get(dst->dev);
2506d33e4553SDavid S. Miller 	if (idev)
2507d33e4553SDavid S. Miller 		mtu = idev->cnf.mtu6;
2508d33e4553SDavid S. Miller 	rcu_read_unlock();
2509d33e4553SDavid S. Miller 
251030f78d8eSEric Dumazet out:
251114972cbdSRoopa Prabhu 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
251214972cbdSRoopa Prabhu 
251314972cbdSRoopa Prabhu 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2514d33e4553SDavid S. Miller }
2515d33e4553SDavid S. Miller 
25163b00944cSYOSHIFUJI Hideaki struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
251787a11578SDavid S. Miller 				  struct flowi6 *fl6)
25181da177e4SLinus Torvalds {
251987a11578SDavid S. Miller 	struct dst_entry *dst;
25201da177e4SLinus Torvalds 	struct rt6_info *rt;
25211da177e4SLinus Torvalds 	struct inet6_dev *idev = in6_dev_get(dev);
2522c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
25231da177e4SLinus Torvalds 
252438308473SDavid S. Miller 	if (unlikely(!idev))
2525122bdf67SEric Dumazet 		return ERR_PTR(-ENODEV);
25261da177e4SLinus Torvalds 
2527ad706862SMartin KaFai Lau 	rt = ip6_dst_alloc(net, dev, 0);
252838308473SDavid S. Miller 	if (unlikely(!rt)) {
25291da177e4SLinus Torvalds 		in6_dev_put(idev);
253087a11578SDavid S. Miller 		dst = ERR_PTR(-ENOMEM);
25311da177e4SLinus Torvalds 		goto out;
25321da177e4SLinus Torvalds 	}
25331da177e4SLinus Torvalds 
25348e2ec639SYan, Zheng 	rt->dst.flags |= DST_HOST;
2535588753f1SBrendan McGrath 	rt->dst.input = ip6_input;
25368e2ec639SYan, Zheng 	rt->dst.output  = ip6_output;
2537550bab42SJulian Anastasov 	rt->rt6i_gateway  = fl6->daddr;
253887a11578SDavid S. Miller 	rt->rt6i_dst.addr = fl6->daddr;
25398e2ec639SYan, Zheng 	rt->rt6i_dst.plen = 128;
25408e2ec639SYan, Zheng 	rt->rt6i_idev     = idev;
254114edd87dSLi RongQing 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
25421da177e4SLinus Torvalds 
25434c981e28SIdo Schimmel 	/* Add this dst into uncached_list so that rt6_disable_ip() can
2544587fea74SWei Wang 	 * do proper release of the net_device
2545587fea74SWei Wang 	 */
2546587fea74SWei Wang 	rt6_uncached_list_add(rt);
254781eb8447SWei Wang 	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
25481da177e4SLinus Torvalds 
254987a11578SDavid S. Miller 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
255087a11578SDavid S. Miller 
25511da177e4SLinus Torvalds out:
255287a11578SDavid S. Miller 	return dst;
25531da177e4SLinus Torvalds }
25541da177e4SLinus Torvalds 
2555569d3645SDaniel Lezcano static int ip6_dst_gc(struct dst_ops *ops)
25561da177e4SLinus Torvalds {
255786393e52SAlexey Dobriyan 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
25587019b78eSDaniel Lezcano 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
25597019b78eSDaniel Lezcano 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
25607019b78eSDaniel Lezcano 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
25617019b78eSDaniel Lezcano 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
25627019b78eSDaniel Lezcano 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2563fc66f95cSEric Dumazet 	int entries;
25641da177e4SLinus Torvalds 
2565fc66f95cSEric Dumazet 	entries = dst_entries_get_fast(ops);
256649a18d86SMichal Kubeček 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2567fc66f95cSEric Dumazet 	    entries <= rt_max_size)
25681da177e4SLinus Torvalds 		goto out;
25691da177e4SLinus Torvalds 
25706891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire++;
257114956643SLi RongQing 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2572fc66f95cSEric Dumazet 	entries = dst_entries_get_slow(ops);
2573fc66f95cSEric Dumazet 	if (entries < ops->gc_thresh)
25747019b78eSDaniel Lezcano 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
25751da177e4SLinus Torvalds out:
25767019b78eSDaniel Lezcano 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2577fc66f95cSEric Dumazet 	return entries > rt_max_size;
25781da177e4SLinus Torvalds }
25791da177e4SLinus Torvalds 
25808d1c802bSDavid Ahern static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
2581d4ead6b3SDavid Ahern 			       struct fib6_config *cfg)
2582e715b6d3SFlorian Westphal {
2583d4ead6b3SDavid Ahern 	int err = 0;
2584e715b6d3SFlorian Westphal 
2585d4ead6b3SDavid Ahern 	if (cfg->fc_mx) {
2586d4ead6b3SDavid Ahern 		rt->fib6_metrics = kzalloc(sizeof(*rt->fib6_metrics),
2587d4ead6b3SDavid Ahern 					   GFP_KERNEL);
2588d4ead6b3SDavid Ahern 		if (unlikely(!rt->fib6_metrics))
2589e715b6d3SFlorian Westphal 			return -ENOMEM;
2590e715b6d3SFlorian Westphal 
2591d4ead6b3SDavid Ahern 		refcount_set(&rt->fib6_metrics->refcnt, 1);
2592ea697639SDaniel Borkmann 
2593d4ead6b3SDavid Ahern 		err = ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len,
2594d4ead6b3SDavid Ahern 					 rt->fib6_metrics->metrics);
2595e715b6d3SFlorian Westphal 	}
2596e715b6d3SFlorian Westphal 
2597d4ead6b3SDavid Ahern 	return err;
2598e715b6d3SFlorian Westphal }
25991da177e4SLinus Torvalds 
26008c14586fSDavid Ahern static struct rt6_info *ip6_nh_lookup_table(struct net *net,
26018c14586fSDavid Ahern 					    struct fib6_config *cfg,
2602f4797b33SDavid Ahern 					    const struct in6_addr *gw_addr,
2603f4797b33SDavid Ahern 					    u32 tbid, int flags)
26048c14586fSDavid Ahern {
26058c14586fSDavid Ahern 	struct flowi6 fl6 = {
26068c14586fSDavid Ahern 		.flowi6_oif = cfg->fc_ifindex,
26078c14586fSDavid Ahern 		.daddr = *gw_addr,
26088c14586fSDavid Ahern 		.saddr = cfg->fc_prefsrc,
26098c14586fSDavid Ahern 	};
26108c14586fSDavid Ahern 	struct fib6_table *table;
26118c14586fSDavid Ahern 	struct rt6_info *rt;
26128c14586fSDavid Ahern 
2613f4797b33SDavid Ahern 	table = fib6_get_table(net, tbid);
26148c14586fSDavid Ahern 	if (!table)
26158c14586fSDavid Ahern 		return NULL;
26168c14586fSDavid Ahern 
26178c14586fSDavid Ahern 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
26188c14586fSDavid Ahern 		flags |= RT6_LOOKUP_F_HAS_SADDR;
26198c14586fSDavid Ahern 
2620f4797b33SDavid Ahern 	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2621b75cc8f9SDavid Ahern 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
26228c14586fSDavid Ahern 
26238c14586fSDavid Ahern 	/* if table lookup failed, fall back to full lookup */
26248c14586fSDavid Ahern 	if (rt == net->ipv6.ip6_null_entry) {
26258c14586fSDavid Ahern 		ip6_rt_put(rt);
26268c14586fSDavid Ahern 		rt = NULL;
26278c14586fSDavid Ahern 	}
26288c14586fSDavid Ahern 
26298c14586fSDavid Ahern 	return rt;
26308c14586fSDavid Ahern }
26318c14586fSDavid Ahern 
2632fc1e64e1SDavid Ahern static int ip6_route_check_nh_onlink(struct net *net,
2633fc1e64e1SDavid Ahern 				     struct fib6_config *cfg,
26349fbb704cSDavid Ahern 				     const struct net_device *dev,
2635fc1e64e1SDavid Ahern 				     struct netlink_ext_ack *extack)
2636fc1e64e1SDavid Ahern {
263744750f84SDavid Ahern 	u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2638fc1e64e1SDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
2639fc1e64e1SDavid Ahern 	u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2640fc1e64e1SDavid Ahern 	struct rt6_info *grt;
2641fc1e64e1SDavid Ahern 	int err;
2642fc1e64e1SDavid Ahern 
2643fc1e64e1SDavid Ahern 	err = 0;
2644fc1e64e1SDavid Ahern 	grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2645fc1e64e1SDavid Ahern 	if (grt) {
264658e354c0SDavid Ahern 		if (!grt->dst.error &&
264758e354c0SDavid Ahern 		    (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
264844750f84SDavid Ahern 			NL_SET_ERR_MSG(extack,
264944750f84SDavid Ahern 				       "Nexthop has invalid gateway or device mismatch");
2650fc1e64e1SDavid Ahern 			err = -EINVAL;
2651fc1e64e1SDavid Ahern 		}
2652fc1e64e1SDavid Ahern 
2653fc1e64e1SDavid Ahern 		ip6_rt_put(grt);
2654fc1e64e1SDavid Ahern 	}
2655fc1e64e1SDavid Ahern 
2656fc1e64e1SDavid Ahern 	return err;
2657fc1e64e1SDavid Ahern }
2658fc1e64e1SDavid Ahern 
26591edce99fSDavid Ahern static int ip6_route_check_nh(struct net *net,
26601edce99fSDavid Ahern 			      struct fib6_config *cfg,
26611edce99fSDavid Ahern 			      struct net_device **_dev,
26621edce99fSDavid Ahern 			      struct inet6_dev **idev)
26631edce99fSDavid Ahern {
26641edce99fSDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
26651edce99fSDavid Ahern 	struct net_device *dev = _dev ? *_dev : NULL;
26661edce99fSDavid Ahern 	struct rt6_info *grt = NULL;
26671edce99fSDavid Ahern 	int err = -EHOSTUNREACH;
26681edce99fSDavid Ahern 
26691edce99fSDavid Ahern 	if (cfg->fc_table) {
2670f4797b33SDavid Ahern 		int flags = RT6_LOOKUP_F_IFACE;
2671f4797b33SDavid Ahern 
2672f4797b33SDavid Ahern 		grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2673f4797b33SDavid Ahern 					  cfg->fc_table, flags);
26741edce99fSDavid Ahern 		if (grt) {
26751edce99fSDavid Ahern 			if (grt->rt6i_flags & RTF_GATEWAY ||
26761edce99fSDavid Ahern 			    (dev && dev != grt->dst.dev)) {
26771edce99fSDavid Ahern 				ip6_rt_put(grt);
26781edce99fSDavid Ahern 				grt = NULL;
26791edce99fSDavid Ahern 			}
26801edce99fSDavid Ahern 		}
26811edce99fSDavid Ahern 	}
26821edce99fSDavid Ahern 
26831edce99fSDavid Ahern 	if (!grt)
2684b75cc8f9SDavid Ahern 		grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
26851edce99fSDavid Ahern 
26861edce99fSDavid Ahern 	if (!grt)
26871edce99fSDavid Ahern 		goto out;
26881edce99fSDavid Ahern 
26891edce99fSDavid Ahern 	if (dev) {
26901edce99fSDavid Ahern 		if (dev != grt->dst.dev) {
26911edce99fSDavid Ahern 			ip6_rt_put(grt);
26921edce99fSDavid Ahern 			goto out;
26931edce99fSDavid Ahern 		}
26941edce99fSDavid Ahern 	} else {
26951edce99fSDavid Ahern 		*_dev = dev = grt->dst.dev;
26961edce99fSDavid Ahern 		*idev = grt->rt6i_idev;
26971edce99fSDavid Ahern 		dev_hold(dev);
26981edce99fSDavid Ahern 		in6_dev_hold(grt->rt6i_idev);
26991edce99fSDavid Ahern 	}
27001edce99fSDavid Ahern 
27011edce99fSDavid Ahern 	if (!(grt->rt6i_flags & RTF_GATEWAY))
27021edce99fSDavid Ahern 		err = 0;
27031edce99fSDavid Ahern 
27041edce99fSDavid Ahern 	ip6_rt_put(grt);
27051edce99fSDavid Ahern 
27061edce99fSDavid Ahern out:
27071edce99fSDavid Ahern 	return err;
27081edce99fSDavid Ahern }
27091edce99fSDavid Ahern 
27109fbb704cSDavid Ahern static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
27119fbb704cSDavid Ahern 			   struct net_device **_dev, struct inet6_dev **idev,
27129fbb704cSDavid Ahern 			   struct netlink_ext_ack *extack)
27139fbb704cSDavid Ahern {
27149fbb704cSDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
27159fbb704cSDavid Ahern 	int gwa_type = ipv6_addr_type(gw_addr);
2716232378e8SDavid Ahern 	bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
27179fbb704cSDavid Ahern 	const struct net_device *dev = *_dev;
2718232378e8SDavid Ahern 	bool need_addr_check = !dev;
27199fbb704cSDavid Ahern 	int err = -EINVAL;
27209fbb704cSDavid Ahern 
27219fbb704cSDavid Ahern 	/* if gw_addr is local we will fail to detect this in case
27229fbb704cSDavid Ahern 	 * address is still TENTATIVE (DAD in progress). rt6_lookup()
27239fbb704cSDavid Ahern 	 * will return already-added prefix route via interface that
27249fbb704cSDavid Ahern 	 * prefix route was assigned to, which might be non-loopback.
27259fbb704cSDavid Ahern 	 */
2726232378e8SDavid Ahern 	if (dev &&
2727232378e8SDavid Ahern 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2728232378e8SDavid Ahern 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
27299fbb704cSDavid Ahern 		goto out;
27309fbb704cSDavid Ahern 	}
27319fbb704cSDavid Ahern 
27329fbb704cSDavid Ahern 	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
27339fbb704cSDavid Ahern 		/* IPv6 strictly inhibits using not link-local
27349fbb704cSDavid Ahern 		 * addresses as nexthop address.
27359fbb704cSDavid Ahern 		 * Otherwise, router will not able to send redirects.
27369fbb704cSDavid Ahern 		 * It is very good, but in some (rare!) circumstances
27379fbb704cSDavid Ahern 		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
27389fbb704cSDavid Ahern 		 * some exceptions. --ANK
27399fbb704cSDavid Ahern 		 * We allow IPv4-mapped nexthops to support RFC4798-type
27409fbb704cSDavid Ahern 		 * addressing
27419fbb704cSDavid Ahern 		 */
27429fbb704cSDavid Ahern 		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
27439fbb704cSDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid gateway address");
27449fbb704cSDavid Ahern 			goto out;
27459fbb704cSDavid Ahern 		}
27469fbb704cSDavid Ahern 
27479fbb704cSDavid Ahern 		if (cfg->fc_flags & RTNH_F_ONLINK)
27489fbb704cSDavid Ahern 			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
27499fbb704cSDavid Ahern 		else
27509fbb704cSDavid Ahern 			err = ip6_route_check_nh(net, cfg, _dev, idev);
27519fbb704cSDavid Ahern 
27529fbb704cSDavid Ahern 		if (err)
27539fbb704cSDavid Ahern 			goto out;
27549fbb704cSDavid Ahern 	}
27559fbb704cSDavid Ahern 
27569fbb704cSDavid Ahern 	/* reload in case device was changed */
27579fbb704cSDavid Ahern 	dev = *_dev;
27589fbb704cSDavid Ahern 
27599fbb704cSDavid Ahern 	err = -EINVAL;
27609fbb704cSDavid Ahern 	if (!dev) {
27619fbb704cSDavid Ahern 		NL_SET_ERR_MSG(extack, "Egress device not specified");
27629fbb704cSDavid Ahern 		goto out;
27639fbb704cSDavid Ahern 	} else if (dev->flags & IFF_LOOPBACK) {
27649fbb704cSDavid Ahern 		NL_SET_ERR_MSG(extack,
27659fbb704cSDavid Ahern 			       "Egress device can not be loopback device for this route");
27669fbb704cSDavid Ahern 		goto out;
27679fbb704cSDavid Ahern 	}
2768232378e8SDavid Ahern 
2769232378e8SDavid Ahern 	/* if we did not check gw_addr above, do so now that the
2770232378e8SDavid Ahern 	 * egress device has been resolved.
2771232378e8SDavid Ahern 	 */
2772232378e8SDavid Ahern 	if (need_addr_check &&
2773232378e8SDavid Ahern 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2774232378e8SDavid Ahern 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2775232378e8SDavid Ahern 		goto out;
2776232378e8SDavid Ahern 	}
2777232378e8SDavid Ahern 
27789fbb704cSDavid Ahern 	err = 0;
27799fbb704cSDavid Ahern out:
27809fbb704cSDavid Ahern 	return err;
27819fbb704cSDavid Ahern }
27829fbb704cSDavid Ahern 
27838d1c802bSDavid Ahern static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
2784acb54e3cSDavid Ahern 					      gfp_t gfp_flags,
2785333c4301SDavid Ahern 					      struct netlink_ext_ack *extack)
27861da177e4SLinus Torvalds {
27875578689aSDaniel Lezcano 	struct net *net = cfg->fc_nlinfo.nl_net;
27888d1c802bSDavid Ahern 	struct fib6_info *rt = NULL;
27891da177e4SLinus Torvalds 	struct net_device *dev = NULL;
27901da177e4SLinus Torvalds 	struct inet6_dev *idev = NULL;
2791c71099acSThomas Graf 	struct fib6_table *table;
27921da177e4SLinus Torvalds 	int addr_type;
27938c5b83f0SRoopa Prabhu 	int err = -EINVAL;
27941da177e4SLinus Torvalds 
2795557c44beSDavid Ahern 	/* RTF_PCPU is an internal flag; can not be set by userspace */
2796d5d531cbSDavid Ahern 	if (cfg->fc_flags & RTF_PCPU) {
2797d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
2798557c44beSDavid Ahern 		goto out;
2799d5d531cbSDavid Ahern 	}
2800557c44beSDavid Ahern 
28012ea2352eSWei Wang 	/* RTF_CACHE is an internal flag; can not be set by userspace */
28022ea2352eSWei Wang 	if (cfg->fc_flags & RTF_CACHE) {
28032ea2352eSWei Wang 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
28042ea2352eSWei Wang 		goto out;
28052ea2352eSWei Wang 	}
28062ea2352eSWei Wang 
2807e8478e80SDavid Ahern 	if (cfg->fc_type > RTN_MAX) {
2808e8478e80SDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid route type");
2809e8478e80SDavid Ahern 		goto out;
2810e8478e80SDavid Ahern 	}
2811e8478e80SDavid Ahern 
2812d5d531cbSDavid Ahern 	if (cfg->fc_dst_len > 128) {
2813d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
28148c5b83f0SRoopa Prabhu 		goto out;
2815d5d531cbSDavid Ahern 	}
2816d5d531cbSDavid Ahern 	if (cfg->fc_src_len > 128) {
2817d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid source address length");
2818d5d531cbSDavid Ahern 		goto out;
2819d5d531cbSDavid Ahern 	}
28201da177e4SLinus Torvalds #ifndef CONFIG_IPV6_SUBTREES
2821d5d531cbSDavid Ahern 	if (cfg->fc_src_len) {
2822d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack,
2823d5d531cbSDavid Ahern 			       "Specifying source address requires IPV6_SUBTREES to be enabled");
28248c5b83f0SRoopa Prabhu 		goto out;
2825d5d531cbSDavid Ahern 	}
28261da177e4SLinus Torvalds #endif
282786872cb5SThomas Graf 	if (cfg->fc_ifindex) {
28281da177e4SLinus Torvalds 		err = -ENODEV;
28295578689aSDaniel Lezcano 		dev = dev_get_by_index(net, cfg->fc_ifindex);
28301da177e4SLinus Torvalds 		if (!dev)
28311da177e4SLinus Torvalds 			goto out;
28321da177e4SLinus Torvalds 		idev = in6_dev_get(dev);
28331da177e4SLinus Torvalds 		if (!idev)
28341da177e4SLinus Torvalds 			goto out;
28351da177e4SLinus Torvalds 	}
28361da177e4SLinus Torvalds 
283786872cb5SThomas Graf 	if (cfg->fc_metric == 0)
283886872cb5SThomas Graf 		cfg->fc_metric = IP6_RT_PRIO_USER;
28391da177e4SLinus Torvalds 
2840fc1e64e1SDavid Ahern 	if (cfg->fc_flags & RTNH_F_ONLINK) {
2841fc1e64e1SDavid Ahern 		if (!dev) {
2842fc1e64e1SDavid Ahern 			NL_SET_ERR_MSG(extack,
2843fc1e64e1SDavid Ahern 				       "Nexthop device required for onlink");
2844fc1e64e1SDavid Ahern 			err = -ENODEV;
2845fc1e64e1SDavid Ahern 			goto out;
2846fc1e64e1SDavid Ahern 		}
2847fc1e64e1SDavid Ahern 
2848fc1e64e1SDavid Ahern 		if (!(dev->flags & IFF_UP)) {
2849fc1e64e1SDavid Ahern 			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2850fc1e64e1SDavid Ahern 			err = -ENETDOWN;
2851fc1e64e1SDavid Ahern 			goto out;
2852fc1e64e1SDavid Ahern 		}
2853fc1e64e1SDavid Ahern 	}
2854fc1e64e1SDavid Ahern 
2855c71099acSThomas Graf 	err = -ENOBUFS;
285638308473SDavid S. Miller 	if (cfg->fc_nlinfo.nlh &&
2857d71314b4SMatti Vaittinen 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
2858d71314b4SMatti Vaittinen 		table = fib6_get_table(net, cfg->fc_table);
285938308473SDavid S. Miller 		if (!table) {
2860f3213831SJoe Perches 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
2861d71314b4SMatti Vaittinen 			table = fib6_new_table(net, cfg->fc_table);
2862d71314b4SMatti Vaittinen 		}
2863d71314b4SMatti Vaittinen 	} else {
2864d71314b4SMatti Vaittinen 		table = fib6_new_table(net, cfg->fc_table);
2865d71314b4SMatti Vaittinen 	}
286638308473SDavid S. Miller 
286738308473SDavid S. Miller 	if (!table)
2868c71099acSThomas Graf 		goto out;
2869c71099acSThomas Graf 
28701da177e4SLinus Torvalds 	err = -ENOMEM;
287193531c67SDavid Ahern 	rt = fib6_info_alloc(gfp_flags);
287293531c67SDavid Ahern 	if (!rt)
28731da177e4SLinus Torvalds 		goto out;
287493531c67SDavid Ahern 
287593531c67SDavid Ahern 	if (cfg->fc_flags & RTF_ADDRCONF)
287693531c67SDavid Ahern 		rt->dst_nocount = true;
28771da177e4SLinus Torvalds 
2878d4ead6b3SDavid Ahern 	err = ip6_convert_metrics(net, rt, cfg);
2879d4ead6b3SDavid Ahern 	if (err < 0)
2880d4ead6b3SDavid Ahern 		goto out;
2881d4ead6b3SDavid Ahern 
28821716a961SGao feng 	if (cfg->fc_flags & RTF_EXPIRES)
288314895687SDavid Ahern 		fib6_set_expires(rt, jiffies +
28841716a961SGao feng 				clock_t_to_jiffies(cfg->fc_expires));
28851716a961SGao feng 	else
288614895687SDavid Ahern 		fib6_clean_expires(rt);
28871da177e4SLinus Torvalds 
288886872cb5SThomas Graf 	if (cfg->fc_protocol == RTPROT_UNSPEC)
288986872cb5SThomas Graf 		cfg->fc_protocol = RTPROT_BOOT;
289093c2fb25SDavid Ahern 	rt->fib6_protocol = cfg->fc_protocol;
289186872cb5SThomas Graf 
289286872cb5SThomas Graf 	addr_type = ipv6_addr_type(&cfg->fc_dst);
28931da177e4SLinus Torvalds 
289419e42e45SRoopa Prabhu 	if (cfg->fc_encap) {
289519e42e45SRoopa Prabhu 		struct lwtunnel_state *lwtstate;
289619e42e45SRoopa Prabhu 
289730357d7dSDavid Ahern 		err = lwtunnel_build_state(cfg->fc_encap_type,
2898127eb7cdSTom Herbert 					   cfg->fc_encap, AF_INET6, cfg,
28999ae28727SDavid Ahern 					   &lwtstate, extack);
290019e42e45SRoopa Prabhu 		if (err)
290119e42e45SRoopa Prabhu 			goto out;
29025e670d84SDavid Ahern 		rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
290325368623STom Herbert 	}
290419e42e45SRoopa Prabhu 
290593c2fb25SDavid Ahern 	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
290693c2fb25SDavid Ahern 	rt->fib6_dst.plen = cfg->fc_dst_len;
290793c2fb25SDavid Ahern 	if (rt->fib6_dst.plen == 128)
29083b6761d1SDavid Ahern 		rt->dst_host = true;
29091da177e4SLinus Torvalds 
29101da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
291193c2fb25SDavid Ahern 	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
291293c2fb25SDavid Ahern 	rt->fib6_src.plen = cfg->fc_src_len;
29131da177e4SLinus Torvalds #endif
29141da177e4SLinus Torvalds 
291593c2fb25SDavid Ahern 	rt->fib6_metric = cfg->fc_metric;
29165e670d84SDavid Ahern 	rt->fib6_nh.nh_weight = 1;
29171da177e4SLinus Torvalds 
2918e8478e80SDavid Ahern 	rt->fib6_type = cfg->fc_type;
2919e8478e80SDavid Ahern 
29201da177e4SLinus Torvalds 	/* We cannot add true routes via loopback here,
29211da177e4SLinus Torvalds 	   they would result in kernel looping; promote them to reject routes
29221da177e4SLinus Torvalds 	 */
292386872cb5SThomas Graf 	if ((cfg->fc_flags & RTF_REJECT) ||
292438308473SDavid S. Miller 	    (dev && (dev->flags & IFF_LOOPBACK) &&
292538308473SDavid S. Miller 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
292638308473SDavid S. Miller 	     !(cfg->fc_flags & RTF_LOCAL))) {
29271da177e4SLinus Torvalds 		/* hold loopback dev/idev if we haven't done so. */
29285578689aSDaniel Lezcano 		if (dev != net->loopback_dev) {
29291da177e4SLinus Torvalds 			if (dev) {
29301da177e4SLinus Torvalds 				dev_put(dev);
29311da177e4SLinus Torvalds 				in6_dev_put(idev);
29321da177e4SLinus Torvalds 			}
29335578689aSDaniel Lezcano 			dev = net->loopback_dev;
29341da177e4SLinus Torvalds 			dev_hold(dev);
29351da177e4SLinus Torvalds 			idev = in6_dev_get(dev);
29361da177e4SLinus Torvalds 			if (!idev) {
29371da177e4SLinus Torvalds 				err = -ENODEV;
29381da177e4SLinus Torvalds 				goto out;
29391da177e4SLinus Torvalds 			}
29401da177e4SLinus Torvalds 		}
294193c2fb25SDavid Ahern 		rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
29421da177e4SLinus Torvalds 		goto install_route;
29431da177e4SLinus Torvalds 	}
29441da177e4SLinus Torvalds 
294586872cb5SThomas Graf 	if (cfg->fc_flags & RTF_GATEWAY) {
29469fbb704cSDavid Ahern 		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
29471da177e4SLinus Torvalds 		if (err)
29481da177e4SLinus Torvalds 			goto out;
29499fbb704cSDavid Ahern 
295093531c67SDavid Ahern 		rt->fib6_nh.nh_gw = cfg->fc_gateway;
29511da177e4SLinus Torvalds 	}
29521da177e4SLinus Torvalds 
29531da177e4SLinus Torvalds 	err = -ENODEV;
295438308473SDavid S. Miller 	if (!dev)
29551da177e4SLinus Torvalds 		goto out;
29561da177e4SLinus Torvalds 
2957428604fbSLorenzo Bianconi 	if (idev->cnf.disable_ipv6) {
2958428604fbSLorenzo Bianconi 		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
2959428604fbSLorenzo Bianconi 		err = -EACCES;
2960428604fbSLorenzo Bianconi 		goto out;
2961428604fbSLorenzo Bianconi 	}
2962428604fbSLorenzo Bianconi 
2963955ec4cbSDavid Ahern 	if (!(dev->flags & IFF_UP)) {
2964955ec4cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2965955ec4cbSDavid Ahern 		err = -ENETDOWN;
2966955ec4cbSDavid Ahern 		goto out;
2967955ec4cbSDavid Ahern 	}
2968955ec4cbSDavid Ahern 
2969c3968a85SDaniel Walter 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2970c3968a85SDaniel Walter 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2971d5d531cbSDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid source address");
2972c3968a85SDaniel Walter 			err = -EINVAL;
2973c3968a85SDaniel Walter 			goto out;
2974c3968a85SDaniel Walter 		}
297593c2fb25SDavid Ahern 		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
297693c2fb25SDavid Ahern 		rt->fib6_prefsrc.plen = 128;
2977c3968a85SDaniel Walter 	} else
297893c2fb25SDavid Ahern 		rt->fib6_prefsrc.plen = 0;
2979c3968a85SDaniel Walter 
298093c2fb25SDavid Ahern 	rt->fib6_flags = cfg->fc_flags;
29811da177e4SLinus Torvalds 
29821da177e4SLinus Torvalds install_route:
298393c2fb25SDavid Ahern 	if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
29845609b80aSIdo Schimmel 	    !netif_carrier_ok(dev))
29855e670d84SDavid Ahern 		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
29865e670d84SDavid Ahern 	rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
298793531c67SDavid Ahern 	rt->fib6_nh.nh_dev = dev;
298893c2fb25SDavid Ahern 	rt->fib6_idev = idev;
298993c2fb25SDavid Ahern 	rt->fib6_table = table;
299063152fc0SDaniel Lezcano 
2991c346dca1SYOSHIFUJI Hideaki 	cfg->fc_nlinfo.nl_net = dev_net(dev);
299263152fc0SDaniel Lezcano 
29938c5b83f0SRoopa Prabhu 	return rt;
29941da177e4SLinus Torvalds out:
29951da177e4SLinus Torvalds 	if (dev)
29961da177e4SLinus Torvalds 		dev_put(dev);
29971da177e4SLinus Torvalds 	if (idev)
29981da177e4SLinus Torvalds 		in6_dev_put(idev);
29996b9ea5a6SRoopa Prabhu 
300093531c67SDavid Ahern 	fib6_info_release(rt);
30018c5b83f0SRoopa Prabhu 	return ERR_PTR(err);
30026b9ea5a6SRoopa Prabhu }
30036b9ea5a6SRoopa Prabhu 
3004acb54e3cSDavid Ahern int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3005acb54e3cSDavid Ahern 		  struct netlink_ext_ack *extack)
30066b9ea5a6SRoopa Prabhu {
30078d1c802bSDavid Ahern 	struct fib6_info *rt;
30086b9ea5a6SRoopa Prabhu 	int err;
30096b9ea5a6SRoopa Prabhu 
3010acb54e3cSDavid Ahern 	rt = ip6_route_info_create(cfg, gfp_flags, extack);
3011d4ead6b3SDavid Ahern 	if (IS_ERR(rt))
3012d4ead6b3SDavid Ahern 		return PTR_ERR(rt);
30136b9ea5a6SRoopa Prabhu 
3014d4ead6b3SDavid Ahern 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
301593531c67SDavid Ahern 	fib6_info_release(rt);
30166b9ea5a6SRoopa Prabhu 
30171da177e4SLinus Torvalds 	return err;
30181da177e4SLinus Torvalds }
30191da177e4SLinus Torvalds 
30208d1c802bSDavid Ahern static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
30211da177e4SLinus Torvalds {
3022afb1d4b5SDavid Ahern 	struct net *net = info->nl_net;
3023c71099acSThomas Graf 	struct fib6_table *table;
3024afb1d4b5SDavid Ahern 	int err;
30251da177e4SLinus Torvalds 
3026421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry) {
30276825a26cSGao feng 		err = -ENOENT;
30286825a26cSGao feng 		goto out;
30296825a26cSGao feng 	}
30306c813a72SPatrick McHardy 
303193c2fb25SDavid Ahern 	table = rt->fib6_table;
303266f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
303386872cb5SThomas Graf 	err = fib6_del(rt, info);
303466f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
30351da177e4SLinus Torvalds 
30366825a26cSGao feng out:
303793531c67SDavid Ahern 	fib6_info_release(rt);
30381da177e4SLinus Torvalds 	return err;
30391da177e4SLinus Torvalds }
30401da177e4SLinus Torvalds 
30418d1c802bSDavid Ahern int ip6_del_rt(struct net *net, struct fib6_info *rt)
3042e0a1ad73SThomas Graf {
3043afb1d4b5SDavid Ahern 	struct nl_info info = { .nl_net = net };
3044afb1d4b5SDavid Ahern 
3045528c4cebSDenis V. Lunev 	return __ip6_del_rt(rt, &info);
3046e0a1ad73SThomas Graf }
3047e0a1ad73SThomas Graf 
30488d1c802bSDavid Ahern static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
30490ae81335SDavid Ahern {
30500ae81335SDavid Ahern 	struct nl_info *info = &cfg->fc_nlinfo;
3051e3330039SWANG Cong 	struct net *net = info->nl_net;
305216a16cd3SDavid Ahern 	struct sk_buff *skb = NULL;
30530ae81335SDavid Ahern 	struct fib6_table *table;
3054e3330039SWANG Cong 	int err = -ENOENT;
30550ae81335SDavid Ahern 
3056421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry)
3057e3330039SWANG Cong 		goto out_put;
305893c2fb25SDavid Ahern 	table = rt->fib6_table;
305966f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
30600ae81335SDavid Ahern 
306193c2fb25SDavid Ahern 	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
30628d1c802bSDavid Ahern 		struct fib6_info *sibling, *next_sibling;
30630ae81335SDavid Ahern 
306416a16cd3SDavid Ahern 		/* prefer to send a single notification with all hops */
306516a16cd3SDavid Ahern 		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
306616a16cd3SDavid Ahern 		if (skb) {
306716a16cd3SDavid Ahern 			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
306816a16cd3SDavid Ahern 
3069d4ead6b3SDavid Ahern 			if (rt6_fill_node(net, skb, rt, NULL,
307016a16cd3SDavid Ahern 					  NULL, NULL, 0, RTM_DELROUTE,
307116a16cd3SDavid Ahern 					  info->portid, seq, 0) < 0) {
307216a16cd3SDavid Ahern 				kfree_skb(skb);
307316a16cd3SDavid Ahern 				skb = NULL;
307416a16cd3SDavid Ahern 			} else
307516a16cd3SDavid Ahern 				info->skip_notify = 1;
307616a16cd3SDavid Ahern 		}
307716a16cd3SDavid Ahern 
30780ae81335SDavid Ahern 		list_for_each_entry_safe(sibling, next_sibling,
307993c2fb25SDavid Ahern 					 &rt->fib6_siblings,
308093c2fb25SDavid Ahern 					 fib6_siblings) {
30810ae81335SDavid Ahern 			err = fib6_del(sibling, info);
30820ae81335SDavid Ahern 			if (err)
3083e3330039SWANG Cong 				goto out_unlock;
30840ae81335SDavid Ahern 		}
30850ae81335SDavid Ahern 	}
30860ae81335SDavid Ahern 
30870ae81335SDavid Ahern 	err = fib6_del(rt, info);
3088e3330039SWANG Cong out_unlock:
308966f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
3090e3330039SWANG Cong out_put:
309193531c67SDavid Ahern 	fib6_info_release(rt);
309216a16cd3SDavid Ahern 
309316a16cd3SDavid Ahern 	if (skb) {
3094e3330039SWANG Cong 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
309516a16cd3SDavid Ahern 			    info->nlh, gfp_any());
309616a16cd3SDavid Ahern 	}
30970ae81335SDavid Ahern 	return err;
30980ae81335SDavid Ahern }
30990ae81335SDavid Ahern 
310023fb93a4SDavid Ahern static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
310123fb93a4SDavid Ahern {
310223fb93a4SDavid Ahern 	int rc = -ESRCH;
310323fb93a4SDavid Ahern 
310423fb93a4SDavid Ahern 	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
310523fb93a4SDavid Ahern 		goto out;
310623fb93a4SDavid Ahern 
310723fb93a4SDavid Ahern 	if (cfg->fc_flags & RTF_GATEWAY &&
310823fb93a4SDavid Ahern 	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
310923fb93a4SDavid Ahern 		goto out;
311023fb93a4SDavid Ahern 	if (dst_hold_safe(&rt->dst))
311123fb93a4SDavid Ahern 		rc = rt6_remove_exception_rt(rt);
311223fb93a4SDavid Ahern out:
311323fb93a4SDavid Ahern 	return rc;
311423fb93a4SDavid Ahern }
311523fb93a4SDavid Ahern 
3116333c4301SDavid Ahern static int ip6_route_del(struct fib6_config *cfg,
3117333c4301SDavid Ahern 			 struct netlink_ext_ack *extack)
31181da177e4SLinus Torvalds {
31198d1c802bSDavid Ahern 	struct rt6_info *rt_cache;
3120c71099acSThomas Graf 	struct fib6_table *table;
31218d1c802bSDavid Ahern 	struct fib6_info *rt;
31221da177e4SLinus Torvalds 	struct fib6_node *fn;
31231da177e4SLinus Torvalds 	int err = -ESRCH;
31241da177e4SLinus Torvalds 
31255578689aSDaniel Lezcano 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3126d5d531cbSDavid Ahern 	if (!table) {
3127d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "FIB table does not exist");
3128c71099acSThomas Graf 		return err;
3129d5d531cbSDavid Ahern 	}
31301da177e4SLinus Torvalds 
313166f5d6ceSWei Wang 	rcu_read_lock();
3132c71099acSThomas Graf 
3133c71099acSThomas Graf 	fn = fib6_locate(&table->tb6_root,
313486872cb5SThomas Graf 			 &cfg->fc_dst, cfg->fc_dst_len,
313538fbeeeeSWei Wang 			 &cfg->fc_src, cfg->fc_src_len,
31362b760fcfSWei Wang 			 !(cfg->fc_flags & RTF_CACHE));
31371da177e4SLinus Torvalds 
31381da177e4SLinus Torvalds 	if (fn) {
313966f5d6ceSWei Wang 		for_each_fib6_node_rt_rcu(fn) {
31402b760fcfSWei Wang 			if (cfg->fc_flags & RTF_CACHE) {
314123fb93a4SDavid Ahern 				int rc;
314223fb93a4SDavid Ahern 
31432b760fcfSWei Wang 				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
31442b760fcfSWei Wang 							      &cfg->fc_src);
314523fb93a4SDavid Ahern 				if (rt_cache) {
314623fb93a4SDavid Ahern 					rc = ip6_del_cached_rt(rt_cache, cfg);
314723fb93a4SDavid Ahern 					if (rc != -ESRCH)
314823fb93a4SDavid Ahern 						return rc;
314923fb93a4SDavid Ahern 				}
31501f56a01fSMartin KaFai Lau 				continue;
31512b760fcfSWei Wang 			}
315286872cb5SThomas Graf 			if (cfg->fc_ifindex &&
31535e670d84SDavid Ahern 			    (!rt->fib6_nh.nh_dev ||
31545e670d84SDavid Ahern 			     rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
31551da177e4SLinus Torvalds 				continue;
315686872cb5SThomas Graf 			if (cfg->fc_flags & RTF_GATEWAY &&
31575e670d84SDavid Ahern 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
31581da177e4SLinus Torvalds 				continue;
315993c2fb25SDavid Ahern 			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
31601da177e4SLinus Torvalds 				continue;
316193c2fb25SDavid Ahern 			if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
3162c2ed1880SMantas M 				continue;
316393531c67SDavid Ahern 			fib6_info_hold(rt);
316466f5d6ceSWei Wang 			rcu_read_unlock();
31651da177e4SLinus Torvalds 
31660ae81335SDavid Ahern 			/* if gateway was specified only delete the one hop */
31670ae81335SDavid Ahern 			if (cfg->fc_flags & RTF_GATEWAY)
316886872cb5SThomas Graf 				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
31690ae81335SDavid Ahern 
31700ae81335SDavid Ahern 			return __ip6_del_rt_siblings(rt, cfg);
31711da177e4SLinus Torvalds 		}
31721da177e4SLinus Torvalds 	}
317366f5d6ceSWei Wang 	rcu_read_unlock();
31741da177e4SLinus Torvalds 
31751da177e4SLinus Torvalds 	return err;
31761da177e4SLinus Torvalds }
31771da177e4SLinus Torvalds 
31786700c270SDavid S. Miller static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3179a6279458SYOSHIFUJI Hideaki {
3180a6279458SYOSHIFUJI Hideaki 	struct netevent_redirect netevent;
3181e8599ff4SDavid S. Miller 	struct rt6_info *rt, *nrt = NULL;
3182e8599ff4SDavid S. Miller 	struct ndisc_options ndopts;
3183e8599ff4SDavid S. Miller 	struct inet6_dev *in6_dev;
3184e8599ff4SDavid S. Miller 	struct neighbour *neigh;
318571bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	struct rd_msg *msg;
31866e157b6aSDavid S. Miller 	int optlen, on_link;
31876e157b6aSDavid S. Miller 	u8 *lladdr;
3188e8599ff4SDavid S. Miller 
318929a3cad5SSimon Horman 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
319071bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	optlen -= sizeof(*msg);
3191e8599ff4SDavid S. Miller 
3192e8599ff4SDavid S. Miller 	if (optlen < 0) {
31936e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3194e8599ff4SDavid S. Miller 		return;
3195e8599ff4SDavid S. Miller 	}
3196e8599ff4SDavid S. Miller 
319771bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	msg = (struct rd_msg *)icmp6_hdr(skb);
3198e8599ff4SDavid S. Miller 
319971bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (ipv6_addr_is_multicast(&msg->dest)) {
32006e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3201e8599ff4SDavid S. Miller 		return;
3202e8599ff4SDavid S. Miller 	}
3203e8599ff4SDavid S. Miller 
32046e157b6aSDavid S. Miller 	on_link = 0;
320571bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3206e8599ff4SDavid S. Miller 		on_link = 1;
320771bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	} else if (ipv6_addr_type(&msg->target) !=
3208e8599ff4SDavid S. Miller 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
32096e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3210e8599ff4SDavid S. Miller 		return;
3211e8599ff4SDavid S. Miller 	}
3212e8599ff4SDavid S. Miller 
3213e8599ff4SDavid S. Miller 	in6_dev = __in6_dev_get(skb->dev);
3214e8599ff4SDavid S. Miller 	if (!in6_dev)
3215e8599ff4SDavid S. Miller 		return;
3216e8599ff4SDavid S. Miller 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3217e8599ff4SDavid S. Miller 		return;
3218e8599ff4SDavid S. Miller 
3219e8599ff4SDavid S. Miller 	/* RFC2461 8.1:
3220e8599ff4SDavid S. Miller 	 *	The IP source address of the Redirect MUST be the same as the current
3221e8599ff4SDavid S. Miller 	 *	first-hop router for the specified ICMP Destination Address.
3222e8599ff4SDavid S. Miller 	 */
3223e8599ff4SDavid S. Miller 
3224f997c55cSAlexander Aring 	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3225e8599ff4SDavid S. Miller 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3226e8599ff4SDavid S. Miller 		return;
3227e8599ff4SDavid S. Miller 	}
32286e157b6aSDavid S. Miller 
32296e157b6aSDavid S. Miller 	lladdr = NULL;
3230e8599ff4SDavid S. Miller 	if (ndopts.nd_opts_tgt_lladdr) {
3231e8599ff4SDavid S. Miller 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3232e8599ff4SDavid S. Miller 					     skb->dev);
3233e8599ff4SDavid S. Miller 		if (!lladdr) {
3234e8599ff4SDavid S. Miller 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3235e8599ff4SDavid S. Miller 			return;
3236e8599ff4SDavid S. Miller 		}
3237e8599ff4SDavid S. Miller 	}
3238e8599ff4SDavid S. Miller 
32396e157b6aSDavid S. Miller 	rt = (struct rt6_info *) dst;
3240ec13ad1dSMatthias Schiffer 	if (rt->rt6i_flags & RTF_REJECT) {
32416e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
32426e157b6aSDavid S. Miller 		return;
32436e157b6aSDavid S. Miller 	}
32446e157b6aSDavid S. Miller 
32456e157b6aSDavid S. Miller 	/* Redirect received -> path was valid.
32466e157b6aSDavid S. Miller 	 * Look, redirects are sent only in response to data packets,
32476e157b6aSDavid S. Miller 	 * so that this nexthop apparently is reachable. --ANK
32486e157b6aSDavid S. Miller 	 */
32490dec879fSJulian Anastasov 	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
32506e157b6aSDavid S. Miller 
325171bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3252e8599ff4SDavid S. Miller 	if (!neigh)
3253e8599ff4SDavid S. Miller 		return;
3254e8599ff4SDavid S. Miller 
32551da177e4SLinus Torvalds 	/*
32561da177e4SLinus Torvalds 	 *	We have finally decided to accept it.
32571da177e4SLinus Torvalds 	 */
32581da177e4SLinus Torvalds 
3259f997c55cSAlexander Aring 	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
32601da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
32611da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_OVERRIDE|
32621da177e4SLinus Torvalds 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3263f997c55cSAlexander Aring 				     NEIGH_UPDATE_F_ISROUTER)),
3264f997c55cSAlexander Aring 		     NDISC_REDIRECT, &ndopts);
32651da177e4SLinus Torvalds 
326623fb93a4SDavid Ahern 	nrt = ip6_rt_cache_alloc(rt->from, &msg->dest, NULL);
326738308473SDavid S. Miller 	if (!nrt)
32681da177e4SLinus Torvalds 		goto out;
32691da177e4SLinus Torvalds 
32701da177e4SLinus Torvalds 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
32711da177e4SLinus Torvalds 	if (on_link)
32721da177e4SLinus Torvalds 		nrt->rt6i_flags &= ~RTF_GATEWAY;
32731da177e4SLinus Torvalds 
32744e3fd7a0SAlexey Dobriyan 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
32751da177e4SLinus Torvalds 
32762b760fcfSWei Wang 	/* No need to remove rt from the exception table if rt is
32772b760fcfSWei Wang 	 * a cached route because rt6_insert_exception() will
32782b760fcfSWei Wang 	 * takes care of it
32792b760fcfSWei Wang 	 */
3280d4ead6b3SDavid Ahern 	if (rt6_insert_exception(nrt, rt->from)) {
32812b760fcfSWei Wang 		dst_release_immediate(&nrt->dst);
32822b760fcfSWei Wang 		goto out;
32832b760fcfSWei Wang 	}
32841da177e4SLinus Torvalds 
3285d8d1f30bSChangli Gao 	netevent.old = &rt->dst;
3286d8d1f30bSChangli Gao 	netevent.new = &nrt->dst;
328771bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	netevent.daddr = &msg->dest;
328860592833SYOSHIFUJI Hideaki / 吉藤英明 	netevent.neigh = neigh;
32898d71740cSTom Tucker 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
32908d71740cSTom Tucker 
32911da177e4SLinus Torvalds out:
3292e8599ff4SDavid S. Miller 	neigh_release(neigh);
32936e157b6aSDavid S. Miller }
32946e157b6aSDavid S. Miller 
329570ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
32968d1c802bSDavid Ahern static struct fib6_info *rt6_get_route_info(struct net *net,
3297b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
3298830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
3299830218c1SDavid Ahern 					   struct net_device *dev)
330070ceb4f5SYOSHIFUJI Hideaki {
3301830218c1SDavid Ahern 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3302830218c1SDavid Ahern 	int ifindex = dev->ifindex;
330370ceb4f5SYOSHIFUJI Hideaki 	struct fib6_node *fn;
33048d1c802bSDavid Ahern 	struct fib6_info *rt = NULL;
3305c71099acSThomas Graf 	struct fib6_table *table;
330670ceb4f5SYOSHIFUJI Hideaki 
3307830218c1SDavid Ahern 	table = fib6_get_table(net, tb_id);
330838308473SDavid S. Miller 	if (!table)
3309c71099acSThomas Graf 		return NULL;
3310c71099acSThomas Graf 
331166f5d6ceSWei Wang 	rcu_read_lock();
331238fbeeeeSWei Wang 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
331370ceb4f5SYOSHIFUJI Hideaki 	if (!fn)
331470ceb4f5SYOSHIFUJI Hideaki 		goto out;
331570ceb4f5SYOSHIFUJI Hideaki 
331666f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(fn) {
33175e670d84SDavid Ahern 		if (rt->fib6_nh.nh_dev->ifindex != ifindex)
331870ceb4f5SYOSHIFUJI Hideaki 			continue;
331993c2fb25SDavid Ahern 		if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
332070ceb4f5SYOSHIFUJI Hideaki 			continue;
33215e670d84SDavid Ahern 		if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
332270ceb4f5SYOSHIFUJI Hideaki 			continue;
33238d1c802bSDavid Ahern 		fib6_info_hold(rt);
332470ceb4f5SYOSHIFUJI Hideaki 		break;
332570ceb4f5SYOSHIFUJI Hideaki 	}
332670ceb4f5SYOSHIFUJI Hideaki out:
332766f5d6ceSWei Wang 	rcu_read_unlock();
332870ceb4f5SYOSHIFUJI Hideaki 	return rt;
332970ceb4f5SYOSHIFUJI Hideaki }
333070ceb4f5SYOSHIFUJI Hideaki 
33318d1c802bSDavid Ahern static struct fib6_info *rt6_add_route_info(struct net *net,
3332b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
3333830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
3334830218c1SDavid Ahern 					   struct net_device *dev,
333595c96174SEric Dumazet 					   unsigned int pref)
333670ceb4f5SYOSHIFUJI Hideaki {
333786872cb5SThomas Graf 	struct fib6_config cfg = {
3338238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
3339830218c1SDavid Ahern 		.fc_ifindex	= dev->ifindex,
334086872cb5SThomas Graf 		.fc_dst_len	= prefixlen,
334186872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
334286872cb5SThomas Graf 				  RTF_UP | RTF_PREF(pref),
3343b91d5329SXin Long 		.fc_protocol = RTPROT_RA,
3344e8478e80SDavid Ahern 		.fc_type = RTN_UNICAST,
334515e47304SEric W. Biederman 		.fc_nlinfo.portid = 0,
3346efa2cea0SDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
3347efa2cea0SDaniel Lezcano 		.fc_nlinfo.nl_net = net,
334886872cb5SThomas Graf 	};
334970ceb4f5SYOSHIFUJI Hideaki 
3350830218c1SDavid Ahern 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
33514e3fd7a0SAlexey Dobriyan 	cfg.fc_dst = *prefix;
33524e3fd7a0SAlexey Dobriyan 	cfg.fc_gateway = *gwaddr;
335386872cb5SThomas Graf 
3354e317da96SYOSHIFUJI Hideaki 	/* We should treat it as a default route if prefix length is 0. */
3355e317da96SYOSHIFUJI Hideaki 	if (!prefixlen)
335686872cb5SThomas Graf 		cfg.fc_flags |= RTF_DEFAULT;
335770ceb4f5SYOSHIFUJI Hideaki 
3358acb54e3cSDavid Ahern 	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
335970ceb4f5SYOSHIFUJI Hideaki 
3360830218c1SDavid Ahern 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
336170ceb4f5SYOSHIFUJI Hideaki }
336270ceb4f5SYOSHIFUJI Hideaki #endif
336370ceb4f5SYOSHIFUJI Hideaki 
33648d1c802bSDavid Ahern struct fib6_info *rt6_get_dflt_router(struct net *net,
3365afb1d4b5SDavid Ahern 				     const struct in6_addr *addr,
3366afb1d4b5SDavid Ahern 				     struct net_device *dev)
33671da177e4SLinus Torvalds {
3368830218c1SDavid Ahern 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
33698d1c802bSDavid Ahern 	struct fib6_info *rt;
3370c71099acSThomas Graf 	struct fib6_table *table;
33711da177e4SLinus Torvalds 
3372afb1d4b5SDavid Ahern 	table = fib6_get_table(net, tb_id);
337338308473SDavid S. Miller 	if (!table)
3374c71099acSThomas Graf 		return NULL;
33751da177e4SLinus Torvalds 
337666f5d6ceSWei Wang 	rcu_read_lock();
337766f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
33785e670d84SDavid Ahern 		if (dev == rt->fib6_nh.nh_dev &&
337993c2fb25SDavid Ahern 		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
33805e670d84SDavid Ahern 		    ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
33811da177e4SLinus Torvalds 			break;
33821da177e4SLinus Torvalds 	}
33831da177e4SLinus Torvalds 	if (rt)
33848d1c802bSDavid Ahern 		fib6_info_hold(rt);
338566f5d6ceSWei Wang 	rcu_read_unlock();
33861da177e4SLinus Torvalds 	return rt;
33871da177e4SLinus Torvalds }
33881da177e4SLinus Torvalds 
33898d1c802bSDavid Ahern struct fib6_info *rt6_add_dflt_router(struct net *net,
3390afb1d4b5SDavid Ahern 				     const struct in6_addr *gwaddr,
3391ebacaaa0SYOSHIFUJI Hideaki 				     struct net_device *dev,
3392ebacaaa0SYOSHIFUJI Hideaki 				     unsigned int pref)
33931da177e4SLinus Torvalds {
339486872cb5SThomas Graf 	struct fib6_config cfg = {
3395ca254490SDavid Ahern 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3396238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
339786872cb5SThomas Graf 		.fc_ifindex	= dev->ifindex,
339886872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
339986872cb5SThomas Graf 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3400b91d5329SXin Long 		.fc_protocol = RTPROT_RA,
3401e8478e80SDavid Ahern 		.fc_type = RTN_UNICAST,
340215e47304SEric W. Biederman 		.fc_nlinfo.portid = 0,
34035578689aSDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
3404afb1d4b5SDavid Ahern 		.fc_nlinfo.nl_net = net,
340586872cb5SThomas Graf 	};
34061da177e4SLinus Torvalds 
34074e3fd7a0SAlexey Dobriyan 	cfg.fc_gateway = *gwaddr;
34081da177e4SLinus Torvalds 
3409acb54e3cSDavid Ahern 	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
3410830218c1SDavid Ahern 		struct fib6_table *table;
3411830218c1SDavid Ahern 
3412830218c1SDavid Ahern 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
3413830218c1SDavid Ahern 		if (table)
3414830218c1SDavid Ahern 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3415830218c1SDavid Ahern 	}
34161da177e4SLinus Torvalds 
3417afb1d4b5SDavid Ahern 	return rt6_get_dflt_router(net, gwaddr, dev);
34181da177e4SLinus Torvalds }
34191da177e4SLinus Torvalds 
3420afb1d4b5SDavid Ahern static void __rt6_purge_dflt_routers(struct net *net,
3421afb1d4b5SDavid Ahern 				     struct fib6_table *table)
34221da177e4SLinus Torvalds {
34238d1c802bSDavid Ahern 	struct fib6_info *rt;
34241da177e4SLinus Torvalds 
34251da177e4SLinus Torvalds restart:
342666f5d6ceSWei Wang 	rcu_read_lock();
342766f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
342893c2fb25SDavid Ahern 		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
342993c2fb25SDavid Ahern 		    (!rt->fib6_idev || rt->fib6_idev->cnf.accept_ra != 2)) {
343093531c67SDavid Ahern 			fib6_info_hold(rt);
343166f5d6ceSWei Wang 			rcu_read_unlock();
3432afb1d4b5SDavid Ahern 			ip6_del_rt(net, rt);
34331da177e4SLinus Torvalds 			goto restart;
34341da177e4SLinus Torvalds 		}
34351da177e4SLinus Torvalds 	}
343666f5d6ceSWei Wang 	rcu_read_unlock();
3437830218c1SDavid Ahern 
3438830218c1SDavid Ahern 	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3439830218c1SDavid Ahern }
3440830218c1SDavid Ahern 
3441830218c1SDavid Ahern void rt6_purge_dflt_routers(struct net *net)
3442830218c1SDavid Ahern {
3443830218c1SDavid Ahern 	struct fib6_table *table;
3444830218c1SDavid Ahern 	struct hlist_head *head;
3445830218c1SDavid Ahern 	unsigned int h;
3446830218c1SDavid Ahern 
3447830218c1SDavid Ahern 	rcu_read_lock();
3448830218c1SDavid Ahern 
3449830218c1SDavid Ahern 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3450830218c1SDavid Ahern 		head = &net->ipv6.fib_table_hash[h];
3451830218c1SDavid Ahern 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3452830218c1SDavid Ahern 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3453afb1d4b5SDavid Ahern 				__rt6_purge_dflt_routers(net, table);
3454830218c1SDavid Ahern 		}
3455830218c1SDavid Ahern 	}
3456830218c1SDavid Ahern 
3457830218c1SDavid Ahern 	rcu_read_unlock();
34581da177e4SLinus Torvalds }
34591da177e4SLinus Torvalds 
34605578689aSDaniel Lezcano static void rtmsg_to_fib6_config(struct net *net,
34615578689aSDaniel Lezcano 				 struct in6_rtmsg *rtmsg,
346286872cb5SThomas Graf 				 struct fib6_config *cfg)
346386872cb5SThomas Graf {
346486872cb5SThomas Graf 	memset(cfg, 0, sizeof(*cfg));
346586872cb5SThomas Graf 
3466ca254490SDavid Ahern 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3467ca254490SDavid Ahern 			 : RT6_TABLE_MAIN;
346886872cb5SThomas Graf 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
346986872cb5SThomas Graf 	cfg->fc_metric = rtmsg->rtmsg_metric;
347086872cb5SThomas Graf 	cfg->fc_expires = rtmsg->rtmsg_info;
347186872cb5SThomas Graf 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
347286872cb5SThomas Graf 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
347386872cb5SThomas Graf 	cfg->fc_flags = rtmsg->rtmsg_flags;
3474e8478e80SDavid Ahern 	cfg->fc_type = rtmsg->rtmsg_type;
347586872cb5SThomas Graf 
34765578689aSDaniel Lezcano 	cfg->fc_nlinfo.nl_net = net;
3477f1243c2dSBenjamin Thery 
34784e3fd7a0SAlexey Dobriyan 	cfg->fc_dst = rtmsg->rtmsg_dst;
34794e3fd7a0SAlexey Dobriyan 	cfg->fc_src = rtmsg->rtmsg_src;
34804e3fd7a0SAlexey Dobriyan 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
348186872cb5SThomas Graf }
348286872cb5SThomas Graf 
34835578689aSDaniel Lezcano int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
34841da177e4SLinus Torvalds {
348586872cb5SThomas Graf 	struct fib6_config cfg;
34861da177e4SLinus Torvalds 	struct in6_rtmsg rtmsg;
34871da177e4SLinus Torvalds 	int err;
34881da177e4SLinus Torvalds 
34891da177e4SLinus Torvalds 	switch (cmd) {
34901da177e4SLinus Torvalds 	case SIOCADDRT:		/* Add a route */
34911da177e4SLinus Torvalds 	case SIOCDELRT:		/* Delete a route */
3492af31f412SEric W. Biederman 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
34931da177e4SLinus Torvalds 			return -EPERM;
34941da177e4SLinus Torvalds 		err = copy_from_user(&rtmsg, arg,
34951da177e4SLinus Torvalds 				     sizeof(struct in6_rtmsg));
34961da177e4SLinus Torvalds 		if (err)
34971da177e4SLinus Torvalds 			return -EFAULT;
34981da177e4SLinus Torvalds 
34995578689aSDaniel Lezcano 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
350086872cb5SThomas Graf 
35011da177e4SLinus Torvalds 		rtnl_lock();
35021da177e4SLinus Torvalds 		switch (cmd) {
35031da177e4SLinus Torvalds 		case SIOCADDRT:
3504acb54e3cSDavid Ahern 			err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
35051da177e4SLinus Torvalds 			break;
35061da177e4SLinus Torvalds 		case SIOCDELRT:
3507333c4301SDavid Ahern 			err = ip6_route_del(&cfg, NULL);
35081da177e4SLinus Torvalds 			break;
35091da177e4SLinus Torvalds 		default:
35101da177e4SLinus Torvalds 			err = -EINVAL;
35111da177e4SLinus Torvalds 		}
35121da177e4SLinus Torvalds 		rtnl_unlock();
35131da177e4SLinus Torvalds 
35141da177e4SLinus Torvalds 		return err;
35153ff50b79SStephen Hemminger 	}
35161da177e4SLinus Torvalds 
35171da177e4SLinus Torvalds 	return -EINVAL;
35181da177e4SLinus Torvalds }
35191da177e4SLinus Torvalds 
35201da177e4SLinus Torvalds /*
35211da177e4SLinus Torvalds  *	Drop the packet on the floor
35221da177e4SLinus Torvalds  */
35231da177e4SLinus Torvalds 
3524d5fdd6baSBrian Haley static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
35251da177e4SLinus Torvalds {
3526612f09e8SYOSHIFUJI Hideaki 	int type;
3527adf30907SEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
3528612f09e8SYOSHIFUJI Hideaki 	switch (ipstats_mib_noroutes) {
3529612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_INNOROUTES:
35300660e03fSArnaldo Carvalho de Melo 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
353145bb0060SUlrich Weber 		if (type == IPV6_ADDR_ANY) {
3532bdb7cc64SStephen Suryaputra 			IP6_INC_STATS(dev_net(dst->dev),
3533bdb7cc64SStephen Suryaputra 				      __in6_dev_get_safely(skb->dev),
35343bd653c8SDenis V. Lunev 				      IPSTATS_MIB_INADDRERRORS);
3535612f09e8SYOSHIFUJI Hideaki 			break;
3536612f09e8SYOSHIFUJI Hideaki 		}
3537612f09e8SYOSHIFUJI Hideaki 		/* FALLTHROUGH */
3538612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_OUTNOROUTES:
35393bd653c8SDenis V. Lunev 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
35403bd653c8SDenis V. Lunev 			      ipstats_mib_noroutes);
3541612f09e8SYOSHIFUJI Hideaki 		break;
3542612f09e8SYOSHIFUJI Hideaki 	}
35433ffe533cSAlexey Dobriyan 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
35441da177e4SLinus Torvalds 	kfree_skb(skb);
35451da177e4SLinus Torvalds 	return 0;
35461da177e4SLinus Torvalds }
35471da177e4SLinus Torvalds 
35489ce8ade0SThomas Graf static int ip6_pkt_discard(struct sk_buff *skb)
35499ce8ade0SThomas Graf {
3550612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
35519ce8ade0SThomas Graf }
35529ce8ade0SThomas Graf 
3553ede2059dSEric W. Biederman static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
35541da177e4SLinus Torvalds {
3555adf30907SEric Dumazet 	skb->dev = skb_dst(skb)->dev;
3556612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
35571da177e4SLinus Torvalds }
35581da177e4SLinus Torvalds 
35599ce8ade0SThomas Graf static int ip6_pkt_prohibit(struct sk_buff *skb)
35609ce8ade0SThomas Graf {
3561612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
35629ce8ade0SThomas Graf }
35639ce8ade0SThomas Graf 
3564ede2059dSEric W. Biederman static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
35659ce8ade0SThomas Graf {
3566adf30907SEric Dumazet 	skb->dev = skb_dst(skb)->dev;
3567612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
35689ce8ade0SThomas Graf }
35699ce8ade0SThomas Graf 
35701da177e4SLinus Torvalds /*
35711da177e4SLinus Torvalds  *	Allocate a dst for local (unicast / anycast) address.
35721da177e4SLinus Torvalds  */
35731da177e4SLinus Torvalds 
3574360a9887SDavid Ahern struct fib6_info *addrconf_f6i_alloc(struct net *net,
3575afb1d4b5SDavid Ahern 				     struct inet6_dev *idev,
35761da177e4SLinus Torvalds 				     const struct in6_addr *addr,
3577acb54e3cSDavid Ahern 				     bool anycast, gfp_t gfp_flags)
35781da177e4SLinus Torvalds {
3579ca254490SDavid Ahern 	u32 tb_id;
35804832c30dSDavid Ahern 	struct net_device *dev = idev->dev;
3581360a9887SDavid Ahern 	struct fib6_info *f6i;
35825f02ce24SDavid Ahern 
3583360a9887SDavid Ahern 	f6i = fib6_info_alloc(gfp_flags);
3584360a9887SDavid Ahern 	if (!f6i)
35851da177e4SLinus Torvalds 		return ERR_PTR(-ENOMEM);
35861da177e4SLinus Torvalds 
3587360a9887SDavid Ahern 	f6i->dst_nocount = true;
35883b6761d1SDavid Ahern 
35891da177e4SLinus Torvalds 	in6_dev_hold(idev);
3590360a9887SDavid Ahern 	f6i->fib6_idev = idev;
35911da177e4SLinus Torvalds 
3592360a9887SDavid Ahern 	f6i->dst_host = true;
3593360a9887SDavid Ahern 	f6i->fib6_protocol = RTPROT_KERNEL;
3594360a9887SDavid Ahern 	f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
3595e8478e80SDavid Ahern 	if (anycast) {
3596360a9887SDavid Ahern 		f6i->fib6_type = RTN_ANYCAST;
3597360a9887SDavid Ahern 		f6i->fib6_flags |= RTF_ANYCAST;
3598e8478e80SDavid Ahern 	} else {
3599360a9887SDavid Ahern 		f6i->fib6_type = RTN_LOCAL;
3600360a9887SDavid Ahern 		f6i->fib6_flags |= RTF_LOCAL;
3601e8478e80SDavid Ahern 	}
36021da177e4SLinus Torvalds 
3603360a9887SDavid Ahern 	f6i->fib6_nh.nh_gw = *addr;
360493531c67SDavid Ahern 	dev_hold(dev);
3605360a9887SDavid Ahern 	f6i->fib6_nh.nh_dev = dev;
3606360a9887SDavid Ahern 	f6i->fib6_dst.addr = *addr;
3607360a9887SDavid Ahern 	f6i->fib6_dst.plen = 128;
3608ca254490SDavid Ahern 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3609360a9887SDavid Ahern 	f6i->fib6_table = fib6_get_table(net, tb_id);
36101da177e4SLinus Torvalds 
3611360a9887SDavid Ahern 	return f6i;
36121da177e4SLinus Torvalds }
36131da177e4SLinus Torvalds 
3614c3968a85SDaniel Walter /* Walker argument used to remove a deleted IP address from prefsrc entries. */
3615c3968a85SDaniel Walter struct arg_dev_net_ip {
3616c3968a85SDaniel Walter 	struct net_device *dev;	/* nexthop device to match; NULL matches any device */
3617c3968a85SDaniel Walter 	struct net *net;	/* namespace whose fib6_null_entry is skipped */
3618c3968a85SDaniel Walter 	struct in6_addr *addr;	/* address compared against a route's fib6_prefsrc.addr */
3619c3968a85SDaniel Walter };
3620c3968a85SDaniel Walter 
36218d1c802bSDavid Ahern static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
3622c3968a85SDaniel Walter {
3623c3968a85SDaniel Walter 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3624c3968a85SDaniel Walter 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3625c3968a85SDaniel Walter 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3626c3968a85SDaniel Walter 
36275e670d84SDavid Ahern 	if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
3628421842edSDavid Ahern 	    rt != net->ipv6.fib6_null_entry &&
362993c2fb25SDavid Ahern 	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
363060006a48SWei Wang 		spin_lock_bh(&rt6_exception_lock);
3631c3968a85SDaniel Walter 		/* remove prefsrc entry */
363293c2fb25SDavid Ahern 		rt->fib6_prefsrc.plen = 0;
363360006a48SWei Wang 		/* need to update cache as well */
363460006a48SWei Wang 		rt6_exceptions_remove_prefsrc(rt);
363560006a48SWei Wang 		spin_unlock_bh(&rt6_exception_lock);
3636c3968a85SDaniel Walter 	}
3637c3968a85SDaniel Walter 	return 0;
3638c3968a85SDaniel Walter }
3639c3968a85SDaniel Walter 
3640c3968a85SDaniel Walter void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3641c3968a85SDaniel Walter {
3642c3968a85SDaniel Walter 	struct net *net = dev_net(ifp->idev->dev);
3643c3968a85SDaniel Walter 	struct arg_dev_net_ip adni = {
3644c3968a85SDaniel Walter 		.dev = ifp->idev->dev,
3645c3968a85SDaniel Walter 		.net = net,
3646c3968a85SDaniel Walter 		.addr = &ifp->addr,
3647c3968a85SDaniel Walter 	};
36480c3584d5SLi RongQing 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3649c3968a85SDaniel Walter }
3650c3968a85SDaniel Walter 
3651be7a010dSDuan Jiong #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
3652be7a010dSDuan Jiong 
3653be7a010dSDuan Jiong /* Remove routers and update dst entries when gateway turn into host. */
36548d1c802bSDavid Ahern static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
3655be7a010dSDuan Jiong {
3656be7a010dSDuan Jiong 	struct in6_addr *gateway = (struct in6_addr *)arg;
3657be7a010dSDuan Jiong 
365893c2fb25SDavid Ahern 	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
36595e670d84SDavid Ahern 	    ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
3660be7a010dSDuan Jiong 		return -1;
3661be7a010dSDuan Jiong 	}
3662b16cb459SWei Wang 
3663b16cb459SWei Wang 	/* Further clean up cached routes in exception table.
3664b16cb459SWei Wang 	 * This is needed because cached route may have a different
3665b16cb459SWei Wang 	 * gateway than its 'parent' in the case of an ip redirect.
3666b16cb459SWei Wang 	 */
3667b16cb459SWei Wang 	rt6_exceptions_clean_tohost(rt, gateway);
3668b16cb459SWei Wang 
3669be7a010dSDuan Jiong 	return 0;
3670be7a010dSDuan Jiong }
3671be7a010dSDuan Jiong 
3672be7a010dSDuan Jiong void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3673be7a010dSDuan Jiong {
3674be7a010dSDuan Jiong 	fib6_clean_all(net, fib6_clean_tohost, gateway);
3675be7a010dSDuan Jiong }
3676be7a010dSDuan Jiong 
36772127d95aSIdo Schimmel struct arg_netdev_event {	/* walker argument for netdev-driven route updates */
36782127d95aSIdo Schimmel 	const struct net_device *dev;	/* device the update applies to */
36794c981e28SIdo Schimmel 	union {	/* anonymous: rt6_sync_up() initialises it positionally after .dev */
36802127d95aSIdo Schimmel 		unsigned int nh_flags;	/* RTNH_F_* bits; fib6_ifup() clears them from matching nexthops */
36814c981e28SIdo Schimmel 		unsigned long event;	/* NOTE(review): presumably a netdev event code for the down path - not used in this section, confirm */
36824c981e28SIdo Schimmel 	};
36832127d95aSIdo Schimmel };
36842127d95aSIdo Schimmel 
36858d1c802bSDavid Ahern static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
3686d7dedee1SIdo Schimmel {
36878d1c802bSDavid Ahern 	struct fib6_info *iter;
3688d7dedee1SIdo Schimmel 	struct fib6_node *fn;
3689d7dedee1SIdo Schimmel 
369093c2fb25SDavid Ahern 	fn = rcu_dereference_protected(rt->fib6_node,
369193c2fb25SDavid Ahern 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3692d7dedee1SIdo Schimmel 	iter = rcu_dereference_protected(fn->leaf,
369393c2fb25SDavid Ahern 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3694d7dedee1SIdo Schimmel 	while (iter) {
369593c2fb25SDavid Ahern 		if (iter->fib6_metric == rt->fib6_metric &&
3696d7dedee1SIdo Schimmel 		    rt6_qualify_for_ecmp(iter))
3697d7dedee1SIdo Schimmel 			return iter;
3698d7dedee1SIdo Schimmel 		iter = rcu_dereference_protected(iter->rt6_next,
369993c2fb25SDavid Ahern 				lockdep_is_held(&rt->fib6_table->tb6_lock));
3700d7dedee1SIdo Schimmel 	}
3701d7dedee1SIdo Schimmel 
3702d7dedee1SIdo Schimmel 	return NULL;
3703d7dedee1SIdo Schimmel }
3704d7dedee1SIdo Schimmel 
37058d1c802bSDavid Ahern static bool rt6_is_dead(const struct fib6_info *rt)
3706d7dedee1SIdo Schimmel {
37075e670d84SDavid Ahern 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
37085e670d84SDavid Ahern 	    (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
370993c2fb25SDavid Ahern 	     rt->fib6_idev->cnf.ignore_routes_with_linkdown))
3710d7dedee1SIdo Schimmel 		return true;
3711d7dedee1SIdo Schimmel 
3712d7dedee1SIdo Schimmel 	return false;
3713d7dedee1SIdo Schimmel }
3714d7dedee1SIdo Schimmel 
37158d1c802bSDavid Ahern static int rt6_multipath_total_weight(const struct fib6_info *rt)
3716d7dedee1SIdo Schimmel {
37178d1c802bSDavid Ahern 	struct fib6_info *iter;
3718d7dedee1SIdo Schimmel 	int total = 0;
3719d7dedee1SIdo Schimmel 
3720d7dedee1SIdo Schimmel 	if (!rt6_is_dead(rt))
37215e670d84SDavid Ahern 		total += rt->fib6_nh.nh_weight;
3722d7dedee1SIdo Schimmel 
372393c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
3724d7dedee1SIdo Schimmel 		if (!rt6_is_dead(iter))
37255e670d84SDavid Ahern 			total += iter->fib6_nh.nh_weight;
3726d7dedee1SIdo Schimmel 	}
3727d7dedee1SIdo Schimmel 
3728d7dedee1SIdo Schimmel 	return total;
3729d7dedee1SIdo Schimmel }
3730d7dedee1SIdo Schimmel 
37318d1c802bSDavid Ahern static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
3732d7dedee1SIdo Schimmel {
3733d7dedee1SIdo Schimmel 	int upper_bound = -1;
3734d7dedee1SIdo Schimmel 
3735d7dedee1SIdo Schimmel 	if (!rt6_is_dead(rt)) {
37365e670d84SDavid Ahern 		*weight += rt->fib6_nh.nh_weight;
3737d7dedee1SIdo Schimmel 		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3738d7dedee1SIdo Schimmel 						    total) - 1;
3739d7dedee1SIdo Schimmel 	}
37405e670d84SDavid Ahern 	atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
3741d7dedee1SIdo Schimmel }
3742d7dedee1SIdo Schimmel 
37438d1c802bSDavid Ahern static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
3744d7dedee1SIdo Schimmel {
37458d1c802bSDavid Ahern 	struct fib6_info *iter;
3746d7dedee1SIdo Schimmel 	int weight = 0;
3747d7dedee1SIdo Schimmel 
3748d7dedee1SIdo Schimmel 	rt6_upper_bound_set(rt, &weight, total);
3749d7dedee1SIdo Schimmel 
375093c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3751d7dedee1SIdo Schimmel 		rt6_upper_bound_set(iter, &weight, total);
3752d7dedee1SIdo Schimmel }
3753d7dedee1SIdo Schimmel 
37548d1c802bSDavid Ahern void rt6_multipath_rebalance(struct fib6_info *rt)
3755d7dedee1SIdo Schimmel {
37568d1c802bSDavid Ahern 	struct fib6_info *first;
3757d7dedee1SIdo Schimmel 	int total;
3758d7dedee1SIdo Schimmel 
3759d7dedee1SIdo Schimmel 	/* In case the entire multipath route was marked for flushing,
3760d7dedee1SIdo Schimmel 	 * then there is no need to rebalance upon the removal of every
3761d7dedee1SIdo Schimmel 	 * sibling route.
3762d7dedee1SIdo Schimmel 	 */
376393c2fb25SDavid Ahern 	if (!rt->fib6_nsiblings || rt->should_flush)
3764d7dedee1SIdo Schimmel 		return;
3765d7dedee1SIdo Schimmel 
3766d7dedee1SIdo Schimmel 	/* During lookup routes are evaluated in order, so we need to
3767d7dedee1SIdo Schimmel 	 * make sure upper bounds are assigned from the first sibling
3768d7dedee1SIdo Schimmel 	 * onwards.
3769d7dedee1SIdo Schimmel 	 */
3770d7dedee1SIdo Schimmel 	first = rt6_multipath_first_sibling(rt);
3771d7dedee1SIdo Schimmel 	if (WARN_ON_ONCE(!first))
3772d7dedee1SIdo Schimmel 		return;
3773d7dedee1SIdo Schimmel 
3774d7dedee1SIdo Schimmel 	total = rt6_multipath_total_weight(first);
3775d7dedee1SIdo Schimmel 	rt6_multipath_upper_bound_set(first, total);
3776d7dedee1SIdo Schimmel }
3777d7dedee1SIdo Schimmel 
37788d1c802bSDavid Ahern static int fib6_ifup(struct fib6_info *rt, void *p_arg)
37792127d95aSIdo Schimmel {
37802127d95aSIdo Schimmel 	const struct arg_netdev_event *arg = p_arg;
37817aef6859SDavid Ahern 	struct net *net = dev_net(arg->dev);
37822127d95aSIdo Schimmel 
3783421842edSDavid Ahern 	if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
37845e670d84SDavid Ahern 		rt->fib6_nh.nh_flags &= ~arg->nh_flags;
37857aef6859SDavid Ahern 		fib6_update_sernum_upto_root(net, rt);
3786d7dedee1SIdo Schimmel 		rt6_multipath_rebalance(rt);
37871de178edSIdo Schimmel 	}
37882127d95aSIdo Schimmel 
37892127d95aSIdo Schimmel 	return 0;
37902127d95aSIdo Schimmel }
37912127d95aSIdo Schimmel 
37922127d95aSIdo Schimmel void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
37932127d95aSIdo Schimmel {
37942127d95aSIdo Schimmel 	struct arg_netdev_event arg = {
37952127d95aSIdo Schimmel 		.dev = dev,
37966802f3adSIdo Schimmel 		{
37972127d95aSIdo Schimmel 			.nh_flags = nh_flags,
37986802f3adSIdo Schimmel 		},
37992127d95aSIdo Schimmel 	};
38002127d95aSIdo Schimmel 
38012127d95aSIdo Schimmel 	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
38022127d95aSIdo Schimmel 		arg.nh_flags |= RTNH_F_LINKDOWN;
38032127d95aSIdo Schimmel 
38042127d95aSIdo Schimmel 	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
38052127d95aSIdo Schimmel }
38062127d95aSIdo Schimmel 
38078d1c802bSDavid Ahern static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
38081de178edSIdo Schimmel 				   const struct net_device *dev)
38091de178edSIdo Schimmel {
38108d1c802bSDavid Ahern 	struct fib6_info *iter;
38111de178edSIdo Schimmel 
38125e670d84SDavid Ahern 	if (rt->fib6_nh.nh_dev == dev)
38131de178edSIdo Schimmel 		return true;
381493c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
38155e670d84SDavid Ahern 		if (iter->fib6_nh.nh_dev == dev)
38161de178edSIdo Schimmel 			return true;
38171de178edSIdo Schimmel 
38181de178edSIdo Schimmel 	return false;
38191de178edSIdo Schimmel }
38201de178edSIdo Schimmel 
38218d1c802bSDavid Ahern static void rt6_multipath_flush(struct fib6_info *rt)
38221de178edSIdo Schimmel {
38238d1c802bSDavid Ahern 	struct fib6_info *iter;
38241de178edSIdo Schimmel 
38251de178edSIdo Schimmel 	rt->should_flush = 1;
382693c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
38271de178edSIdo Schimmel 		iter->should_flush = 1;
38281de178edSIdo Schimmel }
38291de178edSIdo Schimmel 
38308d1c802bSDavid Ahern static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
38311de178edSIdo Schimmel 					     const struct net_device *down_dev)
38321de178edSIdo Schimmel {
38338d1c802bSDavid Ahern 	struct fib6_info *iter;
38341de178edSIdo Schimmel 	unsigned int dead = 0;
38351de178edSIdo Schimmel 
38365e670d84SDavid Ahern 	if (rt->fib6_nh.nh_dev == down_dev ||
38375e670d84SDavid Ahern 	    rt->fib6_nh.nh_flags & RTNH_F_DEAD)
38381de178edSIdo Schimmel 		dead++;
383993c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
38405e670d84SDavid Ahern 		if (iter->fib6_nh.nh_dev == down_dev ||
38415e670d84SDavid Ahern 		    iter->fib6_nh.nh_flags & RTNH_F_DEAD)
38421de178edSIdo Schimmel 			dead++;
38431de178edSIdo Schimmel 
38441de178edSIdo Schimmel 	return dead;
38451de178edSIdo Schimmel }
38461de178edSIdo Schimmel 
38478d1c802bSDavid Ahern static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
38481de178edSIdo Schimmel 				       const struct net_device *dev,
38491de178edSIdo Schimmel 				       unsigned int nh_flags)
38501de178edSIdo Schimmel {
38518d1c802bSDavid Ahern 	struct fib6_info *iter;
38521de178edSIdo Schimmel 
38535e670d84SDavid Ahern 	if (rt->fib6_nh.nh_dev == dev)
38545e670d84SDavid Ahern 		rt->fib6_nh.nh_flags |= nh_flags;
385593c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
38565e670d84SDavid Ahern 		if (iter->fib6_nh.nh_dev == dev)
38575e670d84SDavid Ahern 			iter->fib6_nh.nh_flags |= nh_flags;
38581de178edSIdo Schimmel }
38591de178edSIdo Schimmel 
3860a1a22c12SDavid Ahern /* called with write lock held for table with rt */
38618d1c802bSDavid Ahern static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
38621da177e4SLinus Torvalds {
38634c981e28SIdo Schimmel 	const struct arg_netdev_event *arg = p_arg;
38644c981e28SIdo Schimmel 	const struct net_device *dev = arg->dev;
38657aef6859SDavid Ahern 	struct net *net = dev_net(dev);
38668ed67789SDaniel Lezcano 
3867421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry)
386827c6fa73SIdo Schimmel 		return 0;
386927c6fa73SIdo Schimmel 
387027c6fa73SIdo Schimmel 	switch (arg->event) {
387127c6fa73SIdo Schimmel 	case NETDEV_UNREGISTER:
38725e670d84SDavid Ahern 		return rt->fib6_nh.nh_dev == dev ? -1 : 0;
387327c6fa73SIdo Schimmel 	case NETDEV_DOWN:
38741de178edSIdo Schimmel 		if (rt->should_flush)
387527c6fa73SIdo Schimmel 			return -1;
387693c2fb25SDavid Ahern 		if (!rt->fib6_nsiblings)
38775e670d84SDavid Ahern 			return rt->fib6_nh.nh_dev == dev ? -1 : 0;
38781de178edSIdo Schimmel 		if (rt6_multipath_uses_dev(rt, dev)) {
38791de178edSIdo Schimmel 			unsigned int count;
38801de178edSIdo Schimmel 
38811de178edSIdo Schimmel 			count = rt6_multipath_dead_count(rt, dev);
388293c2fb25SDavid Ahern 			if (rt->fib6_nsiblings + 1 == count) {
38831de178edSIdo Schimmel 				rt6_multipath_flush(rt);
38841de178edSIdo Schimmel 				return -1;
38851de178edSIdo Schimmel 			}
38861de178edSIdo Schimmel 			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
38871de178edSIdo Schimmel 						   RTNH_F_LINKDOWN);
38887aef6859SDavid Ahern 			fib6_update_sernum(net, rt);
3889d7dedee1SIdo Schimmel 			rt6_multipath_rebalance(rt);
38901de178edSIdo Schimmel 		}
38911de178edSIdo Schimmel 		return -2;
389227c6fa73SIdo Schimmel 	case NETDEV_CHANGE:
38935e670d84SDavid Ahern 		if (rt->fib6_nh.nh_dev != dev ||
389493c2fb25SDavid Ahern 		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
389527c6fa73SIdo Schimmel 			break;
38965e670d84SDavid Ahern 		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3897d7dedee1SIdo Schimmel 		rt6_multipath_rebalance(rt);
389827c6fa73SIdo Schimmel 		break;
38992b241361SIdo Schimmel 	}
3900c159d30cSDavid S. Miller 
39011da177e4SLinus Torvalds 	return 0;
39021da177e4SLinus Torvalds }
39031da177e4SLinus Torvalds 
390427c6fa73SIdo Schimmel void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
39051da177e4SLinus Torvalds {
39064c981e28SIdo Schimmel 	struct arg_netdev_event arg = {
39078ed67789SDaniel Lezcano 		.dev = dev,
39086802f3adSIdo Schimmel 		{
39094c981e28SIdo Schimmel 			.event = event,
39106802f3adSIdo Schimmel 		},
39118ed67789SDaniel Lezcano 	};
39128ed67789SDaniel Lezcano 
39134c981e28SIdo Schimmel 	fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
39144c981e28SIdo Schimmel }
39154c981e28SIdo Schimmel 
39164c981e28SIdo Schimmel void rt6_disable_ip(struct net_device *dev, unsigned long event)
39174c981e28SIdo Schimmel {
39184c981e28SIdo Schimmel 	rt6_sync_down_dev(dev, event);
39194c981e28SIdo Schimmel 	rt6_uncached_list_flush_dev(dev_net(dev), dev);
39204c981e28SIdo Schimmel 	neigh_ifdown(&nd_tbl, dev);
39211da177e4SLinus Torvalds }
39221da177e4SLinus Torvalds 
/* Argument bundle that rt6_mtu_change() hands to rt6_mtu_change_route()
 * through fib6_clean_all().
 */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* new MTU to apply to routes on @dev */
};
39271da177e4SLinus Torvalds 
39288d1c802bSDavid Ahern static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
39291da177e4SLinus Torvalds {
39301da177e4SLinus Torvalds 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
39311da177e4SLinus Torvalds 	struct inet6_dev *idev;
39321da177e4SLinus Torvalds 
39331da177e4SLinus Torvalds 	/* In IPv6 pmtu discovery is not optional,
39341da177e4SLinus Torvalds 	   so that RTAX_MTU lock cannot disable it.
39351da177e4SLinus Torvalds 	   We still use this lock to block changes
39361da177e4SLinus Torvalds 	   caused by addrconf/ndisc.
39371da177e4SLinus Torvalds 	*/
39381da177e4SLinus Torvalds 
39391da177e4SLinus Torvalds 	idev = __in6_dev_get(arg->dev);
394038308473SDavid S. Miller 	if (!idev)
39411da177e4SLinus Torvalds 		return 0;
39421da177e4SLinus Torvalds 
39431da177e4SLinus Torvalds 	/* For administrative MTU increase, there is no way to discover
39441da177e4SLinus Torvalds 	   IPv6 PMTU increase, so PMTU increase should be updated here.
39451da177e4SLinus Torvalds 	   Since RFC 1981 doesn't include administrative MTU increase
39461da177e4SLinus Torvalds 	   update PMTU increase is a MUST. (i.e. jumbo frame)
39471da177e4SLinus Torvalds 	 */
39485e670d84SDavid Ahern 	if (rt->fib6_nh.nh_dev == arg->dev &&
3949d4ead6b3SDavid Ahern 	    !fib6_metric_locked(rt, RTAX_MTU)) {
3950d4ead6b3SDavid Ahern 		u32 mtu = rt->fib6_pmtu;
3951d4ead6b3SDavid Ahern 
3952d4ead6b3SDavid Ahern 		if (mtu >= arg->mtu ||
3953d4ead6b3SDavid Ahern 		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
3954d4ead6b3SDavid Ahern 			fib6_metric_set(rt, RTAX_MTU, arg->mtu);
3955d4ead6b3SDavid Ahern 
3956f5bbe7eeSWei Wang 		spin_lock_bh(&rt6_exception_lock);
3957e9fa1495SStefano Brivio 		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
3958f5bbe7eeSWei Wang 		spin_unlock_bh(&rt6_exception_lock);
39594b32b5adSMartin KaFai Lau 	}
39601da177e4SLinus Torvalds 	return 0;
39611da177e4SLinus Torvalds }
39621da177e4SLinus Torvalds 
396395c96174SEric Dumazet void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
39641da177e4SLinus Torvalds {
3965c71099acSThomas Graf 	struct rt6_mtu_change_arg arg = {
3966c71099acSThomas Graf 		.dev = dev,
3967c71099acSThomas Graf 		.mtu = mtu,
3968c71099acSThomas Graf 	};
39691da177e4SLinus Torvalds 
39700c3584d5SLi RongQing 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
39711da177e4SLinus Torvalds }
39721da177e4SLinus Torvalds 
3973ef7c79edSPatrick McHardy static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
39745176f91eSThomas Graf 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
397586872cb5SThomas Graf 	[RTA_OIF]               = { .type = NLA_U32 },
3976ab364a6fSThomas Graf 	[RTA_IIF]		= { .type = NLA_U32 },
397786872cb5SThomas Graf 	[RTA_PRIORITY]          = { .type = NLA_U32 },
397886872cb5SThomas Graf 	[RTA_METRICS]           = { .type = NLA_NESTED },
397951ebd318SNicolas Dichtel 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
3980c78ba6d6SLubomir Rintel 	[RTA_PREF]              = { .type = NLA_U8 },
398119e42e45SRoopa Prabhu 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
398219e42e45SRoopa Prabhu 	[RTA_ENCAP]		= { .type = NLA_NESTED },
398332bc201eSXin Long 	[RTA_EXPIRES]		= { .type = NLA_U32 },
3984622ec2c9SLorenzo Colitti 	[RTA_UID]		= { .type = NLA_U32 },
39853b45a410SLiping Zhang 	[RTA_MARK]		= { .type = NLA_U32 },
398686872cb5SThomas Graf };
398786872cb5SThomas Graf 
398886872cb5SThomas Graf static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
3989333c4301SDavid Ahern 			      struct fib6_config *cfg,
3990333c4301SDavid Ahern 			      struct netlink_ext_ack *extack)
39911da177e4SLinus Torvalds {
399286872cb5SThomas Graf 	struct rtmsg *rtm;
399386872cb5SThomas Graf 	struct nlattr *tb[RTA_MAX+1];
3994c78ba6d6SLubomir Rintel 	unsigned int pref;
399586872cb5SThomas Graf 	int err;
39961da177e4SLinus Torvalds 
3997fceb6435SJohannes Berg 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
3998fceb6435SJohannes Berg 			  NULL);
399986872cb5SThomas Graf 	if (err < 0)
400086872cb5SThomas Graf 		goto errout;
40011da177e4SLinus Torvalds 
400286872cb5SThomas Graf 	err = -EINVAL;
400386872cb5SThomas Graf 	rtm = nlmsg_data(nlh);
400486872cb5SThomas Graf 	memset(cfg, 0, sizeof(*cfg));
400586872cb5SThomas Graf 
400686872cb5SThomas Graf 	cfg->fc_table = rtm->rtm_table;
400786872cb5SThomas Graf 	cfg->fc_dst_len = rtm->rtm_dst_len;
400886872cb5SThomas Graf 	cfg->fc_src_len = rtm->rtm_src_len;
400986872cb5SThomas Graf 	cfg->fc_flags = RTF_UP;
401086872cb5SThomas Graf 	cfg->fc_protocol = rtm->rtm_protocol;
4011ef2c7d7bSNicolas Dichtel 	cfg->fc_type = rtm->rtm_type;
401286872cb5SThomas Graf 
4013ef2c7d7bSNicolas Dichtel 	if (rtm->rtm_type == RTN_UNREACHABLE ||
4014ef2c7d7bSNicolas Dichtel 	    rtm->rtm_type == RTN_BLACKHOLE ||
4015b4949ab2SNicolas Dichtel 	    rtm->rtm_type == RTN_PROHIBIT ||
4016b4949ab2SNicolas Dichtel 	    rtm->rtm_type == RTN_THROW)
401786872cb5SThomas Graf 		cfg->fc_flags |= RTF_REJECT;
401886872cb5SThomas Graf 
4019ab79ad14SMaciej Żenczykowski 	if (rtm->rtm_type == RTN_LOCAL)
4020ab79ad14SMaciej Żenczykowski 		cfg->fc_flags |= RTF_LOCAL;
4021ab79ad14SMaciej Żenczykowski 
40221f56a01fSMartin KaFai Lau 	if (rtm->rtm_flags & RTM_F_CLONED)
40231f56a01fSMartin KaFai Lau 		cfg->fc_flags |= RTF_CACHE;
40241f56a01fSMartin KaFai Lau 
4025fc1e64e1SDavid Ahern 	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4026fc1e64e1SDavid Ahern 
402715e47304SEric W. Biederman 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
402886872cb5SThomas Graf 	cfg->fc_nlinfo.nlh = nlh;
40293b1e0a65SYOSHIFUJI Hideaki 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
403086872cb5SThomas Graf 
403186872cb5SThomas Graf 	if (tb[RTA_GATEWAY]) {
403267b61f6cSJiri Benc 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
403386872cb5SThomas Graf 		cfg->fc_flags |= RTF_GATEWAY;
40341da177e4SLinus Torvalds 	}
403586872cb5SThomas Graf 
403686872cb5SThomas Graf 	if (tb[RTA_DST]) {
403786872cb5SThomas Graf 		int plen = (rtm->rtm_dst_len + 7) >> 3;
403886872cb5SThomas Graf 
403986872cb5SThomas Graf 		if (nla_len(tb[RTA_DST]) < plen)
404086872cb5SThomas Graf 			goto errout;
404186872cb5SThomas Graf 
404286872cb5SThomas Graf 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
40431da177e4SLinus Torvalds 	}
404486872cb5SThomas Graf 
404586872cb5SThomas Graf 	if (tb[RTA_SRC]) {
404686872cb5SThomas Graf 		int plen = (rtm->rtm_src_len + 7) >> 3;
404786872cb5SThomas Graf 
404886872cb5SThomas Graf 		if (nla_len(tb[RTA_SRC]) < plen)
404986872cb5SThomas Graf 			goto errout;
405086872cb5SThomas Graf 
405186872cb5SThomas Graf 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
40521da177e4SLinus Torvalds 	}
405386872cb5SThomas Graf 
4054c3968a85SDaniel Walter 	if (tb[RTA_PREFSRC])
405567b61f6cSJiri Benc 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4056c3968a85SDaniel Walter 
405786872cb5SThomas Graf 	if (tb[RTA_OIF])
405886872cb5SThomas Graf 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
405986872cb5SThomas Graf 
406086872cb5SThomas Graf 	if (tb[RTA_PRIORITY])
406186872cb5SThomas Graf 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
406286872cb5SThomas Graf 
406386872cb5SThomas Graf 	if (tb[RTA_METRICS]) {
406486872cb5SThomas Graf 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
406586872cb5SThomas Graf 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
40661da177e4SLinus Torvalds 	}
406786872cb5SThomas Graf 
406886872cb5SThomas Graf 	if (tb[RTA_TABLE])
406986872cb5SThomas Graf 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
407086872cb5SThomas Graf 
407151ebd318SNicolas Dichtel 	if (tb[RTA_MULTIPATH]) {
407251ebd318SNicolas Dichtel 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
407351ebd318SNicolas Dichtel 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
40749ed59592SDavid Ahern 
40759ed59592SDavid Ahern 		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4076c255bd68SDavid Ahern 						     cfg->fc_mp_len, extack);
40779ed59592SDavid Ahern 		if (err < 0)
40789ed59592SDavid Ahern 			goto errout;
407951ebd318SNicolas Dichtel 	}
408051ebd318SNicolas Dichtel 
4081c78ba6d6SLubomir Rintel 	if (tb[RTA_PREF]) {
4082c78ba6d6SLubomir Rintel 		pref = nla_get_u8(tb[RTA_PREF]);
4083c78ba6d6SLubomir Rintel 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
4084c78ba6d6SLubomir Rintel 		    pref != ICMPV6_ROUTER_PREF_HIGH)
4085c78ba6d6SLubomir Rintel 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
4086c78ba6d6SLubomir Rintel 		cfg->fc_flags |= RTF_PREF(pref);
4087c78ba6d6SLubomir Rintel 	}
4088c78ba6d6SLubomir Rintel 
408919e42e45SRoopa Prabhu 	if (tb[RTA_ENCAP])
409019e42e45SRoopa Prabhu 		cfg->fc_encap = tb[RTA_ENCAP];
409119e42e45SRoopa Prabhu 
40929ed59592SDavid Ahern 	if (tb[RTA_ENCAP_TYPE]) {
409319e42e45SRoopa Prabhu 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
409419e42e45SRoopa Prabhu 
4095c255bd68SDavid Ahern 		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
40969ed59592SDavid Ahern 		if (err < 0)
40979ed59592SDavid Ahern 			goto errout;
40989ed59592SDavid Ahern 	}
40999ed59592SDavid Ahern 
410032bc201eSXin Long 	if (tb[RTA_EXPIRES]) {
410132bc201eSXin Long 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
410232bc201eSXin Long 
410332bc201eSXin Long 		if (addrconf_finite_timeout(timeout)) {
410432bc201eSXin Long 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
410532bc201eSXin Long 			cfg->fc_flags |= RTF_EXPIRES;
410632bc201eSXin Long 		}
410732bc201eSXin Long 	}
410832bc201eSXin Long 
410986872cb5SThomas Graf 	err = 0;
411086872cb5SThomas Graf errout:
411186872cb5SThomas Graf 	return err;
41121da177e4SLinus Torvalds }
41131da177e4SLinus Torvalds 
/* One nexthop of a multipath add request, queued by
 * ip6_route_info_append() on a local list until the routes are inserted.
 */
struct rt6_nh {
	struct fib6_info *fib6_info;	/* route built for this nexthop; reset to NULL after the insert attempt */
	struct fib6_config r_cfg;	/* copy of the per-nexthop config; used to delete the route on rollback */
	struct list_head next;		/* linkage on the request's nexthop list */
};
41196b9ea5a6SRoopa Prabhu 
41206b9ea5a6SRoopa Prabhu static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
41216b9ea5a6SRoopa Prabhu {
41226b9ea5a6SRoopa Prabhu 	struct rt6_nh *nh;
41236b9ea5a6SRoopa Prabhu 
41246b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, rt6_nh_list, next) {
41257d4d5065SDavid Ahern 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
41266b9ea5a6SRoopa Prabhu 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
41276b9ea5a6SRoopa Prabhu 		        nh->r_cfg.fc_ifindex);
41286b9ea5a6SRoopa Prabhu 	}
41296b9ea5a6SRoopa Prabhu }
41306b9ea5a6SRoopa Prabhu 
4131d4ead6b3SDavid Ahern static int ip6_route_info_append(struct net *net,
4132d4ead6b3SDavid Ahern 				 struct list_head *rt6_nh_list,
41338d1c802bSDavid Ahern 				 struct fib6_info *rt,
41348d1c802bSDavid Ahern 				 struct fib6_config *r_cfg)
41356b9ea5a6SRoopa Prabhu {
41366b9ea5a6SRoopa Prabhu 	struct rt6_nh *nh;
41376b9ea5a6SRoopa Prabhu 	int err = -EEXIST;
41386b9ea5a6SRoopa Prabhu 
41396b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, rt6_nh_list, next) {
41408d1c802bSDavid Ahern 		/* check if fib6_info already exists */
41418d1c802bSDavid Ahern 		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
41426b9ea5a6SRoopa Prabhu 			return err;
41436b9ea5a6SRoopa Prabhu 	}
41446b9ea5a6SRoopa Prabhu 
41456b9ea5a6SRoopa Prabhu 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
41466b9ea5a6SRoopa Prabhu 	if (!nh)
41476b9ea5a6SRoopa Prabhu 		return -ENOMEM;
41488d1c802bSDavid Ahern 	nh->fib6_info = rt;
4149d4ead6b3SDavid Ahern 	err = ip6_convert_metrics(net, rt, r_cfg);
41506b9ea5a6SRoopa Prabhu 	if (err) {
41516b9ea5a6SRoopa Prabhu 		kfree(nh);
41526b9ea5a6SRoopa Prabhu 		return err;
41536b9ea5a6SRoopa Prabhu 	}
41546b9ea5a6SRoopa Prabhu 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
41556b9ea5a6SRoopa Prabhu 	list_add_tail(&nh->next, rt6_nh_list);
41566b9ea5a6SRoopa Prabhu 
41576b9ea5a6SRoopa Prabhu 	return 0;
41586b9ea5a6SRoopa Prabhu }
41596b9ea5a6SRoopa Prabhu 
41608d1c802bSDavid Ahern static void ip6_route_mpath_notify(struct fib6_info *rt,
41618d1c802bSDavid Ahern 				   struct fib6_info *rt_last,
41623b1137feSDavid Ahern 				   struct nl_info *info,
41633b1137feSDavid Ahern 				   __u16 nlflags)
41643b1137feSDavid Ahern {
41653b1137feSDavid Ahern 	/* if this is an APPEND route, then rt points to the first route
41663b1137feSDavid Ahern 	 * inserted and rt_last points to last route inserted. Userspace
41673b1137feSDavid Ahern 	 * wants a consistent dump of the route which starts at the first
41683b1137feSDavid Ahern 	 * nexthop. Since sibling routes are always added at the end of
41693b1137feSDavid Ahern 	 * the list, find the first sibling of the last route appended
41703b1137feSDavid Ahern 	 */
417193c2fb25SDavid Ahern 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
417293c2fb25SDavid Ahern 		rt = list_first_entry(&rt_last->fib6_siblings,
41738d1c802bSDavid Ahern 				      struct fib6_info,
417493c2fb25SDavid Ahern 				      fib6_siblings);
41753b1137feSDavid Ahern 	}
41763b1137feSDavid Ahern 
41773b1137feSDavid Ahern 	if (rt)
41783b1137feSDavid Ahern 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
41793b1137feSDavid Ahern }
41803b1137feSDavid Ahern 
4181333c4301SDavid Ahern static int ip6_route_multipath_add(struct fib6_config *cfg,
4182333c4301SDavid Ahern 				   struct netlink_ext_ack *extack)
418351ebd318SNicolas Dichtel {
41848d1c802bSDavid Ahern 	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
41853b1137feSDavid Ahern 	struct nl_info *info = &cfg->fc_nlinfo;
418651ebd318SNicolas Dichtel 	struct fib6_config r_cfg;
418751ebd318SNicolas Dichtel 	struct rtnexthop *rtnh;
41888d1c802bSDavid Ahern 	struct fib6_info *rt;
41896b9ea5a6SRoopa Prabhu 	struct rt6_nh *err_nh;
41906b9ea5a6SRoopa Prabhu 	struct rt6_nh *nh, *nh_safe;
41913b1137feSDavid Ahern 	__u16 nlflags;
419251ebd318SNicolas Dichtel 	int remaining;
419351ebd318SNicolas Dichtel 	int attrlen;
41946b9ea5a6SRoopa Prabhu 	int err = 1;
41956b9ea5a6SRoopa Prabhu 	int nhn = 0;
41966b9ea5a6SRoopa Prabhu 	int replace = (cfg->fc_nlinfo.nlh &&
41976b9ea5a6SRoopa Prabhu 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
41986b9ea5a6SRoopa Prabhu 	LIST_HEAD(rt6_nh_list);
419951ebd318SNicolas Dichtel 
42003b1137feSDavid Ahern 	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
42013b1137feSDavid Ahern 	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
42023b1137feSDavid Ahern 		nlflags |= NLM_F_APPEND;
42033b1137feSDavid Ahern 
420435f1b4e9SMichal Kubeček 	remaining = cfg->fc_mp_len;
420551ebd318SNicolas Dichtel 	rtnh = (struct rtnexthop *)cfg->fc_mp;
420651ebd318SNicolas Dichtel 
42076b9ea5a6SRoopa Prabhu 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
42088d1c802bSDavid Ahern 	 * fib6_info structs per nexthop
42096b9ea5a6SRoopa Prabhu 	 */
421051ebd318SNicolas Dichtel 	while (rtnh_ok(rtnh, remaining)) {
421151ebd318SNicolas Dichtel 		memcpy(&r_cfg, cfg, sizeof(*cfg));
421251ebd318SNicolas Dichtel 		if (rtnh->rtnh_ifindex)
421351ebd318SNicolas Dichtel 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
421451ebd318SNicolas Dichtel 
421551ebd318SNicolas Dichtel 		attrlen = rtnh_attrlen(rtnh);
421651ebd318SNicolas Dichtel 		if (attrlen > 0) {
421751ebd318SNicolas Dichtel 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
421851ebd318SNicolas Dichtel 
421951ebd318SNicolas Dichtel 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
422051ebd318SNicolas Dichtel 			if (nla) {
422167b61f6cSJiri Benc 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
422251ebd318SNicolas Dichtel 				r_cfg.fc_flags |= RTF_GATEWAY;
422351ebd318SNicolas Dichtel 			}
422419e42e45SRoopa Prabhu 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
422519e42e45SRoopa Prabhu 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
422619e42e45SRoopa Prabhu 			if (nla)
422719e42e45SRoopa Prabhu 				r_cfg.fc_encap_type = nla_get_u16(nla);
422851ebd318SNicolas Dichtel 		}
42296b9ea5a6SRoopa Prabhu 
423068e2ffdeSDavid Ahern 		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
4231acb54e3cSDavid Ahern 		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
42328c5b83f0SRoopa Prabhu 		if (IS_ERR(rt)) {
42338c5b83f0SRoopa Prabhu 			err = PTR_ERR(rt);
42348c5b83f0SRoopa Prabhu 			rt = NULL;
42356b9ea5a6SRoopa Prabhu 			goto cleanup;
42368c5b83f0SRoopa Prabhu 		}
42376b9ea5a6SRoopa Prabhu 
42385e670d84SDavid Ahern 		rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
4239398958aeSIdo Schimmel 
4240d4ead6b3SDavid Ahern 		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4241d4ead6b3SDavid Ahern 					    rt, &r_cfg);
424251ebd318SNicolas Dichtel 		if (err) {
424393531c67SDavid Ahern 			fib6_info_release(rt);
42446b9ea5a6SRoopa Prabhu 			goto cleanup;
424551ebd318SNicolas Dichtel 		}
42466b9ea5a6SRoopa Prabhu 
42476b9ea5a6SRoopa Prabhu 		rtnh = rtnh_next(rtnh, &remaining);
424851ebd318SNicolas Dichtel 	}
42496b9ea5a6SRoopa Prabhu 
42503b1137feSDavid Ahern 	/* for add and replace send one notification with all nexthops.
42513b1137feSDavid Ahern 	 * Skip the notification in fib6_add_rt2node and send one with
42523b1137feSDavid Ahern 	 * the full route when done
42533b1137feSDavid Ahern 	 */
42543b1137feSDavid Ahern 	info->skip_notify = 1;
42553b1137feSDavid Ahern 
42566b9ea5a6SRoopa Prabhu 	err_nh = NULL;
42576b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, &rt6_nh_list, next) {
42588d1c802bSDavid Ahern 		rt_last = nh->fib6_info;
42598d1c802bSDavid Ahern 		err = __ip6_ins_rt(nh->fib6_info, info, extack);
42608d1c802bSDavid Ahern 		fib6_info_release(nh->fib6_info);
426193531c67SDavid Ahern 
42623b1137feSDavid Ahern 		/* save reference to first route for notification */
42633b1137feSDavid Ahern 		if (!rt_notif && !err)
42648d1c802bSDavid Ahern 			rt_notif = nh->fib6_info;
42653b1137feSDavid Ahern 
42668d1c802bSDavid Ahern 		/* nh->fib6_info is used or freed at this point, reset to NULL*/
42678d1c802bSDavid Ahern 		nh->fib6_info = NULL;
42686b9ea5a6SRoopa Prabhu 		if (err) {
42696b9ea5a6SRoopa Prabhu 			if (replace && nhn)
42706b9ea5a6SRoopa Prabhu 				ip6_print_replace_route_err(&rt6_nh_list);
42716b9ea5a6SRoopa Prabhu 			err_nh = nh;
42726b9ea5a6SRoopa Prabhu 			goto add_errout;
42736b9ea5a6SRoopa Prabhu 		}
42746b9ea5a6SRoopa Prabhu 
42751a72418bSNicolas Dichtel 		/* Because each route is added like a single route we remove
427627596472SMichal Kubeček 		 * these flags after the first nexthop: if there is a collision,
427727596472SMichal Kubeček 		 * we have already failed to add the first nexthop:
427827596472SMichal Kubeček 		 * fib6_add_rt2node() has rejected it; when replacing, old
427927596472SMichal Kubeček 		 * nexthops have been replaced by first new, the rest should
428027596472SMichal Kubeček 		 * be added to it.
42811a72418bSNicolas Dichtel 		 */
428227596472SMichal Kubeček 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
428327596472SMichal Kubeček 						     NLM_F_REPLACE);
42846b9ea5a6SRoopa Prabhu 		nhn++;
42856b9ea5a6SRoopa Prabhu 	}
42866b9ea5a6SRoopa Prabhu 
42873b1137feSDavid Ahern 	/* success ... tell user about new route */
42883b1137feSDavid Ahern 	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
42896b9ea5a6SRoopa Prabhu 	goto cleanup;
42906b9ea5a6SRoopa Prabhu 
42916b9ea5a6SRoopa Prabhu add_errout:
42923b1137feSDavid Ahern 	/* send notification for routes that were added so that
42933b1137feSDavid Ahern 	 * the delete notifications sent by ip6_route_del are
42943b1137feSDavid Ahern 	 * coherent
42953b1137feSDavid Ahern 	 */
42963b1137feSDavid Ahern 	if (rt_notif)
42973b1137feSDavid Ahern 		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
42983b1137feSDavid Ahern 
42996b9ea5a6SRoopa Prabhu 	/* Delete routes that were already added */
43006b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, &rt6_nh_list, next) {
43016b9ea5a6SRoopa Prabhu 		if (err_nh == nh)
43026b9ea5a6SRoopa Prabhu 			break;
4303333c4301SDavid Ahern 		ip6_route_del(&nh->r_cfg, extack);
43046b9ea5a6SRoopa Prabhu 	}
43056b9ea5a6SRoopa Prabhu 
43066b9ea5a6SRoopa Prabhu cleanup:
43076b9ea5a6SRoopa Prabhu 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
43088d1c802bSDavid Ahern 		if (nh->fib6_info)
43098d1c802bSDavid Ahern 			fib6_info_release(nh->fib6_info);
43106b9ea5a6SRoopa Prabhu 		list_del(&nh->next);
43116b9ea5a6SRoopa Prabhu 		kfree(nh);
43126b9ea5a6SRoopa Prabhu 	}
43136b9ea5a6SRoopa Prabhu 
43146b9ea5a6SRoopa Prabhu 	return err;
43156b9ea5a6SRoopa Prabhu }
43166b9ea5a6SRoopa Prabhu 
4317333c4301SDavid Ahern static int ip6_route_multipath_del(struct fib6_config *cfg,
4318333c4301SDavid Ahern 				   struct netlink_ext_ack *extack)
43196b9ea5a6SRoopa Prabhu {
43206b9ea5a6SRoopa Prabhu 	struct fib6_config r_cfg;
43216b9ea5a6SRoopa Prabhu 	struct rtnexthop *rtnh;
43226b9ea5a6SRoopa Prabhu 	int remaining;
43236b9ea5a6SRoopa Prabhu 	int attrlen;
43246b9ea5a6SRoopa Prabhu 	int err = 1, last_err = 0;
43256b9ea5a6SRoopa Prabhu 
43266b9ea5a6SRoopa Prabhu 	remaining = cfg->fc_mp_len;
43276b9ea5a6SRoopa Prabhu 	rtnh = (struct rtnexthop *)cfg->fc_mp;
43286b9ea5a6SRoopa Prabhu 
43296b9ea5a6SRoopa Prabhu 	/* Parse a Multipath Entry */
43306b9ea5a6SRoopa Prabhu 	while (rtnh_ok(rtnh, remaining)) {
43316b9ea5a6SRoopa Prabhu 		memcpy(&r_cfg, cfg, sizeof(*cfg));
43326b9ea5a6SRoopa Prabhu 		if (rtnh->rtnh_ifindex)
43336b9ea5a6SRoopa Prabhu 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
43346b9ea5a6SRoopa Prabhu 
43356b9ea5a6SRoopa Prabhu 		attrlen = rtnh_attrlen(rtnh);
43366b9ea5a6SRoopa Prabhu 		if (attrlen > 0) {
43376b9ea5a6SRoopa Prabhu 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
43386b9ea5a6SRoopa Prabhu 
43396b9ea5a6SRoopa Prabhu 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
43406b9ea5a6SRoopa Prabhu 			if (nla) {
43416b9ea5a6SRoopa Prabhu 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
43426b9ea5a6SRoopa Prabhu 				r_cfg.fc_flags |= RTF_GATEWAY;
43436b9ea5a6SRoopa Prabhu 			}
43446b9ea5a6SRoopa Prabhu 		}
4345333c4301SDavid Ahern 		err = ip6_route_del(&r_cfg, extack);
43466b9ea5a6SRoopa Prabhu 		if (err)
43476b9ea5a6SRoopa Prabhu 			last_err = err;
43486b9ea5a6SRoopa Prabhu 
434951ebd318SNicolas Dichtel 		rtnh = rtnh_next(rtnh, &remaining);
435051ebd318SNicolas Dichtel 	}
435151ebd318SNicolas Dichtel 
435251ebd318SNicolas Dichtel 	return last_err;
435351ebd318SNicolas Dichtel }
435451ebd318SNicolas Dichtel 
4355c21ef3e3SDavid Ahern static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4356c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
43571da177e4SLinus Torvalds {
435886872cb5SThomas Graf 	struct fib6_config cfg;
435986872cb5SThomas Graf 	int err;
43601da177e4SLinus Torvalds 
4361333c4301SDavid Ahern 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
436286872cb5SThomas Graf 	if (err < 0)
436386872cb5SThomas Graf 		return err;
436486872cb5SThomas Graf 
436551ebd318SNicolas Dichtel 	if (cfg.fc_mp)
4366333c4301SDavid Ahern 		return ip6_route_multipath_del(&cfg, extack);
43670ae81335SDavid Ahern 	else {
43680ae81335SDavid Ahern 		cfg.fc_delete_all_nh = 1;
4369333c4301SDavid Ahern 		return ip6_route_del(&cfg, extack);
43701da177e4SLinus Torvalds 	}
43710ae81335SDavid Ahern }
43721da177e4SLinus Torvalds 
4373c21ef3e3SDavid Ahern static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4374c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
43751da177e4SLinus Torvalds {
437686872cb5SThomas Graf 	struct fib6_config cfg;
437786872cb5SThomas Graf 	int err;
43781da177e4SLinus Torvalds 
4379333c4301SDavid Ahern 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
438086872cb5SThomas Graf 	if (err < 0)
438186872cb5SThomas Graf 		return err;
438286872cb5SThomas Graf 
438351ebd318SNicolas Dichtel 	if (cfg.fc_mp)
4384333c4301SDavid Ahern 		return ip6_route_multipath_add(&cfg, extack);
438551ebd318SNicolas Dichtel 	else
4386acb54e3cSDavid Ahern 		return ip6_route_add(&cfg, GFP_KERNEL, extack);
43871da177e4SLinus Torvalds }
43881da177e4SLinus Torvalds 
43898d1c802bSDavid Ahern static size_t rt6_nlmsg_size(struct fib6_info *rt)
4390339bf98fSThomas Graf {
4391beb1afacSDavid Ahern 	int nexthop_len = 0;
4392beb1afacSDavid Ahern 
439393c2fb25SDavid Ahern 	if (rt->fib6_nsiblings) {
4394beb1afacSDavid Ahern 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
4395beb1afacSDavid Ahern 			    + NLA_ALIGN(sizeof(struct rtnexthop))
4396beb1afacSDavid Ahern 			    + nla_total_size(16) /* RTA_GATEWAY */
43975e670d84SDavid Ahern 			    + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
4398beb1afacSDavid Ahern 
439993c2fb25SDavid Ahern 		nexthop_len *= rt->fib6_nsiblings;
4400beb1afacSDavid Ahern 	}
4401beb1afacSDavid Ahern 
4402339bf98fSThomas Graf 	return NLMSG_ALIGN(sizeof(struct rtmsg))
4403339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_SRC */
4404339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_DST */
4405339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_GATEWAY */
4406339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_PREFSRC */
4407339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_TABLE */
4408339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_IIF */
4409339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_OIF */
4410339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_PRIORITY */
44116a2b9ce0SNoriaki TAKAMIYA 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
4412ea697639SDaniel Borkmann 	       + nla_total_size(sizeof(struct rta_cacheinfo))
4413c78ba6d6SLubomir Rintel 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
441419e42e45SRoopa Prabhu 	       + nla_total_size(1) /* RTA_PREF */
44155e670d84SDavid Ahern 	       + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
4416beb1afacSDavid Ahern 	       + nexthop_len;
4417beb1afacSDavid Ahern }
4418beb1afacSDavid Ahern 
44198d1c802bSDavid Ahern static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
44205be083ceSDavid Ahern 			    unsigned int *flags, bool skip_oif)
4421beb1afacSDavid Ahern {
44225e670d84SDavid Ahern 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
4423f9d882eaSIdo Schimmel 		*flags |= RTNH_F_DEAD;
4424f9d882eaSIdo Schimmel 
44255e670d84SDavid Ahern 	if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
4426beb1afacSDavid Ahern 		*flags |= RTNH_F_LINKDOWN;
442793c2fb25SDavid Ahern 		if (rt->fib6_idev->cnf.ignore_routes_with_linkdown)
4428beb1afacSDavid Ahern 			*flags |= RTNH_F_DEAD;
4429beb1afacSDavid Ahern 	}
4430beb1afacSDavid Ahern 
443193c2fb25SDavid Ahern 	if (rt->fib6_flags & RTF_GATEWAY) {
44325e670d84SDavid Ahern 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
4433beb1afacSDavid Ahern 			goto nla_put_failure;
4434beb1afacSDavid Ahern 	}
4435beb1afacSDavid Ahern 
44365e670d84SDavid Ahern 	*flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
44375e670d84SDavid Ahern 	if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
443861e4d01eSIdo Schimmel 		*flags |= RTNH_F_OFFLOAD;
443961e4d01eSIdo Schimmel 
44405be083ceSDavid Ahern 	/* not needed for multipath encoding b/c it has a rtnexthop struct */
44415e670d84SDavid Ahern 	if (!skip_oif && rt->fib6_nh.nh_dev &&
44425e670d84SDavid Ahern 	    nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
4443beb1afacSDavid Ahern 		goto nla_put_failure;
4444beb1afacSDavid Ahern 
44455e670d84SDavid Ahern 	if (rt->fib6_nh.nh_lwtstate &&
44465e670d84SDavid Ahern 	    lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
4447beb1afacSDavid Ahern 		goto nla_put_failure;
4448beb1afacSDavid Ahern 
4449beb1afacSDavid Ahern 	return 0;
4450beb1afacSDavid Ahern 
4451beb1afacSDavid Ahern nla_put_failure:
4452beb1afacSDavid Ahern 	return -EMSGSIZE;
4453beb1afacSDavid Ahern }
4454beb1afacSDavid Ahern 
44555be083ceSDavid Ahern /* add multipath next hop */
44568d1c802bSDavid Ahern static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
4457beb1afacSDavid Ahern {
44585e670d84SDavid Ahern 	const struct net_device *dev = rt->fib6_nh.nh_dev;
4459beb1afacSDavid Ahern 	struct rtnexthop *rtnh;
4460beb1afacSDavid Ahern 	unsigned int flags = 0;
4461beb1afacSDavid Ahern 
4462beb1afacSDavid Ahern 	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4463beb1afacSDavid Ahern 	if (!rtnh)
4464beb1afacSDavid Ahern 		goto nla_put_failure;
4465beb1afacSDavid Ahern 
44665e670d84SDavid Ahern 	rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
44675e670d84SDavid Ahern 	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
4468beb1afacSDavid Ahern 
44695be083ceSDavid Ahern 	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
4470beb1afacSDavid Ahern 		goto nla_put_failure;
4471beb1afacSDavid Ahern 
4472beb1afacSDavid Ahern 	rtnh->rtnh_flags = flags;
4473beb1afacSDavid Ahern 
4474beb1afacSDavid Ahern 	/* length of rtnetlink header + attributes */
4475beb1afacSDavid Ahern 	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4476beb1afacSDavid Ahern 
4477beb1afacSDavid Ahern 	return 0;
4478beb1afacSDavid Ahern 
4479beb1afacSDavid Ahern nla_put_failure:
4480beb1afacSDavid Ahern 	return -EMSGSIZE;
4481339bf98fSThomas Graf }
4482339bf98fSThomas Graf 
/*
 * rt6_fill_node - write one route netlink message describing @rt into @skb.
 *
 * @dst:  optional dst entry; when non-NULL its metrics, expiry and error
 *	  are reported instead of the values stored in @rt.
 * @dest: optional explicit destination; when non-NULL it is emitted as
 *	  RTA_DST and rtm_dst_len is forced to 128.
 * @src:  optional explicit source; same treatment for RTA_SRC (only with
 *	  CONFIG_IPV6_SUBTREES).
 * @iif:  when non-zero, emitted as RTA_IIF (or, with CONFIG_IPV6_MROUTE and
 *	  a multicast route destination, handed to ip6mr_get_route()).
 * @type, @portid, @seq, @flags: passed through to nlmsg_put().
 *
 * Returns 0 on success.  On any attribute failure the partially built
 * message is cancelled and -EMSGSIZE is returned.
 */
4483d4ead6b3SDavid Ahern static int rt6_fill_node(struct net *net, struct sk_buff *skb,
44848d1c802bSDavid Ahern 			 struct fib6_info *rt, struct dst_entry *dst,
4485d4ead6b3SDavid Ahern 			 struct in6_addr *dest, struct in6_addr *src,
448615e47304SEric W. Biederman 			 int iif, int type, u32 portid, u32 seq,
4487f8cfe2ceSDavid Ahern 			 unsigned int flags)
44881da177e4SLinus Torvalds {
44891da177e4SLinus Torvalds 	struct rtmsg *rtm;
44901da177e4SLinus Torvalds 	struct nlmsghdr *nlh;
4491d4ead6b3SDavid Ahern 	long expires = 0;
4492d4ead6b3SDavid Ahern 	u32 *pmetrics;
44939e762a4aSPatrick McHardy 	u32 table;
44941da177e4SLinus Torvalds 
449515e47304SEric W. Biederman 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
449638308473SDavid S. Miller 	if (!nlh)
449726932566SPatrick McHardy 		return -EMSGSIZE;
44982d7202bfSThomas Graf 
44992d7202bfSThomas Graf 	rtm = nlmsg_data(nlh);
45001da177e4SLinus Torvalds 	rtm->rtm_family = AF_INET6;
450193c2fb25SDavid Ahern 	rtm->rtm_dst_len = rt->fib6_dst.plen;
450293c2fb25SDavid Ahern 	rtm->rtm_src_len = rt->fib6_src.plen;
45031da177e4SLinus Torvalds 	rtm->rtm_tos = 0;
	/* the table id is reported both in the header and as RTA_TABLE */
450493c2fb25SDavid Ahern 	if (rt->fib6_table)
450593c2fb25SDavid Ahern 		table = rt->fib6_table->tb6_id;
4506c71099acSThomas Graf 	else
45079e762a4aSPatrick McHardy 		table = RT6_TABLE_UNSPEC;
45089e762a4aSPatrick McHardy 	rtm->rtm_table = table;
4509c78679e8SDavid S. Miller 	if (nla_put_u32(skb, RTA_TABLE, table))
4510c78679e8SDavid S. Miller 		goto nla_put_failure;
4511e8478e80SDavid Ahern 
4512e8478e80SDavid Ahern 	rtm->rtm_type = rt->fib6_type;
45131da177e4SLinus Torvalds 	rtm->rtm_flags = 0;
45141da177e4SLinus Torvalds 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
451593c2fb25SDavid Ahern 	rtm->rtm_protocol = rt->fib6_protocol;
45161da177e4SLinus Torvalds 
451793c2fb25SDavid Ahern 	if (rt->fib6_flags & RTF_CACHE)
45181da177e4SLinus Torvalds 		rtm->rtm_flags |= RTM_F_CLONED;
45191da177e4SLinus Torvalds 
	/* an explicit @dest wins over the route's own prefix */
4520d4ead6b3SDavid Ahern 	if (dest) {
4521d4ead6b3SDavid Ahern 		if (nla_put_in6_addr(skb, RTA_DST, dest))
4522c78679e8SDavid S. Miller 			goto nla_put_failure;
45231da177e4SLinus Torvalds 		rtm->rtm_dst_len = 128;
45241da177e4SLinus Torvalds 	} else if (rtm->rtm_dst_len)
452593c2fb25SDavid Ahern 		if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr))
4526c78679e8SDavid S. Miller 			goto nla_put_failure;
45271da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
45281da177e4SLinus Torvalds 	if (src) {
4529930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_SRC, src))
4530c78679e8SDavid S. Miller 			goto nla_put_failure;
45311da177e4SLinus Torvalds 		rtm->rtm_src_len = 128;
4532c78679e8SDavid S. Miller 	} else if (rtm->rtm_src_len &&
453393c2fb25SDavid Ahern 		   nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr))
4534c78679e8SDavid S. Miller 		goto nla_put_failure;
45351da177e4SLinus Torvalds #endif
45367bc570c8SYOSHIFUJI Hideaki 	if (iif) {
45377bc570c8SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_MROUTE
		/*
		 * Multicast destination: ip6mr_get_route() is asked first.
		 * A return of 0 ends this function immediately with 0
		 * (nlmsg_end() is not called on this path); a negative
		 * return cancels the message; a positive return continues
		 * below without emitting RTA_IIF.
		 */
453893c2fb25SDavid Ahern 		if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) {
4539fd61c6baSDavid Ahern 			int err = ip6mr_get_route(net, skb, rtm, portid);
45402cf75070SNikolay Aleksandrov 
45417bc570c8SYOSHIFUJI Hideaki 			if (err == 0)
45427bc570c8SYOSHIFUJI Hideaki 				return 0;
4543fd61c6baSDavid Ahern 			if (err < 0)
45447bc570c8SYOSHIFUJI Hideaki 				goto nla_put_failure;
45457bc570c8SYOSHIFUJI Hideaki 		} else
45467bc570c8SYOSHIFUJI Hideaki #endif
4547c78679e8SDavid S. Miller 			if (nla_put_u32(skb, RTA_IIF, iif))
4548c78679e8SDavid S. Miller 				goto nla_put_failure;
4549d4ead6b3SDavid Ahern 	} else if (dest) {
		/* no input interface: report the source address selected for @dest */
45501da177e4SLinus Torvalds 		struct in6_addr saddr_buf;
4551d4ead6b3SDavid Ahern 		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
4552930345eaSJiri Benc 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4553c78679e8SDavid S. Miller 			goto nla_put_failure;
4554c3968a85SDaniel Walter 	}
4555c3968a85SDaniel Walter 
	/*
	 * NOTE(review): if the branch above already emitted RTA_PREFSRC and
	 * fib6_prefsrc.plen is also set, the attribute is emitted a second
	 * time here -- confirm this is intended.
	 */
455693c2fb25SDavid Ahern 	if (rt->fib6_prefsrc.plen) {
4557c3968a85SDaniel Walter 		struct in6_addr saddr_buf;
455893c2fb25SDavid Ahern 		saddr_buf = rt->fib6_prefsrc.addr;
4559930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4560c78679e8SDavid S. Miller 			goto nla_put_failure;
45611da177e4SLinus Torvalds 	}
45622d7202bfSThomas Graf 
	/* metrics come from @dst when one was supplied, else from the fib entry */
4563d4ead6b3SDavid Ahern 	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4564d4ead6b3SDavid Ahern 	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
45652d7202bfSThomas Graf 		goto nla_put_failure;
45662d7202bfSThomas Graf 
456793c2fb25SDavid Ahern 	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
4568beb1afacSDavid Ahern 		goto nla_put_failure;
4569beb1afacSDavid Ahern 
4570beb1afacSDavid Ahern 	/* For multipath routes, walk the siblings list and add
4571beb1afacSDavid Ahern 	 * each as a nexthop within RTA_MULTIPATH.
4572beb1afacSDavid Ahern 	 */
457393c2fb25SDavid Ahern 	if (rt->fib6_nsiblings) {
45748d1c802bSDavid Ahern 		struct fib6_info *sibling, *next_sibling;
4575beb1afacSDavid Ahern 		struct nlattr *mp;
4576beb1afacSDavid Ahern 
4577beb1afacSDavid Ahern 		mp = nla_nest_start(skb, RTA_MULTIPATH);
4578beb1afacSDavid Ahern 		if (!mp)
4579beb1afacSDavid Ahern 			goto nla_put_failure;
4580beb1afacSDavid Ahern 
		/* @rt itself is the first nexthop, followed by each sibling */
4581beb1afacSDavid Ahern 		if (rt6_add_nexthop(skb, rt) < 0)
4582beb1afacSDavid Ahern 			goto nla_put_failure;
4583beb1afacSDavid Ahern 
4584beb1afacSDavid Ahern 		list_for_each_entry_safe(sibling, next_sibling,
458593c2fb25SDavid Ahern 					 &rt->fib6_siblings, fib6_siblings) {
4586beb1afacSDavid Ahern 			if (rt6_add_nexthop(skb, sibling) < 0)
458794f826b8SEric Dumazet 				goto nla_put_failure;
458894f826b8SEric Dumazet 		}
45892d7202bfSThomas Graf 
4590beb1afacSDavid Ahern 		nla_nest_end(skb, mp);
4591beb1afacSDavid Ahern 	} else {
		/* single nexthop: attributes go at top level, flags into rtm_flags */
45925be083ceSDavid Ahern 		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
4593c78679e8SDavid S. Miller 			goto nla_put_failure;
4594beb1afacSDavid Ahern 	}
45958253947eSLi Wei 
	/* expiry is reported relative to the current jiffies value */
459693c2fb25SDavid Ahern 	if (rt->fib6_flags & RTF_EXPIRES) {
459714895687SDavid Ahern 		expires = dst ? dst->expires : rt->expires;
459814895687SDavid Ahern 		expires -= jiffies;
459914895687SDavid Ahern 	}
460069cdf8f9SYOSHIFUJI Hideaki 
4601d4ead6b3SDavid Ahern 	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
4602e3703b3dSThomas Graf 		goto nla_put_failure;
46031da177e4SLinus Torvalds 
460493c2fb25SDavid Ahern 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags)))
4605c78ba6d6SLubomir Rintel 		goto nla_put_failure;
4606c78ba6d6SLubomir Rintel 
460719e42e45SRoopa Prabhu 
4608053c095aSJohannes Berg 	nlmsg_end(skb, nlh);
4609053c095aSJohannes Berg 	return 0;
46102d7202bfSThomas Graf 
46112d7202bfSThomas Graf nla_put_failure:
	/* drop everything written since nlmsg_put() */
461226932566SPatrick McHardy 	nlmsg_cancel(skb, nlh);
461326932566SPatrick McHardy 	return -EMSGSIZE;
46141da177e4SLinus Torvalds }
46151da177e4SLinus Torvalds 
46168d1c802bSDavid Ahern int rt6_dump_route(struct fib6_info *rt, void *p_arg)
46171da177e4SLinus Torvalds {
46181da177e4SLinus Torvalds 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
46191f17e2f2SDavid Ahern 	struct net *net = arg->net;
46201f17e2f2SDavid Ahern 
4621421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry)
46221f17e2f2SDavid Ahern 		return 0;
46231da177e4SLinus Torvalds 
46242d7202bfSThomas Graf 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
46252d7202bfSThomas Graf 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
4626f8cfe2ceSDavid Ahern 
4627f8cfe2ceSDavid Ahern 		/* user wants prefix routes only */
4628f8cfe2ceSDavid Ahern 		if (rtm->rtm_flags & RTM_F_PREFIX &&
462993c2fb25SDavid Ahern 		    !(rt->fib6_flags & RTF_PREFIX_RT)) {
4630f8cfe2ceSDavid Ahern 			/* success since this is not a prefix route */
4631f8cfe2ceSDavid Ahern 			return 1;
4632f8cfe2ceSDavid Ahern 		}
4633f8cfe2ceSDavid Ahern 	}
46341da177e4SLinus Torvalds 
4635d4ead6b3SDavid Ahern 	return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4636d4ead6b3SDavid Ahern 			     RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4637d4ead6b3SDavid Ahern 			     arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
46381da177e4SLinus Torvalds }
46391da177e4SLinus Torvalds 
4640c21ef3e3SDavid Ahern static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4641c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
46421da177e4SLinus Torvalds {
46433b1e0a65SYOSHIFUJI Hideaki 	struct net *net = sock_net(in_skb->sk);
4644ab364a6fSThomas Graf 	struct nlattr *tb[RTA_MAX+1];
464518c3a61cSRoopa Prabhu 	int err, iif = 0, oif = 0;
464618c3a61cSRoopa Prabhu 	struct dst_entry *dst;
46471da177e4SLinus Torvalds 	struct rt6_info *rt;
4648ab364a6fSThomas Graf 	struct sk_buff *skb;
4649ab364a6fSThomas Graf 	struct rtmsg *rtm;
46504c9483b2SDavid S. Miller 	struct flowi6 fl6;
465118c3a61cSRoopa Prabhu 	bool fibmatch;
4652ab364a6fSThomas Graf 
4653fceb6435SJohannes Berg 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4654c21ef3e3SDavid Ahern 			  extack);
4655ab364a6fSThomas Graf 	if (err < 0)
4656ab364a6fSThomas Graf 		goto errout;
4657ab364a6fSThomas Graf 
4658ab364a6fSThomas Graf 	err = -EINVAL;
46594c9483b2SDavid S. Miller 	memset(&fl6, 0, sizeof(fl6));
466038b7097bSHannes Frederic Sowa 	rtm = nlmsg_data(nlh);
466138b7097bSHannes Frederic Sowa 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
466218c3a61cSRoopa Prabhu 	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4663ab364a6fSThomas Graf 
4664ab364a6fSThomas Graf 	if (tb[RTA_SRC]) {
4665ab364a6fSThomas Graf 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4666ab364a6fSThomas Graf 			goto errout;
4667ab364a6fSThomas Graf 
46684e3fd7a0SAlexey Dobriyan 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4669ab364a6fSThomas Graf 	}
4670ab364a6fSThomas Graf 
4671ab364a6fSThomas Graf 	if (tb[RTA_DST]) {
4672ab364a6fSThomas Graf 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4673ab364a6fSThomas Graf 			goto errout;
4674ab364a6fSThomas Graf 
46754e3fd7a0SAlexey Dobriyan 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4676ab364a6fSThomas Graf 	}
4677ab364a6fSThomas Graf 
4678ab364a6fSThomas Graf 	if (tb[RTA_IIF])
4679ab364a6fSThomas Graf 		iif = nla_get_u32(tb[RTA_IIF]);
4680ab364a6fSThomas Graf 
4681ab364a6fSThomas Graf 	if (tb[RTA_OIF])
468272331bc0SShmulik Ladkani 		oif = nla_get_u32(tb[RTA_OIF]);
4683ab364a6fSThomas Graf 
46842e47b291SLorenzo Colitti 	if (tb[RTA_MARK])
46852e47b291SLorenzo Colitti 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
46862e47b291SLorenzo Colitti 
4687622ec2c9SLorenzo Colitti 	if (tb[RTA_UID])
4688622ec2c9SLorenzo Colitti 		fl6.flowi6_uid = make_kuid(current_user_ns(),
4689622ec2c9SLorenzo Colitti 					   nla_get_u32(tb[RTA_UID]));
4690622ec2c9SLorenzo Colitti 	else
4691622ec2c9SLorenzo Colitti 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4692622ec2c9SLorenzo Colitti 
4693ab364a6fSThomas Graf 	if (iif) {
4694ab364a6fSThomas Graf 		struct net_device *dev;
469572331bc0SShmulik Ladkani 		int flags = 0;
469672331bc0SShmulik Ladkani 
4697121622dbSFlorian Westphal 		rcu_read_lock();
4698121622dbSFlorian Westphal 
4699121622dbSFlorian Westphal 		dev = dev_get_by_index_rcu(net, iif);
4700ab364a6fSThomas Graf 		if (!dev) {
4701121622dbSFlorian Westphal 			rcu_read_unlock();
4702ab364a6fSThomas Graf 			err = -ENODEV;
4703ab364a6fSThomas Graf 			goto errout;
4704ab364a6fSThomas Graf 		}
470572331bc0SShmulik Ladkani 
470672331bc0SShmulik Ladkani 		fl6.flowi6_iif = iif;
470772331bc0SShmulik Ladkani 
470872331bc0SShmulik Ladkani 		if (!ipv6_addr_any(&fl6.saddr))
470972331bc0SShmulik Ladkani 			flags |= RT6_LOOKUP_F_HAS_SADDR;
471072331bc0SShmulik Ladkani 
4711b75cc8f9SDavid Ahern 		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
4712121622dbSFlorian Westphal 
4713121622dbSFlorian Westphal 		rcu_read_unlock();
471472331bc0SShmulik Ladkani 	} else {
471572331bc0SShmulik Ladkani 		fl6.flowi6_oif = oif;
471672331bc0SShmulik Ladkani 
471718c3a61cSRoopa Prabhu 		dst = ip6_route_output(net, NULL, &fl6);
471818c3a61cSRoopa Prabhu 	}
471918c3a61cSRoopa Prabhu 
472018c3a61cSRoopa Prabhu 
472118c3a61cSRoopa Prabhu 	rt = container_of(dst, struct rt6_info, dst);
472218c3a61cSRoopa Prabhu 	if (rt->dst.error) {
472318c3a61cSRoopa Prabhu 		err = rt->dst.error;
472418c3a61cSRoopa Prabhu 		ip6_rt_put(rt);
472518c3a61cSRoopa Prabhu 		goto errout;
4726ab364a6fSThomas Graf 	}
47271da177e4SLinus Torvalds 
47289d6acb3bSWANG Cong 	if (rt == net->ipv6.ip6_null_entry) {
47299d6acb3bSWANG Cong 		err = rt->dst.error;
47309d6acb3bSWANG Cong 		ip6_rt_put(rt);
47319d6acb3bSWANG Cong 		goto errout;
47329d6acb3bSWANG Cong 	}
47339d6acb3bSWANG Cong 
47341da177e4SLinus Torvalds 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
473538308473SDavid S. Miller 	if (!skb) {
473694e187c0SAmerigo Wang 		ip6_rt_put(rt);
4737ab364a6fSThomas Graf 		err = -ENOBUFS;
4738ab364a6fSThomas Graf 		goto errout;
4739ab364a6fSThomas Graf 	}
47401da177e4SLinus Torvalds 
4741d8d1f30bSChangli Gao 	skb_dst_set(skb, &rt->dst);
474218c3a61cSRoopa Prabhu 	if (fibmatch)
474393531c67SDavid Ahern 		err = rt6_fill_node(net, skb, rt->from, NULL, NULL, NULL, iif,
474418c3a61cSRoopa Prabhu 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
474518c3a61cSRoopa Prabhu 				    nlh->nlmsg_seq, 0);
474618c3a61cSRoopa Prabhu 	else
474793531c67SDavid Ahern 		err = rt6_fill_node(net, skb, rt->from, dst,
474893531c67SDavid Ahern 				    &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE,
4749d4ead6b3SDavid Ahern 				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4750d4ead6b3SDavid Ahern 				    0);
47511da177e4SLinus Torvalds 	if (err < 0) {
4752ab364a6fSThomas Graf 		kfree_skb(skb);
4753ab364a6fSThomas Graf 		goto errout;
47541da177e4SLinus Torvalds 	}
47551da177e4SLinus Torvalds 
475615e47304SEric W. Biederman 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
4757ab364a6fSThomas Graf errout:
47581da177e4SLinus Torvalds 	return err;
47591da177e4SLinus Torvalds }
47601da177e4SLinus Torvalds 
47618d1c802bSDavid Ahern void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
476237a1d361SRoopa Prabhu 		     unsigned int nlm_flags)
47631da177e4SLinus Torvalds {
47641da177e4SLinus Torvalds 	struct sk_buff *skb;
47655578689aSDaniel Lezcano 	struct net *net = info->nl_net;
4766528c4cebSDenis V. Lunev 	u32 seq;
4767528c4cebSDenis V. Lunev 	int err;
47680d51aa80SJamal Hadi Salim 
4769528c4cebSDenis V. Lunev 	err = -ENOBUFS;
477038308473SDavid S. Miller 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
477186872cb5SThomas Graf 
477219e42e45SRoopa Prabhu 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
477338308473SDavid S. Miller 	if (!skb)
477421713ebcSThomas Graf 		goto errout;
47751da177e4SLinus Torvalds 
4776d4ead6b3SDavid Ahern 	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4777f8cfe2ceSDavid Ahern 			    event, info->portid, seq, nlm_flags);
477826932566SPatrick McHardy 	if (err < 0) {
477926932566SPatrick McHardy 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
478026932566SPatrick McHardy 		WARN_ON(err == -EMSGSIZE);
478126932566SPatrick McHardy 		kfree_skb(skb);
478226932566SPatrick McHardy 		goto errout;
478326932566SPatrick McHardy 	}
478415e47304SEric W. Biederman 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
47855578689aSDaniel Lezcano 		    info->nlh, gfp_any());
47861ce85fe4SPablo Neira Ayuso 	return;
478721713ebcSThomas Graf errout:
478821713ebcSThomas Graf 	if (err < 0)
47895578689aSDaniel Lezcano 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
47901da177e4SLinus Torvalds }
47911da177e4SLinus Torvalds 
47928ed67789SDaniel Lezcano static int ip6_route_dev_notify(struct notifier_block *this,
4793351638e7SJiri Pirko 				unsigned long event, void *ptr)
47948ed67789SDaniel Lezcano {
4795351638e7SJiri Pirko 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
4796c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
47978ed67789SDaniel Lezcano 
4798242d3a49SWANG Cong 	if (!(dev->flags & IFF_LOOPBACK))
4799242d3a49SWANG Cong 		return NOTIFY_OK;
4800242d3a49SWANG Cong 
4801242d3a49SWANG Cong 	if (event == NETDEV_REGISTER) {
4802421842edSDavid Ahern 		net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
480393c2fb25SDavid Ahern 		net->ipv6.fib6_null_entry->fib6_idev = in6_dev_get(dev);
4804d8d1f30bSChangli Gao 		net->ipv6.ip6_null_entry->dst.dev = dev;
48058ed67789SDaniel Lezcano 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
48068ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4807d8d1f30bSChangli Gao 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
48088ed67789SDaniel Lezcano 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
4809d8d1f30bSChangli Gao 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
48108ed67789SDaniel Lezcano 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
48118ed67789SDaniel Lezcano #endif
481276da0704SWANG Cong 	 } else if (event == NETDEV_UNREGISTER &&
481376da0704SWANG Cong 		    dev->reg_state != NETREG_UNREGISTERED) {
481476da0704SWANG Cong 		/* NETDEV_UNREGISTER could be fired for multiple times by
481576da0704SWANG Cong 		 * netdev_wait_allrefs(). Make sure we only call this once.
481676da0704SWANG Cong 		 */
481793c2fb25SDavid Ahern 		in6_dev_put_clear(&net->ipv6.fib6_null_entry->fib6_idev);
481812d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
4819242d3a49SWANG Cong #ifdef CONFIG_IPV6_MULTIPLE_TABLES
482012d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
482112d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
4822242d3a49SWANG Cong #endif
48238ed67789SDaniel Lezcano 	}
48248ed67789SDaniel Lezcano 
48258ed67789SDaniel Lezcano 	return NOTIFY_OK;
48268ed67789SDaniel Lezcano }
48278ed67789SDaniel Lezcano 
48281da177e4SLinus Torvalds /*
48291da177e4SLinus Torvalds  *	/proc
48301da177e4SLinus Torvalds  */
48311da177e4SLinus Torvalds 
48321da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
48331da177e4SLinus Torvalds 
483433120b30SAlexey Dobriyan static const struct file_operations ipv6_route_proc_fops = {
483533120b30SAlexey Dobriyan 	.open		= ipv6_route_open,
483633120b30SAlexey Dobriyan 	.read		= seq_read,
483733120b30SAlexey Dobriyan 	.llseek		= seq_lseek,
48388d2ca1d7SHannes Frederic Sowa 	.release	= seq_release_net,
483933120b30SAlexey Dobriyan };
484033120b30SAlexey Dobriyan 
48411da177e4SLinus Torvalds static int rt6_stats_seq_show(struct seq_file *seq, void *v)
48421da177e4SLinus Torvalds {
484369ddb805SDaniel Lezcano 	struct net *net = (struct net *)seq->private;
48441da177e4SLinus Torvalds 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
484569ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_nodes,
484669ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_route_nodes,
484781eb8447SWei Wang 		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
484869ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_entries,
484969ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_cache,
4850fc66f95cSEric Dumazet 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
485169ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_discarded_routes);
48521da177e4SLinus Torvalds 
48531da177e4SLinus Torvalds 	return 0;
48541da177e4SLinus Torvalds }
48551da177e4SLinus Torvalds 
48561da177e4SLinus Torvalds static int rt6_stats_seq_open(struct inode *inode, struct file *file)
48571da177e4SLinus Torvalds {
4858de05c557SPavel Emelyanov 	return single_open_net(inode, file, rt6_stats_seq_show);
485969ddb805SDaniel Lezcano }
486069ddb805SDaniel Lezcano 
48619a32144eSArjan van de Ven static const struct file_operations rt6_stats_seq_fops = {
48621da177e4SLinus Torvalds 	.open	 = rt6_stats_seq_open,
48631da177e4SLinus Torvalds 	.read	 = seq_read,
48641da177e4SLinus Torvalds 	.llseek	 = seq_lseek,
4865b6fcbdb4SPavel Emelyanov 	.release = single_release_net,
48661da177e4SLinus Torvalds };
48671da177e4SLinus Torvalds #endif	/* CONFIG_PROC_FS */
48681da177e4SLinus Torvalds 
48691da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
48701da177e4SLinus Torvalds 
48711da177e4SLinus Torvalds static
4872fe2c6338SJoe Perches int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
48731da177e4SLinus Torvalds 			      void __user *buffer, size_t *lenp, loff_t *ppos)
48741da177e4SLinus Torvalds {
4875c486da34SLucian Adrian Grijincu 	struct net *net;
4876c486da34SLucian Adrian Grijincu 	int delay;
4877c486da34SLucian Adrian Grijincu 	if (!write)
4878c486da34SLucian Adrian Grijincu 		return -EINVAL;
4879c486da34SLucian Adrian Grijincu 
4880c486da34SLucian Adrian Grijincu 	net = (struct net *)ctl->extra1;
4881c486da34SLucian Adrian Grijincu 	delay = net->ipv6.sysctl.flush_delay;
48828d65af78SAlexey Dobriyan 	proc_dointvec(ctl, write, buffer, lenp, ppos);
48832ac3ac8fSMichal Kubeček 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
48841da177e4SLinus Torvalds 	return 0;
48851da177e4SLinus Torvalds }
48861da177e4SLinus Torvalds 
4887fe2c6338SJoe Perches struct ctl_table ipv6_route_table_template[] = {
48881da177e4SLinus Torvalds 	{
48891da177e4SLinus Torvalds 		.procname	=	"flush",
48904990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.flush_delay,
48911da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
489289c8b3a1SDave Jones 		.mode		=	0200,
48936d9f239aSAlexey Dobriyan 		.proc_handler	=	ipv6_sysctl_rtcache_flush
48941da177e4SLinus Torvalds 	},
48951da177e4SLinus Torvalds 	{
48961da177e4SLinus Torvalds 		.procname	=	"gc_thresh",
48979a7ec3a9SDaniel Lezcano 		.data		=	&ip6_dst_ops_template.gc_thresh,
48981da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
48991da177e4SLinus Torvalds 		.mode		=	0644,
49006d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec,
49011da177e4SLinus Torvalds 	},
49021da177e4SLinus Torvalds 	{
49031da177e4SLinus Torvalds 		.procname	=	"max_size",
49044990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
49051da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49061da177e4SLinus Torvalds 		.mode		=	0644,
49076d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec,
49081da177e4SLinus Torvalds 	},
49091da177e4SLinus Torvalds 	{
49101da177e4SLinus Torvalds 		.procname	=	"gc_min_interval",
49114990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
49121da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49131da177e4SLinus Torvalds 		.mode		=	0644,
49146d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
49151da177e4SLinus Torvalds 	},
49161da177e4SLinus Torvalds 	{
49171da177e4SLinus Torvalds 		.procname	=	"gc_timeout",
49184990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
49191da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49201da177e4SLinus Torvalds 		.mode		=	0644,
49216d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
49221da177e4SLinus Torvalds 	},
49231da177e4SLinus Torvalds 	{
49241da177e4SLinus Torvalds 		.procname	=	"gc_interval",
49254990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
49261da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49271da177e4SLinus Torvalds 		.mode		=	0644,
49286d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
49291da177e4SLinus Torvalds 	},
49301da177e4SLinus Torvalds 	{
49311da177e4SLinus Torvalds 		.procname	=	"gc_elasticity",
49324990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
49331da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49341da177e4SLinus Torvalds 		.mode		=	0644,
4935f3d3f616SMin Zhang 		.proc_handler	=	proc_dointvec,
49361da177e4SLinus Torvalds 	},
49371da177e4SLinus Torvalds 	{
49381da177e4SLinus Torvalds 		.procname	=	"mtu_expires",
49394990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
49401da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49411da177e4SLinus Torvalds 		.mode		=	0644,
49426d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
49431da177e4SLinus Torvalds 	},
49441da177e4SLinus Torvalds 	{
49451da177e4SLinus Torvalds 		.procname	=	"min_adv_mss",
49464990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
49471da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49481da177e4SLinus Torvalds 		.mode		=	0644,
4949f3d3f616SMin Zhang 		.proc_handler	=	proc_dointvec,
49501da177e4SLinus Torvalds 	},
49511da177e4SLinus Torvalds 	{
49521da177e4SLinus Torvalds 		.procname	=	"gc_min_interval_ms",
49534990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
49541da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49551da177e4SLinus Torvalds 		.mode		=	0644,
49566d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_ms_jiffies,
49571da177e4SLinus Torvalds 	},
4958f8572d8fSEric W. Biederman 	{ }
49591da177e4SLinus Torvalds };
49601da177e4SLinus Torvalds 
49612c8c1e72SAlexey Dobriyan struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
4962760f2d01SDaniel Lezcano {
4963760f2d01SDaniel Lezcano 	struct ctl_table *table;
4964760f2d01SDaniel Lezcano 
4965760f2d01SDaniel Lezcano 	table = kmemdup(ipv6_route_table_template,
4966760f2d01SDaniel Lezcano 			sizeof(ipv6_route_table_template),
4967760f2d01SDaniel Lezcano 			GFP_KERNEL);
49685ee09105SYOSHIFUJI Hideaki 
49695ee09105SYOSHIFUJI Hideaki 	if (table) {
49705ee09105SYOSHIFUJI Hideaki 		table[0].data = &net->ipv6.sysctl.flush_delay;
4971c486da34SLucian Adrian Grijincu 		table[0].extra1 = net;
497286393e52SAlexey Dobriyan 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
49735ee09105SYOSHIFUJI Hideaki 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
49745ee09105SYOSHIFUJI Hideaki 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
49755ee09105SYOSHIFUJI Hideaki 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
49765ee09105SYOSHIFUJI Hideaki 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
49775ee09105SYOSHIFUJI Hideaki 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
49785ee09105SYOSHIFUJI Hideaki 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
49795ee09105SYOSHIFUJI Hideaki 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
49809c69fabeSAlexey Dobriyan 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4981464dc801SEric W. Biederman 
4982464dc801SEric W. Biederman 		/* Don't export sysctls to unprivileged users */
4983464dc801SEric W. Biederman 		if (net->user_ns != &init_user_ns)
4984464dc801SEric W. Biederman 			table[0].procname = NULL;
49855ee09105SYOSHIFUJI Hideaki 	}
49865ee09105SYOSHIFUJI Hideaki 
4987760f2d01SDaniel Lezcano 	return table;
4988760f2d01SDaniel Lezcano }
49891da177e4SLinus Torvalds #endif
49901da177e4SLinus Torvalds 
49912c8c1e72SAlexey Dobriyan static int __net_init ip6_route_net_init(struct net *net)
4992cdb18761SDaniel Lezcano {
4993633d424bSPavel Emelyanov 	int ret = -ENOMEM;
49948ed67789SDaniel Lezcano 
499586393e52SAlexey Dobriyan 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
499686393e52SAlexey Dobriyan 	       sizeof(net->ipv6.ip6_dst_ops));
4997f2fc6a54SBenjamin Thery 
4998fc66f95cSEric Dumazet 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
4999fc66f95cSEric Dumazet 		goto out_ip6_dst_ops;
5000fc66f95cSEric Dumazet 
5001421842edSDavid Ahern 	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5002421842edSDavid Ahern 					    sizeof(*net->ipv6.fib6_null_entry),
5003421842edSDavid Ahern 					    GFP_KERNEL);
5004421842edSDavid Ahern 	if (!net->ipv6.fib6_null_entry)
5005421842edSDavid Ahern 		goto out_ip6_dst_entries;
5006421842edSDavid Ahern 
50078ed67789SDaniel Lezcano 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
50088ed67789SDaniel Lezcano 					   sizeof(*net->ipv6.ip6_null_entry),
50098ed67789SDaniel Lezcano 					   GFP_KERNEL);
50108ed67789SDaniel Lezcano 	if (!net->ipv6.ip6_null_entry)
5011421842edSDavid Ahern 		goto out_fib6_null_entry;
5012d8d1f30bSChangli Gao 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
501362fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
501462fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
50158ed67789SDaniel Lezcano 
50168ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5017feca7d8cSVincent Bernat 	net->ipv6.fib6_has_custom_rules = false;
50188ed67789SDaniel Lezcano 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
50198ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_prohibit_entry),
50208ed67789SDaniel Lezcano 					       GFP_KERNEL);
502168fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_prohibit_entry)
502268fffc67SPeter Zijlstra 		goto out_ip6_null_entry;
5023d8d1f30bSChangli Gao 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
502462fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
502562fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
50268ed67789SDaniel Lezcano 
50278ed67789SDaniel Lezcano 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
50288ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
50298ed67789SDaniel Lezcano 					       GFP_KERNEL);
503068fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_blk_hole_entry)
503168fffc67SPeter Zijlstra 		goto out_ip6_prohibit_entry;
5032d8d1f30bSChangli Gao 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
503362fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
503462fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
50358ed67789SDaniel Lezcano #endif
50368ed67789SDaniel Lezcano 
5037b339a47cSPeter Zijlstra 	net->ipv6.sysctl.flush_delay = 0;
5038b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
5039b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5040b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5041b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5042b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5043b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5044b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5045b339a47cSPeter Zijlstra 
50466891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
50476891a346SBenjamin Thery 
50488ed67789SDaniel Lezcano 	ret = 0;
50498ed67789SDaniel Lezcano out:
50508ed67789SDaniel Lezcano 	return ret;
5051f2fc6a54SBenjamin Thery 
505268fffc67SPeter Zijlstra #ifdef CONFIG_IPV6_MULTIPLE_TABLES
505368fffc67SPeter Zijlstra out_ip6_prohibit_entry:
505468fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_prohibit_entry);
505568fffc67SPeter Zijlstra out_ip6_null_entry:
505668fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_null_entry);
505768fffc67SPeter Zijlstra #endif
5058421842edSDavid Ahern out_fib6_null_entry:
5059421842edSDavid Ahern 	kfree(net->ipv6.fib6_null_entry);
5060fc66f95cSEric Dumazet out_ip6_dst_entries:
5061fc66f95cSEric Dumazet 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5062f2fc6a54SBenjamin Thery out_ip6_dst_ops:
5063f2fc6a54SBenjamin Thery 	goto out;
5064cdb18761SDaniel Lezcano }
5065cdb18761SDaniel Lezcano 
50662c8c1e72SAlexey Dobriyan static void __net_exit ip6_route_net_exit(struct net *net)
5067cdb18761SDaniel Lezcano {
5068421842edSDavid Ahern 	kfree(net->ipv6.fib6_null_entry);
50698ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_null_entry);
50708ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
50718ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_prohibit_entry);
50728ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_blk_hole_entry);
50738ed67789SDaniel Lezcano #endif
507441bb78b4SXiaotian Feng 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5075cdb18761SDaniel Lezcano }
5076cdb18761SDaniel Lezcano 
5077d189634eSThomas Graf static int __net_init ip6_route_net_init_late(struct net *net)
5078d189634eSThomas Graf {
5079d189634eSThomas Graf #ifdef CONFIG_PROC_FS
5080d4beaa66SGao feng 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
5081d6444062SJoe Perches 	proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
5082d189634eSThomas Graf #endif
5083d189634eSThomas Graf 	return 0;
5084d189634eSThomas Graf }
5085d189634eSThomas Graf 
5086d189634eSThomas Graf static void __net_exit ip6_route_net_exit_late(struct net *net)
5087d189634eSThomas Graf {
5088d189634eSThomas Graf #ifdef CONFIG_PROC_FS
5089ece31ffdSGao feng 	remove_proc_entry("ipv6_route", net->proc_net);
5090ece31ffdSGao feng 	remove_proc_entry("rt6_stats", net->proc_net);
5091d189634eSThomas Graf #endif
5092d189634eSThomas Graf }
5093d189634eSThomas Graf 
5094cdb18761SDaniel Lezcano static struct pernet_operations ip6_route_net_ops = {
5095cdb18761SDaniel Lezcano 	.init = ip6_route_net_init,
5096cdb18761SDaniel Lezcano 	.exit = ip6_route_net_exit,
5097cdb18761SDaniel Lezcano };
5098cdb18761SDaniel Lezcano 
5099c3426b47SDavid S. Miller static int __net_init ipv6_inetpeer_init(struct net *net)
5100c3426b47SDavid S. Miller {
5101c3426b47SDavid S. Miller 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5102c3426b47SDavid S. Miller 
5103c3426b47SDavid S. Miller 	if (!bp)
5104c3426b47SDavid S. Miller 		return -ENOMEM;
5105c3426b47SDavid S. Miller 	inet_peer_base_init(bp);
5106c3426b47SDavid S. Miller 	net->ipv6.peers = bp;
5107c3426b47SDavid S. Miller 	return 0;
5108c3426b47SDavid S. Miller }
5109c3426b47SDavid S. Miller 
5110c3426b47SDavid S. Miller static void __net_exit ipv6_inetpeer_exit(struct net *net)
5111c3426b47SDavid S. Miller {
5112c3426b47SDavid S. Miller 	struct inet_peer_base *bp = net->ipv6.peers;
5113c3426b47SDavid S. Miller 
5114c3426b47SDavid S. Miller 	net->ipv6.peers = NULL;
511556a6b248SDavid S. Miller 	inetpeer_invalidate_tree(bp);
5116c3426b47SDavid S. Miller 	kfree(bp);
5117c3426b47SDavid S. Miller }
5118c3426b47SDavid S. Miller 
51192b823f72SDavid S. Miller static struct pernet_operations ipv6_inetpeer_ops = {
5120c3426b47SDavid S. Miller 	.init	=	ipv6_inetpeer_init,
5121c3426b47SDavid S. Miller 	.exit	=	ipv6_inetpeer_exit,
5122c3426b47SDavid S. Miller };
5123c3426b47SDavid S. Miller 
5124d189634eSThomas Graf static struct pernet_operations ip6_route_net_late_ops = {
5125d189634eSThomas Graf 	.init = ip6_route_net_init_late,
5126d189634eSThomas Graf 	.exit = ip6_route_net_exit_late,
5127d189634eSThomas Graf };
5128d189634eSThomas Graf 
51298ed67789SDaniel Lezcano static struct notifier_block ip6_route_dev_notifier = {
51308ed67789SDaniel Lezcano 	.notifier_call = ip6_route_dev_notify,
5131242d3a49SWANG Cong 	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
51328ed67789SDaniel Lezcano };
51338ed67789SDaniel Lezcano 
51342f460933SWANG Cong void __init ip6_route_init_special_entries(void)
51352f460933SWANG Cong {
51362f460933SWANG Cong 	/* Registering of the loopback is done before this portion of code,
51372f460933SWANG Cong 	 * the loopback reference in rt6_info will not be taken, do it
51382f460933SWANG Cong 	 * manually for init_net */
5139421842edSDavid Ahern 	init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
514093c2fb25SDavid Ahern 	init_net.ipv6.fib6_null_entry->fib6_idev = in6_dev_get(init_net.loopback_dev);
51412f460933SWANG Cong 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
51422f460933SWANG Cong 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
51432f460933SWANG Cong   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
51442f460933SWANG Cong 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
51452f460933SWANG Cong 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
51462f460933SWANG Cong 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
51472f460933SWANG Cong 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
51482f460933SWANG Cong   #endif
51492f460933SWANG Cong }
51502f460933SWANG Cong 
5151433d49c3SDaniel Lezcano int __init ip6_route_init(void)
51521da177e4SLinus Torvalds {
5153433d49c3SDaniel Lezcano 	int ret;
51548d0b94afSMartin KaFai Lau 	int cpu;
5155433d49c3SDaniel Lezcano 
51569a7ec3a9SDaniel Lezcano 	ret = -ENOMEM;
51579a7ec3a9SDaniel Lezcano 	ip6_dst_ops_template.kmem_cachep =
51589a7ec3a9SDaniel Lezcano 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
51599a7ec3a9SDaniel Lezcano 				  SLAB_HWCACHE_ALIGN, NULL);
51609a7ec3a9SDaniel Lezcano 	if (!ip6_dst_ops_template.kmem_cachep)
5161c19a28e1SFernando Carrijo 		goto out;
516214e50e57SDavid S. Miller 
5163fc66f95cSEric Dumazet 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
51648ed67789SDaniel Lezcano 	if (ret)
5165bdb3289fSDaniel Lezcano 		goto out_kmem_cache;
5166bdb3289fSDaniel Lezcano 
5167c3426b47SDavid S. Miller 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5168c3426b47SDavid S. Miller 	if (ret)
5169e8803b6cSDavid S. Miller 		goto out_dst_entries;
51702a0c451aSThomas Graf 
51717e52b33bSDavid S. Miller 	ret = register_pernet_subsys(&ip6_route_net_ops);
51727e52b33bSDavid S. Miller 	if (ret)
51737e52b33bSDavid S. Miller 		goto out_register_inetpeer;
5174c3426b47SDavid S. Miller 
51755dc121e9SArnaud Ebalard 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
51765dc121e9SArnaud Ebalard 
5177e8803b6cSDavid S. Miller 	ret = fib6_init();
5178433d49c3SDaniel Lezcano 	if (ret)
51798ed67789SDaniel Lezcano 		goto out_register_subsys;
5180433d49c3SDaniel Lezcano 
5181433d49c3SDaniel Lezcano 	ret = xfrm6_init();
5182433d49c3SDaniel Lezcano 	if (ret)
5183e8803b6cSDavid S. Miller 		goto out_fib6_init;
5184c35b7e72SDaniel Lezcano 
5185433d49c3SDaniel Lezcano 	ret = fib6_rules_init();
5186433d49c3SDaniel Lezcano 	if (ret)
5187433d49c3SDaniel Lezcano 		goto xfrm6_init;
51887e5449c2SDaniel Lezcano 
5189d189634eSThomas Graf 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
5190d189634eSThomas Graf 	if (ret)
5191d189634eSThomas Graf 		goto fib6_rules_init;
5192d189634eSThomas Graf 
519316feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
519416feebcfSFlorian Westphal 				   inet6_rtm_newroute, NULL, 0);
519516feebcfSFlorian Westphal 	if (ret < 0)
519616feebcfSFlorian Westphal 		goto out_register_late_subsys;
519716feebcfSFlorian Westphal 
519816feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
519916feebcfSFlorian Westphal 				   inet6_rtm_delroute, NULL, 0);
520016feebcfSFlorian Westphal 	if (ret < 0)
520116feebcfSFlorian Westphal 		goto out_register_late_subsys;
520216feebcfSFlorian Westphal 
520316feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
520416feebcfSFlorian Westphal 				   inet6_rtm_getroute, NULL,
520516feebcfSFlorian Westphal 				   RTNL_FLAG_DOIT_UNLOCKED);
520616feebcfSFlorian Westphal 	if (ret < 0)
5207d189634eSThomas Graf 		goto out_register_late_subsys;
5208433d49c3SDaniel Lezcano 
52098ed67789SDaniel Lezcano 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
5210cdb18761SDaniel Lezcano 	if (ret)
5211d189634eSThomas Graf 		goto out_register_late_subsys;
52128ed67789SDaniel Lezcano 
52138d0b94afSMartin KaFai Lau 	for_each_possible_cpu(cpu) {
52148d0b94afSMartin KaFai Lau 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
52158d0b94afSMartin KaFai Lau 
52168d0b94afSMartin KaFai Lau 		INIT_LIST_HEAD(&ul->head);
52178d0b94afSMartin KaFai Lau 		spin_lock_init(&ul->lock);
52188d0b94afSMartin KaFai Lau 	}
52198d0b94afSMartin KaFai Lau 
5220433d49c3SDaniel Lezcano out:
5221433d49c3SDaniel Lezcano 	return ret;
5222433d49c3SDaniel Lezcano 
5223d189634eSThomas Graf out_register_late_subsys:
522416feebcfSFlorian Westphal 	rtnl_unregister_all(PF_INET6);
5225d189634eSThomas Graf 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5226433d49c3SDaniel Lezcano fib6_rules_init:
5227433d49c3SDaniel Lezcano 	fib6_rules_cleanup();
5228433d49c3SDaniel Lezcano xfrm6_init:
5229433d49c3SDaniel Lezcano 	xfrm6_fini();
52302a0c451aSThomas Graf out_fib6_init:
52312a0c451aSThomas Graf 	fib6_gc_cleanup();
52328ed67789SDaniel Lezcano out_register_subsys:
52338ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
52347e52b33bSDavid S. Miller out_register_inetpeer:
52357e52b33bSDavid S. Miller 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
5236fc66f95cSEric Dumazet out_dst_entries:
5237fc66f95cSEric Dumazet 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5238433d49c3SDaniel Lezcano out_kmem_cache:
5239f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5240433d49c3SDaniel Lezcano 	goto out;
52411da177e4SLinus Torvalds }
52421da177e4SLinus Torvalds 
52431da177e4SLinus Torvalds void ip6_route_cleanup(void)
52441da177e4SLinus Torvalds {
52458ed67789SDaniel Lezcano 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
5246d189634eSThomas Graf 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5247101367c2SThomas Graf 	fib6_rules_cleanup();
52481da177e4SLinus Torvalds 	xfrm6_fini();
52491da177e4SLinus Torvalds 	fib6_gc_cleanup();
5250c3426b47SDavid S. Miller 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
52518ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
525241bb78b4SXiaotian Feng 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5253f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
52541da177e4SLinus Torvalds }
5255