xref: /openbmc/linux/net/ipv6/route.c (revision ae0be8de9a53cda3505865c11826d8ff0640237c)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  *	Linux INET6 implementation
31da177e4SLinus Torvalds  *	FIB front-end.
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  *	Authors:
61da177e4SLinus Torvalds  *	Pedro Roque		<roque@di.fc.ul.pt>
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
91da177e4SLinus Torvalds  *      modify it under the terms of the GNU General Public License
101da177e4SLinus Torvalds  *      as published by the Free Software Foundation; either version
111da177e4SLinus Torvalds  *      2 of the License, or (at your option) any later version.
121da177e4SLinus Torvalds  */
131da177e4SLinus Torvalds 
141da177e4SLinus Torvalds /*	Changes:
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  *	YOSHIFUJI Hideaki @USAGI
171da177e4SLinus Torvalds  *		reworked default router selection.
181da177e4SLinus Torvalds  *		- respect outgoing interface
191da177e4SLinus Torvalds  *		- select from (probably) reachable routers (i.e.
201da177e4SLinus Torvalds  *		routers in REACHABLE, STALE, DELAY or PROBE states).
211da177e4SLinus Torvalds  *		- always select the same router if it is (probably)
221da177e4SLinus Torvalds  *		reachable.  otherwise, round-robin the list.
23c0bece9fSYOSHIFUJI Hideaki  *	Ville Nuorvala
24c0bece9fSYOSHIFUJI Hideaki  *		Fixed routing subtrees.
251da177e4SLinus Torvalds  */
261da177e4SLinus Torvalds 
27f3213831SJoe Perches #define pr_fmt(fmt) "IPv6: " fmt
28f3213831SJoe Perches 
294fc268d2SRandy Dunlap #include <linux/capability.h>
301da177e4SLinus Torvalds #include <linux/errno.h>
31bc3b2d7fSPaul Gortmaker #include <linux/export.h>
321da177e4SLinus Torvalds #include <linux/types.h>
331da177e4SLinus Torvalds #include <linux/times.h>
341da177e4SLinus Torvalds #include <linux/socket.h>
351da177e4SLinus Torvalds #include <linux/sockios.h>
361da177e4SLinus Torvalds #include <linux/net.h>
371da177e4SLinus Torvalds #include <linux/route.h>
381da177e4SLinus Torvalds #include <linux/netdevice.h>
391da177e4SLinus Torvalds #include <linux/in6.h>
407bc570c8SYOSHIFUJI Hideaki #include <linux/mroute6.h>
411da177e4SLinus Torvalds #include <linux/init.h>
421da177e4SLinus Torvalds #include <linux/if_arp.h>
431da177e4SLinus Torvalds #include <linux/proc_fs.h>
441da177e4SLinus Torvalds #include <linux/seq_file.h>
455b7c931dSDaniel Lezcano #include <linux/nsproxy.h>
465a0e3ad6STejun Heo #include <linux/slab.h>
4735732d01SWei Wang #include <linux/jhash.h>
48457c4cbcSEric W. Biederman #include <net/net_namespace.h>
491da177e4SLinus Torvalds #include <net/snmp.h>
501da177e4SLinus Torvalds #include <net/ipv6.h>
511da177e4SLinus Torvalds #include <net/ip6_fib.h>
521da177e4SLinus Torvalds #include <net/ip6_route.h>
531da177e4SLinus Torvalds #include <net/ndisc.h>
541da177e4SLinus Torvalds #include <net/addrconf.h>
551da177e4SLinus Torvalds #include <net/tcp.h>
561da177e4SLinus Torvalds #include <linux/rtnetlink.h>
571da177e4SLinus Torvalds #include <net/dst.h>
58904af04dSJiri Benc #include <net/dst_metadata.h>
591da177e4SLinus Torvalds #include <net/xfrm.h>
608d71740cSTom Tucker #include <net/netevent.h>
6121713ebcSThomas Graf #include <net/netlink.h>
623c618c1dSDavid Ahern #include <net/rtnh.h>
6319e42e45SRoopa Prabhu #include <net/lwtunnel.h>
64904af04dSJiri Benc #include <net/ip_tunnels.h>
65ca254490SDavid Ahern #include <net/l3mdev.h>
66eacb9384SRoopa Prabhu #include <net/ip.h>
677c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
681da177e4SLinus Torvalds 
691da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
701da177e4SLinus Torvalds #include <linux/sysctl.h>
711da177e4SLinus Torvalds #endif
721da177e4SLinus Torvalds 
7330d444d3SDavid Ahern static int ip6_rt_type_to_error(u8 fib6_type);
7430d444d3SDavid Ahern 
7530d444d3SDavid Ahern #define CREATE_TRACE_POINTS
7630d444d3SDavid Ahern #include <trace/events/fib6.h>
7730d444d3SDavid Ahern EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
7830d444d3SDavid Ahern #undef CREATE_TRACE_POINTS
7930d444d3SDavid Ahern 
/*
 * Result of evaluating a nexthop's neighbour (NUD) state during router
 * selection.  Failures are negative, success is positive.
 */
enum rt6_nud_state {
	/* Default result of rt6_check_neigh(); also returned by
	 * rt6_score_route() on an interface mismatch under
	 * RT6_LOOKUP_F_IFACE.
	 */
	RT6_NUD_FAIL_HARD = -3,
	/* Neighbour entry exists but is NUD_FAILED (CONFIG_IPV6_ROUTER_PREF). */
	RT6_NUD_FAIL_PROBE = -2,
	/* No neighbour entry and CONFIG_IPV6_ROUTER_PREF is disabled. */
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};
86afc154e9SHannes Frederic Sowa 
871da177e4SLinus Torvalds static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
880dbaee3bSDavid S. Miller static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
89ebb762f2SSteffen Klassert static unsigned int	 ip6_mtu(const struct dst_entry *dst);
901da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *);
911da177e4SLinus Torvalds static void		ip6_dst_destroy(struct dst_entry *);
921da177e4SLinus Torvalds static void		ip6_dst_ifdown(struct dst_entry *,
931da177e4SLinus Torvalds 				       struct net_device *dev, int how);
94569d3645SDaniel Lezcano static int		 ip6_dst_gc(struct dst_ops *ops);
951da177e4SLinus Torvalds 
961da177e4SLinus Torvalds static int		ip6_pkt_discard(struct sk_buff *skb);
97ede2059dSEric W. Biederman static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
987150aedeSKamala R static int		ip6_pkt_prohibit(struct sk_buff *skb);
99ede2059dSEric W. Biederman static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1001da177e4SLinus Torvalds static void		ip6_link_failure(struct sk_buff *skb);
1016700c270SDavid S. Miller static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1026700c270SDavid S. Miller 					   struct sk_buff *skb, u32 mtu);
1036700c270SDavid S. Miller static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
1046700c270SDavid S. Miller 					struct sk_buff *skb);
105702cea56SDavid Ahern static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
106702cea56SDavid Ahern 			   int strict);
1078d1c802bSDavid Ahern static size_t rt6_nlmsg_size(struct fib6_info *rt);
108d4ead6b3SDavid Ahern static int rt6_fill_node(struct net *net, struct sk_buff *skb,
1098d1c802bSDavid Ahern 			 struct fib6_info *rt, struct dst_entry *dst,
110d4ead6b3SDavid Ahern 			 struct in6_addr *dest, struct in6_addr *src,
11116a16cd3SDavid Ahern 			 int iif, int type, u32 portid, u32 seq,
11216a16cd3SDavid Ahern 			 unsigned int flags);
1137e4b5128SDavid Ahern static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
11435732d01SWei Wang 					   struct in6_addr *daddr,
11535732d01SWei Wang 					   struct in6_addr *saddr);
1161da177e4SLinus Torvalds 
11770ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
1188d1c802bSDavid Ahern static struct fib6_info *rt6_add_route_info(struct net *net,
119b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
120830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
121830218c1SDavid Ahern 					   struct net_device *dev,
12295c96174SEric Dumazet 					   unsigned int pref);
1238d1c802bSDavid Ahern static struct fib6_info *rt6_get_route_info(struct net *net,
124b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
125830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
126830218c1SDavid Ahern 					   struct net_device *dev);
12770ceb4f5SYOSHIFUJI Hideaki #endif
12870ceb4f5SYOSHIFUJI Hideaki 
/*
 * List of rt6_info entries linked through rt6_info::rt6i_uncached.
 * @lock protects @head; it is taken with the _bh spinlock variants.
 */
struct uncached_list {
	spinlock_t		lock;
	struct list_head	head;
};

/* One uncached list per CPU. */
static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
1358d0b94afSMartin KaFai Lau 
136510c321bSXin Long void rt6_uncached_list_add(struct rt6_info *rt)
1378d0b94afSMartin KaFai Lau {
1388d0b94afSMartin KaFai Lau 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
1398d0b94afSMartin KaFai Lau 
1408d0b94afSMartin KaFai Lau 	rt->rt6i_uncached_list = ul;
1418d0b94afSMartin KaFai Lau 
1428d0b94afSMartin KaFai Lau 	spin_lock_bh(&ul->lock);
1438d0b94afSMartin KaFai Lau 	list_add_tail(&rt->rt6i_uncached, &ul->head);
1448d0b94afSMartin KaFai Lau 	spin_unlock_bh(&ul->lock);
1458d0b94afSMartin KaFai Lau }
1468d0b94afSMartin KaFai Lau 
147510c321bSXin Long void rt6_uncached_list_del(struct rt6_info *rt)
1488d0b94afSMartin KaFai Lau {
1498d0b94afSMartin KaFai Lau 	if (!list_empty(&rt->rt6i_uncached)) {
1508d0b94afSMartin KaFai Lau 		struct uncached_list *ul = rt->rt6i_uncached_list;
15181eb8447SWei Wang 		struct net *net = dev_net(rt->dst.dev);
1528d0b94afSMartin KaFai Lau 
1538d0b94afSMartin KaFai Lau 		spin_lock_bh(&ul->lock);
1548d0b94afSMartin KaFai Lau 		list_del(&rt->rt6i_uncached);
15581eb8447SWei Wang 		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
1568d0b94afSMartin KaFai Lau 		spin_unlock_bh(&ul->lock);
1578d0b94afSMartin KaFai Lau 	}
1588d0b94afSMartin KaFai Lau }
1598d0b94afSMartin KaFai Lau 
1608d0b94afSMartin KaFai Lau static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
1618d0b94afSMartin KaFai Lau {
1628d0b94afSMartin KaFai Lau 	struct net_device *loopback_dev = net->loopback_dev;
1638d0b94afSMartin KaFai Lau 	int cpu;
1648d0b94afSMartin KaFai Lau 
165e332bc67SEric W. Biederman 	if (dev == loopback_dev)
166e332bc67SEric W. Biederman 		return;
167e332bc67SEric W. Biederman 
1688d0b94afSMartin KaFai Lau 	for_each_possible_cpu(cpu) {
1698d0b94afSMartin KaFai Lau 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
1708d0b94afSMartin KaFai Lau 		struct rt6_info *rt;
1718d0b94afSMartin KaFai Lau 
1728d0b94afSMartin KaFai Lau 		spin_lock_bh(&ul->lock);
1738d0b94afSMartin KaFai Lau 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
1748d0b94afSMartin KaFai Lau 			struct inet6_dev *rt_idev = rt->rt6i_idev;
1758d0b94afSMartin KaFai Lau 			struct net_device *rt_dev = rt->dst.dev;
1768d0b94afSMartin KaFai Lau 
177e332bc67SEric W. Biederman 			if (rt_idev->dev == dev) {
1788d0b94afSMartin KaFai Lau 				rt->rt6i_idev = in6_dev_get(loopback_dev);
1798d0b94afSMartin KaFai Lau 				in6_dev_put(rt_idev);
1808d0b94afSMartin KaFai Lau 			}
1818d0b94afSMartin KaFai Lau 
182e332bc67SEric W. Biederman 			if (rt_dev == dev) {
1838d0b94afSMartin KaFai Lau 				rt->dst.dev = loopback_dev;
1848d0b94afSMartin KaFai Lau 				dev_hold(rt->dst.dev);
1858d0b94afSMartin KaFai Lau 				dev_put(rt_dev);
1868d0b94afSMartin KaFai Lau 			}
1878d0b94afSMartin KaFai Lau 		}
1888d0b94afSMartin KaFai Lau 		spin_unlock_bh(&ul->lock);
1898d0b94afSMartin KaFai Lau 	}
1908d0b94afSMartin KaFai Lau }
1918d0b94afSMartin KaFai Lau 
192f8a1b43bSDavid Ahern static inline const void *choose_neigh_daddr(const struct in6_addr *p,
193f894cbf8SDavid S. Miller 					     struct sk_buff *skb,
194f894cbf8SDavid S. Miller 					     const void *daddr)
19539232973SDavid S. Miller {
196a7563f34SDavid S. Miller 	if (!ipv6_addr_any(p))
19739232973SDavid S. Miller 		return (const void *) p;
198f894cbf8SDavid S. Miller 	else if (skb)
199f894cbf8SDavid S. Miller 		return &ipv6_hdr(skb)->daddr;
20039232973SDavid S. Miller 	return daddr;
20139232973SDavid S. Miller }
20239232973SDavid S. Miller 
203f8a1b43bSDavid Ahern struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
204f8a1b43bSDavid Ahern 				   struct net_device *dev,
205f894cbf8SDavid S. Miller 				   struct sk_buff *skb,
206f894cbf8SDavid S. Miller 				   const void *daddr)
207d3aaeb38SDavid S. Miller {
20839232973SDavid S. Miller 	struct neighbour *n;
20939232973SDavid S. Miller 
210f8a1b43bSDavid Ahern 	daddr = choose_neigh_daddr(gw, skb, daddr);
211f8a1b43bSDavid Ahern 	n = __ipv6_neigh_lookup(dev, daddr);
212f83c7790SDavid S. Miller 	if (n)
213f83c7790SDavid S. Miller 		return n;
2147adf3246SStefano Brivio 
2157adf3246SStefano Brivio 	n = neigh_create(&nd_tbl, daddr, dev);
2167adf3246SStefano Brivio 	return IS_ERR(n) ? NULL : n;
217f8a1b43bSDavid Ahern }
218f8a1b43bSDavid Ahern 
219f8a1b43bSDavid Ahern static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
220f8a1b43bSDavid Ahern 					      struct sk_buff *skb,
221f8a1b43bSDavid Ahern 					      const void *daddr)
222f8a1b43bSDavid Ahern {
223f8a1b43bSDavid Ahern 	const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
224f8a1b43bSDavid Ahern 
225f8a1b43bSDavid Ahern 	return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
226f83c7790SDavid S. Miller }
227f83c7790SDavid S. Miller 
22863fca65dSJulian Anastasov static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
22963fca65dSJulian Anastasov {
23063fca65dSJulian Anastasov 	struct net_device *dev = dst->dev;
23163fca65dSJulian Anastasov 	struct rt6_info *rt = (struct rt6_info *)dst;
23263fca65dSJulian Anastasov 
233f8a1b43bSDavid Ahern 	daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
23463fca65dSJulian Anastasov 	if (!daddr)
23563fca65dSJulian Anastasov 		return;
23663fca65dSJulian Anastasov 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
23763fca65dSJulian Anastasov 		return;
23863fca65dSJulian Anastasov 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
23963fca65dSJulian Anastasov 		return;
24063fca65dSJulian Anastasov 	__ipv6_confirm_neigh(dev, daddr);
24163fca65dSJulian Anastasov }
24263fca65dSJulian Anastasov 
/*
 * dst operations for regular IPv6 routes.
 * NOTE(review): named a template and ip6_dst_alloc() uses
 * net->ipv6.ip6_dst_ops, so this is presumably copied into each network
 * namespace at init time - confirm against the namespace init code.
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	dst_cow_metrics_generic,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_dst_neigh_lookup,
	.confirm_neigh		=	ip6_confirm_neigh,
};
2611da177e4SLinus Torvalds 
262ebb762f2SSteffen Klassert static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
263ec831ea7SRoland Dreier {
264618f9bc7SSteffen Klassert 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
265618f9bc7SSteffen Klassert 
266618f9bc7SSteffen Klassert 	return mtu ? : dst->dev->mtu;
267ec831ea7SRoland Dreier }
268ec831ea7SRoland Dreier 
/* update_pmtu handler for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
27314e50e57SDavid S. Miller 
/* redirect handler for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
278b587ee3bSDavid S. Miller 
/*
 * dst operations for blackhole dsts.  PMTU updates and redirects are
 * wired to empty handlers; destroy, check, default_advmss and
 * neigh_lookup are shared with the regular IPv6 dst ops.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_dst_neigh_lookup,
};
29014e50e57SDavid S. Miller 
/*
 * Metrics array with every entry zero.  The single explicit initializer
 * only spells out that the hop limit slot is zero as well.
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
29462fa8a84SDavid S. Miller 
/*
 * Template for the FIB "null" entry: a kernel-owned reject route of type
 * RTN_UNREACHABLE with no nexthop, the largest possible metric, and one
 * initial reference.  It points at the shared dst_default_metrics (the
 * cast drops const).
 */
static const struct fib6_info fib6_null_entry_template = {
	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.fib6_protocol  = RTPROT_KERNEL,
	.fib6_metric	= ~(u32)0,
	.fib6_ref	= REFCOUNT_INIT(1),
	.fib6_type	= RTN_UNREACHABLE,
	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
};
303421842edSDavid Ahern 
/*
 * Template for the "null" route: a reject route whose dst reports
 * -ENETUNREACH and hands packets to the ip6_pkt_discard handlers in
 * both directions.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};
3151da177e4SLinus Torvalds 
316101367c2SThomas Graf #ifdef CONFIG_IPV6_MULTIPLE_TABLES
317101367c2SThomas Graf 
/*
 * Template for the "prohibit" route: a reject route whose dst reports
 * -EACCES and hands packets to the ip6_pkt_prohibit handlers.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};
329101367c2SThomas Graf 
/*
 * Template for the "blackhole" route: a reject route whose dst reports
 * -EINVAL and hands packets to the generic dst_discard handlers.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
};
341101367c2SThomas Graf 
342101367c2SThomas Graf #endif
343101367c2SThomas Graf 
344ebfa45f0SMartin KaFai Lau static void rt6_info_init(struct rt6_info *rt)
345ebfa45f0SMartin KaFai Lau {
346ebfa45f0SMartin KaFai Lau 	struct dst_entry *dst = &rt->dst;
347ebfa45f0SMartin KaFai Lau 
348ebfa45f0SMartin KaFai Lau 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
349ebfa45f0SMartin KaFai Lau 	INIT_LIST_HEAD(&rt->rt6i_uncached);
350ebfa45f0SMartin KaFai Lau }
351ebfa45f0SMartin KaFai Lau 
3521da177e4SLinus Torvalds /* allocate dst with ip6_dst_ops */
35393531c67SDavid Ahern struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
354ad706862SMartin KaFai Lau 			       int flags)
3551da177e4SLinus Torvalds {
35697bab73fSDavid S. Miller 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
357b2a9c0edSWei Wang 					1, DST_OBSOLETE_FORCE_CHK, flags);
358cf911662SDavid S. Miller 
35981eb8447SWei Wang 	if (rt) {
360ebfa45f0SMartin KaFai Lau 		rt6_info_init(rt);
36181eb8447SWei Wang 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
36281eb8447SWei Wang 	}
3638104891bSSteffen Klassert 
364cf911662SDavid S. Miller 	return rt;
3651da177e4SLinus Torvalds }
3669ab179d8SDavid Ahern EXPORT_SYMBOL(ip6_dst_alloc);
367d52d3997SMartin KaFai Lau 
3681da177e4SLinus Torvalds static void ip6_dst_destroy(struct dst_entry *dst)
3691da177e4SLinus Torvalds {
3701da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
371a68886a6SDavid Ahern 	struct fib6_info *from;
3728d0b94afSMartin KaFai Lau 	struct inet6_dev *idev;
3731da177e4SLinus Torvalds 
3741620a336SDavid Ahern 	ip_dst_metrics_put(dst);
3758d0b94afSMartin KaFai Lau 	rt6_uncached_list_del(rt);
3768d0b94afSMartin KaFai Lau 
3778d0b94afSMartin KaFai Lau 	idev = rt->rt6i_idev;
37838308473SDavid S. Miller 	if (idev) {
3791da177e4SLinus Torvalds 		rt->rt6i_idev = NULL;
3801da177e4SLinus Torvalds 		in6_dev_put(idev);
3811da177e4SLinus Torvalds 	}
3821716a961SGao feng 
383a68886a6SDavid Ahern 	rcu_read_lock();
384a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
385a68886a6SDavid Ahern 	rcu_assign_pointer(rt->from, NULL);
38693531c67SDavid Ahern 	fib6_info_release(from);
387a68886a6SDavid Ahern 	rcu_read_unlock();
388b3419363SDavid S. Miller }
389b3419363SDavid S. Miller 
3901da177e4SLinus Torvalds static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
3911da177e4SLinus Torvalds 			   int how)
3921da177e4SLinus Torvalds {
3931da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
3941da177e4SLinus Torvalds 	struct inet6_dev *idev = rt->rt6i_idev;
3955a3e55d6SDenis V. Lunev 	struct net_device *loopback_dev =
396c346dca1SYOSHIFUJI Hideaki 		dev_net(dev)->loopback_dev;
3971da177e4SLinus Torvalds 
398e5645f51SWei Wang 	if (idev && idev->dev != loopback_dev) {
399e5645f51SWei Wang 		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
40038308473SDavid S. Miller 		if (loopback_idev) {
4011da177e4SLinus Torvalds 			rt->rt6i_idev = loopback_idev;
4021da177e4SLinus Torvalds 			in6_dev_put(idev);
4031da177e4SLinus Torvalds 		}
4041da177e4SLinus Torvalds 	}
40597cac082SDavid S. Miller }
4061da177e4SLinus Torvalds 
4075973fb1eSMartin KaFai Lau static bool __rt6_check_expired(const struct rt6_info *rt)
4085973fb1eSMartin KaFai Lau {
4095973fb1eSMartin KaFai Lau 	if (rt->rt6i_flags & RTF_EXPIRES)
4105973fb1eSMartin KaFai Lau 		return time_after(jiffies, rt->dst.expires);
4115973fb1eSMartin KaFai Lau 	else
4125973fb1eSMartin KaFai Lau 		return false;
4135973fb1eSMartin KaFai Lau }
4145973fb1eSMartin KaFai Lau 
415a50feda5SEric Dumazet static bool rt6_check_expired(const struct rt6_info *rt)
4161da177e4SLinus Torvalds {
417a68886a6SDavid Ahern 	struct fib6_info *from;
418a68886a6SDavid Ahern 
419a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
420a68886a6SDavid Ahern 
4211716a961SGao feng 	if (rt->rt6i_flags & RTF_EXPIRES) {
4221716a961SGao feng 		if (time_after(jiffies, rt->dst.expires))
423a50feda5SEric Dumazet 			return true;
424a68886a6SDavid Ahern 	} else if (from) {
4251e2ea8adSXin Long 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
426a68886a6SDavid Ahern 			fib6_check_expired(from);
4271716a961SGao feng 	}
428a50feda5SEric Dumazet 	return false;
4291da177e4SLinus Torvalds }
4301da177e4SLinus Torvalds 
431b1d40991SDavid Ahern void fib6_select_path(const struct net *net, struct fib6_result *res,
432b1d40991SDavid Ahern 		      struct flowi6 *fl6, int oif, bool have_oif_match,
433b1d40991SDavid Ahern 		      const struct sk_buff *skb, int strict)
43451ebd318SNicolas Dichtel {
4358d1c802bSDavid Ahern 	struct fib6_info *sibling, *next_sibling;
436b1d40991SDavid Ahern 	struct fib6_info *match = res->f6i;
437b1d40991SDavid Ahern 
438b1d40991SDavid Ahern 	if (!match->fib6_nsiblings || have_oif_match)
439b1d40991SDavid Ahern 		goto out;
44051ebd318SNicolas Dichtel 
441b673d6ccSJakub Sitnicki 	/* We might have already computed the hash for ICMPv6 errors. In such
442b673d6ccSJakub Sitnicki 	 * case it will always be non-zero. Otherwise now is the time to do it.
443b673d6ccSJakub Sitnicki 	 */
444b673d6ccSJakub Sitnicki 	if (!fl6->mp_hash)
445b4bac172SDavid Ahern 		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
446b673d6ccSJakub Sitnicki 
447ad1601aeSDavid Ahern 	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
448b1d40991SDavid Ahern 		goto out;
449bbfcd776SIdo Schimmel 
45093c2fb25SDavid Ahern 	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
45193c2fb25SDavid Ahern 				 fib6_siblings) {
452702cea56SDavid Ahern 		const struct fib6_nh *nh = &sibling->fib6_nh;
4535e670d84SDavid Ahern 		int nh_upper_bound;
4545e670d84SDavid Ahern 
455702cea56SDavid Ahern 		nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
4565e670d84SDavid Ahern 		if (fl6->mp_hash > nh_upper_bound)
4573d709f69SIdo Schimmel 			continue;
458702cea56SDavid Ahern 		if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
45952bd4c0cSNicolas Dichtel 			break;
46051ebd318SNicolas Dichtel 		match = sibling;
46151ebd318SNicolas Dichtel 		break;
46251ebd318SNicolas Dichtel 	}
4633d709f69SIdo Schimmel 
464b1d40991SDavid Ahern out:
465b1d40991SDavid Ahern 	res->f6i = match;
466b1d40991SDavid Ahern 	res->nh = &match->fib6_nh;
46751ebd318SNicolas Dichtel }
46851ebd318SNicolas Dichtel 
4691da177e4SLinus Torvalds /*
47066f5d6ceSWei Wang  *	Route lookup. rcu_read_lock() should be held.
4711da177e4SLinus Torvalds  */
4721da177e4SLinus Torvalds 
4730c59d006SDavid Ahern static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
4740c59d006SDavid Ahern 			       const struct in6_addr *saddr, int oif, int flags)
4750c59d006SDavid Ahern {
4760c59d006SDavid Ahern 	const struct net_device *dev;
4770c59d006SDavid Ahern 
4780c59d006SDavid Ahern 	if (nh->fib_nh_flags & RTNH_F_DEAD)
4790c59d006SDavid Ahern 		return false;
4800c59d006SDavid Ahern 
4810c59d006SDavid Ahern 	dev = nh->fib_nh_dev;
4820c59d006SDavid Ahern 	if (oif) {
4830c59d006SDavid Ahern 		if (dev->ifindex == oif)
4840c59d006SDavid Ahern 			return true;
4850c59d006SDavid Ahern 	} else {
4860c59d006SDavid Ahern 		if (ipv6_chk_addr(net, saddr, dev,
4870c59d006SDavid Ahern 				  flags & RT6_LOOKUP_F_IFACE))
4880c59d006SDavid Ahern 			return true;
4890c59d006SDavid Ahern 	}
4900c59d006SDavid Ahern 
4910c59d006SDavid Ahern 	return false;
4920c59d006SDavid Ahern }
4930c59d006SDavid Ahern 
49475ef7389SDavid Ahern static void rt6_device_match(struct net *net, struct fib6_result *res,
49575ef7389SDavid Ahern 			     const struct in6_addr *saddr, int oif, int flags)
4961da177e4SLinus Torvalds {
49775ef7389SDavid Ahern 	struct fib6_info *f6i = res->f6i;
49875ef7389SDavid Ahern 	struct fib6_info *spf6i;
49975ef7389SDavid Ahern 	struct fib6_nh *nh;
5001da177e4SLinus Torvalds 
50175ef7389SDavid Ahern 	if (!oif && ipv6_addr_any(saddr)) {
50275ef7389SDavid Ahern 		nh = &f6i->fib6_nh;
5037d21fec9SDavid Ahern 		if (!(nh->fib_nh_flags & RTNH_F_DEAD))
5047d21fec9SDavid Ahern 			goto out;
5051da177e4SLinus Torvalds 	}
5061da177e4SLinus Torvalds 
50775ef7389SDavid Ahern 	for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
50875ef7389SDavid Ahern 		nh = &spf6i->fib6_nh;
50975ef7389SDavid Ahern 		if (__rt6_device_match(net, nh, saddr, oif, flags)) {
51075ef7389SDavid Ahern 			res->f6i = spf6i;
5117d21fec9SDavid Ahern 			goto out;
51275ef7389SDavid Ahern 		}
51375ef7389SDavid Ahern 	}
5141da177e4SLinus Torvalds 
51575ef7389SDavid Ahern 	if (oif && flags & RT6_LOOKUP_F_IFACE) {
51675ef7389SDavid Ahern 		res->f6i = net->ipv6.fib6_null_entry;
5177d21fec9SDavid Ahern 		nh = &res->f6i->fib6_nh;
5187d21fec9SDavid Ahern 		goto out;
51975ef7389SDavid Ahern 	}
52075ef7389SDavid Ahern 
5217d21fec9SDavid Ahern 	nh = &f6i->fib6_nh;
5227d21fec9SDavid Ahern 	if (nh->fib_nh_flags & RTNH_F_DEAD) {
52375ef7389SDavid Ahern 		res->f6i = net->ipv6.fib6_null_entry;
5247d21fec9SDavid Ahern 		nh = &res->f6i->fib6_nh;
52575ef7389SDavid Ahern 	}
5267d21fec9SDavid Ahern out:
5277d21fec9SDavid Ahern 	res->nh = nh;
5287d21fec9SDavid Ahern 	res->fib6_type = res->f6i->fib6_type;
5297d21fec9SDavid Ahern 	res->fib6_flags = res->f6i->fib6_flags;
5301da177e4SLinus Torvalds }
5311da177e4SLinus Torvalds 
53227097255SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Deferred router probe request, queued by rt6_probe() and consumed by
 * rt6_probe_deferred().
 */
struct __rt6_probe_work {
	struct work_struct work;
	/* Address to solicit (the nexthop gateway). */
	struct in6_addr target;
	/* Device to send on; a reference is held until the work has run. */
	struct net_device *dev;
};
538c2f17e82SHannes Frederic Sowa 
539c2f17e82SHannes Frederic Sowa static void rt6_probe_deferred(struct work_struct *w)
540c2f17e82SHannes Frederic Sowa {
541c2f17e82SHannes Frederic Sowa 	struct in6_addr mcaddr;
542c2f17e82SHannes Frederic Sowa 	struct __rt6_probe_work *work =
543c2f17e82SHannes Frederic Sowa 		container_of(w, struct __rt6_probe_work, work);
544c2f17e82SHannes Frederic Sowa 
545c2f17e82SHannes Frederic Sowa 	addrconf_addr_solict_mult(&work->target, &mcaddr);
546adc176c5SErik Nordmark 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
547c2f17e82SHannes Frederic Sowa 	dev_put(work->dev);
548662f5533SMichael Büsch 	kfree(work);
549c2f17e82SHannes Frederic Sowa }
550c2f17e82SHannes Frederic Sowa 
551cc3a86c8SDavid Ahern static void rt6_probe(struct fib6_nh *fib6_nh)
55227097255SYOSHIFUJI Hideaki {
553f547fac6SSabrina Dubroca 	struct __rt6_probe_work *work = NULL;
5545e670d84SDavid Ahern 	const struct in6_addr *nh_gw;
555f2c31e32SEric Dumazet 	struct neighbour *neigh;
5565e670d84SDavid Ahern 	struct net_device *dev;
557f547fac6SSabrina Dubroca 	struct inet6_dev *idev;
5585e670d84SDavid Ahern 
55927097255SYOSHIFUJI Hideaki 	/*
56027097255SYOSHIFUJI Hideaki 	 * Okay, this does not seem to be appropriate
56127097255SYOSHIFUJI Hideaki 	 * for now, however, we need to check if it
56227097255SYOSHIFUJI Hideaki 	 * is really so; aka Router Reachability Probing.
56327097255SYOSHIFUJI Hideaki 	 *
56427097255SYOSHIFUJI Hideaki 	 * Router Reachability Probe MUST be rate-limited
56527097255SYOSHIFUJI Hideaki 	 * to no more than one per minute.
56627097255SYOSHIFUJI Hideaki 	 */
567cc3a86c8SDavid Ahern 	if (fib6_nh->fib_nh_gw_family)
568fdd6681dSAmerigo Wang 		return;
5695e670d84SDavid Ahern 
570cc3a86c8SDavid Ahern 	nh_gw = &fib6_nh->fib_nh_gw6;
571cc3a86c8SDavid Ahern 	dev = fib6_nh->fib_nh_dev;
5722152caeaSYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_lock_bh();
573f547fac6SSabrina Dubroca 	idev = __in6_dev_get(dev);
5745e670d84SDavid Ahern 	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
5752152caeaSYOSHIFUJI Hideaki / 吉藤英明 	if (neigh) {
5768d6c31bfSMartin KaFai Lau 		if (neigh->nud_state & NUD_VALID)
5778d6c31bfSMartin KaFai Lau 			goto out;
5788d6c31bfSMartin KaFai Lau 
5792152caeaSYOSHIFUJI Hideaki / 吉藤英明 		write_lock(&neigh->lock);
580990edb42SMartin KaFai Lau 		if (!(neigh->nud_state & NUD_VALID) &&
581990edb42SMartin KaFai Lau 		    time_after(jiffies,
582dcd1f572SDavid Ahern 			       neigh->updated + idev->cnf.rtr_probe_interval)) {
583c2f17e82SHannes Frederic Sowa 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
584990edb42SMartin KaFai Lau 			if (work)
5857e980569SJiri Benc 				__neigh_set_probe_once(neigh);
586990edb42SMartin KaFai Lau 		}
587c2f17e82SHannes Frederic Sowa 		write_unlock(&neigh->lock);
588cc3a86c8SDavid Ahern 	} else if (time_after(jiffies, fib6_nh->last_probe +
589f547fac6SSabrina Dubroca 				       idev->cnf.rtr_probe_interval)) {
590990edb42SMartin KaFai Lau 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
591990edb42SMartin KaFai Lau 	}
592c2f17e82SHannes Frederic Sowa 
593c2f17e82SHannes Frederic Sowa 	if (work) {
594cc3a86c8SDavid Ahern 		fib6_nh->last_probe = jiffies;
595c2f17e82SHannes Frederic Sowa 		INIT_WORK(&work->work, rt6_probe_deferred);
5965e670d84SDavid Ahern 		work->target = *nh_gw;
5975e670d84SDavid Ahern 		dev_hold(dev);
5985e670d84SDavid Ahern 		work->dev = dev;
599c2f17e82SHannes Frederic Sowa 		schedule_work(&work->work);
600c2f17e82SHannes Frederic Sowa 	}
601990edb42SMartin KaFai Lau 
6028d6c31bfSMartin KaFai Lau out:
6032152caeaSYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_unlock_bh();
604f2c31e32SEric Dumazet }
60527097255SYOSHIFUJI Hideaki #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_nh *fib6_nh)
{
}
60927097255SYOSHIFUJI Hideaki #endif
61027097255SYOSHIFUJI Hideaki 
6111da177e4SLinus Torvalds /*
612554cfb7eSYOSHIFUJI Hideaki  * Default Router Selection (RFC 2461 6.3.6)
6131da177e4SLinus Torvalds  */
6141ba9a895SDavid Ahern static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
6151da177e4SLinus Torvalds {
616afc154e9SHannes Frederic Sowa 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
6175e670d84SDavid Ahern 	struct neighbour *neigh;
618f2c31e32SEric Dumazet 
619145a3621SYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_lock_bh();
6201ba9a895SDavid Ahern 	neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
6211ba9a895SDavid Ahern 					  &fib6_nh->fib_nh_gw6);
622145a3621SYOSHIFUJI Hideaki / 吉藤英明 	if (neigh) {
623145a3621SYOSHIFUJI Hideaki / 吉藤英明 		read_lock(&neigh->lock);
624554cfb7eSYOSHIFUJI Hideaki 		if (neigh->nud_state & NUD_VALID)
625afc154e9SHannes Frederic Sowa 			ret = RT6_NUD_SUCCEED;
626398bcbebSYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
627a5a81f0bSPaul Marks 		else if (!(neigh->nud_state & NUD_FAILED))
628afc154e9SHannes Frederic Sowa 			ret = RT6_NUD_SUCCEED;
6297e980569SJiri Benc 		else
6307e980569SJiri Benc 			ret = RT6_NUD_FAIL_PROBE;
631398bcbebSYOSHIFUJI Hideaki #endif
632145a3621SYOSHIFUJI Hideaki / 吉藤英明 		read_unlock(&neigh->lock);
633afc154e9SHannes Frederic Sowa 	} else {
634afc154e9SHannes Frederic Sowa 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
6357e980569SJiri Benc 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
636a5a81f0bSPaul Marks 	}
637145a3621SYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_unlock_bh();
638145a3621SYOSHIFUJI Hideaki / 吉藤英明 
639a5a81f0bSPaul Marks 	return ret;
6401da177e4SLinus Torvalds }
6411da177e4SLinus Torvalds 
642702cea56SDavid Ahern static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
643702cea56SDavid Ahern 			   int strict)
644554cfb7eSYOSHIFUJI Hideaki {
6456e1809a5SDavid Ahern 	int m = 0;
6464d0c5911SYOSHIFUJI Hideaki 
6476e1809a5SDavid Ahern 	if (!oif || nh->fib_nh_dev->ifindex == oif)
6486e1809a5SDavid Ahern 		m = 2;
6496e1809a5SDavid Ahern 
65077d16f45SYOSHIFUJI Hideaki 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
651afc154e9SHannes Frederic Sowa 		return RT6_NUD_FAIL_HARD;
652ebacaaa0SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
653702cea56SDavid Ahern 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
654ebacaaa0SYOSHIFUJI Hideaki #endif
6551ba9a895SDavid Ahern 	if ((strict & RT6_LOOKUP_F_REACHABLE) &&
656702cea56SDavid Ahern 	    !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
6571ba9a895SDavid Ahern 		int n = rt6_check_neigh(nh);
658afc154e9SHannes Frederic Sowa 		if (n < 0)
659afc154e9SHannes Frederic Sowa 			return n;
660afc154e9SHannes Frederic Sowa 	}
661554cfb7eSYOSHIFUJI Hideaki 	return m;
662554cfb7eSYOSHIFUJI Hideaki }
663554cfb7eSYOSHIFUJI Hideaki 
66428679ed1SDavid Ahern static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
66528679ed1SDavid Ahern 		       int oif, int strict, int *mpri, bool *do_rr)
666554cfb7eSYOSHIFUJI Hideaki {
667afc154e9SHannes Frederic Sowa 	bool match_do_rr = false;
66828679ed1SDavid Ahern 	bool rc = false;
66928679ed1SDavid Ahern 	int m;
67035103d11SAndy Gospodarek 
67128679ed1SDavid Ahern 	if (nh->fib_nh_flags & RTNH_F_DEAD)
6728067bb8cSIdo Schimmel 		goto out;
6738067bb8cSIdo Schimmel 
67428679ed1SDavid Ahern 	if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
67528679ed1SDavid Ahern 	    nh->fib_nh_flags & RTNH_F_LINKDOWN &&
676d5d32e4bSDavid Ahern 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
67735103d11SAndy Gospodarek 		goto out;
678554cfb7eSYOSHIFUJI Hideaki 
67928679ed1SDavid Ahern 	m = rt6_score_route(nh, fib6_flags, oif, strict);
6807e980569SJiri Benc 	if (m == RT6_NUD_FAIL_DO_RR) {
681afc154e9SHannes Frederic Sowa 		match_do_rr = true;
682afc154e9SHannes Frederic Sowa 		m = 0; /* lowest valid score */
6837e980569SJiri Benc 	} else if (m == RT6_NUD_FAIL_HARD) {
684f11e6659SDavid S. Miller 		goto out;
6851da177e4SLinus Torvalds 	}
686f11e6659SDavid S. Miller 
687afc154e9SHannes Frederic Sowa 	if (strict & RT6_LOOKUP_F_REACHABLE)
68828679ed1SDavid Ahern 		rt6_probe(nh);
689afc154e9SHannes Frederic Sowa 
6907e980569SJiri Benc 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
691afc154e9SHannes Frederic Sowa 	if (m > *mpri) {
692afc154e9SHannes Frederic Sowa 		*do_rr = match_do_rr;
693afc154e9SHannes Frederic Sowa 		*mpri = m;
69428679ed1SDavid Ahern 		rc = true;
695afc154e9SHannes Frederic Sowa 	}
696f11e6659SDavid S. Miller out:
69728679ed1SDavid Ahern 	return rc;
6981da177e4SLinus Torvalds }
6991da177e4SLinus Torvalds 
700b7bc4b6aSDavid Ahern static void __find_rr_leaf(struct fib6_info *f6i_start,
70130c15f03SDavid Ahern 			   struct fib6_info *nomatch, u32 metric,
702b7bc4b6aSDavid Ahern 			   struct fib6_result *res, struct fib6_info **cont,
70330c15f03SDavid Ahern 			   int oif, int strict, bool *do_rr, int *mpri)
70430c15f03SDavid Ahern {
705b7bc4b6aSDavid Ahern 	struct fib6_info *f6i;
70630c15f03SDavid Ahern 
707b7bc4b6aSDavid Ahern 	for (f6i = f6i_start;
708b7bc4b6aSDavid Ahern 	     f6i && f6i != nomatch;
709b7bc4b6aSDavid Ahern 	     f6i = rcu_dereference(f6i->fib6_next)) {
71030c15f03SDavid Ahern 		struct fib6_nh *nh;
71130c15f03SDavid Ahern 
712b7bc4b6aSDavid Ahern 		if (cont && f6i->fib6_metric != metric) {
713b7bc4b6aSDavid Ahern 			*cont = f6i;
71430c15f03SDavid Ahern 			return;
71530c15f03SDavid Ahern 		}
71630c15f03SDavid Ahern 
717b7bc4b6aSDavid Ahern 		if (fib6_check_expired(f6i))
71830c15f03SDavid Ahern 			continue;
71930c15f03SDavid Ahern 
720b7bc4b6aSDavid Ahern 		nh = &f6i->fib6_nh;
721b7bc4b6aSDavid Ahern 		if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
722b7bc4b6aSDavid Ahern 			res->f6i = f6i;
723b7bc4b6aSDavid Ahern 			res->nh = nh;
7247d21fec9SDavid Ahern 			res->fib6_flags = f6i->fib6_flags;
7257d21fec9SDavid Ahern 			res->fib6_type = f6i->fib6_type;
726b7bc4b6aSDavid Ahern 		}
72730c15f03SDavid Ahern 	}
72830c15f03SDavid Ahern }
72930c15f03SDavid Ahern 
730b7bc4b6aSDavid Ahern static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
731b7bc4b6aSDavid Ahern 			 struct fib6_info *rr_head, int oif, int strict,
732b7bc4b6aSDavid Ahern 			 bool *do_rr, struct fib6_result *res)
733f11e6659SDavid S. Miller {
734b7bc4b6aSDavid Ahern 	u32 metric = rr_head->fib6_metric;
735b7bc4b6aSDavid Ahern 	struct fib6_info *cont = NULL;
736f11e6659SDavid S. Miller 	int mpri = -1;
737f11e6659SDavid S. Miller 
738b7bc4b6aSDavid Ahern 	__find_rr_leaf(rr_head, NULL, metric, res, &cont,
73930c15f03SDavid Ahern 		       oif, strict, do_rr, &mpri);
7409fbdcfafSSteffen Klassert 
741b7bc4b6aSDavid Ahern 	__find_rr_leaf(leaf, rr_head, metric, res, &cont,
74230c15f03SDavid Ahern 		       oif, strict, do_rr, &mpri);
7439fbdcfafSSteffen Klassert 
744b7bc4b6aSDavid Ahern 	if (res->f6i || !cont)
745b7bc4b6aSDavid Ahern 		return;
7469fbdcfafSSteffen Klassert 
747b7bc4b6aSDavid Ahern 	__find_rr_leaf(cont, NULL, metric, res, NULL,
74830c15f03SDavid Ahern 		       oif, strict, do_rr, &mpri);
749f11e6659SDavid S. Miller }
750f11e6659SDavid S. Miller 
751b7bc4b6aSDavid Ahern static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
752b7bc4b6aSDavid Ahern 		       struct fib6_result *res, int strict)
753f11e6659SDavid S. Miller {
7548d1c802bSDavid Ahern 	struct fib6_info *leaf = rcu_dereference(fn->leaf);
755b7bc4b6aSDavid Ahern 	struct fib6_info *rt0;
756afc154e9SHannes Frederic Sowa 	bool do_rr = false;
75717ecf590SWei Wang 	int key_plen;
758f11e6659SDavid S. Miller 
759b7bc4b6aSDavid Ahern 	/* make sure this function or its helpers sets f6i */
760b7bc4b6aSDavid Ahern 	res->f6i = NULL;
761b7bc4b6aSDavid Ahern 
762421842edSDavid Ahern 	if (!leaf || leaf == net->ipv6.fib6_null_entry)
763b7bc4b6aSDavid Ahern 		goto out;
7648d1040e8SWei Wang 
76566f5d6ceSWei Wang 	rt0 = rcu_dereference(fn->rr_ptr);
766f11e6659SDavid S. Miller 	if (!rt0)
76766f5d6ceSWei Wang 		rt0 = leaf;
768f11e6659SDavid S. Miller 
76917ecf590SWei Wang 	/* Double check to make sure fn is not an intermediate node
77017ecf590SWei Wang 	 * and fn->leaf does not points to its child's leaf
77117ecf590SWei Wang 	 * (This might happen if all routes under fn are deleted from
77217ecf590SWei Wang 	 * the tree and fib6_repair_tree() is called on the node.)
77317ecf590SWei Wang 	 */
77493c2fb25SDavid Ahern 	key_plen = rt0->fib6_dst.plen;
77517ecf590SWei Wang #ifdef CONFIG_IPV6_SUBTREES
77693c2fb25SDavid Ahern 	if (rt0->fib6_src.plen)
77793c2fb25SDavid Ahern 		key_plen = rt0->fib6_src.plen;
77817ecf590SWei Wang #endif
77917ecf590SWei Wang 	if (fn->fn_bit != key_plen)
780b7bc4b6aSDavid Ahern 		goto out;
78117ecf590SWei Wang 
782b7bc4b6aSDavid Ahern 	find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
783afc154e9SHannes Frederic Sowa 	if (do_rr) {
7848fb11a9aSDavid Ahern 		struct fib6_info *next = rcu_dereference(rt0->fib6_next);
785f11e6659SDavid S. Miller 
786554cfb7eSYOSHIFUJI Hideaki 		/* no entries matched; do round-robin */
78793c2fb25SDavid Ahern 		if (!next || next->fib6_metric != rt0->fib6_metric)
7888d1040e8SWei Wang 			next = leaf;
789f11e6659SDavid S. Miller 
79066f5d6ceSWei Wang 		if (next != rt0) {
79193c2fb25SDavid Ahern 			spin_lock_bh(&leaf->fib6_table->tb6_lock);
79266f5d6ceSWei Wang 			/* make sure next is not being deleted from the tree */
79393c2fb25SDavid Ahern 			if (next->fib6_node)
79466f5d6ceSWei Wang 				rcu_assign_pointer(fn->rr_ptr, next);
79593c2fb25SDavid Ahern 			spin_unlock_bh(&leaf->fib6_table->tb6_lock);
79666f5d6ceSWei Wang 		}
797554cfb7eSYOSHIFUJI Hideaki 	}
798554cfb7eSYOSHIFUJI Hideaki 
799b7bc4b6aSDavid Ahern out:
800b7bc4b6aSDavid Ahern 	if (!res->f6i) {
801b7bc4b6aSDavid Ahern 		res->f6i = net->ipv6.fib6_null_entry;
802b7bc4b6aSDavid Ahern 		res->nh = &res->f6i->fib6_nh;
8037d21fec9SDavid Ahern 		res->fib6_flags = res->f6i->fib6_flags;
8047d21fec9SDavid Ahern 		res->fib6_type = res->f6i->fib6_type;
805b7bc4b6aSDavid Ahern 	}
8061da177e4SLinus Torvalds }
8071da177e4SLinus Torvalds 
80885bd05deSDavid Ahern static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
8098b9df265SMartin KaFai Lau {
81085bd05deSDavid Ahern 	return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
81185bd05deSDavid Ahern 	       res->nh->fib_nh_gw_family;
8128b9df265SMartin KaFai Lau }
8138b9df265SMartin KaFai Lau 
81470ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
81570ceb4f5SYOSHIFUJI Hideaki int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
816b71d1d42SEric Dumazet 		  const struct in6_addr *gwaddr)
81770ceb4f5SYOSHIFUJI Hideaki {
818c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
81970ceb4f5SYOSHIFUJI Hideaki 	struct route_info *rinfo = (struct route_info *) opt;
82070ceb4f5SYOSHIFUJI Hideaki 	struct in6_addr prefix_buf, *prefix;
82170ceb4f5SYOSHIFUJI Hideaki 	unsigned int pref;
8224bed72e4SYOSHIFUJI Hideaki 	unsigned long lifetime;
8238d1c802bSDavid Ahern 	struct fib6_info *rt;
82470ceb4f5SYOSHIFUJI Hideaki 
82570ceb4f5SYOSHIFUJI Hideaki 	if (len < sizeof(struct route_info)) {
82670ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
82770ceb4f5SYOSHIFUJI Hideaki 	}
82870ceb4f5SYOSHIFUJI Hideaki 
82970ceb4f5SYOSHIFUJI Hideaki 	/* Sanity check for prefix_len and length */
83070ceb4f5SYOSHIFUJI Hideaki 	if (rinfo->length > 3) {
83170ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
83270ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 128) {
83370ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
83470ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 64) {
83570ceb4f5SYOSHIFUJI Hideaki 		if (rinfo->length < 2) {
83670ceb4f5SYOSHIFUJI Hideaki 			return -EINVAL;
83770ceb4f5SYOSHIFUJI Hideaki 		}
83870ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 0) {
83970ceb4f5SYOSHIFUJI Hideaki 		if (rinfo->length < 1) {
84070ceb4f5SYOSHIFUJI Hideaki 			return -EINVAL;
84170ceb4f5SYOSHIFUJI Hideaki 		}
84270ceb4f5SYOSHIFUJI Hideaki 	}
84370ceb4f5SYOSHIFUJI Hideaki 
84470ceb4f5SYOSHIFUJI Hideaki 	pref = rinfo->route_pref;
84570ceb4f5SYOSHIFUJI Hideaki 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
8463933fc95SJens Rosenboom 		return -EINVAL;
84770ceb4f5SYOSHIFUJI Hideaki 
8484bed72e4SYOSHIFUJI Hideaki 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
84970ceb4f5SYOSHIFUJI Hideaki 
85070ceb4f5SYOSHIFUJI Hideaki 	if (rinfo->length == 3)
85170ceb4f5SYOSHIFUJI Hideaki 		prefix = (struct in6_addr *)rinfo->prefix;
85270ceb4f5SYOSHIFUJI Hideaki 	else {
85370ceb4f5SYOSHIFUJI Hideaki 		/* this function is safe */
85470ceb4f5SYOSHIFUJI Hideaki 		ipv6_addr_prefix(&prefix_buf,
85570ceb4f5SYOSHIFUJI Hideaki 				 (struct in6_addr *)rinfo->prefix,
85670ceb4f5SYOSHIFUJI Hideaki 				 rinfo->prefix_len);
85770ceb4f5SYOSHIFUJI Hideaki 		prefix = &prefix_buf;
85870ceb4f5SYOSHIFUJI Hideaki 	}
85970ceb4f5SYOSHIFUJI Hideaki 
860f104a567SDuan Jiong 	if (rinfo->prefix_len == 0)
861afb1d4b5SDavid Ahern 		rt = rt6_get_dflt_router(net, gwaddr, dev);
862f104a567SDuan Jiong 	else
863f104a567SDuan Jiong 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
864830218c1SDavid Ahern 					gwaddr, dev);
86570ceb4f5SYOSHIFUJI Hideaki 
86670ceb4f5SYOSHIFUJI Hideaki 	if (rt && !lifetime) {
867afb1d4b5SDavid Ahern 		ip6_del_rt(net, rt);
86870ceb4f5SYOSHIFUJI Hideaki 		rt = NULL;
86970ceb4f5SYOSHIFUJI Hideaki 	}
87070ceb4f5SYOSHIFUJI Hideaki 
87170ceb4f5SYOSHIFUJI Hideaki 	if (!rt && lifetime)
872830218c1SDavid Ahern 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
873830218c1SDavid Ahern 					dev, pref);
87470ceb4f5SYOSHIFUJI Hideaki 	else if (rt)
87593c2fb25SDavid Ahern 		rt->fib6_flags = RTF_ROUTEINFO |
87693c2fb25SDavid Ahern 				 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
87770ceb4f5SYOSHIFUJI Hideaki 
87870ceb4f5SYOSHIFUJI Hideaki 	if (rt) {
8791716a961SGao feng 		if (!addrconf_finite_timeout(lifetime))
88014895687SDavid Ahern 			fib6_clean_expires(rt);
8811716a961SGao feng 		else
88214895687SDavid Ahern 			fib6_set_expires(rt, jiffies + HZ * lifetime);
8831716a961SGao feng 
88493531c67SDavid Ahern 		fib6_info_release(rt);
88570ceb4f5SYOSHIFUJI Hideaki 	}
88670ceb4f5SYOSHIFUJI Hideaki 	return 0;
88770ceb4f5SYOSHIFUJI Hideaki }
88870ceb4f5SYOSHIFUJI Hideaki #endif
88970ceb4f5SYOSHIFUJI Hideaki 
890ae90d867SDavid Ahern /*
891ae90d867SDavid Ahern  *	Misc support functions
892ae90d867SDavid Ahern  */
893ae90d867SDavid Ahern 
894ae90d867SDavid Ahern /* called with rcu_lock held */
8950d161581SDavid Ahern static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
896ae90d867SDavid Ahern {
8970d161581SDavid Ahern 	struct net_device *dev = res->nh->fib_nh_dev;
898ae90d867SDavid Ahern 
8997d21fec9SDavid Ahern 	if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
900ae90d867SDavid Ahern 		/* for copies of local routes, dst->dev needs to be the
901ae90d867SDavid Ahern 		 * device if it is a master device, the master device if
902ae90d867SDavid Ahern 		 * device is enslaved, and the loopback as the default
903ae90d867SDavid Ahern 		 */
904ae90d867SDavid Ahern 		if (netif_is_l3_slave(dev) &&
9057d21fec9SDavid Ahern 		    !rt6_need_strict(&res->f6i->fib6_dst.addr))
906ae90d867SDavid Ahern 			dev = l3mdev_master_dev_rcu(dev);
907ae90d867SDavid Ahern 		else if (!netif_is_l3_master(dev))
908ae90d867SDavid Ahern 			dev = dev_net(dev)->loopback_dev;
909ae90d867SDavid Ahern 		/* last case is netif_is_l3_master(dev) is true in which
910ae90d867SDavid Ahern 		 * case we want dev returned to be dev
911ae90d867SDavid Ahern 		 */
912ae90d867SDavid Ahern 	}
913ae90d867SDavid Ahern 
914ae90d867SDavid Ahern 	return dev;
915ae90d867SDavid Ahern }
916ae90d867SDavid Ahern 
9176edb3c96SDavid Ahern static const int fib6_prop[RTN_MAX + 1] = {
9186edb3c96SDavid Ahern 	[RTN_UNSPEC]	= 0,
9196edb3c96SDavid Ahern 	[RTN_UNICAST]	= 0,
9206edb3c96SDavid Ahern 	[RTN_LOCAL]	= 0,
9216edb3c96SDavid Ahern 	[RTN_BROADCAST]	= 0,
9226edb3c96SDavid Ahern 	[RTN_ANYCAST]	= 0,
9236edb3c96SDavid Ahern 	[RTN_MULTICAST]	= 0,
9246edb3c96SDavid Ahern 	[RTN_BLACKHOLE]	= -EINVAL,
9256edb3c96SDavid Ahern 	[RTN_UNREACHABLE] = -EHOSTUNREACH,
9266edb3c96SDavid Ahern 	[RTN_PROHIBIT]	= -EACCES,
9276edb3c96SDavid Ahern 	[RTN_THROW]	= -EAGAIN,
9286edb3c96SDavid Ahern 	[RTN_NAT]	= -EINVAL,
9296edb3c96SDavid Ahern 	[RTN_XRESOLVE]	= -EINVAL,
9306edb3c96SDavid Ahern };
9316edb3c96SDavid Ahern 
9326edb3c96SDavid Ahern static int ip6_rt_type_to_error(u8 fib6_type)
9336edb3c96SDavid Ahern {
9346edb3c96SDavid Ahern 	return fib6_prop[fib6_type];
9356edb3c96SDavid Ahern }
9366edb3c96SDavid Ahern 
9378d1c802bSDavid Ahern static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
9383b6761d1SDavid Ahern {
9393b6761d1SDavid Ahern 	unsigned short flags = 0;
9403b6761d1SDavid Ahern 
9413b6761d1SDavid Ahern 	if (rt->dst_nocount)
9423b6761d1SDavid Ahern 		flags |= DST_NOCOUNT;
9433b6761d1SDavid Ahern 	if (rt->dst_nopolicy)
9443b6761d1SDavid Ahern 		flags |= DST_NOPOLICY;
9453b6761d1SDavid Ahern 	if (rt->dst_host)
9463b6761d1SDavid Ahern 		flags |= DST_HOST;
9473b6761d1SDavid Ahern 
9483b6761d1SDavid Ahern 	return flags;
9493b6761d1SDavid Ahern }
9503b6761d1SDavid Ahern 
9517d21fec9SDavid Ahern static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
9526edb3c96SDavid Ahern {
9537d21fec9SDavid Ahern 	rt->dst.error = ip6_rt_type_to_error(fib6_type);
9546edb3c96SDavid Ahern 
9557d21fec9SDavid Ahern 	switch (fib6_type) {
9566edb3c96SDavid Ahern 	case RTN_BLACKHOLE:
9576edb3c96SDavid Ahern 		rt->dst.output = dst_discard_out;
9586edb3c96SDavid Ahern 		rt->dst.input = dst_discard;
9596edb3c96SDavid Ahern 		break;
9606edb3c96SDavid Ahern 	case RTN_PROHIBIT:
9616edb3c96SDavid Ahern 		rt->dst.output = ip6_pkt_prohibit_out;
9626edb3c96SDavid Ahern 		rt->dst.input = ip6_pkt_prohibit;
9636edb3c96SDavid Ahern 		break;
9646edb3c96SDavid Ahern 	case RTN_THROW:
9656edb3c96SDavid Ahern 	case RTN_UNREACHABLE:
9666edb3c96SDavid Ahern 	default:
9676edb3c96SDavid Ahern 		rt->dst.output = ip6_pkt_discard_out;
9686edb3c96SDavid Ahern 		rt->dst.input = ip6_pkt_discard;
9696edb3c96SDavid Ahern 		break;
9706edb3c96SDavid Ahern 	}
9716edb3c96SDavid Ahern }
9726edb3c96SDavid Ahern 
9730d161581SDavid Ahern static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
9746edb3c96SDavid Ahern {
9757d21fec9SDavid Ahern 	struct fib6_info *f6i = res->f6i;
9760d161581SDavid Ahern 
9777d21fec9SDavid Ahern 	if (res->fib6_flags & RTF_REJECT) {
9787d21fec9SDavid Ahern 		ip6_rt_init_dst_reject(rt, res->fib6_type);
9796edb3c96SDavid Ahern 		return;
9806edb3c96SDavid Ahern 	}
9816edb3c96SDavid Ahern 
9826edb3c96SDavid Ahern 	rt->dst.error = 0;
9836edb3c96SDavid Ahern 	rt->dst.output = ip6_output;
9846edb3c96SDavid Ahern 
9857d21fec9SDavid Ahern 	if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
9866edb3c96SDavid Ahern 		rt->dst.input = ip6_input;
9877d21fec9SDavid Ahern 	} else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
9886edb3c96SDavid Ahern 		rt->dst.input = ip6_mc_input;
9896edb3c96SDavid Ahern 	} else {
9906edb3c96SDavid Ahern 		rt->dst.input = ip6_forward;
9916edb3c96SDavid Ahern 	}
9926edb3c96SDavid Ahern 
9930d161581SDavid Ahern 	if (res->nh->fib_nh_lws) {
9940d161581SDavid Ahern 		rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
9956edb3c96SDavid Ahern 		lwtunnel_set_redirect(&rt->dst);
9966edb3c96SDavid Ahern 	}
9976edb3c96SDavid Ahern 
9986edb3c96SDavid Ahern 	rt->dst.lastuse = jiffies;
9996edb3c96SDavid Ahern }
10006edb3c96SDavid Ahern 
1001e873e4b9SWei Wang /* Caller must already hold reference to @from */
10028d1c802bSDavid Ahern static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
1003ae90d867SDavid Ahern {
1004ae90d867SDavid Ahern 	rt->rt6i_flags &= ~RTF_EXPIRES;
1005a68886a6SDavid Ahern 	rcu_assign_pointer(rt->from, from);
1006e1255ed4SDavid Ahern 	ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
1007ae90d867SDavid Ahern }
1008ae90d867SDavid Ahern 
10090d161581SDavid Ahern /* Caller must already hold reference to f6i in result */
10100d161581SDavid Ahern static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
1011ae90d867SDavid Ahern {
10120d161581SDavid Ahern 	const struct fib6_nh *nh = res->nh;
10130d161581SDavid Ahern 	const struct net_device *dev = nh->fib_nh_dev;
10140d161581SDavid Ahern 	struct fib6_info *f6i = res->f6i;
1015dcd1f572SDavid Ahern 
10160d161581SDavid Ahern 	ip6_rt_init_dst(rt, res);
10176edb3c96SDavid Ahern 
10180d161581SDavid Ahern 	rt->rt6i_dst = f6i->fib6_dst;
1019dcd1f572SDavid Ahern 	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
10207d21fec9SDavid Ahern 	rt->rt6i_flags = res->fib6_flags;
10210d161581SDavid Ahern 	if (nh->fib_nh_gw_family) {
10220d161581SDavid Ahern 		rt->rt6i_gateway = nh->fib_nh_gw6;
10232b2450caSDavid Ahern 		rt->rt6i_flags |= RTF_GATEWAY;
10242b2450caSDavid Ahern 	}
10250d161581SDavid Ahern 	rt6_set_from(rt, f6i);
1026ae90d867SDavid Ahern #ifdef CONFIG_IPV6_SUBTREES
10270d161581SDavid Ahern 	rt->rt6i_src = f6i->fib6_src;
1028ae90d867SDavid Ahern #endif
1029ae90d867SDavid Ahern }
1030ae90d867SDavid Ahern 
1031a3c00e46SMartin KaFai Lau static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1032a3c00e46SMartin KaFai Lau 					struct in6_addr *saddr)
1033a3c00e46SMartin KaFai Lau {
103466f5d6ceSWei Wang 	struct fib6_node *pn, *sn;
1035a3c00e46SMartin KaFai Lau 	while (1) {
1036a3c00e46SMartin KaFai Lau 		if (fn->fn_flags & RTN_TL_ROOT)
1037a3c00e46SMartin KaFai Lau 			return NULL;
103866f5d6ceSWei Wang 		pn = rcu_dereference(fn->parent);
103966f5d6ceSWei Wang 		sn = FIB6_SUBTREE(pn);
104066f5d6ceSWei Wang 		if (sn && sn != fn)
10416454743bSDavid Ahern 			fn = fib6_node_lookup(sn, NULL, saddr);
1042a3c00e46SMartin KaFai Lau 		else
1043a3c00e46SMartin KaFai Lau 			fn = pn;
1044a3c00e46SMartin KaFai Lau 		if (fn->fn_flags & RTN_RTINFO)
1045a3c00e46SMartin KaFai Lau 			return fn;
1046a3c00e46SMartin KaFai Lau 	}
1047a3c00e46SMartin KaFai Lau }
1048c71099acSThomas Graf 
104910585b43SDavid Ahern static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
1050d3843fe5SWei Wang {
1051d3843fe5SWei Wang 	struct rt6_info *rt = *prt;
1052d3843fe5SWei Wang 
1053d3843fe5SWei Wang 	if (dst_hold_safe(&rt->dst))
1054d3843fe5SWei Wang 		return true;
105510585b43SDavid Ahern 	if (net) {
1056d3843fe5SWei Wang 		rt = net->ipv6.ip6_null_entry;
1057d3843fe5SWei Wang 		dst_hold(&rt->dst);
1058d3843fe5SWei Wang 	} else {
1059d3843fe5SWei Wang 		rt = NULL;
1060d3843fe5SWei Wang 	}
1061d3843fe5SWei Wang 	*prt = rt;
1062d3843fe5SWei Wang 	return false;
1063d3843fe5SWei Wang }
1064d3843fe5SWei Wang 
1065dec9b0e2SDavid Ahern /* called with rcu_lock held */
10669b6b35abSDavid Ahern static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
1067dec9b0e2SDavid Ahern {
10689b6b35abSDavid Ahern 	struct net_device *dev = res->nh->fib_nh_dev;
10699b6b35abSDavid Ahern 	struct fib6_info *f6i = res->f6i;
10709b6b35abSDavid Ahern 	unsigned short flags;
1071dec9b0e2SDavid Ahern 	struct rt6_info *nrt;
1072dec9b0e2SDavid Ahern 
10739b6b35abSDavid Ahern 	if (!fib6_info_hold_safe(f6i))
10741c87e79aSXin Long 		goto fallback;
1075e873e4b9SWei Wang 
10769b6b35abSDavid Ahern 	flags = fib6_info_dst_flags(f6i);
107793531c67SDavid Ahern 	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
10781c87e79aSXin Long 	if (!nrt) {
10799b6b35abSDavid Ahern 		fib6_info_release(f6i);
10801c87e79aSXin Long 		goto fallback;
10811c87e79aSXin Long 	}
1082dec9b0e2SDavid Ahern 
10830d161581SDavid Ahern 	ip6_rt_copy_init(nrt, res);
10841c87e79aSXin Long 	return nrt;
10851c87e79aSXin Long 
10861c87e79aSXin Long fallback:
10871c87e79aSXin Long 	nrt = dev_net(dev)->ipv6.ip6_null_entry;
10881c87e79aSXin Long 	dst_hold(&nrt->dst);
1089dec9b0e2SDavid Ahern 	return nrt;
1090dec9b0e2SDavid Ahern }
1091dec9b0e2SDavid Ahern 
10928ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_lookup(struct net *net,
10938ed67789SDaniel Lezcano 					     struct fib6_table *table,
1094b75cc8f9SDavid Ahern 					     struct flowi6 *fl6,
1095b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
1096b75cc8f9SDavid Ahern 					     int flags)
10971da177e4SLinus Torvalds {
1098b1d40991SDavid Ahern 	struct fib6_result res = {};
10991da177e4SLinus Torvalds 	struct fib6_node *fn;
110023fb93a4SDavid Ahern 	struct rt6_info *rt;
11011da177e4SLinus Torvalds 
1102b6cdbc85SDavid Ahern 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1103b6cdbc85SDavid Ahern 		flags &= ~RT6_LOOKUP_F_IFACE;
1104b6cdbc85SDavid Ahern 
110566f5d6ceSWei Wang 	rcu_read_lock();
11066454743bSDavid Ahern 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1107c71099acSThomas Graf restart:
1108b1d40991SDavid Ahern 	res.f6i = rcu_dereference(fn->leaf);
1109b1d40991SDavid Ahern 	if (!res.f6i)
1110b1d40991SDavid Ahern 		res.f6i = net->ipv6.fib6_null_entry;
1111af52a52cSDavid Ahern 	else
111275ef7389SDavid Ahern 		rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
111375ef7389SDavid Ahern 				 flags);
1114af52a52cSDavid Ahern 
1115b1d40991SDavid Ahern 	if (res.f6i == net->ipv6.fib6_null_entry) {
1116a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
1117a3c00e46SMartin KaFai Lau 		if (fn)
1118a3c00e46SMartin KaFai Lau 			goto restart;
1119af52a52cSDavid Ahern 
1120af52a52cSDavid Ahern 		rt = net->ipv6.ip6_null_entry;
1121af52a52cSDavid Ahern 		dst_hold(&rt->dst);
1122af52a52cSDavid Ahern 		goto out;
1123a3c00e46SMartin KaFai Lau 	}
11242b760fcfSWei Wang 
1125b1d40991SDavid Ahern 	fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
1126b1d40991SDavid Ahern 			 fl6->flowi6_oif != 0, skb, flags);
1127b1d40991SDavid Ahern 
11284c9483b2SDavid S. Miller 	/* Search through exception table */
11297e4b5128SDavid Ahern 	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
113023fb93a4SDavid Ahern 	if (rt) {
113110585b43SDavid Ahern 		if (ip6_hold_safe(net, &rt))
1132d3843fe5SWei Wang 			dst_use_noref(&rt->dst, jiffies);
113323fb93a4SDavid Ahern 	} else {
11349b6b35abSDavid Ahern 		rt = ip6_create_rt_rcu(&res);
1135dec9b0e2SDavid Ahern 	}
1136d3843fe5SWei Wang 
1137af52a52cSDavid Ahern out:
11388ff2e5b2SDavid Ahern 	trace_fib6_table_lookup(net, &res, table, fl6);
1139af52a52cSDavid Ahern 
114066f5d6ceSWei Wang 	rcu_read_unlock();
1141b811580dSDavid Ahern 
11421da177e4SLinus Torvalds 	return rt;
1143c71099acSThomas Graf }
1144c71099acSThomas Graf 
1145ea6e574eSFlorian Westphal struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1146b75cc8f9SDavid Ahern 				   const struct sk_buff *skb, int flags)
1147ea6e574eSFlorian Westphal {
1148b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1149ea6e574eSFlorian Westphal }
1150ea6e574eSFlorian Westphal EXPORT_SYMBOL_GPL(ip6_route_lookup);
1151ea6e574eSFlorian Westphal 
11529acd9f3aSYOSHIFUJI Hideaki struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1153b75cc8f9SDavid Ahern 			    const struct in6_addr *saddr, int oif,
1154b75cc8f9SDavid Ahern 			    const struct sk_buff *skb, int strict)
1155c71099acSThomas Graf {
11564c9483b2SDavid S. Miller 	struct flowi6 fl6 = {
11574c9483b2SDavid S. Miller 		.flowi6_oif = oif,
11584c9483b2SDavid S. Miller 		.daddr = *daddr,
1159c71099acSThomas Graf 	};
1160c71099acSThomas Graf 	struct dst_entry *dst;
116177d16f45SYOSHIFUJI Hideaki 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1162c71099acSThomas Graf 
1163adaa70bbSThomas Graf 	if (saddr) {
11644c9483b2SDavid S. Miller 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1165adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1166adaa70bbSThomas Graf 	}
1167adaa70bbSThomas Graf 
1168b75cc8f9SDavid Ahern 	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1169c71099acSThomas Graf 	if (dst->error == 0)
1170c71099acSThomas Graf 		return (struct rt6_info *) dst;
1171c71099acSThomas Graf 
1172c71099acSThomas Graf 	dst_release(dst);
1173c71099acSThomas Graf 
11741da177e4SLinus Torvalds 	return NULL;
11751da177e4SLinus Torvalds }
11767159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(rt6_lookup);
11777159039aSYOSHIFUJI Hideaki 
/* ip6_ins_rt is called with table->tb6_lock NOT held; the lock is taken
 * internally.
 * It takes a new route entry; if the addition fails for any reason, the
 * route is released.
 * The caller must hold a reference to the route before calling it.
 */
11821da177e4SLinus Torvalds  */
11831da177e4SLinus Torvalds 
11848d1c802bSDavid Ahern static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
1185333c4301SDavid Ahern 			struct netlink_ext_ack *extack)
11861da177e4SLinus Torvalds {
11871da177e4SLinus Torvalds 	int err;
1188c71099acSThomas Graf 	struct fib6_table *table;
11891da177e4SLinus Torvalds 
119093c2fb25SDavid Ahern 	table = rt->fib6_table;
119166f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
1192d4ead6b3SDavid Ahern 	err = fib6_add(&table->tb6_root, rt, info, extack);
119366f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
11941da177e4SLinus Torvalds 
11951da177e4SLinus Torvalds 	return err;
11961da177e4SLinus Torvalds }
11971da177e4SLinus Torvalds 
11988d1c802bSDavid Ahern int ip6_ins_rt(struct net *net, struct fib6_info *rt)
119940e22e8fSThomas Graf {
1200afb1d4b5SDavid Ahern 	struct nl_info info = {	.nl_net = net, };
1201e715b6d3SFlorian Westphal 
1202d4ead6b3SDavid Ahern 	return __ip6_ins_rt(rt, &info, NULL);
120340e22e8fSThomas Graf }
120440e22e8fSThomas Graf 
120585bd05deSDavid Ahern static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
120621efcfa0SEric Dumazet 					   const struct in6_addr *daddr,
1207b71d1d42SEric Dumazet 					   const struct in6_addr *saddr)
12081da177e4SLinus Torvalds {
120985bd05deSDavid Ahern 	struct fib6_info *f6i = res->f6i;
12104832c30dSDavid Ahern 	struct net_device *dev;
12111da177e4SLinus Torvalds 	struct rt6_info *rt;
12121da177e4SLinus Torvalds 
12131da177e4SLinus Torvalds 	/*
12141da177e4SLinus Torvalds 	 *	Clone the route.
12151da177e4SLinus Torvalds 	 */
12161da177e4SLinus Torvalds 
121785bd05deSDavid Ahern 	if (!fib6_info_hold_safe(f6i))
1218e873e4b9SWei Wang 		return NULL;
1219e873e4b9SWei Wang 
12200d161581SDavid Ahern 	dev = ip6_rt_get_dev_rcu(res);
122193531c67SDavid Ahern 	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
1222e873e4b9SWei Wang 	if (!rt) {
122385bd05deSDavid Ahern 		fib6_info_release(f6i);
122483a09abdSMartin KaFai Lau 		return NULL;
1225e873e4b9SWei Wang 	}
122683a09abdSMartin KaFai Lau 
12270d161581SDavid Ahern 	ip6_rt_copy_init(rt, res);
12288b9df265SMartin KaFai Lau 	rt->rt6i_flags |= RTF_CACHE;
122983a09abdSMartin KaFai Lau 	rt->dst.flags |= DST_HOST;
123083a09abdSMartin KaFai Lau 	rt->rt6i_dst.addr = *daddr;
123183a09abdSMartin KaFai Lau 	rt->rt6i_dst.plen = 128;
12328b9df265SMartin KaFai Lau 
123385bd05deSDavid Ahern 	if (!rt6_is_gw_or_nonexthop(res)) {
123485bd05deSDavid Ahern 		if (f6i->fib6_dst.plen != 128 &&
123585bd05deSDavid Ahern 		    ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
123658c4fb86SYOSHIFUJI Hideaki 			rt->rt6i_flags |= RTF_ANYCAST;
12371da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
12381da177e4SLinus Torvalds 		if (rt->rt6i_src.plen && saddr) {
12394e3fd7a0SAlexey Dobriyan 			rt->rt6i_src.addr = *saddr;
12401da177e4SLinus Torvalds 			rt->rt6i_src.plen = 128;
12411da177e4SLinus Torvalds 		}
12421da177e4SLinus Torvalds #endif
124395a9a5baSYOSHIFUJI Hideaki 	}
124495a9a5baSYOSHIFUJI Hideaki 
1245299d9939SYOSHIFUJI Hideaki 	return rt;
1246299d9939SYOSHIFUJI Hideaki }
1247299d9939SYOSHIFUJI Hideaki 
1248db3fedeeSDavid Ahern static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
1249d52d3997SMartin KaFai Lau {
1250db3fedeeSDavid Ahern 	struct fib6_info *f6i = res->f6i;
1251db3fedeeSDavid Ahern 	unsigned short flags = fib6_info_dst_flags(f6i);
12524832c30dSDavid Ahern 	struct net_device *dev;
1253d52d3997SMartin KaFai Lau 	struct rt6_info *pcpu_rt;
1254d52d3997SMartin KaFai Lau 
1255db3fedeeSDavid Ahern 	if (!fib6_info_hold_safe(f6i))
1256e873e4b9SWei Wang 		return NULL;
1257e873e4b9SWei Wang 
12584832c30dSDavid Ahern 	rcu_read_lock();
12590d161581SDavid Ahern 	dev = ip6_rt_get_dev_rcu(res);
126093531c67SDavid Ahern 	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
12614832c30dSDavid Ahern 	rcu_read_unlock();
1262e873e4b9SWei Wang 	if (!pcpu_rt) {
1263db3fedeeSDavid Ahern 		fib6_info_release(f6i);
1264d52d3997SMartin KaFai Lau 		return NULL;
1265e873e4b9SWei Wang 	}
12660d161581SDavid Ahern 	ip6_rt_copy_init(pcpu_rt, res);
1267d52d3997SMartin KaFai Lau 	pcpu_rt->rt6i_flags |= RTF_PCPU;
1268d52d3997SMartin KaFai Lau 	return pcpu_rt;
1269d52d3997SMartin KaFai Lau }
1270d52d3997SMartin KaFai Lau 
127166f5d6ceSWei Wang /* It should be called with rcu_read_lock() acquired */
1272db3fedeeSDavid Ahern static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
1273d52d3997SMartin KaFai Lau {
1274a73e4195SMartin KaFai Lau 	struct rt6_info *pcpu_rt, **p;
1275d52d3997SMartin KaFai Lau 
1276db3fedeeSDavid Ahern 	p = this_cpu_ptr(res->f6i->rt6i_pcpu);
1277d52d3997SMartin KaFai Lau 	pcpu_rt = *p;
1278d52d3997SMartin KaFai Lau 
1279d4ead6b3SDavid Ahern 	if (pcpu_rt)
128010585b43SDavid Ahern 		ip6_hold_safe(NULL, &pcpu_rt);
1281d3843fe5SWei Wang 
1282a73e4195SMartin KaFai Lau 	return pcpu_rt;
1283a73e4195SMartin KaFai Lau }
1284a73e4195SMartin KaFai Lau 
1285afb1d4b5SDavid Ahern static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1286db3fedeeSDavid Ahern 					    const struct fib6_result *res)
1287a73e4195SMartin KaFai Lau {
1288a73e4195SMartin KaFai Lau 	struct rt6_info *pcpu_rt, *prev, **p;
1289d52d3997SMartin KaFai Lau 
1290db3fedeeSDavid Ahern 	pcpu_rt = ip6_rt_pcpu_alloc(res);
1291d52d3997SMartin KaFai Lau 	if (!pcpu_rt) {
12929c7370a1SMartin KaFai Lau 		dst_hold(&net->ipv6.ip6_null_entry->dst);
12939c7370a1SMartin KaFai Lau 		return net->ipv6.ip6_null_entry;
1294d52d3997SMartin KaFai Lau 	}
1295d52d3997SMartin KaFai Lau 
1296a94b9367SWei Wang 	dst_hold(&pcpu_rt->dst);
1297db3fedeeSDavid Ahern 	p = this_cpu_ptr(res->f6i->rt6i_pcpu);
1298d52d3997SMartin KaFai Lau 	prev = cmpxchg(p, NULL, pcpu_rt);
1299951f788aSEric Dumazet 	BUG_ON(prev);
1300a94b9367SWei Wang 
1301d52d3997SMartin KaFai Lau 	return pcpu_rt;
1302d52d3997SMartin KaFai Lau }
1303d52d3997SMartin KaFai Lau 
/* Exception hash table implementation.
 *
 * A single spinlock serialises modification of the exception buckets.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
130735732d01SWei Wang 
130835732d01SWei Wang /* Remove rt6_ex from hash table and free the memory
130935732d01SWei Wang  * Caller must hold rt6_exception_lock
131035732d01SWei Wang  */
131135732d01SWei Wang static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
131235732d01SWei Wang 				 struct rt6_exception *rt6_ex)
131335732d01SWei Wang {
1314f5b51fe8SPaolo Abeni 	struct fib6_info *from;
1315b2427e67SColin Ian King 	struct net *net;
131681eb8447SWei Wang 
131735732d01SWei Wang 	if (!bucket || !rt6_ex)
131835732d01SWei Wang 		return;
1319b2427e67SColin Ian King 
1320b2427e67SColin Ian King 	net = dev_net(rt6_ex->rt6i->dst.dev);
1321f5b51fe8SPaolo Abeni 	net->ipv6.rt6_stats->fib_rt_cache--;
1322f5b51fe8SPaolo Abeni 
1323f5b51fe8SPaolo Abeni 	/* purge completely the exception to allow releasing the held resources:
1324f5b51fe8SPaolo Abeni 	 * some [sk] cache may keep the dst around for unlimited time
1325f5b51fe8SPaolo Abeni 	 */
1326f5b51fe8SPaolo Abeni 	from = rcu_dereference_protected(rt6_ex->rt6i->from,
1327f5b51fe8SPaolo Abeni 					 lockdep_is_held(&rt6_exception_lock));
1328f5b51fe8SPaolo Abeni 	rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
1329f5b51fe8SPaolo Abeni 	fib6_info_release(from);
1330f5b51fe8SPaolo Abeni 	dst_dev_put(&rt6_ex->rt6i->dst);
1331f5b51fe8SPaolo Abeni 
133235732d01SWei Wang 	hlist_del_rcu(&rt6_ex->hlist);
133377634cc6SDavid Ahern 	dst_release(&rt6_ex->rt6i->dst);
133435732d01SWei Wang 	kfree_rcu(rt6_ex, rcu);
133535732d01SWei Wang 	WARN_ON_ONCE(!bucket->depth);
133635732d01SWei Wang 	bucket->depth--;
133735732d01SWei Wang }
133835732d01SWei Wang 
133935732d01SWei Wang /* Remove oldest rt6_ex in bucket and free the memory
134035732d01SWei Wang  * Caller must hold rt6_exception_lock
134135732d01SWei Wang  */
134235732d01SWei Wang static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
134335732d01SWei Wang {
134435732d01SWei Wang 	struct rt6_exception *rt6_ex, *oldest = NULL;
134535732d01SWei Wang 
134635732d01SWei Wang 	if (!bucket)
134735732d01SWei Wang 		return;
134835732d01SWei Wang 
134935732d01SWei Wang 	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
135035732d01SWei Wang 		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
135135732d01SWei Wang 			oldest = rt6_ex;
135235732d01SWei Wang 	}
135335732d01SWei Wang 	rt6_remove_exception(bucket, oldest);
135435732d01SWei Wang }
135535732d01SWei Wang 
135635732d01SWei Wang static u32 rt6_exception_hash(const struct in6_addr *dst,
135735732d01SWei Wang 			      const struct in6_addr *src)
135835732d01SWei Wang {
135935732d01SWei Wang 	static u32 seed __read_mostly;
136035732d01SWei Wang 	u32 val;
136135732d01SWei Wang 
136235732d01SWei Wang 	net_get_random_once(&seed, sizeof(seed));
136335732d01SWei Wang 	val = jhash(dst, sizeof(*dst), seed);
136435732d01SWei Wang 
136535732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
136635732d01SWei Wang 	if (src)
136735732d01SWei Wang 		val = jhash(src, sizeof(*src), val);
136835732d01SWei Wang #endif
136935732d01SWei Wang 	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
137035732d01SWei Wang }
137135732d01SWei Wang 
137235732d01SWei Wang /* Helper function to find the cached rt in the hash table
137335732d01SWei Wang  * and update bucket pointer to point to the bucket for this
137435732d01SWei Wang  * (daddr, saddr) pair
137535732d01SWei Wang  * Caller must hold rt6_exception_lock
137635732d01SWei Wang  */
137735732d01SWei Wang static struct rt6_exception *
137835732d01SWei Wang __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
137935732d01SWei Wang 			      const struct in6_addr *daddr,
138035732d01SWei Wang 			      const struct in6_addr *saddr)
138135732d01SWei Wang {
138235732d01SWei Wang 	struct rt6_exception *rt6_ex;
138335732d01SWei Wang 	u32 hval;
138435732d01SWei Wang 
138535732d01SWei Wang 	if (!(*bucket) || !daddr)
138635732d01SWei Wang 		return NULL;
138735732d01SWei Wang 
138835732d01SWei Wang 	hval = rt6_exception_hash(daddr, saddr);
138935732d01SWei Wang 	*bucket += hval;
139035732d01SWei Wang 
139135732d01SWei Wang 	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
139235732d01SWei Wang 		struct rt6_info *rt6 = rt6_ex->rt6i;
139335732d01SWei Wang 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
139435732d01SWei Wang 
139535732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
139635732d01SWei Wang 		if (matched && saddr)
139735732d01SWei Wang 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
139835732d01SWei Wang #endif
139935732d01SWei Wang 		if (matched)
140035732d01SWei Wang 			return rt6_ex;
140135732d01SWei Wang 	}
140235732d01SWei Wang 	return NULL;
140335732d01SWei Wang }
140435732d01SWei Wang 
140535732d01SWei Wang /* Helper function to find the cached rt in the hash table
140635732d01SWei Wang  * and update bucket pointer to point to the bucket for this
140735732d01SWei Wang  * (daddr, saddr) pair
140835732d01SWei Wang  * Caller must hold rcu_read_lock()
140935732d01SWei Wang  */
141035732d01SWei Wang static struct rt6_exception *
141135732d01SWei Wang __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
141235732d01SWei Wang 			 const struct in6_addr *daddr,
141335732d01SWei Wang 			 const struct in6_addr *saddr)
141435732d01SWei Wang {
141535732d01SWei Wang 	struct rt6_exception *rt6_ex;
141635732d01SWei Wang 	u32 hval;
141735732d01SWei Wang 
141835732d01SWei Wang 	WARN_ON_ONCE(!rcu_read_lock_held());
141935732d01SWei Wang 
142035732d01SWei Wang 	if (!(*bucket) || !daddr)
142135732d01SWei Wang 		return NULL;
142235732d01SWei Wang 
142335732d01SWei Wang 	hval = rt6_exception_hash(daddr, saddr);
142435732d01SWei Wang 	*bucket += hval;
142535732d01SWei Wang 
142635732d01SWei Wang 	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
142735732d01SWei Wang 		struct rt6_info *rt6 = rt6_ex->rt6i;
142835732d01SWei Wang 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
142935732d01SWei Wang 
143035732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
143135732d01SWei Wang 		if (matched && saddr)
143235732d01SWei Wang 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
143335732d01SWei Wang #endif
143435732d01SWei Wang 		if (matched)
143535732d01SWei Wang 			return rt6_ex;
143635732d01SWei Wang 	}
143735732d01SWei Wang 	return NULL;
143835732d01SWei Wang }
143935732d01SWei Wang 
1440b748f260SDavid Ahern static unsigned int fib6_mtu(const struct fib6_result *res)
144135732d01SWei Wang {
1442b748f260SDavid Ahern 	const struct fib6_nh *nh = res->nh;
1443d4ead6b3SDavid Ahern 	unsigned int mtu;
1444d4ead6b3SDavid Ahern 
1445b748f260SDavid Ahern 	if (res->f6i->fib6_pmtu) {
1446b748f260SDavid Ahern 		mtu = res->f6i->fib6_pmtu;
1447dcd1f572SDavid Ahern 	} else {
1448b748f260SDavid Ahern 		struct net_device *dev = nh->fib_nh_dev;
1449dcd1f572SDavid Ahern 		struct inet6_dev *idev;
1450dcd1f572SDavid Ahern 
1451dcd1f572SDavid Ahern 		rcu_read_lock();
1452dcd1f572SDavid Ahern 		idev = __in6_dev_get(dev);
1453dcd1f572SDavid Ahern 		mtu = idev->cnf.mtu6;
1454dcd1f572SDavid Ahern 		rcu_read_unlock();
1455dcd1f572SDavid Ahern 	}
1456dcd1f572SDavid Ahern 
1457d4ead6b3SDavid Ahern 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1458d4ead6b3SDavid Ahern 
1459b748f260SDavid Ahern 	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
1460d4ead6b3SDavid Ahern }
1461d4ead6b3SDavid Ahern 
146235732d01SWei Wang static int rt6_insert_exception(struct rt6_info *nrt,
14635012f0a5SDavid Ahern 				const struct fib6_result *res)
146435732d01SWei Wang {
14655e670d84SDavid Ahern 	struct net *net = dev_net(nrt->dst.dev);
146635732d01SWei Wang 	struct rt6_exception_bucket *bucket;
146735732d01SWei Wang 	struct in6_addr *src_key = NULL;
146835732d01SWei Wang 	struct rt6_exception *rt6_ex;
14695012f0a5SDavid Ahern 	struct fib6_info *f6i = res->f6i;
147035732d01SWei Wang 	int err = 0;
147135732d01SWei Wang 
147235732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
147335732d01SWei Wang 
14745012f0a5SDavid Ahern 	if (f6i->exception_bucket_flushed) {
147535732d01SWei Wang 		err = -EINVAL;
147635732d01SWei Wang 		goto out;
147735732d01SWei Wang 	}
147835732d01SWei Wang 
14795012f0a5SDavid Ahern 	bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket,
148035732d01SWei Wang 					lockdep_is_held(&rt6_exception_lock));
148135732d01SWei Wang 	if (!bucket) {
148235732d01SWei Wang 		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
148335732d01SWei Wang 				 GFP_ATOMIC);
148435732d01SWei Wang 		if (!bucket) {
148535732d01SWei Wang 			err = -ENOMEM;
148635732d01SWei Wang 			goto out;
148735732d01SWei Wang 		}
14885012f0a5SDavid Ahern 		rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket);
148935732d01SWei Wang 	}
149035732d01SWei Wang 
149135732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
14925012f0a5SDavid Ahern 	/* fib6_src.plen != 0 indicates f6i is in subtree
149335732d01SWei Wang 	 * and exception table is indexed by a hash of
14945012f0a5SDavid Ahern 	 * both fib6_dst and fib6_src.
149535732d01SWei Wang 	 * Otherwise, the exception table is indexed by
14965012f0a5SDavid Ahern 	 * a hash of only fib6_dst.
149735732d01SWei Wang 	 */
14985012f0a5SDavid Ahern 	if (f6i->fib6_src.plen)
149935732d01SWei Wang 		src_key = &nrt->rt6i_src.addr;
150035732d01SWei Wang #endif
15015012f0a5SDavid Ahern 	/* rt6_mtu_change() might lower mtu on f6i.
1502f5bbe7eeSWei Wang 	 * Only insert this exception route if its mtu
15035012f0a5SDavid Ahern 	 * is less than f6i's mtu value.
1504f5bbe7eeSWei Wang 	 */
1505b748f260SDavid Ahern 	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
1506f5bbe7eeSWei Wang 		err = -EINVAL;
1507f5bbe7eeSWei Wang 		goto out;
1508f5bbe7eeSWei Wang 	}
150960006a48SWei Wang 
151035732d01SWei Wang 	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
151135732d01SWei Wang 					       src_key);
151235732d01SWei Wang 	if (rt6_ex)
151335732d01SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
151435732d01SWei Wang 
151535732d01SWei Wang 	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
151635732d01SWei Wang 	if (!rt6_ex) {
151735732d01SWei Wang 		err = -ENOMEM;
151835732d01SWei Wang 		goto out;
151935732d01SWei Wang 	}
152035732d01SWei Wang 	rt6_ex->rt6i = nrt;
152135732d01SWei Wang 	rt6_ex->stamp = jiffies;
152235732d01SWei Wang 	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
152335732d01SWei Wang 	bucket->depth++;
152481eb8447SWei Wang 	net->ipv6.rt6_stats->fib_rt_cache++;
152535732d01SWei Wang 
152635732d01SWei Wang 	if (bucket->depth > FIB6_MAX_DEPTH)
152735732d01SWei Wang 		rt6_exception_remove_oldest(bucket);
152835732d01SWei Wang 
152935732d01SWei Wang out:
153035732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
153135732d01SWei Wang 
153235732d01SWei Wang 	/* Update fn->fn_sernum to invalidate all cached dst */
1533b886d5f2SPaolo Abeni 	if (!err) {
15345012f0a5SDavid Ahern 		spin_lock_bh(&f6i->fib6_table->tb6_lock);
15355012f0a5SDavid Ahern 		fib6_update_sernum(net, f6i);
15365012f0a5SDavid Ahern 		spin_unlock_bh(&f6i->fib6_table->tb6_lock);
1537b886d5f2SPaolo Abeni 		fib6_force_start_gc(net);
1538b886d5f2SPaolo Abeni 	}
153935732d01SWei Wang 
154035732d01SWei Wang 	return err;
154135732d01SWei Wang }
154235732d01SWei Wang 
15438d1c802bSDavid Ahern void rt6_flush_exceptions(struct fib6_info *rt)
154435732d01SWei Wang {
154535732d01SWei Wang 	struct rt6_exception_bucket *bucket;
154635732d01SWei Wang 	struct rt6_exception *rt6_ex;
154735732d01SWei Wang 	struct hlist_node *tmp;
154835732d01SWei Wang 	int i;
154935732d01SWei Wang 
155035732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
155135732d01SWei Wang 	/* Prevent rt6_insert_exception() to recreate the bucket list */
155235732d01SWei Wang 	rt->exception_bucket_flushed = 1;
155335732d01SWei Wang 
155435732d01SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
155535732d01SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
155635732d01SWei Wang 	if (!bucket)
155735732d01SWei Wang 		goto out;
155835732d01SWei Wang 
155935732d01SWei Wang 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
156035732d01SWei Wang 		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
156135732d01SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
156235732d01SWei Wang 		WARN_ON_ONCE(bucket->depth);
156335732d01SWei Wang 		bucket++;
156435732d01SWei Wang 	}
156535732d01SWei Wang 
156635732d01SWei Wang out:
156735732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
156835732d01SWei Wang }
156935732d01SWei Wang 
157035732d01SWei Wang /* Find cached rt in the hash table inside passed in rt
157135732d01SWei Wang  * Caller has to hold rcu_read_lock()
157235732d01SWei Wang  */
15737e4b5128SDavid Ahern static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
157435732d01SWei Wang 					   struct in6_addr *daddr,
157535732d01SWei Wang 					   struct in6_addr *saddr)
157635732d01SWei Wang {
157735732d01SWei Wang 	struct rt6_exception_bucket *bucket;
157835732d01SWei Wang 	struct in6_addr *src_key = NULL;
157935732d01SWei Wang 	struct rt6_exception *rt6_ex;
15807e4b5128SDavid Ahern 	struct rt6_info *ret = NULL;
158135732d01SWei Wang 
15827e4b5128SDavid Ahern 	bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
158335732d01SWei Wang 
158435732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
15857e4b5128SDavid Ahern 	/* fib6i_src.plen != 0 indicates f6i is in subtree
158635732d01SWei Wang 	 * and exception table is indexed by a hash of
15877e4b5128SDavid Ahern 	 * both fib6_dst and fib6_src.
158835732d01SWei Wang 	 * Otherwise, the exception table is indexed by
15897e4b5128SDavid Ahern 	 * a hash of only fib6_dst.
159035732d01SWei Wang 	 */
15917e4b5128SDavid Ahern 	if (res->f6i->fib6_src.plen)
159235732d01SWei Wang 		src_key = saddr;
159335732d01SWei Wang #endif
159435732d01SWei Wang 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
159535732d01SWei Wang 
159635732d01SWei Wang 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
15977e4b5128SDavid Ahern 		ret = rt6_ex->rt6i;
159835732d01SWei Wang 
15997e4b5128SDavid Ahern 	return ret;
160035732d01SWei Wang }
160135732d01SWei Wang 
160235732d01SWei Wang /* Remove the passed in cached rt from the hash table that contains it */
160323fb93a4SDavid Ahern static int rt6_remove_exception_rt(struct rt6_info *rt)
160435732d01SWei Wang {
160535732d01SWei Wang 	struct rt6_exception_bucket *bucket;
160635732d01SWei Wang 	struct in6_addr *src_key = NULL;
160735732d01SWei Wang 	struct rt6_exception *rt6_ex;
16088a14e46fSDavid Ahern 	struct fib6_info *from;
160935732d01SWei Wang 	int err;
161035732d01SWei Wang 
1611091311deSEric Dumazet 	from = rcu_dereference(rt->from);
161235732d01SWei Wang 	if (!from ||
1613442d713bSColin Ian King 	    !(rt->rt6i_flags & RTF_CACHE))
161435732d01SWei Wang 		return -EINVAL;
161535732d01SWei Wang 
161635732d01SWei Wang 	if (!rcu_access_pointer(from->rt6i_exception_bucket))
161735732d01SWei Wang 		return -ENOENT;
161835732d01SWei Wang 
161935732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
162035732d01SWei Wang 	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
162135732d01SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
162235732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
162335732d01SWei Wang 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
162435732d01SWei Wang 	 * and exception table is indexed by a hash of
162535732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
162635732d01SWei Wang 	 * Otherwise, the exception table is indexed by
162735732d01SWei Wang 	 * a hash of only rt6i_dst.
162835732d01SWei Wang 	 */
162993c2fb25SDavid Ahern 	if (from->fib6_src.plen)
163035732d01SWei Wang 		src_key = &rt->rt6i_src.addr;
163135732d01SWei Wang #endif
163235732d01SWei Wang 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
163335732d01SWei Wang 					       &rt->rt6i_dst.addr,
163435732d01SWei Wang 					       src_key);
163535732d01SWei Wang 	if (rt6_ex) {
163635732d01SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
163735732d01SWei Wang 		err = 0;
163835732d01SWei Wang 	} else {
163935732d01SWei Wang 		err = -ENOENT;
164035732d01SWei Wang 	}
164135732d01SWei Wang 
164235732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
164335732d01SWei Wang 	return err;
164435732d01SWei Wang }
164535732d01SWei Wang 
164635732d01SWei Wang /* Find rt6_ex which contains the passed in rt cache and
164735732d01SWei Wang  * refresh its stamp
164835732d01SWei Wang  */
164935732d01SWei Wang static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
165035732d01SWei Wang {
165135732d01SWei Wang 	struct rt6_exception_bucket *bucket;
165235732d01SWei Wang 	struct in6_addr *src_key = NULL;
165335732d01SWei Wang 	struct rt6_exception *rt6_ex;
1654193f3685SPaolo Abeni 	struct fib6_info *from;
165535732d01SWei Wang 
165635732d01SWei Wang 	rcu_read_lock();
1657193f3685SPaolo Abeni 	from = rcu_dereference(rt->from);
1658193f3685SPaolo Abeni 	if (!from || !(rt->rt6i_flags & RTF_CACHE))
1659193f3685SPaolo Abeni 		goto unlock;
1660193f3685SPaolo Abeni 
166135732d01SWei Wang 	bucket = rcu_dereference(from->rt6i_exception_bucket);
166235732d01SWei Wang 
166335732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
166435732d01SWei Wang 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
166535732d01SWei Wang 	 * and exception table is indexed by a hash of
166635732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
166735732d01SWei Wang 	 * Otherwise, the exception table is indexed by
166835732d01SWei Wang 	 * a hash of only rt6i_dst.
166935732d01SWei Wang 	 */
167093c2fb25SDavid Ahern 	if (from->fib6_src.plen)
167135732d01SWei Wang 		src_key = &rt->rt6i_src.addr;
167235732d01SWei Wang #endif
167335732d01SWei Wang 	rt6_ex = __rt6_find_exception_rcu(&bucket,
167435732d01SWei Wang 					  &rt->rt6i_dst.addr,
167535732d01SWei Wang 					  src_key);
167635732d01SWei Wang 	if (rt6_ex)
167735732d01SWei Wang 		rt6_ex->stamp = jiffies;
167835732d01SWei Wang 
1679193f3685SPaolo Abeni unlock:
168035732d01SWei Wang 	rcu_read_unlock();
168135732d01SWei Wang }
168235732d01SWei Wang 
1683e9fa1495SStefano Brivio static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1684e9fa1495SStefano Brivio 					 struct rt6_info *rt, int mtu)
1685e9fa1495SStefano Brivio {
1686e9fa1495SStefano Brivio 	/* If the new MTU is lower than the route PMTU, this new MTU will be the
1687e9fa1495SStefano Brivio 	 * lowest MTU in the path: always allow updating the route PMTU to
1688e9fa1495SStefano Brivio 	 * reflect PMTU decreases.
1689e9fa1495SStefano Brivio 	 *
1690e9fa1495SStefano Brivio 	 * If the new MTU is higher, and the route PMTU is equal to the local
1691e9fa1495SStefano Brivio 	 * MTU, this means the old MTU is the lowest in the path, so allow
1692e9fa1495SStefano Brivio 	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1693e9fa1495SStefano Brivio 	 * handle this.
1694e9fa1495SStefano Brivio 	 */
1695e9fa1495SStefano Brivio 
1696e9fa1495SStefano Brivio 	if (dst_mtu(&rt->dst) >= mtu)
1697e9fa1495SStefano Brivio 		return true;
1698e9fa1495SStefano Brivio 
1699e9fa1495SStefano Brivio 	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1700e9fa1495SStefano Brivio 		return true;
1701e9fa1495SStefano Brivio 
1702e9fa1495SStefano Brivio 	return false;
1703e9fa1495SStefano Brivio }
1704e9fa1495SStefano Brivio 
1705e9fa1495SStefano Brivio static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
17068d1c802bSDavid Ahern 				       struct fib6_info *rt, int mtu)
1707f5bbe7eeSWei Wang {
1708f5bbe7eeSWei Wang 	struct rt6_exception_bucket *bucket;
1709f5bbe7eeSWei Wang 	struct rt6_exception *rt6_ex;
1710f5bbe7eeSWei Wang 	int i;
1711f5bbe7eeSWei Wang 
1712f5bbe7eeSWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1713f5bbe7eeSWei Wang 					lockdep_is_held(&rt6_exception_lock));
1714f5bbe7eeSWei Wang 
1715e9fa1495SStefano Brivio 	if (!bucket)
1716e9fa1495SStefano Brivio 		return;
1717e9fa1495SStefano Brivio 
1718f5bbe7eeSWei Wang 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1719f5bbe7eeSWei Wang 		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1720f5bbe7eeSWei Wang 			struct rt6_info *entry = rt6_ex->rt6i;
1721e9fa1495SStefano Brivio 
1722e9fa1495SStefano Brivio 			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1723d4ead6b3SDavid Ahern 			 * route), the metrics of its rt->from have already
1724f5bbe7eeSWei Wang 			 * been updated.
1725f5bbe7eeSWei Wang 			 */
1726d4ead6b3SDavid Ahern 			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
1727e9fa1495SStefano Brivio 			    rt6_mtu_change_route_allowed(idev, entry, mtu))
1728d4ead6b3SDavid Ahern 				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
1729f5bbe7eeSWei Wang 		}
1730f5bbe7eeSWei Wang 		bucket++;
1731f5bbe7eeSWei Wang 	}
1732f5bbe7eeSWei Wang }
1733f5bbe7eeSWei Wang 
/* both flags must be set for an entry to count as a cached gateway route */
#define RTF_CACHE_GATEWAY	(RTF_CACHE | RTF_GATEWAY)
1735b16cb459SWei Wang 
17368d1c802bSDavid Ahern static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
1737b16cb459SWei Wang 					struct in6_addr *gateway)
1738b16cb459SWei Wang {
1739b16cb459SWei Wang 	struct rt6_exception_bucket *bucket;
1740b16cb459SWei Wang 	struct rt6_exception *rt6_ex;
1741b16cb459SWei Wang 	struct hlist_node *tmp;
1742b16cb459SWei Wang 	int i;
1743b16cb459SWei Wang 
1744b16cb459SWei Wang 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1745b16cb459SWei Wang 		return;
1746b16cb459SWei Wang 
1747b16cb459SWei Wang 	spin_lock_bh(&rt6_exception_lock);
1748b16cb459SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1749b16cb459SWei Wang 				     lockdep_is_held(&rt6_exception_lock));
1750b16cb459SWei Wang 
1751b16cb459SWei Wang 	if (bucket) {
1752b16cb459SWei Wang 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1753b16cb459SWei Wang 			hlist_for_each_entry_safe(rt6_ex, tmp,
1754b16cb459SWei Wang 						  &bucket->chain, hlist) {
1755b16cb459SWei Wang 				struct rt6_info *entry = rt6_ex->rt6i;
1756b16cb459SWei Wang 
1757b16cb459SWei Wang 				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1758b16cb459SWei Wang 				    RTF_CACHE_GATEWAY &&
1759b16cb459SWei Wang 				    ipv6_addr_equal(gateway,
1760b16cb459SWei Wang 						    &entry->rt6i_gateway)) {
1761b16cb459SWei Wang 					rt6_remove_exception(bucket, rt6_ex);
1762b16cb459SWei Wang 				}
1763b16cb459SWei Wang 			}
1764b16cb459SWei Wang 			bucket++;
1765b16cb459SWei Wang 		}
1766b16cb459SWei Wang 	}
1767b16cb459SWei Wang 
1768b16cb459SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
1769b16cb459SWei Wang }
1770b16cb459SWei Wang 
1771c757faa8SWei Wang static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1772c757faa8SWei Wang 				      struct rt6_exception *rt6_ex,
1773c757faa8SWei Wang 				      struct fib6_gc_args *gc_args,
1774c757faa8SWei Wang 				      unsigned long now)
1775c757faa8SWei Wang {
1776c757faa8SWei Wang 	struct rt6_info *rt = rt6_ex->rt6i;
1777c757faa8SWei Wang 
17781859bac0SPaolo Abeni 	/* we are pruning and obsoleting aged-out and non gateway exceptions
17791859bac0SPaolo Abeni 	 * even if others have still references to them, so that on next
17801859bac0SPaolo Abeni 	 * dst_check() such references can be dropped.
17811859bac0SPaolo Abeni 	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
17821859bac0SPaolo Abeni 	 * expired, independently from their aging, as per RFC 8201 section 4
17831859bac0SPaolo Abeni 	 */
178431afeb42SWei Wang 	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
178531afeb42SWei Wang 		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1786c757faa8SWei Wang 			RT6_TRACE("aging clone %p\n", rt);
1787c757faa8SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
1788c757faa8SWei Wang 			return;
178931afeb42SWei Wang 		}
179031afeb42SWei Wang 	} else if (time_after(jiffies, rt->dst.expires)) {
179131afeb42SWei Wang 		RT6_TRACE("purging expired route %p\n", rt);
179231afeb42SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
179331afeb42SWei Wang 		return;
179431afeb42SWei Wang 	}
179531afeb42SWei Wang 
179631afeb42SWei Wang 	if (rt->rt6i_flags & RTF_GATEWAY) {
1797c757faa8SWei Wang 		struct neighbour *neigh;
1798c757faa8SWei Wang 		__u8 neigh_flags = 0;
1799c757faa8SWei Wang 
18001bfa26ffSEric Dumazet 		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
18011bfa26ffSEric Dumazet 		if (neigh)
1802c757faa8SWei Wang 			neigh_flags = neigh->flags;
18031bfa26ffSEric Dumazet 
1804c757faa8SWei Wang 		if (!(neigh_flags & NTF_ROUTER)) {
1805c757faa8SWei Wang 			RT6_TRACE("purging route %p via non-router but gateway\n",
1806c757faa8SWei Wang 				  rt);
1807c757faa8SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
1808c757faa8SWei Wang 			return;
1809c757faa8SWei Wang 		}
1810c757faa8SWei Wang 	}
181131afeb42SWei Wang 
1812c757faa8SWei Wang 	gc_args->more++;
1813c757faa8SWei Wang }
1814c757faa8SWei Wang 
18158d1c802bSDavid Ahern void rt6_age_exceptions(struct fib6_info *rt,
1816c757faa8SWei Wang 			struct fib6_gc_args *gc_args,
1817c757faa8SWei Wang 			unsigned long now)
1818c757faa8SWei Wang {
1819c757faa8SWei Wang 	struct rt6_exception_bucket *bucket;
1820c757faa8SWei Wang 	struct rt6_exception *rt6_ex;
1821c757faa8SWei Wang 	struct hlist_node *tmp;
1822c757faa8SWei Wang 	int i;
1823c757faa8SWei Wang 
1824c757faa8SWei Wang 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1825c757faa8SWei Wang 		return;
1826c757faa8SWei Wang 
18271bfa26ffSEric Dumazet 	rcu_read_lock_bh();
18281bfa26ffSEric Dumazet 	spin_lock(&rt6_exception_lock);
1829c757faa8SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1830c757faa8SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
1831c757faa8SWei Wang 
1832c757faa8SWei Wang 	if (bucket) {
1833c757faa8SWei Wang 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1834c757faa8SWei Wang 			hlist_for_each_entry_safe(rt6_ex, tmp,
1835c757faa8SWei Wang 						  &bucket->chain, hlist) {
1836c757faa8SWei Wang 				rt6_age_examine_exception(bucket, rt6_ex,
1837c757faa8SWei Wang 							  gc_args, now);
1838c757faa8SWei Wang 			}
1839c757faa8SWei Wang 			bucket++;
1840c757faa8SWei Wang 		}
1841c757faa8SWei Wang 	}
18421bfa26ffSEric Dumazet 	spin_unlock(&rt6_exception_lock);
18431bfa26ffSEric Dumazet 	rcu_read_unlock_bh();
1844c757faa8SWei Wang }
1845c757faa8SWei Wang 
18461d053da9SDavid Ahern /* must be called with rcu lock held */
1847effda4ddSDavid Ahern int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
1848effda4ddSDavid Ahern 		      struct flowi6 *fl6, struct fib6_result *res, int strict)
18491da177e4SLinus Torvalds {
1850367efcb9SMartin KaFai Lau 	struct fib6_node *fn, *saved_fn;
18511da177e4SLinus Torvalds 
18526454743bSDavid Ahern 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1853367efcb9SMartin KaFai Lau 	saved_fn = fn;
18541da177e4SLinus Torvalds 
1855ca254490SDavid Ahern 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1856ca254490SDavid Ahern 		oif = 0;
1857ca254490SDavid Ahern 
1858a3c00e46SMartin KaFai Lau redo_rt6_select:
1859effda4ddSDavid Ahern 	rt6_select(net, fn, oif, res, strict);
1860effda4ddSDavid Ahern 	if (res->f6i == net->ipv6.fib6_null_entry) {
1861a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
1862a3c00e46SMartin KaFai Lau 		if (fn)
1863a3c00e46SMartin KaFai Lau 			goto redo_rt6_select;
1864367efcb9SMartin KaFai Lau 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1865367efcb9SMartin KaFai Lau 			/* also consider unreachable route */
1866367efcb9SMartin KaFai Lau 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1867367efcb9SMartin KaFai Lau 			fn = saved_fn;
1868367efcb9SMartin KaFai Lau 			goto redo_rt6_select;
1869367efcb9SMartin KaFai Lau 		}
1870a3c00e46SMartin KaFai Lau 	}
1871a3c00e46SMartin KaFai Lau 
1872effda4ddSDavid Ahern 	trace_fib6_table_lookup(net, res, table, fl6);
1873d52d3997SMartin KaFai Lau 
1874effda4ddSDavid Ahern 	return 0;
18751d053da9SDavid Ahern }
18761d053da9SDavid Ahern 
18771d053da9SDavid Ahern struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
18781d053da9SDavid Ahern 			       int oif, struct flowi6 *fl6,
18791d053da9SDavid Ahern 			       const struct sk_buff *skb, int flags)
18801d053da9SDavid Ahern {
1881b1d40991SDavid Ahern 	struct fib6_result res = {};
18821d053da9SDavid Ahern 	struct rt6_info *rt;
18831d053da9SDavid Ahern 	int strict = 0;
18841d053da9SDavid Ahern 
18851d053da9SDavid Ahern 	strict |= flags & RT6_LOOKUP_F_IFACE;
18861d053da9SDavid Ahern 	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
18871d053da9SDavid Ahern 	if (net->ipv6.devconf_all->forwarding == 0)
18881d053da9SDavid Ahern 		strict |= RT6_LOOKUP_F_REACHABLE;
18891d053da9SDavid Ahern 
18901d053da9SDavid Ahern 	rcu_read_lock();
18911d053da9SDavid Ahern 
1892effda4ddSDavid Ahern 	fib6_table_lookup(net, table, oif, fl6, &res, strict);
1893b1d40991SDavid Ahern 	if (res.f6i == net->ipv6.fib6_null_entry) {
1894421842edSDavid Ahern 		rt = net->ipv6.ip6_null_entry;
189566f5d6ceSWei Wang 		rcu_read_unlock();
1896d3843fe5SWei Wang 		dst_hold(&rt->dst);
1897d3843fe5SWei Wang 		return rt;
1898d3843fe5SWei Wang 	}
189923fb93a4SDavid Ahern 
1900b1d40991SDavid Ahern 	fib6_select_path(net, &res, fl6, oif, false, skb, strict);
1901d83009d4SDavid Ahern 
190223fb93a4SDavid Ahern 	/*Search through exception table */
19037e4b5128SDavid Ahern 	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
190423fb93a4SDavid Ahern 	if (rt) {
190510585b43SDavid Ahern 		if (ip6_hold_safe(net, &rt))
19061da177e4SLinus Torvalds 			dst_use_noref(&rt->dst, jiffies);
1907d4ead6b3SDavid Ahern 
190866f5d6ceSWei Wang 		rcu_read_unlock();
1909d52d3997SMartin KaFai Lau 		return rt;
19103da59bd9SMartin KaFai Lau 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1911b1d40991SDavid Ahern 			    !res.nh->fib_nh_gw_family)) {
19123da59bd9SMartin KaFai Lau 		/* Create a RTF_CACHE clone which will not be
19133da59bd9SMartin KaFai Lau 		 * owned by the fib6 tree.  It is for the special case where
19143da59bd9SMartin KaFai Lau 		 * the daddr in the skb during the neighbor look-up is different
19153da59bd9SMartin KaFai Lau 		 * from the fl6->daddr used to look-up route here.
19163da59bd9SMartin KaFai Lau 		 */
19173da59bd9SMartin KaFai Lau 		struct rt6_info *uncached_rt;
19183da59bd9SMartin KaFai Lau 
191985bd05deSDavid Ahern 		uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
1920d52d3997SMartin KaFai Lau 
19214d85cd0cSDavid Ahern 		rcu_read_unlock();
19223da59bd9SMartin KaFai Lau 
19231cfb71eeSWei Wang 		if (uncached_rt) {
19241cfb71eeSWei Wang 			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
19251cfb71eeSWei Wang 			 * No need for another dst_hold()
19261cfb71eeSWei Wang 			 */
19278d0b94afSMartin KaFai Lau 			rt6_uncached_list_add(uncached_rt);
192881eb8447SWei Wang 			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
19291cfb71eeSWei Wang 		} else {
19303da59bd9SMartin KaFai Lau 			uncached_rt = net->ipv6.ip6_null_entry;
19313da59bd9SMartin KaFai Lau 			dst_hold(&uncached_rt->dst);
19321cfb71eeSWei Wang 		}
1933b811580dSDavid Ahern 
19343da59bd9SMartin KaFai Lau 		return uncached_rt;
1935d52d3997SMartin KaFai Lau 	} else {
1936d52d3997SMartin KaFai Lau 		/* Get a percpu copy */
1937d52d3997SMartin KaFai Lau 
1938d52d3997SMartin KaFai Lau 		struct rt6_info *pcpu_rt;
1939d52d3997SMartin KaFai Lau 
1940951f788aSEric Dumazet 		local_bh_disable();
1941db3fedeeSDavid Ahern 		pcpu_rt = rt6_get_pcpu_route(&res);
1942d52d3997SMartin KaFai Lau 
194393531c67SDavid Ahern 		if (!pcpu_rt)
1944db3fedeeSDavid Ahern 			pcpu_rt = rt6_make_pcpu_route(net, &res);
194593531c67SDavid Ahern 
1946951f788aSEric Dumazet 		local_bh_enable();
1947951f788aSEric Dumazet 		rcu_read_unlock();
1948d4bea421SDavid Ahern 
1949d52d3997SMartin KaFai Lau 		return pcpu_rt;
1950d52d3997SMartin KaFai Lau 	}
1951c71099acSThomas Graf }
19529ff74384SDavid Ahern EXPORT_SYMBOL_GPL(ip6_pol_route);
1953c71099acSThomas Graf 
/*
 * Lookup callback for the input path (it is the function handed to
 * fib6_rule_lookup() by ip6_route_input_lookup()).  It resolves the route
 * through ip6_pol_route(), passing the flow's incoming interface index
 * (fl6->flowi6_iif) as the interface argument.
 */
1954b75cc8f9SDavid Ahern static struct rt6_info *ip6_pol_route_input(struct net *net,
1955b75cc8f9SDavid Ahern 					    struct fib6_table *table,
1956b75cc8f9SDavid Ahern 					    struct flowi6 *fl6,
1957b75cc8f9SDavid Ahern 					    const struct sk_buff *skb,
1958b75cc8f9SDavid Ahern 					    int flags)
19594acad72dSPavel Emelyanov {
1960b75cc8f9SDavid Ahern 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
19614acad72dSPavel Emelyanov }
19624acad72dSPavel Emelyanov 
/*
 * Route lookup for a flow received on @dev.
 *
 * RT6_LOOKUP_F_IFACE is added to @flags when rt6_need_strict() says so for
 * the destination address and @dev is not of type ARPHRD_PIMREG.  The lookup
 * itself is delegated to fib6_rule_lookup() with ip6_pol_route_input() as
 * the lookup callback; its result is returned unchanged.
 */
1963d409b847SMahesh Bandewar struct dst_entry *ip6_route_input_lookup(struct net *net,
196472331bc0SShmulik Ladkani 					 struct net_device *dev,
1965b75cc8f9SDavid Ahern 					 struct flowi6 *fl6,
1966b75cc8f9SDavid Ahern 					 const struct sk_buff *skb,
1967b75cc8f9SDavid Ahern 					 int flags)
196872331bc0SShmulik Ladkani {
196972331bc0SShmulik Ladkani 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
197072331bc0SShmulik Ladkani 		flags |= RT6_LOOKUP_F_IFACE;
197172331bc0SShmulik Ladkani 
1972b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
197372331bc0SShmulik Ladkani }
1974d409b847SMahesh Bandewar EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
197572331bc0SShmulik Ladkani 
/*
 * Fill @keys with the layer-3 fields used for multipath hashing: source
 * address, destination address, flow label and next-header protocol.
 *
 * Source of the fields:
 *  - If the packet is an ICMPv6 message of type DEST_UNREACH, PKT_TOOBIG,
 *    TIME_EXCEED or PARAMPROB and the IPv6 header embedded after the ICMPv6
 *    header can be read, the fields come from that inner header and
 *    @flkeys is ignored.
 *  - Otherwise (not ICMPv6, another ICMPv6 type, or a header could not be
 *    fetched) the fields come from @flkeys when it is non-NULL, else from
 *    the outer IPv6 header of @skb.
 *
 * Only the four fields above are written; the caller is expected to have
 * initialised the rest of @keys.
 */
197623aebdacSJakub Sitnicki static void ip6_multipath_l3_keys(const struct sk_buff *skb,
19775e5d6fedSRoopa Prabhu 				  struct flow_keys *keys,
19785e5d6fedSRoopa Prabhu 				  struct flow_keys *flkeys)
197923aebdacSJakub Sitnicki {
198023aebdacSJakub Sitnicki 	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
198123aebdacSJakub Sitnicki 	const struct ipv6hdr *key_iph = outer_iph;
19825e5d6fedSRoopa Prabhu 	struct flow_keys *_flkeys = flkeys;
198323aebdacSJakub Sitnicki 	const struct ipv6hdr *inner_iph;
198423aebdacSJakub Sitnicki 	const struct icmp6hdr *icmph;
198523aebdacSJakub Sitnicki 	struct ipv6hdr _inner_iph;
1986cea67a2dSEric Dumazet 	struct icmp6hdr _icmph;
198723aebdacSJakub Sitnicki 
198823aebdacSJakub Sitnicki 	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
198923aebdacSJakub Sitnicki 		goto out;
199023aebdacSJakub Sitnicki 
	/* Fetch the ICMPv6 header through a local copy buffer. */
1991cea67a2dSEric Dumazet 	icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1992cea67a2dSEric Dumazet 				   sizeof(_icmph), &_icmph);
1993cea67a2dSEric Dumazet 	if (!icmph)
1994cea67a2dSEric Dumazet 		goto out;
1995cea67a2dSEric Dumazet 
199623aebdacSJakub Sitnicki 	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
199723aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
199823aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
199923aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_PARAMPROB)
200023aebdacSJakub Sitnicki 		goto out;
200123aebdacSJakub Sitnicki 
	/* The embedded IPv6 header is read right after the ICMPv6 header. */
200223aebdacSJakub Sitnicki 	inner_iph = skb_header_pointer(skb,
200323aebdacSJakub Sitnicki 				       skb_transport_offset(skb) + sizeof(*icmph),
200423aebdacSJakub Sitnicki 				       sizeof(_inner_iph), &_inner_iph);
200523aebdacSJakub Sitnicki 	if (!inner_iph)
200623aebdacSJakub Sitnicki 		goto out;
200723aebdacSJakub Sitnicki 
	/* Hash on the inner header; clearing _flkeys forces the else branch below. */
200823aebdacSJakub Sitnicki 	key_iph = inner_iph;
20095e5d6fedSRoopa Prabhu 	_flkeys = NULL;
201023aebdacSJakub Sitnicki out:
20115e5d6fedSRoopa Prabhu 	if (_flkeys) {
20125e5d6fedSRoopa Prabhu 		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
20135e5d6fedSRoopa Prabhu 		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
20145e5d6fedSRoopa Prabhu 		keys->tags.flow_label = _flkeys->tags.flow_label;
20155e5d6fedSRoopa Prabhu 		keys->basic.ip_proto = _flkeys->basic.ip_proto;
20165e5d6fedSRoopa Prabhu 	} else {
201723aebdacSJakub Sitnicki 		keys->addrs.v6addrs.src = key_iph->saddr;
201823aebdacSJakub Sitnicki 		keys->addrs.v6addrs.dst = key_iph->daddr;
2019fa1be7e0SMichal Kubecek 		keys->tags.flow_label = ip6_flowlabel(key_iph);
202023aebdacSJakub Sitnicki 		keys->basic.ip_proto = key_iph->nexthdr;
202123aebdacSJakub Sitnicki 	}
20225e5d6fedSRoopa Prabhu }
202323aebdacSJakub Sitnicki 
/*
 * Compute the multipath hash for a flow, according to the policy returned
 * by ip6_multipath_hash_policy(@net):
 *
 *  policy 0: hash on L3 fields (addresses, flow label, protocol).  With an
 *            @skb they are taken by ip6_multipath_l3_keys(), otherwise
 *            from @fl6.
 *  policy 1: hash on addresses, ports and protocol.  With an @skb whose
 *            l4_hash bit is set, skb_get_hash_raw(skb) >> 1 is returned
 *            directly; otherwise the fields come from @flkeys (dissected
 *            from @skb here when @flkeys is NULL) or, without an @skb,
 *            from @fl6.
 *
 * The value returned is the computed hash shifted right by one bit.
 *
 * @fl6 is dereferenced only on the paths where @skb is NULL.
 *
 * NOTE(review): the switch has no default case, so for any policy value
 * other than 0 or 1 hash_keys would be passed to flow_hash_from_keys()
 * uninitialised.  Presumably the policy value is restricted to 0/1 where it
 * is configured -- confirm before adding new policy values.
 */
202423aebdacSJakub Sitnicki /* if skb is set it will be used and fl6 can be NULL */
2025b4bac172SDavid Ahern u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2026b4bac172SDavid Ahern 		       const struct sk_buff *skb, struct flow_keys *flkeys)
202723aebdacSJakub Sitnicki {
202823aebdacSJakub Sitnicki 	struct flow_keys hash_keys;
20299a2a537aSDavid Ahern 	u32 mhash;
203023aebdacSJakub Sitnicki 
2031bbfa047aSDavid S. Miller 	switch (ip6_multipath_hash_policy(net)) {
2032b4bac172SDavid Ahern 	case 0:
20336f74b6c2SDavid Ahern 		memset(&hash_keys, 0, sizeof(hash_keys));
20346f74b6c2SDavid Ahern 		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
20359a2a537aSDavid Ahern 		if (skb) {
20365e5d6fedSRoopa Prabhu 			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
20379a2a537aSDavid Ahern 		} else {
20389a2a537aSDavid Ahern 			hash_keys.addrs.v6addrs.src = fl6->saddr;
20399a2a537aSDavid Ahern 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2040fa1be7e0SMichal Kubecek 			hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
20419a2a537aSDavid Ahern 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
204223aebdacSJakub Sitnicki 		}
2043b4bac172SDavid Ahern 		break;
2044b4bac172SDavid Ahern 	case 1:
2045b4bac172SDavid Ahern 		if (skb) {
2046b4bac172SDavid Ahern 			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2047b4bac172SDavid Ahern 			struct flow_keys keys;
2048b4bac172SDavid Ahern 
2049b4bac172SDavid Ahern 			/* short-circuit if we already have L4 hash present */
2050b4bac172SDavid Ahern 			if (skb->l4_hash)
2051b4bac172SDavid Ahern 				return skb_get_hash_raw(skb) >> 1;
2052b4bac172SDavid Ahern 
2053b4bac172SDavid Ahern 			memset(&hash_keys, 0, sizeof(hash_keys));
2054b4bac172SDavid Ahern 
			/* No pre-dissected keys from the caller: dissect the skb locally. */
2055b4bac172SDavid Ahern                         if (!flkeys) {
2056b4bac172SDavid Ahern 				skb_flow_dissect_flow_keys(skb, &keys, flag);
2057b4bac172SDavid Ahern 				flkeys = &keys;
2058b4bac172SDavid Ahern 			}
2059b4bac172SDavid Ahern 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2060b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2061b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2062b4bac172SDavid Ahern 			hash_keys.ports.src = flkeys->ports.src;
2063b4bac172SDavid Ahern 			hash_keys.ports.dst = flkeys->ports.dst;
2064b4bac172SDavid Ahern 			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2065b4bac172SDavid Ahern 		} else {
2066b4bac172SDavid Ahern 			memset(&hash_keys, 0, sizeof(hash_keys));
2067b4bac172SDavid Ahern 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2068b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.src = fl6->saddr;
2069b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2070b4bac172SDavid Ahern 			hash_keys.ports.src = fl6->fl6_sport;
2071b4bac172SDavid Ahern 			hash_keys.ports.dst = fl6->fl6_dport;
2072b4bac172SDavid Ahern 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2073b4bac172SDavid Ahern 		}
2074b4bac172SDavid Ahern 		break;
2075b4bac172SDavid Ahern 	}
20769a2a537aSDavid Ahern 	mhash = flow_hash_from_keys(&hash_keys);
207723aebdacSJakub Sitnicki 
20789a2a537aSDavid Ahern 	return mhash >> 1;
207923aebdacSJakub Sitnicki }
208023aebdacSJakub Sitnicki 
/*
 * Route a received packet and attach the result to it.
 *
 * A flowi6 is built from the packet's IPv6 header (addresses, flow info,
 * next header), its mark and the ifindex of skb->dev.  If the skb carries
 * tunnel info whose mode does not have IP_TUNNEL_INFO_TX set, the tunnel id
 * is copied into the flow.  For ICMPv6 flows the multipath hash is computed
 * up front via rt6_multipath_hash().  Any dst already attached to the skb
 * is dropped and replaced by the result of ip6_route_input_lookup().
 */
2081c71099acSThomas Graf void ip6_route_input(struct sk_buff *skb)
2082c71099acSThomas Graf {
2083b71d1d42SEric Dumazet 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2084c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(skb->dev);
2085adaa70bbSThomas Graf 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2086904af04dSJiri Benc 	struct ip_tunnel_info *tun_info;
20874c9483b2SDavid S. Miller 	struct flowi6 fl6 = {
2088e0d56fddSDavid Ahern 		.flowi6_iif = skb->dev->ifindex,
20894c9483b2SDavid S. Miller 		.daddr = iph->daddr,
20904c9483b2SDavid S. Miller 		.saddr = iph->saddr,
20916502ca52SYOSHIFUJI Hideaki / 吉藤英明 		.flowlabel = ip6_flowinfo(iph),
20924c9483b2SDavid S. Miller 		.flowi6_mark = skb->mark,
20934c9483b2SDavid S. Miller 		.flowi6_proto = iph->nexthdr,
2094c71099acSThomas Graf 	};
20955e5d6fedSRoopa Prabhu 	struct flow_keys *flkeys = NULL, _flkeys;
2096adaa70bbSThomas Graf 
2097904af04dSJiri Benc 	tun_info = skb_tunnel_info(skb);
209846fa062aSJiri Benc 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2099904af04dSJiri Benc 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
21005e5d6fedSRoopa Prabhu 
	/* flkeys stays NULL unless the early dissect reports that it filled _flkeys. */
21015e5d6fedSRoopa Prabhu 	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
21025e5d6fedSRoopa Prabhu 		flkeys = &_flkeys;
21035e5d6fedSRoopa Prabhu 
210423aebdacSJakub Sitnicki 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2105b4bac172SDavid Ahern 		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
210606e9d040SJiri Benc 	skb_dst_drop(skb);
2107b75cc8f9SDavid Ahern 	skb_dst_set(skb,
2108b75cc8f9SDavid Ahern 		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
2109c71099acSThomas Graf }
2110c71099acSThomas Graf 
/*
 * Lookup callback for the output path (it is the function handed to
 * fib6_rule_lookup() by ip6_route_output_flags()).  It resolves the route
 * through ip6_pol_route(), passing the flow's outgoing interface index
 * (fl6->flowi6_oif) as the interface argument.
 */
2111b75cc8f9SDavid Ahern static struct rt6_info *ip6_pol_route_output(struct net *net,
2112b75cc8f9SDavid Ahern 					     struct fib6_table *table,
2113b75cc8f9SDavid Ahern 					     struct flowi6 *fl6,
2114b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
2115b75cc8f9SDavid Ahern 					     int flags)
2116c71099acSThomas Graf {
2117b75cc8f9SDavid Ahern 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2118c71099acSThomas Graf }
2119c71099acSThomas Graf 
/*
 * Route lookup for locally generated traffic described by @fl6.
 *
 * For multicast or link-local destinations l3mdev_link_scope_lookup() is
 * tried first and its dst is returned when it is non-NULL.  Otherwise the
 * flow's input interface is set to LOOPBACK_IFINDEX, the lookup flags are
 * extended as described below, and fib6_rule_lookup() is run with
 * ip6_pol_route_output() as the callback (no skb is passed).
 *
 * Flags added to @flags:
 *  - RT6_LOOKUP_F_IFACE when @sk is bound to a device, when
 *    rt6_need_strict() holds for the destination, or when an output
 *    interface is given and the source address is unspecified;
 *  - RT6_LOOKUP_F_HAS_SADDR when the source address is specified;
 *  - otherwise, if @sk is given, the flags derived from the socket's source
 *    address preferences by rt6_srcprefs2flags().
 *
 * @sk may be NULL.  Note that @fl6 is modified (flowi6_iif).
 */
21206f21c96aSPaolo Abeni struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
21216f21c96aSPaolo Abeni 					 struct flowi6 *fl6, int flags)
2122c71099acSThomas Graf {
2123d46a9d67SDavid Ahern 	bool any_src;
2124c71099acSThomas Graf 
21253ede0bbcSRobert Shearman 	if (ipv6_addr_type(&fl6->daddr) &
21263ede0bbcSRobert Shearman 	    (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)) {
21274c1feac5SDavid Ahern 		struct dst_entry *dst;
21284c1feac5SDavid Ahern 
21294c1feac5SDavid Ahern 		dst = l3mdev_link_scope_lookup(net, fl6);
2130ca254490SDavid Ahern 		if (dst)
2131ca254490SDavid Ahern 			return dst;
21324c1feac5SDavid Ahern 	}
2133ca254490SDavid Ahern 
21341fb9489bSPavel Emelyanov 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
21354dc27d1cSDavid McCullough 
2136d46a9d67SDavid Ahern 	any_src = ipv6_addr_any(&fl6->saddr);
2137741a11d9SDavid Ahern 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2138d46a9d67SDavid Ahern 	    (fl6->flowi6_oif && any_src))
213977d16f45SYOSHIFUJI Hideaki 		flags |= RT6_LOOKUP_F_IFACE;
2140c71099acSThomas Graf 
2141d46a9d67SDavid Ahern 	if (!any_src)
2142adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
21430c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 	else if (sk)
21440c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2145adaa70bbSThomas Graf 
2146b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
21471da177e4SLinus Torvalds }
21486f21c96aSPaolo Abeni EXPORT_SYMBOL_GPL(ip6_route_output_flags);
21491da177e4SLinus Torvalds 
/*
 * Build a "blackhole" replacement for @dst_orig.
 *
 * A new dst is allocated with ip6_dst_blackhole_ops on the namespace's
 * loopback device; its input and output handlers are dst_discard and
 * dst_discard_out.  Metrics, gateway, flags (with RTF_PCPU cleared) and the
 * destination key (plus the source key under CONFIG_IPV6_SUBTREES) are
 * copied from @dst_orig.
 *
 * The reference on @dst_orig is released in every case, including
 * allocation failure.
 *
 * Returns the new dst, or ERR_PTR(-ENOMEM) if the allocation failed.
 */
21502774c131SDavid S. Miller struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
215114e50e57SDavid S. Miller {
21525c1e6aa3SDavid S. Miller 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
21531dbe3252SWei Wang 	struct net_device *loopback_dev = net->loopback_dev;
215414e50e57SDavid S. Miller 	struct dst_entry *new = NULL;
215514e50e57SDavid S. Miller 
21561dbe3252SWei Wang 	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
215762cf27e5SSteffen Klassert 		       DST_OBSOLETE_DEAD, 0);
215814e50e57SDavid S. Miller 	if (rt) {
21590a1f5962SMartin KaFai Lau 		rt6_info_init(rt);
216081eb8447SWei Wang 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
21610a1f5962SMartin KaFai Lau 
2162d8d1f30bSChangli Gao 		new = &rt->dst;
216314e50e57SDavid S. Miller 		new->__use = 1;
2164352e512cSHerbert Xu 		new->input = dst_discard;
2165ede2059dSEric W. Biederman 		new->output = dst_discard_out;
216614e50e57SDavid S. Miller 
2167defb3519SDavid S. Miller 		dst_copy_metrics(new, &ort->dst);
216814e50e57SDavid S. Miller 
21691dbe3252SWei Wang 		rt->rt6i_idev = in6_dev_get(loopback_dev);
21704e3fd7a0SAlexey Dobriyan 		rt->rt6i_gateway = ort->rt6i_gateway;
21710a1f5962SMartin KaFai Lau 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
217214e50e57SDavid S. Miller 
217314e50e57SDavid S. Miller 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
217414e50e57SDavid S. Miller #ifdef CONFIG_IPV6_SUBTREES
217514e50e57SDavid S. Miller 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
217614e50e57SDavid S. Miller #endif
217714e50e57SDavid S. Miller 	}
217814e50e57SDavid S. Miller 
217969ead7afSDavid S. Miller 	dst_release(dst_orig);
218069ead7afSDavid S. Miller 	return new ? new : ERR_PTR(-ENOMEM);
218114e50e57SDavid S. Miller }
218214e50e57SDavid S. Miller 
21831da177e4SLinus Torvalds /*
21841da177e4SLinus Torvalds  *	Destination cache support functions
21851da177e4SLinus Torvalds  */
21861da177e4SLinus Torvalds 
/*
 * Validate a fib6 entry against a caller-held cookie.
 *
 * Returns true only if fib6_get_cookie_safe() succeeds for @f6i, the cookie
 * it yields equals @cookie, and the entry is not reported expired by
 * fib6_check_expired().
 */
21878d1c802bSDavid Ahern static bool fib6_check(struct fib6_info *f6i, u32 cookie)
21883da59bd9SMartin KaFai Lau {
218936143645SSteffen Klassert 	u32 rt_cookie = 0;
2190c5cff856SWei Wang 
21918ae86971SDavid Ahern 	if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
219293531c67SDavid Ahern 		return false;
219393531c67SDavid Ahern 
219493531c67SDavid Ahern 	if (fib6_check_expired(f6i))
219593531c67SDavid Ahern 		return false;
219693531c67SDavid Ahern 
219793531c67SDavid Ahern 	return true;
219893531c67SDavid Ahern }
219993531c67SDavid Ahern 
/*
 * Validate @rt against @cookie using the cookie of its originating fib6
 * entry @from.
 *
 * Returns &rt->dst when the cookie matches and rt6_check_expired() does not
 * report @rt as expired, NULL otherwise.
 *
 * @from may be NULL; in that case no cookie is fetched and rt_cookie keeps
 * its initial value 0, so the check only passes for @cookie == 0.
 */
2200a68886a6SDavid Ahern static struct dst_entry *rt6_check(struct rt6_info *rt,
2201a68886a6SDavid Ahern 				   struct fib6_info *from,
2202a68886a6SDavid Ahern 				   u32 cookie)
22033da59bd9SMartin KaFai Lau {
2204c5cff856SWei Wang 	u32 rt_cookie = 0;
2205c5cff856SWei Wang 
2206a68886a6SDavid Ahern 	if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
220793531c67SDavid Ahern 	    rt_cookie != cookie)
22083da59bd9SMartin KaFai Lau 		return NULL;
22093da59bd9SMartin KaFai Lau 
22103da59bd9SMartin KaFai Lau 	if (rt6_check_expired(rt))
22113da59bd9SMartin KaFai Lau 		return NULL;
22123da59bd9SMartin KaFai Lau 
22133da59bd9SMartin KaFai Lau 	return &rt->dst;
22143da59bd9SMartin KaFai Lau }
22153da59bd9SMartin KaFai Lau 
/*
 * Validate @rt through its originating fib6 entry @from.
 *
 * Returns &rt->dst only if all of the following hold: @rt itself is not
 * expired (__rt6_check_expired()), its dst is still marked
 * DST_OBSOLETE_FORCE_CHK, and fib6_check() accepts @from for @cookie.
 * Returns NULL otherwise.  @from is passed to fib6_check() as is; the
 * visible caller (ip6_dst_check()) only calls this with a non-NULL @from.
 */
2216a68886a6SDavid Ahern static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2217a68886a6SDavid Ahern 					    struct fib6_info *from,
2218a68886a6SDavid Ahern 					    u32 cookie)
22193da59bd9SMartin KaFai Lau {
22205973fb1eSMartin KaFai Lau 	if (!__rt6_check_expired(rt) &&
22215973fb1eSMartin KaFai Lau 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
2222a68886a6SDavid Ahern 	    fib6_check(from, cookie))
22233da59bd9SMartin KaFai Lau 		return &rt->dst;
22243da59bd9SMartin KaFai Lau 	else
22253da59bd9SMartin KaFai Lau 		return NULL;
22263da59bd9SMartin KaFai Lau }
22273da59bd9SMartin KaFai Lau 
/*
 * Validate a cached IPv6 dst against @cookie.
 *
 * Under rcu_read_lock() the originating fib6 entry (rt->from) is fetched.
 * If it exists and the route is either a per-cpu copy (RTF_PCPU) or linked
 * on an rt6i_uncached list, validation goes through rt6_dst_from_check();
 * every other case goes through rt6_check().
 *
 * Returns @dst if it is still valid, NULL otherwise.
 */
22281da177e4SLinus Torvalds static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
22291da177e4SLinus Torvalds {
2230a87b7dc9SDavid Ahern 	struct dst_entry *dst_ret;
2231a68886a6SDavid Ahern 	struct fib6_info *from;
22321da177e4SLinus Torvalds 	struct rt6_info *rt;
22331da177e4SLinus Torvalds 
2234a87b7dc9SDavid Ahern 	rt = container_of(dst, struct rt6_info, dst);
2235a87b7dc9SDavid Ahern 
2236a87b7dc9SDavid Ahern 	rcu_read_lock();
22371da177e4SLinus Torvalds 
22386f3118b5SNicolas Dichtel 	/* All IPV6 dsts are created with ->obsolete set to the value
22396f3118b5SNicolas Dichtel 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
22406f3118b5SNicolas Dichtel 	 * into this function always.
22416f3118b5SNicolas Dichtel 	 */
2242e3bc10bdSHannes Frederic Sowa 
2243a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
22444b32b5adSMartin KaFai Lau 
2245a68886a6SDavid Ahern 	if (from && (rt->rt6i_flags & RTF_PCPU ||
2246a68886a6SDavid Ahern 	    unlikely(!list_empty(&rt->rt6i_uncached))))
2247a68886a6SDavid Ahern 		dst_ret = rt6_dst_from_check(rt, from, cookie);
22483da59bd9SMartin KaFai Lau 	else
2249a68886a6SDavid Ahern 		dst_ret = rt6_check(rt, from, cookie);
2250a87b7dc9SDavid Ahern 
2251a87b7dc9SDavid Ahern 	rcu_read_unlock();
2252a87b7dc9SDavid Ahern 
2253a87b7dc9SDavid Ahern 	return dst_ret;
22541da177e4SLinus Torvalds }
22551da177e4SLinus Torvalds 
/*
 * React to negative advice about @dst.
 *
 *  - NULL @dst: returned unchanged.
 *  - RTF_CACHE route: if rt6_check_expired() reports it expired, it is
 *    removed through rt6_remove_exception_rt() and NULL is returned;
 *    otherwise @dst is returned unchanged.
 *  - Any other route: the reference is dropped with dst_release() and NULL
 *    is returned.
 *
 * NOTE(review): only the non-RTF_CACHE branch calls dst_release() here;
 * whether the expired-cache branch drops the caller's reference elsewhere
 * cannot be seen from this function -- confirm against
 * rt6_remove_exception_rt().
 */
22561da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
22571da177e4SLinus Torvalds {
22581da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *) dst;
22591da177e4SLinus Torvalds 
22601da177e4SLinus Torvalds 	if (rt) {
226154c1a859SYOSHIFUJI Hideaki / 吉藤英明 		if (rt->rt6i_flags & RTF_CACHE) {
2262c3c14da0SDavid Ahern 			rcu_read_lock();
226354c1a859SYOSHIFUJI Hideaki / 吉藤英明 			if (rt6_check_expired(rt)) {
226493531c67SDavid Ahern 				rt6_remove_exception_rt(rt);
226554c1a859SYOSHIFUJI Hideaki / 吉藤英明 				dst = NULL;
22661da177e4SLinus Torvalds 			}
2267c3c14da0SDavid Ahern 			rcu_read_unlock();
226854c1a859SYOSHIFUJI Hideaki / 吉藤英明 		} else {
226954c1a859SYOSHIFUJI Hideaki / 吉藤英明 			dst_release(dst);
227054c1a859SYOSHIFUJI Hideaki / 吉藤英明 			dst = NULL;
227154c1a859SYOSHIFUJI Hideaki / 吉藤英明 		}
227254c1a859SYOSHIFUJI Hideaki / 吉藤英明 	}
227354c1a859SYOSHIFUJI Hideaki / 吉藤英明 	return dst;
22741da177e4SLinus Torvalds }
22751da177e4SLinus Torvalds 
/*
 * Link-failure handler for @skb.
 *
 * Sends an ICMPv6 Destination Unreachable / Address Unreachable message for
 * the packet, then, if the skb has a dst attached, acts on that route
 * under rcu_read_lock():
 *  - an RTF_CACHE route is removed via rt6_remove_exception_rt();
 *  - otherwise, if the route still has an originating fib6 entry with a
 *    fib6 node and the route is flagged RTF_DEFAULT, the node's fn_sernum
 *    is set to -1 (presumably to invalidate dsts cached against that node
 *    -- confirm against the sernum users).
 */
22761da177e4SLinus Torvalds static void ip6_link_failure(struct sk_buff *skb)
22771da177e4SLinus Torvalds {
22781da177e4SLinus Torvalds 	struct rt6_info *rt;
22791da177e4SLinus Torvalds 
22803ffe533cSAlexey Dobriyan 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
22811da177e4SLinus Torvalds 
2282adf30907SEric Dumazet 	rt = (struct rt6_info *) skb_dst(skb);
22831da177e4SLinus Torvalds 	if (rt) {
22848a14e46fSDavid Ahern 		rcu_read_lock();
22851eb4f758SHannes Frederic Sowa 		if (rt->rt6i_flags & RTF_CACHE) {
228693531c67SDavid Ahern 			rt6_remove_exception_rt(rt);
2287c5cff856SWei Wang 		} else {
2288a68886a6SDavid Ahern 			struct fib6_info *from;
2289c5cff856SWei Wang 			struct fib6_node *fn;
2290c5cff856SWei Wang 
2291a68886a6SDavid Ahern 			from = rcu_dereference(rt->from);
2292a68886a6SDavid Ahern 			if (from) {
2293a68886a6SDavid Ahern 				fn = rcu_dereference(from->fib6_node);
2294c5cff856SWei Wang 				if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2295c5cff856SWei Wang 					fn->fn_sernum = -1;
2296a68886a6SDavid Ahern 			}
22971da177e4SLinus Torvalds 		}
22981da177e4SLinus Torvalds 		rcu_read_unlock();
22991da177e4SLinus Torvalds 	}
23001da177e4SLinus Torvalds }
23011da177e4SLinus Torvalds 
/*
 * Give @rt0 an expiry of @timeout (applied through dst_set_expires()) and
 * mark it RTF_EXPIRES.
 *
 * If the route did not carry RTF_EXPIRES yet, its dst.expires is first
 * seeded from the originating fib6 entry (rt0->from), when there is one,
 * before dst_set_expires() is applied.
 */
23026a3e030fSDavid Ahern static void rt6_update_expires(struct rt6_info *rt0, int timeout)
23036a3e030fSDavid Ahern {
2304a68886a6SDavid Ahern 	if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2305a68886a6SDavid Ahern 		struct fib6_info *from;
2306a68886a6SDavid Ahern 
2307a68886a6SDavid Ahern 		rcu_read_lock();
2308a68886a6SDavid Ahern 		from = rcu_dereference(rt0->from);
2309a68886a6SDavid Ahern 		if (from)
2310a68886a6SDavid Ahern 			rt0->dst.expires = from->expires;
2311a68886a6SDavid Ahern 		rcu_read_unlock();
2312a68886a6SDavid Ahern 	}
23136a3e030fSDavid Ahern 
23146a3e030fSDavid Ahern 	dst_set_expires(&rt0->dst, timeout);
23156a3e030fSDavid Ahern 	rt0->rt6i_flags |= RTF_EXPIRES;
23166700c270SDavid S. Miller }
23171da177e4SLinus Torvalds 
/*
 * Record a new path MTU on @rt: set its RTAX_MTU metric to @mtu, flag the
 * route RTF_MODIFIED, and (re)arm its expiry using the namespace's
 * ip6_rt_mtu_expires sysctl value.
 */
231845e4fd26SMartin KaFai Lau static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
231945e4fd26SMartin KaFai Lau {
232045e4fd26SMartin KaFai Lau 	struct net *net = dev_net(rt->dst.dev);
232145e4fd26SMartin KaFai Lau 
2322d4ead6b3SDavid Ahern 	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
232345e4fd26SMartin KaFai Lau 	rt->rt6i_flags |= RTF_MODIFIED;
232445e4fd26SMartin KaFai Lau 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
232545e4fd26SMartin KaFai Lau }
232645e4fd26SMartin KaFai Lau 
/*
 * Tell whether a PMTU update for @rt should be stored in a separate cache
 * entry rather than on @rt itself: true when @rt is not already an
 * RTF_CACHE route and is either a per-cpu copy (RTF_PCPU) or has an
 * originating fib6 entry (rt->from non-NULL).
 */
23270d3f6d29SMartin KaFai Lau static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
23280d3f6d29SMartin KaFai Lau {
23290d3f6d29SMartin KaFai Lau 	return !(rt->rt6i_flags & RTF_CACHE) &&
23301490ed2aSPaolo Abeni 		(rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
23310d3f6d29SMartin KaFai Lau }
23320d3f6d29SMartin KaFai Lau 
/*
 * Apply a path-MTU update to @dst.
 *
 * @iph and @sk are alternative sources for the flow's addresses: @iph is
 * used when non-NULL, else @sk, else the addresses are unknown (NULL).
 *
 * Steps:
 *  1. Nothing is done if the RTAX_MTU metric of @dst is locked.
 *  2. The neighbour is confirmed via dst_confirm_neigh().
 *  3. @mtu is raised to at least IPV6_MIN_MTU; the update is dropped if it
 *     would not lower the current dst_mtu().
 *  4. If rt6_cache_allowed_for_pmtu() is false, the MTU is recorded on the
 *     route itself (and the exception stamp refreshed for RTF_CACHE
 *     routes).
 *  5. Otherwise, when the destination address is known, a new route is
 *     created with ip6_rt_cache_alloc() from the originating fib6 entry,
 *     the MTU is recorded on it and it is inserted as an exception.  If the
 *     insert fails the new route is released immediately.  If the route has
 *     no originating entry any more, nothing is done.
 */
233345e4fd26SMartin KaFai Lau static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
233445e4fd26SMartin KaFai Lau 				 const struct ipv6hdr *iph, u32 mtu)
23351da177e4SLinus Torvalds {
23360dec879fSJulian Anastasov 	const struct in6_addr *daddr, *saddr;
23371da177e4SLinus Torvalds 	struct rt6_info *rt6 = (struct rt6_info *)dst;
23381da177e4SLinus Torvalds 
233919bda36cSXin Long 	if (dst_metric_locked(dst, RTAX_MTU))
234019bda36cSXin Long 		return;
234119bda36cSXin Long 
234245e4fd26SMartin KaFai Lau 	if (iph) {
234345e4fd26SMartin KaFai Lau 		daddr = &iph->daddr;
234445e4fd26SMartin KaFai Lau 		saddr = &iph->saddr;
234545e4fd26SMartin KaFai Lau 	} else if (sk) {
234645e4fd26SMartin KaFai Lau 		daddr = &sk->sk_v6_daddr;
234745e4fd26SMartin KaFai Lau 		saddr = &inet6_sk(sk)->saddr;
234845e4fd26SMartin KaFai Lau 	} else {
23490dec879fSJulian Anastasov 		daddr = NULL;
23500dec879fSJulian Anastasov 		saddr = NULL;
23511da177e4SLinus Torvalds 	}
23520dec879fSJulian Anastasov 	dst_confirm_neigh(dst, daddr);
23530dec879fSJulian Anastasov 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
23540dec879fSJulian Anastasov 	if (mtu >= dst_mtu(dst))
23550dec879fSJulian Anastasov 		return;
23560dec879fSJulian Anastasov 
23570dec879fSJulian Anastasov 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
23580dec879fSJulian Anastasov 		rt6_do_update_pmtu(rt6, mtu);
23592b760fcfSWei Wang 		/* update rt6_ex->stamp for cache */
23602b760fcfSWei Wang 		if (rt6->rt6i_flags & RTF_CACHE)
23612b760fcfSWei Wang 			rt6_update_exception_stamp_rt(rt6);
23620dec879fSJulian Anastasov 	} else if (daddr) {
236385bd05deSDavid Ahern 		struct fib6_result res = {};
23640dec879fSJulian Anastasov 		struct rt6_info *nrt6;
23650dec879fSJulian Anastasov 
23664d85cd0cSDavid Ahern 		rcu_read_lock();
236785bd05deSDavid Ahern 		res.f6i = rcu_dereference(rt6->from);
236885bd05deSDavid Ahern 		if (!res.f6i) {
23699c69a132SJonathan Lemon 			rcu_read_unlock();
23709c69a132SJonathan Lemon 			return;
23719c69a132SJonathan Lemon 		}
		/* Describe the originating entry so a new route can be built from it. */
237285bd05deSDavid Ahern 		res.nh = &res.f6i->fib6_nh;
23737d21fec9SDavid Ahern 		res.fib6_flags = res.f6i->fib6_flags;
23747d21fec9SDavid Ahern 		res.fib6_type = res.f6i->fib6_type;
23757d21fec9SDavid Ahern 
237685bd05deSDavid Ahern 		nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
237745e4fd26SMartin KaFai Lau 		if (nrt6) {
237845e4fd26SMartin KaFai Lau 			rt6_do_update_pmtu(nrt6, mtu);
			/* A non-zero return means the insert failed: drop the new route. */
23795012f0a5SDavid Ahern 			if (rt6_insert_exception(nrt6, &res))
23802b760fcfSWei Wang 				dst_release_immediate(&nrt6->dst);
238145e4fd26SMartin KaFai Lau 		}
2382a68886a6SDavid Ahern 		rcu_read_unlock();
238345e4fd26SMartin KaFai Lau 	}
238445e4fd26SMartin KaFai Lau }
238545e4fd26SMartin KaFai Lau 
/*
 * PMTU update entry point taking an skb: forwards to
 * __ip6_rt_update_pmtu() with the skb's IPv6 header, or with a NULL header
 * when @skb is NULL (in which case the addresses come from @sk, if any).
 */
238645e4fd26SMartin KaFai Lau static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
238745e4fd26SMartin KaFai Lau 			       struct sk_buff *skb, u32 mtu)
238845e4fd26SMartin KaFai Lau {
238945e4fd26SMartin KaFai Lau 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
23901da177e4SLinus Torvalds }
23911da177e4SLinus Torvalds 
/*
 * Update the path MTU for the flow described by the IPv6 header found at
 * skb->data.
 *
 * A flow is built from that header's addresses and flow info plus @oif,
 * @uid and the mark (@mark if non-zero, otherwise
 * IP6_REPLY_MARK(net, skb->mark)).  The route is looked up with
 * ip6_route_output(); if the resulting dst carries no error the MTU update
 * is applied to it.  The looked-up dst is released before returning.
 *
 * @mtu is in network byte order (__be32) and is converted with ntohl().
 */
239242ae66c8SDavid S. Miller void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2393e2d118a1SLorenzo Colitti 		     int oif, u32 mark, kuid_t uid)
239481aded24SDavid S. Miller {
239581aded24SDavid S. Miller 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
239681aded24SDavid S. Miller 	struct dst_entry *dst;
2397dc92095dSMaciej Żenczykowski 	struct flowi6 fl6 = {
2398dc92095dSMaciej Żenczykowski 		.flowi6_oif = oif,
2399dc92095dSMaciej Żenczykowski 		.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark),
2400dc92095dSMaciej Żenczykowski 		.daddr = iph->daddr,
2401dc92095dSMaciej Żenczykowski 		.saddr = iph->saddr,
2402dc92095dSMaciej Żenczykowski 		.flowlabel = ip6_flowinfo(iph),
2403dc92095dSMaciej Żenczykowski 		.flowi6_uid = uid,
2404dc92095dSMaciej Żenczykowski 	};
240581aded24SDavid S. Miller 
240681aded24SDavid S. Miller 	dst = ip6_route_output(net, NULL, &fl6);
240781aded24SDavid S. Miller 	if (!dst->error)
240845e4fd26SMartin KaFai Lau 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
240981aded24SDavid S. Miller 	dst_release(dst);
241081aded24SDavid S. Miller }
241181aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_update_pmtu);
241281aded24SDavid S. Miller 
/*
 * Socket-aware PMTU update.
 *
 * The output interface is the socket's bound device; if the socket is not
 * bound and the skb has a device, l3mdev_master_ifindex() of that device is
 * used instead.  ip6_update_pmtu() is then called with the socket's mark
 * and uid.
 *
 * Afterwards the socket's cached dst is examined: if there is one, its
 * ->obsolete is non-zero and its ->check() callback rejects it for the
 * socket's dst_cookie, the dst is refreshed with ip6_datagram_dst_update()
 * under bh_lock_sock() -- but only when the socket is not currently owned
 * by user context and its peer address is not IPv4-mapped.
 */
241381aded24SDavid S. Miller void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
241481aded24SDavid S. Miller {
24157ddacfa5SDavid Ahern 	int oif = sk->sk_bound_dev_if;
241633c162a9SMartin KaFai Lau 	struct dst_entry *dst;
241733c162a9SMartin KaFai Lau 
24187ddacfa5SDavid Ahern 	if (!oif && skb->dev)
24197ddacfa5SDavid Ahern 		oif = l3mdev_master_ifindex(skb->dev);
24207ddacfa5SDavid Ahern 
24217ddacfa5SDavid Ahern 	ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
242233c162a9SMartin KaFai Lau 
242333c162a9SMartin KaFai Lau 	dst = __sk_dst_get(sk);
	/* Nothing more to do when there is no cached dst or it is still valid. */
242433c162a9SMartin KaFai Lau 	if (!dst || !dst->obsolete ||
242533c162a9SMartin KaFai Lau 	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
242633c162a9SMartin KaFai Lau 		return;
242733c162a9SMartin KaFai Lau 
242833c162a9SMartin KaFai Lau 	bh_lock_sock(sk);
242933c162a9SMartin KaFai Lau 	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
243033c162a9SMartin KaFai Lau 		ip6_datagram_dst_update(sk, false);
243133c162a9SMartin KaFai Lau 	bh_unlock_sock(sk);
243281aded24SDavid S. Miller }
243381aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
243481aded24SDavid S. Miller 
/*
 * Store @dst on @sk through ip6_dst_store(), telling it which of the
 * socket's addresses the dst was looked up for:
 *  - the destination argument is &sk->sk_v6_daddr when the flow's
 *    destination equals the socket's peer address, NULL otherwise;
 *  - the source argument is &np->saddr when CONFIG_IPV6_SUBTREES is enabled
 *    and the flow's source equals the socket's source address, NULL
 *    otherwise (always NULL without CONFIG_IPV6_SUBTREES).
 */
24357d6850f7SAlexey Kodanev void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
24367d6850f7SAlexey Kodanev 			   const struct flowi6 *fl6)
24377d6850f7SAlexey Kodanev {
24387d6850f7SAlexey Kodanev #ifdef CONFIG_IPV6_SUBTREES
24397d6850f7SAlexey Kodanev 	struct ipv6_pinfo *np = inet6_sk(sk);
24407d6850f7SAlexey Kodanev #endif
24417d6850f7SAlexey Kodanev 
24427d6850f7SAlexey Kodanev 	ip6_dst_store(sk, dst,
24437d6850f7SAlexey Kodanev 		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
24447d6850f7SAlexey Kodanev 		      &sk->sk_v6_daddr : NULL,
24457d6850f7SAlexey Kodanev #ifdef CONFIG_IPV6_SUBTREES
24467d6850f7SAlexey Kodanev 		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
24477d6850f7SAlexey Kodanev 		      &np->saddr :
24487d6850f7SAlexey Kodanev #endif
24497d6850f7SAlexey Kodanev 		      NULL);
24507d6850f7SAlexey Kodanev }
24517d6850f7SAlexey Kodanev 
/*
 * Decide whether the nexthop in @res is the router a redirect from gateway
 * @gw may legitimately have come from.
 *
 * Returns false when the nexthop is dead, has no gateway, or its device
 * does not match the flow's output interface.  Returns true when the
 * nexthop's gateway equals @gw.  If the gateways differ, the exception
 * table is consulted via rt6_find_cached_rt(): when a cached route for the
 * flow exists whose gateway equals @gw, that route is stored in *@ret and
 * true is returned; otherwise false.
 *
 * *@ret is written only in that last matching case.
 */
24529b6b35abSDavid Ahern static bool ip6_redirect_nh_match(const struct fib6_result *res,
24530b34eb00SDavid Ahern 				  struct flowi6 *fl6,
24540b34eb00SDavid Ahern 				  const struct in6_addr *gw,
24550b34eb00SDavid Ahern 				  struct rt6_info **ret)
24560b34eb00SDavid Ahern {
24579b6b35abSDavid Ahern 	const struct fib6_nh *nh = res->nh;
24589b6b35abSDavid Ahern 
24590b34eb00SDavid Ahern 	if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
24600b34eb00SDavid Ahern 	    fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
24610b34eb00SDavid Ahern 		return false;
24620b34eb00SDavid Ahern 
24630b34eb00SDavid Ahern 	/* rt_cache's gateway might be different from its 'parent'
24640b34eb00SDavid Ahern 	 * in the case of an ip redirect.
24650b34eb00SDavid Ahern 	 * So we keep searching in the exception table if the gateway
24660b34eb00SDavid Ahern 	 * is different.
24670b34eb00SDavid Ahern 	 */
24680b34eb00SDavid Ahern 	if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
24690b34eb00SDavid Ahern 		struct rt6_info *rt_cache;
24700b34eb00SDavid Ahern 
24719b6b35abSDavid Ahern 		rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
24720b34eb00SDavid Ahern 		if (rt_cache &&
24730b34eb00SDavid Ahern 		    ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
24740b34eb00SDavid Ahern 			*ret = rt_cache;
24750b34eb00SDavid Ahern 			return true;
24760b34eb00SDavid Ahern 		}
24770b34eb00SDavid Ahern 		return false;
24780b34eb00SDavid Ahern 	}
24790b34eb00SDavid Ahern 	return true;
24800b34eb00SDavid Ahern }
24810b34eb00SDavid Ahern 
2482b55b76b2SDuan Jiong /* Handle redirects */
/*
 * Flow descriptor for redirect lookups: a flowi6 extended with the address
 * of the gateway the redirect came from.
 *
 * fl6 must remain the first member: ip6_route_redirect() passes &rdfl.fl6
 * down the lookup path and __ip6_route_redirect() casts that flowi6 pointer
 * back to struct ip6rd_flowi to reach ->gateway.
 */
2483b55b76b2SDuan Jiong struct ip6rd_flowi {
2484b55b76b2SDuan Jiong 	struct flowi6 fl6;
2485b55b76b2SDuan Jiong 	struct in6_addr gateway;
2486b55b76b2SDuan Jiong };
2487b55b76b2SDuan Jiong 
/*
 * Per-table lookup callback used for redirect handling (it is handed to
 * fib6_rule_lookup() by ip6_route_redirect()).
 *
 * @fl6 is really the fl6 member of a struct ip6rd_flowi; the gateway the
 * redirect came from is read from that enclosing structure.
 *
 * Starting from the fib6 node matching the flow, the node's routes are
 * walked under rcu_read_lock():
 *  - expired entries are skipped;
 *  - an RTF_REJECT entry ends the walk and makes the function return the
 *    namespace's ip6_null_entry;
 *  - the first entry accepted by ip6_redirect_nh_match() ends the search.
 * If a node yields no entry at all, the search backtracks with
 * fib6_backtrack() and restarts on the node it returns, falling back to
 * fib6_null_entry when there is nowhere left to go.
 *
 * Result: if a cached route was selected (by the match helper, or the null
 * entry for RTF_REJECT) a reference is taken on it with ip6_hold_safe();
 * otherwise a route is created from the selected fib6 entry with
 * ip6_create_rt_rcu().  The lookup is reported through
 * trace_fib6_table_lookup().
 *
 * @skb and @flags are not used by this function.
 *
 * NOTE(review): the ';' after the closing brace of this function is a
 * stray empty declaration and could be dropped.
 */
2488b55b76b2SDuan Jiong static struct rt6_info *__ip6_route_redirect(struct net *net,
2489b55b76b2SDuan Jiong 					     struct fib6_table *table,
2490b55b76b2SDuan Jiong 					     struct flowi6 *fl6,
2491b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
2492b55b76b2SDuan Jiong 					     int flags)
2493b55b76b2SDuan Jiong {
2494b55b76b2SDuan Jiong 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
24950b34eb00SDavid Ahern 	struct rt6_info *ret = NULL;
24969b6b35abSDavid Ahern 	struct fib6_result res = {};
24978d1c802bSDavid Ahern 	struct fib6_info *rt;
2498b55b76b2SDuan Jiong 	struct fib6_node *fn;
2499b55b76b2SDuan Jiong 
2500b55b76b2SDuan Jiong 	/* Get the "current" route for this destination and
250167c408cfSAlexander Alemayhu 	 * check if the redirect has come from appropriate router.
2502b55b76b2SDuan Jiong 	 *
2503b55b76b2SDuan Jiong 	 * RFC 4861 specifies that redirects should only be
2504b55b76b2SDuan Jiong 	 * accepted if they come from the nexthop to the target.
2505b55b76b2SDuan Jiong 	 * Due to the way the routes are chosen, this notion
2506b55b76b2SDuan Jiong 	 * is a bit fuzzy and one might need to check all possible
2507b55b76b2SDuan Jiong 	 * routes.
2508b55b76b2SDuan Jiong 	 */
2509b55b76b2SDuan Jiong 
251066f5d6ceSWei Wang 	rcu_read_lock();
25116454743bSDavid Ahern 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2512b55b76b2SDuan Jiong restart:
251366f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(fn) {
25149b6b35abSDavid Ahern 		res.f6i = rt;
25159b6b35abSDavid Ahern 		res.nh = &rt->fib6_nh;
25169b6b35abSDavid Ahern 
251714895687SDavid Ahern 		if (fib6_check_expired(rt))
2518b55b76b2SDuan Jiong 			continue;
251993c2fb25SDavid Ahern 		if (rt->fib6_flags & RTF_REJECT)
2520b55b76b2SDuan Jiong 			break;
25219b6b35abSDavid Ahern 		if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret))
25220b34eb00SDavid Ahern 			goto out;
2523b55b76b2SDuan Jiong 	}
2524b55b76b2SDuan Jiong 
	/* rt is NULL here when the walk ran off the end of the node's list. */
2525b55b76b2SDuan Jiong 	if (!rt)
2526421842edSDavid Ahern 		rt = net->ipv6.fib6_null_entry;
252793c2fb25SDavid Ahern 	else if (rt->fib6_flags & RTF_REJECT) {
252823fb93a4SDavid Ahern 		ret = net->ipv6.ip6_null_entry;
2529b0a1ba59SMartin KaFai Lau 		goto out;
2530b0a1ba59SMartin KaFai Lau 	}
2531b0a1ba59SMartin KaFai Lau 
2532421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry) {
2533a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
2534a3c00e46SMartin KaFai Lau 		if (fn)
2535a3c00e46SMartin KaFai Lau 			goto restart;
2536b55b76b2SDuan Jiong 	}
2537a3c00e46SMartin KaFai Lau 
25389b6b35abSDavid Ahern 	res.f6i = rt;
25399b6b35abSDavid Ahern 	res.nh = &rt->fib6_nh;
2540b0a1ba59SMartin KaFai Lau out:
25417d21fec9SDavid Ahern 	if (ret) {
254210585b43SDavid Ahern 		ip6_hold_safe(net, &ret);
25437d21fec9SDavid Ahern 	} else {
25447d21fec9SDavid Ahern 		res.fib6_flags = res.f6i->fib6_flags;
25457d21fec9SDavid Ahern 		res.fib6_type = res.f6i->fib6_type;
25469b6b35abSDavid Ahern 		ret = ip6_create_rt_rcu(&res);
25477d21fec9SDavid Ahern 	}
2548b55b76b2SDuan Jiong 
254966f5d6ceSWei Wang 	rcu_read_unlock();
2550b55b76b2SDuan Jiong 
25518ff2e5b2SDavid Ahern 	trace_fib6_table_lookup(net, &res, table, fl6);
255223fb93a4SDavid Ahern 	return ret;
2553b55b76b2SDuan Jiong };
2554b55b76b2SDuan Jiong 
2555b55b76b2SDuan Jiong static struct dst_entry *ip6_route_redirect(struct net *net,
2556b55b76b2SDuan Jiong 					    const struct flowi6 *fl6,
2557b75cc8f9SDavid Ahern 					    const struct sk_buff *skb,
2558b55b76b2SDuan Jiong 					    const struct in6_addr *gateway)
2559b55b76b2SDuan Jiong {
2560b55b76b2SDuan Jiong 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2561b55b76b2SDuan Jiong 	struct ip6rd_flowi rdfl;
2562b55b76b2SDuan Jiong 
2563b55b76b2SDuan Jiong 	rdfl.fl6 = *fl6;
2564b55b76b2SDuan Jiong 	rdfl.gateway = *gateway;
2565b55b76b2SDuan Jiong 
2566b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, &rdfl.fl6, skb,
2567b55b76b2SDuan Jiong 				flags, __ip6_route_redirect);
2568b55b76b2SDuan Jiong }
2569b55b76b2SDuan Jiong 
2570e2d118a1SLorenzo Colitti void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2571e2d118a1SLorenzo Colitti 		  kuid_t uid)
25723a5ad2eeSDavid S. Miller {
25733a5ad2eeSDavid S. Miller 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
25743a5ad2eeSDavid S. Miller 	struct dst_entry *dst;
25751f7f10acSMaciej Żenczykowski 	struct flowi6 fl6 = {
25761f7f10acSMaciej Żenczykowski 		.flowi6_iif = LOOPBACK_IFINDEX,
25771f7f10acSMaciej Żenczykowski 		.flowi6_oif = oif,
25781f7f10acSMaciej Żenczykowski 		.flowi6_mark = mark,
25791f7f10acSMaciej Żenczykowski 		.daddr = iph->daddr,
25801f7f10acSMaciej Żenczykowski 		.saddr = iph->saddr,
25811f7f10acSMaciej Żenczykowski 		.flowlabel = ip6_flowinfo(iph),
25821f7f10acSMaciej Żenczykowski 		.flowi6_uid = uid,
25831f7f10acSMaciej Żenczykowski 	};
25843a5ad2eeSDavid S. Miller 
2585b75cc8f9SDavid Ahern 	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
25866700c270SDavid S. Miller 	rt6_do_redirect(dst, NULL, skb);
25873a5ad2eeSDavid S. Miller 	dst_release(dst);
25883a5ad2eeSDavid S. Miller }
25893a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_redirect);
25903a5ad2eeSDavid S. Miller 
2591d456336dSMaciej Żenczykowski void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
2592c92a59ecSDuan Jiong {
2593c92a59ecSDuan Jiong 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2594c92a59ecSDuan Jiong 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2595c92a59ecSDuan Jiong 	struct dst_entry *dst;
25960b26fb17SMaciej Żenczykowski 	struct flowi6 fl6 = {
25970b26fb17SMaciej Żenczykowski 		.flowi6_iif = LOOPBACK_IFINDEX,
25980b26fb17SMaciej Żenczykowski 		.flowi6_oif = oif,
25990b26fb17SMaciej Żenczykowski 		.daddr = msg->dest,
26000b26fb17SMaciej Żenczykowski 		.saddr = iph->daddr,
26010b26fb17SMaciej Żenczykowski 		.flowi6_uid = sock_net_uid(net, NULL),
26020b26fb17SMaciej Żenczykowski 	};
2603c92a59ecSDuan Jiong 
2604b75cc8f9SDavid Ahern 	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2605c92a59ecSDuan Jiong 	rt6_do_redirect(dst, NULL, skb);
2606c92a59ecSDuan Jiong 	dst_release(dst);
2607c92a59ecSDuan Jiong }
2608c92a59ecSDuan Jiong 
26093a5ad2eeSDavid S. Miller void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
26103a5ad2eeSDavid S. Miller {
2611e2d118a1SLorenzo Colitti 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2612e2d118a1SLorenzo Colitti 		     sk->sk_uid);
26133a5ad2eeSDavid S. Miller }
26143a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_redirect);
26153a5ad2eeSDavid S. Miller 
26160dbaee3bSDavid S. Miller static unsigned int ip6_default_advmss(const struct dst_entry *dst)
26171da177e4SLinus Torvalds {
26180dbaee3bSDavid S. Miller 	struct net_device *dev = dst->dev;
26190dbaee3bSDavid S. Miller 	unsigned int mtu = dst_mtu(dst);
26200dbaee3bSDavid S. Miller 	struct net *net = dev_net(dev);
26210dbaee3bSDavid S. Miller 
26221da177e4SLinus Torvalds 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
26231da177e4SLinus Torvalds 
26245578689aSDaniel Lezcano 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
26255578689aSDaniel Lezcano 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
26261da177e4SLinus Torvalds 
26271da177e4SLinus Torvalds 	/*
26281da177e4SLinus Torvalds 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
26291da177e4SLinus Torvalds 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
26301da177e4SLinus Torvalds 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
26311da177e4SLinus Torvalds 	 * rely only on pmtu discovery"
26321da177e4SLinus Torvalds 	 */
26331da177e4SLinus Torvalds 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
26341da177e4SLinus Torvalds 		mtu = IPV6_MAXPLEN;
26351da177e4SLinus Torvalds 	return mtu;
26361da177e4SLinus Torvalds }
26371da177e4SLinus Torvalds 
2638ebb762f2SSteffen Klassert static unsigned int ip6_mtu(const struct dst_entry *dst)
2639d33e4553SDavid S. Miller {
2640d33e4553SDavid S. Miller 	struct inet6_dev *idev;
2641d4ead6b3SDavid Ahern 	unsigned int mtu;
2642618f9bc7SSteffen Klassert 
26434b32b5adSMartin KaFai Lau 	mtu = dst_metric_raw(dst, RTAX_MTU);
26444b32b5adSMartin KaFai Lau 	if (mtu)
26454b32b5adSMartin KaFai Lau 		goto out;
26464b32b5adSMartin KaFai Lau 
2647618f9bc7SSteffen Klassert 	mtu = IPV6_MIN_MTU;
2648d33e4553SDavid S. Miller 
2649d33e4553SDavid S. Miller 	rcu_read_lock();
2650d33e4553SDavid S. Miller 	idev = __in6_dev_get(dst->dev);
2651d33e4553SDavid S. Miller 	if (idev)
2652d33e4553SDavid S. Miller 		mtu = idev->cnf.mtu6;
2653d33e4553SDavid S. Miller 	rcu_read_unlock();
2654d33e4553SDavid S. Miller 
265530f78d8eSEric Dumazet out:
265614972cbdSRoopa Prabhu 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
265714972cbdSRoopa Prabhu 
265814972cbdSRoopa Prabhu 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2659d33e4553SDavid S. Miller }
2660d33e4553SDavid S. Miller 
2661901731b8SDavid Ahern /* MTU selection:
2662901731b8SDavid Ahern  * 1. mtu on route is locked - use it
2663901731b8SDavid Ahern  * 2. mtu from nexthop exception
2664901731b8SDavid Ahern  * 3. mtu from egress device
2665901731b8SDavid Ahern  *
2666901731b8SDavid Ahern  * based on ip6_dst_mtu_forward and exception logic of
2667901731b8SDavid Ahern  * rt6_find_cached_rt; called with rcu_read_lock
2668901731b8SDavid Ahern  */
2669b748f260SDavid Ahern u32 ip6_mtu_from_fib6(const struct fib6_result *res,
2670b748f260SDavid Ahern 		      const struct in6_addr *daddr,
2671b748f260SDavid Ahern 		      const struct in6_addr *saddr)
2672901731b8SDavid Ahern {
2673901731b8SDavid Ahern 	struct rt6_exception_bucket *bucket;
2674b748f260SDavid Ahern 	const struct fib6_nh *nh = res->nh;
2675b748f260SDavid Ahern 	struct fib6_info *f6i = res->f6i;
2676b748f260SDavid Ahern 	const struct in6_addr *src_key;
2677901731b8SDavid Ahern 	struct rt6_exception *rt6_ex;
2678901731b8SDavid Ahern 	struct inet6_dev *idev;
2679901731b8SDavid Ahern 	u32 mtu = 0;
2680901731b8SDavid Ahern 
2681901731b8SDavid Ahern 	if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2682901731b8SDavid Ahern 		mtu = f6i->fib6_pmtu;
2683901731b8SDavid Ahern 		if (mtu)
2684901731b8SDavid Ahern 			goto out;
2685901731b8SDavid Ahern 	}
2686901731b8SDavid Ahern 
2687901731b8SDavid Ahern 	src_key = NULL;
2688901731b8SDavid Ahern #ifdef CONFIG_IPV6_SUBTREES
2689901731b8SDavid Ahern 	if (f6i->fib6_src.plen)
2690901731b8SDavid Ahern 		src_key = saddr;
2691901731b8SDavid Ahern #endif
2692901731b8SDavid Ahern 
2693901731b8SDavid Ahern 	bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2694901731b8SDavid Ahern 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2695901731b8SDavid Ahern 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2696901731b8SDavid Ahern 		mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2697901731b8SDavid Ahern 
2698901731b8SDavid Ahern 	if (likely(!mtu)) {
2699b748f260SDavid Ahern 		struct net_device *dev = nh->fib_nh_dev;
2700901731b8SDavid Ahern 
2701901731b8SDavid Ahern 		mtu = IPV6_MIN_MTU;
2702901731b8SDavid Ahern 		idev = __in6_dev_get(dev);
2703901731b8SDavid Ahern 		if (idev && idev->cnf.mtu6 > mtu)
2704901731b8SDavid Ahern 			mtu = idev->cnf.mtu6;
2705901731b8SDavid Ahern 	}
2706901731b8SDavid Ahern 
2707901731b8SDavid Ahern 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2708901731b8SDavid Ahern out:
2709b748f260SDavid Ahern 	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
2710901731b8SDavid Ahern }
2711901731b8SDavid Ahern 
27123b00944cSYOSHIFUJI Hideaki struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
271387a11578SDavid S. Miller 				  struct flowi6 *fl6)
27141da177e4SLinus Torvalds {
271587a11578SDavid S. Miller 	struct dst_entry *dst;
27161da177e4SLinus Torvalds 	struct rt6_info *rt;
27171da177e4SLinus Torvalds 	struct inet6_dev *idev = in6_dev_get(dev);
2718c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
27191da177e4SLinus Torvalds 
272038308473SDavid S. Miller 	if (unlikely(!idev))
2721122bdf67SEric Dumazet 		return ERR_PTR(-ENODEV);
27221da177e4SLinus Torvalds 
2723ad706862SMartin KaFai Lau 	rt = ip6_dst_alloc(net, dev, 0);
272438308473SDavid S. Miller 	if (unlikely(!rt)) {
27251da177e4SLinus Torvalds 		in6_dev_put(idev);
272687a11578SDavid S. Miller 		dst = ERR_PTR(-ENOMEM);
27271da177e4SLinus Torvalds 		goto out;
27281da177e4SLinus Torvalds 	}
27291da177e4SLinus Torvalds 
27308e2ec639SYan, Zheng 	rt->dst.flags |= DST_HOST;
2731588753f1SBrendan McGrath 	rt->dst.input = ip6_input;
27328e2ec639SYan, Zheng 	rt->dst.output  = ip6_output;
2733550bab42SJulian Anastasov 	rt->rt6i_gateway  = fl6->daddr;
273487a11578SDavid S. Miller 	rt->rt6i_dst.addr = fl6->daddr;
27358e2ec639SYan, Zheng 	rt->rt6i_dst.plen = 128;
27368e2ec639SYan, Zheng 	rt->rt6i_idev     = idev;
273714edd87dSLi RongQing 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
27381da177e4SLinus Torvalds 
27394c981e28SIdo Schimmel 	/* Add this dst into uncached_list so that rt6_disable_ip() can
2740587fea74SWei Wang 	 * do proper release of the net_device
2741587fea74SWei Wang 	 */
2742587fea74SWei Wang 	rt6_uncached_list_add(rt);
274381eb8447SWei Wang 	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
27441da177e4SLinus Torvalds 
274587a11578SDavid S. Miller 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
274687a11578SDavid S. Miller 
27471da177e4SLinus Torvalds out:
274887a11578SDavid S. Miller 	return dst;
27491da177e4SLinus Torvalds }
27501da177e4SLinus Torvalds 
2751569d3645SDaniel Lezcano static int ip6_dst_gc(struct dst_ops *ops)
27521da177e4SLinus Torvalds {
275386393e52SAlexey Dobriyan 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
27547019b78eSDaniel Lezcano 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
27557019b78eSDaniel Lezcano 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
27567019b78eSDaniel Lezcano 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
27577019b78eSDaniel Lezcano 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
27587019b78eSDaniel Lezcano 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2759fc66f95cSEric Dumazet 	int entries;
27601da177e4SLinus Torvalds 
2761fc66f95cSEric Dumazet 	entries = dst_entries_get_fast(ops);
276249a18d86SMichal Kubeček 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2763fc66f95cSEric Dumazet 	    entries <= rt_max_size)
27641da177e4SLinus Torvalds 		goto out;
27651da177e4SLinus Torvalds 
27666891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire++;
276714956643SLi RongQing 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2768fc66f95cSEric Dumazet 	entries = dst_entries_get_slow(ops);
2769fc66f95cSEric Dumazet 	if (entries < ops->gc_thresh)
27707019b78eSDaniel Lezcano 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
27711da177e4SLinus Torvalds out:
27727019b78eSDaniel Lezcano 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2773fc66f95cSEric Dumazet 	return entries > rt_max_size;
27741da177e4SLinus Torvalds }
27751da177e4SLinus Torvalds 
27768c14586fSDavid Ahern static struct rt6_info *ip6_nh_lookup_table(struct net *net,
27778c14586fSDavid Ahern 					    struct fib6_config *cfg,
2778f4797b33SDavid Ahern 					    const struct in6_addr *gw_addr,
2779f4797b33SDavid Ahern 					    u32 tbid, int flags)
27808c14586fSDavid Ahern {
27818c14586fSDavid Ahern 	struct flowi6 fl6 = {
27828c14586fSDavid Ahern 		.flowi6_oif = cfg->fc_ifindex,
27838c14586fSDavid Ahern 		.daddr = *gw_addr,
27848c14586fSDavid Ahern 		.saddr = cfg->fc_prefsrc,
27858c14586fSDavid Ahern 	};
27868c14586fSDavid Ahern 	struct fib6_table *table;
27878c14586fSDavid Ahern 	struct rt6_info *rt;
27888c14586fSDavid Ahern 
2789f4797b33SDavid Ahern 	table = fib6_get_table(net, tbid);
27908c14586fSDavid Ahern 	if (!table)
27918c14586fSDavid Ahern 		return NULL;
27928c14586fSDavid Ahern 
27938c14586fSDavid Ahern 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
27948c14586fSDavid Ahern 		flags |= RT6_LOOKUP_F_HAS_SADDR;
27958c14586fSDavid Ahern 
2796f4797b33SDavid Ahern 	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2797b75cc8f9SDavid Ahern 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
27988c14586fSDavid Ahern 
27998c14586fSDavid Ahern 	/* if table lookup failed, fall back to full lookup */
28008c14586fSDavid Ahern 	if (rt == net->ipv6.ip6_null_entry) {
28018c14586fSDavid Ahern 		ip6_rt_put(rt);
28028c14586fSDavid Ahern 		rt = NULL;
28038c14586fSDavid Ahern 	}
28048c14586fSDavid Ahern 
28058c14586fSDavid Ahern 	return rt;
28068c14586fSDavid Ahern }
28078c14586fSDavid Ahern 
2808fc1e64e1SDavid Ahern static int ip6_route_check_nh_onlink(struct net *net,
2809fc1e64e1SDavid Ahern 				     struct fib6_config *cfg,
28109fbb704cSDavid Ahern 				     const struct net_device *dev,
2811fc1e64e1SDavid Ahern 				     struct netlink_ext_ack *extack)
2812fc1e64e1SDavid Ahern {
281344750f84SDavid Ahern 	u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2814fc1e64e1SDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
2815fc1e64e1SDavid Ahern 	u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2816bf1dc8baSPaolo Abeni 	struct fib6_info *from;
2817fc1e64e1SDavid Ahern 	struct rt6_info *grt;
2818fc1e64e1SDavid Ahern 	int err;
2819fc1e64e1SDavid Ahern 
2820fc1e64e1SDavid Ahern 	err = 0;
2821fc1e64e1SDavid Ahern 	grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2822fc1e64e1SDavid Ahern 	if (grt) {
2823bf1dc8baSPaolo Abeni 		rcu_read_lock();
2824bf1dc8baSPaolo Abeni 		from = rcu_dereference(grt->from);
282558e354c0SDavid Ahern 		if (!grt->dst.error &&
28264ed591c8SDavid Ahern 		    /* ignore match if it is the default route */
2827bf1dc8baSPaolo Abeni 		    from && !ipv6_addr_any(&from->fib6_dst.addr) &&
282858e354c0SDavid Ahern 		    (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
282944750f84SDavid Ahern 			NL_SET_ERR_MSG(extack,
283044750f84SDavid Ahern 				       "Nexthop has invalid gateway or device mismatch");
2831fc1e64e1SDavid Ahern 			err = -EINVAL;
2832fc1e64e1SDavid Ahern 		}
2833bf1dc8baSPaolo Abeni 		rcu_read_unlock();
2834fc1e64e1SDavid Ahern 
2835fc1e64e1SDavid Ahern 		ip6_rt_put(grt);
2836fc1e64e1SDavid Ahern 	}
2837fc1e64e1SDavid Ahern 
2838fc1e64e1SDavid Ahern 	return err;
2839fc1e64e1SDavid Ahern }
2840fc1e64e1SDavid Ahern 
28411edce99fSDavid Ahern static int ip6_route_check_nh(struct net *net,
28421edce99fSDavid Ahern 			      struct fib6_config *cfg,
28431edce99fSDavid Ahern 			      struct net_device **_dev,
28441edce99fSDavid Ahern 			      struct inet6_dev **idev)
28451edce99fSDavid Ahern {
28461edce99fSDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
28471edce99fSDavid Ahern 	struct net_device *dev = _dev ? *_dev : NULL;
28481edce99fSDavid Ahern 	struct rt6_info *grt = NULL;
28491edce99fSDavid Ahern 	int err = -EHOSTUNREACH;
28501edce99fSDavid Ahern 
28511edce99fSDavid Ahern 	if (cfg->fc_table) {
2852f4797b33SDavid Ahern 		int flags = RT6_LOOKUP_F_IFACE;
2853f4797b33SDavid Ahern 
2854f4797b33SDavid Ahern 		grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2855f4797b33SDavid Ahern 					  cfg->fc_table, flags);
28561edce99fSDavid Ahern 		if (grt) {
28571edce99fSDavid Ahern 			if (grt->rt6i_flags & RTF_GATEWAY ||
28581edce99fSDavid Ahern 			    (dev && dev != grt->dst.dev)) {
28591edce99fSDavid Ahern 				ip6_rt_put(grt);
28601edce99fSDavid Ahern 				grt = NULL;
28611edce99fSDavid Ahern 			}
28621edce99fSDavid Ahern 		}
28631edce99fSDavid Ahern 	}
28641edce99fSDavid Ahern 
28651edce99fSDavid Ahern 	if (!grt)
2866b75cc8f9SDavid Ahern 		grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
28671edce99fSDavid Ahern 
28681edce99fSDavid Ahern 	if (!grt)
28691edce99fSDavid Ahern 		goto out;
28701edce99fSDavid Ahern 
28711edce99fSDavid Ahern 	if (dev) {
28721edce99fSDavid Ahern 		if (dev != grt->dst.dev) {
28731edce99fSDavid Ahern 			ip6_rt_put(grt);
28741edce99fSDavid Ahern 			goto out;
28751edce99fSDavid Ahern 		}
28761edce99fSDavid Ahern 	} else {
28771edce99fSDavid Ahern 		*_dev = dev = grt->dst.dev;
28781edce99fSDavid Ahern 		*idev = grt->rt6i_idev;
28791edce99fSDavid Ahern 		dev_hold(dev);
28801edce99fSDavid Ahern 		in6_dev_hold(grt->rt6i_idev);
28811edce99fSDavid Ahern 	}
28821edce99fSDavid Ahern 
28831edce99fSDavid Ahern 	if (!(grt->rt6i_flags & RTF_GATEWAY))
28841edce99fSDavid Ahern 		err = 0;
28851edce99fSDavid Ahern 
28861edce99fSDavid Ahern 	ip6_rt_put(grt);
28871edce99fSDavid Ahern 
28881edce99fSDavid Ahern out:
28891edce99fSDavid Ahern 	return err;
28901edce99fSDavid Ahern }
28911edce99fSDavid Ahern 
28929fbb704cSDavid Ahern static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
28939fbb704cSDavid Ahern 			   struct net_device **_dev, struct inet6_dev **idev,
28949fbb704cSDavid Ahern 			   struct netlink_ext_ack *extack)
28959fbb704cSDavid Ahern {
28969fbb704cSDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
28979fbb704cSDavid Ahern 	int gwa_type = ipv6_addr_type(gw_addr);
2898232378e8SDavid Ahern 	bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
28999fbb704cSDavid Ahern 	const struct net_device *dev = *_dev;
2900232378e8SDavid Ahern 	bool need_addr_check = !dev;
29019fbb704cSDavid Ahern 	int err = -EINVAL;
29029fbb704cSDavid Ahern 
29039fbb704cSDavid Ahern 	/* if gw_addr is local we will fail to detect this in case
29049fbb704cSDavid Ahern 	 * address is still TENTATIVE (DAD in progress). rt6_lookup()
29059fbb704cSDavid Ahern 	 * will return already-added prefix route via interface that
29069fbb704cSDavid Ahern 	 * prefix route was assigned to, which might be non-loopback.
29079fbb704cSDavid Ahern 	 */
2908232378e8SDavid Ahern 	if (dev &&
2909232378e8SDavid Ahern 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2910232378e8SDavid Ahern 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
29119fbb704cSDavid Ahern 		goto out;
29129fbb704cSDavid Ahern 	}
29139fbb704cSDavid Ahern 
29149fbb704cSDavid Ahern 	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
29159fbb704cSDavid Ahern 		/* IPv6 strictly inhibits using not link-local
29169fbb704cSDavid Ahern 		 * addresses as nexthop address.
29179fbb704cSDavid Ahern 		 * Otherwise, router will not able to send redirects.
29189fbb704cSDavid Ahern 		 * It is very good, but in some (rare!) circumstances
29199fbb704cSDavid Ahern 		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
29209fbb704cSDavid Ahern 		 * some exceptions. --ANK
29219fbb704cSDavid Ahern 		 * We allow IPv4-mapped nexthops to support RFC4798-type
29229fbb704cSDavid Ahern 		 * addressing
29239fbb704cSDavid Ahern 		 */
29249fbb704cSDavid Ahern 		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
29259fbb704cSDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid gateway address");
29269fbb704cSDavid Ahern 			goto out;
29279fbb704cSDavid Ahern 		}
29289fbb704cSDavid Ahern 
29299fbb704cSDavid Ahern 		if (cfg->fc_flags & RTNH_F_ONLINK)
29309fbb704cSDavid Ahern 			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
29319fbb704cSDavid Ahern 		else
29329fbb704cSDavid Ahern 			err = ip6_route_check_nh(net, cfg, _dev, idev);
29339fbb704cSDavid Ahern 
29349fbb704cSDavid Ahern 		if (err)
29359fbb704cSDavid Ahern 			goto out;
29369fbb704cSDavid Ahern 	}
29379fbb704cSDavid Ahern 
29389fbb704cSDavid Ahern 	/* reload in case device was changed */
29399fbb704cSDavid Ahern 	dev = *_dev;
29409fbb704cSDavid Ahern 
29419fbb704cSDavid Ahern 	err = -EINVAL;
29429fbb704cSDavid Ahern 	if (!dev) {
29439fbb704cSDavid Ahern 		NL_SET_ERR_MSG(extack, "Egress device not specified");
29449fbb704cSDavid Ahern 		goto out;
29459fbb704cSDavid Ahern 	} else if (dev->flags & IFF_LOOPBACK) {
29469fbb704cSDavid Ahern 		NL_SET_ERR_MSG(extack,
29479fbb704cSDavid Ahern 			       "Egress device can not be loopback device for this route");
29489fbb704cSDavid Ahern 		goto out;
29499fbb704cSDavid Ahern 	}
2950232378e8SDavid Ahern 
2951232378e8SDavid Ahern 	/* if we did not check gw_addr above, do so now that the
2952232378e8SDavid Ahern 	 * egress device has been resolved.
2953232378e8SDavid Ahern 	 */
2954232378e8SDavid Ahern 	if (need_addr_check &&
2955232378e8SDavid Ahern 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2956232378e8SDavid Ahern 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2957232378e8SDavid Ahern 		goto out;
2958232378e8SDavid Ahern 	}
2959232378e8SDavid Ahern 
29609fbb704cSDavid Ahern 	err = 0;
29619fbb704cSDavid Ahern out:
29629fbb704cSDavid Ahern 	return err;
29639fbb704cSDavid Ahern }
29649fbb704cSDavid Ahern 
296583c44251SDavid Ahern static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
296683c44251SDavid Ahern {
296783c44251SDavid Ahern 	if ((flags & RTF_REJECT) ||
296883c44251SDavid Ahern 	    (dev && (dev->flags & IFF_LOOPBACK) &&
296983c44251SDavid Ahern 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
297083c44251SDavid Ahern 	     !(flags & RTF_LOCAL)))
297183c44251SDavid Ahern 		return true;
297283c44251SDavid Ahern 
297383c44251SDavid Ahern 	return false;
297483c44251SDavid Ahern }
297583c44251SDavid Ahern 
297683c44251SDavid Ahern int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
297783c44251SDavid Ahern 		 struct fib6_config *cfg, gfp_t gfp_flags,
297883c44251SDavid Ahern 		 struct netlink_ext_ack *extack)
297983c44251SDavid Ahern {
298083c44251SDavid Ahern 	struct net_device *dev = NULL;
298183c44251SDavid Ahern 	struct inet6_dev *idev = NULL;
298283c44251SDavid Ahern 	int addr_type;
298383c44251SDavid Ahern 	int err;
298483c44251SDavid Ahern 
2985f1741730SDavid Ahern 	fib6_nh->fib_nh_family = AF_INET6;
2986f1741730SDavid Ahern 
298783c44251SDavid Ahern 	err = -ENODEV;
298883c44251SDavid Ahern 	if (cfg->fc_ifindex) {
298983c44251SDavid Ahern 		dev = dev_get_by_index(net, cfg->fc_ifindex);
299083c44251SDavid Ahern 		if (!dev)
299183c44251SDavid Ahern 			goto out;
299283c44251SDavid Ahern 		idev = in6_dev_get(dev);
299383c44251SDavid Ahern 		if (!idev)
299483c44251SDavid Ahern 			goto out;
299583c44251SDavid Ahern 	}
299683c44251SDavid Ahern 
299783c44251SDavid Ahern 	if (cfg->fc_flags & RTNH_F_ONLINK) {
299883c44251SDavid Ahern 		if (!dev) {
299983c44251SDavid Ahern 			NL_SET_ERR_MSG(extack,
300083c44251SDavid Ahern 				       "Nexthop device required for onlink");
300183c44251SDavid Ahern 			goto out;
300283c44251SDavid Ahern 		}
300383c44251SDavid Ahern 
300483c44251SDavid Ahern 		if (!(dev->flags & IFF_UP)) {
300583c44251SDavid Ahern 			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
300683c44251SDavid Ahern 			err = -ENETDOWN;
300783c44251SDavid Ahern 			goto out;
300883c44251SDavid Ahern 		}
300983c44251SDavid Ahern 
3010ad1601aeSDavid Ahern 		fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
301183c44251SDavid Ahern 	}
301283c44251SDavid Ahern 
3013ad1601aeSDavid Ahern 	fib6_nh->fib_nh_weight = 1;
301483c44251SDavid Ahern 
301583c44251SDavid Ahern 	/* We cannot add true routes via loopback here,
301683c44251SDavid Ahern 	 * they would result in kernel looping; promote them to reject routes
301783c44251SDavid Ahern 	 */
301883c44251SDavid Ahern 	addr_type = ipv6_addr_type(&cfg->fc_dst);
301983c44251SDavid Ahern 	if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
302083c44251SDavid Ahern 		/* hold loopback dev/idev if we haven't done so. */
302183c44251SDavid Ahern 		if (dev != net->loopback_dev) {
302283c44251SDavid Ahern 			if (dev) {
302383c44251SDavid Ahern 				dev_put(dev);
302483c44251SDavid Ahern 				in6_dev_put(idev);
302583c44251SDavid Ahern 			}
302683c44251SDavid Ahern 			dev = net->loopback_dev;
302783c44251SDavid Ahern 			dev_hold(dev);
302883c44251SDavid Ahern 			idev = in6_dev_get(dev);
302983c44251SDavid Ahern 			if (!idev) {
303083c44251SDavid Ahern 				err = -ENODEV;
303183c44251SDavid Ahern 				goto out;
303283c44251SDavid Ahern 			}
303383c44251SDavid Ahern 		}
303483c44251SDavid Ahern 		goto set_dev;
303583c44251SDavid Ahern 	}
303683c44251SDavid Ahern 
303783c44251SDavid Ahern 	if (cfg->fc_flags & RTF_GATEWAY) {
303883c44251SDavid Ahern 		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
303983c44251SDavid Ahern 		if (err)
304083c44251SDavid Ahern 			goto out;
304183c44251SDavid Ahern 
3042ad1601aeSDavid Ahern 		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
3043bdf00467SDavid Ahern 		fib6_nh->fib_nh_gw_family = AF_INET6;
304483c44251SDavid Ahern 	}
304583c44251SDavid Ahern 
304683c44251SDavid Ahern 	err = -ENODEV;
304783c44251SDavid Ahern 	if (!dev)
304883c44251SDavid Ahern 		goto out;
304983c44251SDavid Ahern 
305083c44251SDavid Ahern 	if (idev->cnf.disable_ipv6) {
305183c44251SDavid Ahern 		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
305283c44251SDavid Ahern 		err = -EACCES;
305383c44251SDavid Ahern 		goto out;
305483c44251SDavid Ahern 	}
305583c44251SDavid Ahern 
305683c44251SDavid Ahern 	if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
305783c44251SDavid Ahern 		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
305883c44251SDavid Ahern 		err = -ENETDOWN;
305983c44251SDavid Ahern 		goto out;
306083c44251SDavid Ahern 	}
306183c44251SDavid Ahern 
306283c44251SDavid Ahern 	if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
306383c44251SDavid Ahern 	    !netif_carrier_ok(dev))
3064ad1601aeSDavid Ahern 		fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
306583c44251SDavid Ahern 
3066979e276eSDavid Ahern 	err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
3067979e276eSDavid Ahern 				 cfg->fc_encap_type, cfg, gfp_flags, extack);
3068979e276eSDavid Ahern 	if (err)
3069979e276eSDavid Ahern 		goto out;
307083c44251SDavid Ahern set_dev:
3071ad1601aeSDavid Ahern 	fib6_nh->fib_nh_dev = dev;
3072f1741730SDavid Ahern 	fib6_nh->fib_nh_oif = dev->ifindex;
307383c44251SDavid Ahern 	err = 0;
307483c44251SDavid Ahern out:
307583c44251SDavid Ahern 	if (idev)
307683c44251SDavid Ahern 		in6_dev_put(idev);
307783c44251SDavid Ahern 
307883c44251SDavid Ahern 	if (err) {
3079ad1601aeSDavid Ahern 		lwtstate_put(fib6_nh->fib_nh_lws);
3080ad1601aeSDavid Ahern 		fib6_nh->fib_nh_lws = NULL;
308183c44251SDavid Ahern 		if (dev)
308283c44251SDavid Ahern 			dev_put(dev);
308383c44251SDavid Ahern 	}
308483c44251SDavid Ahern 
308583c44251SDavid Ahern 	return err;
308683c44251SDavid Ahern }
308783c44251SDavid Ahern 
3088dac7d0f2SDavid Ahern void fib6_nh_release(struct fib6_nh *fib6_nh)
3089dac7d0f2SDavid Ahern {
3090979e276eSDavid Ahern 	fib_nh_common_release(&fib6_nh->nh_common);
3091dac7d0f2SDavid Ahern }
3092dac7d0f2SDavid Ahern 
30938d1c802bSDavid Ahern static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
3094acb54e3cSDavid Ahern 					      gfp_t gfp_flags,
3095333c4301SDavid Ahern 					      struct netlink_ext_ack *extack)
30961da177e4SLinus Torvalds {
30975578689aSDaniel Lezcano 	struct net *net = cfg->fc_nlinfo.nl_net;
30988d1c802bSDavid Ahern 	struct fib6_info *rt = NULL;
3099c71099acSThomas Graf 	struct fib6_table *table;
31008c5b83f0SRoopa Prabhu 	int err = -EINVAL;
310183c44251SDavid Ahern 	int addr_type;
31021da177e4SLinus Torvalds 
3103557c44beSDavid Ahern 	/* RTF_PCPU is an internal flag; can not be set by userspace */
3104d5d531cbSDavid Ahern 	if (cfg->fc_flags & RTF_PCPU) {
3105d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
3106557c44beSDavid Ahern 		goto out;
3107d5d531cbSDavid Ahern 	}
3108557c44beSDavid Ahern 
31092ea2352eSWei Wang 	/* RTF_CACHE is an internal flag; can not be set by userspace */
31102ea2352eSWei Wang 	if (cfg->fc_flags & RTF_CACHE) {
31112ea2352eSWei Wang 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
31122ea2352eSWei Wang 		goto out;
31132ea2352eSWei Wang 	}
31142ea2352eSWei Wang 
3115e8478e80SDavid Ahern 	if (cfg->fc_type > RTN_MAX) {
3116e8478e80SDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid route type");
3117e8478e80SDavid Ahern 		goto out;
3118e8478e80SDavid Ahern 	}
3119e8478e80SDavid Ahern 
3120d5d531cbSDavid Ahern 	if (cfg->fc_dst_len > 128) {
3121d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
31228c5b83f0SRoopa Prabhu 		goto out;
3123d5d531cbSDavid Ahern 	}
3124d5d531cbSDavid Ahern 	if (cfg->fc_src_len > 128) {
3125d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid source address length");
3126d5d531cbSDavid Ahern 		goto out;
3127d5d531cbSDavid Ahern 	}
31281da177e4SLinus Torvalds #ifndef CONFIG_IPV6_SUBTREES
3129d5d531cbSDavid Ahern 	if (cfg->fc_src_len) {
3130d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack,
3131d5d531cbSDavid Ahern 			       "Specifying source address requires IPV6_SUBTREES to be enabled");
31328c5b83f0SRoopa Prabhu 		goto out;
3133d5d531cbSDavid Ahern 	}
31341da177e4SLinus Torvalds #endif
3135fc1e64e1SDavid Ahern 
3136c71099acSThomas Graf 	err = -ENOBUFS;
313738308473SDavid S. Miller 	if (cfg->fc_nlinfo.nlh &&
3138d71314b4SMatti Vaittinen 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
3139d71314b4SMatti Vaittinen 		table = fib6_get_table(net, cfg->fc_table);
314038308473SDavid S. Miller 		if (!table) {
3141f3213831SJoe Perches 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
3142d71314b4SMatti Vaittinen 			table = fib6_new_table(net, cfg->fc_table);
3143d71314b4SMatti Vaittinen 		}
3144d71314b4SMatti Vaittinen 	} else {
3145d71314b4SMatti Vaittinen 		table = fib6_new_table(net, cfg->fc_table);
3146d71314b4SMatti Vaittinen 	}
314738308473SDavid S. Miller 
314838308473SDavid S. Miller 	if (!table)
3149c71099acSThomas Graf 		goto out;
3150c71099acSThomas Graf 
31511da177e4SLinus Torvalds 	err = -ENOMEM;
315293531c67SDavid Ahern 	rt = fib6_info_alloc(gfp_flags);
315393531c67SDavid Ahern 	if (!rt)
31541da177e4SLinus Torvalds 		goto out;
315593531c67SDavid Ahern 
3156d7e774f3SDavid Ahern 	rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len,
3157d7e774f3SDavid Ahern 					       extack);
3158767a2217SDavid Ahern 	if (IS_ERR(rt->fib6_metrics)) {
3159767a2217SDavid Ahern 		err = PTR_ERR(rt->fib6_metrics);
3160fda21d46SEric Dumazet 		/* Do not leave garbage there. */
3161fda21d46SEric Dumazet 		rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
3162767a2217SDavid Ahern 		goto out;
3163767a2217SDavid Ahern 	}
3164767a2217SDavid Ahern 
316593531c67SDavid Ahern 	if (cfg->fc_flags & RTF_ADDRCONF)
316693531c67SDavid Ahern 		rt->dst_nocount = true;
31671da177e4SLinus Torvalds 
31681716a961SGao feng 	if (cfg->fc_flags & RTF_EXPIRES)
316914895687SDavid Ahern 		fib6_set_expires(rt, jiffies +
31701716a961SGao feng 				clock_t_to_jiffies(cfg->fc_expires));
31711716a961SGao feng 	else
317214895687SDavid Ahern 		fib6_clean_expires(rt);
31731da177e4SLinus Torvalds 
317486872cb5SThomas Graf 	if (cfg->fc_protocol == RTPROT_UNSPEC)
317586872cb5SThomas Graf 		cfg->fc_protocol = RTPROT_BOOT;
317693c2fb25SDavid Ahern 	rt->fib6_protocol = cfg->fc_protocol;
317786872cb5SThomas Graf 
317883c44251SDavid Ahern 	rt->fib6_table = table;
317983c44251SDavid Ahern 	rt->fib6_metric = cfg->fc_metric;
318083c44251SDavid Ahern 	rt->fib6_type = cfg->fc_type;
31812b2450caSDavid Ahern 	rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
318219e42e45SRoopa Prabhu 
318393c2fb25SDavid Ahern 	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
318493c2fb25SDavid Ahern 	rt->fib6_dst.plen = cfg->fc_dst_len;
318593c2fb25SDavid Ahern 	if (rt->fib6_dst.plen == 128)
31863b6761d1SDavid Ahern 		rt->dst_host = true;
31871da177e4SLinus Torvalds 
31881da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
318993c2fb25SDavid Ahern 	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
319093c2fb25SDavid Ahern 	rt->fib6_src.plen = cfg->fc_src_len;
31911da177e4SLinus Torvalds #endif
319283c44251SDavid Ahern 	err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
31931da177e4SLinus Torvalds 	if (err)
31941da177e4SLinus Torvalds 		goto out;
31959fbb704cSDavid Ahern 
319683c44251SDavid Ahern 	/* We cannot add true routes via loopback here,
319783c44251SDavid Ahern 	 * they would result in kernel looping; promote them to reject routes
319883c44251SDavid Ahern 	 */
319983c44251SDavid Ahern 	addr_type = ipv6_addr_type(&cfg->fc_dst);
3200ad1601aeSDavid Ahern 	if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
320183c44251SDavid Ahern 		rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
3202955ec4cbSDavid Ahern 
3203c3968a85SDaniel Walter 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
320483c44251SDavid Ahern 		struct net_device *dev = fib6_info_nh_dev(rt);
320583c44251SDavid Ahern 
3206c3968a85SDaniel Walter 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3207d5d531cbSDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid source address");
3208c3968a85SDaniel Walter 			err = -EINVAL;
3209c3968a85SDaniel Walter 			goto out;
3210c3968a85SDaniel Walter 		}
321193c2fb25SDavid Ahern 		rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
321293c2fb25SDavid Ahern 		rt->fib6_prefsrc.plen = 128;
3213c3968a85SDaniel Walter 	} else
321493c2fb25SDavid Ahern 		rt->fib6_prefsrc.plen = 0;
3215c3968a85SDaniel Walter 
32168c5b83f0SRoopa Prabhu 	return rt;
32171da177e4SLinus Torvalds out:
321893531c67SDavid Ahern 	fib6_info_release(rt);
32198c5b83f0SRoopa Prabhu 	return ERR_PTR(err);
32206b9ea5a6SRoopa Prabhu }
32216b9ea5a6SRoopa Prabhu 
3222acb54e3cSDavid Ahern int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
3223333c4301SDavid Ahern 		  struct netlink_ext_ack *extack)
32246b9ea5a6SRoopa Prabhu {
32258d1c802bSDavid Ahern 	struct fib6_info *rt;
32266b9ea5a6SRoopa Prabhu 	int err;
32276b9ea5a6SRoopa Prabhu 
3228acb54e3cSDavid Ahern 	rt = ip6_route_info_create(cfg, gfp_flags, extack);
3229d4ead6b3SDavid Ahern 	if (IS_ERR(rt))
3230d4ead6b3SDavid Ahern 		return PTR_ERR(rt);
32316b9ea5a6SRoopa Prabhu 
3232d4ead6b3SDavid Ahern 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
323393531c67SDavid Ahern 	fib6_info_release(rt);
32346b9ea5a6SRoopa Prabhu 
32351da177e4SLinus Torvalds 	return err;
32361da177e4SLinus Torvalds }
32371da177e4SLinus Torvalds 
32388d1c802bSDavid Ahern static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
32391da177e4SLinus Torvalds {
3240afb1d4b5SDavid Ahern 	struct net *net = info->nl_net;
3241c71099acSThomas Graf 	struct fib6_table *table;
3242afb1d4b5SDavid Ahern 	int err;
32431da177e4SLinus Torvalds 
3244421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry) {
32456825a26cSGao feng 		err = -ENOENT;
32466825a26cSGao feng 		goto out;
32476825a26cSGao feng 	}
32486c813a72SPatrick McHardy 
324993c2fb25SDavid Ahern 	table = rt->fib6_table;
325066f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
325186872cb5SThomas Graf 	err = fib6_del(rt, info);
325266f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
32531da177e4SLinus Torvalds 
32546825a26cSGao feng out:
325593531c67SDavid Ahern 	fib6_info_release(rt);
32561da177e4SLinus Torvalds 	return err;
32571da177e4SLinus Torvalds }
32581da177e4SLinus Torvalds 
32598d1c802bSDavid Ahern int ip6_del_rt(struct net *net, struct fib6_info *rt)
3260e0a1ad73SThomas Graf {
3261afb1d4b5SDavid Ahern 	struct nl_info info = { .nl_net = net };
3262afb1d4b5SDavid Ahern 
3263528c4cebSDenis V. Lunev 	return __ip6_del_rt(rt, &info);
3264e0a1ad73SThomas Graf }
3265e0a1ad73SThomas Graf 
32668d1c802bSDavid Ahern static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
32670ae81335SDavid Ahern {
32680ae81335SDavid Ahern 	struct nl_info *info = &cfg->fc_nlinfo;
3269e3330039SWANG Cong 	struct net *net = info->nl_net;
327016a16cd3SDavid Ahern 	struct sk_buff *skb = NULL;
32710ae81335SDavid Ahern 	struct fib6_table *table;
3272e3330039SWANG Cong 	int err = -ENOENT;
32730ae81335SDavid Ahern 
3274421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry)
3275e3330039SWANG Cong 		goto out_put;
327693c2fb25SDavid Ahern 	table = rt->fib6_table;
327766f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
32780ae81335SDavid Ahern 
327993c2fb25SDavid Ahern 	if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
32808d1c802bSDavid Ahern 		struct fib6_info *sibling, *next_sibling;
32810ae81335SDavid Ahern 
328216a16cd3SDavid Ahern 		/* prefer to send a single notification with all hops */
328316a16cd3SDavid Ahern 		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
328416a16cd3SDavid Ahern 		if (skb) {
328516a16cd3SDavid Ahern 			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
328616a16cd3SDavid Ahern 
3287d4ead6b3SDavid Ahern 			if (rt6_fill_node(net, skb, rt, NULL,
328816a16cd3SDavid Ahern 					  NULL, NULL, 0, RTM_DELROUTE,
328916a16cd3SDavid Ahern 					  info->portid, seq, 0) < 0) {
329016a16cd3SDavid Ahern 				kfree_skb(skb);
329116a16cd3SDavid Ahern 				skb = NULL;
329216a16cd3SDavid Ahern 			} else
329316a16cd3SDavid Ahern 				info->skip_notify = 1;
329416a16cd3SDavid Ahern 		}
329516a16cd3SDavid Ahern 
32960ae81335SDavid Ahern 		list_for_each_entry_safe(sibling, next_sibling,
329793c2fb25SDavid Ahern 					 &rt->fib6_siblings,
329893c2fb25SDavid Ahern 					 fib6_siblings) {
32990ae81335SDavid Ahern 			err = fib6_del(sibling, info);
33000ae81335SDavid Ahern 			if (err)
3301e3330039SWANG Cong 				goto out_unlock;
33020ae81335SDavid Ahern 		}
33030ae81335SDavid Ahern 	}
33040ae81335SDavid Ahern 
33050ae81335SDavid Ahern 	err = fib6_del(rt, info);
3306e3330039SWANG Cong out_unlock:
330766f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
3308e3330039SWANG Cong out_put:
330993531c67SDavid Ahern 	fib6_info_release(rt);
331016a16cd3SDavid Ahern 
331116a16cd3SDavid Ahern 	if (skb) {
3312e3330039SWANG Cong 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
331316a16cd3SDavid Ahern 			    info->nlh, gfp_any());
331416a16cd3SDavid Ahern 	}
33150ae81335SDavid Ahern 	return err;
33160ae81335SDavid Ahern }
33170ae81335SDavid Ahern 
331823fb93a4SDavid Ahern static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
331923fb93a4SDavid Ahern {
332023fb93a4SDavid Ahern 	int rc = -ESRCH;
332123fb93a4SDavid Ahern 
332223fb93a4SDavid Ahern 	if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
332323fb93a4SDavid Ahern 		goto out;
332423fb93a4SDavid Ahern 
332523fb93a4SDavid Ahern 	if (cfg->fc_flags & RTF_GATEWAY &&
332623fb93a4SDavid Ahern 	    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
332723fb93a4SDavid Ahern 		goto out;
3328761f6026SXin Long 
332923fb93a4SDavid Ahern 	rc = rt6_remove_exception_rt(rt);
333023fb93a4SDavid Ahern out:
333123fb93a4SDavid Ahern 	return rc;
333223fb93a4SDavid Ahern }
333323fb93a4SDavid Ahern 
/*
 * Delete the first route in table @cfg->fc_table that matches @cfg.
 *
 * The node for fc_dst/fc_dst_len (and fc_src/fc_src_len) is located with
 * fib6_locate() and its routes are walked under rcu_read_lock().
 *
 * With RTF_CACHE set in @cfg->fc_flags only cached (exception) routes are
 * considered: the first one for which ip6_del_cached_rt() returns something
 * other than -ESRCH ends the search with that return value.
 *
 * Otherwise a route matches when each of fc_ifindex, fc_metric and
 * fc_protocol is either zero or equal to the route's value, and, if
 * RTF_GATEWAY is set, the gateway is equal too.
 *
 * Returns -ESRCH when the table or a matching route does not exist,
 * otherwise the result of the delete helper that was called.
 */
3334333c4301SDavid Ahern static int ip6_route_del(struct fib6_config *cfg,
3335333c4301SDavid Ahern 			 struct netlink_ext_ack *extack)
33361da177e4SLinus Torvalds {
33378d1c802bSDavid Ahern 	struct rt6_info *rt_cache;
3338c71099acSThomas Graf 	struct fib6_table *table;
	/* NOTE(review): rt is never assigned explicitly in this function;
	 * presumably for_each_fib6_node_rt_rcu() uses it as the loop cursor -
	 * confirm against the macro definition before renaming it.
	 */
33398d1c802bSDavid Ahern 	struct fib6_info *rt;
33401da177e4SLinus Torvalds 	struct fib6_node *fn;
33411da177e4SLinus Torvalds 	int err = -ESRCH;
33421da177e4SLinus Torvalds 
33435578689aSDaniel Lezcano 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3344d5d531cbSDavid Ahern 	if (!table) {
3345d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "FIB table does not exist");
3346c71099acSThomas Graf 		return err;
3347d5d531cbSDavid Ahern 	}
33481da177e4SLinus Torvalds 
334966f5d6ceSWei Wang 	rcu_read_lock();
3350c71099acSThomas Graf 
	/* The last argument is true unless RTF_CACHE was requested.
	 * NOTE(review): presumably it selects exact-match lookup - confirm
	 * against fib6_locate().
	 */
3351c71099acSThomas Graf 	fn = fib6_locate(&table->tb6_root,
335286872cb5SThomas Graf 			 &cfg->fc_dst, cfg->fc_dst_len,
335338fbeeeeSWei Wang 			 &cfg->fc_src, cfg->fc_src_len,
33542b760fcfSWei Wang 			 !(cfg->fc_flags & RTF_CACHE));
33551da177e4SLinus Torvalds 
33561da177e4SLinus Torvalds 	if (fn) {
335766f5d6ceSWei Wang 		for_each_fib6_node_rt_rcu(fn) {
3358ad1601aeSDavid Ahern 			struct fib6_nh *nh;
3359ad1601aeSDavid Ahern 
33602b760fcfSWei Wang 			if (cfg->fc_flags & RTF_CACHE) {
33617e4b5128SDavid Ahern 				struct fib6_result res = {
33627e4b5128SDavid Ahern 					.f6i = rt,
33637e4b5128SDavid Ahern 				};
336423fb93a4SDavid Ahern 				int rc;
336523fb93a4SDavid Ahern 
33667e4b5128SDavid Ahern 				rt_cache = rt6_find_cached_rt(&res,
33677e4b5128SDavid Ahern 							      &cfg->fc_dst,
33682b760fcfSWei Wang 							      &cfg->fc_src);
336923fb93a4SDavid Ahern 				if (rt_cache) {
337023fb93a4SDavid Ahern 					rc = ip6_del_cached_rt(rt_cache, cfg);
					/* -ESRCH means "did not match": keep
					 * looking at the remaining routes.
					 */
33719e575010SEric Dumazet 					if (rc != -ESRCH) {
33729e575010SEric Dumazet 						rcu_read_unlock();
337323fb93a4SDavid Ahern 						return rc;
337423fb93a4SDavid Ahern 					}
33759e575010SEric Dumazet 				}
				/* In RTF_CACHE mode the fib6_info itself is
				 * never deleted.
				 */
33761f56a01fSMartin KaFai Lau 				continue;
33772b760fcfSWei Wang 			}
3378ad1601aeSDavid Ahern 
3379ad1601aeSDavid Ahern 			nh = &rt->fib6_nh;
338086872cb5SThomas Graf 			if (cfg->fc_ifindex &&
3381ad1601aeSDavid Ahern 			    (!nh->fib_nh_dev ||
3382ad1601aeSDavid Ahern 			     nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
33831da177e4SLinus Torvalds 				continue;
338486872cb5SThomas Graf 			if (cfg->fc_flags & RTF_GATEWAY &&
3385ad1601aeSDavid Ahern 			    !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
33861da177e4SLinus Torvalds 				continue;
338793c2fb25SDavid Ahern 			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
33881da177e4SLinus Torvalds 				continue;
338993c2fb25SDavid Ahern 			if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
3390c2ed1880SMantas M 				continue;
			/* Take a reference before leaving the RCU section;
			 * a route whose reference cannot be taken is skipped.
			 */
3391e873e4b9SWei Wang 			if (!fib6_info_hold_safe(rt))
3392e873e4b9SWei Wang 				continue;
339366f5d6ceSWei Wang 			rcu_read_unlock();
33941da177e4SLinus Torvalds 
33950ae81335SDavid Ahern 			/* if gateway was specified only delete the one hop */
33960ae81335SDavid Ahern 			if (cfg->fc_flags & RTF_GATEWAY)
339786872cb5SThomas Graf 				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
33980ae81335SDavid Ahern 
33990ae81335SDavid Ahern 			return __ip6_del_rt_siblings(rt, cfg);
34001da177e4SLinus Torvalds 		}
34011da177e4SLinus Torvalds 	}
340266f5d6ceSWei Wang 	rcu_read_unlock();
34031da177e4SLinus Torvalds 
34041da177e4SLinus Torvalds 	return err;
34051da177e4SLinus Torvalds }
34061da177e4SLinus Torvalds 
34076700c270SDavid S. Miller static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3408a6279458SYOSHIFUJI Hideaki {
3409a6279458SYOSHIFUJI Hideaki 	struct netevent_redirect netevent;
3410e8599ff4SDavid S. Miller 	struct rt6_info *rt, *nrt = NULL;
341185bd05deSDavid Ahern 	struct fib6_result res = {};
3412e8599ff4SDavid S. Miller 	struct ndisc_options ndopts;
3413e8599ff4SDavid S. Miller 	struct inet6_dev *in6_dev;
3414e8599ff4SDavid S. Miller 	struct neighbour *neigh;
341571bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	struct rd_msg *msg;
34166e157b6aSDavid S. Miller 	int optlen, on_link;
34176e157b6aSDavid S. Miller 	u8 *lladdr;
3418e8599ff4SDavid S. Miller 
341929a3cad5SSimon Horman 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
342071bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	optlen -= sizeof(*msg);
3421e8599ff4SDavid S. Miller 
3422e8599ff4SDavid S. Miller 	if (optlen < 0) {
34236e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3424e8599ff4SDavid S. Miller 		return;
3425e8599ff4SDavid S. Miller 	}
3426e8599ff4SDavid S. Miller 
342771bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	msg = (struct rd_msg *)icmp6_hdr(skb);
3428e8599ff4SDavid S. Miller 
342971bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (ipv6_addr_is_multicast(&msg->dest)) {
34306e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3431e8599ff4SDavid S. Miller 		return;
3432e8599ff4SDavid S. Miller 	}
3433e8599ff4SDavid S. Miller 
34346e157b6aSDavid S. Miller 	on_link = 0;
343571bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3436e8599ff4SDavid S. Miller 		on_link = 1;
343771bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	} else if (ipv6_addr_type(&msg->target) !=
3438e8599ff4SDavid S. Miller 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
34396e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3440e8599ff4SDavid S. Miller 		return;
3441e8599ff4SDavid S. Miller 	}
3442e8599ff4SDavid S. Miller 
3443e8599ff4SDavid S. Miller 	in6_dev = __in6_dev_get(skb->dev);
3444e8599ff4SDavid S. Miller 	if (!in6_dev)
3445e8599ff4SDavid S. Miller 		return;
3446e8599ff4SDavid S. Miller 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3447e8599ff4SDavid S. Miller 		return;
3448e8599ff4SDavid S. Miller 
3449e8599ff4SDavid S. Miller 	/* RFC2461 8.1:
3450e8599ff4SDavid S. Miller 	 *	The IP source address of the Redirect MUST be the same as the current
3451e8599ff4SDavid S. Miller 	 *	first-hop router for the specified ICMP Destination Address.
3452e8599ff4SDavid S. Miller 	 */
3453e8599ff4SDavid S. Miller 
3454f997c55cSAlexander Aring 	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3455e8599ff4SDavid S. Miller 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3456e8599ff4SDavid S. Miller 		return;
3457e8599ff4SDavid S. Miller 	}
34586e157b6aSDavid S. Miller 
34596e157b6aSDavid S. Miller 	lladdr = NULL;
3460e8599ff4SDavid S. Miller 	if (ndopts.nd_opts_tgt_lladdr) {
3461e8599ff4SDavid S. Miller 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3462e8599ff4SDavid S. Miller 					     skb->dev);
3463e8599ff4SDavid S. Miller 		if (!lladdr) {
3464e8599ff4SDavid S. Miller 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3465e8599ff4SDavid S. Miller 			return;
3466e8599ff4SDavid S. Miller 		}
3467e8599ff4SDavid S. Miller 	}
3468e8599ff4SDavid S. Miller 
34696e157b6aSDavid S. Miller 	rt = (struct rt6_info *) dst;
3470ec13ad1dSMatthias Schiffer 	if (rt->rt6i_flags & RTF_REJECT) {
34716e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
34726e157b6aSDavid S. Miller 		return;
34736e157b6aSDavid S. Miller 	}
34746e157b6aSDavid S. Miller 
34756e157b6aSDavid S. Miller 	/* Redirect received -> path was valid.
34766e157b6aSDavid S. Miller 	 * Look, redirects are sent only in response to data packets,
34776e157b6aSDavid S. Miller 	 * so that this nexthop apparently is reachable. --ANK
34786e157b6aSDavid S. Miller 	 */
34790dec879fSJulian Anastasov 	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
34806e157b6aSDavid S. Miller 
348171bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3482e8599ff4SDavid S. Miller 	if (!neigh)
3483e8599ff4SDavid S. Miller 		return;
3484e8599ff4SDavid S. Miller 
34851da177e4SLinus Torvalds 	/*
34861da177e4SLinus Torvalds 	 *	We have finally decided to accept it.
34871da177e4SLinus Torvalds 	 */
34881da177e4SLinus Torvalds 
3489f997c55cSAlexander Aring 	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
34901da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
34911da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_OVERRIDE|
34921da177e4SLinus Torvalds 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3493f997c55cSAlexander Aring 				     NEIGH_UPDATE_F_ISROUTER)),
3494f997c55cSAlexander Aring 		     NDISC_REDIRECT, &ndopts);
34951da177e4SLinus Torvalds 
34964d85cd0cSDavid Ahern 	rcu_read_lock();
349785bd05deSDavid Ahern 	res.f6i = rcu_dereference(rt->from);
3498e873e4b9SWei Wang 	/* This fib6_info_hold() is safe here because we hold reference to rt
3499e873e4b9SWei Wang 	 * and rt already holds reference to fib6_info.
3500e873e4b9SWei Wang 	 */
350185bd05deSDavid Ahern 	fib6_info_hold(res.f6i);
35024d85cd0cSDavid Ahern 	rcu_read_unlock();
35038a14e46fSDavid Ahern 
350485bd05deSDavid Ahern 	res.nh = &res.f6i->fib6_nh;
35057d21fec9SDavid Ahern 	res.fib6_flags = res.f6i->fib6_flags;
35067d21fec9SDavid Ahern 	res.fib6_type = res.f6i->fib6_type;
350785bd05deSDavid Ahern 	nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
350838308473SDavid S. Miller 	if (!nrt)
35091da177e4SLinus Torvalds 		goto out;
35101da177e4SLinus Torvalds 
35111da177e4SLinus Torvalds 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
35121da177e4SLinus Torvalds 	if (on_link)
35131da177e4SLinus Torvalds 		nrt->rt6i_flags &= ~RTF_GATEWAY;
35141da177e4SLinus Torvalds 
35154e3fd7a0SAlexey Dobriyan 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
35161da177e4SLinus Torvalds 
35172b760fcfSWei Wang 	/* No need to remove rt from the exception table if rt is
35182b760fcfSWei Wang 	 * a cached route because rt6_insert_exception() will
35192b760fcfSWei Wang 	 * takes care of it
35202b760fcfSWei Wang 	 */
35215012f0a5SDavid Ahern 	if (rt6_insert_exception(nrt, &res)) {
35222b760fcfSWei Wang 		dst_release_immediate(&nrt->dst);
35232b760fcfSWei Wang 		goto out;
35242b760fcfSWei Wang 	}
35251da177e4SLinus Torvalds 
3526d8d1f30bSChangli Gao 	netevent.old = &rt->dst;
3527d8d1f30bSChangli Gao 	netevent.new = &nrt->dst;
352871bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	netevent.daddr = &msg->dest;
352960592833SYOSHIFUJI Hideaki / 吉藤英明 	netevent.neigh = neigh;
35308d71740cSTom Tucker 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
35318d71740cSTom Tucker 
35321da177e4SLinus Torvalds out:
353385bd05deSDavid Ahern 	fib6_info_release(res.f6i);
3534e8599ff4SDavid S. Miller 	neigh_release(neigh);
35356e157b6aSDavid S. Miller }
35366e157b6aSDavid S. Miller 
353770ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
35388d1c802bSDavid Ahern static struct fib6_info *rt6_get_route_info(struct net *net,
3539b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
3540830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
3541830218c1SDavid Ahern 					   struct net_device *dev)
354270ceb4f5SYOSHIFUJI Hideaki {
3543830218c1SDavid Ahern 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3544830218c1SDavid Ahern 	int ifindex = dev->ifindex;
354570ceb4f5SYOSHIFUJI Hideaki 	struct fib6_node *fn;
35468d1c802bSDavid Ahern 	struct fib6_info *rt = NULL;
3547c71099acSThomas Graf 	struct fib6_table *table;
354870ceb4f5SYOSHIFUJI Hideaki 
3549830218c1SDavid Ahern 	table = fib6_get_table(net, tb_id);
355038308473SDavid S. Miller 	if (!table)
3551c71099acSThomas Graf 		return NULL;
3552c71099acSThomas Graf 
355366f5d6ceSWei Wang 	rcu_read_lock();
355438fbeeeeSWei Wang 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
355570ceb4f5SYOSHIFUJI Hideaki 	if (!fn)
355670ceb4f5SYOSHIFUJI Hideaki 		goto out;
355770ceb4f5SYOSHIFUJI Hideaki 
355866f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(fn) {
3559ad1601aeSDavid Ahern 		if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
356070ceb4f5SYOSHIFUJI Hideaki 			continue;
35612b2450caSDavid Ahern 		if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
3562bdf00467SDavid Ahern 		    !rt->fib6_nh.fib_nh_gw_family)
356370ceb4f5SYOSHIFUJI Hideaki 			continue;
3564ad1601aeSDavid Ahern 		if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
356570ceb4f5SYOSHIFUJI Hideaki 			continue;
3566e873e4b9SWei Wang 		if (!fib6_info_hold_safe(rt))
3567e873e4b9SWei Wang 			continue;
356870ceb4f5SYOSHIFUJI Hideaki 		break;
356970ceb4f5SYOSHIFUJI Hideaki 	}
357070ceb4f5SYOSHIFUJI Hideaki out:
357166f5d6ceSWei Wang 	rcu_read_unlock();
357270ceb4f5SYOSHIFUJI Hideaki 	return rt;
357370ceb4f5SYOSHIFUJI Hideaki }
357470ceb4f5SYOSHIFUJI Hideaki 
35758d1c802bSDavid Ahern static struct fib6_info *rt6_add_route_info(struct net *net,
3576b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
3577830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
3578830218c1SDavid Ahern 					   struct net_device *dev,
357995c96174SEric Dumazet 					   unsigned int pref)
358070ceb4f5SYOSHIFUJI Hideaki {
358186872cb5SThomas Graf 	struct fib6_config cfg = {
3582238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
3583830218c1SDavid Ahern 		.fc_ifindex	= dev->ifindex,
358486872cb5SThomas Graf 		.fc_dst_len	= prefixlen,
358586872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
358686872cb5SThomas Graf 				  RTF_UP | RTF_PREF(pref),
3587b91d5329SXin Long 		.fc_protocol = RTPROT_RA,
3588e8478e80SDavid Ahern 		.fc_type = RTN_UNICAST,
358915e47304SEric W. Biederman 		.fc_nlinfo.portid = 0,
3590efa2cea0SDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
3591efa2cea0SDaniel Lezcano 		.fc_nlinfo.nl_net = net,
359286872cb5SThomas Graf 	};
359370ceb4f5SYOSHIFUJI Hideaki 
3594830218c1SDavid Ahern 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
35954e3fd7a0SAlexey Dobriyan 	cfg.fc_dst = *prefix;
35964e3fd7a0SAlexey Dobriyan 	cfg.fc_gateway = *gwaddr;
359786872cb5SThomas Graf 
3598e317da96SYOSHIFUJI Hideaki 	/* We should treat it as a default route if prefix length is 0. */
3599e317da96SYOSHIFUJI Hideaki 	if (!prefixlen)
360086872cb5SThomas Graf 		cfg.fc_flags |= RTF_DEFAULT;
360170ceb4f5SYOSHIFUJI Hideaki 
3602acb54e3cSDavid Ahern 	ip6_route_add(&cfg, GFP_ATOMIC, NULL);
360370ceb4f5SYOSHIFUJI Hideaki 
3604830218c1SDavid Ahern 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
360570ceb4f5SYOSHIFUJI Hideaki }
360670ceb4f5SYOSHIFUJI Hideaki #endif
360770ceb4f5SYOSHIFUJI Hideaki 
36088d1c802bSDavid Ahern struct fib6_info *rt6_get_dflt_router(struct net *net,
3609afb1d4b5SDavid Ahern 				     const struct in6_addr *addr,
3610afb1d4b5SDavid Ahern 				     struct net_device *dev)
36111da177e4SLinus Torvalds {
3612830218c1SDavid Ahern 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
36138d1c802bSDavid Ahern 	struct fib6_info *rt;
3614c71099acSThomas Graf 	struct fib6_table *table;
36151da177e4SLinus Torvalds 
3616afb1d4b5SDavid Ahern 	table = fib6_get_table(net, tb_id);
361738308473SDavid S. Miller 	if (!table)
3618c71099acSThomas Graf 		return NULL;
36191da177e4SLinus Torvalds 
362066f5d6ceSWei Wang 	rcu_read_lock();
362166f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3622ad1601aeSDavid Ahern 		struct fib6_nh *nh = &rt->fib6_nh;
3623ad1601aeSDavid Ahern 
3624ad1601aeSDavid Ahern 		if (dev == nh->fib_nh_dev &&
362593c2fb25SDavid Ahern 		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
3626ad1601aeSDavid Ahern 		    ipv6_addr_equal(&nh->fib_nh_gw6, addr))
36271da177e4SLinus Torvalds 			break;
36281da177e4SLinus Torvalds 	}
3629e873e4b9SWei Wang 	if (rt && !fib6_info_hold_safe(rt))
3630e873e4b9SWei Wang 		rt = NULL;
363166f5d6ceSWei Wang 	rcu_read_unlock();
36321da177e4SLinus Torvalds 	return rt;
36331da177e4SLinus Torvalds }
36341da177e4SLinus Torvalds 
36358d1c802bSDavid Ahern struct fib6_info *rt6_add_dflt_router(struct net *net,
3636afb1d4b5SDavid Ahern 				     const struct in6_addr *gwaddr,
3637ebacaaa0SYOSHIFUJI Hideaki 				     struct net_device *dev,
3638ebacaaa0SYOSHIFUJI Hideaki 				     unsigned int pref)
36391da177e4SLinus Torvalds {
364086872cb5SThomas Graf 	struct fib6_config cfg = {
3641ca254490SDavid Ahern 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3642238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
364386872cb5SThomas Graf 		.fc_ifindex	= dev->ifindex,
364486872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
364586872cb5SThomas Graf 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3646b91d5329SXin Long 		.fc_protocol = RTPROT_RA,
3647e8478e80SDavid Ahern 		.fc_type = RTN_UNICAST,
364815e47304SEric W. Biederman 		.fc_nlinfo.portid = 0,
36495578689aSDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
3650afb1d4b5SDavid Ahern 		.fc_nlinfo.nl_net = net,
365186872cb5SThomas Graf 	};
36521da177e4SLinus Torvalds 
36534e3fd7a0SAlexey Dobriyan 	cfg.fc_gateway = *gwaddr;
36541da177e4SLinus Torvalds 
3655acb54e3cSDavid Ahern 	if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
3656830218c1SDavid Ahern 		struct fib6_table *table;
3657830218c1SDavid Ahern 
3658830218c1SDavid Ahern 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
3659830218c1SDavid Ahern 		if (table)
3660830218c1SDavid Ahern 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3661830218c1SDavid Ahern 	}
36621da177e4SLinus Torvalds 
3663afb1d4b5SDavid Ahern 	return rt6_get_dflt_router(net, gwaddr, dev);
36641da177e4SLinus Torvalds }
36651da177e4SLinus Torvalds 
/*
 * Delete the matching routes hanging off the root node of @table, then
 * clear the table's RT6_TABLE_HAS_DFLT_ROUTER flag.
 *
 * A route is deleted when it has RTF_DEFAULT or RTF_ADDRCONF set and its
 * nexthop device either has no inet6_dev or has accept_ra != 2.
 */
3666afb1d4b5SDavid Ahern static void __rt6_purge_dflt_routers(struct net *net,
3667afb1d4b5SDavid Ahern 				     struct fib6_table *table)
36681da177e4SLinus Torvalds {
36698d1c802bSDavid Ahern 	struct fib6_info *rt;
36701da177e4SLinus Torvalds 
36711da177e4SLinus Torvalds restart:
367266f5d6ceSWei Wang 	rcu_read_lock();
367366f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
3674dcd1f572SDavid Ahern 		struct net_device *dev = fib6_info_nh_dev(rt);
3675dcd1f572SDavid Ahern 		struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3676dcd1f572SDavid Ahern 
		/* NOTE(review): this mask test is true when EITHER flag is
		 * set, whereas rt6_get_dflt_router() requires both - confirm
		 * that the difference is intended.
		 */
367793c2fb25SDavid Ahern 		if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3678e873e4b9SWei Wang 		    (!idev || idev->cnf.accept_ra != 2) &&
3679e873e4b9SWei Wang 		    fib6_info_hold_safe(rt)) {
			/* The RCU read lock is dropped before deleting, so
			 * the walk cannot simply continue from rt: start
			 * over from the head of the list.
			 */
368066f5d6ceSWei Wang 			rcu_read_unlock();
3681afb1d4b5SDavid Ahern 			ip6_del_rt(net, rt);
36821da177e4SLinus Torvalds 			goto restart;
36831da177e4SLinus Torvalds 		}
36841da177e4SLinus Torvalds 	}
368566f5d6ceSWei Wang 	rcu_read_unlock();
3686830218c1SDavid Ahern 
3687830218c1SDavid Ahern 	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3688830218c1SDavid Ahern }
3689830218c1SDavid Ahern 
3690830218c1SDavid Ahern void rt6_purge_dflt_routers(struct net *net)
3691830218c1SDavid Ahern {
3692830218c1SDavid Ahern 	struct fib6_table *table;
3693830218c1SDavid Ahern 	struct hlist_head *head;
3694830218c1SDavid Ahern 	unsigned int h;
3695830218c1SDavid Ahern 
3696830218c1SDavid Ahern 	rcu_read_lock();
3697830218c1SDavid Ahern 
3698830218c1SDavid Ahern 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3699830218c1SDavid Ahern 		head = &net->ipv6.fib_table_hash[h];
3700830218c1SDavid Ahern 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3701830218c1SDavid Ahern 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3702afb1d4b5SDavid Ahern 				__rt6_purge_dflt_routers(net, table);
3703830218c1SDavid Ahern 		}
3704830218c1SDavid Ahern 	}
3705830218c1SDavid Ahern 
3706830218c1SDavid Ahern 	rcu_read_unlock();
37071da177e4SLinus Torvalds }
37081da177e4SLinus Torvalds 
37095578689aSDaniel Lezcano static void rtmsg_to_fib6_config(struct net *net,
37105578689aSDaniel Lezcano 				 struct in6_rtmsg *rtmsg,
371186872cb5SThomas Graf 				 struct fib6_config *cfg)
371286872cb5SThomas Graf {
37138823a3acSMaciej Żenczykowski 	*cfg = (struct fib6_config){
37148823a3acSMaciej Żenczykowski 		.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
37158823a3acSMaciej Żenczykowski 			 : RT6_TABLE_MAIN,
37168823a3acSMaciej Żenczykowski 		.fc_ifindex = rtmsg->rtmsg_ifindex,
371767f69513SDavid Ahern 		.fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
37188823a3acSMaciej Żenczykowski 		.fc_expires = rtmsg->rtmsg_info,
37198823a3acSMaciej Żenczykowski 		.fc_dst_len = rtmsg->rtmsg_dst_len,
37208823a3acSMaciej Żenczykowski 		.fc_src_len = rtmsg->rtmsg_src_len,
37218823a3acSMaciej Żenczykowski 		.fc_flags = rtmsg->rtmsg_flags,
37228823a3acSMaciej Żenczykowski 		.fc_type = rtmsg->rtmsg_type,
372386872cb5SThomas Graf 
37248823a3acSMaciej Żenczykowski 		.fc_nlinfo.nl_net = net,
372586872cb5SThomas Graf 
37268823a3acSMaciej Żenczykowski 		.fc_dst = rtmsg->rtmsg_dst,
37278823a3acSMaciej Żenczykowski 		.fc_src = rtmsg->rtmsg_src,
37288823a3acSMaciej Żenczykowski 		.fc_gateway = rtmsg->rtmsg_gateway,
37298823a3acSMaciej Żenczykowski 	};
373086872cb5SThomas Graf }
373186872cb5SThomas Graf 
37325578689aSDaniel Lezcano int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
37331da177e4SLinus Torvalds {
373486872cb5SThomas Graf 	struct fib6_config cfg;
37351da177e4SLinus Torvalds 	struct in6_rtmsg rtmsg;
37361da177e4SLinus Torvalds 	int err;
37371da177e4SLinus Torvalds 
37381da177e4SLinus Torvalds 	switch (cmd) {
37391da177e4SLinus Torvalds 	case SIOCADDRT:		/* Add a route */
37401da177e4SLinus Torvalds 	case SIOCDELRT:		/* Delete a route */
3741af31f412SEric W. Biederman 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
37421da177e4SLinus Torvalds 			return -EPERM;
37431da177e4SLinus Torvalds 		err = copy_from_user(&rtmsg, arg,
37441da177e4SLinus Torvalds 				     sizeof(struct in6_rtmsg));
37451da177e4SLinus Torvalds 		if (err)
37461da177e4SLinus Torvalds 			return -EFAULT;
37471da177e4SLinus Torvalds 
37485578689aSDaniel Lezcano 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
374986872cb5SThomas Graf 
37501da177e4SLinus Torvalds 		rtnl_lock();
37511da177e4SLinus Torvalds 		switch (cmd) {
37521da177e4SLinus Torvalds 		case SIOCADDRT:
3753acb54e3cSDavid Ahern 			err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
37541da177e4SLinus Torvalds 			break;
37551da177e4SLinus Torvalds 		case SIOCDELRT:
3756333c4301SDavid Ahern 			err = ip6_route_del(&cfg, NULL);
37571da177e4SLinus Torvalds 			break;
37581da177e4SLinus Torvalds 		default:
37591da177e4SLinus Torvalds 			err = -EINVAL;
37601da177e4SLinus Torvalds 		}
37611da177e4SLinus Torvalds 		rtnl_unlock();
37621da177e4SLinus Torvalds 
37631da177e4SLinus Torvalds 		return err;
37643ff50b79SStephen Hemminger 	}
37651da177e4SLinus Torvalds 
37661da177e4SLinus Torvalds 	return -EINVAL;
37671da177e4SLinus Torvalds }
37681da177e4SLinus Torvalds 
37691da177e4SLinus Torvalds /*
37701da177e4SLinus Torvalds  *	Drop the packet on the floor
37711da177e4SLinus Torvalds  */
37721da177e4SLinus Torvalds 
3773d5fdd6baSBrian Haley static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
37741da177e4SLinus Torvalds {
3775612f09e8SYOSHIFUJI Hideaki 	int type;
3776adf30907SEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
3777612f09e8SYOSHIFUJI Hideaki 	switch (ipstats_mib_noroutes) {
3778612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_INNOROUTES:
37790660e03fSArnaldo Carvalho de Melo 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
378045bb0060SUlrich Weber 		if (type == IPV6_ADDR_ANY) {
3781bdb7cc64SStephen Suryaputra 			IP6_INC_STATS(dev_net(dst->dev),
3782bdb7cc64SStephen Suryaputra 				      __in6_dev_get_safely(skb->dev),
37833bd653c8SDenis V. Lunev 				      IPSTATS_MIB_INADDRERRORS);
3784612f09e8SYOSHIFUJI Hideaki 			break;
3785612f09e8SYOSHIFUJI Hideaki 		}
3786612f09e8SYOSHIFUJI Hideaki 		/* FALLTHROUGH */
3787612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_OUTNOROUTES:
37883bd653c8SDenis V. Lunev 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
37893bd653c8SDenis V. Lunev 			      ipstats_mib_noroutes);
3790612f09e8SYOSHIFUJI Hideaki 		break;
3791612f09e8SYOSHIFUJI Hideaki 	}
37923ffe533cSAlexey Dobriyan 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
37931da177e4SLinus Torvalds 	kfree_skb(skb);
37941da177e4SLinus Torvalds 	return 0;
37951da177e4SLinus Torvalds }
37961da177e4SLinus Torvalds 
37979ce8ade0SThomas Graf static int ip6_pkt_discard(struct sk_buff *skb)
37989ce8ade0SThomas Graf {
3799612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
38009ce8ade0SThomas Graf }
38019ce8ade0SThomas Graf 
3802ede2059dSEric W. Biederman static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
38031da177e4SLinus Torvalds {
3804adf30907SEric Dumazet 	skb->dev = skb_dst(skb)->dev;
3805612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
38061da177e4SLinus Torvalds }
38071da177e4SLinus Torvalds 
38089ce8ade0SThomas Graf static int ip6_pkt_prohibit(struct sk_buff *skb)
38099ce8ade0SThomas Graf {
3810612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
38119ce8ade0SThomas Graf }
38129ce8ade0SThomas Graf 
3813ede2059dSEric W. Biederman static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
38149ce8ade0SThomas Graf {
3815adf30907SEric Dumazet 	skb->dev = skb_dst(skb)->dev;
3816612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
38179ce8ade0SThomas Graf }
38189ce8ade0SThomas Graf 
38191da177e4SLinus Torvalds /*
38201da177e4SLinus Torvalds  *	Allocate a dst for local (unicast / anycast) address.
38211da177e4SLinus Torvalds  */
38221da177e4SLinus Torvalds 
3823360a9887SDavid Ahern struct fib6_info *addrconf_f6i_alloc(struct net *net,
3824afb1d4b5SDavid Ahern 				     struct inet6_dev *idev,
38251da177e4SLinus Torvalds 				     const struct in6_addr *addr,
3826acb54e3cSDavid Ahern 				     bool anycast, gfp_t gfp_flags)
38271da177e4SLinus Torvalds {
3828c7a1ce39SDavid Ahern 	struct fib6_config cfg = {
3829c7a1ce39SDavid Ahern 		.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
3830c7a1ce39SDavid Ahern 		.fc_ifindex = idev->dev->ifindex,
3831c7a1ce39SDavid Ahern 		.fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
3832c7a1ce39SDavid Ahern 		.fc_dst = *addr,
3833c7a1ce39SDavid Ahern 		.fc_dst_len = 128,
3834c7a1ce39SDavid Ahern 		.fc_protocol = RTPROT_KERNEL,
3835c7a1ce39SDavid Ahern 		.fc_nlinfo.nl_net = net,
3836c7a1ce39SDavid Ahern 		.fc_ignore_dev_down = true,
3837c7a1ce39SDavid Ahern 	};
38385f02ce24SDavid Ahern 
3839e8478e80SDavid Ahern 	if (anycast) {
3840c7a1ce39SDavid Ahern 		cfg.fc_type = RTN_ANYCAST;
3841c7a1ce39SDavid Ahern 		cfg.fc_flags |= RTF_ANYCAST;
3842e8478e80SDavid Ahern 	} else {
3843c7a1ce39SDavid Ahern 		cfg.fc_type = RTN_LOCAL;
3844c7a1ce39SDavid Ahern 		cfg.fc_flags |= RTF_LOCAL;
3845e8478e80SDavid Ahern 	}
38461da177e4SLinus Torvalds 
3847c7a1ce39SDavid Ahern 	return ip6_route_info_create(&cfg, gfp_flags, NULL);
38481da177e4SLinus Torvalds }
38491da177e4SLinus Torvalds 
/*
 * Walker argument used to remove a deleted IP from prefsrc entries:
 * carried through fib6_clean_all() to fib6_remove_prefsrc().
 */
struct arg_dev_net_ip {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address being removed */
};
3856c3968a85SDaniel Walter 
38578d1c802bSDavid Ahern static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
3858c3968a85SDaniel Walter {
3859c3968a85SDaniel Walter 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3860c3968a85SDaniel Walter 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3861c3968a85SDaniel Walter 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3862c3968a85SDaniel Walter 
3863ad1601aeSDavid Ahern 	if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
3864421842edSDavid Ahern 	    rt != net->ipv6.fib6_null_entry &&
386593c2fb25SDavid Ahern 	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
386660006a48SWei Wang 		spin_lock_bh(&rt6_exception_lock);
3867c3968a85SDaniel Walter 		/* remove prefsrc entry */
386893c2fb25SDavid Ahern 		rt->fib6_prefsrc.plen = 0;
386960006a48SWei Wang 		spin_unlock_bh(&rt6_exception_lock);
3870c3968a85SDaniel Walter 	}
3871c3968a85SDaniel Walter 	return 0;
3872c3968a85SDaniel Walter }
3873c3968a85SDaniel Walter 
3874c3968a85SDaniel Walter void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3875c3968a85SDaniel Walter {
3876c3968a85SDaniel Walter 	struct net *net = dev_net(ifp->idev->dev);
3877c3968a85SDaniel Walter 	struct arg_dev_net_ip adni = {
3878c3968a85SDaniel Walter 		.dev = ifp->idev->dev,
3879c3968a85SDaniel Walter 		.net = net,
3880c3968a85SDaniel Walter 		.addr = &ifp->addr,
3881c3968a85SDaniel Walter 	};
38820c3584d5SLi RongQing 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3883c3968a85SDaniel Walter }
3884c3968a85SDaniel Walter 
38852b2450caSDavid Ahern #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT)
3886be7a010dSDuan Jiong 
3887be7a010dSDuan Jiong /* Remove routers and update dst entries when gateway turn into host. */
38888d1c802bSDavid Ahern static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
3889be7a010dSDuan Jiong {
3890be7a010dSDuan Jiong 	struct in6_addr *gateway = (struct in6_addr *)arg;
3891be7a010dSDuan Jiong 
389293c2fb25SDavid Ahern 	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3893bdf00467SDavid Ahern 	    rt->fib6_nh.fib_nh_gw_family &&
3894ad1601aeSDavid Ahern 	    ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
3895be7a010dSDuan Jiong 		return -1;
3896be7a010dSDuan Jiong 	}
3897b16cb459SWei Wang 
3898b16cb459SWei Wang 	/* Further clean up cached routes in exception table.
3899b16cb459SWei Wang 	 * This is needed because cached route may have a different
3900b16cb459SWei Wang 	 * gateway than its 'parent' in the case of an ip redirect.
3901b16cb459SWei Wang 	 */
3902b16cb459SWei Wang 	rt6_exceptions_clean_tohost(rt, gateway);
3903b16cb459SWei Wang 
3904be7a010dSDuan Jiong 	return 0;
3905be7a010dSDuan Jiong }
3906be7a010dSDuan Jiong 
3907be7a010dSDuan Jiong void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3908be7a010dSDuan Jiong {
3909be7a010dSDuan Jiong 	fib6_clean_all(net, fib6_clean_tohost, gateway);
3910be7a010dSDuan Jiong }
3911be7a010dSDuan Jiong 
/*
 * Walker argument shared by the device up/down route walkers.  Only one
 * member of the union is meaningful per walk: fib6_ifup() reads nh_flags,
 * fib6_ifdown() reads event.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned char nh_flags;	/* nexthop flags to clear (up path) */
		unsigned long event;	/* NETDEV_* event (down path) */
	};
};
39192127d95aSIdo Schimmel 
39208d1c802bSDavid Ahern static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
3921d7dedee1SIdo Schimmel {
39228d1c802bSDavid Ahern 	struct fib6_info *iter;
3923d7dedee1SIdo Schimmel 	struct fib6_node *fn;
3924d7dedee1SIdo Schimmel 
392593c2fb25SDavid Ahern 	fn = rcu_dereference_protected(rt->fib6_node,
392693c2fb25SDavid Ahern 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3927d7dedee1SIdo Schimmel 	iter = rcu_dereference_protected(fn->leaf,
392893c2fb25SDavid Ahern 			lockdep_is_held(&rt->fib6_table->tb6_lock));
3929d7dedee1SIdo Schimmel 	while (iter) {
393093c2fb25SDavid Ahern 		if (iter->fib6_metric == rt->fib6_metric &&
393133bd5ac5SDavid Ahern 		    rt6_qualify_for_ecmp(iter))
3932d7dedee1SIdo Schimmel 			return iter;
39338fb11a9aSDavid Ahern 		iter = rcu_dereference_protected(iter->fib6_next,
393493c2fb25SDavid Ahern 				lockdep_is_held(&rt->fib6_table->tb6_lock));
3935d7dedee1SIdo Schimmel 	}
3936d7dedee1SIdo Schimmel 
3937d7dedee1SIdo Schimmel 	return NULL;
3938d7dedee1SIdo Schimmel }
3939d7dedee1SIdo Schimmel 
39408d1c802bSDavid Ahern static bool rt6_is_dead(const struct fib6_info *rt)
3941d7dedee1SIdo Schimmel {
3942ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
3943ad1601aeSDavid Ahern 	    (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
3944ad1601aeSDavid Ahern 	     ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
3945d7dedee1SIdo Schimmel 		return true;
3946d7dedee1SIdo Schimmel 
3947d7dedee1SIdo Schimmel 	return false;
3948d7dedee1SIdo Schimmel }
3949d7dedee1SIdo Schimmel 
39508d1c802bSDavid Ahern static int rt6_multipath_total_weight(const struct fib6_info *rt)
3951d7dedee1SIdo Schimmel {
39528d1c802bSDavid Ahern 	struct fib6_info *iter;
3953d7dedee1SIdo Schimmel 	int total = 0;
3954d7dedee1SIdo Schimmel 
3955d7dedee1SIdo Schimmel 	if (!rt6_is_dead(rt))
3956ad1601aeSDavid Ahern 		total += rt->fib6_nh.fib_nh_weight;
3957d7dedee1SIdo Schimmel 
395893c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
3959d7dedee1SIdo Schimmel 		if (!rt6_is_dead(iter))
3960ad1601aeSDavid Ahern 			total += iter->fib6_nh.fib_nh_weight;
3961d7dedee1SIdo Schimmel 	}
3962d7dedee1SIdo Schimmel 
3963d7dedee1SIdo Schimmel 	return total;
3964d7dedee1SIdo Schimmel }
3965d7dedee1SIdo Schimmel 
39668d1c802bSDavid Ahern static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
3967d7dedee1SIdo Schimmel {
3968d7dedee1SIdo Schimmel 	int upper_bound = -1;
3969d7dedee1SIdo Schimmel 
3970d7dedee1SIdo Schimmel 	if (!rt6_is_dead(rt)) {
3971ad1601aeSDavid Ahern 		*weight += rt->fib6_nh.fib_nh_weight;
3972d7dedee1SIdo Schimmel 		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3973d7dedee1SIdo Schimmel 						    total) - 1;
3974d7dedee1SIdo Schimmel 	}
3975ad1601aeSDavid Ahern 	atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
3976d7dedee1SIdo Schimmel }
3977d7dedee1SIdo Schimmel 
39788d1c802bSDavid Ahern static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
3979d7dedee1SIdo Schimmel {
39808d1c802bSDavid Ahern 	struct fib6_info *iter;
3981d7dedee1SIdo Schimmel 	int weight = 0;
3982d7dedee1SIdo Schimmel 
3983d7dedee1SIdo Schimmel 	rt6_upper_bound_set(rt, &weight, total);
3984d7dedee1SIdo Schimmel 
398593c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
3986d7dedee1SIdo Schimmel 		rt6_upper_bound_set(iter, &weight, total);
3987d7dedee1SIdo Schimmel }
3988d7dedee1SIdo Schimmel 
39898d1c802bSDavid Ahern void rt6_multipath_rebalance(struct fib6_info *rt)
3990d7dedee1SIdo Schimmel {
39918d1c802bSDavid Ahern 	struct fib6_info *first;
3992d7dedee1SIdo Schimmel 	int total;
3993d7dedee1SIdo Schimmel 
3994d7dedee1SIdo Schimmel 	/* In case the entire multipath route was marked for flushing,
3995d7dedee1SIdo Schimmel 	 * then there is no need to rebalance upon the removal of every
3996d7dedee1SIdo Schimmel 	 * sibling route.
3997d7dedee1SIdo Schimmel 	 */
399893c2fb25SDavid Ahern 	if (!rt->fib6_nsiblings || rt->should_flush)
3999d7dedee1SIdo Schimmel 		return;
4000d7dedee1SIdo Schimmel 
4001d7dedee1SIdo Schimmel 	/* During lookup routes are evaluated in order, so we need to
4002d7dedee1SIdo Schimmel 	 * make sure upper bounds are assigned from the first sibling
4003d7dedee1SIdo Schimmel 	 * onwards.
4004d7dedee1SIdo Schimmel 	 */
4005d7dedee1SIdo Schimmel 	first = rt6_multipath_first_sibling(rt);
4006d7dedee1SIdo Schimmel 	if (WARN_ON_ONCE(!first))
4007d7dedee1SIdo Schimmel 		return;
4008d7dedee1SIdo Schimmel 
4009d7dedee1SIdo Schimmel 	total = rt6_multipath_total_weight(first);
4010d7dedee1SIdo Schimmel 	rt6_multipath_upper_bound_set(first, total);
4011d7dedee1SIdo Schimmel }
4012d7dedee1SIdo Schimmel 
40138d1c802bSDavid Ahern static int fib6_ifup(struct fib6_info *rt, void *p_arg)
40142127d95aSIdo Schimmel {
40152127d95aSIdo Schimmel 	const struct arg_netdev_event *arg = p_arg;
40167aef6859SDavid Ahern 	struct net *net = dev_net(arg->dev);
40172127d95aSIdo Schimmel 
4018ad1601aeSDavid Ahern 	if (rt != net->ipv6.fib6_null_entry &&
4019ad1601aeSDavid Ahern 	    rt->fib6_nh.fib_nh_dev == arg->dev) {
4020ad1601aeSDavid Ahern 		rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
40217aef6859SDavid Ahern 		fib6_update_sernum_upto_root(net, rt);
4022d7dedee1SIdo Schimmel 		rt6_multipath_rebalance(rt);
40231de178edSIdo Schimmel 	}
40242127d95aSIdo Schimmel 
40252127d95aSIdo Schimmel 	return 0;
40262127d95aSIdo Schimmel }
40272127d95aSIdo Schimmel 
4028ecc5663cSDavid Ahern void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
40292127d95aSIdo Schimmel {
40302127d95aSIdo Schimmel 	struct arg_netdev_event arg = {
40312127d95aSIdo Schimmel 		.dev = dev,
40326802f3adSIdo Schimmel 		{
40332127d95aSIdo Schimmel 			.nh_flags = nh_flags,
40346802f3adSIdo Schimmel 		},
40352127d95aSIdo Schimmel 	};
40362127d95aSIdo Schimmel 
40372127d95aSIdo Schimmel 	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
40382127d95aSIdo Schimmel 		arg.nh_flags |= RTNH_F_LINKDOWN;
40392127d95aSIdo Schimmel 
40402127d95aSIdo Schimmel 	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
40412127d95aSIdo Schimmel }
40422127d95aSIdo Schimmel 
40438d1c802bSDavid Ahern static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
40441de178edSIdo Schimmel 				   const struct net_device *dev)
40451de178edSIdo Schimmel {
40468d1c802bSDavid Ahern 	struct fib6_info *iter;
40471de178edSIdo Schimmel 
4048ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_dev == dev)
40491de178edSIdo Schimmel 		return true;
405093c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4051ad1601aeSDavid Ahern 		if (iter->fib6_nh.fib_nh_dev == dev)
40521de178edSIdo Schimmel 			return true;
40531de178edSIdo Schimmel 
40541de178edSIdo Schimmel 	return false;
40551de178edSIdo Schimmel }
40561de178edSIdo Schimmel 
40578d1c802bSDavid Ahern static void rt6_multipath_flush(struct fib6_info *rt)
40581de178edSIdo Schimmel {
40598d1c802bSDavid Ahern 	struct fib6_info *iter;
40601de178edSIdo Schimmel 
40611de178edSIdo Schimmel 	rt->should_flush = 1;
406293c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
40631de178edSIdo Schimmel 		iter->should_flush = 1;
40641de178edSIdo Schimmel }
40651de178edSIdo Schimmel 
40668d1c802bSDavid Ahern static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
40671de178edSIdo Schimmel 					     const struct net_device *down_dev)
40681de178edSIdo Schimmel {
40698d1c802bSDavid Ahern 	struct fib6_info *iter;
40701de178edSIdo Schimmel 	unsigned int dead = 0;
40711de178edSIdo Schimmel 
4072ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_dev == down_dev ||
4073ad1601aeSDavid Ahern 	    rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
40741de178edSIdo Schimmel 		dead++;
407593c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4076ad1601aeSDavid Ahern 		if (iter->fib6_nh.fib_nh_dev == down_dev ||
4077ad1601aeSDavid Ahern 		    iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
40781de178edSIdo Schimmel 			dead++;
40791de178edSIdo Schimmel 
40801de178edSIdo Schimmel 	return dead;
40811de178edSIdo Schimmel }
40821de178edSIdo Schimmel 
40838d1c802bSDavid Ahern static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
40841de178edSIdo Schimmel 				       const struct net_device *dev,
4085ecc5663cSDavid Ahern 				       unsigned char nh_flags)
40861de178edSIdo Schimmel {
40878d1c802bSDavid Ahern 	struct fib6_info *iter;
40881de178edSIdo Schimmel 
4089ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_dev == dev)
4090ad1601aeSDavid Ahern 		rt->fib6_nh.fib_nh_flags |= nh_flags;
409193c2fb25SDavid Ahern 	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
4092ad1601aeSDavid Ahern 		if (iter->fib6_nh.fib_nh_dev == dev)
4093ad1601aeSDavid Ahern 			iter->fib6_nh.fib_nh_flags |= nh_flags;
40941de178edSIdo Schimmel }
40951de178edSIdo Schimmel 
4096a1a22c12SDavid Ahern /* called with write lock held for table with rt */
40978d1c802bSDavid Ahern static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
40981da177e4SLinus Torvalds {
40994c981e28SIdo Schimmel 	const struct arg_netdev_event *arg = p_arg;
41004c981e28SIdo Schimmel 	const struct net_device *dev = arg->dev;
41017aef6859SDavid Ahern 	struct net *net = dev_net(dev);
41028ed67789SDaniel Lezcano 
4103421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry)
410427c6fa73SIdo Schimmel 		return 0;
410527c6fa73SIdo Schimmel 
410627c6fa73SIdo Schimmel 	switch (arg->event) {
410727c6fa73SIdo Schimmel 	case NETDEV_UNREGISTER:
4108ad1601aeSDavid Ahern 		return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
410927c6fa73SIdo Schimmel 	case NETDEV_DOWN:
41101de178edSIdo Schimmel 		if (rt->should_flush)
411127c6fa73SIdo Schimmel 			return -1;
411293c2fb25SDavid Ahern 		if (!rt->fib6_nsiblings)
4113ad1601aeSDavid Ahern 			return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
41141de178edSIdo Schimmel 		if (rt6_multipath_uses_dev(rt, dev)) {
41151de178edSIdo Schimmel 			unsigned int count;
41161de178edSIdo Schimmel 
41171de178edSIdo Schimmel 			count = rt6_multipath_dead_count(rt, dev);
411893c2fb25SDavid Ahern 			if (rt->fib6_nsiblings + 1 == count) {
41191de178edSIdo Schimmel 				rt6_multipath_flush(rt);
41201de178edSIdo Schimmel 				return -1;
41211de178edSIdo Schimmel 			}
41221de178edSIdo Schimmel 			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
41231de178edSIdo Schimmel 						   RTNH_F_LINKDOWN);
41247aef6859SDavid Ahern 			fib6_update_sernum(net, rt);
4125d7dedee1SIdo Schimmel 			rt6_multipath_rebalance(rt);
41261de178edSIdo Schimmel 		}
41271de178edSIdo Schimmel 		return -2;
412827c6fa73SIdo Schimmel 	case NETDEV_CHANGE:
4129ad1601aeSDavid Ahern 		if (rt->fib6_nh.fib_nh_dev != dev ||
413093c2fb25SDavid Ahern 		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
413127c6fa73SIdo Schimmel 			break;
4132ad1601aeSDavid Ahern 		rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
4133d7dedee1SIdo Schimmel 		rt6_multipath_rebalance(rt);
413427c6fa73SIdo Schimmel 		break;
41352b241361SIdo Schimmel 	}
4136c159d30cSDavid S. Miller 
41371da177e4SLinus Torvalds 	return 0;
41381da177e4SLinus Torvalds }
41391da177e4SLinus Torvalds 
414027c6fa73SIdo Schimmel void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
41411da177e4SLinus Torvalds {
41424c981e28SIdo Schimmel 	struct arg_netdev_event arg = {
41438ed67789SDaniel Lezcano 		.dev = dev,
41446802f3adSIdo Schimmel 		{
41454c981e28SIdo Schimmel 			.event = event,
41466802f3adSIdo Schimmel 		},
41478ed67789SDaniel Lezcano 	};
41487c6bb7d2SDavid Ahern 	struct net *net = dev_net(dev);
41498ed67789SDaniel Lezcano 
41507c6bb7d2SDavid Ahern 	if (net->ipv6.sysctl.skip_notify_on_dev_down)
41517c6bb7d2SDavid Ahern 		fib6_clean_all_skip_notify(net, fib6_ifdown, &arg);
41527c6bb7d2SDavid Ahern 	else
41537c6bb7d2SDavid Ahern 		fib6_clean_all(net, fib6_ifdown, &arg);
41544c981e28SIdo Schimmel }
41554c981e28SIdo Schimmel 
41564c981e28SIdo Schimmel void rt6_disable_ip(struct net_device *dev, unsigned long event)
41574c981e28SIdo Schimmel {
41584c981e28SIdo Schimmel 	rt6_sync_down_dev(dev, event);
41594c981e28SIdo Schimmel 	rt6_uncached_list_flush_dev(dev_net(dev), dev);
41604c981e28SIdo Schimmel 	neigh_ifdown(&nd_tbl, dev);
41611da177e4SLinus Torvalds }
41621da177e4SLinus Torvalds 
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
41671da177e4SLinus Torvalds 
41688d1c802bSDavid Ahern static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
41691da177e4SLinus Torvalds {
41701da177e4SLinus Torvalds 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
41711da177e4SLinus Torvalds 	struct inet6_dev *idev;
41721da177e4SLinus Torvalds 
41731da177e4SLinus Torvalds 	/* In IPv6 pmtu discovery is not optional,
41741da177e4SLinus Torvalds 	   so that RTAX_MTU lock cannot disable it.
41751da177e4SLinus Torvalds 	   We still use this lock to block changes
41761da177e4SLinus Torvalds 	   caused by addrconf/ndisc.
41771da177e4SLinus Torvalds 	*/
41781da177e4SLinus Torvalds 
41791da177e4SLinus Torvalds 	idev = __in6_dev_get(arg->dev);
418038308473SDavid S. Miller 	if (!idev)
41811da177e4SLinus Torvalds 		return 0;
41821da177e4SLinus Torvalds 
41831da177e4SLinus Torvalds 	/* For administrative MTU increase, there is no way to discover
41841da177e4SLinus Torvalds 	   IPv6 PMTU increase, so PMTU increase should be updated here.
41851da177e4SLinus Torvalds 	   Since RFC 1981 doesn't include administrative MTU increase
41861da177e4SLinus Torvalds 	   update PMTU increase is a MUST. (i.e. jumbo frame)
41871da177e4SLinus Torvalds 	 */
4188ad1601aeSDavid Ahern 	if (rt->fib6_nh.fib_nh_dev == arg->dev &&
4189d4ead6b3SDavid Ahern 	    !fib6_metric_locked(rt, RTAX_MTU)) {
4190d4ead6b3SDavid Ahern 		u32 mtu = rt->fib6_pmtu;
4191d4ead6b3SDavid Ahern 
4192d4ead6b3SDavid Ahern 		if (mtu >= arg->mtu ||
4193d4ead6b3SDavid Ahern 		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4194d4ead6b3SDavid Ahern 			fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4195d4ead6b3SDavid Ahern 
4196f5bbe7eeSWei Wang 		spin_lock_bh(&rt6_exception_lock);
4197e9fa1495SStefano Brivio 		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
4198f5bbe7eeSWei Wang 		spin_unlock_bh(&rt6_exception_lock);
41994b32b5adSMartin KaFai Lau 	}
42001da177e4SLinus Torvalds 	return 0;
42011da177e4SLinus Torvalds }
42021da177e4SLinus Torvalds 
420395c96174SEric Dumazet void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
42041da177e4SLinus Torvalds {
4205c71099acSThomas Graf 	struct rt6_mtu_change_arg arg = {
4206c71099acSThomas Graf 		.dev = dev,
4207c71099acSThomas Graf 		.mtu = mtu,
4208c71099acSThomas Graf 	};
42091da177e4SLinus Torvalds 
42100c3584d5SLi RongQing 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
42111da177e4SLinus Torvalds }
42121da177e4SLinus Torvalds 
4213ef7c79edSPatrick McHardy static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
42145176f91eSThomas Graf 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
4215aa8f8778SEric Dumazet 	[RTA_PREFSRC]		= { .len = sizeof(struct in6_addr) },
421686872cb5SThomas Graf 	[RTA_OIF]               = { .type = NLA_U32 },
4217ab364a6fSThomas Graf 	[RTA_IIF]		= { .type = NLA_U32 },
421886872cb5SThomas Graf 	[RTA_PRIORITY]          = { .type = NLA_U32 },
421986872cb5SThomas Graf 	[RTA_METRICS]           = { .type = NLA_NESTED },
422051ebd318SNicolas Dichtel 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
4221c78ba6d6SLubomir Rintel 	[RTA_PREF]              = { .type = NLA_U8 },
422219e42e45SRoopa Prabhu 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
422319e42e45SRoopa Prabhu 	[RTA_ENCAP]		= { .type = NLA_NESTED },
422432bc201eSXin Long 	[RTA_EXPIRES]		= { .type = NLA_U32 },
4225622ec2c9SLorenzo Colitti 	[RTA_UID]		= { .type = NLA_U32 },
42263b45a410SLiping Zhang 	[RTA_MARK]		= { .type = NLA_U32 },
4227aa8f8778SEric Dumazet 	[RTA_TABLE]		= { .type = NLA_U32 },
4228eacb9384SRoopa Prabhu 	[RTA_IP_PROTO]		= { .type = NLA_U8 },
4229eacb9384SRoopa Prabhu 	[RTA_SPORT]		= { .type = NLA_U16 },
4230eacb9384SRoopa Prabhu 	[RTA_DPORT]		= { .type = NLA_U16 },
423186872cb5SThomas Graf };
423286872cb5SThomas Graf 
423386872cb5SThomas Graf static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
4234333c4301SDavid Ahern 			      struct fib6_config *cfg,
4235333c4301SDavid Ahern 			      struct netlink_ext_ack *extack)
42361da177e4SLinus Torvalds {
423786872cb5SThomas Graf 	struct rtmsg *rtm;
423886872cb5SThomas Graf 	struct nlattr *tb[RTA_MAX+1];
4239c78ba6d6SLubomir Rintel 	unsigned int pref;
424086872cb5SThomas Graf 	int err;
42411da177e4SLinus Torvalds 
4242fceb6435SJohannes Berg 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4243dac9c979SDavid Ahern 			  extack);
424486872cb5SThomas Graf 	if (err < 0)
424586872cb5SThomas Graf 		goto errout;
42461da177e4SLinus Torvalds 
424786872cb5SThomas Graf 	err = -EINVAL;
424886872cb5SThomas Graf 	rtm = nlmsg_data(nlh);
424986872cb5SThomas Graf 
425084db8407SMaciej Żenczykowski 	*cfg = (struct fib6_config){
425184db8407SMaciej Żenczykowski 		.fc_table = rtm->rtm_table,
425284db8407SMaciej Żenczykowski 		.fc_dst_len = rtm->rtm_dst_len,
425384db8407SMaciej Żenczykowski 		.fc_src_len = rtm->rtm_src_len,
425484db8407SMaciej Żenczykowski 		.fc_flags = RTF_UP,
425584db8407SMaciej Żenczykowski 		.fc_protocol = rtm->rtm_protocol,
425684db8407SMaciej Żenczykowski 		.fc_type = rtm->rtm_type,
425784db8407SMaciej Żenczykowski 
425884db8407SMaciej Żenczykowski 		.fc_nlinfo.portid = NETLINK_CB(skb).portid,
425984db8407SMaciej Żenczykowski 		.fc_nlinfo.nlh = nlh,
426084db8407SMaciej Żenczykowski 		.fc_nlinfo.nl_net = sock_net(skb->sk),
426184db8407SMaciej Żenczykowski 	};
426286872cb5SThomas Graf 
4263ef2c7d7bSNicolas Dichtel 	if (rtm->rtm_type == RTN_UNREACHABLE ||
4264ef2c7d7bSNicolas Dichtel 	    rtm->rtm_type == RTN_BLACKHOLE ||
4265b4949ab2SNicolas Dichtel 	    rtm->rtm_type == RTN_PROHIBIT ||
4266b4949ab2SNicolas Dichtel 	    rtm->rtm_type == RTN_THROW)
426786872cb5SThomas Graf 		cfg->fc_flags |= RTF_REJECT;
426886872cb5SThomas Graf 
4269ab79ad14SMaciej Żenczykowski 	if (rtm->rtm_type == RTN_LOCAL)
4270ab79ad14SMaciej Żenczykowski 		cfg->fc_flags |= RTF_LOCAL;
4271ab79ad14SMaciej Żenczykowski 
42721f56a01fSMartin KaFai Lau 	if (rtm->rtm_flags & RTM_F_CLONED)
42731f56a01fSMartin KaFai Lau 		cfg->fc_flags |= RTF_CACHE;
42741f56a01fSMartin KaFai Lau 
4275fc1e64e1SDavid Ahern 	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4276fc1e64e1SDavid Ahern 
427786872cb5SThomas Graf 	if (tb[RTA_GATEWAY]) {
427867b61f6cSJiri Benc 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
427986872cb5SThomas Graf 		cfg->fc_flags |= RTF_GATEWAY;
42801da177e4SLinus Torvalds 	}
4281e3818541SDavid Ahern 	if (tb[RTA_VIA]) {
4282e3818541SDavid Ahern 		NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
4283e3818541SDavid Ahern 		goto errout;
4284e3818541SDavid Ahern 	}
428586872cb5SThomas Graf 
428686872cb5SThomas Graf 	if (tb[RTA_DST]) {
428786872cb5SThomas Graf 		int plen = (rtm->rtm_dst_len + 7) >> 3;
428886872cb5SThomas Graf 
428986872cb5SThomas Graf 		if (nla_len(tb[RTA_DST]) < plen)
429086872cb5SThomas Graf 			goto errout;
429186872cb5SThomas Graf 
429286872cb5SThomas Graf 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
42931da177e4SLinus Torvalds 	}
429486872cb5SThomas Graf 
429586872cb5SThomas Graf 	if (tb[RTA_SRC]) {
429686872cb5SThomas Graf 		int plen = (rtm->rtm_src_len + 7) >> 3;
429786872cb5SThomas Graf 
429886872cb5SThomas Graf 		if (nla_len(tb[RTA_SRC]) < plen)
429986872cb5SThomas Graf 			goto errout;
430086872cb5SThomas Graf 
430186872cb5SThomas Graf 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
43021da177e4SLinus Torvalds 	}
430386872cb5SThomas Graf 
4304c3968a85SDaniel Walter 	if (tb[RTA_PREFSRC])
430567b61f6cSJiri Benc 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4306c3968a85SDaniel Walter 
430786872cb5SThomas Graf 	if (tb[RTA_OIF])
430886872cb5SThomas Graf 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
430986872cb5SThomas Graf 
431086872cb5SThomas Graf 	if (tb[RTA_PRIORITY])
431186872cb5SThomas Graf 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
431286872cb5SThomas Graf 
431386872cb5SThomas Graf 	if (tb[RTA_METRICS]) {
431486872cb5SThomas Graf 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
431586872cb5SThomas Graf 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
43161da177e4SLinus Torvalds 	}
431786872cb5SThomas Graf 
431886872cb5SThomas Graf 	if (tb[RTA_TABLE])
431986872cb5SThomas Graf 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
432086872cb5SThomas Graf 
432151ebd318SNicolas Dichtel 	if (tb[RTA_MULTIPATH]) {
432251ebd318SNicolas Dichtel 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
432351ebd318SNicolas Dichtel 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
43249ed59592SDavid Ahern 
43259ed59592SDavid Ahern 		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4326c255bd68SDavid Ahern 						     cfg->fc_mp_len, extack);
43279ed59592SDavid Ahern 		if (err < 0)
43289ed59592SDavid Ahern 			goto errout;
432951ebd318SNicolas Dichtel 	}
433051ebd318SNicolas Dichtel 
4331c78ba6d6SLubomir Rintel 	if (tb[RTA_PREF]) {
4332c78ba6d6SLubomir Rintel 		pref = nla_get_u8(tb[RTA_PREF]);
4333c78ba6d6SLubomir Rintel 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
4334c78ba6d6SLubomir Rintel 		    pref != ICMPV6_ROUTER_PREF_HIGH)
4335c78ba6d6SLubomir Rintel 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
4336c78ba6d6SLubomir Rintel 		cfg->fc_flags |= RTF_PREF(pref);
4337c78ba6d6SLubomir Rintel 	}
4338c78ba6d6SLubomir Rintel 
433919e42e45SRoopa Prabhu 	if (tb[RTA_ENCAP])
434019e42e45SRoopa Prabhu 		cfg->fc_encap = tb[RTA_ENCAP];
434119e42e45SRoopa Prabhu 
43429ed59592SDavid Ahern 	if (tb[RTA_ENCAP_TYPE]) {
434319e42e45SRoopa Prabhu 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
434419e42e45SRoopa Prabhu 
4345c255bd68SDavid Ahern 		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
43469ed59592SDavid Ahern 		if (err < 0)
43479ed59592SDavid Ahern 			goto errout;
43489ed59592SDavid Ahern 	}
43499ed59592SDavid Ahern 
435032bc201eSXin Long 	if (tb[RTA_EXPIRES]) {
435132bc201eSXin Long 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
435232bc201eSXin Long 
435332bc201eSXin Long 		if (addrconf_finite_timeout(timeout)) {
435432bc201eSXin Long 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
435532bc201eSXin Long 			cfg->fc_flags |= RTF_EXPIRES;
435632bc201eSXin Long 		}
435732bc201eSXin Long 	}
435832bc201eSXin Long 
435986872cb5SThomas Graf 	err = 0;
436086872cb5SThomas Graf errout:
436186872cb5SThomas Graf 	return err;
43621da177e4SLinus Torvalds }
43631da177e4SLinus Torvalds 
43646b9ea5a6SRoopa Prabhu struct rt6_nh {
43658d1c802bSDavid Ahern 	struct fib6_info *fib6_info;
43666b9ea5a6SRoopa Prabhu 	struct fib6_config r_cfg;
43676b9ea5a6SRoopa Prabhu 	struct list_head next;
43686b9ea5a6SRoopa Prabhu };
43696b9ea5a6SRoopa Prabhu 
4370d4ead6b3SDavid Ahern static int ip6_route_info_append(struct net *net,
4371d4ead6b3SDavid Ahern 				 struct list_head *rt6_nh_list,
43728d1c802bSDavid Ahern 				 struct fib6_info *rt,
43738d1c802bSDavid Ahern 				 struct fib6_config *r_cfg)
43746b9ea5a6SRoopa Prabhu {
43756b9ea5a6SRoopa Prabhu 	struct rt6_nh *nh;
43766b9ea5a6SRoopa Prabhu 	int err = -EEXIST;
43776b9ea5a6SRoopa Prabhu 
43786b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, rt6_nh_list, next) {
43798d1c802bSDavid Ahern 		/* check if fib6_info already exists */
43808d1c802bSDavid Ahern 		if (rt6_duplicate_nexthop(nh->fib6_info, rt))
43816b9ea5a6SRoopa Prabhu 			return err;
43826b9ea5a6SRoopa Prabhu 	}
43836b9ea5a6SRoopa Prabhu 
43846b9ea5a6SRoopa Prabhu 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
43856b9ea5a6SRoopa Prabhu 	if (!nh)
43866b9ea5a6SRoopa Prabhu 		return -ENOMEM;
43878d1c802bSDavid Ahern 	nh->fib6_info = rt;
43886b9ea5a6SRoopa Prabhu 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
43896b9ea5a6SRoopa Prabhu 	list_add_tail(&nh->next, rt6_nh_list);
43906b9ea5a6SRoopa Prabhu 
43916b9ea5a6SRoopa Prabhu 	return 0;
43926b9ea5a6SRoopa Prabhu }
43936b9ea5a6SRoopa Prabhu 
43948d1c802bSDavid Ahern static void ip6_route_mpath_notify(struct fib6_info *rt,
43958d1c802bSDavid Ahern 				   struct fib6_info *rt_last,
43963b1137feSDavid Ahern 				   struct nl_info *info,
43973b1137feSDavid Ahern 				   __u16 nlflags)
43983b1137feSDavid Ahern {
43993b1137feSDavid Ahern 	/* if this is an APPEND route, then rt points to the first route
44003b1137feSDavid Ahern 	 * inserted and rt_last points to last route inserted. Userspace
44013b1137feSDavid Ahern 	 * wants a consistent dump of the route which starts at the first
44023b1137feSDavid Ahern 	 * nexthop. Since sibling routes are always added at the end of
44033b1137feSDavid Ahern 	 * the list, find the first sibling of the last route appended
44043b1137feSDavid Ahern 	 */
440593c2fb25SDavid Ahern 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
440693c2fb25SDavid Ahern 		rt = list_first_entry(&rt_last->fib6_siblings,
44078d1c802bSDavid Ahern 				      struct fib6_info,
440893c2fb25SDavid Ahern 				      fib6_siblings);
44093b1137feSDavid Ahern 	}
44103b1137feSDavid Ahern 
44113b1137feSDavid Ahern 	if (rt)
44123b1137feSDavid Ahern 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
44133b1137feSDavid Ahern }
44143b1137feSDavid Ahern 
4415333c4301SDavid Ahern static int ip6_route_multipath_add(struct fib6_config *cfg,
4416333c4301SDavid Ahern 				   struct netlink_ext_ack *extack)
441751ebd318SNicolas Dichtel {
44188d1c802bSDavid Ahern 	struct fib6_info *rt_notif = NULL, *rt_last = NULL;
44193b1137feSDavid Ahern 	struct nl_info *info = &cfg->fc_nlinfo;
442051ebd318SNicolas Dichtel 	struct fib6_config r_cfg;
442151ebd318SNicolas Dichtel 	struct rtnexthop *rtnh;
44228d1c802bSDavid Ahern 	struct fib6_info *rt;
44236b9ea5a6SRoopa Prabhu 	struct rt6_nh *err_nh;
44246b9ea5a6SRoopa Prabhu 	struct rt6_nh *nh, *nh_safe;
44253b1137feSDavid Ahern 	__u16 nlflags;
442651ebd318SNicolas Dichtel 	int remaining;
442751ebd318SNicolas Dichtel 	int attrlen;
44286b9ea5a6SRoopa Prabhu 	int err = 1;
44296b9ea5a6SRoopa Prabhu 	int nhn = 0;
44306b9ea5a6SRoopa Prabhu 	int replace = (cfg->fc_nlinfo.nlh &&
44316b9ea5a6SRoopa Prabhu 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
44326b9ea5a6SRoopa Prabhu 	LIST_HEAD(rt6_nh_list);
443351ebd318SNicolas Dichtel 
44343b1137feSDavid Ahern 	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
44353b1137feSDavid Ahern 	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
44363b1137feSDavid Ahern 		nlflags |= NLM_F_APPEND;
44373b1137feSDavid Ahern 
443835f1b4e9SMichal Kubeček 	remaining = cfg->fc_mp_len;
443951ebd318SNicolas Dichtel 	rtnh = (struct rtnexthop *)cfg->fc_mp;
444051ebd318SNicolas Dichtel 
44416b9ea5a6SRoopa Prabhu 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
44428d1c802bSDavid Ahern 	 * fib6_info structs per nexthop
44436b9ea5a6SRoopa Prabhu 	 */
444451ebd318SNicolas Dichtel 	while (rtnh_ok(rtnh, remaining)) {
444551ebd318SNicolas Dichtel 		memcpy(&r_cfg, cfg, sizeof(*cfg));
444651ebd318SNicolas Dichtel 		if (rtnh->rtnh_ifindex)
444751ebd318SNicolas Dichtel 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
444851ebd318SNicolas Dichtel 
444951ebd318SNicolas Dichtel 		attrlen = rtnh_attrlen(rtnh);
445051ebd318SNicolas Dichtel 		if (attrlen > 0) {
445151ebd318SNicolas Dichtel 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
445251ebd318SNicolas Dichtel 
445351ebd318SNicolas Dichtel 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
445451ebd318SNicolas Dichtel 			if (nla) {
445567b61f6cSJiri Benc 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
445651ebd318SNicolas Dichtel 				r_cfg.fc_flags |= RTF_GATEWAY;
445751ebd318SNicolas Dichtel 			}
445819e42e45SRoopa Prabhu 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
445919e42e45SRoopa Prabhu 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
446019e42e45SRoopa Prabhu 			if (nla)
446119e42e45SRoopa Prabhu 				r_cfg.fc_encap_type = nla_get_u16(nla);
446251ebd318SNicolas Dichtel 		}
44636b9ea5a6SRoopa Prabhu 
446468e2ffdeSDavid Ahern 		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
4465acb54e3cSDavid Ahern 		rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
44668c5b83f0SRoopa Prabhu 		if (IS_ERR(rt)) {
44678c5b83f0SRoopa Prabhu 			err = PTR_ERR(rt);
44688c5b83f0SRoopa Prabhu 			rt = NULL;
44696b9ea5a6SRoopa Prabhu 			goto cleanup;
44708c5b83f0SRoopa Prabhu 		}
4471b5d2d75eSDavid Ahern 		if (!rt6_qualify_for_ecmp(rt)) {
4472b5d2d75eSDavid Ahern 			err = -EINVAL;
4473b5d2d75eSDavid Ahern 			NL_SET_ERR_MSG(extack,
4474b5d2d75eSDavid Ahern 				       "Device only routes can not be added for IPv6 using the multipath API.");
4475b5d2d75eSDavid Ahern 			fib6_info_release(rt);
4476b5d2d75eSDavid Ahern 			goto cleanup;
4477b5d2d75eSDavid Ahern 		}
44786b9ea5a6SRoopa Prabhu 
4479ad1601aeSDavid Ahern 		rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
4480398958aeSIdo Schimmel 
4481d4ead6b3SDavid Ahern 		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4482d4ead6b3SDavid Ahern 					    rt, &r_cfg);
448351ebd318SNicolas Dichtel 		if (err) {
448493531c67SDavid Ahern 			fib6_info_release(rt);
44856b9ea5a6SRoopa Prabhu 			goto cleanup;
448651ebd318SNicolas Dichtel 		}
44876b9ea5a6SRoopa Prabhu 
44886b9ea5a6SRoopa Prabhu 		rtnh = rtnh_next(rtnh, &remaining);
448951ebd318SNicolas Dichtel 	}
44906b9ea5a6SRoopa Prabhu 
44913b1137feSDavid Ahern 	/* for add and replace send one notification with all nexthops.
44923b1137feSDavid Ahern 	 * Skip the notification in fib6_add_rt2node and send one with
44933b1137feSDavid Ahern 	 * the full route when done
44943b1137feSDavid Ahern 	 */
44953b1137feSDavid Ahern 	info->skip_notify = 1;
44963b1137feSDavid Ahern 
44976b9ea5a6SRoopa Prabhu 	err_nh = NULL;
44986b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, &rt6_nh_list, next) {
44998d1c802bSDavid Ahern 		err = __ip6_ins_rt(nh->fib6_info, info, extack);
45008d1c802bSDavid Ahern 		fib6_info_release(nh->fib6_info);
45013b1137feSDavid Ahern 
4502f7225172SDavid Ahern 		if (!err) {
4503f7225172SDavid Ahern 			/* save reference to last route successfully inserted */
4504f7225172SDavid Ahern 			rt_last = nh->fib6_info;
4505f7225172SDavid Ahern 
45066b9ea5a6SRoopa Prabhu 			/* save reference to first route for notification */
4507f7225172SDavid Ahern 			if (!rt_notif)
45088d1c802bSDavid Ahern 				rt_notif = nh->fib6_info;
4509f7225172SDavid Ahern 		}
45106b9ea5a6SRoopa Prabhu 
45118d1c802bSDavid Ahern 		/* nh->fib6_info is used or freed at this point, reset to NULL*/
45128d1c802bSDavid Ahern 		nh->fib6_info = NULL;
45136b9ea5a6SRoopa Prabhu 		if (err) {
45146b9ea5a6SRoopa Prabhu 			if (replace && nhn)
4515a5a82d84SJakub Kicinski 				NL_SET_ERR_MSG_MOD(extack,
4516a5a82d84SJakub Kicinski 						   "multipath route replace failed (check consistency of installed routes)");
45176b9ea5a6SRoopa Prabhu 			err_nh = nh;
45186b9ea5a6SRoopa Prabhu 			goto add_errout;
45196b9ea5a6SRoopa Prabhu 		}
45206b9ea5a6SRoopa Prabhu 
45211a72418bSNicolas Dichtel 		/* Because each route is added like a single route we remove
452227596472SMichal Kubeček 		 * these flags after the first nexthop: if there is a collision,
452327596472SMichal Kubeček 		 * we have already failed to add the first nexthop:
452427596472SMichal Kubeček 		 * fib6_add_rt2node() has rejected it; when replacing, old
452527596472SMichal Kubeček 		 * nexthops have been replaced by first new, the rest should
452627596472SMichal Kubeček 		 * be added to it.
45271a72418bSNicolas Dichtel 		 */
452827596472SMichal Kubeček 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
452927596472SMichal Kubeček 						     NLM_F_REPLACE);
45306b9ea5a6SRoopa Prabhu 		nhn++;
45316b9ea5a6SRoopa Prabhu 	}
45326b9ea5a6SRoopa Prabhu 
45333b1137feSDavid Ahern 	/* success ... tell user about new route */
45343b1137feSDavid Ahern 	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
45356b9ea5a6SRoopa Prabhu 	goto cleanup;
45366b9ea5a6SRoopa Prabhu 
45376b9ea5a6SRoopa Prabhu add_errout:
45383b1137feSDavid Ahern 	/* send notification for routes that were added so that
45393b1137feSDavid Ahern 	 * the delete notifications sent by ip6_route_del are
45403b1137feSDavid Ahern 	 * coherent
45413b1137feSDavid Ahern 	 */
45423b1137feSDavid Ahern 	if (rt_notif)
45433b1137feSDavid Ahern 		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
45443b1137feSDavid Ahern 
45456b9ea5a6SRoopa Prabhu 	/* Delete routes that were already added */
45466b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, &rt6_nh_list, next) {
45476b9ea5a6SRoopa Prabhu 		if (err_nh == nh)
45486b9ea5a6SRoopa Prabhu 			break;
4549333c4301SDavid Ahern 		ip6_route_del(&nh->r_cfg, extack);
45506b9ea5a6SRoopa Prabhu 	}
45516b9ea5a6SRoopa Prabhu 
45526b9ea5a6SRoopa Prabhu cleanup:
45536b9ea5a6SRoopa Prabhu 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
45548d1c802bSDavid Ahern 		if (nh->fib6_info)
45558d1c802bSDavid Ahern 			fib6_info_release(nh->fib6_info);
45566b9ea5a6SRoopa Prabhu 		list_del(&nh->next);
45576b9ea5a6SRoopa Prabhu 		kfree(nh);
45586b9ea5a6SRoopa Prabhu 	}
45596b9ea5a6SRoopa Prabhu 
45606b9ea5a6SRoopa Prabhu 	return err;
45616b9ea5a6SRoopa Prabhu }
45626b9ea5a6SRoopa Prabhu 
4563333c4301SDavid Ahern static int ip6_route_multipath_del(struct fib6_config *cfg,
4564333c4301SDavid Ahern 				   struct netlink_ext_ack *extack)
45656b9ea5a6SRoopa Prabhu {
45666b9ea5a6SRoopa Prabhu 	struct fib6_config r_cfg;
45676b9ea5a6SRoopa Prabhu 	struct rtnexthop *rtnh;
45686b9ea5a6SRoopa Prabhu 	int remaining;
45696b9ea5a6SRoopa Prabhu 	int attrlen;
45706b9ea5a6SRoopa Prabhu 	int err = 1, last_err = 0;
45716b9ea5a6SRoopa Prabhu 
45726b9ea5a6SRoopa Prabhu 	remaining = cfg->fc_mp_len;
45736b9ea5a6SRoopa Prabhu 	rtnh = (struct rtnexthop *)cfg->fc_mp;
45746b9ea5a6SRoopa Prabhu 
45756b9ea5a6SRoopa Prabhu 	/* Parse a Multipath Entry */
45766b9ea5a6SRoopa Prabhu 	while (rtnh_ok(rtnh, remaining)) {
45776b9ea5a6SRoopa Prabhu 		memcpy(&r_cfg, cfg, sizeof(*cfg));
45786b9ea5a6SRoopa Prabhu 		if (rtnh->rtnh_ifindex)
45796b9ea5a6SRoopa Prabhu 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
45806b9ea5a6SRoopa Prabhu 
45816b9ea5a6SRoopa Prabhu 		attrlen = rtnh_attrlen(rtnh);
45826b9ea5a6SRoopa Prabhu 		if (attrlen > 0) {
45836b9ea5a6SRoopa Prabhu 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
45846b9ea5a6SRoopa Prabhu 
45856b9ea5a6SRoopa Prabhu 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
45866b9ea5a6SRoopa Prabhu 			if (nla) {
45876b9ea5a6SRoopa Prabhu 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
45886b9ea5a6SRoopa Prabhu 				r_cfg.fc_flags |= RTF_GATEWAY;
45896b9ea5a6SRoopa Prabhu 			}
45906b9ea5a6SRoopa Prabhu 		}
4591333c4301SDavid Ahern 		err = ip6_route_del(&r_cfg, extack);
45926b9ea5a6SRoopa Prabhu 		if (err)
45936b9ea5a6SRoopa Prabhu 			last_err = err;
45946b9ea5a6SRoopa Prabhu 
459551ebd318SNicolas Dichtel 		rtnh = rtnh_next(rtnh, &remaining);
459651ebd318SNicolas Dichtel 	}
459751ebd318SNicolas Dichtel 
459851ebd318SNicolas Dichtel 	return last_err;
459951ebd318SNicolas Dichtel }
460051ebd318SNicolas Dichtel 
4601c21ef3e3SDavid Ahern static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4602c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
46031da177e4SLinus Torvalds {
460486872cb5SThomas Graf 	struct fib6_config cfg;
460586872cb5SThomas Graf 	int err;
46061da177e4SLinus Torvalds 
4607333c4301SDavid Ahern 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
460886872cb5SThomas Graf 	if (err < 0)
460986872cb5SThomas Graf 		return err;
461086872cb5SThomas Graf 
461151ebd318SNicolas Dichtel 	if (cfg.fc_mp)
4612333c4301SDavid Ahern 		return ip6_route_multipath_del(&cfg, extack);
46130ae81335SDavid Ahern 	else {
46140ae81335SDavid Ahern 		cfg.fc_delete_all_nh = 1;
4615333c4301SDavid Ahern 		return ip6_route_del(&cfg, extack);
46161da177e4SLinus Torvalds 	}
46170ae81335SDavid Ahern }
46181da177e4SLinus Torvalds 
4619c21ef3e3SDavid Ahern static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4620c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
46211da177e4SLinus Torvalds {
462286872cb5SThomas Graf 	struct fib6_config cfg;
462386872cb5SThomas Graf 	int err;
46241da177e4SLinus Torvalds 
4625333c4301SDavid Ahern 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
462686872cb5SThomas Graf 	if (err < 0)
462786872cb5SThomas Graf 		return err;
462886872cb5SThomas Graf 
462967f69513SDavid Ahern 	if (cfg.fc_metric == 0)
463067f69513SDavid Ahern 		cfg.fc_metric = IP6_RT_PRIO_USER;
463167f69513SDavid Ahern 
463251ebd318SNicolas Dichtel 	if (cfg.fc_mp)
4633333c4301SDavid Ahern 		return ip6_route_multipath_add(&cfg, extack);
463451ebd318SNicolas Dichtel 	else
4635acb54e3cSDavid Ahern 		return ip6_route_add(&cfg, GFP_KERNEL, extack);
46361da177e4SLinus Torvalds }
46371da177e4SLinus Torvalds 
46388d1c802bSDavid Ahern static size_t rt6_nlmsg_size(struct fib6_info *rt)
4639339bf98fSThomas Graf {
4640beb1afacSDavid Ahern 	int nexthop_len = 0;
4641beb1afacSDavid Ahern 
464293c2fb25SDavid Ahern 	if (rt->fib6_nsiblings) {
4643beb1afacSDavid Ahern 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
4644beb1afacSDavid Ahern 			    + NLA_ALIGN(sizeof(struct rtnexthop))
4645beb1afacSDavid Ahern 			    + nla_total_size(16) /* RTA_GATEWAY */
4646ad1601aeSDavid Ahern 			    + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
4647beb1afacSDavid Ahern 
464893c2fb25SDavid Ahern 		nexthop_len *= rt->fib6_nsiblings;
4649beb1afacSDavid Ahern 	}
4650beb1afacSDavid Ahern 
4651339bf98fSThomas Graf 	return NLMSG_ALIGN(sizeof(struct rtmsg))
4652339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_SRC */
4653339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_DST */
4654339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_GATEWAY */
4655339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_PREFSRC */
4656339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_TABLE */
4657339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_IIF */
4658339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_OIF */
4659339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_PRIORITY */
46606a2b9ce0SNoriaki TAKAMIYA 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
4661ea697639SDaniel Borkmann 	       + nla_total_size(sizeof(struct rta_cacheinfo))
4662c78ba6d6SLubomir Rintel 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
466319e42e45SRoopa Prabhu 	       + nla_total_size(1) /* RTA_PREF */
4664ad1601aeSDavid Ahern 	       + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
4665beb1afacSDavid Ahern 	       + nexthop_len;
4666beb1afacSDavid Ahern }
4667beb1afacSDavid Ahern 
4668d4ead6b3SDavid Ahern static int rt6_fill_node(struct net *net, struct sk_buff *skb,
46698d1c802bSDavid Ahern 			 struct fib6_info *rt, struct dst_entry *dst,
4670d4ead6b3SDavid Ahern 			 struct in6_addr *dest, struct in6_addr *src,
467115e47304SEric W. Biederman 			 int iif, int type, u32 portid, u32 seq,
4672f8cfe2ceSDavid Ahern 			 unsigned int flags)
46731da177e4SLinus Torvalds {
467422d0bd82SXin Long 	struct rt6_info *rt6 = (struct rt6_info *)dst;
467522d0bd82SXin Long 	struct rt6key *rt6_dst, *rt6_src;
467622d0bd82SXin Long 	u32 *pmetrics, table, rt6_flags;
46771da177e4SLinus Torvalds 	struct nlmsghdr *nlh;
467822d0bd82SXin Long 	struct rtmsg *rtm;
4679d4ead6b3SDavid Ahern 	long expires = 0;
46801da177e4SLinus Torvalds 
468115e47304SEric W. Biederman 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
468238308473SDavid S. Miller 	if (!nlh)
468326932566SPatrick McHardy 		return -EMSGSIZE;
46842d7202bfSThomas Graf 
468522d0bd82SXin Long 	if (rt6) {
468622d0bd82SXin Long 		rt6_dst = &rt6->rt6i_dst;
468722d0bd82SXin Long 		rt6_src = &rt6->rt6i_src;
468822d0bd82SXin Long 		rt6_flags = rt6->rt6i_flags;
468922d0bd82SXin Long 	} else {
469022d0bd82SXin Long 		rt6_dst = &rt->fib6_dst;
469122d0bd82SXin Long 		rt6_src = &rt->fib6_src;
469222d0bd82SXin Long 		rt6_flags = rt->fib6_flags;
469322d0bd82SXin Long 	}
469422d0bd82SXin Long 
46952d7202bfSThomas Graf 	rtm = nlmsg_data(nlh);
46961da177e4SLinus Torvalds 	rtm->rtm_family = AF_INET6;
469722d0bd82SXin Long 	rtm->rtm_dst_len = rt6_dst->plen;
469822d0bd82SXin Long 	rtm->rtm_src_len = rt6_src->plen;
46991da177e4SLinus Torvalds 	rtm->rtm_tos = 0;
470093c2fb25SDavid Ahern 	if (rt->fib6_table)
470193c2fb25SDavid Ahern 		table = rt->fib6_table->tb6_id;
4702c71099acSThomas Graf 	else
47039e762a4aSPatrick McHardy 		table = RT6_TABLE_UNSPEC;
470497f0082aSKalash Nainwal 	rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
4705c78679e8SDavid S. Miller 	if (nla_put_u32(skb, RTA_TABLE, table))
4706c78679e8SDavid S. Miller 		goto nla_put_failure;
4707e8478e80SDavid Ahern 
4708e8478e80SDavid Ahern 	rtm->rtm_type = rt->fib6_type;
47091da177e4SLinus Torvalds 	rtm->rtm_flags = 0;
47101da177e4SLinus Torvalds 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
471193c2fb25SDavid Ahern 	rtm->rtm_protocol = rt->fib6_protocol;
47121da177e4SLinus Torvalds 
471322d0bd82SXin Long 	if (rt6_flags & RTF_CACHE)
47141da177e4SLinus Torvalds 		rtm->rtm_flags |= RTM_F_CLONED;
47151da177e4SLinus Torvalds 
4716d4ead6b3SDavid Ahern 	if (dest) {
4717d4ead6b3SDavid Ahern 		if (nla_put_in6_addr(skb, RTA_DST, dest))
4718c78679e8SDavid S. Miller 			goto nla_put_failure;
47191da177e4SLinus Torvalds 		rtm->rtm_dst_len = 128;
47201da177e4SLinus Torvalds 	} else if (rtm->rtm_dst_len)
472122d0bd82SXin Long 		if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
4722c78679e8SDavid S. Miller 			goto nla_put_failure;
47231da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
47241da177e4SLinus Torvalds 	if (src) {
4725930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_SRC, src))
4726c78679e8SDavid S. Miller 			goto nla_put_failure;
47271da177e4SLinus Torvalds 		rtm->rtm_src_len = 128;
4728c78679e8SDavid S. Miller 	} else if (rtm->rtm_src_len &&
472922d0bd82SXin Long 		   nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
4730c78679e8SDavid S. Miller 		goto nla_put_failure;
47311da177e4SLinus Torvalds #endif
47327bc570c8SYOSHIFUJI Hideaki 	if (iif) {
47337bc570c8SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_MROUTE
473422d0bd82SXin Long 		if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
4735fd61c6baSDavid Ahern 			int err = ip6mr_get_route(net, skb, rtm, portid);
47362cf75070SNikolay Aleksandrov 
47377bc570c8SYOSHIFUJI Hideaki 			if (err == 0)
47387bc570c8SYOSHIFUJI Hideaki 				return 0;
4739fd61c6baSDavid Ahern 			if (err < 0)
47407bc570c8SYOSHIFUJI Hideaki 				goto nla_put_failure;
47417bc570c8SYOSHIFUJI Hideaki 		} else
47427bc570c8SYOSHIFUJI Hideaki #endif
4743c78679e8SDavid S. Miller 			if (nla_put_u32(skb, RTA_IIF, iif))
4744c78679e8SDavid S. Miller 				goto nla_put_failure;
4745d4ead6b3SDavid Ahern 	} else if (dest) {
47461da177e4SLinus Torvalds 		struct in6_addr saddr_buf;
4747d4ead6b3SDavid Ahern 		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
4748930345eaSJiri Benc 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4749c78679e8SDavid S. Miller 			goto nla_put_failure;
4750c3968a85SDaniel Walter 	}
4751c3968a85SDaniel Walter 
475293c2fb25SDavid Ahern 	if (rt->fib6_prefsrc.plen) {
4753c3968a85SDaniel Walter 		struct in6_addr saddr_buf;
475493c2fb25SDavid Ahern 		saddr_buf = rt->fib6_prefsrc.addr;
4755930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4756c78679e8SDavid S. Miller 			goto nla_put_failure;
47571da177e4SLinus Torvalds 	}
47582d7202bfSThomas Graf 
4759d4ead6b3SDavid Ahern 	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4760d4ead6b3SDavid Ahern 	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
47612d7202bfSThomas Graf 		goto nla_put_failure;
47622d7202bfSThomas Graf 
476393c2fb25SDavid Ahern 	if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
4764beb1afacSDavid Ahern 		goto nla_put_failure;
4765beb1afacSDavid Ahern 
4766beb1afacSDavid Ahern 	/* For multipath routes, walk the siblings list and add
4767beb1afacSDavid Ahern 	 * each as a nexthop within RTA_MULTIPATH.
4768beb1afacSDavid Ahern 	 */
476922d0bd82SXin Long 	if (rt6) {
477022d0bd82SXin Long 		if (rt6_flags & RTF_GATEWAY &&
477122d0bd82SXin Long 		    nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
477222d0bd82SXin Long 			goto nla_put_failure;
477322d0bd82SXin Long 
477422d0bd82SXin Long 		if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
477522d0bd82SXin Long 			goto nla_put_failure;
477622d0bd82SXin Long 	} else if (rt->fib6_nsiblings) {
47778d1c802bSDavid Ahern 		struct fib6_info *sibling, *next_sibling;
4778beb1afacSDavid Ahern 		struct nlattr *mp;
4779beb1afacSDavid Ahern 
4780*ae0be8deSMichal Kubecek 		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
4781beb1afacSDavid Ahern 		if (!mp)
4782beb1afacSDavid Ahern 			goto nla_put_failure;
4783beb1afacSDavid Ahern 
4784c0a72077SDavid Ahern 		if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
4785c0a72077SDavid Ahern 				    rt->fib6_nh.fib_nh_weight) < 0)
4786beb1afacSDavid Ahern 			goto nla_put_failure;
4787beb1afacSDavid Ahern 
4788beb1afacSDavid Ahern 		list_for_each_entry_safe(sibling, next_sibling,
478993c2fb25SDavid Ahern 					 &rt->fib6_siblings, fib6_siblings) {
4790c0a72077SDavid Ahern 			if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
4791c0a72077SDavid Ahern 					    sibling->fib6_nh.fib_nh_weight) < 0)
479294f826b8SEric Dumazet 				goto nla_put_failure;
479394f826b8SEric Dumazet 		}
47942d7202bfSThomas Graf 
4795beb1afacSDavid Ahern 		nla_nest_end(skb, mp);
4796beb1afacSDavid Ahern 	} else {
4797ecc5663cSDavid Ahern 		unsigned char nh_flags = 0;
4798ecc5663cSDavid Ahern 
4799c0a72077SDavid Ahern 		if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
4800ecc5663cSDavid Ahern 				     &nh_flags, false) < 0)
4801c78679e8SDavid S. Miller 			goto nla_put_failure;
4802ecc5663cSDavid Ahern 
4803ecc5663cSDavid Ahern 		rtm->rtm_flags |= nh_flags;
4804beb1afacSDavid Ahern 	}
48058253947eSLi Wei 
480622d0bd82SXin Long 	if (rt6_flags & RTF_EXPIRES) {
480714895687SDavid Ahern 		expires = dst ? dst->expires : rt->expires;
480814895687SDavid Ahern 		expires -= jiffies;
480914895687SDavid Ahern 	}
481069cdf8f9SYOSHIFUJI Hideaki 
4811d4ead6b3SDavid Ahern 	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
4812e3703b3dSThomas Graf 		goto nla_put_failure;
48131da177e4SLinus Torvalds 
481422d0bd82SXin Long 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
4815c78ba6d6SLubomir Rintel 		goto nla_put_failure;
4816c78ba6d6SLubomir Rintel 
481719e42e45SRoopa Prabhu 
4818053c095aSJohannes Berg 	nlmsg_end(skb, nlh);
4819053c095aSJohannes Berg 	return 0;
48202d7202bfSThomas Graf 
48212d7202bfSThomas Graf nla_put_failure:
482226932566SPatrick McHardy 	nlmsg_cancel(skb, nlh);
482326932566SPatrick McHardy 	return -EMSGSIZE;
48241da177e4SLinus Torvalds }
48251da177e4SLinus Torvalds 
482613e38901SDavid Ahern static bool fib6_info_uses_dev(const struct fib6_info *f6i,
482713e38901SDavid Ahern 			       const struct net_device *dev)
482813e38901SDavid Ahern {
4829ad1601aeSDavid Ahern 	if (f6i->fib6_nh.fib_nh_dev == dev)
483013e38901SDavid Ahern 		return true;
483113e38901SDavid Ahern 
483213e38901SDavid Ahern 	if (f6i->fib6_nsiblings) {
483313e38901SDavid Ahern 		struct fib6_info *sibling, *next_sibling;
483413e38901SDavid Ahern 
483513e38901SDavid Ahern 		list_for_each_entry_safe(sibling, next_sibling,
483613e38901SDavid Ahern 					 &f6i->fib6_siblings, fib6_siblings) {
4837ad1601aeSDavid Ahern 			if (sibling->fib6_nh.fib_nh_dev == dev)
483813e38901SDavid Ahern 				return true;
483913e38901SDavid Ahern 		}
484013e38901SDavid Ahern 	}
484113e38901SDavid Ahern 
484213e38901SDavid Ahern 	return false;
484313e38901SDavid Ahern }
484413e38901SDavid Ahern 
48458d1c802bSDavid Ahern int rt6_dump_route(struct fib6_info *rt, void *p_arg)
48461da177e4SLinus Torvalds {
48471da177e4SLinus Torvalds 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
484813e38901SDavid Ahern 	struct fib_dump_filter *filter = &arg->filter;
484913e38901SDavid Ahern 	unsigned int flags = NLM_F_MULTI;
48501f17e2f2SDavid Ahern 	struct net *net = arg->net;
48511f17e2f2SDavid Ahern 
4852421842edSDavid Ahern 	if (rt == net->ipv6.fib6_null_entry)
48531f17e2f2SDavid Ahern 		return 0;
48541da177e4SLinus Torvalds 
485513e38901SDavid Ahern 	if ((filter->flags & RTM_F_PREFIX) &&
485693c2fb25SDavid Ahern 	    !(rt->fib6_flags & RTF_PREFIX_RT)) {
4857f8cfe2ceSDavid Ahern 		/* success since this is not a prefix route */
4858f8cfe2ceSDavid Ahern 		return 1;
4859f8cfe2ceSDavid Ahern 	}
486013e38901SDavid Ahern 	if (filter->filter_set) {
486113e38901SDavid Ahern 		if ((filter->rt_type && rt->fib6_type != filter->rt_type) ||
486213e38901SDavid Ahern 		    (filter->dev && !fib6_info_uses_dev(rt, filter->dev)) ||
486313e38901SDavid Ahern 		    (filter->protocol && rt->fib6_protocol != filter->protocol)) {
486413e38901SDavid Ahern 			return 1;
486513e38901SDavid Ahern 		}
486613e38901SDavid Ahern 		flags |= NLM_F_DUMP_FILTERED;
4867f8cfe2ceSDavid Ahern 	}
48681da177e4SLinus Torvalds 
4869d4ead6b3SDavid Ahern 	return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4870d4ead6b3SDavid Ahern 			     RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
487113e38901SDavid Ahern 			     arg->cb->nlh->nlmsg_seq, flags);
48721da177e4SLinus Torvalds }
48731da177e4SLinus Torvalds 
48740eff0a27SJakub Kicinski static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
48750eff0a27SJakub Kicinski 					const struct nlmsghdr *nlh,
48760eff0a27SJakub Kicinski 					struct nlattr **tb,
48770eff0a27SJakub Kicinski 					struct netlink_ext_ack *extack)
48780eff0a27SJakub Kicinski {
48790eff0a27SJakub Kicinski 	struct rtmsg *rtm;
48800eff0a27SJakub Kicinski 	int i, err;
48810eff0a27SJakub Kicinski 
48820eff0a27SJakub Kicinski 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
48830eff0a27SJakub Kicinski 		NL_SET_ERR_MSG_MOD(extack,
48840eff0a27SJakub Kicinski 				   "Invalid header for get route request");
48850eff0a27SJakub Kicinski 		return -EINVAL;
48860eff0a27SJakub Kicinski 	}
48870eff0a27SJakub Kicinski 
48880eff0a27SJakub Kicinski 	if (!netlink_strict_get_check(skb))
48890eff0a27SJakub Kicinski 		return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
48900eff0a27SJakub Kicinski 				   rtm_ipv6_policy, extack);
48910eff0a27SJakub Kicinski 
48920eff0a27SJakub Kicinski 	rtm = nlmsg_data(nlh);
48930eff0a27SJakub Kicinski 	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
48940eff0a27SJakub Kicinski 	    (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
48950eff0a27SJakub Kicinski 	    rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
48960eff0a27SJakub Kicinski 	    rtm->rtm_type) {
48970eff0a27SJakub Kicinski 		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
48980eff0a27SJakub Kicinski 		return -EINVAL;
48990eff0a27SJakub Kicinski 	}
49000eff0a27SJakub Kicinski 	if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
49010eff0a27SJakub Kicinski 		NL_SET_ERR_MSG_MOD(extack,
49020eff0a27SJakub Kicinski 				   "Invalid flags for get route request");
49030eff0a27SJakub Kicinski 		return -EINVAL;
49040eff0a27SJakub Kicinski 	}
49050eff0a27SJakub Kicinski 
49060eff0a27SJakub Kicinski 	err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
49070eff0a27SJakub Kicinski 				 rtm_ipv6_policy, extack);
49080eff0a27SJakub Kicinski 	if (err)
49090eff0a27SJakub Kicinski 		return err;
49100eff0a27SJakub Kicinski 
49110eff0a27SJakub Kicinski 	if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
49120eff0a27SJakub Kicinski 	    (tb[RTA_DST] && !rtm->rtm_dst_len)) {
49130eff0a27SJakub Kicinski 		NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
49140eff0a27SJakub Kicinski 		return -EINVAL;
49150eff0a27SJakub Kicinski 	}
49160eff0a27SJakub Kicinski 
49170eff0a27SJakub Kicinski 	for (i = 0; i <= RTA_MAX; i++) {
49180eff0a27SJakub Kicinski 		if (!tb[i])
49190eff0a27SJakub Kicinski 			continue;
49200eff0a27SJakub Kicinski 
49210eff0a27SJakub Kicinski 		switch (i) {
49220eff0a27SJakub Kicinski 		case RTA_SRC:
49230eff0a27SJakub Kicinski 		case RTA_DST:
49240eff0a27SJakub Kicinski 		case RTA_IIF:
49250eff0a27SJakub Kicinski 		case RTA_OIF:
49260eff0a27SJakub Kicinski 		case RTA_MARK:
49270eff0a27SJakub Kicinski 		case RTA_UID:
49280eff0a27SJakub Kicinski 		case RTA_SPORT:
49290eff0a27SJakub Kicinski 		case RTA_DPORT:
49300eff0a27SJakub Kicinski 		case RTA_IP_PROTO:
49310eff0a27SJakub Kicinski 			break;
49320eff0a27SJakub Kicinski 		default:
49330eff0a27SJakub Kicinski 			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
49340eff0a27SJakub Kicinski 			return -EINVAL;
49350eff0a27SJakub Kicinski 		}
49360eff0a27SJakub Kicinski 	}
49370eff0a27SJakub Kicinski 
49380eff0a27SJakub Kicinski 	return 0;
49390eff0a27SJakub Kicinski }
49400eff0a27SJakub Kicinski 
4941c21ef3e3SDavid Ahern static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4942c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
49431da177e4SLinus Torvalds {
49443b1e0a65SYOSHIFUJI Hideaki 	struct net *net = sock_net(in_skb->sk);
4945ab364a6fSThomas Graf 	struct nlattr *tb[RTA_MAX+1];
494618c3a61cSRoopa Prabhu 	int err, iif = 0, oif = 0;
4947a68886a6SDavid Ahern 	struct fib6_info *from;
494818c3a61cSRoopa Prabhu 	struct dst_entry *dst;
49491da177e4SLinus Torvalds 	struct rt6_info *rt;
4950ab364a6fSThomas Graf 	struct sk_buff *skb;
4951ab364a6fSThomas Graf 	struct rtmsg *rtm;
4952744486d4SMaciej Żenczykowski 	struct flowi6 fl6 = {};
495318c3a61cSRoopa Prabhu 	bool fibmatch;
4954ab364a6fSThomas Graf 
49550eff0a27SJakub Kicinski 	err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
4956ab364a6fSThomas Graf 	if (err < 0)
4957ab364a6fSThomas Graf 		goto errout;
4958ab364a6fSThomas Graf 
4959ab364a6fSThomas Graf 	err = -EINVAL;
496038b7097bSHannes Frederic Sowa 	rtm = nlmsg_data(nlh);
496138b7097bSHannes Frederic Sowa 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
496218c3a61cSRoopa Prabhu 	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4963ab364a6fSThomas Graf 
4964ab364a6fSThomas Graf 	if (tb[RTA_SRC]) {
4965ab364a6fSThomas Graf 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4966ab364a6fSThomas Graf 			goto errout;
4967ab364a6fSThomas Graf 
49684e3fd7a0SAlexey Dobriyan 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4969ab364a6fSThomas Graf 	}
4970ab364a6fSThomas Graf 
4971ab364a6fSThomas Graf 	if (tb[RTA_DST]) {
4972ab364a6fSThomas Graf 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4973ab364a6fSThomas Graf 			goto errout;
4974ab364a6fSThomas Graf 
49754e3fd7a0SAlexey Dobriyan 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4976ab364a6fSThomas Graf 	}
4977ab364a6fSThomas Graf 
4978ab364a6fSThomas Graf 	if (tb[RTA_IIF])
4979ab364a6fSThomas Graf 		iif = nla_get_u32(tb[RTA_IIF]);
4980ab364a6fSThomas Graf 
4981ab364a6fSThomas Graf 	if (tb[RTA_OIF])
498272331bc0SShmulik Ladkani 		oif = nla_get_u32(tb[RTA_OIF]);
4983ab364a6fSThomas Graf 
49842e47b291SLorenzo Colitti 	if (tb[RTA_MARK])
49852e47b291SLorenzo Colitti 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
49862e47b291SLorenzo Colitti 
4987622ec2c9SLorenzo Colitti 	if (tb[RTA_UID])
4988622ec2c9SLorenzo Colitti 		fl6.flowi6_uid = make_kuid(current_user_ns(),
4989622ec2c9SLorenzo Colitti 					   nla_get_u32(tb[RTA_UID]));
4990622ec2c9SLorenzo Colitti 	else
4991622ec2c9SLorenzo Colitti 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4992622ec2c9SLorenzo Colitti 
4993eacb9384SRoopa Prabhu 	if (tb[RTA_SPORT])
4994eacb9384SRoopa Prabhu 		fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4995eacb9384SRoopa Prabhu 
4996eacb9384SRoopa Prabhu 	if (tb[RTA_DPORT])
4997eacb9384SRoopa Prabhu 		fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4998eacb9384SRoopa Prabhu 
4999eacb9384SRoopa Prabhu 	if (tb[RTA_IP_PROTO]) {
5000eacb9384SRoopa Prabhu 		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
50015e1a99eaSHangbin Liu 						  &fl6.flowi6_proto, AF_INET6,
50025e1a99eaSHangbin Liu 						  extack);
5003eacb9384SRoopa Prabhu 		if (err)
5004eacb9384SRoopa Prabhu 			goto errout;
5005eacb9384SRoopa Prabhu 	}
5006eacb9384SRoopa Prabhu 
5007ab364a6fSThomas Graf 	if (iif) {
5008ab364a6fSThomas Graf 		struct net_device *dev;
500972331bc0SShmulik Ladkani 		int flags = 0;
501072331bc0SShmulik Ladkani 
5011121622dbSFlorian Westphal 		rcu_read_lock();
5012121622dbSFlorian Westphal 
5013121622dbSFlorian Westphal 		dev = dev_get_by_index_rcu(net, iif);
5014ab364a6fSThomas Graf 		if (!dev) {
5015121622dbSFlorian Westphal 			rcu_read_unlock();
5016ab364a6fSThomas Graf 			err = -ENODEV;
5017ab364a6fSThomas Graf 			goto errout;
5018ab364a6fSThomas Graf 		}
501972331bc0SShmulik Ladkani 
502072331bc0SShmulik Ladkani 		fl6.flowi6_iif = iif;
502172331bc0SShmulik Ladkani 
502272331bc0SShmulik Ladkani 		if (!ipv6_addr_any(&fl6.saddr))
502372331bc0SShmulik Ladkani 			flags |= RT6_LOOKUP_F_HAS_SADDR;
502472331bc0SShmulik Ladkani 
5025b75cc8f9SDavid Ahern 		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
5026121622dbSFlorian Westphal 
5027121622dbSFlorian Westphal 		rcu_read_unlock();
502872331bc0SShmulik Ladkani 	} else {
502972331bc0SShmulik Ladkani 		fl6.flowi6_oif = oif;
503072331bc0SShmulik Ladkani 
503118c3a61cSRoopa Prabhu 		dst = ip6_route_output(net, NULL, &fl6);
503218c3a61cSRoopa Prabhu 	}
503318c3a61cSRoopa Prabhu 
503418c3a61cSRoopa Prabhu 
503518c3a61cSRoopa Prabhu 	rt = container_of(dst, struct rt6_info, dst);
503618c3a61cSRoopa Prabhu 	if (rt->dst.error) {
503718c3a61cSRoopa Prabhu 		err = rt->dst.error;
503818c3a61cSRoopa Prabhu 		ip6_rt_put(rt);
503918c3a61cSRoopa Prabhu 		goto errout;
5040ab364a6fSThomas Graf 	}
50411da177e4SLinus Torvalds 
50429d6acb3bSWANG Cong 	if (rt == net->ipv6.ip6_null_entry) {
50439d6acb3bSWANG Cong 		err = rt->dst.error;
50449d6acb3bSWANG Cong 		ip6_rt_put(rt);
50459d6acb3bSWANG Cong 		goto errout;
50469d6acb3bSWANG Cong 	}
50479d6acb3bSWANG Cong 
50481da177e4SLinus Torvalds 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
504938308473SDavid S. Miller 	if (!skb) {
505094e187c0SAmerigo Wang 		ip6_rt_put(rt);
5051ab364a6fSThomas Graf 		err = -ENOBUFS;
5052ab364a6fSThomas Graf 		goto errout;
5053ab364a6fSThomas Graf 	}
50541da177e4SLinus Torvalds 
5055d8d1f30bSChangli Gao 	skb_dst_set(skb, &rt->dst);
5056a68886a6SDavid Ahern 
5057a68886a6SDavid Ahern 	rcu_read_lock();
5058a68886a6SDavid Ahern 	from = rcu_dereference(rt->from);
5059a68886a6SDavid Ahern 
506018c3a61cSRoopa Prabhu 	if (fibmatch)
5061a68886a6SDavid Ahern 		err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
506218c3a61cSRoopa Prabhu 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
506318c3a61cSRoopa Prabhu 				    nlh->nlmsg_seq, 0);
506418c3a61cSRoopa Prabhu 	else
5065a68886a6SDavid Ahern 		err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
5066a68886a6SDavid Ahern 				    &fl6.saddr, iif, RTM_NEWROUTE,
5067d4ead6b3SDavid Ahern 				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
5068d4ead6b3SDavid Ahern 				    0);
5069a68886a6SDavid Ahern 	rcu_read_unlock();
5070a68886a6SDavid Ahern 
50711da177e4SLinus Torvalds 	if (err < 0) {
5072ab364a6fSThomas Graf 		kfree_skb(skb);
5073ab364a6fSThomas Graf 		goto errout;
50741da177e4SLinus Torvalds 	}
50751da177e4SLinus Torvalds 
507615e47304SEric W. Biederman 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
5077ab364a6fSThomas Graf errout:
50781da177e4SLinus Torvalds 	return err;
50791da177e4SLinus Torvalds }
50801da177e4SLinus Torvalds 
50818d1c802bSDavid Ahern void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
508237a1d361SRoopa Prabhu 		     unsigned int nlm_flags)
50831da177e4SLinus Torvalds {
50841da177e4SLinus Torvalds 	struct sk_buff *skb;
50855578689aSDaniel Lezcano 	struct net *net = info->nl_net;
5086528c4cebSDenis V. Lunev 	u32 seq;
5087528c4cebSDenis V. Lunev 	int err;
50880d51aa80SJamal Hadi Salim 
5089528c4cebSDenis V. Lunev 	err = -ENOBUFS;
509038308473SDavid S. Miller 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
509186872cb5SThomas Graf 
509219e42e45SRoopa Prabhu 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
509338308473SDavid S. Miller 	if (!skb)
509421713ebcSThomas Graf 		goto errout;
50951da177e4SLinus Torvalds 
5096d4ead6b3SDavid Ahern 	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5097f8cfe2ceSDavid Ahern 			    event, info->portid, seq, nlm_flags);
509826932566SPatrick McHardy 	if (err < 0) {
509926932566SPatrick McHardy 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
510026932566SPatrick McHardy 		WARN_ON(err == -EMSGSIZE);
510126932566SPatrick McHardy 		kfree_skb(skb);
510226932566SPatrick McHardy 		goto errout;
510326932566SPatrick McHardy 	}
510415e47304SEric W. Biederman 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
51055578689aSDaniel Lezcano 		    info->nlh, gfp_any());
51061ce85fe4SPablo Neira Ayuso 	return;
510721713ebcSThomas Graf errout:
510821713ebcSThomas Graf 	if (err < 0)
51095578689aSDaniel Lezcano 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
51101da177e4SLinus Torvalds }
51111da177e4SLinus Torvalds 
51128ed67789SDaniel Lezcano static int ip6_route_dev_notify(struct notifier_block *this,
5113351638e7SJiri Pirko 				unsigned long event, void *ptr)
51148ed67789SDaniel Lezcano {
5115351638e7SJiri Pirko 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
5116c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
51178ed67789SDaniel Lezcano 
5118242d3a49SWANG Cong 	if (!(dev->flags & IFF_LOOPBACK))
5119242d3a49SWANG Cong 		return NOTIFY_OK;
5120242d3a49SWANG Cong 
5121242d3a49SWANG Cong 	if (event == NETDEV_REGISTER) {
5122ad1601aeSDavid Ahern 		net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
5123d8d1f30bSChangli Gao 		net->ipv6.ip6_null_entry->dst.dev = dev;
51248ed67789SDaniel Lezcano 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
51258ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5126d8d1f30bSChangli Gao 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
51278ed67789SDaniel Lezcano 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
5128d8d1f30bSChangli Gao 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
51298ed67789SDaniel Lezcano 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
51308ed67789SDaniel Lezcano #endif
513176da0704SWANG Cong 	 } else if (event == NETDEV_UNREGISTER &&
513276da0704SWANG Cong 		    dev->reg_state != NETREG_UNREGISTERED) {
513376da0704SWANG Cong 		/* NETDEV_UNREGISTER could be fired for multiple times by
513476da0704SWANG Cong 		 * netdev_wait_allrefs(). Make sure we only call this once.
513576da0704SWANG Cong 		 */
513612d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
5137242d3a49SWANG Cong #ifdef CONFIG_IPV6_MULTIPLE_TABLES
513812d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
513912d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
5140242d3a49SWANG Cong #endif
51418ed67789SDaniel Lezcano 	}
51428ed67789SDaniel Lezcano 
51438ed67789SDaniel Lezcano 	return NOTIFY_OK;
51448ed67789SDaniel Lezcano }
51458ed67789SDaniel Lezcano 
51461da177e4SLinus Torvalds /*
51471da177e4SLinus Torvalds  *	/proc
51481da177e4SLinus Torvalds  */
51491da177e4SLinus Torvalds 
51501da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
51511da177e4SLinus Torvalds static int rt6_stats_seq_show(struct seq_file *seq, void *v)
51521da177e4SLinus Torvalds {
515369ddb805SDaniel Lezcano 	struct net *net = (struct net *)seq->private;
51541da177e4SLinus Torvalds 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
515569ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_nodes,
515669ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_route_nodes,
515781eb8447SWei Wang 		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
515869ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_entries,
515969ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_cache,
5160fc66f95cSEric Dumazet 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
516169ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_discarded_routes);
51621da177e4SLinus Torvalds 
51631da177e4SLinus Torvalds 	return 0;
51641da177e4SLinus Torvalds }
51651da177e4SLinus Torvalds #endif	/* CONFIG_PROC_FS */
51661da177e4SLinus Torvalds 
51671da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
51681da177e4SLinus Torvalds 
51691da177e4SLinus Torvalds static
5170fe2c6338SJoe Perches int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
51711da177e4SLinus Torvalds 			      void __user *buffer, size_t *lenp, loff_t *ppos)
51721da177e4SLinus Torvalds {
5173c486da34SLucian Adrian Grijincu 	struct net *net;
5174c486da34SLucian Adrian Grijincu 	int delay;
5175f0fb9b28SAditya Pakki 	int ret;
5176c486da34SLucian Adrian Grijincu 	if (!write)
5177c486da34SLucian Adrian Grijincu 		return -EINVAL;
5178c486da34SLucian Adrian Grijincu 
5179c486da34SLucian Adrian Grijincu 	net = (struct net *)ctl->extra1;
5180c486da34SLucian Adrian Grijincu 	delay = net->ipv6.sysctl.flush_delay;
5181f0fb9b28SAditya Pakki 	ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
5182f0fb9b28SAditya Pakki 	if (ret)
5183f0fb9b28SAditya Pakki 		return ret;
5184f0fb9b28SAditya Pakki 
51852ac3ac8fSMichal Kubeček 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
51861da177e4SLinus Torvalds 	return 0;
51871da177e4SLinus Torvalds }
51881da177e4SLinus Torvalds 
51897c6bb7d2SDavid Ahern static int zero;
51907c6bb7d2SDavid Ahern static int one = 1;
51917c6bb7d2SDavid Ahern 
5192ed792e28SDavid Ahern static struct ctl_table ipv6_route_table_template[] = {
51931da177e4SLinus Torvalds 	{
51941da177e4SLinus Torvalds 		.procname	=	"flush",
51954990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.flush_delay,
51961da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
519789c8b3a1SDave Jones 		.mode		=	0200,
51986d9f239aSAlexey Dobriyan 		.proc_handler	=	ipv6_sysctl_rtcache_flush
51991da177e4SLinus Torvalds 	},
52001da177e4SLinus Torvalds 	{
52011da177e4SLinus Torvalds 		.procname	=	"gc_thresh",
52029a7ec3a9SDaniel Lezcano 		.data		=	&ip6_dst_ops_template.gc_thresh,
52031da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52041da177e4SLinus Torvalds 		.mode		=	0644,
52056d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec,
52061da177e4SLinus Torvalds 	},
52071da177e4SLinus Torvalds 	{
52081da177e4SLinus Torvalds 		.procname	=	"max_size",
52094990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
52101da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52111da177e4SLinus Torvalds 		.mode		=	0644,
52126d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec,
52131da177e4SLinus Torvalds 	},
52141da177e4SLinus Torvalds 	{
52151da177e4SLinus Torvalds 		.procname	=	"gc_min_interval",
52164990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
52171da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52181da177e4SLinus Torvalds 		.mode		=	0644,
52196d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
52201da177e4SLinus Torvalds 	},
52211da177e4SLinus Torvalds 	{
52221da177e4SLinus Torvalds 		.procname	=	"gc_timeout",
52234990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
52241da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52251da177e4SLinus Torvalds 		.mode		=	0644,
52266d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
52271da177e4SLinus Torvalds 	},
52281da177e4SLinus Torvalds 	{
52291da177e4SLinus Torvalds 		.procname	=	"gc_interval",
52304990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
52311da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52321da177e4SLinus Torvalds 		.mode		=	0644,
52336d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
52341da177e4SLinus Torvalds 	},
52351da177e4SLinus Torvalds 	{
52361da177e4SLinus Torvalds 		.procname	=	"gc_elasticity",
52374990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
52381da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52391da177e4SLinus Torvalds 		.mode		=	0644,
5240f3d3f616SMin Zhang 		.proc_handler	=	proc_dointvec,
52411da177e4SLinus Torvalds 	},
52421da177e4SLinus Torvalds 	{
52431da177e4SLinus Torvalds 		.procname	=	"mtu_expires",
52444990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
52451da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52461da177e4SLinus Torvalds 		.mode		=	0644,
52476d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
52481da177e4SLinus Torvalds 	},
52491da177e4SLinus Torvalds 	{
52501da177e4SLinus Torvalds 		.procname	=	"min_adv_mss",
52514990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
52521da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52531da177e4SLinus Torvalds 		.mode		=	0644,
5254f3d3f616SMin Zhang 		.proc_handler	=	proc_dointvec,
52551da177e4SLinus Torvalds 	},
52561da177e4SLinus Torvalds 	{
52571da177e4SLinus Torvalds 		.procname	=	"gc_min_interval_ms",
52584990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
52591da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
52601da177e4SLinus Torvalds 		.mode		=	0644,
52616d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_ms_jiffies,
52621da177e4SLinus Torvalds 	},
52637c6bb7d2SDavid Ahern 	{
52647c6bb7d2SDavid Ahern 		.procname	=	"skip_notify_on_dev_down",
52657c6bb7d2SDavid Ahern 		.data		=	&init_net.ipv6.sysctl.skip_notify_on_dev_down,
52667c6bb7d2SDavid Ahern 		.maxlen		=	sizeof(int),
52677c6bb7d2SDavid Ahern 		.mode		=	0644,
52687c6bb7d2SDavid Ahern 		.proc_handler	=	proc_dointvec,
52697c6bb7d2SDavid Ahern 		.extra1		=	&zero,
52707c6bb7d2SDavid Ahern 		.extra2		=	&one,
52717c6bb7d2SDavid Ahern 	},
5272f8572d8fSEric W. Biederman 	{ }
52731da177e4SLinus Torvalds };
52741da177e4SLinus Torvalds 
52752c8c1e72SAlexey Dobriyan struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
5276760f2d01SDaniel Lezcano {
5277760f2d01SDaniel Lezcano 	struct ctl_table *table;
5278760f2d01SDaniel Lezcano 
5279760f2d01SDaniel Lezcano 	table = kmemdup(ipv6_route_table_template,
5280760f2d01SDaniel Lezcano 			sizeof(ipv6_route_table_template),
5281760f2d01SDaniel Lezcano 			GFP_KERNEL);
52825ee09105SYOSHIFUJI Hideaki 
52835ee09105SYOSHIFUJI Hideaki 	if (table) {
52845ee09105SYOSHIFUJI Hideaki 		table[0].data = &net->ipv6.sysctl.flush_delay;
5285c486da34SLucian Adrian Grijincu 		table[0].extra1 = net;
528686393e52SAlexey Dobriyan 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
52875ee09105SYOSHIFUJI Hideaki 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
52885ee09105SYOSHIFUJI Hideaki 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
52895ee09105SYOSHIFUJI Hideaki 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
52905ee09105SYOSHIFUJI Hideaki 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
52915ee09105SYOSHIFUJI Hideaki 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
52925ee09105SYOSHIFUJI Hideaki 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
52935ee09105SYOSHIFUJI Hideaki 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
52949c69fabeSAlexey Dobriyan 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
52957c6bb7d2SDavid Ahern 		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
5296464dc801SEric W. Biederman 
5297464dc801SEric W. Biederman 		/* Don't export sysctls to unprivileged users */
5298464dc801SEric W. Biederman 		if (net->user_ns != &init_user_ns)
5299464dc801SEric W. Biederman 			table[0].procname = NULL;
53005ee09105SYOSHIFUJI Hideaki 	}
53015ee09105SYOSHIFUJI Hideaki 
5302760f2d01SDaniel Lezcano 	return table;
5303760f2d01SDaniel Lezcano }
53041da177e4SLinus Torvalds #endif
53051da177e4SLinus Torvalds 
53062c8c1e72SAlexey Dobriyan static int __net_init ip6_route_net_init(struct net *net)
5307cdb18761SDaniel Lezcano {
5308633d424bSPavel Emelyanov 	int ret = -ENOMEM;
53098ed67789SDaniel Lezcano 
531086393e52SAlexey Dobriyan 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
531186393e52SAlexey Dobriyan 	       sizeof(net->ipv6.ip6_dst_ops));
5312f2fc6a54SBenjamin Thery 
5313fc66f95cSEric Dumazet 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5314fc66f95cSEric Dumazet 		goto out_ip6_dst_ops;
5315fc66f95cSEric Dumazet 
5316421842edSDavid Ahern 	net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5317421842edSDavid Ahern 					    sizeof(*net->ipv6.fib6_null_entry),
5318421842edSDavid Ahern 					    GFP_KERNEL);
5319421842edSDavid Ahern 	if (!net->ipv6.fib6_null_entry)
5320421842edSDavid Ahern 		goto out_ip6_dst_entries;
5321421842edSDavid Ahern 
53228ed67789SDaniel Lezcano 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
53238ed67789SDaniel Lezcano 					   sizeof(*net->ipv6.ip6_null_entry),
53248ed67789SDaniel Lezcano 					   GFP_KERNEL);
53258ed67789SDaniel Lezcano 	if (!net->ipv6.ip6_null_entry)
5326421842edSDavid Ahern 		goto out_fib6_null_entry;
5327d8d1f30bSChangli Gao 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
532862fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
532962fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
53308ed67789SDaniel Lezcano 
53318ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5332feca7d8cSVincent Bernat 	net->ipv6.fib6_has_custom_rules = false;
53338ed67789SDaniel Lezcano 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
53348ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_prohibit_entry),
53358ed67789SDaniel Lezcano 					       GFP_KERNEL);
533668fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_prohibit_entry)
533768fffc67SPeter Zijlstra 		goto out_ip6_null_entry;
5338d8d1f30bSChangli Gao 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
533962fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
534062fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
53418ed67789SDaniel Lezcano 
53428ed67789SDaniel Lezcano 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
53438ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
53448ed67789SDaniel Lezcano 					       GFP_KERNEL);
534568fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_blk_hole_entry)
534668fffc67SPeter Zijlstra 		goto out_ip6_prohibit_entry;
5347d8d1f30bSChangli Gao 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
534862fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
534962fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
53508ed67789SDaniel Lezcano #endif
53518ed67789SDaniel Lezcano 
5352b339a47cSPeter Zijlstra 	net->ipv6.sysctl.flush_delay = 0;
5353b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
5354b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5355b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5356b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5357b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5358b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5359b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
53607c6bb7d2SDavid Ahern 	net->ipv6.sysctl.skip_notify_on_dev_down = 0;
5361b339a47cSPeter Zijlstra 
53626891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
53636891a346SBenjamin Thery 
53648ed67789SDaniel Lezcano 	ret = 0;
53658ed67789SDaniel Lezcano out:
53668ed67789SDaniel Lezcano 	return ret;
5367f2fc6a54SBenjamin Thery 
536868fffc67SPeter Zijlstra #ifdef CONFIG_IPV6_MULTIPLE_TABLES
536968fffc67SPeter Zijlstra out_ip6_prohibit_entry:
537068fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_prohibit_entry);
537168fffc67SPeter Zijlstra out_ip6_null_entry:
537268fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_null_entry);
537368fffc67SPeter Zijlstra #endif
5374421842edSDavid Ahern out_fib6_null_entry:
5375421842edSDavid Ahern 	kfree(net->ipv6.fib6_null_entry);
5376fc66f95cSEric Dumazet out_ip6_dst_entries:
5377fc66f95cSEric Dumazet 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5378f2fc6a54SBenjamin Thery out_ip6_dst_ops:
5379f2fc6a54SBenjamin Thery 	goto out;
5380cdb18761SDaniel Lezcano }
5381cdb18761SDaniel Lezcano 
53822c8c1e72SAlexey Dobriyan static void __net_exit ip6_route_net_exit(struct net *net)
5383cdb18761SDaniel Lezcano {
5384421842edSDavid Ahern 	kfree(net->ipv6.fib6_null_entry);
53858ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_null_entry);
53868ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
53878ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_prohibit_entry);
53888ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_blk_hole_entry);
53898ed67789SDaniel Lezcano #endif
539041bb78b4SXiaotian Feng 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5391cdb18761SDaniel Lezcano }
5392cdb18761SDaniel Lezcano 
5393d189634eSThomas Graf static int __net_init ip6_route_net_init_late(struct net *net)
5394d189634eSThomas Graf {
5395d189634eSThomas Graf #ifdef CONFIG_PROC_FS
5396c3506372SChristoph Hellwig 	proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5397c3506372SChristoph Hellwig 			sizeof(struct ipv6_route_iter));
53983617d949SChristoph Hellwig 	proc_create_net_single("rt6_stats", 0444, net->proc_net,
53993617d949SChristoph Hellwig 			rt6_stats_seq_show, NULL);
5400d189634eSThomas Graf #endif
5401d189634eSThomas Graf 	return 0;
5402d189634eSThomas Graf }
5403d189634eSThomas Graf 
5404d189634eSThomas Graf static void __net_exit ip6_route_net_exit_late(struct net *net)
5405d189634eSThomas Graf {
5406d189634eSThomas Graf #ifdef CONFIG_PROC_FS
5407ece31ffdSGao feng 	remove_proc_entry("ipv6_route", net->proc_net);
5408ece31ffdSGao feng 	remove_proc_entry("rt6_stats", net->proc_net);
5409d189634eSThomas Graf #endif
5410d189634eSThomas Graf }
5411d189634eSThomas Graf 
5412cdb18761SDaniel Lezcano static struct pernet_operations ip6_route_net_ops = {
5413cdb18761SDaniel Lezcano 	.init = ip6_route_net_init,
5414cdb18761SDaniel Lezcano 	.exit = ip6_route_net_exit,
5415cdb18761SDaniel Lezcano };
5416cdb18761SDaniel Lezcano 
5417c3426b47SDavid S. Miller static int __net_init ipv6_inetpeer_init(struct net *net)
5418c3426b47SDavid S. Miller {
5419c3426b47SDavid S. Miller 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5420c3426b47SDavid S. Miller 
5421c3426b47SDavid S. Miller 	if (!bp)
5422c3426b47SDavid S. Miller 		return -ENOMEM;
5423c3426b47SDavid S. Miller 	inet_peer_base_init(bp);
5424c3426b47SDavid S. Miller 	net->ipv6.peers = bp;
5425c3426b47SDavid S. Miller 	return 0;
5426c3426b47SDavid S. Miller }
5427c3426b47SDavid S. Miller 
5428c3426b47SDavid S. Miller static void __net_exit ipv6_inetpeer_exit(struct net *net)
5429c3426b47SDavid S. Miller {
5430c3426b47SDavid S. Miller 	struct inet_peer_base *bp = net->ipv6.peers;
5431c3426b47SDavid S. Miller 
5432c3426b47SDavid S. Miller 	net->ipv6.peers = NULL;
543356a6b248SDavid S. Miller 	inetpeer_invalidate_tree(bp);
5434c3426b47SDavid S. Miller 	kfree(bp);
5435c3426b47SDavid S. Miller }
5436c3426b47SDavid S. Miller 
54372b823f72SDavid S. Miller static struct pernet_operations ipv6_inetpeer_ops = {
5438c3426b47SDavid S. Miller 	.init	=	ipv6_inetpeer_init,
5439c3426b47SDavid S. Miller 	.exit	=	ipv6_inetpeer_exit,
5440c3426b47SDavid S. Miller };
5441c3426b47SDavid S. Miller 
5442d189634eSThomas Graf static struct pernet_operations ip6_route_net_late_ops = {
5443d189634eSThomas Graf 	.init = ip6_route_net_init_late,
5444d189634eSThomas Graf 	.exit = ip6_route_net_exit_late,
5445d189634eSThomas Graf };
5446d189634eSThomas Graf 
54478ed67789SDaniel Lezcano static struct notifier_block ip6_route_dev_notifier = {
54488ed67789SDaniel Lezcano 	.notifier_call = ip6_route_dev_notify,
5449242d3a49SWANG Cong 	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
54508ed67789SDaniel Lezcano };
54518ed67789SDaniel Lezcano 
54522f460933SWANG Cong void __init ip6_route_init_special_entries(void)
54532f460933SWANG Cong {
54542f460933SWANG Cong 	/* Registering of the loopback is done before this portion of code,
54552f460933SWANG Cong 	 * the loopback reference in rt6_info will not be taken, do it
54562f460933SWANG Cong 	 * manually for init_net */
5457ad1601aeSDavid Ahern 	init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
54582f460933SWANG Cong 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
54592f460933SWANG Cong 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
54602f460933SWANG Cong   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
54612f460933SWANG Cong 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
54622f460933SWANG Cong 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
54632f460933SWANG Cong 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
54642f460933SWANG Cong 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
54652f460933SWANG Cong   #endif
54662f460933SWANG Cong }
54672f460933SWANG Cong 
5468433d49c3SDaniel Lezcano int __init ip6_route_init(void)
54691da177e4SLinus Torvalds {
5470433d49c3SDaniel Lezcano 	int ret;
54718d0b94afSMartin KaFai Lau 	int cpu;
5472433d49c3SDaniel Lezcano 
54739a7ec3a9SDaniel Lezcano 	ret = -ENOMEM;
54749a7ec3a9SDaniel Lezcano 	ip6_dst_ops_template.kmem_cachep =
54759a7ec3a9SDaniel Lezcano 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
54769a7ec3a9SDaniel Lezcano 				  SLAB_HWCACHE_ALIGN, NULL);
54779a7ec3a9SDaniel Lezcano 	if (!ip6_dst_ops_template.kmem_cachep)
5478c19a28e1SFernando Carrijo 		goto out;
547914e50e57SDavid S. Miller 
5480fc66f95cSEric Dumazet 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
54818ed67789SDaniel Lezcano 	if (ret)
5482bdb3289fSDaniel Lezcano 		goto out_kmem_cache;
5483bdb3289fSDaniel Lezcano 
5484c3426b47SDavid S. Miller 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5485c3426b47SDavid S. Miller 	if (ret)
5486e8803b6cSDavid S. Miller 		goto out_dst_entries;
54872a0c451aSThomas Graf 
54887e52b33bSDavid S. Miller 	ret = register_pernet_subsys(&ip6_route_net_ops);
54897e52b33bSDavid S. Miller 	if (ret)
54907e52b33bSDavid S. Miller 		goto out_register_inetpeer;
5491c3426b47SDavid S. Miller 
54925dc121e9SArnaud Ebalard 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
54935dc121e9SArnaud Ebalard 
5494e8803b6cSDavid S. Miller 	ret = fib6_init();
5495433d49c3SDaniel Lezcano 	if (ret)
54968ed67789SDaniel Lezcano 		goto out_register_subsys;
5497433d49c3SDaniel Lezcano 
5498433d49c3SDaniel Lezcano 	ret = xfrm6_init();
5499433d49c3SDaniel Lezcano 	if (ret)
5500e8803b6cSDavid S. Miller 		goto out_fib6_init;
5501c35b7e72SDaniel Lezcano 
5502433d49c3SDaniel Lezcano 	ret = fib6_rules_init();
5503433d49c3SDaniel Lezcano 	if (ret)
5504433d49c3SDaniel Lezcano 		goto xfrm6_init;
55057e5449c2SDaniel Lezcano 
5506d189634eSThomas Graf 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
5507d189634eSThomas Graf 	if (ret)
5508d189634eSThomas Graf 		goto fib6_rules_init;
5509d189634eSThomas Graf 
551016feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
551116feebcfSFlorian Westphal 				   inet6_rtm_newroute, NULL, 0);
551216feebcfSFlorian Westphal 	if (ret < 0)
551316feebcfSFlorian Westphal 		goto out_register_late_subsys;
551416feebcfSFlorian Westphal 
551516feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
551616feebcfSFlorian Westphal 				   inet6_rtm_delroute, NULL, 0);
551716feebcfSFlorian Westphal 	if (ret < 0)
551816feebcfSFlorian Westphal 		goto out_register_late_subsys;
551916feebcfSFlorian Westphal 
552016feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
552116feebcfSFlorian Westphal 				   inet6_rtm_getroute, NULL,
552216feebcfSFlorian Westphal 				   RTNL_FLAG_DOIT_UNLOCKED);
552316feebcfSFlorian Westphal 	if (ret < 0)
5524d189634eSThomas Graf 		goto out_register_late_subsys;
5525433d49c3SDaniel Lezcano 
55268ed67789SDaniel Lezcano 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
5527cdb18761SDaniel Lezcano 	if (ret)
5528d189634eSThomas Graf 		goto out_register_late_subsys;
55298ed67789SDaniel Lezcano 
55308d0b94afSMartin KaFai Lau 	for_each_possible_cpu(cpu) {
55318d0b94afSMartin KaFai Lau 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
55328d0b94afSMartin KaFai Lau 
55338d0b94afSMartin KaFai Lau 		INIT_LIST_HEAD(&ul->head);
55348d0b94afSMartin KaFai Lau 		spin_lock_init(&ul->lock);
55358d0b94afSMartin KaFai Lau 	}
55368d0b94afSMartin KaFai Lau 
5537433d49c3SDaniel Lezcano out:
5538433d49c3SDaniel Lezcano 	return ret;
5539433d49c3SDaniel Lezcano 
5540d189634eSThomas Graf out_register_late_subsys:
554116feebcfSFlorian Westphal 	rtnl_unregister_all(PF_INET6);
5542d189634eSThomas Graf 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5543433d49c3SDaniel Lezcano fib6_rules_init:
5544433d49c3SDaniel Lezcano 	fib6_rules_cleanup();
5545433d49c3SDaniel Lezcano xfrm6_init:
5546433d49c3SDaniel Lezcano 	xfrm6_fini();
55472a0c451aSThomas Graf out_fib6_init:
55482a0c451aSThomas Graf 	fib6_gc_cleanup();
55498ed67789SDaniel Lezcano out_register_subsys:
55508ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
55517e52b33bSDavid S. Miller out_register_inetpeer:
55527e52b33bSDavid S. Miller 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
5553fc66f95cSEric Dumazet out_dst_entries:
5554fc66f95cSEric Dumazet 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5555433d49c3SDaniel Lezcano out_kmem_cache:
5556f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5557433d49c3SDaniel Lezcano 	goto out;
55581da177e4SLinus Torvalds }
55591da177e4SLinus Torvalds 
55601da177e4SLinus Torvalds void ip6_route_cleanup(void)
55611da177e4SLinus Torvalds {
55628ed67789SDaniel Lezcano 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
5563d189634eSThomas Graf 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5564101367c2SThomas Graf 	fib6_rules_cleanup();
55651da177e4SLinus Torvalds 	xfrm6_fini();
55661da177e4SLinus Torvalds 	fib6_gc_cleanup();
5567c3426b47SDavid S. Miller 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
55688ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
556941bb78b4SXiaotian Feng 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5570f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
55711da177e4SLinus Torvalds }
5572