xref: /openbmc/linux/net/ipv6/route.c (revision d4ead6b34b67fd711639324b6465a050bcb197d4)
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */
131da177e4SLinus Torvalds 
/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
261da177e4SLinus Torvalds 
27f3213831SJoe Perches #define pr_fmt(fmt) "IPv6: " fmt
28f3213831SJoe Perches 
294fc268d2SRandy Dunlap #include <linux/capability.h>
301da177e4SLinus Torvalds #include <linux/errno.h>
31bc3b2d7fSPaul Gortmaker #include <linux/export.h>
321da177e4SLinus Torvalds #include <linux/types.h>
331da177e4SLinus Torvalds #include <linux/times.h>
341da177e4SLinus Torvalds #include <linux/socket.h>
351da177e4SLinus Torvalds #include <linux/sockios.h>
361da177e4SLinus Torvalds #include <linux/net.h>
371da177e4SLinus Torvalds #include <linux/route.h>
381da177e4SLinus Torvalds #include <linux/netdevice.h>
391da177e4SLinus Torvalds #include <linux/in6.h>
407bc570c8SYOSHIFUJI Hideaki #include <linux/mroute6.h>
411da177e4SLinus Torvalds #include <linux/init.h>
421da177e4SLinus Torvalds #include <linux/if_arp.h>
431da177e4SLinus Torvalds #include <linux/proc_fs.h>
441da177e4SLinus Torvalds #include <linux/seq_file.h>
455b7c931dSDaniel Lezcano #include <linux/nsproxy.h>
465a0e3ad6STejun Heo #include <linux/slab.h>
4735732d01SWei Wang #include <linux/jhash.h>
48457c4cbcSEric W. Biederman #include <net/net_namespace.h>
491da177e4SLinus Torvalds #include <net/snmp.h>
501da177e4SLinus Torvalds #include <net/ipv6.h>
511da177e4SLinus Torvalds #include <net/ip6_fib.h>
521da177e4SLinus Torvalds #include <net/ip6_route.h>
531da177e4SLinus Torvalds #include <net/ndisc.h>
541da177e4SLinus Torvalds #include <net/addrconf.h>
551da177e4SLinus Torvalds #include <net/tcp.h>
561da177e4SLinus Torvalds #include <linux/rtnetlink.h>
571da177e4SLinus Torvalds #include <net/dst.h>
58904af04dSJiri Benc #include <net/dst_metadata.h>
591da177e4SLinus Torvalds #include <net/xfrm.h>
608d71740cSTom Tucker #include <net/netevent.h>
6121713ebcSThomas Graf #include <net/netlink.h>
6251ebd318SNicolas Dichtel #include <net/nexthop.h>
6319e42e45SRoopa Prabhu #include <net/lwtunnel.h>
64904af04dSJiri Benc #include <net/ip_tunnels.h>
65ca254490SDavid Ahern #include <net/l3mdev.h>
66b811580dSDavid Ahern #include <trace/events/fib6.h>
671da177e4SLinus Torvalds 
687c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
691da177e4SLinus Torvalds 
701da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
711da177e4SLinus Torvalds #include <linux/sysctl.h>
721da177e4SLinus Torvalds #endif
731da177e4SLinus Torvalds 
/*
 * Result codes of the neighbour reachability check performed on a route
 * (the return type of rt6_check_neigh()).  Every failure code is negative
 * and the only success code is positive.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};
80afc154e9SHannes Frederic Sowa 
811da177e4SLinus Torvalds static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
820dbaee3bSDavid S. Miller static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
83ebb762f2SSteffen Klassert static unsigned int	 ip6_mtu(const struct dst_entry *dst);
841da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *);
851da177e4SLinus Torvalds static void		ip6_dst_destroy(struct dst_entry *);
861da177e4SLinus Torvalds static void		ip6_dst_ifdown(struct dst_entry *,
871da177e4SLinus Torvalds 				       struct net_device *dev, int how);
88569d3645SDaniel Lezcano static int		 ip6_dst_gc(struct dst_ops *ops);
891da177e4SLinus Torvalds 
901da177e4SLinus Torvalds static int		ip6_pkt_discard(struct sk_buff *skb);
91ede2059dSEric W. Biederman static int		ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
927150aedeSKamala R static int		ip6_pkt_prohibit(struct sk_buff *skb);
93ede2059dSEric W. Biederman static int		ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
941da177e4SLinus Torvalds static void		ip6_link_failure(struct sk_buff *skb);
956700c270SDavid S. Miller static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
966700c270SDavid S. Miller 					   struct sk_buff *skb, u32 mtu);
976700c270SDavid S. Miller static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
986700c270SDavid S. Miller 					struct sk_buff *skb);
9952bd4c0cSNicolas Dichtel static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
10016a16cd3SDavid Ahern static size_t rt6_nlmsg_size(struct rt6_info *rt);
101*d4ead6b3SDavid Ahern static int rt6_fill_node(struct net *net, struct sk_buff *skb,
102*d4ead6b3SDavid Ahern 			 struct rt6_info *rt, struct dst_entry *dst,
103*d4ead6b3SDavid Ahern 			 struct in6_addr *dest, struct in6_addr *src,
10416a16cd3SDavid Ahern 			 int iif, int type, u32 portid, u32 seq,
10516a16cd3SDavid Ahern 			 unsigned int flags);
10635732d01SWei Wang static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
10735732d01SWei Wang 					   struct in6_addr *daddr,
10835732d01SWei Wang 					   struct in6_addr *saddr);
1091da177e4SLinus Torvalds 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Forward declarations of the route-information helpers; they are only
 * declared when CONFIG_IPV6_ROUTE_INFO is enabled.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev);
#endif
12170ceb4f5SYOSHIFUJI Hideaki 
1228d0b94afSMartin KaFai Lau struct uncached_list {
1238d0b94afSMartin KaFai Lau 	spinlock_t		lock;
1248d0b94afSMartin KaFai Lau 	struct list_head	head;
1258d0b94afSMartin KaFai Lau };
1268d0b94afSMartin KaFai Lau 
1278d0b94afSMartin KaFai Lau static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
1288d0b94afSMartin KaFai Lau 
129510c321bSXin Long void rt6_uncached_list_add(struct rt6_info *rt)
1308d0b94afSMartin KaFai Lau {
1318d0b94afSMartin KaFai Lau 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
1328d0b94afSMartin KaFai Lau 
1338d0b94afSMartin KaFai Lau 	rt->rt6i_uncached_list = ul;
1348d0b94afSMartin KaFai Lau 
1358d0b94afSMartin KaFai Lau 	spin_lock_bh(&ul->lock);
1368d0b94afSMartin KaFai Lau 	list_add_tail(&rt->rt6i_uncached, &ul->head);
1378d0b94afSMartin KaFai Lau 	spin_unlock_bh(&ul->lock);
1388d0b94afSMartin KaFai Lau }
1398d0b94afSMartin KaFai Lau 
140510c321bSXin Long void rt6_uncached_list_del(struct rt6_info *rt)
1418d0b94afSMartin KaFai Lau {
1428d0b94afSMartin KaFai Lau 	if (!list_empty(&rt->rt6i_uncached)) {
1438d0b94afSMartin KaFai Lau 		struct uncached_list *ul = rt->rt6i_uncached_list;
14481eb8447SWei Wang 		struct net *net = dev_net(rt->dst.dev);
1458d0b94afSMartin KaFai Lau 
1468d0b94afSMartin KaFai Lau 		spin_lock_bh(&ul->lock);
1478d0b94afSMartin KaFai Lau 		list_del(&rt->rt6i_uncached);
14881eb8447SWei Wang 		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
1498d0b94afSMartin KaFai Lau 		spin_unlock_bh(&ul->lock);
1508d0b94afSMartin KaFai Lau 	}
1518d0b94afSMartin KaFai Lau }
1528d0b94afSMartin KaFai Lau 
1538d0b94afSMartin KaFai Lau static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
1548d0b94afSMartin KaFai Lau {
1558d0b94afSMartin KaFai Lau 	struct net_device *loopback_dev = net->loopback_dev;
1568d0b94afSMartin KaFai Lau 	int cpu;
1578d0b94afSMartin KaFai Lau 
158e332bc67SEric W. Biederman 	if (dev == loopback_dev)
159e332bc67SEric W. Biederman 		return;
160e332bc67SEric W. Biederman 
1618d0b94afSMartin KaFai Lau 	for_each_possible_cpu(cpu) {
1628d0b94afSMartin KaFai Lau 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
1638d0b94afSMartin KaFai Lau 		struct rt6_info *rt;
1648d0b94afSMartin KaFai Lau 
1658d0b94afSMartin KaFai Lau 		spin_lock_bh(&ul->lock);
1668d0b94afSMartin KaFai Lau 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
1678d0b94afSMartin KaFai Lau 			struct inet6_dev *rt_idev = rt->rt6i_idev;
1688d0b94afSMartin KaFai Lau 			struct net_device *rt_dev = rt->dst.dev;
1698d0b94afSMartin KaFai Lau 
170e332bc67SEric W. Biederman 			if (rt_idev->dev == dev) {
1718d0b94afSMartin KaFai Lau 				rt->rt6i_idev = in6_dev_get(loopback_dev);
1728d0b94afSMartin KaFai Lau 				in6_dev_put(rt_idev);
1738d0b94afSMartin KaFai Lau 			}
1748d0b94afSMartin KaFai Lau 
175e332bc67SEric W. Biederman 			if (rt_dev == dev) {
1768d0b94afSMartin KaFai Lau 				rt->dst.dev = loopback_dev;
1778d0b94afSMartin KaFai Lau 				dev_hold(rt->dst.dev);
1788d0b94afSMartin KaFai Lau 				dev_put(rt_dev);
1798d0b94afSMartin KaFai Lau 			}
1808d0b94afSMartin KaFai Lau 		}
1818d0b94afSMartin KaFai Lau 		spin_unlock_bh(&ul->lock);
1828d0b94afSMartin KaFai Lau 	}
1838d0b94afSMartin KaFai Lau }
1848d0b94afSMartin KaFai Lau 
185f894cbf8SDavid S. Miller static inline const void *choose_neigh_daddr(struct rt6_info *rt,
186f894cbf8SDavid S. Miller 					     struct sk_buff *skb,
187f894cbf8SDavid S. Miller 					     const void *daddr)
18839232973SDavid S. Miller {
18939232973SDavid S. Miller 	struct in6_addr *p = &rt->rt6i_gateway;
19039232973SDavid S. Miller 
191a7563f34SDavid S. Miller 	if (!ipv6_addr_any(p))
19239232973SDavid S. Miller 		return (const void *) p;
193f894cbf8SDavid S. Miller 	else if (skb)
194f894cbf8SDavid S. Miller 		return &ipv6_hdr(skb)->daddr;
19539232973SDavid S. Miller 	return daddr;
19639232973SDavid S. Miller }
19739232973SDavid S. Miller 
198f894cbf8SDavid S. Miller static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
199f894cbf8SDavid S. Miller 					  struct sk_buff *skb,
200f894cbf8SDavid S. Miller 					  const void *daddr)
201d3aaeb38SDavid S. Miller {
20239232973SDavid S. Miller 	struct rt6_info *rt = (struct rt6_info *) dst;
20339232973SDavid S. Miller 	struct neighbour *n;
20439232973SDavid S. Miller 
205f894cbf8SDavid S. Miller 	daddr = choose_neigh_daddr(rt, skb, daddr);
2068e022ee6SYOSHIFUJI Hideaki / 吉藤英明 	n = __ipv6_neigh_lookup(dst->dev, daddr);
207f83c7790SDavid S. Miller 	if (n)
208f83c7790SDavid S. Miller 		return n;
209f83c7790SDavid S. Miller 	return neigh_create(&nd_tbl, daddr, dst->dev);
210f83c7790SDavid S. Miller }
211f83c7790SDavid S. Miller 
21263fca65dSJulian Anastasov static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
21363fca65dSJulian Anastasov {
21463fca65dSJulian Anastasov 	struct net_device *dev = dst->dev;
21563fca65dSJulian Anastasov 	struct rt6_info *rt = (struct rt6_info *)dst;
21663fca65dSJulian Anastasov 
21763fca65dSJulian Anastasov 	daddr = choose_neigh_daddr(rt, NULL, daddr);
21863fca65dSJulian Anastasov 	if (!daddr)
21963fca65dSJulian Anastasov 		return;
22063fca65dSJulian Anastasov 	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
22163fca65dSJulian Anastasov 		return;
22263fca65dSJulian Anastasov 	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
22363fca65dSJulian Anastasov 		return;
22463fca65dSJulian Anastasov 	__ipv6_confirm_neigh(dev, daddr);
22563fca65dSJulian Anastasov }
22663fca65dSJulian Anastasov 
2279a7ec3a9SDaniel Lezcano static struct dst_ops ip6_dst_ops_template = {
2281da177e4SLinus Torvalds 	.family			=	AF_INET6,
2291da177e4SLinus Torvalds 	.gc			=	ip6_dst_gc,
2301da177e4SLinus Torvalds 	.gc_thresh		=	1024,
2311da177e4SLinus Torvalds 	.check			=	ip6_dst_check,
2320dbaee3bSDavid S. Miller 	.default_advmss		=	ip6_default_advmss,
233ebb762f2SSteffen Klassert 	.mtu			=	ip6_mtu,
234*d4ead6b3SDavid Ahern 	.cow_metrics		=	dst_cow_metrics_generic,
2351da177e4SLinus Torvalds 	.destroy		=	ip6_dst_destroy,
2361da177e4SLinus Torvalds 	.ifdown			=	ip6_dst_ifdown,
2371da177e4SLinus Torvalds 	.negative_advice	=	ip6_negative_advice,
2381da177e4SLinus Torvalds 	.link_failure		=	ip6_link_failure,
2391da177e4SLinus Torvalds 	.update_pmtu		=	ip6_rt_update_pmtu,
2406e157b6aSDavid S. Miller 	.redirect		=	rt6_do_redirect,
2419f8955ccSEric W. Biederman 	.local_out		=	__ip6_local_out,
242d3aaeb38SDavid S. Miller 	.neigh_lookup		=	ip6_neigh_lookup,
24363fca65dSJulian Anastasov 	.confirm_neigh		=	ip6_confirm_neigh,
2441da177e4SLinus Torvalds };
2451da177e4SLinus Torvalds 
246ebb762f2SSteffen Klassert static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
247ec831ea7SRoland Dreier {
248618f9bc7SSteffen Klassert 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
249618f9bc7SSteffen Klassert 
250618f9bc7SSteffen Klassert 	return mtu ? : dst->dev->mtu;
251ec831ea7SRoland Dreier }
252ec831ea7SRoland Dreier 
2536700c270SDavid S. Miller static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2546700c270SDavid S. Miller 					 struct sk_buff *skb, u32 mtu)
25514e50e57SDavid S. Miller {
25614e50e57SDavid S. Miller }
25714e50e57SDavid S. Miller 
/*
 * dst_ops->redirect handler for blackhole dsts: intentionally empty, so
 * redirects are ignored.
 */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
262b587ee3bSDavid S. Miller 
26314e50e57SDavid S. Miller static struct dst_ops ip6_dst_blackhole_ops = {
26414e50e57SDavid S. Miller 	.family			=	AF_INET6,
26514e50e57SDavid S. Miller 	.destroy		=	ip6_dst_destroy,
26614e50e57SDavid S. Miller 	.check			=	ip6_dst_check,
267ebb762f2SSteffen Klassert 	.mtu			=	ip6_blackhole_mtu,
268214f45c9SEric Dumazet 	.default_advmss		=	ip6_default_advmss,
26914e50e57SDavid S. Miller 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
270b587ee3bSDavid S. Miller 	.redirect		=	ip6_rt_blackhole_redirect,
2710a1f5962SMartin KaFai Lau 	.cow_metrics		=	dst_cow_metrics_generic,
272d3aaeb38SDavid S. Miller 	.neigh_lookup		=	ip6_neigh_lookup,
27314e50e57SDavid S. Miller };
27414e50e57SDavid S. Miller 
27562fa8a84SDavid S. Miller static const u32 ip6_template_metrics[RTAX_MAX] = {
27614edd87dSLi RongQing 	[RTAX_HOPLIMIT - 1] = 0,
27762fa8a84SDavid S. Miller };
27862fa8a84SDavid S. Miller 
279fb0af4c7SEric Dumazet static const struct rt6_info ip6_null_entry_template = {
2801da177e4SLinus Torvalds 	.dst = {
2811da177e4SLinus Torvalds 		.__refcnt	= ATOMIC_INIT(1),
2821da177e4SLinus Torvalds 		.__use		= 1,
2832c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
2841da177e4SLinus Torvalds 		.error		= -ENETUNREACH,
2851da177e4SLinus Torvalds 		.input		= ip6_pkt_discard,
2861da177e4SLinus Torvalds 		.output		= ip6_pkt_discard_out,
2871da177e4SLinus Torvalds 	},
2881da177e4SLinus Torvalds 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
2894f724279SJean-Mickael Guerin 	.rt6i_protocol  = RTPROT_KERNEL,
2901da177e4SLinus Torvalds 	.rt6i_metric	= ~(u32) 0,
2911da177e4SLinus Torvalds 	.rt6i_ref	= ATOMIC_INIT(1),
292e8478e80SDavid Ahern 	.fib6_type	= RTN_UNREACHABLE,
2931da177e4SLinus Torvalds };
2941da177e4SLinus Torvalds 
295101367c2SThomas Graf #ifdef CONFIG_IPV6_MULTIPLE_TABLES
296101367c2SThomas Graf 
297fb0af4c7SEric Dumazet static const struct rt6_info ip6_prohibit_entry_template = {
298101367c2SThomas Graf 	.dst = {
299101367c2SThomas Graf 		.__refcnt	= ATOMIC_INIT(1),
300101367c2SThomas Graf 		.__use		= 1,
3012c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
302101367c2SThomas Graf 		.error		= -EACCES,
3039ce8ade0SThomas Graf 		.input		= ip6_pkt_prohibit,
3049ce8ade0SThomas Graf 		.output		= ip6_pkt_prohibit_out,
305101367c2SThomas Graf 	},
306101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
3074f724279SJean-Mickael Guerin 	.rt6i_protocol  = RTPROT_KERNEL,
308101367c2SThomas Graf 	.rt6i_metric	= ~(u32) 0,
309101367c2SThomas Graf 	.rt6i_ref	= ATOMIC_INIT(1),
310e8478e80SDavid Ahern 	.fib6_type	= RTN_PROHIBIT,
311101367c2SThomas Graf };
312101367c2SThomas Graf 
313fb0af4c7SEric Dumazet static const struct rt6_info ip6_blk_hole_entry_template = {
314101367c2SThomas Graf 	.dst = {
315101367c2SThomas Graf 		.__refcnt	= ATOMIC_INIT(1),
316101367c2SThomas Graf 		.__use		= 1,
3172c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
318101367c2SThomas Graf 		.error		= -EINVAL,
319352e512cSHerbert Xu 		.input		= dst_discard,
320ede2059dSEric W. Biederman 		.output		= dst_discard_out,
321101367c2SThomas Graf 	},
322101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
3234f724279SJean-Mickael Guerin 	.rt6i_protocol  = RTPROT_KERNEL,
324101367c2SThomas Graf 	.rt6i_metric	= ~(u32) 0,
325101367c2SThomas Graf 	.rt6i_ref	= ATOMIC_INIT(1),
326e8478e80SDavid Ahern 	.fib6_type	= RTN_BLACKHOLE,
327101367c2SThomas Graf };
328101367c2SThomas Graf 
329101367c2SThomas Graf #endif
330101367c2SThomas Graf 
331ebfa45f0SMartin KaFai Lau static void rt6_info_init(struct rt6_info *rt)
332ebfa45f0SMartin KaFai Lau {
333ebfa45f0SMartin KaFai Lau 	struct dst_entry *dst = &rt->dst;
334ebfa45f0SMartin KaFai Lau 
335ebfa45f0SMartin KaFai Lau 	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
336ebfa45f0SMartin KaFai Lau 	INIT_LIST_HEAD(&rt->rt6i_siblings);
337ebfa45f0SMartin KaFai Lau 	INIT_LIST_HEAD(&rt->rt6i_uncached);
338*d4ead6b3SDavid Ahern 	rt->fib6_metrics = (struct dst_metrics *)&dst_default_metrics;
339ebfa45f0SMartin KaFai Lau }
340ebfa45f0SMartin KaFai Lau 
3411da177e4SLinus Torvalds /* allocate dst with ip6_dst_ops */
342d52d3997SMartin KaFai Lau static struct rt6_info *__ip6_dst_alloc(struct net *net,
343957c665fSDavid S. Miller 					struct net_device *dev,
344ad706862SMartin KaFai Lau 					int flags)
3451da177e4SLinus Torvalds {
34697bab73fSDavid S. Miller 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
347b2a9c0edSWei Wang 					1, DST_OBSOLETE_FORCE_CHK, flags);
348cf911662SDavid S. Miller 
34981eb8447SWei Wang 	if (rt) {
350ebfa45f0SMartin KaFai Lau 		rt6_info_init(rt);
35181eb8447SWei Wang 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
35281eb8447SWei Wang 	}
3538104891bSSteffen Klassert 
354cf911662SDavid S. Miller 	return rt;
3551da177e4SLinus Torvalds }
3561da177e4SLinus Torvalds 
3579ab179d8SDavid Ahern struct rt6_info *ip6_dst_alloc(struct net *net,
358d52d3997SMartin KaFai Lau 			       struct net_device *dev,
359ad706862SMartin KaFai Lau 			       int flags)
360d52d3997SMartin KaFai Lau {
361ad706862SMartin KaFai Lau 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
362d52d3997SMartin KaFai Lau 
363d52d3997SMartin KaFai Lau 	if (rt) {
364d52d3997SMartin KaFai Lau 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
365bfd8e5a4SEric Dumazet 		if (!rt->rt6i_pcpu) {
366587fea74SWei Wang 			dst_release_immediate(&rt->dst);
367d52d3997SMartin KaFai Lau 			return NULL;
368d52d3997SMartin KaFai Lau 		}
369d52d3997SMartin KaFai Lau 	}
370d52d3997SMartin KaFai Lau 
371d52d3997SMartin KaFai Lau 	return rt;
372d52d3997SMartin KaFai Lau }
3739ab179d8SDavid Ahern EXPORT_SYMBOL(ip6_dst_alloc);
374d52d3997SMartin KaFai Lau 
3751da177e4SLinus Torvalds static void ip6_dst_destroy(struct dst_entry *dst)
3761da177e4SLinus Torvalds {
3771da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
37835732d01SWei Wang 	struct rt6_exception_bucket *bucket;
3793a2232e9SDavid Miller 	struct rt6_info *from = rt->from;
3808d0b94afSMartin KaFai Lau 	struct inet6_dev *idev;
381*d4ead6b3SDavid Ahern 	struct dst_metrics *m;
3821da177e4SLinus Torvalds 
3838e2ec639SYan, Zheng 	dst_destroy_metrics_generic(dst);
384d52d3997SMartin KaFai Lau 	free_percpu(rt->rt6i_pcpu);
3858d0b94afSMartin KaFai Lau 	rt6_uncached_list_del(rt);
3868d0b94afSMartin KaFai Lau 
3878d0b94afSMartin KaFai Lau 	idev = rt->rt6i_idev;
38838308473SDavid S. Miller 	if (idev) {
3891da177e4SLinus Torvalds 		rt->rt6i_idev = NULL;
3901da177e4SLinus Torvalds 		in6_dev_put(idev);
3911da177e4SLinus Torvalds 	}
39235732d01SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
39335732d01SWei Wang 	if (bucket) {
39435732d01SWei Wang 		rt->rt6i_exception_bucket = NULL;
39535732d01SWei Wang 		kfree(bucket);
39635732d01SWei Wang 	}
3971716a961SGao feng 
398*d4ead6b3SDavid Ahern 	m = rt->fib6_metrics;
399*d4ead6b3SDavid Ahern 	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
400*d4ead6b3SDavid Ahern 		kfree(m);
401*d4ead6b3SDavid Ahern 
4023a2232e9SDavid Miller 	rt->from = NULL;
4033a2232e9SDavid Miller 	dst_release(&from->dst);
404b3419363SDavid S. Miller }
405b3419363SDavid S. Miller 
4061da177e4SLinus Torvalds static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
4071da177e4SLinus Torvalds 			   int how)
4081da177e4SLinus Torvalds {
4091da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
4101da177e4SLinus Torvalds 	struct inet6_dev *idev = rt->rt6i_idev;
4115a3e55d6SDenis V. Lunev 	struct net_device *loopback_dev =
412c346dca1SYOSHIFUJI Hideaki 		dev_net(dev)->loopback_dev;
4131da177e4SLinus Torvalds 
414e5645f51SWei Wang 	if (idev && idev->dev != loopback_dev) {
415e5645f51SWei Wang 		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
41638308473SDavid S. Miller 		if (loopback_idev) {
4171da177e4SLinus Torvalds 			rt->rt6i_idev = loopback_idev;
4181da177e4SLinus Torvalds 			in6_dev_put(idev);
4191da177e4SLinus Torvalds 		}
4201da177e4SLinus Torvalds 	}
42197cac082SDavid S. Miller }
4221da177e4SLinus Torvalds 
4235973fb1eSMartin KaFai Lau static bool __rt6_check_expired(const struct rt6_info *rt)
4245973fb1eSMartin KaFai Lau {
4255973fb1eSMartin KaFai Lau 	if (rt->rt6i_flags & RTF_EXPIRES)
4265973fb1eSMartin KaFai Lau 		return time_after(jiffies, rt->dst.expires);
4275973fb1eSMartin KaFai Lau 	else
4285973fb1eSMartin KaFai Lau 		return false;
4295973fb1eSMartin KaFai Lau }
4305973fb1eSMartin KaFai Lau 
431a50feda5SEric Dumazet static bool rt6_check_expired(const struct rt6_info *rt)
4321da177e4SLinus Torvalds {
4331716a961SGao feng 	if (rt->rt6i_flags & RTF_EXPIRES) {
4341716a961SGao feng 		if (time_after(jiffies, rt->dst.expires))
435a50feda5SEric Dumazet 			return true;
4363a2232e9SDavid Miller 	} else if (rt->from) {
4371e2ea8adSXin Long 		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
4383a2232e9SDavid Miller 			rt6_check_expired(rt->from);
4391716a961SGao feng 	}
440a50feda5SEric Dumazet 	return false;
4411da177e4SLinus Torvalds }
4421da177e4SLinus Torvalds 
443b4bac172SDavid Ahern static struct rt6_info *rt6_multipath_select(const struct net *net,
444b4bac172SDavid Ahern 					     struct rt6_info *match,
44552bd4c0cSNicolas Dichtel 					     struct flowi6 *fl6, int oif,
446b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
44752bd4c0cSNicolas Dichtel 					     int strict)
44851ebd318SNicolas Dichtel {
44951ebd318SNicolas Dichtel 	struct rt6_info *sibling, *next_sibling;
45051ebd318SNicolas Dichtel 
451b673d6ccSJakub Sitnicki 	/* We might have already computed the hash for ICMPv6 errors. In such
452b673d6ccSJakub Sitnicki 	 * case it will always be non-zero. Otherwise now is the time to do it.
453b673d6ccSJakub Sitnicki 	 */
454b673d6ccSJakub Sitnicki 	if (!fl6->mp_hash)
455b4bac172SDavid Ahern 		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
456b673d6ccSJakub Sitnicki 
4575e670d84SDavid Ahern 	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
4583d709f69SIdo Schimmel 		return match;
459bbfcd776SIdo Schimmel 
4603d709f69SIdo Schimmel 	list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
4613d709f69SIdo Schimmel 				 rt6i_siblings) {
4625e670d84SDavid Ahern 		int nh_upper_bound;
4635e670d84SDavid Ahern 
4645e670d84SDavid Ahern 		nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
4655e670d84SDavid Ahern 		if (fl6->mp_hash > nh_upper_bound)
4663d709f69SIdo Schimmel 			continue;
46752bd4c0cSNicolas Dichtel 		if (rt6_score_route(sibling, oif, strict) < 0)
46852bd4c0cSNicolas Dichtel 			break;
46951ebd318SNicolas Dichtel 		match = sibling;
47051ebd318SNicolas Dichtel 		break;
47151ebd318SNicolas Dichtel 	}
4723d709f69SIdo Schimmel 
47351ebd318SNicolas Dichtel 	return match;
47451ebd318SNicolas Dichtel }
47551ebd318SNicolas Dichtel 
4761da177e4SLinus Torvalds /*
47766f5d6ceSWei Wang  *	Route lookup. rcu_read_lock() should be held.
4781da177e4SLinus Torvalds  */
4791da177e4SLinus Torvalds 
4808ed67789SDaniel Lezcano static inline struct rt6_info *rt6_device_match(struct net *net,
4818ed67789SDaniel Lezcano 						    struct rt6_info *rt,
482b71d1d42SEric Dumazet 						    const struct in6_addr *saddr,
4831da177e4SLinus Torvalds 						    int oif,
484d420895eSYOSHIFUJI Hideaki 						    int flags)
4851da177e4SLinus Torvalds {
4861da177e4SLinus Torvalds 	struct rt6_info *local = NULL;
4871da177e4SLinus Torvalds 	struct rt6_info *sprt;
4881da177e4SLinus Torvalds 
4895e670d84SDavid Ahern 	if (!oif && ipv6_addr_any(saddr) &&
4905e670d84SDavid Ahern 	    !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
4918067bb8cSIdo Schimmel 		return rt;
492dd3abc4eSYOSHIFUJI Hideaki 
493071fb37eSDavid Miller 	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
4945e670d84SDavid Ahern 		const struct net_device *dev = sprt->fib6_nh.nh_dev;
495dd3abc4eSYOSHIFUJI Hideaki 
4965e670d84SDavid Ahern 		if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
4978067bb8cSIdo Schimmel 			continue;
4988067bb8cSIdo Schimmel 
499dd3abc4eSYOSHIFUJI Hideaki 		if (oif) {
5001da177e4SLinus Torvalds 			if (dev->ifindex == oif)
5011da177e4SLinus Torvalds 				return sprt;
5021da177e4SLinus Torvalds 			if (dev->flags & IFF_LOOPBACK) {
50338308473SDavid S. Miller 				if (!sprt->rt6i_idev ||
5041da177e4SLinus Torvalds 				    sprt->rt6i_idev->dev->ifindex != oif) {
50517fb0b2bSDavid Ahern 					if (flags & RT6_LOOKUP_F_IFACE)
5061da177e4SLinus Torvalds 						continue;
50717fb0b2bSDavid Ahern 					if (local &&
50817fb0b2bSDavid Ahern 					    local->rt6i_idev->dev->ifindex == oif)
5091da177e4SLinus Torvalds 						continue;
5101da177e4SLinus Torvalds 				}
5111da177e4SLinus Torvalds 				local = sprt;
5121da177e4SLinus Torvalds 			}
513dd3abc4eSYOSHIFUJI Hideaki 		} else {
514dd3abc4eSYOSHIFUJI Hideaki 			if (ipv6_chk_addr(net, saddr, dev,
515dd3abc4eSYOSHIFUJI Hideaki 					  flags & RT6_LOOKUP_F_IFACE))
516dd3abc4eSYOSHIFUJI Hideaki 				return sprt;
517dd3abc4eSYOSHIFUJI Hideaki 		}
5181da177e4SLinus Torvalds 	}
5191da177e4SLinus Torvalds 
520dd3abc4eSYOSHIFUJI Hideaki 	if (oif) {
5211da177e4SLinus Torvalds 		if (local)
5221da177e4SLinus Torvalds 			return local;
5231da177e4SLinus Torvalds 
524d420895eSYOSHIFUJI Hideaki 		if (flags & RT6_LOOKUP_F_IFACE)
5258ed67789SDaniel Lezcano 			return net->ipv6.ip6_null_entry;
5261da177e4SLinus Torvalds 	}
5278067bb8cSIdo Schimmel 
5285e670d84SDavid Ahern 	return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
5291da177e4SLinus Torvalds }
5301da177e4SLinus Torvalds 
53127097255SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
532c2f17e82SHannes Frederic Sowa struct __rt6_probe_work {
533c2f17e82SHannes Frederic Sowa 	struct work_struct work;
534c2f17e82SHannes Frederic Sowa 	struct in6_addr target;
535c2f17e82SHannes Frederic Sowa 	struct net_device *dev;
536c2f17e82SHannes Frederic Sowa };
537c2f17e82SHannes Frederic Sowa 
538c2f17e82SHannes Frederic Sowa static void rt6_probe_deferred(struct work_struct *w)
539c2f17e82SHannes Frederic Sowa {
540c2f17e82SHannes Frederic Sowa 	struct in6_addr mcaddr;
541c2f17e82SHannes Frederic Sowa 	struct __rt6_probe_work *work =
542c2f17e82SHannes Frederic Sowa 		container_of(w, struct __rt6_probe_work, work);
543c2f17e82SHannes Frederic Sowa 
544c2f17e82SHannes Frederic Sowa 	addrconf_addr_solict_mult(&work->target, &mcaddr);
545adc176c5SErik Nordmark 	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
546c2f17e82SHannes Frederic Sowa 	dev_put(work->dev);
547662f5533SMichael Büsch 	kfree(work);
548c2f17e82SHannes Frederic Sowa }
549c2f17e82SHannes Frederic Sowa 
55027097255SYOSHIFUJI Hideaki static void rt6_probe(struct rt6_info *rt)
55127097255SYOSHIFUJI Hideaki {
552990edb42SMartin KaFai Lau 	struct __rt6_probe_work *work;
5535e670d84SDavid Ahern 	const struct in6_addr *nh_gw;
554f2c31e32SEric Dumazet 	struct neighbour *neigh;
5555e670d84SDavid Ahern 	struct net_device *dev;
5565e670d84SDavid Ahern 
55727097255SYOSHIFUJI Hideaki 	/*
55827097255SYOSHIFUJI Hideaki 	 * Okay, this does not seem to be appropriate
55927097255SYOSHIFUJI Hideaki 	 * for now, however, we need to check if it
56027097255SYOSHIFUJI Hideaki 	 * is really so; aka Router Reachability Probing.
56127097255SYOSHIFUJI Hideaki 	 *
56227097255SYOSHIFUJI Hideaki 	 * Router Reachability Probe MUST be rate-limited
56327097255SYOSHIFUJI Hideaki 	 * to no more than one per minute.
56427097255SYOSHIFUJI Hideaki 	 */
5652152caeaSYOSHIFUJI Hideaki / 吉藤英明 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
566fdd6681dSAmerigo Wang 		return;
5675e670d84SDavid Ahern 
5685e670d84SDavid Ahern 	nh_gw = &rt->fib6_nh.nh_gw;
5695e670d84SDavid Ahern 	dev = rt->fib6_nh.nh_dev;
5702152caeaSYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_lock_bh();
5715e670d84SDavid Ahern 	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
5722152caeaSYOSHIFUJI Hideaki / 吉藤英明 	if (neigh) {
5738d6c31bfSMartin KaFai Lau 		if (neigh->nud_state & NUD_VALID)
5748d6c31bfSMartin KaFai Lau 			goto out;
5758d6c31bfSMartin KaFai Lau 
576990edb42SMartin KaFai Lau 		work = NULL;
5772152caeaSYOSHIFUJI Hideaki / 吉藤英明 		write_lock(&neigh->lock);
578990edb42SMartin KaFai Lau 		if (!(neigh->nud_state & NUD_VALID) &&
579990edb42SMartin KaFai Lau 		    time_after(jiffies,
580990edb42SMartin KaFai Lau 			       neigh->updated +
581990edb42SMartin KaFai Lau 			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
582c2f17e82SHannes Frederic Sowa 			work = kmalloc(sizeof(*work), GFP_ATOMIC);
583990edb42SMartin KaFai Lau 			if (work)
5847e980569SJiri Benc 				__neigh_set_probe_once(neigh);
585990edb42SMartin KaFai Lau 		}
586c2f17e82SHannes Frederic Sowa 		write_unlock(&neigh->lock);
587990edb42SMartin KaFai Lau 	} else {
588990edb42SMartin KaFai Lau 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
589990edb42SMartin KaFai Lau 	}
590c2f17e82SHannes Frederic Sowa 
591c2f17e82SHannes Frederic Sowa 	if (work) {
592c2f17e82SHannes Frederic Sowa 		INIT_WORK(&work->work, rt6_probe_deferred);
5935e670d84SDavid Ahern 		work->target = *nh_gw;
5945e670d84SDavid Ahern 		dev_hold(dev);
5955e670d84SDavid Ahern 		work->dev = dev;
596c2f17e82SHannes Frederic Sowa 		schedule_work(&work->work);
597c2f17e82SHannes Frederic Sowa 	}
598990edb42SMartin KaFai Lau 
5998d6c31bfSMartin KaFai Lau out:
6002152caeaSYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_unlock_bh();
601f2c31e32SEric Dumazet }
60227097255SYOSHIFUJI Hideaki #else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF: no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
60627097255SYOSHIFUJI Hideaki #endif
60727097255SYOSHIFUJI Hideaki 
6081da177e4SLinus Torvalds /*
609554cfb7eSYOSHIFUJI Hideaki  * Default Router Selection (RFC 2461 6.3.6)
6101da177e4SLinus Torvalds  */
611b6f99a21SDave Jones static inline int rt6_check_dev(struct rt6_info *rt, int oif)
6121da177e4SLinus Torvalds {
6135e670d84SDavid Ahern 	const struct net_device *dev = rt->fib6_nh.nh_dev;
6145e670d84SDavid Ahern 
615161980f4SDavid S. Miller 	if (!oif || dev->ifindex == oif)
616554cfb7eSYOSHIFUJI Hideaki 		return 2;
617161980f4SDavid S. Miller 	if ((dev->flags & IFF_LOOPBACK) &&
618161980f4SDavid S. Miller 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
619161980f4SDavid S. Miller 		return 1;
620554cfb7eSYOSHIFUJI Hideaki 	return 0;
6211da177e4SLinus Torvalds }
6221da177e4SLinus Torvalds 
623afc154e9SHannes Frederic Sowa static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
6241da177e4SLinus Torvalds {
625afc154e9SHannes Frederic Sowa 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
6265e670d84SDavid Ahern 	struct neighbour *neigh;
627f2c31e32SEric Dumazet 
6284d0c5911SYOSHIFUJI Hideaki 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
6294d0c5911SYOSHIFUJI Hideaki 	    !(rt->rt6i_flags & RTF_GATEWAY))
630afc154e9SHannes Frederic Sowa 		return RT6_NUD_SUCCEED;
631145a3621SYOSHIFUJI Hideaki / 吉藤英明 
632145a3621SYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_lock_bh();
6335e670d84SDavid Ahern 	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
6345e670d84SDavid Ahern 					  &rt->fib6_nh.nh_gw);
635145a3621SYOSHIFUJI Hideaki / 吉藤英明 	if (neigh) {
636145a3621SYOSHIFUJI Hideaki / 吉藤英明 		read_lock(&neigh->lock);
637554cfb7eSYOSHIFUJI Hideaki 		if (neigh->nud_state & NUD_VALID)
638afc154e9SHannes Frederic Sowa 			ret = RT6_NUD_SUCCEED;
639398bcbebSYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
640a5a81f0bSPaul Marks 		else if (!(neigh->nud_state & NUD_FAILED))
641afc154e9SHannes Frederic Sowa 			ret = RT6_NUD_SUCCEED;
6427e980569SJiri Benc 		else
6437e980569SJiri Benc 			ret = RT6_NUD_FAIL_PROBE;
644398bcbebSYOSHIFUJI Hideaki #endif
645145a3621SYOSHIFUJI Hideaki / 吉藤英明 		read_unlock(&neigh->lock);
646afc154e9SHannes Frederic Sowa 	} else {
647afc154e9SHannes Frederic Sowa 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
6487e980569SJiri Benc 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
649a5a81f0bSPaul Marks 	}
650145a3621SYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_unlock_bh();
651145a3621SYOSHIFUJI Hideaki / 吉藤英明 
652a5a81f0bSPaul Marks 	return ret;
6531da177e4SLinus Torvalds }
6541da177e4SLinus Torvalds 
655554cfb7eSYOSHIFUJI Hideaki static int rt6_score_route(struct rt6_info *rt, int oif,
656554cfb7eSYOSHIFUJI Hideaki 			   int strict)
657554cfb7eSYOSHIFUJI Hideaki {
658a5a81f0bSPaul Marks 	int m;
6594d0c5911SYOSHIFUJI Hideaki 
6604d0c5911SYOSHIFUJI Hideaki 	m = rt6_check_dev(rt, oif);
66177d16f45SYOSHIFUJI Hideaki 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
662afc154e9SHannes Frederic Sowa 		return RT6_NUD_FAIL_HARD;
663ebacaaa0SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
664ebacaaa0SYOSHIFUJI Hideaki 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
665ebacaaa0SYOSHIFUJI Hideaki #endif
666afc154e9SHannes Frederic Sowa 	if (strict & RT6_LOOKUP_F_REACHABLE) {
667afc154e9SHannes Frederic Sowa 		int n = rt6_check_neigh(rt);
668afc154e9SHannes Frederic Sowa 		if (n < 0)
669afc154e9SHannes Frederic Sowa 			return n;
670afc154e9SHannes Frederic Sowa 	}
671554cfb7eSYOSHIFUJI Hideaki 	return m;
672554cfb7eSYOSHIFUJI Hideaki }
673554cfb7eSYOSHIFUJI Hideaki 
674f11e6659SDavid S. Miller static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
675afc154e9SHannes Frederic Sowa 				   int *mpri, struct rt6_info *match,
676afc154e9SHannes Frederic Sowa 				   bool *do_rr)
677554cfb7eSYOSHIFUJI Hideaki {
678554cfb7eSYOSHIFUJI Hideaki 	int m;
679afc154e9SHannes Frederic Sowa 	bool match_do_rr = false;
68035103d11SAndy Gospodarek 	struct inet6_dev *idev = rt->rt6i_idev;
68135103d11SAndy Gospodarek 
6825e670d84SDavid Ahern 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
6838067bb8cSIdo Schimmel 		goto out;
6848067bb8cSIdo Schimmel 
68514c5206cSIdo Schimmel 	if (idev->cnf.ignore_routes_with_linkdown &&
6865e670d84SDavid Ahern 	    rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
687d5d32e4bSDavid Ahern 	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
68835103d11SAndy Gospodarek 		goto out;
689554cfb7eSYOSHIFUJI Hideaki 
690554cfb7eSYOSHIFUJI Hideaki 	if (rt6_check_expired(rt))
691f11e6659SDavid S. Miller 		goto out;
692554cfb7eSYOSHIFUJI Hideaki 
693554cfb7eSYOSHIFUJI Hideaki 	m = rt6_score_route(rt, oif, strict);
6947e980569SJiri Benc 	if (m == RT6_NUD_FAIL_DO_RR) {
695afc154e9SHannes Frederic Sowa 		match_do_rr = true;
696afc154e9SHannes Frederic Sowa 		m = 0; /* lowest valid score */
6977e980569SJiri Benc 	} else if (m == RT6_NUD_FAIL_HARD) {
698f11e6659SDavid S. Miller 		goto out;
6991da177e4SLinus Torvalds 	}
700f11e6659SDavid S. Miller 
701afc154e9SHannes Frederic Sowa 	if (strict & RT6_LOOKUP_F_REACHABLE)
702afc154e9SHannes Frederic Sowa 		rt6_probe(rt);
703afc154e9SHannes Frederic Sowa 
7047e980569SJiri Benc 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
705afc154e9SHannes Frederic Sowa 	if (m > *mpri) {
706afc154e9SHannes Frederic Sowa 		*do_rr = match_do_rr;
707afc154e9SHannes Frederic Sowa 		*mpri = m;
708afc154e9SHannes Frederic Sowa 		match = rt;
709afc154e9SHannes Frederic Sowa 	}
710f11e6659SDavid S. Miller out:
711f11e6659SDavid S. Miller 	return match;
7121da177e4SLinus Torvalds }
7131da177e4SLinus Torvalds 
714f11e6659SDavid S. Miller static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
7158d1040e8SWei Wang 				     struct rt6_info *leaf,
716f11e6659SDavid S. Miller 				     struct rt6_info *rr_head,
717afc154e9SHannes Frederic Sowa 				     u32 metric, int oif, int strict,
718afc154e9SHannes Frederic Sowa 				     bool *do_rr)
719f11e6659SDavid S. Miller {
7209fbdcfafSSteffen Klassert 	struct rt6_info *rt, *match, *cont;
721f11e6659SDavid S. Miller 	int mpri = -1;
722f11e6659SDavid S. Miller 
723f11e6659SDavid S. Miller 	match = NULL;
7249fbdcfafSSteffen Klassert 	cont = NULL;
725071fb37eSDavid Miller 	for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
7269fbdcfafSSteffen Klassert 		if (rt->rt6i_metric != metric) {
7279fbdcfafSSteffen Klassert 			cont = rt;
7289fbdcfafSSteffen Klassert 			break;
7299fbdcfafSSteffen Klassert 		}
7309fbdcfafSSteffen Klassert 
731afc154e9SHannes Frederic Sowa 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
7329fbdcfafSSteffen Klassert 	}
7339fbdcfafSSteffen Klassert 
73466f5d6ceSWei Wang 	for (rt = leaf; rt && rt != rr_head;
735071fb37eSDavid Miller 	     rt = rcu_dereference(rt->rt6_next)) {
7369fbdcfafSSteffen Klassert 		if (rt->rt6i_metric != metric) {
7379fbdcfafSSteffen Klassert 			cont = rt;
7389fbdcfafSSteffen Klassert 			break;
7399fbdcfafSSteffen Klassert 		}
7409fbdcfafSSteffen Klassert 
7419fbdcfafSSteffen Klassert 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
7429fbdcfafSSteffen Klassert 	}
7439fbdcfafSSteffen Klassert 
7449fbdcfafSSteffen Klassert 	if (match || !cont)
7459fbdcfafSSteffen Klassert 		return match;
7469fbdcfafSSteffen Klassert 
747071fb37eSDavid Miller 	for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
748afc154e9SHannes Frederic Sowa 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
749f11e6659SDavid S. Miller 
750f11e6659SDavid S. Miller 	return match;
751f11e6659SDavid S. Miller }
752f11e6659SDavid S. Miller 
7538d1040e8SWei Wang static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
7548d1040e8SWei Wang 				   int oif, int strict)
755f11e6659SDavid S. Miller {
75666f5d6ceSWei Wang 	struct rt6_info *leaf = rcu_dereference(fn->leaf);
757f11e6659SDavid S. Miller 	struct rt6_info *match, *rt0;
758afc154e9SHannes Frederic Sowa 	bool do_rr = false;
75917ecf590SWei Wang 	int key_plen;
760f11e6659SDavid S. Miller 
76187b1af8dSWei Wang 	if (!leaf || leaf == net->ipv6.ip6_null_entry)
7628d1040e8SWei Wang 		return net->ipv6.ip6_null_entry;
7638d1040e8SWei Wang 
76466f5d6ceSWei Wang 	rt0 = rcu_dereference(fn->rr_ptr);
765f11e6659SDavid S. Miller 	if (!rt0)
76666f5d6ceSWei Wang 		rt0 = leaf;
767f11e6659SDavid S. Miller 
76817ecf590SWei Wang 	/* Double check to make sure fn is not an intermediate node
76917ecf590SWei Wang 	 * and fn->leaf does not points to its child's leaf
77017ecf590SWei Wang 	 * (This might happen if all routes under fn are deleted from
77117ecf590SWei Wang 	 * the tree and fib6_repair_tree() is called on the node.)
77217ecf590SWei Wang 	 */
77317ecf590SWei Wang 	key_plen = rt0->rt6i_dst.plen;
77417ecf590SWei Wang #ifdef CONFIG_IPV6_SUBTREES
77517ecf590SWei Wang 	if (rt0->rt6i_src.plen)
77617ecf590SWei Wang 		key_plen = rt0->rt6i_src.plen;
77717ecf590SWei Wang #endif
77817ecf590SWei Wang 	if (fn->fn_bit != key_plen)
77917ecf590SWei Wang 		return net->ipv6.ip6_null_entry;
78017ecf590SWei Wang 
7818d1040e8SWei Wang 	match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
782afc154e9SHannes Frederic Sowa 			     &do_rr);
783f11e6659SDavid S. Miller 
784afc154e9SHannes Frederic Sowa 	if (do_rr) {
785071fb37eSDavid Miller 		struct rt6_info *next = rcu_dereference(rt0->rt6_next);
786f11e6659SDavid S. Miller 
787554cfb7eSYOSHIFUJI Hideaki 		/* no entries matched; do round-robin */
788f11e6659SDavid S. Miller 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
7898d1040e8SWei Wang 			next = leaf;
790f11e6659SDavid S. Miller 
79166f5d6ceSWei Wang 		if (next != rt0) {
79266f5d6ceSWei Wang 			spin_lock_bh(&leaf->rt6i_table->tb6_lock);
79366f5d6ceSWei Wang 			/* make sure next is not being deleted from the tree */
79466f5d6ceSWei Wang 			if (next->rt6i_node)
79566f5d6ceSWei Wang 				rcu_assign_pointer(fn->rr_ptr, next);
79666f5d6ceSWei Wang 			spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
79766f5d6ceSWei Wang 		}
798554cfb7eSYOSHIFUJI Hideaki 	}
799554cfb7eSYOSHIFUJI Hideaki 
800a02cec21SEric Dumazet 	return match ? match : net->ipv6.ip6_null_entry;
8011da177e4SLinus Torvalds }
8021da177e4SLinus Torvalds 
8038b9df265SMartin KaFai Lau static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
8048b9df265SMartin KaFai Lau {
8058b9df265SMartin KaFai Lau 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
8068b9df265SMartin KaFai Lau }
8078b9df265SMartin KaFai Lau 
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process one Route Information option received on @dev.
 *
 * @opt/@len describe the raw option, @gwaddr is the gateway the
 * resulting route points at.  Depending on the advertised lifetime the
 * matching route is deleted (lifetime 0), created, or has its preference
 * and expiry refreshed.
 *
 * Returns 0 on success (including "nothing to do") and -EINVAL when the
 * option is too short, its length/prefix_len fields are inconsistent, or
 * the preference is ICMPV6_ROUTER_PREF_INVALID.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev_net(dev);
	struct in6_addr prefix_buf, *prefix;
	unsigned long lifetime;
	struct rt6_info *rt;
	unsigned int pref;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* full prefix present in the option: use it in place */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(net, gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	/* a zero lifetime withdraws an existing route */
	if (rt && !lifetime) {
		ip6_del_rt(net, rt);
		rt = NULL;
	}

	if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev, pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	ip6_rt_put(rt);
	return 0;
}
#endif
88370ceb4f5SYOSHIFUJI Hideaki 
884ae90d867SDavid Ahern /*
885ae90d867SDavid Ahern  *	Misc support functions
886ae90d867SDavid Ahern  */
887ae90d867SDavid Ahern 
888ae90d867SDavid Ahern /* called with rcu_lock held */
889ae90d867SDavid Ahern static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
890ae90d867SDavid Ahern {
8915e670d84SDavid Ahern 	struct net_device *dev = rt->fib6_nh.nh_dev;
892ae90d867SDavid Ahern 
893ae90d867SDavid Ahern 	if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
894ae90d867SDavid Ahern 		/* for copies of local routes, dst->dev needs to be the
895ae90d867SDavid Ahern 		 * device if it is a master device, the master device if
896ae90d867SDavid Ahern 		 * device is enslaved, and the loopback as the default
897ae90d867SDavid Ahern 		 */
898ae90d867SDavid Ahern 		if (netif_is_l3_slave(dev) &&
899ae90d867SDavid Ahern 		    !rt6_need_strict(&rt->rt6i_dst.addr))
900ae90d867SDavid Ahern 			dev = l3mdev_master_dev_rcu(dev);
901ae90d867SDavid Ahern 		else if (!netif_is_l3_master(dev))
902ae90d867SDavid Ahern 			dev = dev_net(dev)->loopback_dev;
903ae90d867SDavid Ahern 		/* last case is netif_is_l3_master(dev) is true in which
904ae90d867SDavid Ahern 		 * case we want dev returned to be dev
905ae90d867SDavid Ahern 		 */
906ae90d867SDavid Ahern 	}
907ae90d867SDavid Ahern 
908ae90d867SDavid Ahern 	return dev;
909ae90d867SDavid Ahern }
910ae90d867SDavid Ahern 
9116edb3c96SDavid Ahern static const int fib6_prop[RTN_MAX + 1] = {
9126edb3c96SDavid Ahern 	[RTN_UNSPEC]	= 0,
9136edb3c96SDavid Ahern 	[RTN_UNICAST]	= 0,
9146edb3c96SDavid Ahern 	[RTN_LOCAL]	= 0,
9156edb3c96SDavid Ahern 	[RTN_BROADCAST]	= 0,
9166edb3c96SDavid Ahern 	[RTN_ANYCAST]	= 0,
9176edb3c96SDavid Ahern 	[RTN_MULTICAST]	= 0,
9186edb3c96SDavid Ahern 	[RTN_BLACKHOLE]	= -EINVAL,
9196edb3c96SDavid Ahern 	[RTN_UNREACHABLE] = -EHOSTUNREACH,
9206edb3c96SDavid Ahern 	[RTN_PROHIBIT]	= -EACCES,
9216edb3c96SDavid Ahern 	[RTN_THROW]	= -EAGAIN,
9226edb3c96SDavid Ahern 	[RTN_NAT]	= -EINVAL,
9236edb3c96SDavid Ahern 	[RTN_XRESOLVE]	= -EINVAL,
9246edb3c96SDavid Ahern };
9256edb3c96SDavid Ahern 
9266edb3c96SDavid Ahern static int ip6_rt_type_to_error(u8 fib6_type)
9276edb3c96SDavid Ahern {
9286edb3c96SDavid Ahern 	return fib6_prop[fib6_type];
9296edb3c96SDavid Ahern }
9306edb3c96SDavid Ahern 
9316edb3c96SDavid Ahern static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct rt6_info *ort)
9326edb3c96SDavid Ahern {
9336edb3c96SDavid Ahern 	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
9346edb3c96SDavid Ahern 
9356edb3c96SDavid Ahern 	switch (ort->fib6_type) {
9366edb3c96SDavid Ahern 	case RTN_BLACKHOLE:
9376edb3c96SDavid Ahern 		rt->dst.output = dst_discard_out;
9386edb3c96SDavid Ahern 		rt->dst.input = dst_discard;
9396edb3c96SDavid Ahern 		break;
9406edb3c96SDavid Ahern 	case RTN_PROHIBIT:
9416edb3c96SDavid Ahern 		rt->dst.output = ip6_pkt_prohibit_out;
9426edb3c96SDavid Ahern 		rt->dst.input = ip6_pkt_prohibit;
9436edb3c96SDavid Ahern 		break;
9446edb3c96SDavid Ahern 	case RTN_THROW:
9456edb3c96SDavid Ahern 	case RTN_UNREACHABLE:
9466edb3c96SDavid Ahern 	default:
9476edb3c96SDavid Ahern 		rt->dst.output = ip6_pkt_discard_out;
9486edb3c96SDavid Ahern 		rt->dst.input = ip6_pkt_discard;
9496edb3c96SDavid Ahern 		break;
9506edb3c96SDavid Ahern 	}
9516edb3c96SDavid Ahern }
9526edb3c96SDavid Ahern 
9536edb3c96SDavid Ahern static void ip6_rt_init_dst(struct rt6_info *rt, struct rt6_info *ort)
9546edb3c96SDavid Ahern {
9556edb3c96SDavid Ahern 	if (ort->rt6i_flags & RTF_REJECT) {
9566edb3c96SDavid Ahern 		ip6_rt_init_dst_reject(rt, ort);
9576edb3c96SDavid Ahern 		return;
9586edb3c96SDavid Ahern 	}
9596edb3c96SDavid Ahern 
9606edb3c96SDavid Ahern 	rt->dst.error = 0;
9616edb3c96SDavid Ahern 	rt->dst.output = ip6_output;
9626edb3c96SDavid Ahern 
9636edb3c96SDavid Ahern 	if (ort->fib6_type == RTN_LOCAL) {
9646edb3c96SDavid Ahern 		rt->dst.flags |= DST_HOST;
9656edb3c96SDavid Ahern 		rt->dst.input = ip6_input;
9666edb3c96SDavid Ahern 	} else if (ipv6_addr_type(&ort->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) {
9676edb3c96SDavid Ahern 		rt->dst.input = ip6_mc_input;
9686edb3c96SDavid Ahern 	} else {
9696edb3c96SDavid Ahern 		rt->dst.input = ip6_forward;
9706edb3c96SDavid Ahern 	}
9716edb3c96SDavid Ahern 
9726edb3c96SDavid Ahern 	if (ort->fib6_nh.nh_lwtstate) {
9736edb3c96SDavid Ahern 		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
9746edb3c96SDavid Ahern 		lwtunnel_set_redirect(&rt->dst);
9756edb3c96SDavid Ahern 	}
9766edb3c96SDavid Ahern 
9776edb3c96SDavid Ahern 	rt->dst.lastuse = jiffies;
9786edb3c96SDavid Ahern }
9796edb3c96SDavid Ahern 
980ae90d867SDavid Ahern static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
981ae90d867SDavid Ahern {
982ae90d867SDavid Ahern 	BUG_ON(from->from);
983ae90d867SDavid Ahern 
984ae90d867SDavid Ahern 	rt->rt6i_flags &= ~RTF_EXPIRES;
985ae90d867SDavid Ahern 	dst_hold(&from->dst);
986ae90d867SDavid Ahern 	rt->from = from;
987*d4ead6b3SDavid Ahern 	dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
988*d4ead6b3SDavid Ahern 	if (from->fib6_metrics != &dst_default_metrics) {
989*d4ead6b3SDavid Ahern 		rt->dst._metrics |= DST_METRICS_REFCOUNTED;
990*d4ead6b3SDavid Ahern 		refcount_inc(&from->fib6_metrics->refcnt);
991*d4ead6b3SDavid Ahern 	}
992ae90d867SDavid Ahern }
993ae90d867SDavid Ahern 
994ae90d867SDavid Ahern static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
995ae90d867SDavid Ahern {
9966edb3c96SDavid Ahern 	ip6_rt_init_dst(rt, ort);
9976edb3c96SDavid Ahern 
998ae90d867SDavid Ahern 	rt->rt6i_dst = ort->rt6i_dst;
999ae90d867SDavid Ahern 	rt->rt6i_idev = ort->rt6i_idev;
1000ae90d867SDavid Ahern 	if (rt->rt6i_idev)
1001ae90d867SDavid Ahern 		in6_dev_hold(rt->rt6i_idev);
10025e670d84SDavid Ahern 	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
1003ae90d867SDavid Ahern 	rt->rt6i_flags = ort->rt6i_flags;
1004ae90d867SDavid Ahern 	rt6_set_from(rt, ort);
1005ae90d867SDavid Ahern 	rt->rt6i_metric = ort->rt6i_metric;
1006ae90d867SDavid Ahern #ifdef CONFIG_IPV6_SUBTREES
1007ae90d867SDavid Ahern 	rt->rt6i_src = ort->rt6i_src;
1008ae90d867SDavid Ahern #endif
1009ae90d867SDavid Ahern 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
1010ae90d867SDavid Ahern 	rt->rt6i_table = ort->rt6i_table;
10115e670d84SDavid Ahern 	rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
1012ae90d867SDavid Ahern }
1013ae90d867SDavid Ahern 
1014a3c00e46SMartin KaFai Lau static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1015a3c00e46SMartin KaFai Lau 					struct in6_addr *saddr)
1016a3c00e46SMartin KaFai Lau {
101766f5d6ceSWei Wang 	struct fib6_node *pn, *sn;
1018a3c00e46SMartin KaFai Lau 	while (1) {
1019a3c00e46SMartin KaFai Lau 		if (fn->fn_flags & RTN_TL_ROOT)
1020a3c00e46SMartin KaFai Lau 			return NULL;
102166f5d6ceSWei Wang 		pn = rcu_dereference(fn->parent);
102266f5d6ceSWei Wang 		sn = FIB6_SUBTREE(pn);
102366f5d6ceSWei Wang 		if (sn && sn != fn)
102466f5d6ceSWei Wang 			fn = fib6_lookup(sn, NULL, saddr);
1025a3c00e46SMartin KaFai Lau 		else
1026a3c00e46SMartin KaFai Lau 			fn = pn;
1027a3c00e46SMartin KaFai Lau 		if (fn->fn_flags & RTN_RTINFO)
1028a3c00e46SMartin KaFai Lau 			return fn;
1029a3c00e46SMartin KaFai Lau 	}
1030a3c00e46SMartin KaFai Lau }
1031c71099acSThomas Graf 
1032d3843fe5SWei Wang static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1033d3843fe5SWei Wang 			  bool null_fallback)
1034d3843fe5SWei Wang {
1035d3843fe5SWei Wang 	struct rt6_info *rt = *prt;
1036d3843fe5SWei Wang 
1037d3843fe5SWei Wang 	if (dst_hold_safe(&rt->dst))
1038d3843fe5SWei Wang 		return true;
1039d3843fe5SWei Wang 	if (null_fallback) {
1040d3843fe5SWei Wang 		rt = net->ipv6.ip6_null_entry;
1041d3843fe5SWei Wang 		dst_hold(&rt->dst);
1042d3843fe5SWei Wang 	} else {
1043d3843fe5SWei Wang 		rt = NULL;
1044d3843fe5SWei Wang 	}
1045d3843fe5SWei Wang 	*prt = rt;
1046d3843fe5SWei Wang 	return false;
1047d3843fe5SWei Wang }
1048d3843fe5SWei Wang 
10498ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_lookup(struct net *net,
10508ed67789SDaniel Lezcano 					     struct fib6_table *table,
1051b75cc8f9SDavid Ahern 					     struct flowi6 *fl6,
1052b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
1053b75cc8f9SDavid Ahern 					     int flags)
10541da177e4SLinus Torvalds {
10552b760fcfSWei Wang 	struct rt6_info *rt, *rt_cache;
10561da177e4SLinus Torvalds 	struct fib6_node *fn;
10571da177e4SLinus Torvalds 
1058b6cdbc85SDavid Ahern 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1059b6cdbc85SDavid Ahern 		flags &= ~RT6_LOOKUP_F_IFACE;
1060b6cdbc85SDavid Ahern 
106166f5d6ceSWei Wang 	rcu_read_lock();
10624c9483b2SDavid S. Miller 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1063c71099acSThomas Graf restart:
106466f5d6ceSWei Wang 	rt = rcu_dereference(fn->leaf);
106566f5d6ceSWei Wang 	if (!rt) {
106666f5d6ceSWei Wang 		rt = net->ipv6.ip6_null_entry;
106766f5d6ceSWei Wang 	} else {
106866f5d6ceSWei Wang 		rt = rt6_device_match(net, rt, &fl6->saddr,
106966f5d6ceSWei Wang 				      fl6->flowi6_oif, flags);
107051ebd318SNicolas Dichtel 		if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
1071b4bac172SDavid Ahern 			rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
1072b75cc8f9SDavid Ahern 						  skb, flags);
107366f5d6ceSWei Wang 	}
1074a3c00e46SMartin KaFai Lau 	if (rt == net->ipv6.ip6_null_entry) {
1075a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
1076a3c00e46SMartin KaFai Lau 		if (fn)
1077a3c00e46SMartin KaFai Lau 			goto restart;
1078a3c00e46SMartin KaFai Lau 	}
10792b760fcfSWei Wang 	/* Search through exception table */
10802b760fcfSWei Wang 	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
10812b760fcfSWei Wang 	if (rt_cache)
10822b760fcfSWei Wang 		rt = rt_cache;
10832b760fcfSWei Wang 
1084d3843fe5SWei Wang 	if (ip6_hold_safe(net, &rt, true))
1085d3843fe5SWei Wang 		dst_use_noref(&rt->dst, jiffies);
1086d3843fe5SWei Wang 
108766f5d6ceSWei Wang 	rcu_read_unlock();
1088b811580dSDavid Ahern 
1089b65f164dSPaolo Abeni 	trace_fib6_table_lookup(net, rt, table, fl6);
1090b811580dSDavid Ahern 
10911da177e4SLinus Torvalds 	return rt;
1092c71099acSThomas Graf 
1093c71099acSThomas Graf }
1094c71099acSThomas Graf 
1095ea6e574eSFlorian Westphal struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
1096b75cc8f9SDavid Ahern 				   const struct sk_buff *skb, int flags)
1097ea6e574eSFlorian Westphal {
1098b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
1099ea6e574eSFlorian Westphal }
1100ea6e574eSFlorian Westphal EXPORT_SYMBOL_GPL(ip6_route_lookup);
1101ea6e574eSFlorian Westphal 
11029acd9f3aSYOSHIFUJI Hideaki struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
1103b75cc8f9SDavid Ahern 			    const struct in6_addr *saddr, int oif,
1104b75cc8f9SDavid Ahern 			    const struct sk_buff *skb, int strict)
1105c71099acSThomas Graf {
11064c9483b2SDavid S. Miller 	struct flowi6 fl6 = {
11074c9483b2SDavid S. Miller 		.flowi6_oif = oif,
11084c9483b2SDavid S. Miller 		.daddr = *daddr,
1109c71099acSThomas Graf 	};
1110c71099acSThomas Graf 	struct dst_entry *dst;
111177d16f45SYOSHIFUJI Hideaki 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
1112c71099acSThomas Graf 
1113adaa70bbSThomas Graf 	if (saddr) {
11144c9483b2SDavid S. Miller 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
1115adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
1116adaa70bbSThomas Graf 	}
1117adaa70bbSThomas Graf 
1118b75cc8f9SDavid Ahern 	dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
1119c71099acSThomas Graf 	if (dst->error == 0)
1120c71099acSThomas Graf 		return (struct rt6_info *) dst;
1121c71099acSThomas Graf 
1122c71099acSThomas Graf 	dst_release(dst);
1123c71099acSThomas Graf 
11241da177e4SLinus Torvalds 	return NULL;
11251da177e4SLinus Torvalds }
11267159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(rt6_lookup);
11277159039aSYOSHIFUJI Hideaki 
1128c71099acSThomas Graf /* ip6_ins_rt is called with FREE table->tb6_lock.
11291cfb71eeSWei Wang  * It takes new route entry, the addition fails by any reason the
11301cfb71eeSWei Wang  * route is released.
11311cfb71eeSWei Wang  * Caller must hold dst before calling it.
11321da177e4SLinus Torvalds  */
11331da177e4SLinus Torvalds 
1134e5fd387aSMichal Kubeček static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
1135333c4301SDavid Ahern 			struct netlink_ext_ack *extack)
11361da177e4SLinus Torvalds {
11371da177e4SLinus Torvalds 	int err;
1138c71099acSThomas Graf 	struct fib6_table *table;
11391da177e4SLinus Torvalds 
1140c71099acSThomas Graf 	table = rt->rt6i_table;
114166f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
1142*d4ead6b3SDavid Ahern 	err = fib6_add(&table->tb6_root, rt, info, extack);
114366f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
11441da177e4SLinus Torvalds 
11451da177e4SLinus Torvalds 	return err;
11461da177e4SLinus Torvalds }
11471da177e4SLinus Torvalds 
1148afb1d4b5SDavid Ahern int ip6_ins_rt(struct net *net, struct rt6_info *rt)
114940e22e8fSThomas Graf {
1150afb1d4b5SDavid Ahern 	struct nl_info info = {	.nl_net = net, };
1151e715b6d3SFlorian Westphal 
11521cfb71eeSWei Wang 	/* Hold dst to account for the reference from the fib6 tree */
11531cfb71eeSWei Wang 	dst_hold(&rt->dst);
1154*d4ead6b3SDavid Ahern 	return __ip6_ins_rt(rt, &info, NULL);
115540e22e8fSThomas Graf }
115640e22e8fSThomas Graf 
11578b9df265SMartin KaFai Lau static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
115821efcfa0SEric Dumazet 					   const struct in6_addr *daddr,
1159b71d1d42SEric Dumazet 					   const struct in6_addr *saddr)
11601da177e4SLinus Torvalds {
11614832c30dSDavid Ahern 	struct net_device *dev;
11621da177e4SLinus Torvalds 	struct rt6_info *rt;
11631da177e4SLinus Torvalds 
11641da177e4SLinus Torvalds 	/*
11651da177e4SLinus Torvalds 	 *	Clone the route.
11661da177e4SLinus Torvalds 	 */
11671da177e4SLinus Torvalds 
1168d52d3997SMartin KaFai Lau 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
11693a2232e9SDavid Miller 		ort = ort->from;
11701da177e4SLinus Torvalds 
11714832c30dSDavid Ahern 	rcu_read_lock();
11724832c30dSDavid Ahern 	dev = ip6_rt_get_dev_rcu(ort);
11734832c30dSDavid Ahern 	rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
11744832c30dSDavid Ahern 	rcu_read_unlock();
117583a09abdSMartin KaFai Lau 	if (!rt)
117683a09abdSMartin KaFai Lau 		return NULL;
117783a09abdSMartin KaFai Lau 
117883a09abdSMartin KaFai Lau 	ip6_rt_copy_init(rt, ort);
11798b9df265SMartin KaFai Lau 	rt->rt6i_flags |= RTF_CACHE;
118083a09abdSMartin KaFai Lau 	rt->rt6i_metric = 0;
118183a09abdSMartin KaFai Lau 	rt->dst.flags |= DST_HOST;
118283a09abdSMartin KaFai Lau 	rt->rt6i_dst.addr = *daddr;
118383a09abdSMartin KaFai Lau 	rt->rt6i_dst.plen = 128;
11848b9df265SMartin KaFai Lau 
11858b9df265SMartin KaFai Lau 	if (!rt6_is_gw_or_nonexthop(ort)) {
1186bb3c3686SDavid S. Miller 		if (ort->rt6i_dst.plen != 128 &&
118721efcfa0SEric Dumazet 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
118858c4fb86SYOSHIFUJI Hideaki 			rt->rt6i_flags |= RTF_ANYCAST;
11891da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
11901da177e4SLinus Torvalds 		if (rt->rt6i_src.plen && saddr) {
11914e3fd7a0SAlexey Dobriyan 			rt->rt6i_src.addr = *saddr;
11921da177e4SLinus Torvalds 			rt->rt6i_src.plen = 128;
11931da177e4SLinus Torvalds 		}
11941da177e4SLinus Torvalds #endif
119595a9a5baSYOSHIFUJI Hideaki 	}
119695a9a5baSYOSHIFUJI Hideaki 
1197299d9939SYOSHIFUJI Hideaki 	return rt;
1198299d9939SYOSHIFUJI Hideaki }
1199299d9939SYOSHIFUJI Hideaki 
1200d52d3997SMartin KaFai Lau static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1201d52d3997SMartin KaFai Lau {
12024832c30dSDavid Ahern 	struct net_device *dev;
1203d52d3997SMartin KaFai Lau 	struct rt6_info *pcpu_rt;
1204d52d3997SMartin KaFai Lau 
12054832c30dSDavid Ahern 	rcu_read_lock();
12064832c30dSDavid Ahern 	dev = ip6_rt_get_dev_rcu(rt);
12074832c30dSDavid Ahern 	pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
12084832c30dSDavid Ahern 	rcu_read_unlock();
1209d52d3997SMartin KaFai Lau 	if (!pcpu_rt)
1210d52d3997SMartin KaFai Lau 		return NULL;
1211d52d3997SMartin KaFai Lau 	ip6_rt_copy_init(pcpu_rt, rt);
1212d52d3997SMartin KaFai Lau 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1213d52d3997SMartin KaFai Lau 	pcpu_rt->rt6i_flags |= RTF_PCPU;
1214d52d3997SMartin KaFai Lau 	return pcpu_rt;
1215d52d3997SMartin KaFai Lau }
1216d52d3997SMartin KaFai Lau 
121766f5d6ceSWei Wang /* It should be called with rcu_read_lock() acquired */
1218d52d3997SMartin KaFai Lau static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1219d52d3997SMartin KaFai Lau {
1220a73e4195SMartin KaFai Lau 	struct rt6_info *pcpu_rt, **p;
1221d52d3997SMartin KaFai Lau 
1222d52d3997SMartin KaFai Lau 	p = this_cpu_ptr(rt->rt6i_pcpu);
1223d52d3997SMartin KaFai Lau 	pcpu_rt = *p;
1224d52d3997SMartin KaFai Lau 
1225*d4ead6b3SDavid Ahern 	if (pcpu_rt)
1226*d4ead6b3SDavid Ahern 		ip6_hold_safe(NULL, &pcpu_rt, false);
1227d3843fe5SWei Wang 
1228a73e4195SMartin KaFai Lau 	return pcpu_rt;
1229a73e4195SMartin KaFai Lau }
1230a73e4195SMartin KaFai Lau 
1231afb1d4b5SDavid Ahern static struct rt6_info *rt6_make_pcpu_route(struct net *net,
1232afb1d4b5SDavid Ahern 					    struct rt6_info *rt)
1233a73e4195SMartin KaFai Lau {
1234a73e4195SMartin KaFai Lau 	struct rt6_info *pcpu_rt, *prev, **p;
1235d52d3997SMartin KaFai Lau 
1236d52d3997SMartin KaFai Lau 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1237d52d3997SMartin KaFai Lau 	if (!pcpu_rt) {
12389c7370a1SMartin KaFai Lau 		dst_hold(&net->ipv6.ip6_null_entry->dst);
12399c7370a1SMartin KaFai Lau 		return net->ipv6.ip6_null_entry;
1240d52d3997SMartin KaFai Lau 	}
1241d52d3997SMartin KaFai Lau 
1242a94b9367SWei Wang 	dst_hold(&pcpu_rt->dst);
1243a73e4195SMartin KaFai Lau 	p = this_cpu_ptr(rt->rt6i_pcpu);
1244d52d3997SMartin KaFai Lau 	prev = cmpxchg(p, NULL, pcpu_rt);
1245951f788aSEric Dumazet 	BUG_ON(prev);
1246a94b9367SWei Wang 
1247d52d3997SMartin KaFai Lau 	return pcpu_rt;
1248d52d3997SMartin KaFai Lau }
1249d52d3997SMartin KaFai Lau 
125035732d01SWei Wang /* exception hash table implementation
125135732d01SWei Wang  */
/* rt6_exception_lock is taken by the code below that inserts into, removes
 * from, flushes or ages the per-route exception buckets; the lookup helper
 * with the _rcu suffix walks the buckets under rcu_read_lock() instead.
 */
125235732d01SWei Wang static DEFINE_SPINLOCK(rt6_exception_lock);
125335732d01SWei Wang 
125435732d01SWei Wang /* Remove rt6_ex from hash table and free the memory
125535732d01SWei Wang  * Caller must hold rt6_exception_lock
125635732d01SWei Wang  */
125735732d01SWei Wang static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
125835732d01SWei Wang 				 struct rt6_exception *rt6_ex)
125935732d01SWei Wang {
1260b2427e67SColin Ian King 	struct net *net;
126181eb8447SWei Wang 
126235732d01SWei Wang 	if (!bucket || !rt6_ex)
126335732d01SWei Wang 		return;
1264b2427e67SColin Ian King 
1265b2427e67SColin Ian King 	net = dev_net(rt6_ex->rt6i->dst.dev);
126635732d01SWei Wang 	rt6_ex->rt6i->rt6i_node = NULL;
126735732d01SWei Wang 	hlist_del_rcu(&rt6_ex->hlist);
126835732d01SWei Wang 	rt6_release(rt6_ex->rt6i);
126935732d01SWei Wang 	kfree_rcu(rt6_ex, rcu);
127035732d01SWei Wang 	WARN_ON_ONCE(!bucket->depth);
127135732d01SWei Wang 	bucket->depth--;
127281eb8447SWei Wang 	net->ipv6.rt6_stats->fib_rt_cache--;
127335732d01SWei Wang }
127435732d01SWei Wang 
127535732d01SWei Wang /* Remove oldest rt6_ex in bucket and free the memory
127635732d01SWei Wang  * Caller must hold rt6_exception_lock
127735732d01SWei Wang  */
127835732d01SWei Wang static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
127935732d01SWei Wang {
128035732d01SWei Wang 	struct rt6_exception *rt6_ex, *oldest = NULL;
128135732d01SWei Wang 
128235732d01SWei Wang 	if (!bucket)
128335732d01SWei Wang 		return;
128435732d01SWei Wang 
128535732d01SWei Wang 	hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
128635732d01SWei Wang 		if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
128735732d01SWei Wang 			oldest = rt6_ex;
128835732d01SWei Wang 	}
128935732d01SWei Wang 	rt6_remove_exception(bucket, oldest);
129035732d01SWei Wang }
129135732d01SWei Wang 
129235732d01SWei Wang static u32 rt6_exception_hash(const struct in6_addr *dst,
129335732d01SWei Wang 			      const struct in6_addr *src)
129435732d01SWei Wang {
129535732d01SWei Wang 	static u32 seed __read_mostly;
129635732d01SWei Wang 	u32 val;
129735732d01SWei Wang 
129835732d01SWei Wang 	net_get_random_once(&seed, sizeof(seed));
129935732d01SWei Wang 	val = jhash(dst, sizeof(*dst), seed);
130035732d01SWei Wang 
130135732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
130235732d01SWei Wang 	if (src)
130335732d01SWei Wang 		val = jhash(src, sizeof(*src), val);
130435732d01SWei Wang #endif
130535732d01SWei Wang 	return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
130635732d01SWei Wang }
130735732d01SWei Wang 
130835732d01SWei Wang /* Helper function to find the cached rt in the hash table
130935732d01SWei Wang  * and update bucket pointer to point to the bucket for this
131035732d01SWei Wang  * (daddr, saddr) pair
131135732d01SWei Wang  * Caller must hold rt6_exception_lock
131235732d01SWei Wang  */
131335732d01SWei Wang static struct rt6_exception *
131435732d01SWei Wang __rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
131535732d01SWei Wang 			      const struct in6_addr *daddr,
131635732d01SWei Wang 			      const struct in6_addr *saddr)
131735732d01SWei Wang {
131835732d01SWei Wang 	struct rt6_exception *rt6_ex;
131935732d01SWei Wang 	u32 hval;
132035732d01SWei Wang 
132135732d01SWei Wang 	if (!(*bucket) || !daddr)
132235732d01SWei Wang 		return NULL;
132335732d01SWei Wang 
132435732d01SWei Wang 	hval = rt6_exception_hash(daddr, saddr);
132535732d01SWei Wang 	*bucket += hval;
132635732d01SWei Wang 
132735732d01SWei Wang 	hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
132835732d01SWei Wang 		struct rt6_info *rt6 = rt6_ex->rt6i;
132935732d01SWei Wang 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
133035732d01SWei Wang 
133135732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
133235732d01SWei Wang 		if (matched && saddr)
133335732d01SWei Wang 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
133435732d01SWei Wang #endif
133535732d01SWei Wang 		if (matched)
133635732d01SWei Wang 			return rt6_ex;
133735732d01SWei Wang 	}
133835732d01SWei Wang 	return NULL;
133935732d01SWei Wang }
134035732d01SWei Wang 
134135732d01SWei Wang /* Helper function to find the cached rt in the hash table
134235732d01SWei Wang  * and update bucket pointer to point to the bucket for this
134335732d01SWei Wang  * (daddr, saddr) pair
134435732d01SWei Wang  * Caller must hold rcu_read_lock()
134535732d01SWei Wang  */
134635732d01SWei Wang static struct rt6_exception *
134735732d01SWei Wang __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
134835732d01SWei Wang 			 const struct in6_addr *daddr,
134935732d01SWei Wang 			 const struct in6_addr *saddr)
135035732d01SWei Wang {
135135732d01SWei Wang 	struct rt6_exception *rt6_ex;
135235732d01SWei Wang 	u32 hval;
135335732d01SWei Wang 
135435732d01SWei Wang 	WARN_ON_ONCE(!rcu_read_lock_held());
135535732d01SWei Wang 
135635732d01SWei Wang 	if (!(*bucket) || !daddr)
135735732d01SWei Wang 		return NULL;
135835732d01SWei Wang 
135935732d01SWei Wang 	hval = rt6_exception_hash(daddr, saddr);
136035732d01SWei Wang 	*bucket += hval;
136135732d01SWei Wang 
136235732d01SWei Wang 	hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
136335732d01SWei Wang 		struct rt6_info *rt6 = rt6_ex->rt6i;
136435732d01SWei Wang 		bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
136535732d01SWei Wang 
136635732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
136735732d01SWei Wang 		if (matched && saddr)
136835732d01SWei Wang 			matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
136935732d01SWei Wang #endif
137035732d01SWei Wang 		if (matched)
137135732d01SWei Wang 			return rt6_ex;
137235732d01SWei Wang 	}
137335732d01SWei Wang 	return NULL;
137435732d01SWei Wang }
137535732d01SWei Wang 
1376*d4ead6b3SDavid Ahern static unsigned int fib6_mtu(const struct rt6_info *rt)
1377*d4ead6b3SDavid Ahern {
1378*d4ead6b3SDavid Ahern 	unsigned int mtu;
1379*d4ead6b3SDavid Ahern 
1380*d4ead6b3SDavid Ahern 	mtu = rt->fib6_pmtu ? : rt->rt6i_idev->cnf.mtu6;
1381*d4ead6b3SDavid Ahern 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1382*d4ead6b3SDavid Ahern 
1383*d4ead6b3SDavid Ahern 	return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1384*d4ead6b3SDavid Ahern }
1385*d4ead6b3SDavid Ahern 
138635732d01SWei Wang static int rt6_insert_exception(struct rt6_info *nrt,
138735732d01SWei Wang 				struct rt6_info *ort)
138835732d01SWei Wang {
13895e670d84SDavid Ahern 	struct net *net = dev_net(nrt->dst.dev);
139035732d01SWei Wang 	struct rt6_exception_bucket *bucket;
139135732d01SWei Wang 	struct in6_addr *src_key = NULL;
139235732d01SWei Wang 	struct rt6_exception *rt6_ex;
139335732d01SWei Wang 	int err = 0;
139435732d01SWei Wang 
139535732d01SWei Wang 	/* ort can't be a cache or pcpu route */
139635732d01SWei Wang 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
13973a2232e9SDavid Miller 		ort = ort->from;
139835732d01SWei Wang 	WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
139935732d01SWei Wang 
140035732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
140135732d01SWei Wang 
140235732d01SWei Wang 	if (ort->exception_bucket_flushed) {
140335732d01SWei Wang 		err = -EINVAL;
140435732d01SWei Wang 		goto out;
140535732d01SWei Wang 	}
140635732d01SWei Wang 
140735732d01SWei Wang 	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
140835732d01SWei Wang 					lockdep_is_held(&rt6_exception_lock));
140935732d01SWei Wang 	if (!bucket) {
141035732d01SWei Wang 		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
141135732d01SWei Wang 				 GFP_ATOMIC);
141235732d01SWei Wang 		if (!bucket) {
141335732d01SWei Wang 			err = -ENOMEM;
141435732d01SWei Wang 			goto out;
141535732d01SWei Wang 		}
141635732d01SWei Wang 		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
141735732d01SWei Wang 	}
141835732d01SWei Wang 
141935732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
142035732d01SWei Wang 	/* rt6i_src.plen != 0 indicates ort is in subtree
142135732d01SWei Wang 	 * and exception table is indexed by a hash of
142235732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
142335732d01SWei Wang 	 * Otherwise, the exception table is indexed by
142435732d01SWei Wang 	 * a hash of only rt6i_dst.
142535732d01SWei Wang 	 */
142635732d01SWei Wang 	if (ort->rt6i_src.plen)
142735732d01SWei Wang 		src_key = &nrt->rt6i_src.addr;
142835732d01SWei Wang #endif
142960006a48SWei Wang 
143060006a48SWei Wang 	/* Update rt6i_prefsrc as it could be changed
143160006a48SWei Wang 	 * in rt6_remove_prefsrc()
143260006a48SWei Wang 	 */
143360006a48SWei Wang 	nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
1434f5bbe7eeSWei Wang 	/* rt6_mtu_change() might lower mtu on ort.
1435f5bbe7eeSWei Wang 	 * Only insert this exception route if its mtu
1436f5bbe7eeSWei Wang 	 * is less than ort's mtu value.
1437f5bbe7eeSWei Wang 	 */
1438*d4ead6b3SDavid Ahern 	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
1439f5bbe7eeSWei Wang 		err = -EINVAL;
1440f5bbe7eeSWei Wang 		goto out;
1441f5bbe7eeSWei Wang 	}
144260006a48SWei Wang 
144335732d01SWei Wang 	rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
144435732d01SWei Wang 					       src_key);
144535732d01SWei Wang 	if (rt6_ex)
144635732d01SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
144735732d01SWei Wang 
144835732d01SWei Wang 	rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
144935732d01SWei Wang 	if (!rt6_ex) {
145035732d01SWei Wang 		err = -ENOMEM;
145135732d01SWei Wang 		goto out;
145235732d01SWei Wang 	}
145335732d01SWei Wang 	rt6_ex->rt6i = nrt;
145435732d01SWei Wang 	rt6_ex->stamp = jiffies;
145535732d01SWei Wang 	atomic_inc(&nrt->rt6i_ref);
145635732d01SWei Wang 	nrt->rt6i_node = ort->rt6i_node;
145735732d01SWei Wang 	hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
145835732d01SWei Wang 	bucket->depth++;
145981eb8447SWei Wang 	net->ipv6.rt6_stats->fib_rt_cache++;
146035732d01SWei Wang 
146135732d01SWei Wang 	if (bucket->depth > FIB6_MAX_DEPTH)
146235732d01SWei Wang 		rt6_exception_remove_oldest(bucket);
146335732d01SWei Wang 
146435732d01SWei Wang out:
146535732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
146635732d01SWei Wang 
146735732d01SWei Wang 	/* Update fn->fn_sernum to invalidate all cached dst */
1468b886d5f2SPaolo Abeni 	if (!err) {
1469922c2ac8SIdo Schimmel 		spin_lock_bh(&ort->rt6i_table->tb6_lock);
14707aef6859SDavid Ahern 		fib6_update_sernum(net, ort);
1471922c2ac8SIdo Schimmel 		spin_unlock_bh(&ort->rt6i_table->tb6_lock);
1472b886d5f2SPaolo Abeni 		fib6_force_start_gc(net);
1473b886d5f2SPaolo Abeni 	}
147435732d01SWei Wang 
147535732d01SWei Wang 	return err;
147635732d01SWei Wang }
147735732d01SWei Wang 
147835732d01SWei Wang void rt6_flush_exceptions(struct rt6_info *rt)
147935732d01SWei Wang {
148035732d01SWei Wang 	struct rt6_exception_bucket *bucket;
148135732d01SWei Wang 	struct rt6_exception *rt6_ex;
148235732d01SWei Wang 	struct hlist_node *tmp;
148335732d01SWei Wang 	int i;
148435732d01SWei Wang 
148535732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
148635732d01SWei Wang 	/* Prevent rt6_insert_exception() to recreate the bucket list */
148735732d01SWei Wang 	rt->exception_bucket_flushed = 1;
148835732d01SWei Wang 
148935732d01SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
149035732d01SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
149135732d01SWei Wang 	if (!bucket)
149235732d01SWei Wang 		goto out;
149335732d01SWei Wang 
149435732d01SWei Wang 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
149535732d01SWei Wang 		hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
149635732d01SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
149735732d01SWei Wang 		WARN_ON_ONCE(bucket->depth);
149835732d01SWei Wang 		bucket++;
149935732d01SWei Wang 	}
150035732d01SWei Wang 
150135732d01SWei Wang out:
150235732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
150335732d01SWei Wang }
150435732d01SWei Wang 
150535732d01SWei Wang /* Find cached rt in the hash table inside passed in rt
150635732d01SWei Wang  * Caller has to hold rcu_read_lock()
150735732d01SWei Wang  */
150835732d01SWei Wang static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
150935732d01SWei Wang 					   struct in6_addr *daddr,
151035732d01SWei Wang 					   struct in6_addr *saddr)
151135732d01SWei Wang {
151235732d01SWei Wang 	struct rt6_exception_bucket *bucket;
151335732d01SWei Wang 	struct in6_addr *src_key = NULL;
151435732d01SWei Wang 	struct rt6_exception *rt6_ex;
151535732d01SWei Wang 	struct rt6_info *res = NULL;
151635732d01SWei Wang 
151735732d01SWei Wang 	bucket = rcu_dereference(rt->rt6i_exception_bucket);
151835732d01SWei Wang 
151935732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
152035732d01SWei Wang 	/* rt6i_src.plen != 0 indicates rt is in subtree
152135732d01SWei Wang 	 * and exception table is indexed by a hash of
152235732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
152335732d01SWei Wang 	 * Otherwise, the exception table is indexed by
152435732d01SWei Wang 	 * a hash of only rt6i_dst.
152535732d01SWei Wang 	 */
152635732d01SWei Wang 	if (rt->rt6i_src.plen)
152735732d01SWei Wang 		src_key = saddr;
152835732d01SWei Wang #endif
152935732d01SWei Wang 	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
153035732d01SWei Wang 
153135732d01SWei Wang 	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
153235732d01SWei Wang 		res = rt6_ex->rt6i;
153335732d01SWei Wang 
153435732d01SWei Wang 	return res;
153535732d01SWei Wang }
153635732d01SWei Wang 
153735732d01SWei Wang /* Remove the passed in cached rt from the hash table that contains it */
153835732d01SWei Wang int rt6_remove_exception_rt(struct rt6_info *rt)
153935732d01SWei Wang {
154035732d01SWei Wang 	struct rt6_exception_bucket *bucket;
15413a2232e9SDavid Miller 	struct rt6_info *from = rt->from;
154235732d01SWei Wang 	struct in6_addr *src_key = NULL;
154335732d01SWei Wang 	struct rt6_exception *rt6_ex;
154435732d01SWei Wang 	int err;
154535732d01SWei Wang 
154635732d01SWei Wang 	if (!from ||
1547442d713bSColin Ian King 	    !(rt->rt6i_flags & RTF_CACHE))
154835732d01SWei Wang 		return -EINVAL;
154935732d01SWei Wang 
155035732d01SWei Wang 	if (!rcu_access_pointer(from->rt6i_exception_bucket))
155135732d01SWei Wang 		return -ENOENT;
155235732d01SWei Wang 
155335732d01SWei Wang 	spin_lock_bh(&rt6_exception_lock);
155435732d01SWei Wang 	bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
155535732d01SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
155635732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
155735732d01SWei Wang 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
155835732d01SWei Wang 	 * and exception table is indexed by a hash of
155935732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
156035732d01SWei Wang 	 * Otherwise, the exception table is indexed by
156135732d01SWei Wang 	 * a hash of only rt6i_dst.
156235732d01SWei Wang 	 */
156335732d01SWei Wang 	if (from->rt6i_src.plen)
156435732d01SWei Wang 		src_key = &rt->rt6i_src.addr;
156535732d01SWei Wang #endif
156635732d01SWei Wang 	rt6_ex = __rt6_find_exception_spinlock(&bucket,
156735732d01SWei Wang 					       &rt->rt6i_dst.addr,
156835732d01SWei Wang 					       src_key);
156935732d01SWei Wang 	if (rt6_ex) {
157035732d01SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
157135732d01SWei Wang 		err = 0;
157235732d01SWei Wang 	} else {
157335732d01SWei Wang 		err = -ENOENT;
157435732d01SWei Wang 	}
157535732d01SWei Wang 
157635732d01SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
157735732d01SWei Wang 	return err;
157835732d01SWei Wang }
157935732d01SWei Wang 
158035732d01SWei Wang /* Find rt6_ex which contains the passed in rt cache and
158135732d01SWei Wang  * refresh its stamp
158235732d01SWei Wang  */
158335732d01SWei Wang static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
158435732d01SWei Wang {
158535732d01SWei Wang 	struct rt6_exception_bucket *bucket;
15863a2232e9SDavid Miller 	struct rt6_info *from = rt->from;
158735732d01SWei Wang 	struct in6_addr *src_key = NULL;
158835732d01SWei Wang 	struct rt6_exception *rt6_ex;
158935732d01SWei Wang 
159035732d01SWei Wang 	if (!from ||
1591442d713bSColin Ian King 	    !(rt->rt6i_flags & RTF_CACHE))
159235732d01SWei Wang 		return;
159335732d01SWei Wang 
159435732d01SWei Wang 	rcu_read_lock();
159535732d01SWei Wang 	bucket = rcu_dereference(from->rt6i_exception_bucket);
159635732d01SWei Wang 
159735732d01SWei Wang #ifdef CONFIG_IPV6_SUBTREES
159835732d01SWei Wang 	/* rt6i_src.plen != 0 indicates 'from' is in subtree
159935732d01SWei Wang 	 * and exception table is indexed by a hash of
160035732d01SWei Wang 	 * both rt6i_dst and rt6i_src.
160135732d01SWei Wang 	 * Otherwise, the exception table is indexed by
160235732d01SWei Wang 	 * a hash of only rt6i_dst.
160335732d01SWei Wang 	 */
160435732d01SWei Wang 	if (from->rt6i_src.plen)
160535732d01SWei Wang 		src_key = &rt->rt6i_src.addr;
160635732d01SWei Wang #endif
160735732d01SWei Wang 	rt6_ex = __rt6_find_exception_rcu(&bucket,
160835732d01SWei Wang 					  &rt->rt6i_dst.addr,
160935732d01SWei Wang 					  src_key);
161035732d01SWei Wang 	if (rt6_ex)
161135732d01SWei Wang 		rt6_ex->stamp = jiffies;
161235732d01SWei Wang 
161335732d01SWei Wang 	rcu_read_unlock();
161435732d01SWei Wang }
161535732d01SWei Wang 
161660006a48SWei Wang static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
161760006a48SWei Wang {
161860006a48SWei Wang 	struct rt6_exception_bucket *bucket;
161960006a48SWei Wang 	struct rt6_exception *rt6_ex;
162060006a48SWei Wang 	int i;
162160006a48SWei Wang 
162260006a48SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
162360006a48SWei Wang 					lockdep_is_held(&rt6_exception_lock));
162460006a48SWei Wang 
162560006a48SWei Wang 	if (bucket) {
162660006a48SWei Wang 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
162760006a48SWei Wang 			hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
162860006a48SWei Wang 				rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
162960006a48SWei Wang 			}
163060006a48SWei Wang 			bucket++;
163160006a48SWei Wang 		}
163260006a48SWei Wang 	}
163360006a48SWei Wang }
163460006a48SWei Wang 
1635e9fa1495SStefano Brivio static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1636e9fa1495SStefano Brivio 					 struct rt6_info *rt, int mtu)
1637e9fa1495SStefano Brivio {
1638e9fa1495SStefano Brivio 	/* If the new MTU is lower than the route PMTU, this new MTU will be the
1639e9fa1495SStefano Brivio 	 * lowest MTU in the path: always allow updating the route PMTU to
1640e9fa1495SStefano Brivio 	 * reflect PMTU decreases.
1641e9fa1495SStefano Brivio 	 *
1642e9fa1495SStefano Brivio 	 * If the new MTU is higher, and the route PMTU is equal to the local
1643e9fa1495SStefano Brivio 	 * MTU, this means the old MTU is the lowest in the path, so allow
1644e9fa1495SStefano Brivio 	 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1645e9fa1495SStefano Brivio 	 * handle this.
1646e9fa1495SStefano Brivio 	 */
1647e9fa1495SStefano Brivio 
1648e9fa1495SStefano Brivio 	if (dst_mtu(&rt->dst) >= mtu)
1649e9fa1495SStefano Brivio 		return true;
1650e9fa1495SStefano Brivio 
1651e9fa1495SStefano Brivio 	if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1652e9fa1495SStefano Brivio 		return true;
1653e9fa1495SStefano Brivio 
1654e9fa1495SStefano Brivio 	return false;
1655e9fa1495SStefano Brivio }
1656e9fa1495SStefano Brivio 
1657e9fa1495SStefano Brivio static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1658e9fa1495SStefano Brivio 				       struct rt6_info *rt, int mtu)
1659f5bbe7eeSWei Wang {
1660f5bbe7eeSWei Wang 	struct rt6_exception_bucket *bucket;
1661f5bbe7eeSWei Wang 	struct rt6_exception *rt6_ex;
1662f5bbe7eeSWei Wang 	int i;
1663f5bbe7eeSWei Wang 
1664f5bbe7eeSWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1665f5bbe7eeSWei Wang 					lockdep_is_held(&rt6_exception_lock));
1666f5bbe7eeSWei Wang 
1667e9fa1495SStefano Brivio 	if (!bucket)
1668e9fa1495SStefano Brivio 		return;
1669e9fa1495SStefano Brivio 
1670f5bbe7eeSWei Wang 	for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1671f5bbe7eeSWei Wang 		hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1672f5bbe7eeSWei Wang 			struct rt6_info *entry = rt6_ex->rt6i;
1673e9fa1495SStefano Brivio 
1674e9fa1495SStefano Brivio 			/* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1675*d4ead6b3SDavid Ahern 			 * route), the metrics of its rt->from have already
1676f5bbe7eeSWei Wang 			 * been updated.
1677f5bbe7eeSWei Wang 			 */
1678*d4ead6b3SDavid Ahern 			if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
1679e9fa1495SStefano Brivio 			    rt6_mtu_change_route_allowed(idev, entry, mtu))
1680*d4ead6b3SDavid Ahern 				dst_metric_set(&entry->dst, RTAX_MTU, mtu);
1681f5bbe7eeSWei Wang 		}
1682f5bbe7eeSWei Wang 		bucket++;
1683f5bbe7eeSWei Wang 	}
1684f5bbe7eeSWei Wang }
1685f5bbe7eeSWei Wang 
1686b16cb459SWei Wang #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
1687b16cb459SWei Wang 
1688b16cb459SWei Wang static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1689b16cb459SWei Wang 					struct in6_addr *gateway)
1690b16cb459SWei Wang {
1691b16cb459SWei Wang 	struct rt6_exception_bucket *bucket;
1692b16cb459SWei Wang 	struct rt6_exception *rt6_ex;
1693b16cb459SWei Wang 	struct hlist_node *tmp;
1694b16cb459SWei Wang 	int i;
1695b16cb459SWei Wang 
1696b16cb459SWei Wang 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1697b16cb459SWei Wang 		return;
1698b16cb459SWei Wang 
1699b16cb459SWei Wang 	spin_lock_bh(&rt6_exception_lock);
1700b16cb459SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1701b16cb459SWei Wang 				     lockdep_is_held(&rt6_exception_lock));
1702b16cb459SWei Wang 
1703b16cb459SWei Wang 	if (bucket) {
1704b16cb459SWei Wang 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1705b16cb459SWei Wang 			hlist_for_each_entry_safe(rt6_ex, tmp,
1706b16cb459SWei Wang 						  &bucket->chain, hlist) {
1707b16cb459SWei Wang 				struct rt6_info *entry = rt6_ex->rt6i;
1708b16cb459SWei Wang 
1709b16cb459SWei Wang 				if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1710b16cb459SWei Wang 				    RTF_CACHE_GATEWAY &&
1711b16cb459SWei Wang 				    ipv6_addr_equal(gateway,
1712b16cb459SWei Wang 						    &entry->rt6i_gateway)) {
1713b16cb459SWei Wang 					rt6_remove_exception(bucket, rt6_ex);
1714b16cb459SWei Wang 				}
1715b16cb459SWei Wang 			}
1716b16cb459SWei Wang 			bucket++;
1717b16cb459SWei Wang 		}
1718b16cb459SWei Wang 	}
1719b16cb459SWei Wang 
1720b16cb459SWei Wang 	spin_unlock_bh(&rt6_exception_lock);
1721b16cb459SWei Wang }
1722b16cb459SWei Wang 
1723c757faa8SWei Wang static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1724c757faa8SWei Wang 				      struct rt6_exception *rt6_ex,
1725c757faa8SWei Wang 				      struct fib6_gc_args *gc_args,
1726c757faa8SWei Wang 				      unsigned long now)
1727c757faa8SWei Wang {
1728c757faa8SWei Wang 	struct rt6_info *rt = rt6_ex->rt6i;
1729c757faa8SWei Wang 
17301859bac0SPaolo Abeni 	/* we are pruning and obsoleting aged-out and non gateway exceptions
17311859bac0SPaolo Abeni 	 * even if others have still references to them, so that on next
17321859bac0SPaolo Abeni 	 * dst_check() such references can be dropped.
17331859bac0SPaolo Abeni 	 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
17341859bac0SPaolo Abeni 	 * expired, independently from their aging, as per RFC 8201 section 4
17351859bac0SPaolo Abeni 	 */
173631afeb42SWei Wang 	if (!(rt->rt6i_flags & RTF_EXPIRES)) {
173731afeb42SWei Wang 		if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1738c757faa8SWei Wang 			RT6_TRACE("aging clone %p\n", rt);
1739c757faa8SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
1740c757faa8SWei Wang 			return;
174131afeb42SWei Wang 		}
174231afeb42SWei Wang 	} else if (time_after(jiffies, rt->dst.expires)) {
174331afeb42SWei Wang 		RT6_TRACE("purging expired route %p\n", rt);
174431afeb42SWei Wang 		rt6_remove_exception(bucket, rt6_ex);
174531afeb42SWei Wang 		return;
174631afeb42SWei Wang 	}
174731afeb42SWei Wang 
174831afeb42SWei Wang 	if (rt->rt6i_flags & RTF_GATEWAY) {
1749c757faa8SWei Wang 		struct neighbour *neigh;
1750c757faa8SWei Wang 		__u8 neigh_flags = 0;
1751c757faa8SWei Wang 
17521bfa26ffSEric Dumazet 		neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
17531bfa26ffSEric Dumazet 		if (neigh)
1754c757faa8SWei Wang 			neigh_flags = neigh->flags;
17551bfa26ffSEric Dumazet 
1756c757faa8SWei Wang 		if (!(neigh_flags & NTF_ROUTER)) {
1757c757faa8SWei Wang 			RT6_TRACE("purging route %p via non-router but gateway\n",
1758c757faa8SWei Wang 				  rt);
1759c757faa8SWei Wang 			rt6_remove_exception(bucket, rt6_ex);
1760c757faa8SWei Wang 			return;
1761c757faa8SWei Wang 		}
1762c757faa8SWei Wang 	}
176331afeb42SWei Wang 
1764c757faa8SWei Wang 	gc_args->more++;
1765c757faa8SWei Wang }
1766c757faa8SWei Wang 
1767c757faa8SWei Wang void rt6_age_exceptions(struct rt6_info *rt,
1768c757faa8SWei Wang 			struct fib6_gc_args *gc_args,
1769c757faa8SWei Wang 			unsigned long now)
1770c757faa8SWei Wang {
1771c757faa8SWei Wang 	struct rt6_exception_bucket *bucket;
1772c757faa8SWei Wang 	struct rt6_exception *rt6_ex;
1773c757faa8SWei Wang 	struct hlist_node *tmp;
1774c757faa8SWei Wang 	int i;
1775c757faa8SWei Wang 
1776c757faa8SWei Wang 	if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1777c757faa8SWei Wang 		return;
1778c757faa8SWei Wang 
17791bfa26ffSEric Dumazet 	rcu_read_lock_bh();
17801bfa26ffSEric Dumazet 	spin_lock(&rt6_exception_lock);
1781c757faa8SWei Wang 	bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1782c757faa8SWei Wang 				    lockdep_is_held(&rt6_exception_lock));
1783c757faa8SWei Wang 
1784c757faa8SWei Wang 	if (bucket) {
1785c757faa8SWei Wang 		for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1786c757faa8SWei Wang 			hlist_for_each_entry_safe(rt6_ex, tmp,
1787c757faa8SWei Wang 						  &bucket->chain, hlist) {
1788c757faa8SWei Wang 				rt6_age_examine_exception(bucket, rt6_ex,
1789c757faa8SWei Wang 							  gc_args, now);
1790c757faa8SWei Wang 			}
1791c757faa8SWei Wang 			bucket++;
1792c757faa8SWei Wang 		}
1793c757faa8SWei Wang 	}
17941bfa26ffSEric Dumazet 	spin_unlock(&rt6_exception_lock);
17951bfa26ffSEric Dumazet 	rcu_read_unlock_bh();
1796c757faa8SWei Wang }
1797c757faa8SWei Wang 
/* Look up the route for flow @fl6 in @table.
 *
 * A candidate is chosen with rt6_select() (refined by rt6_multipath_select()
 * when the candidate has siblings), backtracking through the tree while only
 * the null entry is found and, as a last resort, retrying without
 * RT6_LOOKUP_F_REACHABLE.  A hit in the candidate's exception table then
 * replaces the candidate.
 *
 * Depending on the result, one of four things is returned:
 *  - the namespace's ip6_null_entry,
 *  - the RTF_CACHE exception route,
 *  - a newly allocated uncached clone (FLOWI_FLAG_KNOWN_NH on a route
 *    without RTF_GATEWAY), or
 *  - a per-cpu copy of the route.
 * Each path goes through dst_hold(), ip6_hold_safe() or an allocation helper
 * before returning, and each calls trace_fib6_table_lookup() on the result.
 */
17989ff74384SDavid Ahern struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1799b75cc8f9SDavid Ahern 			       int oif, struct flowi6 *fl6,
1800b75cc8f9SDavid Ahern 			       const struct sk_buff *skb, int flags)
18011da177e4SLinus Torvalds {
1802367efcb9SMartin KaFai Lau 	struct fib6_node *fn, *saved_fn;
18032b760fcfSWei Wang 	struct rt6_info *rt, *rt_cache;
1804c71099acSThomas Graf 	int strict = 0;
18051da177e4SLinus Torvalds 
	/* Carry over the caller's IFACE / IGNORE_LINKSTATE lookup flags and
	 * add RT6_LOOKUP_F_REACHABLE when devconf_all->forwarding is 0.
	 */
180677d16f45SYOSHIFUJI Hideaki 	strict |= flags & RT6_LOOKUP_F_IFACE;
1807d5d32e4bSDavid Ahern 	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1808367efcb9SMartin KaFai Lau 	if (net->ipv6.devconf_all->forwarding == 0)
1809367efcb9SMartin KaFai Lau 		strict |= RT6_LOOKUP_F_REACHABLE;
18101da177e4SLinus Torvalds 
181166f5d6ceSWei Wang 	rcu_read_lock();
18121da177e4SLinus Torvalds 
18134c9483b2SDavid S. Miller 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	/* remembered so the search can restart here with relaxed flags */
1814367efcb9SMartin KaFai Lau 	saved_fn = fn;
18151da177e4SLinus Torvalds 
	/* FLOWI_FLAG_SKIP_NH_OIF: select without an output interface hint */
1816ca254490SDavid Ahern 	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1817ca254490SDavid Ahern 		oif = 0;
1818ca254490SDavid Ahern 
1819a3c00e46SMartin KaFai Lau redo_rt6_select:
18208d1040e8SWei Wang 	rt = rt6_select(net, fn, oif, strict);
182152bd4c0cSNicolas Dichtel 	if (rt->rt6i_nsiblings)
1822b4bac172SDavid Ahern 		rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
	/* Only the null entry was found: try the node returned by
	 * fib6_backtrack(); once that runs out, retry from the original node
	 * without the REACHABLE requirement (at most once, since the flag is
	 * cleared).
	 */
1823a3c00e46SMartin KaFai Lau 	if (rt == net->ipv6.ip6_null_entry) {
1824a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
1825a3c00e46SMartin KaFai Lau 		if (fn)
1826a3c00e46SMartin KaFai Lau 			goto redo_rt6_select;
1827367efcb9SMartin KaFai Lau 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1828367efcb9SMartin KaFai Lau 			/* also consider unreachable route */
1829367efcb9SMartin KaFai Lau 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1830367efcb9SMartin KaFai Lau 			fn = saved_fn;
1831367efcb9SMartin KaFai Lau 			goto redo_rt6_select;
1832367efcb9SMartin KaFai Lau 		}
1833a3c00e46SMartin KaFai Lau 	}
1834a3c00e46SMartin KaFai Lau 
18352b760fcfSWei Wang 	/*Search through exception table */
18362b760fcfSWei Wang 	rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
18372b760fcfSWei Wang 	if (rt_cache)
18382b760fcfSWei Wang 		rt = rt_cache;
1839d52d3997SMartin KaFai Lau 
1840d3843fe5SWei Wang 	if (rt == net->ipv6.ip6_null_entry) {
		/* no route: return the null entry with a reference held */
184166f5d6ceSWei Wang 		rcu_read_unlock();
1842d3843fe5SWei Wang 		dst_hold(&rt->dst);
1843b65f164dSPaolo Abeni 		trace_fib6_table_lookup(net, rt, table, fl6);
1844d3843fe5SWei Wang 		return rt;
1845d3843fe5SWei Wang 	} else if (rt->rt6i_flags & RTF_CACHE) {
		/* Exception route: dst_use_noref() is only called when
		 * ip6_hold_safe() reports success; rt is returned either way,
		 * and ip6_hold_safe() may have replaced it since it receives
		 * the pointer by address.
		 */
1846*d4ead6b3SDavid Ahern 		if (ip6_hold_safe(net, &rt, true))
1847d3843fe5SWei Wang 			dst_use_noref(&rt->dst, jiffies);
1848*d4ead6b3SDavid Ahern 
184966f5d6ceSWei Wang 		rcu_read_unlock();
1850b65f164dSPaolo Abeni 		trace_fib6_table_lookup(net, rt, table, fl6);
1851d52d3997SMartin KaFai Lau 		return rt;
18523da59bd9SMartin KaFai Lau 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
18533da59bd9SMartin KaFai Lau 			    !(rt->rt6i_flags & RTF_GATEWAY))) {
18543da59bd9SMartin KaFai Lau 		/* Create a RTF_CACHE clone which will not be
18553da59bd9SMartin KaFai Lau 		 * owned by the fib6 tree.  It is for the special case where
18563da59bd9SMartin KaFai Lau 		 * the daddr in the skb during the neighbor look-up is different
18573da59bd9SMartin KaFai Lau 		 * from the fl6->daddr used to look-up route here.
18583da59bd9SMartin KaFai Lau 		 */
1859c71099acSThomas Graf 
18603da59bd9SMartin KaFai Lau 		struct rt6_info *uncached_rt;
18613da59bd9SMartin KaFai Lau 
		/* If the hold fails, whatever ip6_hold_safe() left in rt is
		 * returned directly and no clone is attempted.
		 */
1862d3843fe5SWei Wang 		if (ip6_hold_safe(net, &rt, true)) {
1863d3843fe5SWei Wang 			dst_use_noref(&rt->dst, jiffies);
1864d3843fe5SWei Wang 		} else {
186566f5d6ceSWei Wang 			rcu_read_unlock();
1866d3843fe5SWei Wang 			uncached_rt = rt;
1867d3843fe5SWei Wang 			goto uncached_rt_out;
1868d3843fe5SWei Wang 		}
186966f5d6ceSWei Wang 		rcu_read_unlock();
1870d52d3997SMartin KaFai Lau 
		/* the reference taken above is dropped once the clone exists */
18713da59bd9SMartin KaFai Lau 		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
18723da59bd9SMartin KaFai Lau 		dst_release(&rt->dst);
18733da59bd9SMartin KaFai Lau 
18741cfb71eeSWei Wang 		if (uncached_rt) {
18751cfb71eeSWei Wang 			/* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
18761cfb71eeSWei Wang 			 * No need for another dst_hold()
18771cfb71eeSWei Wang 			 */
18788d0b94afSMartin KaFai Lau 			rt6_uncached_list_add(uncached_rt);
187981eb8447SWei Wang 			atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
18801cfb71eeSWei Wang 		} else {
			/* clone allocation failed: fall back to the null entry */
18813da59bd9SMartin KaFai Lau 			uncached_rt = net->ipv6.ip6_null_entry;
18823da59bd9SMartin KaFai Lau 			dst_hold(&uncached_rt->dst);
18831cfb71eeSWei Wang 		}
1884b811580dSDavid Ahern 
1885d3843fe5SWei Wang uncached_rt_out:
1886b65f164dSPaolo Abeni 		trace_fib6_table_lookup(net, uncached_rt, table, fl6);
18873da59bd9SMartin KaFai Lau 		return uncached_rt;
18883da59bd9SMartin KaFai Lau 
1889d52d3997SMartin KaFai Lau 	} else {
1890d52d3997SMartin KaFai Lau 		/* Get a percpu copy */
1891d52d3997SMartin KaFai Lau 
1892d52d3997SMartin KaFai Lau 		struct rt6_info *pcpu_rt;
1893d52d3997SMartin KaFai Lau 
1894d3843fe5SWei Wang 		dst_use_noref(&rt->dst, jiffies);
		/* Bottom halves stay disabled from reading this CPU's slot
		 * until a missing copy has been created and installed.
		 */
1895951f788aSEric Dumazet 		local_bh_disable();
1896d52d3997SMartin KaFai Lau 		pcpu_rt = rt6_get_pcpu_route(rt);
1897d52d3997SMartin KaFai Lau 
1898951f788aSEric Dumazet 		if (!pcpu_rt) {
1899a94b9367SWei Wang 			/* atomic_inc_not_zero() is needed when using rcu */
1900a94b9367SWei Wang 			if (atomic_inc_not_zero(&rt->rt6i_ref)) {
1901951f788aSEric Dumazet 				/* No dst_hold() on rt is needed because grabbing
1902a94b9367SWei Wang 				 * rt->rt6i_ref makes sure rt can't be released.
19039c7370a1SMartin KaFai Lau 				 */
1904afb1d4b5SDavid Ahern 				pcpu_rt = rt6_make_pcpu_route(net, rt);
1905a94b9367SWei Wang 				rt6_release(rt);
1906a94b9367SWei Wang 			} else {
1907a94b9367SWei Wang 				/* rt is already removed from tree */
1908a94b9367SWei Wang 				pcpu_rt = net->ipv6.ip6_null_entry;
1909a94b9367SWei Wang 				dst_hold(&pcpu_rt->dst);
1910a94b9367SWei Wang 			}
19119c7370a1SMartin KaFai Lau 		}
1912951f788aSEric Dumazet 		local_bh_enable();
1913951f788aSEric Dumazet 		rcu_read_unlock();
1914b65f164dSPaolo Abeni 		trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
1915d52d3997SMartin KaFai Lau 		return pcpu_rt;
1916d52d3997SMartin KaFai Lau 	}
1917c71099acSThomas Graf }
19189ff74384SDavid Ahern EXPORT_SYMBOL_GPL(ip6_pol_route);
1919c71099acSThomas Graf 
1920b75cc8f9SDavid Ahern static struct rt6_info *ip6_pol_route_input(struct net *net,
1921b75cc8f9SDavid Ahern 					    struct fib6_table *table,
1922b75cc8f9SDavid Ahern 					    struct flowi6 *fl6,
1923b75cc8f9SDavid Ahern 					    const struct sk_buff *skb,
1924b75cc8f9SDavid Ahern 					    int flags)
19254acad72dSPavel Emelyanov {
1926b75cc8f9SDavid Ahern 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
19274acad72dSPavel Emelyanov }
19284acad72dSPavel Emelyanov 
1929d409b847SMahesh Bandewar struct dst_entry *ip6_route_input_lookup(struct net *net,
193072331bc0SShmulik Ladkani 					 struct net_device *dev,
1931b75cc8f9SDavid Ahern 					 struct flowi6 *fl6,
1932b75cc8f9SDavid Ahern 					 const struct sk_buff *skb,
1933b75cc8f9SDavid Ahern 					 int flags)
193472331bc0SShmulik Ladkani {
193572331bc0SShmulik Ladkani 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
193672331bc0SShmulik Ladkani 		flags |= RT6_LOOKUP_F_IFACE;
193772331bc0SShmulik Ladkani 
1938b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
193972331bc0SShmulik Ladkani }
1940d409b847SMahesh Bandewar EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
194172331bc0SShmulik Ladkani 
194223aebdacSJakub Sitnicki static void ip6_multipath_l3_keys(const struct sk_buff *skb,
19435e5d6fedSRoopa Prabhu 				  struct flow_keys *keys,
19445e5d6fedSRoopa Prabhu 				  struct flow_keys *flkeys)
194523aebdacSJakub Sitnicki {
194623aebdacSJakub Sitnicki 	const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
194723aebdacSJakub Sitnicki 	const struct ipv6hdr *key_iph = outer_iph;
19485e5d6fedSRoopa Prabhu 	struct flow_keys *_flkeys = flkeys;
194923aebdacSJakub Sitnicki 	const struct ipv6hdr *inner_iph;
195023aebdacSJakub Sitnicki 	const struct icmp6hdr *icmph;
195123aebdacSJakub Sitnicki 	struct ipv6hdr _inner_iph;
195223aebdacSJakub Sitnicki 
195323aebdacSJakub Sitnicki 	if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
195423aebdacSJakub Sitnicki 		goto out;
195523aebdacSJakub Sitnicki 
195623aebdacSJakub Sitnicki 	icmph = icmp6_hdr(skb);
195723aebdacSJakub Sitnicki 	if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
195823aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
195923aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
196023aebdacSJakub Sitnicki 	    icmph->icmp6_type != ICMPV6_PARAMPROB)
196123aebdacSJakub Sitnicki 		goto out;
196223aebdacSJakub Sitnicki 
196323aebdacSJakub Sitnicki 	inner_iph = skb_header_pointer(skb,
196423aebdacSJakub Sitnicki 				       skb_transport_offset(skb) + sizeof(*icmph),
196523aebdacSJakub Sitnicki 				       sizeof(_inner_iph), &_inner_iph);
196623aebdacSJakub Sitnicki 	if (!inner_iph)
196723aebdacSJakub Sitnicki 		goto out;
196823aebdacSJakub Sitnicki 
196923aebdacSJakub Sitnicki 	key_iph = inner_iph;
19705e5d6fedSRoopa Prabhu 	_flkeys = NULL;
197123aebdacSJakub Sitnicki out:
19725e5d6fedSRoopa Prabhu 	if (_flkeys) {
19735e5d6fedSRoopa Prabhu 		keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
19745e5d6fedSRoopa Prabhu 		keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
19755e5d6fedSRoopa Prabhu 		keys->tags.flow_label = _flkeys->tags.flow_label;
19765e5d6fedSRoopa Prabhu 		keys->basic.ip_proto = _flkeys->basic.ip_proto;
19775e5d6fedSRoopa Prabhu 	} else {
197823aebdacSJakub Sitnicki 		keys->addrs.v6addrs.src = key_iph->saddr;
197923aebdacSJakub Sitnicki 		keys->addrs.v6addrs.dst = key_iph->daddr;
198023aebdacSJakub Sitnicki 		keys->tags.flow_label = ip6_flowinfo(key_iph);
198123aebdacSJakub Sitnicki 		keys->basic.ip_proto = key_iph->nexthdr;
198223aebdacSJakub Sitnicki 	}
19835e5d6fedSRoopa Prabhu }
198423aebdacSJakub Sitnicki 
198523aebdacSJakub Sitnicki /* if skb is set it will be used and fl6 can be NULL */
1986b4bac172SDavid Ahern u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1987b4bac172SDavid Ahern 		       const struct sk_buff *skb, struct flow_keys *flkeys)
198823aebdacSJakub Sitnicki {
198923aebdacSJakub Sitnicki 	struct flow_keys hash_keys;
19909a2a537aSDavid Ahern 	u32 mhash;
199123aebdacSJakub Sitnicki 
1992bbfa047aSDavid S. Miller 	switch (ip6_multipath_hash_policy(net)) {
1993b4bac172SDavid Ahern 	case 0:
19946f74b6c2SDavid Ahern 		memset(&hash_keys, 0, sizeof(hash_keys));
19956f74b6c2SDavid Ahern 		hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
19969a2a537aSDavid Ahern 		if (skb) {
19975e5d6fedSRoopa Prabhu 			ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
19989a2a537aSDavid Ahern 		} else {
19999a2a537aSDavid Ahern 			hash_keys.addrs.v6addrs.src = fl6->saddr;
20009a2a537aSDavid Ahern 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
20019a2a537aSDavid Ahern 			hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
20029a2a537aSDavid Ahern 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
200323aebdacSJakub Sitnicki 		}
2004b4bac172SDavid Ahern 		break;
2005b4bac172SDavid Ahern 	case 1:
2006b4bac172SDavid Ahern 		if (skb) {
2007b4bac172SDavid Ahern 			unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2008b4bac172SDavid Ahern 			struct flow_keys keys;
2009b4bac172SDavid Ahern 
2010b4bac172SDavid Ahern 			/* short-circuit if we already have L4 hash present */
2011b4bac172SDavid Ahern 			if (skb->l4_hash)
2012b4bac172SDavid Ahern 				return skb_get_hash_raw(skb) >> 1;
2013b4bac172SDavid Ahern 
2014b4bac172SDavid Ahern 			memset(&hash_keys, 0, sizeof(hash_keys));
2015b4bac172SDavid Ahern 
2016b4bac172SDavid Ahern                         if (!flkeys) {
2017b4bac172SDavid Ahern 				skb_flow_dissect_flow_keys(skb, &keys, flag);
2018b4bac172SDavid Ahern 				flkeys = &keys;
2019b4bac172SDavid Ahern 			}
2020b4bac172SDavid Ahern 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2021b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2022b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2023b4bac172SDavid Ahern 			hash_keys.ports.src = flkeys->ports.src;
2024b4bac172SDavid Ahern 			hash_keys.ports.dst = flkeys->ports.dst;
2025b4bac172SDavid Ahern 			hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2026b4bac172SDavid Ahern 		} else {
2027b4bac172SDavid Ahern 			memset(&hash_keys, 0, sizeof(hash_keys));
2028b4bac172SDavid Ahern 			hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2029b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.src = fl6->saddr;
2030b4bac172SDavid Ahern 			hash_keys.addrs.v6addrs.dst = fl6->daddr;
2031b4bac172SDavid Ahern 			hash_keys.ports.src = fl6->fl6_sport;
2032b4bac172SDavid Ahern 			hash_keys.ports.dst = fl6->fl6_dport;
2033b4bac172SDavid Ahern 			hash_keys.basic.ip_proto = fl6->flowi6_proto;
2034b4bac172SDavid Ahern 		}
2035b4bac172SDavid Ahern 		break;
2036b4bac172SDavid Ahern 	}
20379a2a537aSDavid Ahern 	mhash = flow_hash_from_keys(&hash_keys);
203823aebdacSJakub Sitnicki 
20399a2a537aSDavid Ahern 	return mhash >> 1;
204023aebdacSJakub Sitnicki }
204123aebdacSJakub Sitnicki 
2042c71099acSThomas Graf void ip6_route_input(struct sk_buff *skb)
2043c71099acSThomas Graf {
2044b71d1d42SEric Dumazet 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2045c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(skb->dev);
2046adaa70bbSThomas Graf 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2047904af04dSJiri Benc 	struct ip_tunnel_info *tun_info;
20484c9483b2SDavid S. Miller 	struct flowi6 fl6 = {
2049e0d56fddSDavid Ahern 		.flowi6_iif = skb->dev->ifindex,
20504c9483b2SDavid S. Miller 		.daddr = iph->daddr,
20514c9483b2SDavid S. Miller 		.saddr = iph->saddr,
20526502ca52SYOSHIFUJI Hideaki / 吉藤英明 		.flowlabel = ip6_flowinfo(iph),
20534c9483b2SDavid S. Miller 		.flowi6_mark = skb->mark,
20544c9483b2SDavid S. Miller 		.flowi6_proto = iph->nexthdr,
2055c71099acSThomas Graf 	};
20565e5d6fedSRoopa Prabhu 	struct flow_keys *flkeys = NULL, _flkeys;
2057adaa70bbSThomas Graf 
2058904af04dSJiri Benc 	tun_info = skb_tunnel_info(skb);
205946fa062aSJiri Benc 	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
2060904af04dSJiri Benc 		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
20615e5d6fedSRoopa Prabhu 
20625e5d6fedSRoopa Prabhu 	if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
20635e5d6fedSRoopa Prabhu 		flkeys = &_flkeys;
20645e5d6fedSRoopa Prabhu 
206523aebdacSJakub Sitnicki 	if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
2066b4bac172SDavid Ahern 		fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
206706e9d040SJiri Benc 	skb_dst_drop(skb);
2068b75cc8f9SDavid Ahern 	skb_dst_set(skb,
2069b75cc8f9SDavid Ahern 		    ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
2070c71099acSThomas Graf }
2071c71099acSThomas Graf 
2072b75cc8f9SDavid Ahern static struct rt6_info *ip6_pol_route_output(struct net *net,
2073b75cc8f9SDavid Ahern 					     struct fib6_table *table,
2074b75cc8f9SDavid Ahern 					     struct flowi6 *fl6,
2075b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
2076b75cc8f9SDavid Ahern 					     int flags)
2077c71099acSThomas Graf {
2078b75cc8f9SDavid Ahern 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
2079c71099acSThomas Graf }
2080c71099acSThomas Graf 
20816f21c96aSPaolo Abeni struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
20826f21c96aSPaolo Abeni 					 struct flowi6 *fl6, int flags)
2083c71099acSThomas Graf {
2084d46a9d67SDavid Ahern 	bool any_src;
2085c71099acSThomas Graf 
20864c1feac5SDavid Ahern 	if (rt6_need_strict(&fl6->daddr)) {
20874c1feac5SDavid Ahern 		struct dst_entry *dst;
20884c1feac5SDavid Ahern 
20894c1feac5SDavid Ahern 		dst = l3mdev_link_scope_lookup(net, fl6);
2090ca254490SDavid Ahern 		if (dst)
2091ca254490SDavid Ahern 			return dst;
20924c1feac5SDavid Ahern 	}
2093ca254490SDavid Ahern 
20941fb9489bSPavel Emelyanov 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
20954dc27d1cSDavid McCullough 
2096d46a9d67SDavid Ahern 	any_src = ipv6_addr_any(&fl6->saddr);
2097741a11d9SDavid Ahern 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
2098d46a9d67SDavid Ahern 	    (fl6->flowi6_oif && any_src))
209977d16f45SYOSHIFUJI Hideaki 		flags |= RT6_LOOKUP_F_IFACE;
2100c71099acSThomas Graf 
2101d46a9d67SDavid Ahern 	if (!any_src)
2102adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
21030c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 	else if (sk)
21040c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
2105adaa70bbSThomas Graf 
2106b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
21071da177e4SLinus Torvalds }
21086f21c96aSPaolo Abeni EXPORT_SYMBOL_GPL(ip6_route_output_flags);
21091da177e4SLinus Torvalds 
21102774c131SDavid S. Miller struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
211114e50e57SDavid S. Miller {
21125c1e6aa3SDavid S. Miller 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
21131dbe3252SWei Wang 	struct net_device *loopback_dev = net->loopback_dev;
211414e50e57SDavid S. Miller 	struct dst_entry *new = NULL;
211514e50e57SDavid S. Miller 
21161dbe3252SWei Wang 	rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
211762cf27e5SSteffen Klassert 		       DST_OBSOLETE_DEAD, 0);
211814e50e57SDavid S. Miller 	if (rt) {
21190a1f5962SMartin KaFai Lau 		rt6_info_init(rt);
212081eb8447SWei Wang 		atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
21210a1f5962SMartin KaFai Lau 
2122d8d1f30bSChangli Gao 		new = &rt->dst;
212314e50e57SDavid S. Miller 		new->__use = 1;
2124352e512cSHerbert Xu 		new->input = dst_discard;
2125ede2059dSEric W. Biederman 		new->output = dst_discard_out;
212614e50e57SDavid S. Miller 
2127defb3519SDavid S. Miller 		dst_copy_metrics(new, &ort->dst);
212814e50e57SDavid S. Miller 
21291dbe3252SWei Wang 		rt->rt6i_idev = in6_dev_get(loopback_dev);
21304e3fd7a0SAlexey Dobriyan 		rt->rt6i_gateway = ort->rt6i_gateway;
21310a1f5962SMartin KaFai Lau 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
213214e50e57SDavid S. Miller 		rt->rt6i_metric = 0;
213314e50e57SDavid S. Miller 
213414e50e57SDavid S. Miller 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
213514e50e57SDavid S. Miller #ifdef CONFIG_IPV6_SUBTREES
213614e50e57SDavid S. Miller 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
213714e50e57SDavid S. Miller #endif
213814e50e57SDavid S. Miller 	}
213914e50e57SDavid S. Miller 
214069ead7afSDavid S. Miller 	dst_release(dst_orig);
214169ead7afSDavid S. Miller 	return new ? new : ERR_PTR(-ENOMEM);
214214e50e57SDavid S. Miller }
214314e50e57SDavid S. Miller 
21441da177e4SLinus Torvalds /*
21451da177e4SLinus Torvalds  *	Destination cache support functions
21461da177e4SLinus Torvalds  */
21471da177e4SLinus Torvalds 
21483da59bd9SMartin KaFai Lau static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
21493da59bd9SMartin KaFai Lau {
215036143645SSteffen Klassert 	u32 rt_cookie = 0;
2151c5cff856SWei Wang 
2152c5cff856SWei Wang 	if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
21533da59bd9SMartin KaFai Lau 		return NULL;
21543da59bd9SMartin KaFai Lau 
21553da59bd9SMartin KaFai Lau 	if (rt6_check_expired(rt))
21563da59bd9SMartin KaFai Lau 		return NULL;
21573da59bd9SMartin KaFai Lau 
21583da59bd9SMartin KaFai Lau 	return &rt->dst;
21593da59bd9SMartin KaFai Lau }
21603da59bd9SMartin KaFai Lau 
21613da59bd9SMartin KaFai Lau static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
21623da59bd9SMartin KaFai Lau {
21635973fb1eSMartin KaFai Lau 	if (!__rt6_check_expired(rt) &&
21645973fb1eSMartin KaFai Lau 	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
21653a2232e9SDavid Miller 	    rt6_check(rt->from, cookie))
21663da59bd9SMartin KaFai Lau 		return &rt->dst;
21673da59bd9SMartin KaFai Lau 	else
21683da59bd9SMartin KaFai Lau 		return NULL;
21693da59bd9SMartin KaFai Lau }
21703da59bd9SMartin KaFai Lau 
21711da177e4SLinus Torvalds static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
21721da177e4SLinus Torvalds {
21731da177e4SLinus Torvalds 	struct rt6_info *rt;
21741da177e4SLinus Torvalds 
21751da177e4SLinus Torvalds 	rt = (struct rt6_info *) dst;
21761da177e4SLinus Torvalds 
21776f3118b5SNicolas Dichtel 	/* All IPV6 dsts are created with ->obsolete set to the value
21786f3118b5SNicolas Dichtel 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
21796f3118b5SNicolas Dichtel 	 * into this function always.
21806f3118b5SNicolas Dichtel 	 */
2181e3bc10bdSHannes Frederic Sowa 
218202bcf4e0SMartin KaFai Lau 	if (rt->rt6i_flags & RTF_PCPU ||
21833a2232e9SDavid Miller 	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
21843da59bd9SMartin KaFai Lau 		return rt6_dst_from_check(rt, cookie);
21853da59bd9SMartin KaFai Lau 	else
21863da59bd9SMartin KaFai Lau 		return rt6_check(rt, cookie);
21871da177e4SLinus Torvalds }
21881da177e4SLinus Torvalds 
21891da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
21901da177e4SLinus Torvalds {
21911da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *) dst;
21921da177e4SLinus Torvalds 
21931da177e4SLinus Torvalds 	if (rt) {
219454c1a859SYOSHIFUJI Hideaki / 吉藤英明 		if (rt->rt6i_flags & RTF_CACHE) {
219554c1a859SYOSHIFUJI Hideaki / 吉藤英明 			if (rt6_check_expired(rt)) {
2196afb1d4b5SDavid Ahern 				ip6_del_rt(dev_net(dst->dev), rt);
219754c1a859SYOSHIFUJI Hideaki / 吉藤英明 				dst = NULL;
21981da177e4SLinus Torvalds 			}
219954c1a859SYOSHIFUJI Hideaki / 吉藤英明 		} else {
220054c1a859SYOSHIFUJI Hideaki / 吉藤英明 			dst_release(dst);
220154c1a859SYOSHIFUJI Hideaki / 吉藤英明 			dst = NULL;
220254c1a859SYOSHIFUJI Hideaki / 吉藤英明 		}
220354c1a859SYOSHIFUJI Hideaki / 吉藤英明 	}
220454c1a859SYOSHIFUJI Hideaki / 吉藤英明 	return dst;
22051da177e4SLinus Torvalds }
22061da177e4SLinus Torvalds 
22071da177e4SLinus Torvalds static void ip6_link_failure(struct sk_buff *skb)
22081da177e4SLinus Torvalds {
22091da177e4SLinus Torvalds 	struct rt6_info *rt;
22101da177e4SLinus Torvalds 
22113ffe533cSAlexey Dobriyan 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
22121da177e4SLinus Torvalds 
2213adf30907SEric Dumazet 	rt = (struct rt6_info *) skb_dst(skb);
22141da177e4SLinus Torvalds 	if (rt) {
22151eb4f758SHannes Frederic Sowa 		if (rt->rt6i_flags & RTF_CACHE) {
2216ad65a2f0SWei Wang 			if (dst_hold_safe(&rt->dst))
2217afb1d4b5SDavid Ahern 				ip6_del_rt(dev_net(rt->dst.dev), rt);
2218c5cff856SWei Wang 		} else {
2219c5cff856SWei Wang 			struct fib6_node *fn;
2220c5cff856SWei Wang 
2221c5cff856SWei Wang 			rcu_read_lock();
2222c5cff856SWei Wang 			fn = rcu_dereference(rt->rt6i_node);
2223c5cff856SWei Wang 			if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2224c5cff856SWei Wang 				fn->fn_sernum = -1;
2225c5cff856SWei Wang 			rcu_read_unlock();
22261da177e4SLinus Torvalds 		}
22271da177e4SLinus Torvalds 	}
22281eb4f758SHannes Frederic Sowa }
22291da177e4SLinus Torvalds 
223045e4fd26SMartin KaFai Lau static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
223145e4fd26SMartin KaFai Lau {
223245e4fd26SMartin KaFai Lau 	struct net *net = dev_net(rt->dst.dev);
223345e4fd26SMartin KaFai Lau 
2234*d4ead6b3SDavid Ahern 	dst_metric_set(&rt->dst, RTAX_MTU, mtu);
223545e4fd26SMartin KaFai Lau 	rt->rt6i_flags |= RTF_MODIFIED;
223645e4fd26SMartin KaFai Lau 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
223745e4fd26SMartin KaFai Lau }
223845e4fd26SMartin KaFai Lau 
22390d3f6d29SMartin KaFai Lau static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
22400d3f6d29SMartin KaFai Lau {
22410d3f6d29SMartin KaFai Lau 	return !(rt->rt6i_flags & RTF_CACHE) &&
22424e587ea7SWei Wang 		(rt->rt6i_flags & RTF_PCPU ||
22434e587ea7SWei Wang 		 rcu_access_pointer(rt->rt6i_node));
22440d3f6d29SMartin KaFai Lau }
22450d3f6d29SMartin KaFai Lau 
224645e4fd26SMartin KaFai Lau static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
224745e4fd26SMartin KaFai Lau 				 const struct ipv6hdr *iph, u32 mtu)
22481da177e4SLinus Torvalds {
22490dec879fSJulian Anastasov 	const struct in6_addr *daddr, *saddr;
22501da177e4SLinus Torvalds 	struct rt6_info *rt6 = (struct rt6_info *)dst;
22511da177e4SLinus Torvalds 
225245e4fd26SMartin KaFai Lau 	if (rt6->rt6i_flags & RTF_LOCAL)
225345e4fd26SMartin KaFai Lau 		return;
225445e4fd26SMartin KaFai Lau 
225519bda36cSXin Long 	if (dst_metric_locked(dst, RTAX_MTU))
225619bda36cSXin Long 		return;
225719bda36cSXin Long 
225845e4fd26SMartin KaFai Lau 	if (iph) {
225945e4fd26SMartin KaFai Lau 		daddr = &iph->daddr;
226045e4fd26SMartin KaFai Lau 		saddr = &iph->saddr;
226145e4fd26SMartin KaFai Lau 	} else if (sk) {
226245e4fd26SMartin KaFai Lau 		daddr = &sk->sk_v6_daddr;
226345e4fd26SMartin KaFai Lau 		saddr = &inet6_sk(sk)->saddr;
226445e4fd26SMartin KaFai Lau 	} else {
22650dec879fSJulian Anastasov 		daddr = NULL;
22660dec879fSJulian Anastasov 		saddr = NULL;
22671da177e4SLinus Torvalds 	}
22680dec879fSJulian Anastasov 	dst_confirm_neigh(dst, daddr);
22690dec879fSJulian Anastasov 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
22700dec879fSJulian Anastasov 	if (mtu >= dst_mtu(dst))
22710dec879fSJulian Anastasov 		return;
22720dec879fSJulian Anastasov 
22730dec879fSJulian Anastasov 	if (!rt6_cache_allowed_for_pmtu(rt6)) {
22740dec879fSJulian Anastasov 		rt6_do_update_pmtu(rt6, mtu);
22752b760fcfSWei Wang 		/* update rt6_ex->stamp for cache */
22762b760fcfSWei Wang 		if (rt6->rt6i_flags & RTF_CACHE)
22772b760fcfSWei Wang 			rt6_update_exception_stamp_rt(rt6);
22780dec879fSJulian Anastasov 	} else if (daddr) {
22790dec879fSJulian Anastasov 		struct rt6_info *nrt6;
22800dec879fSJulian Anastasov 
2281*d4ead6b3SDavid Ahern 		nrt6 = ip6_rt_cache_alloc(rt6->from, daddr, saddr);
228245e4fd26SMartin KaFai Lau 		if (nrt6) {
228345e4fd26SMartin KaFai Lau 			rt6_do_update_pmtu(nrt6, mtu);
2284*d4ead6b3SDavid Ahern 			if (rt6_insert_exception(nrt6, rt6->from))
22852b760fcfSWei Wang 				dst_release_immediate(&nrt6->dst);
228645e4fd26SMartin KaFai Lau 		}
228745e4fd26SMartin KaFai Lau 	}
228845e4fd26SMartin KaFai Lau }
228945e4fd26SMartin KaFai Lau 
229045e4fd26SMartin KaFai Lau static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
229145e4fd26SMartin KaFai Lau 			       struct sk_buff *skb, u32 mtu)
229245e4fd26SMartin KaFai Lau {
229345e4fd26SMartin KaFai Lau 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
22941da177e4SLinus Torvalds }
22951da177e4SLinus Torvalds 
229642ae66c8SDavid S. Miller void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
2297e2d118a1SLorenzo Colitti 		     int oif, u32 mark, kuid_t uid)
229881aded24SDavid S. Miller {
229981aded24SDavid S. Miller 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
230081aded24SDavid S. Miller 	struct dst_entry *dst;
230181aded24SDavid S. Miller 	struct flowi6 fl6;
230281aded24SDavid S. Miller 
230381aded24SDavid S. Miller 	memset(&fl6, 0, sizeof(fl6));
230481aded24SDavid S. Miller 	fl6.flowi6_oif = oif;
23051b3c61dcSLorenzo Colitti 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
230681aded24SDavid S. Miller 	fl6.daddr = iph->daddr;
230781aded24SDavid S. Miller 	fl6.saddr = iph->saddr;
23086502ca52SYOSHIFUJI Hideaki / 吉藤英明 	fl6.flowlabel = ip6_flowinfo(iph);
2309e2d118a1SLorenzo Colitti 	fl6.flowi6_uid = uid;
231081aded24SDavid S. Miller 
231181aded24SDavid S. Miller 	dst = ip6_route_output(net, NULL, &fl6);
231281aded24SDavid S. Miller 	if (!dst->error)
231345e4fd26SMartin KaFai Lau 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
231481aded24SDavid S. Miller 	dst_release(dst);
231581aded24SDavid S. Miller }
231681aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_update_pmtu);
231781aded24SDavid S. Miller 
231881aded24SDavid S. Miller void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
231981aded24SDavid S. Miller {
232033c162a9SMartin KaFai Lau 	struct dst_entry *dst;
232133c162a9SMartin KaFai Lau 
232281aded24SDavid S. Miller 	ip6_update_pmtu(skb, sock_net(sk), mtu,
2323e2d118a1SLorenzo Colitti 			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
232433c162a9SMartin KaFai Lau 
232533c162a9SMartin KaFai Lau 	dst = __sk_dst_get(sk);
232633c162a9SMartin KaFai Lau 	if (!dst || !dst->obsolete ||
232733c162a9SMartin KaFai Lau 	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
232833c162a9SMartin KaFai Lau 		return;
232933c162a9SMartin KaFai Lau 
233033c162a9SMartin KaFai Lau 	bh_lock_sock(sk);
233133c162a9SMartin KaFai Lau 	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
233233c162a9SMartin KaFai Lau 		ip6_datagram_dst_update(sk, false);
233333c162a9SMartin KaFai Lau 	bh_unlock_sock(sk);
233481aded24SDavid S. Miller }
233581aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
233681aded24SDavid S. Miller 
23377d6850f7SAlexey Kodanev void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
23387d6850f7SAlexey Kodanev 			   const struct flowi6 *fl6)
23397d6850f7SAlexey Kodanev {
23407d6850f7SAlexey Kodanev #ifdef CONFIG_IPV6_SUBTREES
23417d6850f7SAlexey Kodanev 	struct ipv6_pinfo *np = inet6_sk(sk);
23427d6850f7SAlexey Kodanev #endif
23437d6850f7SAlexey Kodanev 
23447d6850f7SAlexey Kodanev 	ip6_dst_store(sk, dst,
23457d6850f7SAlexey Kodanev 		      ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
23467d6850f7SAlexey Kodanev 		      &sk->sk_v6_daddr : NULL,
23477d6850f7SAlexey Kodanev #ifdef CONFIG_IPV6_SUBTREES
23487d6850f7SAlexey Kodanev 		      ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
23497d6850f7SAlexey Kodanev 		      &np->saddr :
23507d6850f7SAlexey Kodanev #endif
23517d6850f7SAlexey Kodanev 		      NULL);
23527d6850f7SAlexey Kodanev }
23537d6850f7SAlexey Kodanev 
2354b55b76b2SDuan Jiong /* Handle redirects */
2355b55b76b2SDuan Jiong struct ip6rd_flowi {
2356b55b76b2SDuan Jiong 	struct flowi6 fl6;
2357b55b76b2SDuan Jiong 	struct in6_addr gateway;
2358b55b76b2SDuan Jiong };
2359b55b76b2SDuan Jiong 
2360b55b76b2SDuan Jiong static struct rt6_info *__ip6_route_redirect(struct net *net,
2361b55b76b2SDuan Jiong 					     struct fib6_table *table,
2362b55b76b2SDuan Jiong 					     struct flowi6 *fl6,
2363b75cc8f9SDavid Ahern 					     const struct sk_buff *skb,
2364b55b76b2SDuan Jiong 					     int flags)
2365b55b76b2SDuan Jiong {
2366b55b76b2SDuan Jiong 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
23672b760fcfSWei Wang 	struct rt6_info *rt, *rt_cache;
2368b55b76b2SDuan Jiong 	struct fib6_node *fn;
2369b55b76b2SDuan Jiong 
2370b55b76b2SDuan Jiong 	/* Get the "current" route for this destination and
237167c408cfSAlexander Alemayhu 	 * check if the redirect has come from appropriate router.
2372b55b76b2SDuan Jiong 	 *
2373b55b76b2SDuan Jiong 	 * RFC 4861 specifies that redirects should only be
2374b55b76b2SDuan Jiong 	 * accepted if they come from the nexthop to the target.
2375b55b76b2SDuan Jiong 	 * Due to the way the routes are chosen, this notion
2376b55b76b2SDuan Jiong 	 * is a bit fuzzy and one might need to check all possible
2377b55b76b2SDuan Jiong 	 * routes.
2378b55b76b2SDuan Jiong 	 */
2379b55b76b2SDuan Jiong 
238066f5d6ceSWei Wang 	rcu_read_lock();
2381b55b76b2SDuan Jiong 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2382b55b76b2SDuan Jiong restart:
238366f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(fn) {
23845e670d84SDavid Ahern 		if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
23858067bb8cSIdo Schimmel 			continue;
2386b55b76b2SDuan Jiong 		if (rt6_check_expired(rt))
2387b55b76b2SDuan Jiong 			continue;
23886edb3c96SDavid Ahern 		if (rt->rt6i_flags & RTF_REJECT)
2389b55b76b2SDuan Jiong 			break;
2390b55b76b2SDuan Jiong 		if (!(rt->rt6i_flags & RTF_GATEWAY))
2391b55b76b2SDuan Jiong 			continue;
23925e670d84SDavid Ahern 		if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
2393b55b76b2SDuan Jiong 			continue;
23942b760fcfSWei Wang 		/* rt_cache's gateway might be different from its 'parent'
23952b760fcfSWei Wang 		 * in the case of an ip redirect.
23962b760fcfSWei Wang 		 * So we keep searching in the exception table if the gateway
23972b760fcfSWei Wang 		 * is different.
23982b760fcfSWei Wang 		 */
23995e670d84SDavid Ahern 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
24002b760fcfSWei Wang 			rt_cache = rt6_find_cached_rt(rt,
24012b760fcfSWei Wang 						      &fl6->daddr,
24022b760fcfSWei Wang 						      &fl6->saddr);
24032b760fcfSWei Wang 			if (rt_cache &&
24042b760fcfSWei Wang 			    ipv6_addr_equal(&rdfl->gateway,
24052b760fcfSWei Wang 					    &rt_cache->rt6i_gateway)) {
24062b760fcfSWei Wang 				rt = rt_cache;
24072b760fcfSWei Wang 				break;
24082b760fcfSWei Wang 			}
2409b55b76b2SDuan Jiong 			continue;
24102b760fcfSWei Wang 		}
2411b55b76b2SDuan Jiong 		break;
2412b55b76b2SDuan Jiong 	}
2413b55b76b2SDuan Jiong 
2414b55b76b2SDuan Jiong 	if (!rt)
2415b55b76b2SDuan Jiong 		rt = net->ipv6.ip6_null_entry;
24166edb3c96SDavid Ahern 	else if (rt->rt6i_flags & RTF_REJECT) {
2417b55b76b2SDuan Jiong 		rt = net->ipv6.ip6_null_entry;
2418b0a1ba59SMartin KaFai Lau 		goto out;
2419b0a1ba59SMartin KaFai Lau 	}
2420b0a1ba59SMartin KaFai Lau 
2421b0a1ba59SMartin KaFai Lau 	if (rt == net->ipv6.ip6_null_entry) {
2422a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
2423a3c00e46SMartin KaFai Lau 		if (fn)
2424a3c00e46SMartin KaFai Lau 			goto restart;
2425b55b76b2SDuan Jiong 	}
2426a3c00e46SMartin KaFai Lau 
2427b0a1ba59SMartin KaFai Lau out:
2428d3843fe5SWei Wang 	ip6_hold_safe(net, &rt, true);
2429b55b76b2SDuan Jiong 
243066f5d6ceSWei Wang 	rcu_read_unlock();
2431b55b76b2SDuan Jiong 
2432b65f164dSPaolo Abeni 	trace_fib6_table_lookup(net, rt, table, fl6);
2433b55b76b2SDuan Jiong 	return rt;
2434b55b76b2SDuan Jiong };
2435b55b76b2SDuan Jiong 
2436b55b76b2SDuan Jiong static struct dst_entry *ip6_route_redirect(struct net *net,
2437b55b76b2SDuan Jiong 					    const struct flowi6 *fl6,
2438b75cc8f9SDavid Ahern 					    const struct sk_buff *skb,
2439b55b76b2SDuan Jiong 					    const struct in6_addr *gateway)
2440b55b76b2SDuan Jiong {
2441b55b76b2SDuan Jiong 	int flags = RT6_LOOKUP_F_HAS_SADDR;
2442b55b76b2SDuan Jiong 	struct ip6rd_flowi rdfl;
2443b55b76b2SDuan Jiong 
2444b55b76b2SDuan Jiong 	rdfl.fl6 = *fl6;
2445b55b76b2SDuan Jiong 	rdfl.gateway = *gateway;
2446b55b76b2SDuan Jiong 
2447b75cc8f9SDavid Ahern 	return fib6_rule_lookup(net, &rdfl.fl6, skb,
2448b55b76b2SDuan Jiong 				flags, __ip6_route_redirect);
2449b55b76b2SDuan Jiong }
2450b55b76b2SDuan Jiong 
2451e2d118a1SLorenzo Colitti void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2452e2d118a1SLorenzo Colitti 		  kuid_t uid)
24533a5ad2eeSDavid S. Miller {
24543a5ad2eeSDavid S. Miller 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
24553a5ad2eeSDavid S. Miller 	struct dst_entry *dst;
24563a5ad2eeSDavid S. Miller 	struct flowi6 fl6;
24573a5ad2eeSDavid S. Miller 
24583a5ad2eeSDavid S. Miller 	memset(&fl6, 0, sizeof(fl6));
2459e374c618SJulian Anastasov 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
24603a5ad2eeSDavid S. Miller 	fl6.flowi6_oif = oif;
24613a5ad2eeSDavid S. Miller 	fl6.flowi6_mark = mark;
24623a5ad2eeSDavid S. Miller 	fl6.daddr = iph->daddr;
24633a5ad2eeSDavid S. Miller 	fl6.saddr = iph->saddr;
24646502ca52SYOSHIFUJI Hideaki / 吉藤英明 	fl6.flowlabel = ip6_flowinfo(iph);
2465e2d118a1SLorenzo Colitti 	fl6.flowi6_uid = uid;
24663a5ad2eeSDavid S. Miller 
2467b75cc8f9SDavid Ahern 	dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
24686700c270SDavid S. Miller 	rt6_do_redirect(dst, NULL, skb);
24693a5ad2eeSDavid S. Miller 	dst_release(dst);
24703a5ad2eeSDavid S. Miller }
24713a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_redirect);
24723a5ad2eeSDavid S. Miller 
2473c92a59ecSDuan Jiong void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2474c92a59ecSDuan Jiong 			    u32 mark)
2475c92a59ecSDuan Jiong {
2476c92a59ecSDuan Jiong 	const struct ipv6hdr *iph = ipv6_hdr(skb);
2477c92a59ecSDuan Jiong 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2478c92a59ecSDuan Jiong 	struct dst_entry *dst;
2479c92a59ecSDuan Jiong 	struct flowi6 fl6;
2480c92a59ecSDuan Jiong 
2481c92a59ecSDuan Jiong 	memset(&fl6, 0, sizeof(fl6));
2482e374c618SJulian Anastasov 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
2483c92a59ecSDuan Jiong 	fl6.flowi6_oif = oif;
2484c92a59ecSDuan Jiong 	fl6.flowi6_mark = mark;
2485c92a59ecSDuan Jiong 	fl6.daddr = msg->dest;
2486c92a59ecSDuan Jiong 	fl6.saddr = iph->daddr;
2487e2d118a1SLorenzo Colitti 	fl6.flowi6_uid = sock_net_uid(net, NULL);
2488c92a59ecSDuan Jiong 
2489b75cc8f9SDavid Ahern 	dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
2490c92a59ecSDuan Jiong 	rt6_do_redirect(dst, NULL, skb);
2491c92a59ecSDuan Jiong 	dst_release(dst);
2492c92a59ecSDuan Jiong }
2493c92a59ecSDuan Jiong 
24943a5ad2eeSDavid S. Miller void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
24953a5ad2eeSDavid S. Miller {
2496e2d118a1SLorenzo Colitti 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2497e2d118a1SLorenzo Colitti 		     sk->sk_uid);
24983a5ad2eeSDavid S. Miller }
24993a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_redirect);
25003a5ad2eeSDavid S. Miller 
25010dbaee3bSDavid S. Miller static unsigned int ip6_default_advmss(const struct dst_entry *dst)
25021da177e4SLinus Torvalds {
25030dbaee3bSDavid S. Miller 	struct net_device *dev = dst->dev;
25040dbaee3bSDavid S. Miller 	unsigned int mtu = dst_mtu(dst);
25050dbaee3bSDavid S. Miller 	struct net *net = dev_net(dev);
25060dbaee3bSDavid S. Miller 
25071da177e4SLinus Torvalds 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
25081da177e4SLinus Torvalds 
25095578689aSDaniel Lezcano 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
25105578689aSDaniel Lezcano 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
25111da177e4SLinus Torvalds 
25121da177e4SLinus Torvalds 	/*
25131da177e4SLinus Torvalds 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
25141da177e4SLinus Torvalds 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
25151da177e4SLinus Torvalds 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
25161da177e4SLinus Torvalds 	 * rely only on pmtu discovery"
25171da177e4SLinus Torvalds 	 */
25181da177e4SLinus Torvalds 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
25191da177e4SLinus Torvalds 		mtu = IPV6_MAXPLEN;
25201da177e4SLinus Torvalds 	return mtu;
25211da177e4SLinus Torvalds }
25221da177e4SLinus Torvalds 
2523ebb762f2SSteffen Klassert static unsigned int ip6_mtu(const struct dst_entry *dst)
2524d33e4553SDavid S. Miller {
2525d33e4553SDavid S. Miller 	struct inet6_dev *idev;
2526*d4ead6b3SDavid Ahern 	unsigned int mtu;
2527618f9bc7SSteffen Klassert 
25284b32b5adSMartin KaFai Lau 	mtu = dst_metric_raw(dst, RTAX_MTU);
25294b32b5adSMartin KaFai Lau 	if (mtu)
25304b32b5adSMartin KaFai Lau 		goto out;
25314b32b5adSMartin KaFai Lau 
2532618f9bc7SSteffen Klassert 	mtu = IPV6_MIN_MTU;
2533d33e4553SDavid S. Miller 
2534d33e4553SDavid S. Miller 	rcu_read_lock();
2535d33e4553SDavid S. Miller 	idev = __in6_dev_get(dst->dev);
2536d33e4553SDavid S. Miller 	if (idev)
2537d33e4553SDavid S. Miller 		mtu = idev->cnf.mtu6;
2538d33e4553SDavid S. Miller 	rcu_read_unlock();
2539d33e4553SDavid S. Miller 
254030f78d8eSEric Dumazet out:
254114972cbdSRoopa Prabhu 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
254214972cbdSRoopa Prabhu 
254314972cbdSRoopa Prabhu 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
2544d33e4553SDavid S. Miller }
2545d33e4553SDavid S. Miller 
25463b00944cSYOSHIFUJI Hideaki struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
254787a11578SDavid S. Miller 				  struct flowi6 *fl6)
25481da177e4SLinus Torvalds {
254987a11578SDavid S. Miller 	struct dst_entry *dst;
25501da177e4SLinus Torvalds 	struct rt6_info *rt;
25511da177e4SLinus Torvalds 	struct inet6_dev *idev = in6_dev_get(dev);
2552c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
25531da177e4SLinus Torvalds 
255438308473SDavid S. Miller 	if (unlikely(!idev))
2555122bdf67SEric Dumazet 		return ERR_PTR(-ENODEV);
25561da177e4SLinus Torvalds 
2557ad706862SMartin KaFai Lau 	rt = ip6_dst_alloc(net, dev, 0);
255838308473SDavid S. Miller 	if (unlikely(!rt)) {
25591da177e4SLinus Torvalds 		in6_dev_put(idev);
256087a11578SDavid S. Miller 		dst = ERR_PTR(-ENOMEM);
25611da177e4SLinus Torvalds 		goto out;
25621da177e4SLinus Torvalds 	}
25631da177e4SLinus Torvalds 
25648e2ec639SYan, Zheng 	rt->dst.flags |= DST_HOST;
2565588753f1SBrendan McGrath 	rt->dst.input = ip6_input;
25668e2ec639SYan, Zheng 	rt->dst.output  = ip6_output;
2567550bab42SJulian Anastasov 	rt->rt6i_gateway  = fl6->daddr;
256887a11578SDavid S. Miller 	rt->rt6i_dst.addr = fl6->daddr;
25698e2ec639SYan, Zheng 	rt->rt6i_dst.plen = 128;
25708e2ec639SYan, Zheng 	rt->rt6i_idev     = idev;
257114edd87dSLi RongQing 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
25721da177e4SLinus Torvalds 
25734c981e28SIdo Schimmel 	/* Add this dst into uncached_list so that rt6_disable_ip() can
2574587fea74SWei Wang 	 * do proper release of the net_device
2575587fea74SWei Wang 	 */
2576587fea74SWei Wang 	rt6_uncached_list_add(rt);
257781eb8447SWei Wang 	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
25781da177e4SLinus Torvalds 
257987a11578SDavid S. Miller 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
258087a11578SDavid S. Miller 
25811da177e4SLinus Torvalds out:
258287a11578SDavid S. Miller 	return dst;
25831da177e4SLinus Torvalds }
25841da177e4SLinus Torvalds 
2585569d3645SDaniel Lezcano static int ip6_dst_gc(struct dst_ops *ops)
25861da177e4SLinus Torvalds {
258786393e52SAlexey Dobriyan 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
25887019b78eSDaniel Lezcano 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
25897019b78eSDaniel Lezcano 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
25907019b78eSDaniel Lezcano 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
25917019b78eSDaniel Lezcano 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
25927019b78eSDaniel Lezcano 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
2593fc66f95cSEric Dumazet 	int entries;
25941da177e4SLinus Torvalds 
2595fc66f95cSEric Dumazet 	entries = dst_entries_get_fast(ops);
259649a18d86SMichal Kubeček 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
2597fc66f95cSEric Dumazet 	    entries <= rt_max_size)
25981da177e4SLinus Torvalds 		goto out;
25991da177e4SLinus Torvalds 
26006891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire++;
260114956643SLi RongQing 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
2602fc66f95cSEric Dumazet 	entries = dst_entries_get_slow(ops);
2603fc66f95cSEric Dumazet 	if (entries < ops->gc_thresh)
26047019b78eSDaniel Lezcano 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
26051da177e4SLinus Torvalds out:
26067019b78eSDaniel Lezcano 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
2607fc66f95cSEric Dumazet 	return entries > rt_max_size;
26081da177e4SLinus Torvalds }
26091da177e4SLinus Torvalds 
2610*d4ead6b3SDavid Ahern static int ip6_convert_metrics(struct net *net, struct rt6_info *rt,
2611*d4ead6b3SDavid Ahern 			       struct fib6_config *cfg)
2612e715b6d3SFlorian Westphal {
2613*d4ead6b3SDavid Ahern 	int err = 0;
2614e715b6d3SFlorian Westphal 
2615*d4ead6b3SDavid Ahern 	if (cfg->fc_mx) {
2616*d4ead6b3SDavid Ahern 		rt->fib6_metrics = kzalloc(sizeof(*rt->fib6_metrics),
2617*d4ead6b3SDavid Ahern 					   GFP_KERNEL);
2618*d4ead6b3SDavid Ahern 		if (unlikely(!rt->fib6_metrics))
2619e715b6d3SFlorian Westphal 			return -ENOMEM;
2620e715b6d3SFlorian Westphal 
2621*d4ead6b3SDavid Ahern 		refcount_set(&rt->fib6_metrics->refcnt, 1);
2622ea697639SDaniel Borkmann 
2623*d4ead6b3SDavid Ahern 		err = ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len,
2624*d4ead6b3SDavid Ahern 					 rt->fib6_metrics->metrics);
2625e715b6d3SFlorian Westphal 	}
2626e715b6d3SFlorian Westphal 
2627*d4ead6b3SDavid Ahern 	return err;
2628e715b6d3SFlorian Westphal }
26291da177e4SLinus Torvalds 
26308c14586fSDavid Ahern static struct rt6_info *ip6_nh_lookup_table(struct net *net,
26318c14586fSDavid Ahern 					    struct fib6_config *cfg,
2632f4797b33SDavid Ahern 					    const struct in6_addr *gw_addr,
2633f4797b33SDavid Ahern 					    u32 tbid, int flags)
26348c14586fSDavid Ahern {
26358c14586fSDavid Ahern 	struct flowi6 fl6 = {
26368c14586fSDavid Ahern 		.flowi6_oif = cfg->fc_ifindex,
26378c14586fSDavid Ahern 		.daddr = *gw_addr,
26388c14586fSDavid Ahern 		.saddr = cfg->fc_prefsrc,
26398c14586fSDavid Ahern 	};
26408c14586fSDavid Ahern 	struct fib6_table *table;
26418c14586fSDavid Ahern 	struct rt6_info *rt;
26428c14586fSDavid Ahern 
2643f4797b33SDavid Ahern 	table = fib6_get_table(net, tbid);
26448c14586fSDavid Ahern 	if (!table)
26458c14586fSDavid Ahern 		return NULL;
26468c14586fSDavid Ahern 
26478c14586fSDavid Ahern 	if (!ipv6_addr_any(&cfg->fc_prefsrc))
26488c14586fSDavid Ahern 		flags |= RT6_LOOKUP_F_HAS_SADDR;
26498c14586fSDavid Ahern 
2650f4797b33SDavid Ahern 	flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
2651b75cc8f9SDavid Ahern 	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
26528c14586fSDavid Ahern 
26538c14586fSDavid Ahern 	/* if table lookup failed, fall back to full lookup */
26548c14586fSDavid Ahern 	if (rt == net->ipv6.ip6_null_entry) {
26558c14586fSDavid Ahern 		ip6_rt_put(rt);
26568c14586fSDavid Ahern 		rt = NULL;
26578c14586fSDavid Ahern 	}
26588c14586fSDavid Ahern 
26598c14586fSDavid Ahern 	return rt;
26608c14586fSDavid Ahern }
26618c14586fSDavid Ahern 
2662fc1e64e1SDavid Ahern static int ip6_route_check_nh_onlink(struct net *net,
2663fc1e64e1SDavid Ahern 				     struct fib6_config *cfg,
26649fbb704cSDavid Ahern 				     const struct net_device *dev,
2665fc1e64e1SDavid Ahern 				     struct netlink_ext_ack *extack)
2666fc1e64e1SDavid Ahern {
266744750f84SDavid Ahern 	u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
2668fc1e64e1SDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
2669fc1e64e1SDavid Ahern 	u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2670fc1e64e1SDavid Ahern 	struct rt6_info *grt;
2671fc1e64e1SDavid Ahern 	int err;
2672fc1e64e1SDavid Ahern 
2673fc1e64e1SDavid Ahern 	err = 0;
2674fc1e64e1SDavid Ahern 	grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2675fc1e64e1SDavid Ahern 	if (grt) {
267658e354c0SDavid Ahern 		if (!grt->dst.error &&
267758e354c0SDavid Ahern 		    (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
267844750f84SDavid Ahern 			NL_SET_ERR_MSG(extack,
267944750f84SDavid Ahern 				       "Nexthop has invalid gateway or device mismatch");
2680fc1e64e1SDavid Ahern 			err = -EINVAL;
2681fc1e64e1SDavid Ahern 		}
2682fc1e64e1SDavid Ahern 
2683fc1e64e1SDavid Ahern 		ip6_rt_put(grt);
2684fc1e64e1SDavid Ahern 	}
2685fc1e64e1SDavid Ahern 
2686fc1e64e1SDavid Ahern 	return err;
2687fc1e64e1SDavid Ahern }
2688fc1e64e1SDavid Ahern 
26891edce99fSDavid Ahern static int ip6_route_check_nh(struct net *net,
26901edce99fSDavid Ahern 			      struct fib6_config *cfg,
26911edce99fSDavid Ahern 			      struct net_device **_dev,
26921edce99fSDavid Ahern 			      struct inet6_dev **idev)
26931edce99fSDavid Ahern {
26941edce99fSDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
26951edce99fSDavid Ahern 	struct net_device *dev = _dev ? *_dev : NULL;
26961edce99fSDavid Ahern 	struct rt6_info *grt = NULL;
26971edce99fSDavid Ahern 	int err = -EHOSTUNREACH;
26981edce99fSDavid Ahern 
26991edce99fSDavid Ahern 	if (cfg->fc_table) {
2700f4797b33SDavid Ahern 		int flags = RT6_LOOKUP_F_IFACE;
2701f4797b33SDavid Ahern 
2702f4797b33SDavid Ahern 		grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2703f4797b33SDavid Ahern 					  cfg->fc_table, flags);
27041edce99fSDavid Ahern 		if (grt) {
27051edce99fSDavid Ahern 			if (grt->rt6i_flags & RTF_GATEWAY ||
27061edce99fSDavid Ahern 			    (dev && dev != grt->dst.dev)) {
27071edce99fSDavid Ahern 				ip6_rt_put(grt);
27081edce99fSDavid Ahern 				grt = NULL;
27091edce99fSDavid Ahern 			}
27101edce99fSDavid Ahern 		}
27111edce99fSDavid Ahern 	}
27121edce99fSDavid Ahern 
27131edce99fSDavid Ahern 	if (!grt)
2714b75cc8f9SDavid Ahern 		grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
27151edce99fSDavid Ahern 
27161edce99fSDavid Ahern 	if (!grt)
27171edce99fSDavid Ahern 		goto out;
27181edce99fSDavid Ahern 
27191edce99fSDavid Ahern 	if (dev) {
27201edce99fSDavid Ahern 		if (dev != grt->dst.dev) {
27211edce99fSDavid Ahern 			ip6_rt_put(grt);
27221edce99fSDavid Ahern 			goto out;
27231edce99fSDavid Ahern 		}
27241edce99fSDavid Ahern 	} else {
27251edce99fSDavid Ahern 		*_dev = dev = grt->dst.dev;
27261edce99fSDavid Ahern 		*idev = grt->rt6i_idev;
27271edce99fSDavid Ahern 		dev_hold(dev);
27281edce99fSDavid Ahern 		in6_dev_hold(grt->rt6i_idev);
27291edce99fSDavid Ahern 	}
27301edce99fSDavid Ahern 
27311edce99fSDavid Ahern 	if (!(grt->rt6i_flags & RTF_GATEWAY))
27321edce99fSDavid Ahern 		err = 0;
27331edce99fSDavid Ahern 
27341edce99fSDavid Ahern 	ip6_rt_put(grt);
27351edce99fSDavid Ahern 
27361edce99fSDavid Ahern out:
27371edce99fSDavid Ahern 	return err;
27381edce99fSDavid Ahern }
27391edce99fSDavid Ahern 
27409fbb704cSDavid Ahern static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
27419fbb704cSDavid Ahern 			   struct net_device **_dev, struct inet6_dev **idev,
27429fbb704cSDavid Ahern 			   struct netlink_ext_ack *extack)
27439fbb704cSDavid Ahern {
27449fbb704cSDavid Ahern 	const struct in6_addr *gw_addr = &cfg->fc_gateway;
27459fbb704cSDavid Ahern 	int gwa_type = ipv6_addr_type(gw_addr);
2746232378e8SDavid Ahern 	bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
27479fbb704cSDavid Ahern 	const struct net_device *dev = *_dev;
2748232378e8SDavid Ahern 	bool need_addr_check = !dev;
27499fbb704cSDavid Ahern 	int err = -EINVAL;
27509fbb704cSDavid Ahern 
27519fbb704cSDavid Ahern 	/* if gw_addr is local we will fail to detect this in case
27529fbb704cSDavid Ahern 	 * address is still TENTATIVE (DAD in progress). rt6_lookup()
27539fbb704cSDavid Ahern 	 * will return already-added prefix route via interface that
27549fbb704cSDavid Ahern 	 * prefix route was assigned to, which might be non-loopback.
27559fbb704cSDavid Ahern 	 */
2756232378e8SDavid Ahern 	if (dev &&
2757232378e8SDavid Ahern 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2758232378e8SDavid Ahern 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
27599fbb704cSDavid Ahern 		goto out;
27609fbb704cSDavid Ahern 	}
27619fbb704cSDavid Ahern 
27629fbb704cSDavid Ahern 	if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
27639fbb704cSDavid Ahern 		/* IPv6 strictly inhibits using not link-local
27649fbb704cSDavid Ahern 		 * addresses as nexthop address.
27659fbb704cSDavid Ahern 		 * Otherwise, router will not able to send redirects.
27669fbb704cSDavid Ahern 		 * It is very good, but in some (rare!) circumstances
27679fbb704cSDavid Ahern 		 * (SIT, PtP, NBMA NOARP links) it is handy to allow
27689fbb704cSDavid Ahern 		 * some exceptions. --ANK
27699fbb704cSDavid Ahern 		 * We allow IPv4-mapped nexthops to support RFC4798-type
27709fbb704cSDavid Ahern 		 * addressing
27719fbb704cSDavid Ahern 		 */
27729fbb704cSDavid Ahern 		if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
27739fbb704cSDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid gateway address");
27749fbb704cSDavid Ahern 			goto out;
27759fbb704cSDavid Ahern 		}
27769fbb704cSDavid Ahern 
27779fbb704cSDavid Ahern 		if (cfg->fc_flags & RTNH_F_ONLINK)
27789fbb704cSDavid Ahern 			err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
27799fbb704cSDavid Ahern 		else
27809fbb704cSDavid Ahern 			err = ip6_route_check_nh(net, cfg, _dev, idev);
27819fbb704cSDavid Ahern 
27829fbb704cSDavid Ahern 		if (err)
27839fbb704cSDavid Ahern 			goto out;
27849fbb704cSDavid Ahern 	}
27859fbb704cSDavid Ahern 
27869fbb704cSDavid Ahern 	/* reload in case device was changed */
27879fbb704cSDavid Ahern 	dev = *_dev;
27889fbb704cSDavid Ahern 
27899fbb704cSDavid Ahern 	err = -EINVAL;
27909fbb704cSDavid Ahern 	if (!dev) {
27919fbb704cSDavid Ahern 		NL_SET_ERR_MSG(extack, "Egress device not specified");
27929fbb704cSDavid Ahern 		goto out;
27939fbb704cSDavid Ahern 	} else if (dev->flags & IFF_LOOPBACK) {
27949fbb704cSDavid Ahern 		NL_SET_ERR_MSG(extack,
27959fbb704cSDavid Ahern 			       "Egress device can not be loopback device for this route");
27969fbb704cSDavid Ahern 		goto out;
27979fbb704cSDavid Ahern 	}
2798232378e8SDavid Ahern 
2799232378e8SDavid Ahern 	/* if we did not check gw_addr above, do so now that the
2800232378e8SDavid Ahern 	 * egress device has been resolved.
2801232378e8SDavid Ahern 	 */
2802232378e8SDavid Ahern 	if (need_addr_check &&
2803232378e8SDavid Ahern 	    ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2804232378e8SDavid Ahern 		NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2805232378e8SDavid Ahern 		goto out;
2806232378e8SDavid Ahern 	}
2807232378e8SDavid Ahern 
28089fbb704cSDavid Ahern 	err = 0;
28099fbb704cSDavid Ahern out:
28109fbb704cSDavid Ahern 	return err;
28119fbb704cSDavid Ahern }
28129fbb704cSDavid Ahern 
2813333c4301SDavid Ahern static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2814333c4301SDavid Ahern 					      struct netlink_ext_ack *extack)
28151da177e4SLinus Torvalds {
28165578689aSDaniel Lezcano 	struct net *net = cfg->fc_nlinfo.nl_net;
28171da177e4SLinus Torvalds 	struct rt6_info *rt = NULL;
28181da177e4SLinus Torvalds 	struct net_device *dev = NULL;
28191da177e4SLinus Torvalds 	struct inet6_dev *idev = NULL;
2820c71099acSThomas Graf 	struct fib6_table *table;
28211da177e4SLinus Torvalds 	int addr_type;
28228c5b83f0SRoopa Prabhu 	int err = -EINVAL;
28231da177e4SLinus Torvalds 
2824557c44beSDavid Ahern 	/* RTF_PCPU is an internal flag; can not be set by userspace */
2825d5d531cbSDavid Ahern 	if (cfg->fc_flags & RTF_PCPU) {
2826d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
2827557c44beSDavid Ahern 		goto out;
2828d5d531cbSDavid Ahern 	}
2829557c44beSDavid Ahern 
28302ea2352eSWei Wang 	/* RTF_CACHE is an internal flag; can not be set by userspace */
28312ea2352eSWei Wang 	if (cfg->fc_flags & RTF_CACHE) {
28322ea2352eSWei Wang 		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
28332ea2352eSWei Wang 		goto out;
28342ea2352eSWei Wang 	}
28352ea2352eSWei Wang 
2836e8478e80SDavid Ahern 	if (cfg->fc_type > RTN_MAX) {
2837e8478e80SDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid route type");
2838e8478e80SDavid Ahern 		goto out;
2839e8478e80SDavid Ahern 	}
2840e8478e80SDavid Ahern 
2841d5d531cbSDavid Ahern 	if (cfg->fc_dst_len > 128) {
2842d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid prefix length");
28438c5b83f0SRoopa Prabhu 		goto out;
2844d5d531cbSDavid Ahern 	}
2845d5d531cbSDavid Ahern 	if (cfg->fc_src_len > 128) {
2846d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Invalid source address length");
2847d5d531cbSDavid Ahern 		goto out;
2848d5d531cbSDavid Ahern 	}
28491da177e4SLinus Torvalds #ifndef CONFIG_IPV6_SUBTREES
2850d5d531cbSDavid Ahern 	if (cfg->fc_src_len) {
2851d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack,
2852d5d531cbSDavid Ahern 			       "Specifying source address requires IPV6_SUBTREES to be enabled");
28538c5b83f0SRoopa Prabhu 		goto out;
2854d5d531cbSDavid Ahern 	}
28551da177e4SLinus Torvalds #endif
285686872cb5SThomas Graf 	if (cfg->fc_ifindex) {
28571da177e4SLinus Torvalds 		err = -ENODEV;
28585578689aSDaniel Lezcano 		dev = dev_get_by_index(net, cfg->fc_ifindex);
28591da177e4SLinus Torvalds 		if (!dev)
28601da177e4SLinus Torvalds 			goto out;
28611da177e4SLinus Torvalds 		idev = in6_dev_get(dev);
28621da177e4SLinus Torvalds 		if (!idev)
28631da177e4SLinus Torvalds 			goto out;
28641da177e4SLinus Torvalds 	}
28651da177e4SLinus Torvalds 
286686872cb5SThomas Graf 	if (cfg->fc_metric == 0)
286786872cb5SThomas Graf 		cfg->fc_metric = IP6_RT_PRIO_USER;
28681da177e4SLinus Torvalds 
2869fc1e64e1SDavid Ahern 	if (cfg->fc_flags & RTNH_F_ONLINK) {
2870fc1e64e1SDavid Ahern 		if (!dev) {
2871fc1e64e1SDavid Ahern 			NL_SET_ERR_MSG(extack,
2872fc1e64e1SDavid Ahern 				       "Nexthop device required for onlink");
2873fc1e64e1SDavid Ahern 			err = -ENODEV;
2874fc1e64e1SDavid Ahern 			goto out;
2875fc1e64e1SDavid Ahern 		}
2876fc1e64e1SDavid Ahern 
2877fc1e64e1SDavid Ahern 		if (!(dev->flags & IFF_UP)) {
2878fc1e64e1SDavid Ahern 			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2879fc1e64e1SDavid Ahern 			err = -ENETDOWN;
2880fc1e64e1SDavid Ahern 			goto out;
2881fc1e64e1SDavid Ahern 		}
2882fc1e64e1SDavid Ahern 	}
2883fc1e64e1SDavid Ahern 
2884c71099acSThomas Graf 	err = -ENOBUFS;
288538308473SDavid S. Miller 	if (cfg->fc_nlinfo.nlh &&
2886d71314b4SMatti Vaittinen 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
2887d71314b4SMatti Vaittinen 		table = fib6_get_table(net, cfg->fc_table);
288838308473SDavid S. Miller 		if (!table) {
2889f3213831SJoe Perches 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
2890d71314b4SMatti Vaittinen 			table = fib6_new_table(net, cfg->fc_table);
2891d71314b4SMatti Vaittinen 		}
2892d71314b4SMatti Vaittinen 	} else {
2893d71314b4SMatti Vaittinen 		table = fib6_new_table(net, cfg->fc_table);
2894d71314b4SMatti Vaittinen 	}
289538308473SDavid S. Miller 
289638308473SDavid S. Miller 	if (!table)
2897c71099acSThomas Graf 		goto out;
2898c71099acSThomas Graf 
2899ad706862SMartin KaFai Lau 	rt = ip6_dst_alloc(net, NULL,
2900ad706862SMartin KaFai Lau 			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
29011da177e4SLinus Torvalds 
290238308473SDavid S. Miller 	if (!rt) {
29031da177e4SLinus Torvalds 		err = -ENOMEM;
29041da177e4SLinus Torvalds 		goto out;
29051da177e4SLinus Torvalds 	}
29061da177e4SLinus Torvalds 
2907*d4ead6b3SDavid Ahern 	err = ip6_convert_metrics(net, rt, cfg);
2908*d4ead6b3SDavid Ahern 	if (err < 0)
2909*d4ead6b3SDavid Ahern 		goto out;
2910*d4ead6b3SDavid Ahern 
29111716a961SGao feng 	if (cfg->fc_flags & RTF_EXPIRES)
29121716a961SGao feng 		rt6_set_expires(rt, jiffies +
29131716a961SGao feng 				clock_t_to_jiffies(cfg->fc_expires));
29141716a961SGao feng 	else
29151716a961SGao feng 		rt6_clean_expires(rt);
29161da177e4SLinus Torvalds 
291786872cb5SThomas Graf 	if (cfg->fc_protocol == RTPROT_UNSPEC)
291886872cb5SThomas Graf 		cfg->fc_protocol = RTPROT_BOOT;
291986872cb5SThomas Graf 	rt->rt6i_protocol = cfg->fc_protocol;
292086872cb5SThomas Graf 
292186872cb5SThomas Graf 	addr_type = ipv6_addr_type(&cfg->fc_dst);
29221da177e4SLinus Torvalds 
292319e42e45SRoopa Prabhu 	if (cfg->fc_encap) {
292419e42e45SRoopa Prabhu 		struct lwtunnel_state *lwtstate;
292519e42e45SRoopa Prabhu 
292630357d7dSDavid Ahern 		err = lwtunnel_build_state(cfg->fc_encap_type,
2927127eb7cdSTom Herbert 					   cfg->fc_encap, AF_INET6, cfg,
29289ae28727SDavid Ahern 					   &lwtstate, extack);
292919e42e45SRoopa Prabhu 		if (err)
293019e42e45SRoopa Prabhu 			goto out;
29315e670d84SDavid Ahern 		rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
293225368623STom Herbert 	}
293319e42e45SRoopa Prabhu 
293486872cb5SThomas Graf 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
293586872cb5SThomas Graf 	rt->rt6i_dst.plen = cfg->fc_dst_len;
2936afc4eef8SMartin KaFai Lau 	if (rt->rt6i_dst.plen == 128)
293711d53b49SDavid S. Miller 		rt->dst.flags |= DST_HOST;
29381da177e4SLinus Torvalds 
29391da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
294086872cb5SThomas Graf 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
294186872cb5SThomas Graf 	rt->rt6i_src.plen = cfg->fc_src_len;
29421da177e4SLinus Torvalds #endif
29431da177e4SLinus Torvalds 
294486872cb5SThomas Graf 	rt->rt6i_metric = cfg->fc_metric;
29455e670d84SDavid Ahern 	rt->fib6_nh.nh_weight = 1;
29461da177e4SLinus Torvalds 
2947e8478e80SDavid Ahern 	rt->fib6_type = cfg->fc_type;
2948e8478e80SDavid Ahern 
29491da177e4SLinus Torvalds 	/* We cannot add true routes via loopback here,
29501da177e4SLinus Torvalds 	   they would result in kernel looping; promote them to reject routes
29511da177e4SLinus Torvalds 	 */
295286872cb5SThomas Graf 	if ((cfg->fc_flags & RTF_REJECT) ||
295338308473SDavid S. Miller 	    (dev && (dev->flags & IFF_LOOPBACK) &&
295438308473SDavid S. Miller 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
295538308473SDavid S. Miller 	     !(cfg->fc_flags & RTF_LOCAL))) {
29561da177e4SLinus Torvalds 		/* hold loopback dev/idev if we haven't done so. */
29575578689aSDaniel Lezcano 		if (dev != net->loopback_dev) {
29581da177e4SLinus Torvalds 			if (dev) {
29591da177e4SLinus Torvalds 				dev_put(dev);
29601da177e4SLinus Torvalds 				in6_dev_put(idev);
29611da177e4SLinus Torvalds 			}
29625578689aSDaniel Lezcano 			dev = net->loopback_dev;
29631da177e4SLinus Torvalds 			dev_hold(dev);
29641da177e4SLinus Torvalds 			idev = in6_dev_get(dev);
29651da177e4SLinus Torvalds 			if (!idev) {
29661da177e4SLinus Torvalds 				err = -ENODEV;
29671da177e4SLinus Torvalds 				goto out;
29681da177e4SLinus Torvalds 			}
29691da177e4SLinus Torvalds 		}
29701da177e4SLinus Torvalds 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
29711da177e4SLinus Torvalds 		goto install_route;
29721da177e4SLinus Torvalds 	}
29731da177e4SLinus Torvalds 
297486872cb5SThomas Graf 	if (cfg->fc_flags & RTF_GATEWAY) {
29759fbb704cSDavid Ahern 		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
29761da177e4SLinus Torvalds 		if (err)
29771da177e4SLinus Torvalds 			goto out;
29789fbb704cSDavid Ahern 
29795e670d84SDavid Ahern 		rt->fib6_nh.nh_gw = rt->rt6i_gateway = cfg->fc_gateway;
29801da177e4SLinus Torvalds 	}
29811da177e4SLinus Torvalds 
29821da177e4SLinus Torvalds 	err = -ENODEV;
298338308473SDavid S. Miller 	if (!dev)
29841da177e4SLinus Torvalds 		goto out;
29851da177e4SLinus Torvalds 
2986428604fbSLorenzo Bianconi 	if (idev->cnf.disable_ipv6) {
2987428604fbSLorenzo Bianconi 		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
2988428604fbSLorenzo Bianconi 		err = -EACCES;
2989428604fbSLorenzo Bianconi 		goto out;
2990428604fbSLorenzo Bianconi 	}
2991428604fbSLorenzo Bianconi 
2992955ec4cbSDavid Ahern 	if (!(dev->flags & IFF_UP)) {
2993955ec4cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2994955ec4cbSDavid Ahern 		err = -ENETDOWN;
2995955ec4cbSDavid Ahern 		goto out;
2996955ec4cbSDavid Ahern 	}
2997955ec4cbSDavid Ahern 
2998c3968a85SDaniel Walter 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2999c3968a85SDaniel Walter 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
3000d5d531cbSDavid Ahern 			NL_SET_ERR_MSG(extack, "Invalid source address");
3001c3968a85SDaniel Walter 			err = -EINVAL;
3002c3968a85SDaniel Walter 			goto out;
3003c3968a85SDaniel Walter 		}
30044e3fd7a0SAlexey Dobriyan 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
3005c3968a85SDaniel Walter 		rt->rt6i_prefsrc.plen = 128;
3006c3968a85SDaniel Walter 	} else
3007c3968a85SDaniel Walter 		rt->rt6i_prefsrc.plen = 0;
3008c3968a85SDaniel Walter 
300986872cb5SThomas Graf 	rt->rt6i_flags = cfg->fc_flags;
30101da177e4SLinus Torvalds 
30111da177e4SLinus Torvalds install_route:
30125609b80aSIdo Schimmel 	if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
30135609b80aSIdo Schimmel 	    !netif_carrier_ok(dev))
30145e670d84SDavid Ahern 		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
30155e670d84SDavid Ahern 	rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
30165e670d84SDavid Ahern 	rt->fib6_nh.nh_dev = rt->dst.dev = dev;
30171da177e4SLinus Torvalds 	rt->rt6i_idev = idev;
3018c71099acSThomas Graf 	rt->rt6i_table = table;
301963152fc0SDaniel Lezcano 
3020c346dca1SYOSHIFUJI Hideaki 	cfg->fc_nlinfo.nl_net = dev_net(dev);
302163152fc0SDaniel Lezcano 
30228c5b83f0SRoopa Prabhu 	return rt;
30231da177e4SLinus Torvalds out:
30241da177e4SLinus Torvalds 	if (dev)
30251da177e4SLinus Torvalds 		dev_put(dev);
30261da177e4SLinus Torvalds 	if (idev)
30271da177e4SLinus Torvalds 		in6_dev_put(idev);
3028587fea74SWei Wang 	if (rt)
3029587fea74SWei Wang 		dst_release_immediate(&rt->dst);
30306b9ea5a6SRoopa Prabhu 
30318c5b83f0SRoopa Prabhu 	return ERR_PTR(err);
30326b9ea5a6SRoopa Prabhu }
30336b9ea5a6SRoopa Prabhu 
3034*d4ead6b3SDavid Ahern int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack)
30356b9ea5a6SRoopa Prabhu {
30368c5b83f0SRoopa Prabhu 	struct rt6_info *rt;
30376b9ea5a6SRoopa Prabhu 	int err;
30386b9ea5a6SRoopa Prabhu 
3039333c4301SDavid Ahern 	rt = ip6_route_info_create(cfg, extack);
3040*d4ead6b3SDavid Ahern 	if (IS_ERR(rt))
3041*d4ead6b3SDavid Ahern 		return PTR_ERR(rt);
30426b9ea5a6SRoopa Prabhu 
3043*d4ead6b3SDavid Ahern 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
30446b9ea5a6SRoopa Prabhu 
30451da177e4SLinus Torvalds 	return err;
30461da177e4SLinus Torvalds }
30471da177e4SLinus Torvalds 
304886872cb5SThomas Graf static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
30491da177e4SLinus Torvalds {
3050afb1d4b5SDavid Ahern 	struct net *net = info->nl_net;
3051c71099acSThomas Graf 	struct fib6_table *table;
3052afb1d4b5SDavid Ahern 	int err;
30531da177e4SLinus Torvalds 
3054a4c2fd7fSWei Wang 	if (rt == net->ipv6.ip6_null_entry) {
30556825a26cSGao feng 		err = -ENOENT;
30566825a26cSGao feng 		goto out;
30576825a26cSGao feng 	}
30586c813a72SPatrick McHardy 
3059c71099acSThomas Graf 	table = rt->rt6i_table;
306066f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
306186872cb5SThomas Graf 	err = fib6_del(rt, info);
306266f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
30631da177e4SLinus Torvalds 
30646825a26cSGao feng out:
306594e187c0SAmerigo Wang 	ip6_rt_put(rt);
30661da177e4SLinus Torvalds 	return err;
30671da177e4SLinus Torvalds }
30681da177e4SLinus Torvalds 
3069afb1d4b5SDavid Ahern int ip6_del_rt(struct net *net, struct rt6_info *rt)
3070e0a1ad73SThomas Graf {
3071afb1d4b5SDavid Ahern 	struct nl_info info = { .nl_net = net };
3072afb1d4b5SDavid Ahern 
3073528c4cebSDenis V. Lunev 	return __ip6_del_rt(rt, &info);
3074e0a1ad73SThomas Graf }
3075e0a1ad73SThomas Graf 
30760ae81335SDavid Ahern static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
30770ae81335SDavid Ahern {
30780ae81335SDavid Ahern 	struct nl_info *info = &cfg->fc_nlinfo;
3079e3330039SWANG Cong 	struct net *net = info->nl_net;
308016a16cd3SDavid Ahern 	struct sk_buff *skb = NULL;
30810ae81335SDavid Ahern 	struct fib6_table *table;
3082e3330039SWANG Cong 	int err = -ENOENT;
30830ae81335SDavid Ahern 
3084e3330039SWANG Cong 	if (rt == net->ipv6.ip6_null_entry)
3085e3330039SWANG Cong 		goto out_put;
30860ae81335SDavid Ahern 	table = rt->rt6i_table;
308766f5d6ceSWei Wang 	spin_lock_bh(&table->tb6_lock);
30880ae81335SDavid Ahern 
30890ae81335SDavid Ahern 	if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
30900ae81335SDavid Ahern 		struct rt6_info *sibling, *next_sibling;
30910ae81335SDavid Ahern 
309216a16cd3SDavid Ahern 		/* prefer to send a single notification with all hops */
309316a16cd3SDavid Ahern 		skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
309416a16cd3SDavid Ahern 		if (skb) {
309516a16cd3SDavid Ahern 			u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
309616a16cd3SDavid Ahern 
3097*d4ead6b3SDavid Ahern 			if (rt6_fill_node(net, skb, rt, NULL,
309816a16cd3SDavid Ahern 					  NULL, NULL, 0, RTM_DELROUTE,
309916a16cd3SDavid Ahern 					  info->portid, seq, 0) < 0) {
310016a16cd3SDavid Ahern 				kfree_skb(skb);
310116a16cd3SDavid Ahern 				skb = NULL;
310216a16cd3SDavid Ahern 			} else
310316a16cd3SDavid Ahern 				info->skip_notify = 1;
310416a16cd3SDavid Ahern 		}
310516a16cd3SDavid Ahern 
31060ae81335SDavid Ahern 		list_for_each_entry_safe(sibling, next_sibling,
31070ae81335SDavid Ahern 					 &rt->rt6i_siblings,
31080ae81335SDavid Ahern 					 rt6i_siblings) {
31090ae81335SDavid Ahern 			err = fib6_del(sibling, info);
31100ae81335SDavid Ahern 			if (err)
3111e3330039SWANG Cong 				goto out_unlock;
31120ae81335SDavid Ahern 		}
31130ae81335SDavid Ahern 	}
31140ae81335SDavid Ahern 
31150ae81335SDavid Ahern 	err = fib6_del(rt, info);
3116e3330039SWANG Cong out_unlock:
311766f5d6ceSWei Wang 	spin_unlock_bh(&table->tb6_lock);
3118e3330039SWANG Cong out_put:
31190ae81335SDavid Ahern 	ip6_rt_put(rt);
312016a16cd3SDavid Ahern 
312116a16cd3SDavid Ahern 	if (skb) {
3122e3330039SWANG Cong 		rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
312316a16cd3SDavid Ahern 			    info->nlh, gfp_any());
312416a16cd3SDavid Ahern 	}
31250ae81335SDavid Ahern 	return err;
31260ae81335SDavid Ahern }
31270ae81335SDavid Ahern 
3128333c4301SDavid Ahern static int ip6_route_del(struct fib6_config *cfg,
3129333c4301SDavid Ahern 			 struct netlink_ext_ack *extack)
31301da177e4SLinus Torvalds {
31312b760fcfSWei Wang 	struct rt6_info *rt, *rt_cache;
3132c71099acSThomas Graf 	struct fib6_table *table;
31331da177e4SLinus Torvalds 	struct fib6_node *fn;
31341da177e4SLinus Torvalds 	int err = -ESRCH;
31351da177e4SLinus Torvalds 
31365578689aSDaniel Lezcano 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
3137d5d531cbSDavid Ahern 	if (!table) {
3138d5d531cbSDavid Ahern 		NL_SET_ERR_MSG(extack, "FIB table does not exist");
3139c71099acSThomas Graf 		return err;
3140d5d531cbSDavid Ahern 	}
31411da177e4SLinus Torvalds 
314266f5d6ceSWei Wang 	rcu_read_lock();
3143c71099acSThomas Graf 
3144c71099acSThomas Graf 	fn = fib6_locate(&table->tb6_root,
314586872cb5SThomas Graf 			 &cfg->fc_dst, cfg->fc_dst_len,
314638fbeeeeSWei Wang 			 &cfg->fc_src, cfg->fc_src_len,
31472b760fcfSWei Wang 			 !(cfg->fc_flags & RTF_CACHE));
31481da177e4SLinus Torvalds 
31491da177e4SLinus Torvalds 	if (fn) {
315066f5d6ceSWei Wang 		for_each_fib6_node_rt_rcu(fn) {
31512b760fcfSWei Wang 			if (cfg->fc_flags & RTF_CACHE) {
31522b760fcfSWei Wang 				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
31532b760fcfSWei Wang 							      &cfg->fc_src);
31542b760fcfSWei Wang 				if (!rt_cache)
31551f56a01fSMartin KaFai Lau 					continue;
31562b760fcfSWei Wang 				rt = rt_cache;
31572b760fcfSWei Wang 			}
315886872cb5SThomas Graf 			if (cfg->fc_ifindex &&
31595e670d84SDavid Ahern 			    (!rt->fib6_nh.nh_dev ||
31605e670d84SDavid Ahern 			     rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
31611da177e4SLinus Torvalds 				continue;
316286872cb5SThomas Graf 			if (cfg->fc_flags & RTF_GATEWAY &&
31635e670d84SDavid Ahern 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
31641da177e4SLinus Torvalds 				continue;
316586872cb5SThomas Graf 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
31661da177e4SLinus Torvalds 				continue;
3167c2ed1880SMantas M 			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
3168c2ed1880SMantas M 				continue;
3169d3843fe5SWei Wang 			if (!dst_hold_safe(&rt->dst))
3170d3843fe5SWei Wang 				break;
317166f5d6ceSWei Wang 			rcu_read_unlock();
31721da177e4SLinus Torvalds 
31730ae81335SDavid Ahern 			/* if gateway was specified only delete the one hop */
31740ae81335SDavid Ahern 			if (cfg->fc_flags & RTF_GATEWAY)
317586872cb5SThomas Graf 				return __ip6_del_rt(rt, &cfg->fc_nlinfo);
31760ae81335SDavid Ahern 
31770ae81335SDavid Ahern 			return __ip6_del_rt_siblings(rt, cfg);
31781da177e4SLinus Torvalds 		}
31791da177e4SLinus Torvalds 	}
318066f5d6ceSWei Wang 	rcu_read_unlock();
31811da177e4SLinus Torvalds 
31821da177e4SLinus Torvalds 	return err;
31831da177e4SLinus Torvalds }
31841da177e4SLinus Torvalds 
31856700c270SDavid S. Miller static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
3186a6279458SYOSHIFUJI Hideaki {
3187a6279458SYOSHIFUJI Hideaki 	struct netevent_redirect netevent;
3188e8599ff4SDavid S. Miller 	struct rt6_info *rt, *nrt = NULL;
3189e8599ff4SDavid S. Miller 	struct ndisc_options ndopts;
3190e8599ff4SDavid S. Miller 	struct inet6_dev *in6_dev;
3191e8599ff4SDavid S. Miller 	struct neighbour *neigh;
319271bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	struct rd_msg *msg;
31936e157b6aSDavid S. Miller 	int optlen, on_link;
31946e157b6aSDavid S. Miller 	u8 *lladdr;
3195e8599ff4SDavid S. Miller 
319629a3cad5SSimon Horman 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
319771bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	optlen -= sizeof(*msg);
3198e8599ff4SDavid S. Miller 
3199e8599ff4SDavid S. Miller 	if (optlen < 0) {
32006e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
3201e8599ff4SDavid S. Miller 		return;
3202e8599ff4SDavid S. Miller 	}
3203e8599ff4SDavid S. Miller 
320471bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	msg = (struct rd_msg *)icmp6_hdr(skb);
3205e8599ff4SDavid S. Miller 
320671bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (ipv6_addr_is_multicast(&msg->dest)) {
32076e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
3208e8599ff4SDavid S. Miller 		return;
3209e8599ff4SDavid S. Miller 	}
3210e8599ff4SDavid S. Miller 
32116e157b6aSDavid S. Miller 	on_link = 0;
321271bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
3213e8599ff4SDavid S. Miller 		on_link = 1;
321471bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	} else if (ipv6_addr_type(&msg->target) !=
3215e8599ff4SDavid S. Miller 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
32166e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
3217e8599ff4SDavid S. Miller 		return;
3218e8599ff4SDavid S. Miller 	}
3219e8599ff4SDavid S. Miller 
3220e8599ff4SDavid S. Miller 	in6_dev = __in6_dev_get(skb->dev);
3221e8599ff4SDavid S. Miller 	if (!in6_dev)
3222e8599ff4SDavid S. Miller 		return;
3223e8599ff4SDavid S. Miller 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3224e8599ff4SDavid S. Miller 		return;
3225e8599ff4SDavid S. Miller 
3226e8599ff4SDavid S. Miller 	/* RFC2461 8.1:
3227e8599ff4SDavid S. Miller 	 *	The IP source address of the Redirect MUST be the same as the current
3228e8599ff4SDavid S. Miller 	 *	first-hop router for the specified ICMP Destination Address.
3229e8599ff4SDavid S. Miller 	 */
3230e8599ff4SDavid S. Miller 
3231f997c55cSAlexander Aring 	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
3232e8599ff4SDavid S. Miller 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3233e8599ff4SDavid S. Miller 		return;
3234e8599ff4SDavid S. Miller 	}
32356e157b6aSDavid S. Miller 
32366e157b6aSDavid S. Miller 	lladdr = NULL;
3237e8599ff4SDavid S. Miller 	if (ndopts.nd_opts_tgt_lladdr) {
3238e8599ff4SDavid S. Miller 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3239e8599ff4SDavid S. Miller 					     skb->dev);
3240e8599ff4SDavid S. Miller 		if (!lladdr) {
3241e8599ff4SDavid S. Miller 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3242e8599ff4SDavid S. Miller 			return;
3243e8599ff4SDavid S. Miller 		}
3244e8599ff4SDavid S. Miller 	}
3245e8599ff4SDavid S. Miller 
32466e157b6aSDavid S. Miller 	rt = (struct rt6_info *) dst;
3247ec13ad1dSMatthias Schiffer 	if (rt->rt6i_flags & RTF_REJECT) {
32486e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
32496e157b6aSDavid S. Miller 		return;
32506e157b6aSDavid S. Miller 	}
32516e157b6aSDavid S. Miller 
32526e157b6aSDavid S. Miller 	/* Redirect received -> path was valid.
32536e157b6aSDavid S. Miller 	 * Look, redirects are sent only in response to data packets,
32546e157b6aSDavid S. Miller 	 * so that this nexthop apparently is reachable. --ANK
32556e157b6aSDavid S. Miller 	 */
32560dec879fSJulian Anastasov 	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
32576e157b6aSDavid S. Miller 
325871bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
3259e8599ff4SDavid S. Miller 	if (!neigh)
3260e8599ff4SDavid S. Miller 		return;
3261e8599ff4SDavid S. Miller 
32621da177e4SLinus Torvalds 	/*
32631da177e4SLinus Torvalds 	 *	We have finally decided to accept it.
32641da177e4SLinus Torvalds 	 */
32651da177e4SLinus Torvalds 
3266f997c55cSAlexander Aring 	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
32671da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
32681da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_OVERRIDE|
32691da177e4SLinus Torvalds 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
3270f997c55cSAlexander Aring 				     NEIGH_UPDATE_F_ISROUTER)),
3271f997c55cSAlexander Aring 		     NDISC_REDIRECT, &ndopts);
32721da177e4SLinus Torvalds 
327383a09abdSMartin KaFai Lau 	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
327438308473SDavid S. Miller 	if (!nrt)
32751da177e4SLinus Torvalds 		goto out;
32761da177e4SLinus Torvalds 
32771da177e4SLinus Torvalds 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
32781da177e4SLinus Torvalds 	if (on_link)
32791da177e4SLinus Torvalds 		nrt->rt6i_flags &= ~RTF_GATEWAY;
32801da177e4SLinus Torvalds 
3281b91d5329SXin Long 	nrt->rt6i_protocol = RTPROT_REDIRECT;
32824e3fd7a0SAlexey Dobriyan 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
32831da177e4SLinus Torvalds 
32842b760fcfSWei Wang 	/* No need to remove rt from the exception table if rt is
32852b760fcfSWei Wang 	 * a cached route because rt6_insert_exception() will
32862b760fcfSWei Wang 	 * takes care of it
32872b760fcfSWei Wang 	 */
3288*d4ead6b3SDavid Ahern 	if (rt6_insert_exception(nrt, rt->from)) {
32892b760fcfSWei Wang 		dst_release_immediate(&nrt->dst);
32902b760fcfSWei Wang 		goto out;
32912b760fcfSWei Wang 	}
32921da177e4SLinus Torvalds 
3293d8d1f30bSChangli Gao 	netevent.old = &rt->dst;
3294d8d1f30bSChangli Gao 	netevent.new = &nrt->dst;
329571bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	netevent.daddr = &msg->dest;
329660592833SYOSHIFUJI Hideaki / 吉藤英明 	netevent.neigh = neigh;
32978d71740cSTom Tucker 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
32988d71740cSTom Tucker 
32991da177e4SLinus Torvalds out:
3300e8599ff4SDavid S. Miller 	neigh_release(neigh);
33016e157b6aSDavid S. Miller }
33026e157b6aSDavid S. Miller 
330370ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
3304efa2cea0SDaniel Lezcano static struct rt6_info *rt6_get_route_info(struct net *net,
3305b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
3306830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
3307830218c1SDavid Ahern 					   struct net_device *dev)
330870ceb4f5SYOSHIFUJI Hideaki {
3309830218c1SDavid Ahern 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3310830218c1SDavid Ahern 	int ifindex = dev->ifindex;
331170ceb4f5SYOSHIFUJI Hideaki 	struct fib6_node *fn;
331270ceb4f5SYOSHIFUJI Hideaki 	struct rt6_info *rt = NULL;
3313c71099acSThomas Graf 	struct fib6_table *table;
331470ceb4f5SYOSHIFUJI Hideaki 
3315830218c1SDavid Ahern 	table = fib6_get_table(net, tb_id);
331638308473SDavid S. Miller 	if (!table)
3317c71099acSThomas Graf 		return NULL;
3318c71099acSThomas Graf 
331966f5d6ceSWei Wang 	rcu_read_lock();
332038fbeeeeSWei Wang 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
332170ceb4f5SYOSHIFUJI Hideaki 	if (!fn)
332270ceb4f5SYOSHIFUJI Hideaki 		goto out;
332370ceb4f5SYOSHIFUJI Hideaki 
332466f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(fn) {
33255e670d84SDavid Ahern 		if (rt->fib6_nh.nh_dev->ifindex != ifindex)
332670ceb4f5SYOSHIFUJI Hideaki 			continue;
332770ceb4f5SYOSHIFUJI Hideaki 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
332870ceb4f5SYOSHIFUJI Hideaki 			continue;
33295e670d84SDavid Ahern 		if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
333070ceb4f5SYOSHIFUJI Hideaki 			continue;
3331d3843fe5SWei Wang 		ip6_hold_safe(NULL, &rt, false);
333270ceb4f5SYOSHIFUJI Hideaki 		break;
333370ceb4f5SYOSHIFUJI Hideaki 	}
333470ceb4f5SYOSHIFUJI Hideaki out:
333566f5d6ceSWei Wang 	rcu_read_unlock();
333670ceb4f5SYOSHIFUJI Hideaki 	return rt;
333770ceb4f5SYOSHIFUJI Hideaki }
333870ceb4f5SYOSHIFUJI Hideaki 
3339efa2cea0SDaniel Lezcano static struct rt6_info *rt6_add_route_info(struct net *net,
3340b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
3341830218c1SDavid Ahern 					   const struct in6_addr *gwaddr,
3342830218c1SDavid Ahern 					   struct net_device *dev,
334395c96174SEric Dumazet 					   unsigned int pref)
334470ceb4f5SYOSHIFUJI Hideaki {
334586872cb5SThomas Graf 	struct fib6_config cfg = {
3346238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
3347830218c1SDavid Ahern 		.fc_ifindex	= dev->ifindex,
334886872cb5SThomas Graf 		.fc_dst_len	= prefixlen,
334986872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
335086872cb5SThomas Graf 				  RTF_UP | RTF_PREF(pref),
3351b91d5329SXin Long 		.fc_protocol = RTPROT_RA,
3352e8478e80SDavid Ahern 		.fc_type = RTN_UNICAST,
335315e47304SEric W. Biederman 		.fc_nlinfo.portid = 0,
3354efa2cea0SDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
3355efa2cea0SDaniel Lezcano 		.fc_nlinfo.nl_net = net,
335686872cb5SThomas Graf 	};
335770ceb4f5SYOSHIFUJI Hideaki 
3358830218c1SDavid Ahern 	cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
33594e3fd7a0SAlexey Dobriyan 	cfg.fc_dst = *prefix;
33604e3fd7a0SAlexey Dobriyan 	cfg.fc_gateway = *gwaddr;
336186872cb5SThomas Graf 
3362e317da96SYOSHIFUJI Hideaki 	/* We should treat it as a default route if prefix length is 0. */
3363e317da96SYOSHIFUJI Hideaki 	if (!prefixlen)
336486872cb5SThomas Graf 		cfg.fc_flags |= RTF_DEFAULT;
336570ceb4f5SYOSHIFUJI Hideaki 
3366333c4301SDavid Ahern 	ip6_route_add(&cfg, NULL);
336770ceb4f5SYOSHIFUJI Hideaki 
3368830218c1SDavid Ahern 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
336970ceb4f5SYOSHIFUJI Hideaki }
337070ceb4f5SYOSHIFUJI Hideaki #endif
337170ceb4f5SYOSHIFUJI Hideaki 
3372afb1d4b5SDavid Ahern struct rt6_info *rt6_get_dflt_router(struct net *net,
3373afb1d4b5SDavid Ahern 				     const struct in6_addr *addr,
3374afb1d4b5SDavid Ahern 				     struct net_device *dev)
33751da177e4SLinus Torvalds {
3376830218c1SDavid Ahern 	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
33771da177e4SLinus Torvalds 	struct rt6_info *rt;
3378c71099acSThomas Graf 	struct fib6_table *table;
33791da177e4SLinus Torvalds 
3380afb1d4b5SDavid Ahern 	table = fib6_get_table(net, tb_id);
338138308473SDavid S. Miller 	if (!table)
3382c71099acSThomas Graf 		return NULL;
33831da177e4SLinus Torvalds 
338466f5d6ceSWei Wang 	rcu_read_lock();
338566f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
33865e670d84SDavid Ahern 		if (dev == rt->fib6_nh.nh_dev &&
3387045927ffSYOSHIFUJI Hideaki 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
33885e670d84SDavid Ahern 		    ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
33891da177e4SLinus Torvalds 			break;
33901da177e4SLinus Torvalds 	}
33911da177e4SLinus Torvalds 	if (rt)
3392d3843fe5SWei Wang 		ip6_hold_safe(NULL, &rt, false);
339366f5d6ceSWei Wang 	rcu_read_unlock();
33941da177e4SLinus Torvalds 	return rt;
33951da177e4SLinus Torvalds }
33961da177e4SLinus Torvalds 
3397afb1d4b5SDavid Ahern struct rt6_info *rt6_add_dflt_router(struct net *net,
3398afb1d4b5SDavid Ahern 				     const struct in6_addr *gwaddr,
3399ebacaaa0SYOSHIFUJI Hideaki 				     struct net_device *dev,
3400ebacaaa0SYOSHIFUJI Hideaki 				     unsigned int pref)
34011da177e4SLinus Torvalds {
340286872cb5SThomas Graf 	struct fib6_config cfg = {
3403ca254490SDavid Ahern 		.fc_table	= l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
3404238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
340586872cb5SThomas Graf 		.fc_ifindex	= dev->ifindex,
340686872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
340786872cb5SThomas Graf 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
3408b91d5329SXin Long 		.fc_protocol = RTPROT_RA,
3409e8478e80SDavid Ahern 		.fc_type = RTN_UNICAST,
341015e47304SEric W. Biederman 		.fc_nlinfo.portid = 0,
34115578689aSDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
3412afb1d4b5SDavid Ahern 		.fc_nlinfo.nl_net = net,
341386872cb5SThomas Graf 	};
34141da177e4SLinus Torvalds 
34154e3fd7a0SAlexey Dobriyan 	cfg.fc_gateway = *gwaddr;
34161da177e4SLinus Torvalds 
3417333c4301SDavid Ahern 	if (!ip6_route_add(&cfg, NULL)) {
3418830218c1SDavid Ahern 		struct fib6_table *table;
3419830218c1SDavid Ahern 
3420830218c1SDavid Ahern 		table = fib6_get_table(dev_net(dev), cfg.fc_table);
3421830218c1SDavid Ahern 		if (table)
3422830218c1SDavid Ahern 			table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3423830218c1SDavid Ahern 	}
34241da177e4SLinus Torvalds 
3425afb1d4b5SDavid Ahern 	return rt6_get_dflt_router(net, gwaddr, dev);
34261da177e4SLinus Torvalds }
34271da177e4SLinus Torvalds 
3428afb1d4b5SDavid Ahern static void __rt6_purge_dflt_routers(struct net *net,
3429afb1d4b5SDavid Ahern 				     struct fib6_table *table)
34301da177e4SLinus Torvalds {
34311da177e4SLinus Torvalds 	struct rt6_info *rt;
34321da177e4SLinus Torvalds 
34331da177e4SLinus Torvalds restart:
343466f5d6ceSWei Wang 	rcu_read_lock();
343566f5d6ceSWei Wang 	for_each_fib6_node_rt_rcu(&table->tb6_root) {
34363e8b0ac3SLorenzo Colitti 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
34373e8b0ac3SLorenzo Colitti 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
3438d3843fe5SWei Wang 			if (dst_hold_safe(&rt->dst)) {
343966f5d6ceSWei Wang 				rcu_read_unlock();
3440afb1d4b5SDavid Ahern 				ip6_del_rt(net, rt);
3441d3843fe5SWei Wang 			} else {
344266f5d6ceSWei Wang 				rcu_read_unlock();
3443d3843fe5SWei Wang 			}
34441da177e4SLinus Torvalds 			goto restart;
34451da177e4SLinus Torvalds 		}
34461da177e4SLinus Torvalds 	}
344766f5d6ceSWei Wang 	rcu_read_unlock();
3448830218c1SDavid Ahern 
3449830218c1SDavid Ahern 	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3450830218c1SDavid Ahern }
3451830218c1SDavid Ahern 
3452830218c1SDavid Ahern void rt6_purge_dflt_routers(struct net *net)
3453830218c1SDavid Ahern {
3454830218c1SDavid Ahern 	struct fib6_table *table;
3455830218c1SDavid Ahern 	struct hlist_head *head;
3456830218c1SDavid Ahern 	unsigned int h;
3457830218c1SDavid Ahern 
3458830218c1SDavid Ahern 	rcu_read_lock();
3459830218c1SDavid Ahern 
3460830218c1SDavid Ahern 	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3461830218c1SDavid Ahern 		head = &net->ipv6.fib_table_hash[h];
3462830218c1SDavid Ahern 		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3463830218c1SDavid Ahern 			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3464afb1d4b5SDavid Ahern 				__rt6_purge_dflt_routers(net, table);
3465830218c1SDavid Ahern 		}
3466830218c1SDavid Ahern 	}
3467830218c1SDavid Ahern 
3468830218c1SDavid Ahern 	rcu_read_unlock();
34691da177e4SLinus Torvalds }
34701da177e4SLinus Torvalds 
34715578689aSDaniel Lezcano static void rtmsg_to_fib6_config(struct net *net,
34725578689aSDaniel Lezcano 				 struct in6_rtmsg *rtmsg,
347386872cb5SThomas Graf 				 struct fib6_config *cfg)
347486872cb5SThomas Graf {
347586872cb5SThomas Graf 	memset(cfg, 0, sizeof(*cfg));
347686872cb5SThomas Graf 
3477ca254490SDavid Ahern 	cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3478ca254490SDavid Ahern 			 : RT6_TABLE_MAIN;
347986872cb5SThomas Graf 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
348086872cb5SThomas Graf 	cfg->fc_metric = rtmsg->rtmsg_metric;
348186872cb5SThomas Graf 	cfg->fc_expires = rtmsg->rtmsg_info;
348286872cb5SThomas Graf 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
348386872cb5SThomas Graf 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
348486872cb5SThomas Graf 	cfg->fc_flags = rtmsg->rtmsg_flags;
3485e8478e80SDavid Ahern 	cfg->fc_type = rtmsg->rtmsg_type;
348686872cb5SThomas Graf 
34875578689aSDaniel Lezcano 	cfg->fc_nlinfo.nl_net = net;
3488f1243c2dSBenjamin Thery 
34894e3fd7a0SAlexey Dobriyan 	cfg->fc_dst = rtmsg->rtmsg_dst;
34904e3fd7a0SAlexey Dobriyan 	cfg->fc_src = rtmsg->rtmsg_src;
34914e3fd7a0SAlexey Dobriyan 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
349286872cb5SThomas Graf }
349386872cb5SThomas Graf 
34945578689aSDaniel Lezcano int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
34951da177e4SLinus Torvalds {
349686872cb5SThomas Graf 	struct fib6_config cfg;
34971da177e4SLinus Torvalds 	struct in6_rtmsg rtmsg;
34981da177e4SLinus Torvalds 	int err;
34991da177e4SLinus Torvalds 
35001da177e4SLinus Torvalds 	switch (cmd) {
35011da177e4SLinus Torvalds 	case SIOCADDRT:		/* Add a route */
35021da177e4SLinus Torvalds 	case SIOCDELRT:		/* Delete a route */
3503af31f412SEric W. Biederman 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
35041da177e4SLinus Torvalds 			return -EPERM;
35051da177e4SLinus Torvalds 		err = copy_from_user(&rtmsg, arg,
35061da177e4SLinus Torvalds 				     sizeof(struct in6_rtmsg));
35071da177e4SLinus Torvalds 		if (err)
35081da177e4SLinus Torvalds 			return -EFAULT;
35091da177e4SLinus Torvalds 
35105578689aSDaniel Lezcano 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
351186872cb5SThomas Graf 
35121da177e4SLinus Torvalds 		rtnl_lock();
35131da177e4SLinus Torvalds 		switch (cmd) {
35141da177e4SLinus Torvalds 		case SIOCADDRT:
3515333c4301SDavid Ahern 			err = ip6_route_add(&cfg, NULL);
35161da177e4SLinus Torvalds 			break;
35171da177e4SLinus Torvalds 		case SIOCDELRT:
3518333c4301SDavid Ahern 			err = ip6_route_del(&cfg, NULL);
35191da177e4SLinus Torvalds 			break;
35201da177e4SLinus Torvalds 		default:
35211da177e4SLinus Torvalds 			err = -EINVAL;
35221da177e4SLinus Torvalds 		}
35231da177e4SLinus Torvalds 		rtnl_unlock();
35241da177e4SLinus Torvalds 
35251da177e4SLinus Torvalds 		return err;
35263ff50b79SStephen Hemminger 	}
35271da177e4SLinus Torvalds 
35281da177e4SLinus Torvalds 	return -EINVAL;
35291da177e4SLinus Torvalds }
35301da177e4SLinus Torvalds 
35311da177e4SLinus Torvalds /*
35321da177e4SLinus Torvalds  *	Drop the packet on the floor
35331da177e4SLinus Torvalds  */
35341da177e4SLinus Torvalds 
3535d5fdd6baSBrian Haley static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
35361da177e4SLinus Torvalds {
3537612f09e8SYOSHIFUJI Hideaki 	int type;
3538adf30907SEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
3539612f09e8SYOSHIFUJI Hideaki 	switch (ipstats_mib_noroutes) {
3540612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_INNOROUTES:
35410660e03fSArnaldo Carvalho de Melo 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
354245bb0060SUlrich Weber 		if (type == IPV6_ADDR_ANY) {
3543bdb7cc64SStephen Suryaputra 			IP6_INC_STATS(dev_net(dst->dev),
3544bdb7cc64SStephen Suryaputra 				      __in6_dev_get_safely(skb->dev),
35453bd653c8SDenis V. Lunev 				      IPSTATS_MIB_INADDRERRORS);
3546612f09e8SYOSHIFUJI Hideaki 			break;
3547612f09e8SYOSHIFUJI Hideaki 		}
3548612f09e8SYOSHIFUJI Hideaki 		/* FALLTHROUGH */
3549612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_OUTNOROUTES:
35503bd653c8SDenis V. Lunev 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
35513bd653c8SDenis V. Lunev 			      ipstats_mib_noroutes);
3552612f09e8SYOSHIFUJI Hideaki 		break;
3553612f09e8SYOSHIFUJI Hideaki 	}
35543ffe533cSAlexey Dobriyan 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
35551da177e4SLinus Torvalds 	kfree_skb(skb);
35561da177e4SLinus Torvalds 	return 0;
35571da177e4SLinus Torvalds }
35581da177e4SLinus Torvalds 
35599ce8ade0SThomas Graf static int ip6_pkt_discard(struct sk_buff *skb)
35609ce8ade0SThomas Graf {
3561612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
35629ce8ade0SThomas Graf }
35639ce8ade0SThomas Graf 
3564ede2059dSEric W. Biederman static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
35651da177e4SLinus Torvalds {
3566adf30907SEric Dumazet 	skb->dev = skb_dst(skb)->dev;
3567612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
35681da177e4SLinus Torvalds }
35691da177e4SLinus Torvalds 
35709ce8ade0SThomas Graf static int ip6_pkt_prohibit(struct sk_buff *skb)
35719ce8ade0SThomas Graf {
3572612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
35739ce8ade0SThomas Graf }
35749ce8ade0SThomas Graf 
3575ede2059dSEric W. Biederman static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
35769ce8ade0SThomas Graf {
3577adf30907SEric Dumazet 	skb->dev = skb_dst(skb)->dev;
3578612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
35799ce8ade0SThomas Graf }
35809ce8ade0SThomas Graf 
35811da177e4SLinus Torvalds /*
35821da177e4SLinus Torvalds  *	Allocate a dst for local (unicast / anycast) address.
35831da177e4SLinus Torvalds  */
35841da177e4SLinus Torvalds 
3585afb1d4b5SDavid Ahern struct rt6_info *addrconf_dst_alloc(struct net *net,
3586afb1d4b5SDavid Ahern 				    struct inet6_dev *idev,
35871da177e4SLinus Torvalds 				    const struct in6_addr *addr,
35888f031519SDavid S. Miller 				    bool anycast)
35891da177e4SLinus Torvalds {
3590ca254490SDavid Ahern 	u32 tb_id;
35914832c30dSDavid Ahern 	struct net_device *dev = idev->dev;
35925f02ce24SDavid Ahern 	struct rt6_info *rt;
35935f02ce24SDavid Ahern 
35945f02ce24SDavid Ahern 	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
3595a3300ef4SHannes Frederic Sowa 	if (!rt)
35961da177e4SLinus Torvalds 		return ERR_PTR(-ENOMEM);
35971da177e4SLinus Torvalds 
35981da177e4SLinus Torvalds 	in6_dev_hold(idev);
35991da177e4SLinus Torvalds 	rt->rt6i_idev = idev;
36001da177e4SLinus Torvalds 
36016edb3c96SDavid Ahern 	rt->dst.flags |= DST_HOST;
360294b5e0f9SDavid Ahern 	rt->rt6i_protocol = RTPROT_KERNEL;
36031da177e4SLinus Torvalds 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
3604e8478e80SDavid Ahern 	if (anycast) {
3605e8478e80SDavid Ahern 		rt->fib6_type = RTN_ANYCAST;
360658c4fb86SYOSHIFUJI Hideaki 		rt->rt6i_flags |= RTF_ANYCAST;
3607e8478e80SDavid Ahern 	} else {
3608e8478e80SDavid Ahern 		rt->fib6_type = RTN_LOCAL;
36091da177e4SLinus Torvalds 		rt->rt6i_flags |= RTF_LOCAL;
3610e8478e80SDavid Ahern 	}
36111da177e4SLinus Torvalds 
36125e670d84SDavid Ahern 	rt->fib6_nh.nh_gw = *addr;
36135e670d84SDavid Ahern 	rt->fib6_nh.nh_dev = dev;
3614550bab42SJulian Anastasov 	rt->rt6i_gateway  = *addr;
36154e3fd7a0SAlexey Dobriyan 	rt->rt6i_dst.addr = *addr;
36161da177e4SLinus Torvalds 	rt->rt6i_dst.plen = 128;
3617ca254490SDavid Ahern 	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3618ca254490SDavid Ahern 	rt->rt6i_table = fib6_get_table(net, tb_id);
36191da177e4SLinus Torvalds 
36201da177e4SLinus Torvalds 	return rt;
36211da177e4SLinus Torvalds }
36221da177e4SLinus Torvalds 
/*
 * Argument bundle for fib6_remove_prefsrc(): identifies the address being
 * removed so that it can be dropped from routes using it as preferred
 * source.
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace owning the null entry */
	struct in6_addr *addr;		/* address being removed */
};
3629c3968a85SDaniel Walter 
3630c3968a85SDaniel Walter static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3631c3968a85SDaniel Walter {
3632c3968a85SDaniel Walter 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3633c3968a85SDaniel Walter 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3634c3968a85SDaniel Walter 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3635c3968a85SDaniel Walter 
36365e670d84SDavid Ahern 	if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
3637c3968a85SDaniel Walter 	    rt != net->ipv6.ip6_null_entry &&
3638c3968a85SDaniel Walter 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
363960006a48SWei Wang 		spin_lock_bh(&rt6_exception_lock);
3640c3968a85SDaniel Walter 		/* remove prefsrc entry */
3641c3968a85SDaniel Walter 		rt->rt6i_prefsrc.plen = 0;
364260006a48SWei Wang 		/* need to update cache as well */
364360006a48SWei Wang 		rt6_exceptions_remove_prefsrc(rt);
364460006a48SWei Wang 		spin_unlock_bh(&rt6_exception_lock);
3645c3968a85SDaniel Walter 	}
3646c3968a85SDaniel Walter 	return 0;
3647c3968a85SDaniel Walter }
3648c3968a85SDaniel Walter 
3649c3968a85SDaniel Walter void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3650c3968a85SDaniel Walter {
3651c3968a85SDaniel Walter 	struct net *net = dev_net(ifp->idev->dev);
3652c3968a85SDaniel Walter 	struct arg_dev_net_ip adni = {
3653c3968a85SDaniel Walter 		.dev = ifp->idev->dev,
3654c3968a85SDaniel Walter 		.net = net,
3655c3968a85SDaniel Walter 		.addr = &ifp->addr,
3656c3968a85SDaniel Walter 	};
36570c3584d5SLi RongQing 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
3658c3968a85SDaniel Walter }
3659c3968a85SDaniel Walter 
3660be7a010dSDuan Jiong #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
3661be7a010dSDuan Jiong 
3662be7a010dSDuan Jiong /* Remove routers and update dst entries when gateway turn into host. */
3663be7a010dSDuan Jiong static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3664be7a010dSDuan Jiong {
3665be7a010dSDuan Jiong 	struct in6_addr *gateway = (struct in6_addr *)arg;
3666be7a010dSDuan Jiong 
36672b760fcfSWei Wang 	if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
36685e670d84SDavid Ahern 	    ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
3669be7a010dSDuan Jiong 		return -1;
3670be7a010dSDuan Jiong 	}
3671b16cb459SWei Wang 
3672b16cb459SWei Wang 	/* Further clean up cached routes in exception table.
3673b16cb459SWei Wang 	 * This is needed because cached route may have a different
3674b16cb459SWei Wang 	 * gateway than its 'parent' in the case of an ip redirect.
3675b16cb459SWei Wang 	 */
3676b16cb459SWei Wang 	rt6_exceptions_clean_tohost(rt, gateway);
3677b16cb459SWei Wang 
3678be7a010dSDuan Jiong 	return 0;
3679be7a010dSDuan Jiong }
3680be7a010dSDuan Jiong 
3681be7a010dSDuan Jiong void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3682be7a010dSDuan Jiong {
3683be7a010dSDuan Jiong 	fib6_clean_all(net, fib6_clean_tohost, gateway);
3684be7a010dSDuan Jiong }
3685be7a010dSDuan Jiong 
/*
 * Argument bundle for the FIB walkers that react to netdevice state
 * changes.  The union holds either nexthop flags (read by fib6_ifup()) or
 * an event code, depending on the callback.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned int nh_flags;	/* RTNH_F_* bits to clear on ifup */
		unsigned long event;	/* presumably a NETDEV_* event - confirm */
	};
};
36932127d95aSIdo Schimmel 
3694d7dedee1SIdo Schimmel static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3695d7dedee1SIdo Schimmel {
3696d7dedee1SIdo Schimmel 	struct rt6_info *iter;
3697d7dedee1SIdo Schimmel 	struct fib6_node *fn;
3698d7dedee1SIdo Schimmel 
3699d7dedee1SIdo Schimmel 	fn = rcu_dereference_protected(rt->rt6i_node,
3700d7dedee1SIdo Schimmel 			lockdep_is_held(&rt->rt6i_table->tb6_lock));
3701d7dedee1SIdo Schimmel 	iter = rcu_dereference_protected(fn->leaf,
3702d7dedee1SIdo Schimmel 			lockdep_is_held(&rt->rt6i_table->tb6_lock));
3703d7dedee1SIdo Schimmel 	while (iter) {
3704d7dedee1SIdo Schimmel 		if (iter->rt6i_metric == rt->rt6i_metric &&
3705d7dedee1SIdo Schimmel 		    rt6_qualify_for_ecmp(iter))
3706d7dedee1SIdo Schimmel 			return iter;
3707d7dedee1SIdo Schimmel 		iter = rcu_dereference_protected(iter->rt6_next,
3708d7dedee1SIdo Schimmel 				lockdep_is_held(&rt->rt6i_table->tb6_lock));
3709d7dedee1SIdo Schimmel 	}
3710d7dedee1SIdo Schimmel 
3711d7dedee1SIdo Schimmel 	return NULL;
3712d7dedee1SIdo Schimmel }
3713d7dedee1SIdo Schimmel 
3714d7dedee1SIdo Schimmel static bool rt6_is_dead(const struct rt6_info *rt)
3715d7dedee1SIdo Schimmel {
37165e670d84SDavid Ahern 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
37175e670d84SDavid Ahern 	    (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
3718d7dedee1SIdo Schimmel 	     rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3719d7dedee1SIdo Schimmel 		return true;
3720d7dedee1SIdo Schimmel 
3721d7dedee1SIdo Schimmel 	return false;
3722d7dedee1SIdo Schimmel }
3723d7dedee1SIdo Schimmel 
3724d7dedee1SIdo Schimmel static int rt6_multipath_total_weight(const struct rt6_info *rt)
3725d7dedee1SIdo Schimmel {
3726d7dedee1SIdo Schimmel 	struct rt6_info *iter;
3727d7dedee1SIdo Schimmel 	int total = 0;
3728d7dedee1SIdo Schimmel 
3729d7dedee1SIdo Schimmel 	if (!rt6_is_dead(rt))
37305e670d84SDavid Ahern 		total += rt->fib6_nh.nh_weight;
3731d7dedee1SIdo Schimmel 
3732d7dedee1SIdo Schimmel 	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3733d7dedee1SIdo Schimmel 		if (!rt6_is_dead(iter))
37345e670d84SDavid Ahern 			total += iter->fib6_nh.nh_weight;
3735d7dedee1SIdo Schimmel 	}
3736d7dedee1SIdo Schimmel 
3737d7dedee1SIdo Schimmel 	return total;
3738d7dedee1SIdo Schimmel }
3739d7dedee1SIdo Schimmel 
3740d7dedee1SIdo Schimmel static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
3741d7dedee1SIdo Schimmel {
3742d7dedee1SIdo Schimmel 	int upper_bound = -1;
3743d7dedee1SIdo Schimmel 
3744d7dedee1SIdo Schimmel 	if (!rt6_is_dead(rt)) {
37455e670d84SDavid Ahern 		*weight += rt->fib6_nh.nh_weight;
3746d7dedee1SIdo Schimmel 		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3747d7dedee1SIdo Schimmel 						    total) - 1;
3748d7dedee1SIdo Schimmel 	}
37495e670d84SDavid Ahern 	atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
3750d7dedee1SIdo Schimmel }
3751d7dedee1SIdo Schimmel 
3752d7dedee1SIdo Schimmel static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
3753d7dedee1SIdo Schimmel {
3754d7dedee1SIdo Schimmel 	struct rt6_info *iter;
3755d7dedee1SIdo Schimmel 	int weight = 0;
3756d7dedee1SIdo Schimmel 
3757d7dedee1SIdo Schimmel 	rt6_upper_bound_set(rt, &weight, total);
3758d7dedee1SIdo Schimmel 
3759d7dedee1SIdo Schimmel 	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3760d7dedee1SIdo Schimmel 		rt6_upper_bound_set(iter, &weight, total);
3761d7dedee1SIdo Schimmel }
3762d7dedee1SIdo Schimmel 
3763d7dedee1SIdo Schimmel void rt6_multipath_rebalance(struct rt6_info *rt)
3764d7dedee1SIdo Schimmel {
3765d7dedee1SIdo Schimmel 	struct rt6_info *first;
3766d7dedee1SIdo Schimmel 	int total;
3767d7dedee1SIdo Schimmel 
3768d7dedee1SIdo Schimmel 	/* In case the entire multipath route was marked for flushing,
3769d7dedee1SIdo Schimmel 	 * then there is no need to rebalance upon the removal of every
3770d7dedee1SIdo Schimmel 	 * sibling route.
3771d7dedee1SIdo Schimmel 	 */
3772d7dedee1SIdo Schimmel 	if (!rt->rt6i_nsiblings || rt->should_flush)
3773d7dedee1SIdo Schimmel 		return;
3774d7dedee1SIdo Schimmel 
3775d7dedee1SIdo Schimmel 	/* During lookup routes are evaluated in order, so we need to
3776d7dedee1SIdo Schimmel 	 * make sure upper bounds are assigned from the first sibling
3777d7dedee1SIdo Schimmel 	 * onwards.
3778d7dedee1SIdo Schimmel 	 */
3779d7dedee1SIdo Schimmel 	first = rt6_multipath_first_sibling(rt);
3780d7dedee1SIdo Schimmel 	if (WARN_ON_ONCE(!first))
3781d7dedee1SIdo Schimmel 		return;
3782d7dedee1SIdo Schimmel 
3783d7dedee1SIdo Schimmel 	total = rt6_multipath_total_weight(first);
3784d7dedee1SIdo Schimmel 	rt6_multipath_upper_bound_set(first, total);
3785d7dedee1SIdo Schimmel }
3786d7dedee1SIdo Schimmel 
37872127d95aSIdo Schimmel static int fib6_ifup(struct rt6_info *rt, void *p_arg)
37882127d95aSIdo Schimmel {
37892127d95aSIdo Schimmel 	const struct arg_netdev_event *arg = p_arg;
37907aef6859SDavid Ahern 	struct net *net = dev_net(arg->dev);
37912127d95aSIdo Schimmel 
37925e670d84SDavid Ahern 	if (rt != net->ipv6.ip6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
37935e670d84SDavid Ahern 		rt->fib6_nh.nh_flags &= ~arg->nh_flags;
37947aef6859SDavid Ahern 		fib6_update_sernum_upto_root(net, rt);
3795d7dedee1SIdo Schimmel 		rt6_multipath_rebalance(rt);
37961de178edSIdo Schimmel 	}
37972127d95aSIdo Schimmel 
37982127d95aSIdo Schimmel 	return 0;
37992127d95aSIdo Schimmel }
38002127d95aSIdo Schimmel 
38012127d95aSIdo Schimmel void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
38022127d95aSIdo Schimmel {
38032127d95aSIdo Schimmel 	struct arg_netdev_event arg = {
38042127d95aSIdo Schimmel 		.dev = dev,
38056802f3adSIdo Schimmel 		{
38062127d95aSIdo Schimmel 			.nh_flags = nh_flags,
38076802f3adSIdo Schimmel 		},
38082127d95aSIdo Schimmel 	};
38092127d95aSIdo Schimmel 
38102127d95aSIdo Schimmel 	if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
38112127d95aSIdo Schimmel 		arg.nh_flags |= RTNH_F_LINKDOWN;
38122127d95aSIdo Schimmel 
38132127d95aSIdo Schimmel 	fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
38142127d95aSIdo Schimmel }
38152127d95aSIdo Schimmel 
38161de178edSIdo Schimmel static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
38171de178edSIdo Schimmel 				   const struct net_device *dev)
38181de178edSIdo Schimmel {
38191de178edSIdo Schimmel 	struct rt6_info *iter;
38201de178edSIdo Schimmel 
38215e670d84SDavid Ahern 	if (rt->fib6_nh.nh_dev == dev)
38221de178edSIdo Schimmel 		return true;
38231de178edSIdo Schimmel 	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
38245e670d84SDavid Ahern 		if (iter->fib6_nh.nh_dev == dev)
38251de178edSIdo Schimmel 			return true;
38261de178edSIdo Schimmel 
38271de178edSIdo Schimmel 	return false;
38281de178edSIdo Schimmel }
38291de178edSIdo Schimmel 
38301de178edSIdo Schimmel static void rt6_multipath_flush(struct rt6_info *rt)
38311de178edSIdo Schimmel {
38321de178edSIdo Schimmel 	struct rt6_info *iter;
38331de178edSIdo Schimmel 
38341de178edSIdo Schimmel 	rt->should_flush = 1;
38351de178edSIdo Schimmel 	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
38361de178edSIdo Schimmel 		iter->should_flush = 1;
38371de178edSIdo Schimmel }
38381de178edSIdo Schimmel 
38391de178edSIdo Schimmel static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
38401de178edSIdo Schimmel 					     const struct net_device *down_dev)
38411de178edSIdo Schimmel {
38421de178edSIdo Schimmel 	struct rt6_info *iter;
38431de178edSIdo Schimmel 	unsigned int dead = 0;
38441de178edSIdo Schimmel 
38455e670d84SDavid Ahern 	if (rt->fib6_nh.nh_dev == down_dev ||
38465e670d84SDavid Ahern 	    rt->fib6_nh.nh_flags & RTNH_F_DEAD)
38471de178edSIdo Schimmel 		dead++;
38481de178edSIdo Schimmel 	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
38495e670d84SDavid Ahern 		if (iter->fib6_nh.nh_dev == down_dev ||
38505e670d84SDavid Ahern 		    iter->fib6_nh.nh_flags & RTNH_F_DEAD)
38511de178edSIdo Schimmel 			dead++;
38521de178edSIdo Schimmel 
38531de178edSIdo Schimmel 	return dead;
38541de178edSIdo Schimmel }
38551de178edSIdo Schimmel 
38561de178edSIdo Schimmel static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
38571de178edSIdo Schimmel 				       const struct net_device *dev,
38581de178edSIdo Schimmel 				       unsigned int nh_flags)
38591de178edSIdo Schimmel {
38601de178edSIdo Schimmel 	struct rt6_info *iter;
38611de178edSIdo Schimmel 
38625e670d84SDavid Ahern 	if (rt->fib6_nh.nh_dev == dev)
38635e670d84SDavid Ahern 		rt->fib6_nh.nh_flags |= nh_flags;
38641de178edSIdo Schimmel 	list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
38655e670d84SDavid Ahern 		if (iter->fib6_nh.nh_dev == dev)
38665e670d84SDavid Ahern 			iter->fib6_nh.nh_flags |= nh_flags;
38671de178edSIdo Schimmel }
38681de178edSIdo Schimmel 
3869a1a22c12SDavid Ahern /* called with write lock held for table with rt */
38704c981e28SIdo Schimmel static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
38711da177e4SLinus Torvalds {
38724c981e28SIdo Schimmel 	const struct arg_netdev_event *arg = p_arg;
38734c981e28SIdo Schimmel 	const struct net_device *dev = arg->dev;
38747aef6859SDavid Ahern 	struct net *net = dev_net(dev);
38758ed67789SDaniel Lezcano 
38761de178edSIdo Schimmel 	if (rt == net->ipv6.ip6_null_entry)
387727c6fa73SIdo Schimmel 		return 0;
387827c6fa73SIdo Schimmel 
387927c6fa73SIdo Schimmel 	switch (arg->event) {
388027c6fa73SIdo Schimmel 	case NETDEV_UNREGISTER:
38815e670d84SDavid Ahern 		return rt->fib6_nh.nh_dev == dev ? -1 : 0;
388227c6fa73SIdo Schimmel 	case NETDEV_DOWN:
38831de178edSIdo Schimmel 		if (rt->should_flush)
388427c6fa73SIdo Schimmel 			return -1;
38851de178edSIdo Schimmel 		if (!rt->rt6i_nsiblings)
38865e670d84SDavid Ahern 			return rt->fib6_nh.nh_dev == dev ? -1 : 0;
38871de178edSIdo Schimmel 		if (rt6_multipath_uses_dev(rt, dev)) {
38881de178edSIdo Schimmel 			unsigned int count;
38891de178edSIdo Schimmel 
38901de178edSIdo Schimmel 			count = rt6_multipath_dead_count(rt, dev);
38911de178edSIdo Schimmel 			if (rt->rt6i_nsiblings + 1 == count) {
38921de178edSIdo Schimmel 				rt6_multipath_flush(rt);
38931de178edSIdo Schimmel 				return -1;
38941de178edSIdo Schimmel 			}
38951de178edSIdo Schimmel 			rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
38961de178edSIdo Schimmel 						   RTNH_F_LINKDOWN);
38977aef6859SDavid Ahern 			fib6_update_sernum(net, rt);
3898d7dedee1SIdo Schimmel 			rt6_multipath_rebalance(rt);
38991de178edSIdo Schimmel 		}
39001de178edSIdo Schimmel 		return -2;
390127c6fa73SIdo Schimmel 	case NETDEV_CHANGE:
39025e670d84SDavid Ahern 		if (rt->fib6_nh.nh_dev != dev ||
39031de178edSIdo Schimmel 		    rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
390427c6fa73SIdo Schimmel 			break;
39055e670d84SDavid Ahern 		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3906d7dedee1SIdo Schimmel 		rt6_multipath_rebalance(rt);
390727c6fa73SIdo Schimmel 		break;
39082b241361SIdo Schimmel 	}
3909c159d30cSDavid S. Miller 
39101da177e4SLinus Torvalds 	return 0;
39111da177e4SLinus Torvalds }
39121da177e4SLinus Torvalds 
391327c6fa73SIdo Schimmel void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
39141da177e4SLinus Torvalds {
39154c981e28SIdo Schimmel 	struct arg_netdev_event arg = {
39168ed67789SDaniel Lezcano 		.dev = dev,
39176802f3adSIdo Schimmel 		{
39184c981e28SIdo Schimmel 			.event = event,
39196802f3adSIdo Schimmel 		},
39208ed67789SDaniel Lezcano 	};
39218ed67789SDaniel Lezcano 
39224c981e28SIdo Schimmel 	fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
39234c981e28SIdo Schimmel }
39244c981e28SIdo Schimmel 
39254c981e28SIdo Schimmel void rt6_disable_ip(struct net_device *dev, unsigned long event)
39264c981e28SIdo Schimmel {
39274c981e28SIdo Schimmel 	rt6_sync_down_dev(dev, event);
39284c981e28SIdo Schimmel 	rt6_uncached_list_flush_dev(dev_net(dev), dev);
39294c981e28SIdo Schimmel 	neigh_ifdown(&nd_tbl, dev);
39301da177e4SLinus Torvalds }
39311da177e4SLinus Torvalds 
/* Argument block handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* MTU value passed to rt6_mtu_change() */
};
39361da177e4SLinus Torvalds 
39371da177e4SLinus Torvalds static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
39381da177e4SLinus Torvalds {
39391da177e4SLinus Torvalds 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
39401da177e4SLinus Torvalds 	struct inet6_dev *idev;
39411da177e4SLinus Torvalds 
39421da177e4SLinus Torvalds 	/* In IPv6 pmtu discovery is not optional,
39431da177e4SLinus Torvalds 	   so that RTAX_MTU lock cannot disable it.
39441da177e4SLinus Torvalds 	   We still use this lock to block changes
39451da177e4SLinus Torvalds 	   caused by addrconf/ndisc.
39461da177e4SLinus Torvalds 	*/
39471da177e4SLinus Torvalds 
39481da177e4SLinus Torvalds 	idev = __in6_dev_get(arg->dev);
394938308473SDavid S. Miller 	if (!idev)
39501da177e4SLinus Torvalds 		return 0;
39511da177e4SLinus Torvalds 
39521da177e4SLinus Torvalds 	/* For administrative MTU increase, there is no way to discover
39531da177e4SLinus Torvalds 	   IPv6 PMTU increase, so PMTU increase should be updated here.
39541da177e4SLinus Torvalds 	   Since RFC 1981 doesn't include administrative MTU increase
39551da177e4SLinus Torvalds 	   update PMTU increase is a MUST. (i.e. jumbo frame)
39561da177e4SLinus Torvalds 	 */
39575e670d84SDavid Ahern 	if (rt->fib6_nh.nh_dev == arg->dev &&
3958*d4ead6b3SDavid Ahern 	    !fib6_metric_locked(rt, RTAX_MTU)) {
3959*d4ead6b3SDavid Ahern 		u32 mtu = rt->fib6_pmtu;
3960*d4ead6b3SDavid Ahern 
3961*d4ead6b3SDavid Ahern 		if (mtu >= arg->mtu ||
3962*d4ead6b3SDavid Ahern 		    (mtu < arg->mtu && mtu == idev->cnf.mtu6))
3963*d4ead6b3SDavid Ahern 			fib6_metric_set(rt, RTAX_MTU, arg->mtu);
3964*d4ead6b3SDavid Ahern 
3965f5bbe7eeSWei Wang 		spin_lock_bh(&rt6_exception_lock);
3966e9fa1495SStefano Brivio 		rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
3967f5bbe7eeSWei Wang 		spin_unlock_bh(&rt6_exception_lock);
39684b32b5adSMartin KaFai Lau 	}
39691da177e4SLinus Torvalds 	return 0;
39701da177e4SLinus Torvalds }
39711da177e4SLinus Torvalds 
397295c96174SEric Dumazet void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
39731da177e4SLinus Torvalds {
3974c71099acSThomas Graf 	struct rt6_mtu_change_arg arg = {
3975c71099acSThomas Graf 		.dev = dev,
3976c71099acSThomas Graf 		.mtu = mtu,
3977c71099acSThomas Graf 	};
39781da177e4SLinus Torvalds 
39790c3584d5SLi RongQing 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
39801da177e4SLinus Torvalds }
39811da177e4SLinus Torvalds 
3982ef7c79edSPatrick McHardy static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
39835176f91eSThomas Graf 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
398486872cb5SThomas Graf 	[RTA_OIF]               = { .type = NLA_U32 },
3985ab364a6fSThomas Graf 	[RTA_IIF]		= { .type = NLA_U32 },
398686872cb5SThomas Graf 	[RTA_PRIORITY]          = { .type = NLA_U32 },
398786872cb5SThomas Graf 	[RTA_METRICS]           = { .type = NLA_NESTED },
398851ebd318SNicolas Dichtel 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
3989c78ba6d6SLubomir Rintel 	[RTA_PREF]              = { .type = NLA_U8 },
399019e42e45SRoopa Prabhu 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
399119e42e45SRoopa Prabhu 	[RTA_ENCAP]		= { .type = NLA_NESTED },
399232bc201eSXin Long 	[RTA_EXPIRES]		= { .type = NLA_U32 },
3993622ec2c9SLorenzo Colitti 	[RTA_UID]		= { .type = NLA_U32 },
39943b45a410SLiping Zhang 	[RTA_MARK]		= { .type = NLA_U32 },
399586872cb5SThomas Graf };
399686872cb5SThomas Graf 
399786872cb5SThomas Graf static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
3998333c4301SDavid Ahern 			      struct fib6_config *cfg,
3999333c4301SDavid Ahern 			      struct netlink_ext_ack *extack)
40001da177e4SLinus Torvalds {
400186872cb5SThomas Graf 	struct rtmsg *rtm;
400286872cb5SThomas Graf 	struct nlattr *tb[RTA_MAX+1];
4003c78ba6d6SLubomir Rintel 	unsigned int pref;
400486872cb5SThomas Graf 	int err;
40051da177e4SLinus Torvalds 
4006fceb6435SJohannes Berg 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4007fceb6435SJohannes Berg 			  NULL);
400886872cb5SThomas Graf 	if (err < 0)
400986872cb5SThomas Graf 		goto errout;
40101da177e4SLinus Torvalds 
401186872cb5SThomas Graf 	err = -EINVAL;
401286872cb5SThomas Graf 	rtm = nlmsg_data(nlh);
401386872cb5SThomas Graf 	memset(cfg, 0, sizeof(*cfg));
401486872cb5SThomas Graf 
401586872cb5SThomas Graf 	cfg->fc_table = rtm->rtm_table;
401686872cb5SThomas Graf 	cfg->fc_dst_len = rtm->rtm_dst_len;
401786872cb5SThomas Graf 	cfg->fc_src_len = rtm->rtm_src_len;
401886872cb5SThomas Graf 	cfg->fc_flags = RTF_UP;
401986872cb5SThomas Graf 	cfg->fc_protocol = rtm->rtm_protocol;
4020ef2c7d7bSNicolas Dichtel 	cfg->fc_type = rtm->rtm_type;
402186872cb5SThomas Graf 
4022ef2c7d7bSNicolas Dichtel 	if (rtm->rtm_type == RTN_UNREACHABLE ||
4023ef2c7d7bSNicolas Dichtel 	    rtm->rtm_type == RTN_BLACKHOLE ||
4024b4949ab2SNicolas Dichtel 	    rtm->rtm_type == RTN_PROHIBIT ||
4025b4949ab2SNicolas Dichtel 	    rtm->rtm_type == RTN_THROW)
402686872cb5SThomas Graf 		cfg->fc_flags |= RTF_REJECT;
402786872cb5SThomas Graf 
4028ab79ad14SMaciej Żenczykowski 	if (rtm->rtm_type == RTN_LOCAL)
4029ab79ad14SMaciej Żenczykowski 		cfg->fc_flags |= RTF_LOCAL;
4030ab79ad14SMaciej Żenczykowski 
40311f56a01fSMartin KaFai Lau 	if (rtm->rtm_flags & RTM_F_CLONED)
40321f56a01fSMartin KaFai Lau 		cfg->fc_flags |= RTF_CACHE;
40331f56a01fSMartin KaFai Lau 
4034fc1e64e1SDavid Ahern 	cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4035fc1e64e1SDavid Ahern 
403615e47304SEric W. Biederman 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
403786872cb5SThomas Graf 	cfg->fc_nlinfo.nlh = nlh;
40383b1e0a65SYOSHIFUJI Hideaki 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
403986872cb5SThomas Graf 
404086872cb5SThomas Graf 	if (tb[RTA_GATEWAY]) {
404167b61f6cSJiri Benc 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
404286872cb5SThomas Graf 		cfg->fc_flags |= RTF_GATEWAY;
40431da177e4SLinus Torvalds 	}
404486872cb5SThomas Graf 
404586872cb5SThomas Graf 	if (tb[RTA_DST]) {
404686872cb5SThomas Graf 		int plen = (rtm->rtm_dst_len + 7) >> 3;
404786872cb5SThomas Graf 
404886872cb5SThomas Graf 		if (nla_len(tb[RTA_DST]) < plen)
404986872cb5SThomas Graf 			goto errout;
405086872cb5SThomas Graf 
405186872cb5SThomas Graf 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
40521da177e4SLinus Torvalds 	}
405386872cb5SThomas Graf 
405486872cb5SThomas Graf 	if (tb[RTA_SRC]) {
405586872cb5SThomas Graf 		int plen = (rtm->rtm_src_len + 7) >> 3;
405686872cb5SThomas Graf 
405786872cb5SThomas Graf 		if (nla_len(tb[RTA_SRC]) < plen)
405886872cb5SThomas Graf 			goto errout;
405986872cb5SThomas Graf 
406086872cb5SThomas Graf 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
40611da177e4SLinus Torvalds 	}
406286872cb5SThomas Graf 
4063c3968a85SDaniel Walter 	if (tb[RTA_PREFSRC])
406467b61f6cSJiri Benc 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
4065c3968a85SDaniel Walter 
406686872cb5SThomas Graf 	if (tb[RTA_OIF])
406786872cb5SThomas Graf 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
406886872cb5SThomas Graf 
406986872cb5SThomas Graf 	if (tb[RTA_PRIORITY])
407086872cb5SThomas Graf 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
407186872cb5SThomas Graf 
407286872cb5SThomas Graf 	if (tb[RTA_METRICS]) {
407386872cb5SThomas Graf 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
407486872cb5SThomas Graf 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
40751da177e4SLinus Torvalds 	}
407686872cb5SThomas Graf 
407786872cb5SThomas Graf 	if (tb[RTA_TABLE])
407886872cb5SThomas Graf 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
407986872cb5SThomas Graf 
408051ebd318SNicolas Dichtel 	if (tb[RTA_MULTIPATH]) {
408151ebd318SNicolas Dichtel 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
408251ebd318SNicolas Dichtel 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
40839ed59592SDavid Ahern 
40849ed59592SDavid Ahern 		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
4085c255bd68SDavid Ahern 						     cfg->fc_mp_len, extack);
40869ed59592SDavid Ahern 		if (err < 0)
40879ed59592SDavid Ahern 			goto errout;
408851ebd318SNicolas Dichtel 	}
408951ebd318SNicolas Dichtel 
4090c78ba6d6SLubomir Rintel 	if (tb[RTA_PREF]) {
4091c78ba6d6SLubomir Rintel 		pref = nla_get_u8(tb[RTA_PREF]);
4092c78ba6d6SLubomir Rintel 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
4093c78ba6d6SLubomir Rintel 		    pref != ICMPV6_ROUTER_PREF_HIGH)
4094c78ba6d6SLubomir Rintel 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
4095c78ba6d6SLubomir Rintel 		cfg->fc_flags |= RTF_PREF(pref);
4096c78ba6d6SLubomir Rintel 	}
4097c78ba6d6SLubomir Rintel 
409819e42e45SRoopa Prabhu 	if (tb[RTA_ENCAP])
409919e42e45SRoopa Prabhu 		cfg->fc_encap = tb[RTA_ENCAP];
410019e42e45SRoopa Prabhu 
41019ed59592SDavid Ahern 	if (tb[RTA_ENCAP_TYPE]) {
410219e42e45SRoopa Prabhu 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
410319e42e45SRoopa Prabhu 
4104c255bd68SDavid Ahern 		err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
41059ed59592SDavid Ahern 		if (err < 0)
41069ed59592SDavid Ahern 			goto errout;
41079ed59592SDavid Ahern 	}
41089ed59592SDavid Ahern 
410932bc201eSXin Long 	if (tb[RTA_EXPIRES]) {
411032bc201eSXin Long 		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
411132bc201eSXin Long 
411232bc201eSXin Long 		if (addrconf_finite_timeout(timeout)) {
411332bc201eSXin Long 			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
411432bc201eSXin Long 			cfg->fc_flags |= RTF_EXPIRES;
411532bc201eSXin Long 		}
411632bc201eSXin Long 	}
411732bc201eSXin Long 
411886872cb5SThomas Graf 	err = 0;
411986872cb5SThomas Graf errout:
412086872cb5SThomas Graf 	return err;
41211da177e4SLinus Torvalds }
41221da177e4SLinus Torvalds 
41236b9ea5a6SRoopa Prabhu struct rt6_nh {
41246b9ea5a6SRoopa Prabhu 	struct rt6_info *rt6_info;
41256b9ea5a6SRoopa Prabhu 	struct fib6_config r_cfg;
41266b9ea5a6SRoopa Prabhu 	struct list_head next;
41276b9ea5a6SRoopa Prabhu };
41286b9ea5a6SRoopa Prabhu 
41296b9ea5a6SRoopa Prabhu static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
41306b9ea5a6SRoopa Prabhu {
41316b9ea5a6SRoopa Prabhu 	struct rt6_nh *nh;
41326b9ea5a6SRoopa Prabhu 
41336b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, rt6_nh_list, next) {
41347d4d5065SDavid Ahern 		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
41356b9ea5a6SRoopa Prabhu 		        &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
41366b9ea5a6SRoopa Prabhu 		        nh->r_cfg.fc_ifindex);
41376b9ea5a6SRoopa Prabhu 	}
41386b9ea5a6SRoopa Prabhu }
41396b9ea5a6SRoopa Prabhu 
4140*d4ead6b3SDavid Ahern static int ip6_route_info_append(struct net *net,
4141*d4ead6b3SDavid Ahern 				 struct list_head *rt6_nh_list,
41426b9ea5a6SRoopa Prabhu 				 struct rt6_info *rt, struct fib6_config *r_cfg)
41436b9ea5a6SRoopa Prabhu {
41446b9ea5a6SRoopa Prabhu 	struct rt6_nh *nh;
41456b9ea5a6SRoopa Prabhu 	int err = -EEXIST;
41466b9ea5a6SRoopa Prabhu 
41476b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, rt6_nh_list, next) {
41486b9ea5a6SRoopa Prabhu 		/* check if rt6_info already exists */
4149f06b7549SDavid Ahern 		if (rt6_duplicate_nexthop(nh->rt6_info, rt))
41506b9ea5a6SRoopa Prabhu 			return err;
41516b9ea5a6SRoopa Prabhu 	}
41526b9ea5a6SRoopa Prabhu 
41536b9ea5a6SRoopa Prabhu 	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
41546b9ea5a6SRoopa Prabhu 	if (!nh)
41556b9ea5a6SRoopa Prabhu 		return -ENOMEM;
41566b9ea5a6SRoopa Prabhu 	nh->rt6_info = rt;
4157*d4ead6b3SDavid Ahern 	err = ip6_convert_metrics(net, rt, r_cfg);
41586b9ea5a6SRoopa Prabhu 	if (err) {
41596b9ea5a6SRoopa Prabhu 		kfree(nh);
41606b9ea5a6SRoopa Prabhu 		return err;
41616b9ea5a6SRoopa Prabhu 	}
41626b9ea5a6SRoopa Prabhu 	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
41636b9ea5a6SRoopa Prabhu 	list_add_tail(&nh->next, rt6_nh_list);
41646b9ea5a6SRoopa Prabhu 
41656b9ea5a6SRoopa Prabhu 	return 0;
41666b9ea5a6SRoopa Prabhu }
41676b9ea5a6SRoopa Prabhu 
41683b1137feSDavid Ahern static void ip6_route_mpath_notify(struct rt6_info *rt,
41693b1137feSDavid Ahern 				   struct rt6_info *rt_last,
41703b1137feSDavid Ahern 				   struct nl_info *info,
41713b1137feSDavid Ahern 				   __u16 nlflags)
41723b1137feSDavid Ahern {
41733b1137feSDavid Ahern 	/* if this is an APPEND route, then rt points to the first route
41743b1137feSDavid Ahern 	 * inserted and rt_last points to last route inserted. Userspace
41753b1137feSDavid Ahern 	 * wants a consistent dump of the route which starts at the first
41763b1137feSDavid Ahern 	 * nexthop. Since sibling routes are always added at the end of
41773b1137feSDavid Ahern 	 * the list, find the first sibling of the last route appended
41783b1137feSDavid Ahern 	 */
41793b1137feSDavid Ahern 	if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
41803b1137feSDavid Ahern 		rt = list_first_entry(&rt_last->rt6i_siblings,
41813b1137feSDavid Ahern 				      struct rt6_info,
41823b1137feSDavid Ahern 				      rt6i_siblings);
41833b1137feSDavid Ahern 	}
41843b1137feSDavid Ahern 
41853b1137feSDavid Ahern 	if (rt)
41863b1137feSDavid Ahern 		inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
41873b1137feSDavid Ahern }
41883b1137feSDavid Ahern 
4189333c4301SDavid Ahern static int ip6_route_multipath_add(struct fib6_config *cfg,
4190333c4301SDavid Ahern 				   struct netlink_ext_ack *extack)
419151ebd318SNicolas Dichtel {
41923b1137feSDavid Ahern 	struct rt6_info *rt_notif = NULL, *rt_last = NULL;
41933b1137feSDavid Ahern 	struct nl_info *info = &cfg->fc_nlinfo;
419451ebd318SNicolas Dichtel 	struct fib6_config r_cfg;
419551ebd318SNicolas Dichtel 	struct rtnexthop *rtnh;
41966b9ea5a6SRoopa Prabhu 	struct rt6_info *rt;
41976b9ea5a6SRoopa Prabhu 	struct rt6_nh *err_nh;
41986b9ea5a6SRoopa Prabhu 	struct rt6_nh *nh, *nh_safe;
41993b1137feSDavid Ahern 	__u16 nlflags;
420051ebd318SNicolas Dichtel 	int remaining;
420151ebd318SNicolas Dichtel 	int attrlen;
42026b9ea5a6SRoopa Prabhu 	int err = 1;
42036b9ea5a6SRoopa Prabhu 	int nhn = 0;
42046b9ea5a6SRoopa Prabhu 	int replace = (cfg->fc_nlinfo.nlh &&
42056b9ea5a6SRoopa Prabhu 		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
42066b9ea5a6SRoopa Prabhu 	LIST_HEAD(rt6_nh_list);
420751ebd318SNicolas Dichtel 
42083b1137feSDavid Ahern 	nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
42093b1137feSDavid Ahern 	if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
42103b1137feSDavid Ahern 		nlflags |= NLM_F_APPEND;
42113b1137feSDavid Ahern 
421235f1b4e9SMichal Kubeček 	remaining = cfg->fc_mp_len;
421351ebd318SNicolas Dichtel 	rtnh = (struct rtnexthop *)cfg->fc_mp;
421451ebd318SNicolas Dichtel 
42156b9ea5a6SRoopa Prabhu 	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
42166b9ea5a6SRoopa Prabhu 	 * rt6_info structs per nexthop
42176b9ea5a6SRoopa Prabhu 	 */
421851ebd318SNicolas Dichtel 	while (rtnh_ok(rtnh, remaining)) {
421951ebd318SNicolas Dichtel 		memcpy(&r_cfg, cfg, sizeof(*cfg));
422051ebd318SNicolas Dichtel 		if (rtnh->rtnh_ifindex)
422151ebd318SNicolas Dichtel 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
422251ebd318SNicolas Dichtel 
422351ebd318SNicolas Dichtel 		attrlen = rtnh_attrlen(rtnh);
422451ebd318SNicolas Dichtel 		if (attrlen > 0) {
422551ebd318SNicolas Dichtel 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
422651ebd318SNicolas Dichtel 
422751ebd318SNicolas Dichtel 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
422851ebd318SNicolas Dichtel 			if (nla) {
422967b61f6cSJiri Benc 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
423051ebd318SNicolas Dichtel 				r_cfg.fc_flags |= RTF_GATEWAY;
423151ebd318SNicolas Dichtel 			}
423219e42e45SRoopa Prabhu 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
423319e42e45SRoopa Prabhu 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
423419e42e45SRoopa Prabhu 			if (nla)
423519e42e45SRoopa Prabhu 				r_cfg.fc_encap_type = nla_get_u16(nla);
423651ebd318SNicolas Dichtel 		}
42376b9ea5a6SRoopa Prabhu 
423868e2ffdeSDavid Ahern 		r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
4239333c4301SDavid Ahern 		rt = ip6_route_info_create(&r_cfg, extack);
42408c5b83f0SRoopa Prabhu 		if (IS_ERR(rt)) {
42418c5b83f0SRoopa Prabhu 			err = PTR_ERR(rt);
42428c5b83f0SRoopa Prabhu 			rt = NULL;
42436b9ea5a6SRoopa Prabhu 			goto cleanup;
42448c5b83f0SRoopa Prabhu 		}
42456b9ea5a6SRoopa Prabhu 
42465e670d84SDavid Ahern 		rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
4247398958aeSIdo Schimmel 
4248*d4ead6b3SDavid Ahern 		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4249*d4ead6b3SDavid Ahern 					    rt, &r_cfg);
425051ebd318SNicolas Dichtel 		if (err) {
4251587fea74SWei Wang 			dst_release_immediate(&rt->dst);
42526b9ea5a6SRoopa Prabhu 			goto cleanup;
425351ebd318SNicolas Dichtel 		}
42546b9ea5a6SRoopa Prabhu 
42556b9ea5a6SRoopa Prabhu 		rtnh = rtnh_next(rtnh, &remaining);
425651ebd318SNicolas Dichtel 	}
42576b9ea5a6SRoopa Prabhu 
42583b1137feSDavid Ahern 	/* for add and replace send one notification with all nexthops.
42593b1137feSDavid Ahern 	 * Skip the notification in fib6_add_rt2node and send one with
42603b1137feSDavid Ahern 	 * the full route when done
42613b1137feSDavid Ahern 	 */
42623b1137feSDavid Ahern 	info->skip_notify = 1;
42633b1137feSDavid Ahern 
42646b9ea5a6SRoopa Prabhu 	err_nh = NULL;
42656b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, &rt6_nh_list, next) {
42663b1137feSDavid Ahern 		rt_last = nh->rt6_info;
4267*d4ead6b3SDavid Ahern 		err = __ip6_ins_rt(nh->rt6_info, info, extack);
42683b1137feSDavid Ahern 		/* save reference to first route for notification */
42693b1137feSDavid Ahern 		if (!rt_notif && !err)
42703b1137feSDavid Ahern 			rt_notif = nh->rt6_info;
42713b1137feSDavid Ahern 
42726b9ea5a6SRoopa Prabhu 		/* nh->rt6_info is used or freed at this point, reset to NULL*/
42736b9ea5a6SRoopa Prabhu 		nh->rt6_info = NULL;
42746b9ea5a6SRoopa Prabhu 		if (err) {
42756b9ea5a6SRoopa Prabhu 			if (replace && nhn)
42766b9ea5a6SRoopa Prabhu 				ip6_print_replace_route_err(&rt6_nh_list);
42776b9ea5a6SRoopa Prabhu 			err_nh = nh;
42786b9ea5a6SRoopa Prabhu 			goto add_errout;
42796b9ea5a6SRoopa Prabhu 		}
42806b9ea5a6SRoopa Prabhu 
42811a72418bSNicolas Dichtel 		/* Because each route is added like a single route we remove
428227596472SMichal Kubeček 		 * these flags after the first nexthop: if there is a collision,
428327596472SMichal Kubeček 		 * we have already failed to add the first nexthop:
428427596472SMichal Kubeček 		 * fib6_add_rt2node() has rejected it; when replacing, old
428527596472SMichal Kubeček 		 * nexthops have been replaced by first new, the rest should
428627596472SMichal Kubeček 		 * be added to it.
42871a72418bSNicolas Dichtel 		 */
428827596472SMichal Kubeček 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
428927596472SMichal Kubeček 						     NLM_F_REPLACE);
42906b9ea5a6SRoopa Prabhu 		nhn++;
42916b9ea5a6SRoopa Prabhu 	}
42926b9ea5a6SRoopa Prabhu 
42933b1137feSDavid Ahern 	/* success ... tell user about new route */
42943b1137feSDavid Ahern 	ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
42956b9ea5a6SRoopa Prabhu 	goto cleanup;
42966b9ea5a6SRoopa Prabhu 
42976b9ea5a6SRoopa Prabhu add_errout:
42983b1137feSDavid Ahern 	/* send notification for routes that were added so that
42993b1137feSDavid Ahern 	 * the delete notifications sent by ip6_route_del are
43003b1137feSDavid Ahern 	 * coherent
43013b1137feSDavid Ahern 	 */
43023b1137feSDavid Ahern 	if (rt_notif)
43033b1137feSDavid Ahern 		ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
43043b1137feSDavid Ahern 
43056b9ea5a6SRoopa Prabhu 	/* Delete routes that were already added */
43066b9ea5a6SRoopa Prabhu 	list_for_each_entry(nh, &rt6_nh_list, next) {
43076b9ea5a6SRoopa Prabhu 		if (err_nh == nh)
43086b9ea5a6SRoopa Prabhu 			break;
4309333c4301SDavid Ahern 		ip6_route_del(&nh->r_cfg, extack);
43106b9ea5a6SRoopa Prabhu 	}
43116b9ea5a6SRoopa Prabhu 
43126b9ea5a6SRoopa Prabhu cleanup:
43136b9ea5a6SRoopa Prabhu 	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
4314587fea74SWei Wang 		if (nh->rt6_info)
4315587fea74SWei Wang 			dst_release_immediate(&nh->rt6_info->dst);
43166b9ea5a6SRoopa Prabhu 		list_del(&nh->next);
43176b9ea5a6SRoopa Prabhu 		kfree(nh);
43186b9ea5a6SRoopa Prabhu 	}
43196b9ea5a6SRoopa Prabhu 
43206b9ea5a6SRoopa Prabhu 	return err;
43216b9ea5a6SRoopa Prabhu }
43226b9ea5a6SRoopa Prabhu 
4323333c4301SDavid Ahern static int ip6_route_multipath_del(struct fib6_config *cfg,
4324333c4301SDavid Ahern 				   struct netlink_ext_ack *extack)
43256b9ea5a6SRoopa Prabhu {
43266b9ea5a6SRoopa Prabhu 	struct fib6_config r_cfg;
43276b9ea5a6SRoopa Prabhu 	struct rtnexthop *rtnh;
43286b9ea5a6SRoopa Prabhu 	int remaining;
43296b9ea5a6SRoopa Prabhu 	int attrlen;
43306b9ea5a6SRoopa Prabhu 	int err = 1, last_err = 0;
43316b9ea5a6SRoopa Prabhu 
43326b9ea5a6SRoopa Prabhu 	remaining = cfg->fc_mp_len;
43336b9ea5a6SRoopa Prabhu 	rtnh = (struct rtnexthop *)cfg->fc_mp;
43346b9ea5a6SRoopa Prabhu 
43356b9ea5a6SRoopa Prabhu 	/* Parse a Multipath Entry */
43366b9ea5a6SRoopa Prabhu 	while (rtnh_ok(rtnh, remaining)) {
43376b9ea5a6SRoopa Prabhu 		memcpy(&r_cfg, cfg, sizeof(*cfg));
43386b9ea5a6SRoopa Prabhu 		if (rtnh->rtnh_ifindex)
43396b9ea5a6SRoopa Prabhu 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
43406b9ea5a6SRoopa Prabhu 
43416b9ea5a6SRoopa Prabhu 		attrlen = rtnh_attrlen(rtnh);
43426b9ea5a6SRoopa Prabhu 		if (attrlen > 0) {
43436b9ea5a6SRoopa Prabhu 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
43446b9ea5a6SRoopa Prabhu 
43456b9ea5a6SRoopa Prabhu 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
43466b9ea5a6SRoopa Prabhu 			if (nla) {
43476b9ea5a6SRoopa Prabhu 				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
43486b9ea5a6SRoopa Prabhu 				r_cfg.fc_flags |= RTF_GATEWAY;
43496b9ea5a6SRoopa Prabhu 			}
43506b9ea5a6SRoopa Prabhu 		}
4351333c4301SDavid Ahern 		err = ip6_route_del(&r_cfg, extack);
43526b9ea5a6SRoopa Prabhu 		if (err)
43536b9ea5a6SRoopa Prabhu 			last_err = err;
43546b9ea5a6SRoopa Prabhu 
435551ebd318SNicolas Dichtel 		rtnh = rtnh_next(rtnh, &remaining);
435651ebd318SNicolas Dichtel 	}
435751ebd318SNicolas Dichtel 
435851ebd318SNicolas Dichtel 	return last_err;
435951ebd318SNicolas Dichtel }
436051ebd318SNicolas Dichtel 
4361c21ef3e3SDavid Ahern static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4362c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
43631da177e4SLinus Torvalds {
436486872cb5SThomas Graf 	struct fib6_config cfg;
436586872cb5SThomas Graf 	int err;
43661da177e4SLinus Torvalds 
4367333c4301SDavid Ahern 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
436886872cb5SThomas Graf 	if (err < 0)
436986872cb5SThomas Graf 		return err;
437086872cb5SThomas Graf 
437151ebd318SNicolas Dichtel 	if (cfg.fc_mp)
4372333c4301SDavid Ahern 		return ip6_route_multipath_del(&cfg, extack);
43730ae81335SDavid Ahern 	else {
43740ae81335SDavid Ahern 		cfg.fc_delete_all_nh = 1;
4375333c4301SDavid Ahern 		return ip6_route_del(&cfg, extack);
43761da177e4SLinus Torvalds 	}
43770ae81335SDavid Ahern }
43781da177e4SLinus Torvalds 
4379c21ef3e3SDavid Ahern static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4380c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
43811da177e4SLinus Torvalds {
438286872cb5SThomas Graf 	struct fib6_config cfg;
438386872cb5SThomas Graf 	int err;
43841da177e4SLinus Torvalds 
4385333c4301SDavid Ahern 	err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
438686872cb5SThomas Graf 	if (err < 0)
438786872cb5SThomas Graf 		return err;
438886872cb5SThomas Graf 
438951ebd318SNicolas Dichtel 	if (cfg.fc_mp)
4390333c4301SDavid Ahern 		return ip6_route_multipath_add(&cfg, extack);
439151ebd318SNicolas Dichtel 	else
4392333c4301SDavid Ahern 		return ip6_route_add(&cfg, extack);
43931da177e4SLinus Torvalds }
43941da177e4SLinus Torvalds 
4395beb1afacSDavid Ahern static size_t rt6_nlmsg_size(struct rt6_info *rt)
4396339bf98fSThomas Graf {
4397beb1afacSDavid Ahern 	int nexthop_len = 0;
4398beb1afacSDavid Ahern 
4399beb1afacSDavid Ahern 	if (rt->rt6i_nsiblings) {
4400beb1afacSDavid Ahern 		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
4401beb1afacSDavid Ahern 			    + NLA_ALIGN(sizeof(struct rtnexthop))
4402beb1afacSDavid Ahern 			    + nla_total_size(16) /* RTA_GATEWAY */
44035e670d84SDavid Ahern 			    + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
4404beb1afacSDavid Ahern 
4405beb1afacSDavid Ahern 		nexthop_len *= rt->rt6i_nsiblings;
4406beb1afacSDavid Ahern 	}
4407beb1afacSDavid Ahern 
4408339bf98fSThomas Graf 	return NLMSG_ALIGN(sizeof(struct rtmsg))
4409339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_SRC */
4410339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_DST */
4411339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_GATEWAY */
4412339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_PREFSRC */
4413339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_TABLE */
4414339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_IIF */
4415339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_OIF */
4416339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_PRIORITY */
44176a2b9ce0SNoriaki TAKAMIYA 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
4418ea697639SDaniel Borkmann 	       + nla_total_size(sizeof(struct rta_cacheinfo))
4419c78ba6d6SLubomir Rintel 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
442019e42e45SRoopa Prabhu 	       + nla_total_size(1) /* RTA_PREF */
44215e670d84SDavid Ahern 	       + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
4422beb1afacSDavid Ahern 	       + nexthop_len;
4423beb1afacSDavid Ahern }
4424beb1afacSDavid Ahern 
4425beb1afacSDavid Ahern static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
44265be083ceSDavid Ahern 			    unsigned int *flags, bool skip_oif)
4427beb1afacSDavid Ahern {
44285e670d84SDavid Ahern 	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
4429f9d882eaSIdo Schimmel 		*flags |= RTNH_F_DEAD;
4430f9d882eaSIdo Schimmel 
44315e670d84SDavid Ahern 	if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
4432beb1afacSDavid Ahern 		*flags |= RTNH_F_LINKDOWN;
4433beb1afacSDavid Ahern 		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
4434beb1afacSDavid Ahern 			*flags |= RTNH_F_DEAD;
4435beb1afacSDavid Ahern 	}
4436beb1afacSDavid Ahern 
4437beb1afacSDavid Ahern 	if (rt->rt6i_flags & RTF_GATEWAY) {
44385e670d84SDavid Ahern 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
4439beb1afacSDavid Ahern 			goto nla_put_failure;
4440beb1afacSDavid Ahern 	}
4441beb1afacSDavid Ahern 
44425e670d84SDavid Ahern 	*flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
44435e670d84SDavid Ahern 	if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
444461e4d01eSIdo Schimmel 		*flags |= RTNH_F_OFFLOAD;
444561e4d01eSIdo Schimmel 
44465be083ceSDavid Ahern 	/* not needed for multipath encoding b/c it has a rtnexthop struct */
44475e670d84SDavid Ahern 	if (!skip_oif && rt->fib6_nh.nh_dev &&
44485e670d84SDavid Ahern 	    nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
4449beb1afacSDavid Ahern 		goto nla_put_failure;
4450beb1afacSDavid Ahern 
44515e670d84SDavid Ahern 	if (rt->fib6_nh.nh_lwtstate &&
44525e670d84SDavid Ahern 	    lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
4453beb1afacSDavid Ahern 		goto nla_put_failure;
4454beb1afacSDavid Ahern 
4455beb1afacSDavid Ahern 	return 0;
4456beb1afacSDavid Ahern 
4457beb1afacSDavid Ahern nla_put_failure:
4458beb1afacSDavid Ahern 	return -EMSGSIZE;
4459beb1afacSDavid Ahern }
4460beb1afacSDavid Ahern 
44615be083ceSDavid Ahern /* add multipath next hop */
4462beb1afacSDavid Ahern static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4463beb1afacSDavid Ahern {
44645e670d84SDavid Ahern 	const struct net_device *dev = rt->fib6_nh.nh_dev;
4465beb1afacSDavid Ahern 	struct rtnexthop *rtnh;
4466beb1afacSDavid Ahern 	unsigned int flags = 0;
4467beb1afacSDavid Ahern 
4468beb1afacSDavid Ahern 	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4469beb1afacSDavid Ahern 	if (!rtnh)
4470beb1afacSDavid Ahern 		goto nla_put_failure;
4471beb1afacSDavid Ahern 
44725e670d84SDavid Ahern 	rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
44735e670d84SDavid Ahern 	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
4474beb1afacSDavid Ahern 
44755be083ceSDavid Ahern 	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
4476beb1afacSDavid Ahern 		goto nla_put_failure;
4477beb1afacSDavid Ahern 
4478beb1afacSDavid Ahern 	rtnh->rtnh_flags = flags;
4479beb1afacSDavid Ahern 
4480beb1afacSDavid Ahern 	/* length of rtnetlink header + attributes */
4481beb1afacSDavid Ahern 	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4482beb1afacSDavid Ahern 
4483beb1afacSDavid Ahern 	return 0;
4484beb1afacSDavid Ahern 
4485beb1afacSDavid Ahern nla_put_failure:
4486beb1afacSDavid Ahern 	return -EMSGSIZE;
4487339bf98fSThomas Graf }
4488339bf98fSThomas Graf 
4489*d4ead6b3SDavid Ahern static int rt6_fill_node(struct net *net, struct sk_buff *skb,
4490*d4ead6b3SDavid Ahern 			 struct rt6_info *rt, struct dst_entry *dst,
4491*d4ead6b3SDavid Ahern 			 struct in6_addr *dest, struct in6_addr *src,
449215e47304SEric W. Biederman 			 int iif, int type, u32 portid, u32 seq,
4493f8cfe2ceSDavid Ahern 			 unsigned int flags)
44941da177e4SLinus Torvalds {
44951da177e4SLinus Torvalds 	struct rtmsg *rtm;
44961da177e4SLinus Torvalds 	struct nlmsghdr *nlh;
4497*d4ead6b3SDavid Ahern 	long expires = 0;
4498*d4ead6b3SDavid Ahern 	u32 *pmetrics;
44999e762a4aSPatrick McHardy 	u32 table;
45001da177e4SLinus Torvalds 
450115e47304SEric W. Biederman 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
450238308473SDavid S. Miller 	if (!nlh)
450326932566SPatrick McHardy 		return -EMSGSIZE;
45042d7202bfSThomas Graf 
45052d7202bfSThomas Graf 	rtm = nlmsg_data(nlh);
45061da177e4SLinus Torvalds 	rtm->rtm_family = AF_INET6;
45071da177e4SLinus Torvalds 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
45081da177e4SLinus Torvalds 	rtm->rtm_src_len = rt->rt6i_src.plen;
45091da177e4SLinus Torvalds 	rtm->rtm_tos = 0;
4510c71099acSThomas Graf 	if (rt->rt6i_table)
45119e762a4aSPatrick McHardy 		table = rt->rt6i_table->tb6_id;
4512c71099acSThomas Graf 	else
45139e762a4aSPatrick McHardy 		table = RT6_TABLE_UNSPEC;
45149e762a4aSPatrick McHardy 	rtm->rtm_table = table;
4515c78679e8SDavid S. Miller 	if (nla_put_u32(skb, RTA_TABLE, table))
4516c78679e8SDavid S. Miller 		goto nla_put_failure;
4517e8478e80SDavid Ahern 
4518e8478e80SDavid Ahern 	rtm->rtm_type = rt->fib6_type;
45191da177e4SLinus Torvalds 	rtm->rtm_flags = 0;
45201da177e4SLinus Torvalds 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
45211da177e4SLinus Torvalds 	rtm->rtm_protocol = rt->rt6i_protocol;
45221da177e4SLinus Torvalds 
45231da177e4SLinus Torvalds 	if (rt->rt6i_flags & RTF_CACHE)
45241da177e4SLinus Torvalds 		rtm->rtm_flags |= RTM_F_CLONED;
45251da177e4SLinus Torvalds 
4526*d4ead6b3SDavid Ahern 	if (dest) {
4527*d4ead6b3SDavid Ahern 		if (nla_put_in6_addr(skb, RTA_DST, dest))
4528c78679e8SDavid S. Miller 			goto nla_put_failure;
45291da177e4SLinus Torvalds 		rtm->rtm_dst_len = 128;
45301da177e4SLinus Torvalds 	} else if (rtm->rtm_dst_len)
4531930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
4532c78679e8SDavid S. Miller 			goto nla_put_failure;
45331da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
45341da177e4SLinus Torvalds 	if (src) {
4535930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_SRC, src))
4536c78679e8SDavid S. Miller 			goto nla_put_failure;
45371da177e4SLinus Torvalds 		rtm->rtm_src_len = 128;
4538c78679e8SDavid S. Miller 	} else if (rtm->rtm_src_len &&
4539930345eaSJiri Benc 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
4540c78679e8SDavid S. Miller 		goto nla_put_failure;
45411da177e4SLinus Torvalds #endif
45427bc570c8SYOSHIFUJI Hideaki 	if (iif) {
45437bc570c8SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_MROUTE
45447bc570c8SYOSHIFUJI Hideaki 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
4545fd61c6baSDavid Ahern 			int err = ip6mr_get_route(net, skb, rtm, portid);
45462cf75070SNikolay Aleksandrov 
45477bc570c8SYOSHIFUJI Hideaki 			if (err == 0)
45487bc570c8SYOSHIFUJI Hideaki 				return 0;
4549fd61c6baSDavid Ahern 			if (err < 0)
45507bc570c8SYOSHIFUJI Hideaki 				goto nla_put_failure;
45517bc570c8SYOSHIFUJI Hideaki 		} else
45527bc570c8SYOSHIFUJI Hideaki #endif
4553c78679e8SDavid S. Miller 			if (nla_put_u32(skb, RTA_IIF, iif))
4554c78679e8SDavid S. Miller 				goto nla_put_failure;
4555*d4ead6b3SDavid Ahern 	} else if (dest) {
45561da177e4SLinus Torvalds 		struct in6_addr saddr_buf;
4557*d4ead6b3SDavid Ahern 		if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
4558930345eaSJiri Benc 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4559c78679e8SDavid S. Miller 			goto nla_put_failure;
4560c3968a85SDaniel Walter 	}
4561c3968a85SDaniel Walter 
4562c3968a85SDaniel Walter 	if (rt->rt6i_prefsrc.plen) {
4563c3968a85SDaniel Walter 		struct in6_addr saddr_buf;
45644e3fd7a0SAlexey Dobriyan 		saddr_buf = rt->rt6i_prefsrc.addr;
4565930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
4566c78679e8SDavid S. Miller 			goto nla_put_failure;
45671da177e4SLinus Torvalds 	}
45682d7202bfSThomas Graf 
4569*d4ead6b3SDavid Ahern 	pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4570*d4ead6b3SDavid Ahern 	if (rtnetlink_put_metrics(skb, pmetrics) < 0)
45712d7202bfSThomas Graf 		goto nla_put_failure;
45722d7202bfSThomas Graf 
4573beb1afacSDavid Ahern 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4574beb1afacSDavid Ahern 		goto nla_put_failure;
4575beb1afacSDavid Ahern 
4576beb1afacSDavid Ahern 	/* For multipath routes, walk the siblings list and add
4577beb1afacSDavid Ahern 	 * each as a nexthop within RTA_MULTIPATH.
4578beb1afacSDavid Ahern 	 */
4579beb1afacSDavid Ahern 	if (rt->rt6i_nsiblings) {
4580beb1afacSDavid Ahern 		struct rt6_info *sibling, *next_sibling;
4581beb1afacSDavid Ahern 		struct nlattr *mp;
4582beb1afacSDavid Ahern 
4583beb1afacSDavid Ahern 		mp = nla_nest_start(skb, RTA_MULTIPATH);
4584beb1afacSDavid Ahern 		if (!mp)
4585beb1afacSDavid Ahern 			goto nla_put_failure;
4586beb1afacSDavid Ahern 
4587beb1afacSDavid Ahern 		if (rt6_add_nexthop(skb, rt) < 0)
4588beb1afacSDavid Ahern 			goto nla_put_failure;
4589beb1afacSDavid Ahern 
4590beb1afacSDavid Ahern 		list_for_each_entry_safe(sibling, next_sibling,
4591beb1afacSDavid Ahern 					 &rt->rt6i_siblings, rt6i_siblings) {
4592beb1afacSDavid Ahern 			if (rt6_add_nexthop(skb, sibling) < 0)
459394f826b8SEric Dumazet 				goto nla_put_failure;
459494f826b8SEric Dumazet 		}
45952d7202bfSThomas Graf 
4596beb1afacSDavid Ahern 		nla_nest_end(skb, mp);
4597beb1afacSDavid Ahern 	} else {
45985be083ceSDavid Ahern 		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
4599c78679e8SDavid S. Miller 			goto nla_put_failure;
4600beb1afacSDavid Ahern 	}
46018253947eSLi Wei 
4602*d4ead6b3SDavid Ahern 	if (rt->rt6i_flags & RTF_EXPIRES && dst)
4603*d4ead6b3SDavid Ahern 		expires = dst->expires - jiffies;
460469cdf8f9SYOSHIFUJI Hideaki 
4605*d4ead6b3SDavid Ahern 	if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
4606e3703b3dSThomas Graf 		goto nla_put_failure;
46071da177e4SLinus Torvalds 
4608c78ba6d6SLubomir Rintel 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4609c78ba6d6SLubomir Rintel 		goto nla_put_failure;
4610c78ba6d6SLubomir Rintel 
461119e42e45SRoopa Prabhu 
4612053c095aSJohannes Berg 	nlmsg_end(skb, nlh);
4613053c095aSJohannes Berg 	return 0;
46142d7202bfSThomas Graf 
46152d7202bfSThomas Graf nla_put_failure:
461626932566SPatrick McHardy 	nlmsg_cancel(skb, nlh);
461726932566SPatrick McHardy 	return -EMSGSIZE;
46181da177e4SLinus Torvalds }
46191da177e4SLinus Torvalds 
46201b43af54SPatrick McHardy int rt6_dump_route(struct rt6_info *rt, void *p_arg)
46211da177e4SLinus Torvalds {
46221da177e4SLinus Torvalds 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
46231f17e2f2SDavid Ahern 	struct net *net = arg->net;
46241f17e2f2SDavid Ahern 
46251f17e2f2SDavid Ahern 	if (rt == net->ipv6.ip6_null_entry)
46261f17e2f2SDavid Ahern 		return 0;
46271da177e4SLinus Torvalds 
46282d7202bfSThomas Graf 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
46292d7202bfSThomas Graf 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
4630f8cfe2ceSDavid Ahern 
4631f8cfe2ceSDavid Ahern 		/* user wants prefix routes only */
4632f8cfe2ceSDavid Ahern 		if (rtm->rtm_flags & RTM_F_PREFIX &&
4633f8cfe2ceSDavid Ahern 		    !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4634f8cfe2ceSDavid Ahern 			/* success since this is not a prefix route */
4635f8cfe2ceSDavid Ahern 			return 1;
4636f8cfe2ceSDavid Ahern 		}
4637f8cfe2ceSDavid Ahern 	}
46381da177e4SLinus Torvalds 
4639*d4ead6b3SDavid Ahern 	return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4640*d4ead6b3SDavid Ahern 			     RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4641*d4ead6b3SDavid Ahern 			     arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
46421da177e4SLinus Torvalds }
46431da177e4SLinus Torvalds 
4644c21ef3e3SDavid Ahern static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4645c21ef3e3SDavid Ahern 			      struct netlink_ext_ack *extack)
46461da177e4SLinus Torvalds {
46473b1e0a65SYOSHIFUJI Hideaki 	struct net *net = sock_net(in_skb->sk);
4648ab364a6fSThomas Graf 	struct nlattr *tb[RTA_MAX+1];
464918c3a61cSRoopa Prabhu 	int err, iif = 0, oif = 0;
465018c3a61cSRoopa Prabhu 	struct dst_entry *dst;
46511da177e4SLinus Torvalds 	struct rt6_info *rt;
4652ab364a6fSThomas Graf 	struct sk_buff *skb;
4653ab364a6fSThomas Graf 	struct rtmsg *rtm;
46544c9483b2SDavid S. Miller 	struct flowi6 fl6;
465518c3a61cSRoopa Prabhu 	bool fibmatch;
4656ab364a6fSThomas Graf 
4657fceb6435SJohannes Berg 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4658c21ef3e3SDavid Ahern 			  extack);
4659ab364a6fSThomas Graf 	if (err < 0)
4660ab364a6fSThomas Graf 		goto errout;
4661ab364a6fSThomas Graf 
4662ab364a6fSThomas Graf 	err = -EINVAL;
46634c9483b2SDavid S. Miller 	memset(&fl6, 0, sizeof(fl6));
466438b7097bSHannes Frederic Sowa 	rtm = nlmsg_data(nlh);
466538b7097bSHannes Frederic Sowa 	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
466618c3a61cSRoopa Prabhu 	fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
4667ab364a6fSThomas Graf 
4668ab364a6fSThomas Graf 	if (tb[RTA_SRC]) {
4669ab364a6fSThomas Graf 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4670ab364a6fSThomas Graf 			goto errout;
4671ab364a6fSThomas Graf 
46724e3fd7a0SAlexey Dobriyan 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
4673ab364a6fSThomas Graf 	}
4674ab364a6fSThomas Graf 
4675ab364a6fSThomas Graf 	if (tb[RTA_DST]) {
4676ab364a6fSThomas Graf 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4677ab364a6fSThomas Graf 			goto errout;
4678ab364a6fSThomas Graf 
46794e3fd7a0SAlexey Dobriyan 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
4680ab364a6fSThomas Graf 	}
4681ab364a6fSThomas Graf 
4682ab364a6fSThomas Graf 	if (tb[RTA_IIF])
4683ab364a6fSThomas Graf 		iif = nla_get_u32(tb[RTA_IIF]);
4684ab364a6fSThomas Graf 
4685ab364a6fSThomas Graf 	if (tb[RTA_OIF])
468672331bc0SShmulik Ladkani 		oif = nla_get_u32(tb[RTA_OIF]);
4687ab364a6fSThomas Graf 
46882e47b291SLorenzo Colitti 	if (tb[RTA_MARK])
46892e47b291SLorenzo Colitti 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
46902e47b291SLorenzo Colitti 
4691622ec2c9SLorenzo Colitti 	if (tb[RTA_UID])
4692622ec2c9SLorenzo Colitti 		fl6.flowi6_uid = make_kuid(current_user_ns(),
4693622ec2c9SLorenzo Colitti 					   nla_get_u32(tb[RTA_UID]));
4694622ec2c9SLorenzo Colitti 	else
4695622ec2c9SLorenzo Colitti 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4696622ec2c9SLorenzo Colitti 
4697ab364a6fSThomas Graf 	if (iif) {
4698ab364a6fSThomas Graf 		struct net_device *dev;
469972331bc0SShmulik Ladkani 		int flags = 0;
470072331bc0SShmulik Ladkani 
4701121622dbSFlorian Westphal 		rcu_read_lock();
4702121622dbSFlorian Westphal 
4703121622dbSFlorian Westphal 		dev = dev_get_by_index_rcu(net, iif);
4704ab364a6fSThomas Graf 		if (!dev) {
4705121622dbSFlorian Westphal 			rcu_read_unlock();
4706ab364a6fSThomas Graf 			err = -ENODEV;
4707ab364a6fSThomas Graf 			goto errout;
4708ab364a6fSThomas Graf 		}
470972331bc0SShmulik Ladkani 
471072331bc0SShmulik Ladkani 		fl6.flowi6_iif = iif;
471172331bc0SShmulik Ladkani 
471272331bc0SShmulik Ladkani 		if (!ipv6_addr_any(&fl6.saddr))
471372331bc0SShmulik Ladkani 			flags |= RT6_LOOKUP_F_HAS_SADDR;
471472331bc0SShmulik Ladkani 
4715b75cc8f9SDavid Ahern 		dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
4716121622dbSFlorian Westphal 
4717121622dbSFlorian Westphal 		rcu_read_unlock();
471872331bc0SShmulik Ladkani 	} else {
471972331bc0SShmulik Ladkani 		fl6.flowi6_oif = oif;
472072331bc0SShmulik Ladkani 
472118c3a61cSRoopa Prabhu 		dst = ip6_route_output(net, NULL, &fl6);
472218c3a61cSRoopa Prabhu 	}
472318c3a61cSRoopa Prabhu 
472418c3a61cSRoopa Prabhu 
472518c3a61cSRoopa Prabhu 	rt = container_of(dst, struct rt6_info, dst);
472618c3a61cSRoopa Prabhu 	if (rt->dst.error) {
472718c3a61cSRoopa Prabhu 		err = rt->dst.error;
472818c3a61cSRoopa Prabhu 		ip6_rt_put(rt);
472918c3a61cSRoopa Prabhu 		goto errout;
4730ab364a6fSThomas Graf 	}
47311da177e4SLinus Torvalds 
47329d6acb3bSWANG Cong 	if (rt == net->ipv6.ip6_null_entry) {
47339d6acb3bSWANG Cong 		err = rt->dst.error;
47349d6acb3bSWANG Cong 		ip6_rt_put(rt);
47359d6acb3bSWANG Cong 		goto errout;
47369d6acb3bSWANG Cong 	}
47379d6acb3bSWANG Cong 
4738fba961abSDavid S. Miller 	if (fibmatch && rt->from) {
4739fba961abSDavid S. Miller 		struct rt6_info *ort = rt->from;
474058acfd71SIdo Schimmel 
474158acfd71SIdo Schimmel 		dst_hold(&ort->dst);
474258acfd71SIdo Schimmel 		ip6_rt_put(rt);
474358acfd71SIdo Schimmel 		rt = ort;
474458acfd71SIdo Schimmel 	}
474558acfd71SIdo Schimmel 
47461da177e4SLinus Torvalds 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
474738308473SDavid S. Miller 	if (!skb) {
474894e187c0SAmerigo Wang 		ip6_rt_put(rt);
4749ab364a6fSThomas Graf 		err = -ENOBUFS;
4750ab364a6fSThomas Graf 		goto errout;
4751ab364a6fSThomas Graf 	}
47521da177e4SLinus Torvalds 
4753d8d1f30bSChangli Gao 	skb_dst_set(skb, &rt->dst);
475418c3a61cSRoopa Prabhu 	if (fibmatch)
4755*d4ead6b3SDavid Ahern 		err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, iif,
475618c3a61cSRoopa Prabhu 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
475718c3a61cSRoopa Prabhu 				    nlh->nlmsg_seq, 0);
475818c3a61cSRoopa Prabhu 	else
4759*d4ead6b3SDavid Ahern 		err = rt6_fill_node(net, skb, rt, dst, &fl6.daddr, &fl6.saddr,
4760*d4ead6b3SDavid Ahern 				    iif, RTM_NEWROUTE,
4761*d4ead6b3SDavid Ahern 				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4762*d4ead6b3SDavid Ahern 				    0);
47631da177e4SLinus Torvalds 	if (err < 0) {
4764ab364a6fSThomas Graf 		kfree_skb(skb);
4765ab364a6fSThomas Graf 		goto errout;
47661da177e4SLinus Torvalds 	}
47671da177e4SLinus Torvalds 
476815e47304SEric W. Biederman 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
4769ab364a6fSThomas Graf errout:
47701da177e4SLinus Torvalds 	return err;
47711da177e4SLinus Torvalds }
47721da177e4SLinus Torvalds 
477337a1d361SRoopa Prabhu void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
477437a1d361SRoopa Prabhu 		     unsigned int nlm_flags)
47751da177e4SLinus Torvalds {
47761da177e4SLinus Torvalds 	struct sk_buff *skb;
47775578689aSDaniel Lezcano 	struct net *net = info->nl_net;
4778528c4cebSDenis V. Lunev 	u32 seq;
4779528c4cebSDenis V. Lunev 	int err;
47800d51aa80SJamal Hadi Salim 
4781528c4cebSDenis V. Lunev 	err = -ENOBUFS;
478238308473SDavid S. Miller 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
478386872cb5SThomas Graf 
478419e42e45SRoopa Prabhu 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
478538308473SDavid S. Miller 	if (!skb)
478621713ebcSThomas Graf 		goto errout;
47871da177e4SLinus Torvalds 
4788*d4ead6b3SDavid Ahern 	err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4789f8cfe2ceSDavid Ahern 			    event, info->portid, seq, nlm_flags);
479026932566SPatrick McHardy 	if (err < 0) {
479126932566SPatrick McHardy 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
479226932566SPatrick McHardy 		WARN_ON(err == -EMSGSIZE);
479326932566SPatrick McHardy 		kfree_skb(skb);
479426932566SPatrick McHardy 		goto errout;
479526932566SPatrick McHardy 	}
479615e47304SEric W. Biederman 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
47975578689aSDaniel Lezcano 		    info->nlh, gfp_any());
47981ce85fe4SPablo Neira Ayuso 	return;
479921713ebcSThomas Graf errout:
480021713ebcSThomas Graf 	if (err < 0)
48015578689aSDaniel Lezcano 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
48021da177e4SLinus Torvalds }
48031da177e4SLinus Torvalds 
48048ed67789SDaniel Lezcano static int ip6_route_dev_notify(struct notifier_block *this,
4805351638e7SJiri Pirko 				unsigned long event, void *ptr)
48068ed67789SDaniel Lezcano {
4807351638e7SJiri Pirko 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
4808c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
48098ed67789SDaniel Lezcano 
4810242d3a49SWANG Cong 	if (!(dev->flags & IFF_LOOPBACK))
4811242d3a49SWANG Cong 		return NOTIFY_OK;
4812242d3a49SWANG Cong 
4813242d3a49SWANG Cong 	if (event == NETDEV_REGISTER) {
4814d8d1f30bSChangli Gao 		net->ipv6.ip6_null_entry->dst.dev = dev;
48158ed67789SDaniel Lezcano 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
48168ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4817d8d1f30bSChangli Gao 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
48188ed67789SDaniel Lezcano 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
4819d8d1f30bSChangli Gao 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
48208ed67789SDaniel Lezcano 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
48218ed67789SDaniel Lezcano #endif
482276da0704SWANG Cong 	 } else if (event == NETDEV_UNREGISTER &&
482376da0704SWANG Cong 		    dev->reg_state != NETREG_UNREGISTERED) {
482476da0704SWANG Cong 		/* NETDEV_UNREGISTER could be fired for multiple times by
482576da0704SWANG Cong 		 * netdev_wait_allrefs(). Make sure we only call this once.
482676da0704SWANG Cong 		 */
482712d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
4828242d3a49SWANG Cong #ifdef CONFIG_IPV6_MULTIPLE_TABLES
482912d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
483012d94a80SEric Dumazet 		in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
4831242d3a49SWANG Cong #endif
48328ed67789SDaniel Lezcano 	}
48338ed67789SDaniel Lezcano 
48348ed67789SDaniel Lezcano 	return NOTIFY_OK;
48358ed67789SDaniel Lezcano }
48368ed67789SDaniel Lezcano 
48371da177e4SLinus Torvalds /*
48381da177e4SLinus Torvalds  *	/proc
48391da177e4SLinus Torvalds  */
48401da177e4SLinus Torvalds 
48411da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
48421da177e4SLinus Torvalds 
484333120b30SAlexey Dobriyan static const struct file_operations ipv6_route_proc_fops = {
484433120b30SAlexey Dobriyan 	.open		= ipv6_route_open,
484533120b30SAlexey Dobriyan 	.read		= seq_read,
484633120b30SAlexey Dobriyan 	.llseek		= seq_lseek,
48478d2ca1d7SHannes Frederic Sowa 	.release	= seq_release_net,
484833120b30SAlexey Dobriyan };
484933120b30SAlexey Dobriyan 
48501da177e4SLinus Torvalds static int rt6_stats_seq_show(struct seq_file *seq, void *v)
48511da177e4SLinus Torvalds {
485269ddb805SDaniel Lezcano 	struct net *net = (struct net *)seq->private;
48531da177e4SLinus Torvalds 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
485469ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_nodes,
485569ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_route_nodes,
485681eb8447SWei Wang 		   atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
485769ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_entries,
485869ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_cache,
4859fc66f95cSEric Dumazet 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
486069ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_discarded_routes);
48611da177e4SLinus Torvalds 
48621da177e4SLinus Torvalds 	return 0;
48631da177e4SLinus Torvalds }
48641da177e4SLinus Torvalds 
48651da177e4SLinus Torvalds static int rt6_stats_seq_open(struct inode *inode, struct file *file)
48661da177e4SLinus Torvalds {
4867de05c557SPavel Emelyanov 	return single_open_net(inode, file, rt6_stats_seq_show);
486869ddb805SDaniel Lezcano }
486969ddb805SDaniel Lezcano 
48709a32144eSArjan van de Ven static const struct file_operations rt6_stats_seq_fops = {
48711da177e4SLinus Torvalds 	.open	 = rt6_stats_seq_open,
48721da177e4SLinus Torvalds 	.read	 = seq_read,
48731da177e4SLinus Torvalds 	.llseek	 = seq_lseek,
4874b6fcbdb4SPavel Emelyanov 	.release = single_release_net,
48751da177e4SLinus Torvalds };
48761da177e4SLinus Torvalds #endif	/* CONFIG_PROC_FS */
48771da177e4SLinus Torvalds 
48781da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
48791da177e4SLinus Torvalds 
48801da177e4SLinus Torvalds static
4881fe2c6338SJoe Perches int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
48821da177e4SLinus Torvalds 			      void __user *buffer, size_t *lenp, loff_t *ppos)
48831da177e4SLinus Torvalds {
4884c486da34SLucian Adrian Grijincu 	struct net *net;
4885c486da34SLucian Adrian Grijincu 	int delay;
4886c486da34SLucian Adrian Grijincu 	if (!write)
4887c486da34SLucian Adrian Grijincu 		return -EINVAL;
4888c486da34SLucian Adrian Grijincu 
4889c486da34SLucian Adrian Grijincu 	net = (struct net *)ctl->extra1;
4890c486da34SLucian Adrian Grijincu 	delay = net->ipv6.sysctl.flush_delay;
48918d65af78SAlexey Dobriyan 	proc_dointvec(ctl, write, buffer, lenp, ppos);
48922ac3ac8fSMichal Kubeček 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
48931da177e4SLinus Torvalds 	return 0;
48941da177e4SLinus Torvalds }
48951da177e4SLinus Torvalds 
4896fe2c6338SJoe Perches struct ctl_table ipv6_route_table_template[] = {
48971da177e4SLinus Torvalds 	{
48981da177e4SLinus Torvalds 		.procname	=	"flush",
48994990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.flush_delay,
49001da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
490189c8b3a1SDave Jones 		.mode		=	0200,
49026d9f239aSAlexey Dobriyan 		.proc_handler	=	ipv6_sysctl_rtcache_flush
49031da177e4SLinus Torvalds 	},
49041da177e4SLinus Torvalds 	{
49051da177e4SLinus Torvalds 		.procname	=	"gc_thresh",
49069a7ec3a9SDaniel Lezcano 		.data		=	&ip6_dst_ops_template.gc_thresh,
49071da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49081da177e4SLinus Torvalds 		.mode		=	0644,
49096d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec,
49101da177e4SLinus Torvalds 	},
49111da177e4SLinus Torvalds 	{
49121da177e4SLinus Torvalds 		.procname	=	"max_size",
49134990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
49141da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49151da177e4SLinus Torvalds 		.mode		=	0644,
49166d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec,
49171da177e4SLinus Torvalds 	},
49181da177e4SLinus Torvalds 	{
49191da177e4SLinus Torvalds 		.procname	=	"gc_min_interval",
49204990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
49211da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49221da177e4SLinus Torvalds 		.mode		=	0644,
49236d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
49241da177e4SLinus Torvalds 	},
49251da177e4SLinus Torvalds 	{
49261da177e4SLinus Torvalds 		.procname	=	"gc_timeout",
49274990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
49281da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49291da177e4SLinus Torvalds 		.mode		=	0644,
49306d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
49311da177e4SLinus Torvalds 	},
49321da177e4SLinus Torvalds 	{
49331da177e4SLinus Torvalds 		.procname	=	"gc_interval",
49344990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
49351da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49361da177e4SLinus Torvalds 		.mode		=	0644,
49376d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
49381da177e4SLinus Torvalds 	},
49391da177e4SLinus Torvalds 	{
49401da177e4SLinus Torvalds 		.procname	=	"gc_elasticity",
49414990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
49421da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49431da177e4SLinus Torvalds 		.mode		=	0644,
4944f3d3f616SMin Zhang 		.proc_handler	=	proc_dointvec,
49451da177e4SLinus Torvalds 	},
49461da177e4SLinus Torvalds 	{
49471da177e4SLinus Torvalds 		.procname	=	"mtu_expires",
49484990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
49491da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49501da177e4SLinus Torvalds 		.mode		=	0644,
49516d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
49521da177e4SLinus Torvalds 	},
49531da177e4SLinus Torvalds 	{
49541da177e4SLinus Torvalds 		.procname	=	"min_adv_mss",
49554990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
49561da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49571da177e4SLinus Torvalds 		.mode		=	0644,
4958f3d3f616SMin Zhang 		.proc_handler	=	proc_dointvec,
49591da177e4SLinus Torvalds 	},
49601da177e4SLinus Torvalds 	{
49611da177e4SLinus Torvalds 		.procname	=	"gc_min_interval_ms",
49624990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
49631da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
49641da177e4SLinus Torvalds 		.mode		=	0644,
49656d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_ms_jiffies,
49661da177e4SLinus Torvalds 	},
4967f8572d8fSEric W. Biederman 	{ }
49681da177e4SLinus Torvalds };
49691da177e4SLinus Torvalds 
49702c8c1e72SAlexey Dobriyan struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
4971760f2d01SDaniel Lezcano {
4972760f2d01SDaniel Lezcano 	struct ctl_table *table;
4973760f2d01SDaniel Lezcano 
4974760f2d01SDaniel Lezcano 	table = kmemdup(ipv6_route_table_template,
4975760f2d01SDaniel Lezcano 			sizeof(ipv6_route_table_template),
4976760f2d01SDaniel Lezcano 			GFP_KERNEL);
49775ee09105SYOSHIFUJI Hideaki 
49785ee09105SYOSHIFUJI Hideaki 	if (table) {
49795ee09105SYOSHIFUJI Hideaki 		table[0].data = &net->ipv6.sysctl.flush_delay;
4980c486da34SLucian Adrian Grijincu 		table[0].extra1 = net;
498186393e52SAlexey Dobriyan 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
49825ee09105SYOSHIFUJI Hideaki 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
49835ee09105SYOSHIFUJI Hideaki 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
49845ee09105SYOSHIFUJI Hideaki 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
49855ee09105SYOSHIFUJI Hideaki 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
49865ee09105SYOSHIFUJI Hideaki 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
49875ee09105SYOSHIFUJI Hideaki 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
49885ee09105SYOSHIFUJI Hideaki 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
49899c69fabeSAlexey Dobriyan 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
4990464dc801SEric W. Biederman 
4991464dc801SEric W. Biederman 		/* Don't export sysctls to unprivileged users */
4992464dc801SEric W. Biederman 		if (net->user_ns != &init_user_ns)
4993464dc801SEric W. Biederman 			table[0].procname = NULL;
49945ee09105SYOSHIFUJI Hideaki 	}
49955ee09105SYOSHIFUJI Hideaki 
4996760f2d01SDaniel Lezcano 	return table;
4997760f2d01SDaniel Lezcano }
49981da177e4SLinus Torvalds #endif
49991da177e4SLinus Torvalds 
50002c8c1e72SAlexey Dobriyan static int __net_init ip6_route_net_init(struct net *net)
5001cdb18761SDaniel Lezcano {
5002633d424bSPavel Emelyanov 	int ret = -ENOMEM;
50038ed67789SDaniel Lezcano 
500486393e52SAlexey Dobriyan 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
500586393e52SAlexey Dobriyan 	       sizeof(net->ipv6.ip6_dst_ops));
5006f2fc6a54SBenjamin Thery 
5007fc66f95cSEric Dumazet 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5008fc66f95cSEric Dumazet 		goto out_ip6_dst_ops;
5009fc66f95cSEric Dumazet 
50108ed67789SDaniel Lezcano 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
50118ed67789SDaniel Lezcano 					   sizeof(*net->ipv6.ip6_null_entry),
50128ed67789SDaniel Lezcano 					   GFP_KERNEL);
50138ed67789SDaniel Lezcano 	if (!net->ipv6.ip6_null_entry)
5014fc66f95cSEric Dumazet 		goto out_ip6_dst_entries;
5015d8d1f30bSChangli Gao 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
501662fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
501762fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
50188ed67789SDaniel Lezcano 
50198ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5020feca7d8cSVincent Bernat 	net->ipv6.fib6_has_custom_rules = false;
50218ed67789SDaniel Lezcano 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
50228ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_prohibit_entry),
50238ed67789SDaniel Lezcano 					       GFP_KERNEL);
502468fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_prohibit_entry)
502568fffc67SPeter Zijlstra 		goto out_ip6_null_entry;
5026d8d1f30bSChangli Gao 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
502762fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
502862fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
50298ed67789SDaniel Lezcano 
50308ed67789SDaniel Lezcano 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
50318ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
50328ed67789SDaniel Lezcano 					       GFP_KERNEL);
503368fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_blk_hole_entry)
503468fffc67SPeter Zijlstra 		goto out_ip6_prohibit_entry;
5035d8d1f30bSChangli Gao 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
503662fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
503762fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
50388ed67789SDaniel Lezcano #endif
50398ed67789SDaniel Lezcano 
5040b339a47cSPeter Zijlstra 	net->ipv6.sysctl.flush_delay = 0;
5041b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
5042b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5043b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5044b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5045b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5046b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5047b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5048b339a47cSPeter Zijlstra 
50496891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
50506891a346SBenjamin Thery 
50518ed67789SDaniel Lezcano 	ret = 0;
50528ed67789SDaniel Lezcano out:
50538ed67789SDaniel Lezcano 	return ret;
5054f2fc6a54SBenjamin Thery 
505568fffc67SPeter Zijlstra #ifdef CONFIG_IPV6_MULTIPLE_TABLES
505668fffc67SPeter Zijlstra out_ip6_prohibit_entry:
505768fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_prohibit_entry);
505868fffc67SPeter Zijlstra out_ip6_null_entry:
505968fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_null_entry);
506068fffc67SPeter Zijlstra #endif
5061fc66f95cSEric Dumazet out_ip6_dst_entries:
5062fc66f95cSEric Dumazet 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5063f2fc6a54SBenjamin Thery out_ip6_dst_ops:
5064f2fc6a54SBenjamin Thery 	goto out;
5065cdb18761SDaniel Lezcano }
5066cdb18761SDaniel Lezcano 
50672c8c1e72SAlexey Dobriyan static void __net_exit ip6_route_net_exit(struct net *net)
5068cdb18761SDaniel Lezcano {
50698ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_null_entry);
50708ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
50718ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_prohibit_entry);
50728ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_blk_hole_entry);
50738ed67789SDaniel Lezcano #endif
507441bb78b4SXiaotian Feng 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
5075cdb18761SDaniel Lezcano }
5076cdb18761SDaniel Lezcano 
5077d189634eSThomas Graf static int __net_init ip6_route_net_init_late(struct net *net)
5078d189634eSThomas Graf {
5079d189634eSThomas Graf #ifdef CONFIG_PROC_FS
5080d4beaa66SGao feng 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
5081d6444062SJoe Perches 	proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
5082d189634eSThomas Graf #endif
5083d189634eSThomas Graf 	return 0;
5084d189634eSThomas Graf }
5085d189634eSThomas Graf 
5086d189634eSThomas Graf static void __net_exit ip6_route_net_exit_late(struct net *net)
5087d189634eSThomas Graf {
5088d189634eSThomas Graf #ifdef CONFIG_PROC_FS
5089ece31ffdSGao feng 	remove_proc_entry("ipv6_route", net->proc_net);
5090ece31ffdSGao feng 	remove_proc_entry("rt6_stats", net->proc_net);
5091d189634eSThomas Graf #endif
5092d189634eSThomas Graf }
5093d189634eSThomas Graf 
5094cdb18761SDaniel Lezcano static struct pernet_operations ip6_route_net_ops = {
5095cdb18761SDaniel Lezcano 	.init = ip6_route_net_init,
5096cdb18761SDaniel Lezcano 	.exit = ip6_route_net_exit,
5097cdb18761SDaniel Lezcano };
5098cdb18761SDaniel Lezcano 
5099c3426b47SDavid S. Miller static int __net_init ipv6_inetpeer_init(struct net *net)
5100c3426b47SDavid S. Miller {
5101c3426b47SDavid S. Miller 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5102c3426b47SDavid S. Miller 
5103c3426b47SDavid S. Miller 	if (!bp)
5104c3426b47SDavid S. Miller 		return -ENOMEM;
5105c3426b47SDavid S. Miller 	inet_peer_base_init(bp);
5106c3426b47SDavid S. Miller 	net->ipv6.peers = bp;
5107c3426b47SDavid S. Miller 	return 0;
5108c3426b47SDavid S. Miller }
5109c3426b47SDavid S. Miller 
5110c3426b47SDavid S. Miller static void __net_exit ipv6_inetpeer_exit(struct net *net)
5111c3426b47SDavid S. Miller {
5112c3426b47SDavid S. Miller 	struct inet_peer_base *bp = net->ipv6.peers;
5113c3426b47SDavid S. Miller 
5114c3426b47SDavid S. Miller 	net->ipv6.peers = NULL;
511556a6b248SDavid S. Miller 	inetpeer_invalidate_tree(bp);
5116c3426b47SDavid S. Miller 	kfree(bp);
5117c3426b47SDavid S. Miller }
5118c3426b47SDavid S. Miller 
51192b823f72SDavid S. Miller static struct pernet_operations ipv6_inetpeer_ops = {
5120c3426b47SDavid S. Miller 	.init	=	ipv6_inetpeer_init,
5121c3426b47SDavid S. Miller 	.exit	=	ipv6_inetpeer_exit,
5122c3426b47SDavid S. Miller };
5123c3426b47SDavid S. Miller 
5124d189634eSThomas Graf static struct pernet_operations ip6_route_net_late_ops = {
5125d189634eSThomas Graf 	.init = ip6_route_net_init_late,
5126d189634eSThomas Graf 	.exit = ip6_route_net_exit_late,
5127d189634eSThomas Graf };
5128d189634eSThomas Graf 
51298ed67789SDaniel Lezcano static struct notifier_block ip6_route_dev_notifier = {
51308ed67789SDaniel Lezcano 	.notifier_call = ip6_route_dev_notify,
5131242d3a49SWANG Cong 	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
51328ed67789SDaniel Lezcano };
51338ed67789SDaniel Lezcano 
51342f460933SWANG Cong void __init ip6_route_init_special_entries(void)
51352f460933SWANG Cong {
51362f460933SWANG Cong 	/* Registering of the loopback is done before this portion of code,
51372f460933SWANG Cong 	 * the loopback reference in rt6_info will not be taken, do it
51382f460933SWANG Cong 	 * manually for init_net */
51392f460933SWANG Cong 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
51402f460933SWANG Cong 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
51412f460933SWANG Cong   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
51422f460933SWANG Cong 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
51432f460933SWANG Cong 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
51442f460933SWANG Cong 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
51452f460933SWANG Cong 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
51462f460933SWANG Cong   #endif
51472f460933SWANG Cong }
51482f460933SWANG Cong 
5149433d49c3SDaniel Lezcano int __init ip6_route_init(void)
51501da177e4SLinus Torvalds {
5151433d49c3SDaniel Lezcano 	int ret;
51528d0b94afSMartin KaFai Lau 	int cpu;
5153433d49c3SDaniel Lezcano 
51549a7ec3a9SDaniel Lezcano 	ret = -ENOMEM;
51559a7ec3a9SDaniel Lezcano 	ip6_dst_ops_template.kmem_cachep =
51569a7ec3a9SDaniel Lezcano 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
51579a7ec3a9SDaniel Lezcano 				  SLAB_HWCACHE_ALIGN, NULL);
51589a7ec3a9SDaniel Lezcano 	if (!ip6_dst_ops_template.kmem_cachep)
5159c19a28e1SFernando Carrijo 		goto out;
516014e50e57SDavid S. Miller 
5161fc66f95cSEric Dumazet 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
51628ed67789SDaniel Lezcano 	if (ret)
5163bdb3289fSDaniel Lezcano 		goto out_kmem_cache;
5164bdb3289fSDaniel Lezcano 
5165c3426b47SDavid S. Miller 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5166c3426b47SDavid S. Miller 	if (ret)
5167e8803b6cSDavid S. Miller 		goto out_dst_entries;
51682a0c451aSThomas Graf 
51697e52b33bSDavid S. Miller 	ret = register_pernet_subsys(&ip6_route_net_ops);
51707e52b33bSDavid S. Miller 	if (ret)
51717e52b33bSDavid S. Miller 		goto out_register_inetpeer;
5172c3426b47SDavid S. Miller 
51735dc121e9SArnaud Ebalard 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
51745dc121e9SArnaud Ebalard 
5175e8803b6cSDavid S. Miller 	ret = fib6_init();
5176433d49c3SDaniel Lezcano 	if (ret)
51778ed67789SDaniel Lezcano 		goto out_register_subsys;
5178433d49c3SDaniel Lezcano 
5179433d49c3SDaniel Lezcano 	ret = xfrm6_init();
5180433d49c3SDaniel Lezcano 	if (ret)
5181e8803b6cSDavid S. Miller 		goto out_fib6_init;
5182c35b7e72SDaniel Lezcano 
5183433d49c3SDaniel Lezcano 	ret = fib6_rules_init();
5184433d49c3SDaniel Lezcano 	if (ret)
5185433d49c3SDaniel Lezcano 		goto xfrm6_init;
51867e5449c2SDaniel Lezcano 
5187d189634eSThomas Graf 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
5188d189634eSThomas Graf 	if (ret)
5189d189634eSThomas Graf 		goto fib6_rules_init;
5190d189634eSThomas Graf 
519116feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
519216feebcfSFlorian Westphal 				   inet6_rtm_newroute, NULL, 0);
519316feebcfSFlorian Westphal 	if (ret < 0)
519416feebcfSFlorian Westphal 		goto out_register_late_subsys;
519516feebcfSFlorian Westphal 
519616feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
519716feebcfSFlorian Westphal 				   inet6_rtm_delroute, NULL, 0);
519816feebcfSFlorian Westphal 	if (ret < 0)
519916feebcfSFlorian Westphal 		goto out_register_late_subsys;
520016feebcfSFlorian Westphal 
520116feebcfSFlorian Westphal 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
520216feebcfSFlorian Westphal 				   inet6_rtm_getroute, NULL,
520316feebcfSFlorian Westphal 				   RTNL_FLAG_DOIT_UNLOCKED);
520416feebcfSFlorian Westphal 	if (ret < 0)
5205d189634eSThomas Graf 		goto out_register_late_subsys;
5206433d49c3SDaniel Lezcano 
52078ed67789SDaniel Lezcano 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
5208cdb18761SDaniel Lezcano 	if (ret)
5209d189634eSThomas Graf 		goto out_register_late_subsys;
52108ed67789SDaniel Lezcano 
52118d0b94afSMartin KaFai Lau 	for_each_possible_cpu(cpu) {
52128d0b94afSMartin KaFai Lau 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
52138d0b94afSMartin KaFai Lau 
52148d0b94afSMartin KaFai Lau 		INIT_LIST_HEAD(&ul->head);
52158d0b94afSMartin KaFai Lau 		spin_lock_init(&ul->lock);
52168d0b94afSMartin KaFai Lau 	}
52178d0b94afSMartin KaFai Lau 
5218433d49c3SDaniel Lezcano out:
5219433d49c3SDaniel Lezcano 	return ret;
5220433d49c3SDaniel Lezcano 
5221d189634eSThomas Graf out_register_late_subsys:
522216feebcfSFlorian Westphal 	rtnl_unregister_all(PF_INET6);
5223d189634eSThomas Graf 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5224433d49c3SDaniel Lezcano fib6_rules_init:
5225433d49c3SDaniel Lezcano 	fib6_rules_cleanup();
5226433d49c3SDaniel Lezcano xfrm6_init:
5227433d49c3SDaniel Lezcano 	xfrm6_fini();
52282a0c451aSThomas Graf out_fib6_init:
52292a0c451aSThomas Graf 	fib6_gc_cleanup();
52308ed67789SDaniel Lezcano out_register_subsys:
52318ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
52327e52b33bSDavid S. Miller out_register_inetpeer:
52337e52b33bSDavid S. Miller 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
5234fc66f95cSEric Dumazet out_dst_entries:
5235fc66f95cSEric Dumazet 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5236433d49c3SDaniel Lezcano out_kmem_cache:
5237f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
5238433d49c3SDaniel Lezcano 	goto out;
52391da177e4SLinus Torvalds }
52401da177e4SLinus Torvalds 
52411da177e4SLinus Torvalds void ip6_route_cleanup(void)
52421da177e4SLinus Torvalds {
52438ed67789SDaniel Lezcano 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
5244d189634eSThomas Graf 	unregister_pernet_subsys(&ip6_route_net_late_ops);
5245101367c2SThomas Graf 	fib6_rules_cleanup();
52461da177e4SLinus Torvalds 	xfrm6_fini();
52471da177e4SLinus Torvalds 	fib6_gc_cleanup();
5248c3426b47SDavid S. Miller 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
52498ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
525041bb78b4SXiaotian Feng 	dst_entries_destroy(&ip6_dst_blackhole_ops);
5251f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
52521da177e4SLinus Torvalds }
5253