xref: /openbmc/linux/net/ipv6/route.c (revision 19e42e45150672124b6a4341e2bc7982d247f0ac)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  *	Linux INET6 implementation
31da177e4SLinus Torvalds  *	FIB front-end.
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  *	Authors:
61da177e4SLinus Torvalds  *	Pedro Roque		<roque@di.fc.ul.pt>
71da177e4SLinus Torvalds  *
81da177e4SLinus Torvalds  *	This program is free software; you can redistribute it and/or
91da177e4SLinus Torvalds  *      modify it under the terms of the GNU General Public License
101da177e4SLinus Torvalds  *      as published by the Free Software Foundation; either version
111da177e4SLinus Torvalds  *      2 of the License, or (at your option) any later version.
121da177e4SLinus Torvalds  */
131da177e4SLinus Torvalds 
141da177e4SLinus Torvalds /*	Changes:
151da177e4SLinus Torvalds  *
161da177e4SLinus Torvalds  *	YOSHIFUJI Hideaki @USAGI
171da177e4SLinus Torvalds  *		reworked default router selection.
181da177e4SLinus Torvalds  *		- respect outgoing interface
191da177e4SLinus Torvalds  *		- select from (probably) reachable routers (i.e.
201da177e4SLinus Torvalds  *		routers in REACHABLE, STALE, DELAY or PROBE states).
211da177e4SLinus Torvalds  *		- always select the same router if it is (probably)
221da177e4SLinus Torvalds  *		reachable.  otherwise, round-robin the list.
23c0bece9fSYOSHIFUJI Hideaki  *	Ville Nuorvala
24c0bece9fSYOSHIFUJI Hideaki  *		Fixed routing subtrees.
251da177e4SLinus Torvalds  */
261da177e4SLinus Torvalds 
27f3213831SJoe Perches #define pr_fmt(fmt) "IPv6: " fmt
28f3213831SJoe Perches 
294fc268d2SRandy Dunlap #include <linux/capability.h>
301da177e4SLinus Torvalds #include <linux/errno.h>
31bc3b2d7fSPaul Gortmaker #include <linux/export.h>
321da177e4SLinus Torvalds #include <linux/types.h>
331da177e4SLinus Torvalds #include <linux/times.h>
341da177e4SLinus Torvalds #include <linux/socket.h>
351da177e4SLinus Torvalds #include <linux/sockios.h>
361da177e4SLinus Torvalds #include <linux/net.h>
371da177e4SLinus Torvalds #include <linux/route.h>
381da177e4SLinus Torvalds #include <linux/netdevice.h>
391da177e4SLinus Torvalds #include <linux/in6.h>
407bc570c8SYOSHIFUJI Hideaki #include <linux/mroute6.h>
411da177e4SLinus Torvalds #include <linux/init.h>
421da177e4SLinus Torvalds #include <linux/if_arp.h>
431da177e4SLinus Torvalds #include <linux/proc_fs.h>
441da177e4SLinus Torvalds #include <linux/seq_file.h>
455b7c931dSDaniel Lezcano #include <linux/nsproxy.h>
465a0e3ad6STejun Heo #include <linux/slab.h>
47457c4cbcSEric W. Biederman #include <net/net_namespace.h>
481da177e4SLinus Torvalds #include <net/snmp.h>
491da177e4SLinus Torvalds #include <net/ipv6.h>
501da177e4SLinus Torvalds #include <net/ip6_fib.h>
511da177e4SLinus Torvalds #include <net/ip6_route.h>
521da177e4SLinus Torvalds #include <net/ndisc.h>
531da177e4SLinus Torvalds #include <net/addrconf.h>
541da177e4SLinus Torvalds #include <net/tcp.h>
551da177e4SLinus Torvalds #include <linux/rtnetlink.h>
561da177e4SLinus Torvalds #include <net/dst.h>
571da177e4SLinus Torvalds #include <net/xfrm.h>
588d71740cSTom Tucker #include <net/netevent.h>
5921713ebcSThomas Graf #include <net/netlink.h>
6051ebd318SNicolas Dichtel #include <net/nexthop.h>
61*19e42e45SRoopa Prabhu #include <net/lwtunnel.h>
621da177e4SLinus Torvalds 
631da177e4SLinus Torvalds #include <asm/uaccess.h>
641da177e4SLinus Torvalds 
651da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
661da177e4SLinus Torvalds #include <linux/sysctl.h>
671da177e4SLinus Torvalds #endif
681da177e4SLinus Torvalds 
/* Result codes returned by rt6_check_neigh().
 * Every failure code is negative; the single success code is positive.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1
};
75afc154e9SHannes Frederic Sowa 
7683a09abdSMartin KaFai Lau static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
771da177e4SLinus Torvalds static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
780dbaee3bSDavid S. Miller static unsigned int	 ip6_default_advmss(const struct dst_entry *dst);
79ebb762f2SSteffen Klassert static unsigned int	 ip6_mtu(const struct dst_entry *dst);
801da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *);
811da177e4SLinus Torvalds static void		ip6_dst_destroy(struct dst_entry *);
821da177e4SLinus Torvalds static void		ip6_dst_ifdown(struct dst_entry *,
831da177e4SLinus Torvalds 				       struct net_device *dev, int how);
84569d3645SDaniel Lezcano static int		 ip6_dst_gc(struct dst_ops *ops);
851da177e4SLinus Torvalds 
861da177e4SLinus Torvalds static int		ip6_pkt_discard(struct sk_buff *skb);
87aad88724SEric Dumazet static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
887150aedeSKamala R static int		ip6_pkt_prohibit(struct sk_buff *skb);
89aad88724SEric Dumazet static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
901da177e4SLinus Torvalds static void		ip6_link_failure(struct sk_buff *skb);
916700c270SDavid S. Miller static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
926700c270SDavid S. Miller 					   struct sk_buff *skb, u32 mtu);
936700c270SDavid S. Miller static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
946700c270SDavid S. Miller 					struct sk_buff *skb);
954b32b5adSMartin KaFai Lau static void		rt6_dst_from_metrics_check(struct rt6_info *rt);
9652bd4c0cSNicolas Dichtel static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
971da177e4SLinus Torvalds 
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Route-information helpers; only declared when CONFIG_IPV6_ROUTE_INFO
 * is enabled.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
#endif
10770ceb4f5SYOSHIFUJI Hideaki 
1088d0b94afSMartin KaFai Lau struct uncached_list {
1098d0b94afSMartin KaFai Lau 	spinlock_t		lock;
1108d0b94afSMartin KaFai Lau 	struct list_head	head;
1118d0b94afSMartin KaFai Lau };
1128d0b94afSMartin KaFai Lau 
1138d0b94afSMartin KaFai Lau static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
1148d0b94afSMartin KaFai Lau 
1158d0b94afSMartin KaFai Lau static void rt6_uncached_list_add(struct rt6_info *rt)
1168d0b94afSMartin KaFai Lau {
1178d0b94afSMartin KaFai Lau 	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
1188d0b94afSMartin KaFai Lau 
1198d0b94afSMartin KaFai Lau 	rt->dst.flags |= DST_NOCACHE;
1208d0b94afSMartin KaFai Lau 	rt->rt6i_uncached_list = ul;
1218d0b94afSMartin KaFai Lau 
1228d0b94afSMartin KaFai Lau 	spin_lock_bh(&ul->lock);
1238d0b94afSMartin KaFai Lau 	list_add_tail(&rt->rt6i_uncached, &ul->head);
1248d0b94afSMartin KaFai Lau 	spin_unlock_bh(&ul->lock);
1258d0b94afSMartin KaFai Lau }
1268d0b94afSMartin KaFai Lau 
1278d0b94afSMartin KaFai Lau static void rt6_uncached_list_del(struct rt6_info *rt)
1288d0b94afSMartin KaFai Lau {
1298d0b94afSMartin KaFai Lau 	if (!list_empty(&rt->rt6i_uncached)) {
1308d0b94afSMartin KaFai Lau 		struct uncached_list *ul = rt->rt6i_uncached_list;
1318d0b94afSMartin KaFai Lau 
1328d0b94afSMartin KaFai Lau 		spin_lock_bh(&ul->lock);
1338d0b94afSMartin KaFai Lau 		list_del(&rt->rt6i_uncached);
1348d0b94afSMartin KaFai Lau 		spin_unlock_bh(&ul->lock);
1358d0b94afSMartin KaFai Lau 	}
1368d0b94afSMartin KaFai Lau }
1378d0b94afSMartin KaFai Lau 
1388d0b94afSMartin KaFai Lau static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
1398d0b94afSMartin KaFai Lau {
1408d0b94afSMartin KaFai Lau 	struct net_device *loopback_dev = net->loopback_dev;
1418d0b94afSMartin KaFai Lau 	int cpu;
1428d0b94afSMartin KaFai Lau 
1438d0b94afSMartin KaFai Lau 	for_each_possible_cpu(cpu) {
1448d0b94afSMartin KaFai Lau 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
1458d0b94afSMartin KaFai Lau 		struct rt6_info *rt;
1468d0b94afSMartin KaFai Lau 
1478d0b94afSMartin KaFai Lau 		spin_lock_bh(&ul->lock);
1488d0b94afSMartin KaFai Lau 		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
1498d0b94afSMartin KaFai Lau 			struct inet6_dev *rt_idev = rt->rt6i_idev;
1508d0b94afSMartin KaFai Lau 			struct net_device *rt_dev = rt->dst.dev;
1518d0b94afSMartin KaFai Lau 
1528d0b94afSMartin KaFai Lau 			if (rt_idev && (rt_idev->dev == dev || !dev) &&
1538d0b94afSMartin KaFai Lau 			    rt_idev->dev != loopback_dev) {
1548d0b94afSMartin KaFai Lau 				rt->rt6i_idev = in6_dev_get(loopback_dev);
1558d0b94afSMartin KaFai Lau 				in6_dev_put(rt_idev);
1568d0b94afSMartin KaFai Lau 			}
1578d0b94afSMartin KaFai Lau 
1588d0b94afSMartin KaFai Lau 			if (rt_dev && (rt_dev == dev || !dev) &&
1598d0b94afSMartin KaFai Lau 			    rt_dev != loopback_dev) {
1608d0b94afSMartin KaFai Lau 				rt->dst.dev = loopback_dev;
1618d0b94afSMartin KaFai Lau 				dev_hold(rt->dst.dev);
1628d0b94afSMartin KaFai Lau 				dev_put(rt_dev);
1638d0b94afSMartin KaFai Lau 			}
1648d0b94afSMartin KaFai Lau 		}
1658d0b94afSMartin KaFai Lau 		spin_unlock_bh(&ul->lock);
1668d0b94afSMartin KaFai Lau 	}
1678d0b94afSMartin KaFai Lau }
1688d0b94afSMartin KaFai Lau 
169d52d3997SMartin KaFai Lau static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
170d52d3997SMartin KaFai Lau {
171d52d3997SMartin KaFai Lau 	return dst_metrics_write_ptr(rt->dst.from);
172d52d3997SMartin KaFai Lau }
173d52d3997SMartin KaFai Lau 
17406582540SDavid S. Miller static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
17506582540SDavid S. Miller {
17606582540SDavid S. Miller 	struct rt6_info *rt = (struct rt6_info *)dst;
17706582540SDavid S. Miller 
178d52d3997SMartin KaFai Lau 	if (rt->rt6i_flags & RTF_PCPU)
179d52d3997SMartin KaFai Lau 		return rt6_pcpu_cow_metrics(rt);
180d52d3997SMartin KaFai Lau 	else if (rt->rt6i_flags & RTF_CACHE)
1814b32b5adSMartin KaFai Lau 		return NULL;
1824b32b5adSMartin KaFai Lau 	else
1833b471175SMartin KaFai Lau 		return dst_cow_metrics_generic(dst, old);
18406582540SDavid S. Miller }
18506582540SDavid S. Miller 
186f894cbf8SDavid S. Miller static inline const void *choose_neigh_daddr(struct rt6_info *rt,
187f894cbf8SDavid S. Miller 					     struct sk_buff *skb,
188f894cbf8SDavid S. Miller 					     const void *daddr)
18939232973SDavid S. Miller {
19039232973SDavid S. Miller 	struct in6_addr *p = &rt->rt6i_gateway;
19139232973SDavid S. Miller 
192a7563f34SDavid S. Miller 	if (!ipv6_addr_any(p))
19339232973SDavid S. Miller 		return (const void *) p;
194f894cbf8SDavid S. Miller 	else if (skb)
195f894cbf8SDavid S. Miller 		return &ipv6_hdr(skb)->daddr;
19639232973SDavid S. Miller 	return daddr;
19739232973SDavid S. Miller }
19839232973SDavid S. Miller 
199f894cbf8SDavid S. Miller static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
200f894cbf8SDavid S. Miller 					  struct sk_buff *skb,
201f894cbf8SDavid S. Miller 					  const void *daddr)
202d3aaeb38SDavid S. Miller {
20339232973SDavid S. Miller 	struct rt6_info *rt = (struct rt6_info *) dst;
20439232973SDavid S. Miller 	struct neighbour *n;
20539232973SDavid S. Miller 
206f894cbf8SDavid S. Miller 	daddr = choose_neigh_daddr(rt, skb, daddr);
2078e022ee6SYOSHIFUJI Hideaki / 吉藤英明 	n = __ipv6_neigh_lookup(dst->dev, daddr);
208f83c7790SDavid S. Miller 	if (n)
209f83c7790SDavid S. Miller 		return n;
210f83c7790SDavid S. Miller 	return neigh_create(&nd_tbl, daddr, dst->dev);
211f83c7790SDavid S. Miller }
212f83c7790SDavid S. Miller 
2139a7ec3a9SDaniel Lezcano static struct dst_ops ip6_dst_ops_template = {
2141da177e4SLinus Torvalds 	.family			=	AF_INET6,
2151da177e4SLinus Torvalds 	.gc			=	ip6_dst_gc,
2161da177e4SLinus Torvalds 	.gc_thresh		=	1024,
2171da177e4SLinus Torvalds 	.check			=	ip6_dst_check,
2180dbaee3bSDavid S. Miller 	.default_advmss		=	ip6_default_advmss,
219ebb762f2SSteffen Klassert 	.mtu			=	ip6_mtu,
22006582540SDavid S. Miller 	.cow_metrics		=	ipv6_cow_metrics,
2211da177e4SLinus Torvalds 	.destroy		=	ip6_dst_destroy,
2221da177e4SLinus Torvalds 	.ifdown			=	ip6_dst_ifdown,
2231da177e4SLinus Torvalds 	.negative_advice	=	ip6_negative_advice,
2241da177e4SLinus Torvalds 	.link_failure		=	ip6_link_failure,
2251da177e4SLinus Torvalds 	.update_pmtu		=	ip6_rt_update_pmtu,
2266e157b6aSDavid S. Miller 	.redirect		=	rt6_do_redirect,
2271ac06e03SHerbert Xu 	.local_out		=	__ip6_local_out,
228d3aaeb38SDavid S. Miller 	.neigh_lookup		=	ip6_neigh_lookup,
2291da177e4SLinus Torvalds };
2301da177e4SLinus Torvalds 
231ebb762f2SSteffen Klassert static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
232ec831ea7SRoland Dreier {
233618f9bc7SSteffen Klassert 	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
234618f9bc7SSteffen Klassert 
235618f9bc7SSteffen Klassert 	return mtu ? : dst->dev->mtu;
236ec831ea7SRoland Dreier }
237ec831ea7SRoland Dreier 
2386700c270SDavid S. Miller static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2396700c270SDavid S. Miller 					 struct sk_buff *skb, u32 mtu)
24014e50e57SDavid S. Miller {
24114e50e57SDavid S. Miller }
24214e50e57SDavid S. Miller 
/* dst_ops->redirect callback for blackhole dsts: intentionally a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
247b587ee3bSDavid S. Miller 
2480972ddb2SHeld Bernhard static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
2490972ddb2SHeld Bernhard 					 unsigned long old)
2500972ddb2SHeld Bernhard {
2510972ddb2SHeld Bernhard 	return NULL;
2520972ddb2SHeld Bernhard }
2530972ddb2SHeld Bernhard 
25414e50e57SDavid S. Miller static struct dst_ops ip6_dst_blackhole_ops = {
25514e50e57SDavid S. Miller 	.family			=	AF_INET6,
25614e50e57SDavid S. Miller 	.destroy		=	ip6_dst_destroy,
25714e50e57SDavid S. Miller 	.check			=	ip6_dst_check,
258ebb762f2SSteffen Klassert 	.mtu			=	ip6_blackhole_mtu,
259214f45c9SEric Dumazet 	.default_advmss		=	ip6_default_advmss,
26014e50e57SDavid S. Miller 	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
261b587ee3bSDavid S. Miller 	.redirect		=	ip6_rt_blackhole_redirect,
2620972ddb2SHeld Bernhard 	.cow_metrics		=	ip6_rt_blackhole_cow_metrics,
263d3aaeb38SDavid S. Miller 	.neigh_lookup		=	ip6_neigh_lookup,
26414e50e57SDavid S. Miller };
26514e50e57SDavid S. Miller 
26662fa8a84SDavid S. Miller static const u32 ip6_template_metrics[RTAX_MAX] = {
26714edd87dSLi RongQing 	[RTAX_HOPLIMIT - 1] = 0,
26862fa8a84SDavid S. Miller };
26962fa8a84SDavid S. Miller 
270fb0af4c7SEric Dumazet static const struct rt6_info ip6_null_entry_template = {
2711da177e4SLinus Torvalds 	.dst = {
2721da177e4SLinus Torvalds 		.__refcnt	= ATOMIC_INIT(1),
2731da177e4SLinus Torvalds 		.__use		= 1,
2742c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
2751da177e4SLinus Torvalds 		.error		= -ENETUNREACH,
2761da177e4SLinus Torvalds 		.input		= ip6_pkt_discard,
2771da177e4SLinus Torvalds 		.output		= ip6_pkt_discard_out,
2781da177e4SLinus Torvalds 	},
2791da177e4SLinus Torvalds 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
2804f724279SJean-Mickael Guerin 	.rt6i_protocol  = RTPROT_KERNEL,
2811da177e4SLinus Torvalds 	.rt6i_metric	= ~(u32) 0,
2821da177e4SLinus Torvalds 	.rt6i_ref	= ATOMIC_INIT(1),
2831da177e4SLinus Torvalds };
2841da177e4SLinus Torvalds 
285101367c2SThomas Graf #ifdef CONFIG_IPV6_MULTIPLE_TABLES
286101367c2SThomas Graf 
287fb0af4c7SEric Dumazet static const struct rt6_info ip6_prohibit_entry_template = {
288101367c2SThomas Graf 	.dst = {
289101367c2SThomas Graf 		.__refcnt	= ATOMIC_INIT(1),
290101367c2SThomas Graf 		.__use		= 1,
2912c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
292101367c2SThomas Graf 		.error		= -EACCES,
2939ce8ade0SThomas Graf 		.input		= ip6_pkt_prohibit,
2949ce8ade0SThomas Graf 		.output		= ip6_pkt_prohibit_out,
295101367c2SThomas Graf 	},
296101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
2974f724279SJean-Mickael Guerin 	.rt6i_protocol  = RTPROT_KERNEL,
298101367c2SThomas Graf 	.rt6i_metric	= ~(u32) 0,
299101367c2SThomas Graf 	.rt6i_ref	= ATOMIC_INIT(1),
300101367c2SThomas Graf };
301101367c2SThomas Graf 
302fb0af4c7SEric Dumazet static const struct rt6_info ip6_blk_hole_entry_template = {
303101367c2SThomas Graf 	.dst = {
304101367c2SThomas Graf 		.__refcnt	= ATOMIC_INIT(1),
305101367c2SThomas Graf 		.__use		= 1,
3062c20cbd7SNicolas Dichtel 		.obsolete	= DST_OBSOLETE_FORCE_CHK,
307101367c2SThomas Graf 		.error		= -EINVAL,
308352e512cSHerbert Xu 		.input		= dst_discard,
309aad88724SEric Dumazet 		.output		= dst_discard_sk,
310101367c2SThomas Graf 	},
311101367c2SThomas Graf 	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
3124f724279SJean-Mickael Guerin 	.rt6i_protocol  = RTPROT_KERNEL,
313101367c2SThomas Graf 	.rt6i_metric	= ~(u32) 0,
314101367c2SThomas Graf 	.rt6i_ref	= ATOMIC_INIT(1),
315101367c2SThomas Graf };
316101367c2SThomas Graf 
317101367c2SThomas Graf #endif
318101367c2SThomas Graf 
3191da177e4SLinus Torvalds /* allocate dst with ip6_dst_ops */
320d52d3997SMartin KaFai Lau static struct rt6_info *__ip6_dst_alloc(struct net *net,
321957c665fSDavid S. Miller 					struct net_device *dev,
3228b96d22dSDavid S. Miller 					int flags,
3238b96d22dSDavid S. Miller 					struct fib6_table *table)
3241da177e4SLinus Torvalds {
32597bab73fSDavid S. Miller 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
3266f3118b5SNicolas Dichtel 					0, DST_OBSOLETE_FORCE_CHK, flags);
327cf911662SDavid S. Miller 
32897bab73fSDavid S. Miller 	if (rt) {
3298104891bSSteffen Klassert 		struct dst_entry *dst = &rt->dst;
3308104891bSSteffen Klassert 
3318104891bSSteffen Klassert 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
33251ebd318SNicolas Dichtel 		INIT_LIST_HEAD(&rt->rt6i_siblings);
3338d0b94afSMartin KaFai Lau 		INIT_LIST_HEAD(&rt->rt6i_uncached);
33497bab73fSDavid S. Miller 	}
335cf911662SDavid S. Miller 	return rt;
3361da177e4SLinus Torvalds }
3371da177e4SLinus Torvalds 
338d52d3997SMartin KaFai Lau static struct rt6_info *ip6_dst_alloc(struct net *net,
339d52d3997SMartin KaFai Lau 				      struct net_device *dev,
340d52d3997SMartin KaFai Lau 				      int flags,
341d52d3997SMartin KaFai Lau 				      struct fib6_table *table)
342d52d3997SMartin KaFai Lau {
343d52d3997SMartin KaFai Lau 	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table);
344d52d3997SMartin KaFai Lau 
345d52d3997SMartin KaFai Lau 	if (rt) {
346d52d3997SMartin KaFai Lau 		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
347d52d3997SMartin KaFai Lau 		if (rt->rt6i_pcpu) {
348d52d3997SMartin KaFai Lau 			int cpu;
349d52d3997SMartin KaFai Lau 
350d52d3997SMartin KaFai Lau 			for_each_possible_cpu(cpu) {
351d52d3997SMartin KaFai Lau 				struct rt6_info **p;
352d52d3997SMartin KaFai Lau 
353d52d3997SMartin KaFai Lau 				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
354d52d3997SMartin KaFai Lau 				/* no one shares rt */
355d52d3997SMartin KaFai Lau 				*p =  NULL;
356d52d3997SMartin KaFai Lau 			}
357d52d3997SMartin KaFai Lau 		} else {
358d52d3997SMartin KaFai Lau 			dst_destroy((struct dst_entry *)rt);
359d52d3997SMartin KaFai Lau 			return NULL;
360d52d3997SMartin KaFai Lau 		}
361d52d3997SMartin KaFai Lau 	}
362d52d3997SMartin KaFai Lau 
363d52d3997SMartin KaFai Lau 	return rt;
364d52d3997SMartin KaFai Lau }
365d52d3997SMartin KaFai Lau 
3661da177e4SLinus Torvalds static void ip6_dst_destroy(struct dst_entry *dst)
3671da177e4SLinus Torvalds {
3681da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
369ecd98837SYOSHIFUJI Hideaki / 吉藤英明 	struct dst_entry *from = dst->from;
3708d0b94afSMartin KaFai Lau 	struct inet6_dev *idev;
3711da177e4SLinus Torvalds 
3728e2ec639SYan, Zheng 	dst_destroy_metrics_generic(dst);
373d52d3997SMartin KaFai Lau 	free_percpu(rt->rt6i_pcpu);
3748d0b94afSMartin KaFai Lau 	rt6_uncached_list_del(rt);
3758d0b94afSMartin KaFai Lau 
3768d0b94afSMartin KaFai Lau 	idev = rt->rt6i_idev;
37738308473SDavid S. Miller 	if (idev) {
3781da177e4SLinus Torvalds 		rt->rt6i_idev = NULL;
3791da177e4SLinus Torvalds 		in6_dev_put(idev);
3801da177e4SLinus Torvalds 	}
3811716a961SGao feng 
382ecd98837SYOSHIFUJI Hideaki / 吉藤英明 	dst->from = NULL;
383ecd98837SYOSHIFUJI Hideaki / 吉藤英明 	dst_release(from);
384b3419363SDavid S. Miller }
385b3419363SDavid S. Miller 
3861da177e4SLinus Torvalds static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
3871da177e4SLinus Torvalds 			   int how)
3881da177e4SLinus Torvalds {
3891da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *)dst;
3901da177e4SLinus Torvalds 	struct inet6_dev *idev = rt->rt6i_idev;
3915a3e55d6SDenis V. Lunev 	struct net_device *loopback_dev =
392c346dca1SYOSHIFUJI Hideaki 		dev_net(dev)->loopback_dev;
3931da177e4SLinus Torvalds 
39497cac082SDavid S. Miller 	if (dev != loopback_dev) {
39597cac082SDavid S. Miller 		if (idev && idev->dev == dev) {
3965a3e55d6SDenis V. Lunev 			struct inet6_dev *loopback_idev =
3975a3e55d6SDenis V. Lunev 				in6_dev_get(loopback_dev);
39838308473SDavid S. Miller 			if (loopback_idev) {
3991da177e4SLinus Torvalds 				rt->rt6i_idev = loopback_idev;
4001da177e4SLinus Torvalds 				in6_dev_put(idev);
4011da177e4SLinus Torvalds 			}
4021da177e4SLinus Torvalds 		}
40397cac082SDavid S. Miller 	}
4041da177e4SLinus Torvalds }
4051da177e4SLinus Torvalds 
406a50feda5SEric Dumazet static bool rt6_check_expired(const struct rt6_info *rt)
4071da177e4SLinus Torvalds {
4081716a961SGao feng 	if (rt->rt6i_flags & RTF_EXPIRES) {
4091716a961SGao feng 		if (time_after(jiffies, rt->dst.expires))
410a50feda5SEric Dumazet 			return true;
4111716a961SGao feng 	} else if (rt->dst.from) {
4123fd91fb3SLi RongQing 		return rt6_check_expired((struct rt6_info *) rt->dst.from);
4131716a961SGao feng 	}
414a50feda5SEric Dumazet 	return false;
4151da177e4SLinus Torvalds }
4161da177e4SLinus Torvalds 
41751ebd318SNicolas Dichtel /* Multipath route selection:
41851ebd318SNicolas Dichtel  *   Hash based function using packet header and flowlabel.
41951ebd318SNicolas Dichtel  * Adapted from fib_info_hashfn()
42051ebd318SNicolas Dichtel  */
42151ebd318SNicolas Dichtel static int rt6_info_hash_nhsfn(unsigned int candidate_count,
42251ebd318SNicolas Dichtel 			       const struct flowi6 *fl6)
42351ebd318SNicolas Dichtel {
42451ebd318SNicolas Dichtel 	unsigned int val = fl6->flowi6_proto;
42551ebd318SNicolas Dichtel 
426c08977bbSYOSHIFUJI Hideaki / 吉藤英明 	val ^= ipv6_addr_hash(&fl6->daddr);
427c08977bbSYOSHIFUJI Hideaki / 吉藤英明 	val ^= ipv6_addr_hash(&fl6->saddr);
42851ebd318SNicolas Dichtel 
42951ebd318SNicolas Dichtel 	/* Work only if this not encapsulated */
43051ebd318SNicolas Dichtel 	switch (fl6->flowi6_proto) {
43151ebd318SNicolas Dichtel 	case IPPROTO_UDP:
43251ebd318SNicolas Dichtel 	case IPPROTO_TCP:
43351ebd318SNicolas Dichtel 	case IPPROTO_SCTP:
434b3ce5ae1SNicolas Dichtel 		val ^= (__force u16)fl6->fl6_sport;
435b3ce5ae1SNicolas Dichtel 		val ^= (__force u16)fl6->fl6_dport;
43651ebd318SNicolas Dichtel 		break;
43751ebd318SNicolas Dichtel 
43851ebd318SNicolas Dichtel 	case IPPROTO_ICMPV6:
439b3ce5ae1SNicolas Dichtel 		val ^= (__force u16)fl6->fl6_icmp_type;
440b3ce5ae1SNicolas Dichtel 		val ^= (__force u16)fl6->fl6_icmp_code;
44151ebd318SNicolas Dichtel 		break;
44251ebd318SNicolas Dichtel 	}
44351ebd318SNicolas Dichtel 	/* RFC6438 recommands to use flowlabel */
444b3ce5ae1SNicolas Dichtel 	val ^= (__force u32)fl6->flowlabel;
44551ebd318SNicolas Dichtel 
44651ebd318SNicolas Dichtel 	/* Perhaps, we need to tune, this function? */
44751ebd318SNicolas Dichtel 	val = val ^ (val >> 7) ^ (val >> 12);
44851ebd318SNicolas Dichtel 	return val % candidate_count;
44951ebd318SNicolas Dichtel }
45051ebd318SNicolas Dichtel 
45151ebd318SNicolas Dichtel static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
45252bd4c0cSNicolas Dichtel 					     struct flowi6 *fl6, int oif,
45352bd4c0cSNicolas Dichtel 					     int strict)
45451ebd318SNicolas Dichtel {
45551ebd318SNicolas Dichtel 	struct rt6_info *sibling, *next_sibling;
45651ebd318SNicolas Dichtel 	int route_choosen;
45751ebd318SNicolas Dichtel 
45851ebd318SNicolas Dichtel 	route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
45951ebd318SNicolas Dichtel 	/* Don't change the route, if route_choosen == 0
46051ebd318SNicolas Dichtel 	 * (siblings does not include ourself)
46151ebd318SNicolas Dichtel 	 */
46251ebd318SNicolas Dichtel 	if (route_choosen)
46351ebd318SNicolas Dichtel 		list_for_each_entry_safe(sibling, next_sibling,
46451ebd318SNicolas Dichtel 				&match->rt6i_siblings, rt6i_siblings) {
46551ebd318SNicolas Dichtel 			route_choosen--;
46651ebd318SNicolas Dichtel 			if (route_choosen == 0) {
46752bd4c0cSNicolas Dichtel 				if (rt6_score_route(sibling, oif, strict) < 0)
46852bd4c0cSNicolas Dichtel 					break;
46951ebd318SNicolas Dichtel 				match = sibling;
47051ebd318SNicolas Dichtel 				break;
47151ebd318SNicolas Dichtel 			}
47251ebd318SNicolas Dichtel 		}
47351ebd318SNicolas Dichtel 	return match;
47451ebd318SNicolas Dichtel }
47551ebd318SNicolas Dichtel 
4761da177e4SLinus Torvalds /*
477c71099acSThomas Graf  *	Route lookup. Any table->tb6_lock is implied.
4781da177e4SLinus Torvalds  */
4791da177e4SLinus Torvalds 
4808ed67789SDaniel Lezcano static inline struct rt6_info *rt6_device_match(struct net *net,
4818ed67789SDaniel Lezcano 						    struct rt6_info *rt,
482b71d1d42SEric Dumazet 						    const struct in6_addr *saddr,
4831da177e4SLinus Torvalds 						    int oif,
484d420895eSYOSHIFUJI Hideaki 						    int flags)
4851da177e4SLinus Torvalds {
4861da177e4SLinus Torvalds 	struct rt6_info *local = NULL;
4871da177e4SLinus Torvalds 	struct rt6_info *sprt;
4881da177e4SLinus Torvalds 
489dd3abc4eSYOSHIFUJI Hideaki 	if (!oif && ipv6_addr_any(saddr))
490dd3abc4eSYOSHIFUJI Hideaki 		goto out;
491dd3abc4eSYOSHIFUJI Hideaki 
492d8d1f30bSChangli Gao 	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
493d1918542SDavid S. Miller 		struct net_device *dev = sprt->dst.dev;
494dd3abc4eSYOSHIFUJI Hideaki 
495dd3abc4eSYOSHIFUJI Hideaki 		if (oif) {
4961da177e4SLinus Torvalds 			if (dev->ifindex == oif)
4971da177e4SLinus Torvalds 				return sprt;
4981da177e4SLinus Torvalds 			if (dev->flags & IFF_LOOPBACK) {
49938308473SDavid S. Miller 				if (!sprt->rt6i_idev ||
5001da177e4SLinus Torvalds 				    sprt->rt6i_idev->dev->ifindex != oif) {
501d420895eSYOSHIFUJI Hideaki 					if (flags & RT6_LOOKUP_F_IFACE && oif)
5021da177e4SLinus Torvalds 						continue;
5031da177e4SLinus Torvalds 					if (local && (!oif ||
5041da177e4SLinus Torvalds 						      local->rt6i_idev->dev->ifindex == oif))
5051da177e4SLinus Torvalds 						continue;
5061da177e4SLinus Torvalds 				}
5071da177e4SLinus Torvalds 				local = sprt;
5081da177e4SLinus Torvalds 			}
509dd3abc4eSYOSHIFUJI Hideaki 		} else {
510dd3abc4eSYOSHIFUJI Hideaki 			if (ipv6_chk_addr(net, saddr, dev,
511dd3abc4eSYOSHIFUJI Hideaki 					  flags & RT6_LOOKUP_F_IFACE))
512dd3abc4eSYOSHIFUJI Hideaki 				return sprt;
513dd3abc4eSYOSHIFUJI Hideaki 		}
5141da177e4SLinus Torvalds 	}
5151da177e4SLinus Torvalds 
516dd3abc4eSYOSHIFUJI Hideaki 	if (oif) {
5171da177e4SLinus Torvalds 		if (local)
5181da177e4SLinus Torvalds 			return local;
5191da177e4SLinus Torvalds 
520d420895eSYOSHIFUJI Hideaki 		if (flags & RT6_LOOKUP_F_IFACE)
5218ed67789SDaniel Lezcano 			return net->ipv6.ip6_null_entry;
5221da177e4SLinus Torvalds 	}
523dd3abc4eSYOSHIFUJI Hideaki out:
5241da177e4SLinus Torvalds 	return rt;
5251da177e4SLinus Torvalds }
5261da177e4SLinus Torvalds 
52727097255SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
528c2f17e82SHannes Frederic Sowa struct __rt6_probe_work {
529c2f17e82SHannes Frederic Sowa 	struct work_struct work;
530c2f17e82SHannes Frederic Sowa 	struct in6_addr target;
531c2f17e82SHannes Frederic Sowa 	struct net_device *dev;
532c2f17e82SHannes Frederic Sowa };
533c2f17e82SHannes Frederic Sowa 
534c2f17e82SHannes Frederic Sowa static void rt6_probe_deferred(struct work_struct *w)
535c2f17e82SHannes Frederic Sowa {
536c2f17e82SHannes Frederic Sowa 	struct in6_addr mcaddr;
537c2f17e82SHannes Frederic Sowa 	struct __rt6_probe_work *work =
538c2f17e82SHannes Frederic Sowa 		container_of(w, struct __rt6_probe_work, work);
539c2f17e82SHannes Frederic Sowa 
540c2f17e82SHannes Frederic Sowa 	addrconf_addr_solict_mult(&work->target, &mcaddr);
541c2f17e82SHannes Frederic Sowa 	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
542c2f17e82SHannes Frederic Sowa 	dev_put(work->dev);
543662f5533SMichael Büsch 	kfree(work);
544c2f17e82SHannes Frederic Sowa }
545c2f17e82SHannes Frederic Sowa 
54627097255SYOSHIFUJI Hideaki static void rt6_probe(struct rt6_info *rt)
54727097255SYOSHIFUJI Hideaki {
548f2c31e32SEric Dumazet 	struct neighbour *neigh;
54927097255SYOSHIFUJI Hideaki 	/*
55027097255SYOSHIFUJI Hideaki 	 * Okay, this does not seem to be appropriate
55127097255SYOSHIFUJI Hideaki 	 * for now, however, we need to check if it
55227097255SYOSHIFUJI Hideaki 	 * is really so; aka Router Reachability Probing.
55327097255SYOSHIFUJI Hideaki 	 *
55427097255SYOSHIFUJI Hideaki 	 * Router Reachability Probe MUST be rate-limited
55527097255SYOSHIFUJI Hideaki 	 * to no more than one per minute.
55627097255SYOSHIFUJI Hideaki 	 */
5572152caeaSYOSHIFUJI Hideaki / 吉藤英明 	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
558fdd6681dSAmerigo Wang 		return;
5592152caeaSYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_lock_bh();
5602152caeaSYOSHIFUJI Hideaki / 吉藤英明 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
5612152caeaSYOSHIFUJI Hideaki / 吉藤英明 	if (neigh) {
5622152caeaSYOSHIFUJI Hideaki / 吉藤英明 		write_lock(&neigh->lock);
5632152caeaSYOSHIFUJI Hideaki / 吉藤英明 		if (neigh->nud_state & NUD_VALID)
5642152caeaSYOSHIFUJI Hideaki / 吉藤英明 			goto out;
5657ff74a59SYOSHIFUJI Hideaki / 吉藤英明 	}
5662152caeaSYOSHIFUJI Hideaki / 吉藤英明 
5672152caeaSYOSHIFUJI Hideaki / 吉藤英明 	if (!neigh ||
56852e16356SYOSHIFUJI Hideaki 	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
569c2f17e82SHannes Frederic Sowa 		struct __rt6_probe_work *work;
57027097255SYOSHIFUJI Hideaki 
571c2f17e82SHannes Frederic Sowa 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
572c2f17e82SHannes Frederic Sowa 
573c2f17e82SHannes Frederic Sowa 		if (neigh && work)
5747e980569SJiri Benc 			__neigh_set_probe_once(neigh);
5752152caeaSYOSHIFUJI Hideaki / 吉藤英明 
576c2f17e82SHannes Frederic Sowa 		if (neigh)
577c2f17e82SHannes Frederic Sowa 			write_unlock(&neigh->lock);
578c2f17e82SHannes Frederic Sowa 
579c2f17e82SHannes Frederic Sowa 		if (work) {
580c2f17e82SHannes Frederic Sowa 			INIT_WORK(&work->work, rt6_probe_deferred);
581c2f17e82SHannes Frederic Sowa 			work->target = rt->rt6i_gateway;
582c2f17e82SHannes Frederic Sowa 			dev_hold(rt->dst.dev);
583c2f17e82SHannes Frederic Sowa 			work->dev = rt->dst.dev;
584c2f17e82SHannes Frederic Sowa 			schedule_work(&work->work);
585c2f17e82SHannes Frederic Sowa 		}
586f2c31e32SEric Dumazet 	} else {
5872152caeaSYOSHIFUJI Hideaki / 吉藤英明 out:
5882152caeaSYOSHIFUJI Hideaki / 吉藤英明 		write_unlock(&neigh->lock);
58927097255SYOSHIFUJI Hideaki 	}
5902152caeaSYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_unlock_bh();
591f2c31e32SEric Dumazet }
59227097255SYOSHIFUJI Hideaki #else
/* Without CONFIG_IPV6_ROUTER_PREF router probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
59627097255SYOSHIFUJI Hideaki #endif
59727097255SYOSHIFUJI Hideaki 
5981da177e4SLinus Torvalds /*
599554cfb7eSYOSHIFUJI Hideaki  * Default Router Selection (RFC 2461 6.3.6)
6001da177e4SLinus Torvalds  */
601b6f99a21SDave Jones static inline int rt6_check_dev(struct rt6_info *rt, int oif)
6021da177e4SLinus Torvalds {
603d1918542SDavid S. Miller 	struct net_device *dev = rt->dst.dev;
604161980f4SDavid S. Miller 	if (!oif || dev->ifindex == oif)
605554cfb7eSYOSHIFUJI Hideaki 		return 2;
606161980f4SDavid S. Miller 	if ((dev->flags & IFF_LOOPBACK) &&
607161980f4SDavid S. Miller 	    rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
608161980f4SDavid S. Miller 		return 1;
609554cfb7eSYOSHIFUJI Hideaki 	return 0;
6101da177e4SLinus Torvalds }
6111da177e4SLinus Torvalds 
612afc154e9SHannes Frederic Sowa static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
6131da177e4SLinus Torvalds {
614f2c31e32SEric Dumazet 	struct neighbour *neigh;
615afc154e9SHannes Frederic Sowa 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
616f2c31e32SEric Dumazet 
6174d0c5911SYOSHIFUJI Hideaki 	if (rt->rt6i_flags & RTF_NONEXTHOP ||
6184d0c5911SYOSHIFUJI Hideaki 	    !(rt->rt6i_flags & RTF_GATEWAY))
619afc154e9SHannes Frederic Sowa 		return RT6_NUD_SUCCEED;
620145a3621SYOSHIFUJI Hideaki / 吉藤英明 
621145a3621SYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_lock_bh();
622145a3621SYOSHIFUJI Hideaki / 吉藤英明 	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
623145a3621SYOSHIFUJI Hideaki / 吉藤英明 	if (neigh) {
624145a3621SYOSHIFUJI Hideaki / 吉藤英明 		read_lock(&neigh->lock);
625554cfb7eSYOSHIFUJI Hideaki 		if (neigh->nud_state & NUD_VALID)
626afc154e9SHannes Frederic Sowa 			ret = RT6_NUD_SUCCEED;
627398bcbebSYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
628a5a81f0bSPaul Marks 		else if (!(neigh->nud_state & NUD_FAILED))
629afc154e9SHannes Frederic Sowa 			ret = RT6_NUD_SUCCEED;
6307e980569SJiri Benc 		else
6317e980569SJiri Benc 			ret = RT6_NUD_FAIL_PROBE;
632398bcbebSYOSHIFUJI Hideaki #endif
633145a3621SYOSHIFUJI Hideaki / 吉藤英明 		read_unlock(&neigh->lock);
634afc154e9SHannes Frederic Sowa 	} else {
635afc154e9SHannes Frederic Sowa 		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
6367e980569SJiri Benc 		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
637a5a81f0bSPaul Marks 	}
638145a3621SYOSHIFUJI Hideaki / 吉藤英明 	rcu_read_unlock_bh();
639145a3621SYOSHIFUJI Hideaki / 吉藤英明 
640a5a81f0bSPaul Marks 	return ret;
6411da177e4SLinus Torvalds }
6421da177e4SLinus Torvalds 
643554cfb7eSYOSHIFUJI Hideaki static int rt6_score_route(struct rt6_info *rt, int oif,
644554cfb7eSYOSHIFUJI Hideaki 			   int strict)
645554cfb7eSYOSHIFUJI Hideaki {
646a5a81f0bSPaul Marks 	int m;
6474d0c5911SYOSHIFUJI Hideaki 
6484d0c5911SYOSHIFUJI Hideaki 	m = rt6_check_dev(rt, oif);
64977d16f45SYOSHIFUJI Hideaki 	if (!m && (strict & RT6_LOOKUP_F_IFACE))
650afc154e9SHannes Frederic Sowa 		return RT6_NUD_FAIL_HARD;
651ebacaaa0SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTER_PREF
652ebacaaa0SYOSHIFUJI Hideaki 	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
653ebacaaa0SYOSHIFUJI Hideaki #endif
654afc154e9SHannes Frederic Sowa 	if (strict & RT6_LOOKUP_F_REACHABLE) {
655afc154e9SHannes Frederic Sowa 		int n = rt6_check_neigh(rt);
656afc154e9SHannes Frederic Sowa 		if (n < 0)
657afc154e9SHannes Frederic Sowa 			return n;
658afc154e9SHannes Frederic Sowa 	}
659554cfb7eSYOSHIFUJI Hideaki 	return m;
660554cfb7eSYOSHIFUJI Hideaki }
661554cfb7eSYOSHIFUJI Hideaki 
662f11e6659SDavid S. Miller static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
663afc154e9SHannes Frederic Sowa 				   int *mpri, struct rt6_info *match,
664afc154e9SHannes Frederic Sowa 				   bool *do_rr)
665554cfb7eSYOSHIFUJI Hideaki {
666554cfb7eSYOSHIFUJI Hideaki 	int m;
667afc154e9SHannes Frederic Sowa 	bool match_do_rr = false;
668554cfb7eSYOSHIFUJI Hideaki 
669554cfb7eSYOSHIFUJI Hideaki 	if (rt6_check_expired(rt))
670f11e6659SDavid S. Miller 		goto out;
671554cfb7eSYOSHIFUJI Hideaki 
672554cfb7eSYOSHIFUJI Hideaki 	m = rt6_score_route(rt, oif, strict);
6737e980569SJiri Benc 	if (m == RT6_NUD_FAIL_DO_RR) {
674afc154e9SHannes Frederic Sowa 		match_do_rr = true;
675afc154e9SHannes Frederic Sowa 		m = 0; /* lowest valid score */
6767e980569SJiri Benc 	} else if (m == RT6_NUD_FAIL_HARD) {
677f11e6659SDavid S. Miller 		goto out;
6781da177e4SLinus Torvalds 	}
679f11e6659SDavid S. Miller 
680afc154e9SHannes Frederic Sowa 	if (strict & RT6_LOOKUP_F_REACHABLE)
681afc154e9SHannes Frederic Sowa 		rt6_probe(rt);
682afc154e9SHannes Frederic Sowa 
6837e980569SJiri Benc 	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
684afc154e9SHannes Frederic Sowa 	if (m > *mpri) {
685afc154e9SHannes Frederic Sowa 		*do_rr = match_do_rr;
686afc154e9SHannes Frederic Sowa 		*mpri = m;
687afc154e9SHannes Frederic Sowa 		match = rt;
688afc154e9SHannes Frederic Sowa 	}
689f11e6659SDavid S. Miller out:
690f11e6659SDavid S. Miller 	return match;
6911da177e4SLinus Torvalds }
6921da177e4SLinus Torvalds 
693f11e6659SDavid S. Miller static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
694f11e6659SDavid S. Miller 				     struct rt6_info *rr_head,
695afc154e9SHannes Frederic Sowa 				     u32 metric, int oif, int strict,
696afc154e9SHannes Frederic Sowa 				     bool *do_rr)
697f11e6659SDavid S. Miller {
6989fbdcfafSSteffen Klassert 	struct rt6_info *rt, *match, *cont;
699f11e6659SDavid S. Miller 	int mpri = -1;
700f11e6659SDavid S. Miller 
701f11e6659SDavid S. Miller 	match = NULL;
7029fbdcfafSSteffen Klassert 	cont = NULL;
7039fbdcfafSSteffen Klassert 	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
7049fbdcfafSSteffen Klassert 		if (rt->rt6i_metric != metric) {
7059fbdcfafSSteffen Klassert 			cont = rt;
7069fbdcfafSSteffen Klassert 			break;
7079fbdcfafSSteffen Klassert 		}
7089fbdcfafSSteffen Klassert 
709afc154e9SHannes Frederic Sowa 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
7109fbdcfafSSteffen Klassert 	}
7119fbdcfafSSteffen Klassert 
7129fbdcfafSSteffen Klassert 	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
7139fbdcfafSSteffen Klassert 		if (rt->rt6i_metric != metric) {
7149fbdcfafSSteffen Klassert 			cont = rt;
7159fbdcfafSSteffen Klassert 			break;
7169fbdcfafSSteffen Klassert 		}
7179fbdcfafSSteffen Klassert 
7189fbdcfafSSteffen Klassert 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
7199fbdcfafSSteffen Klassert 	}
7209fbdcfafSSteffen Klassert 
7219fbdcfafSSteffen Klassert 	if (match || !cont)
7229fbdcfafSSteffen Klassert 		return match;
7239fbdcfafSSteffen Klassert 
7249fbdcfafSSteffen Klassert 	for (rt = cont; rt; rt = rt->dst.rt6_next)
725afc154e9SHannes Frederic Sowa 		match = find_match(rt, oif, strict, &mpri, match, do_rr);
726f11e6659SDavid S. Miller 
727f11e6659SDavid S. Miller 	return match;
728f11e6659SDavid S. Miller }
729f11e6659SDavid S. Miller 
730f11e6659SDavid S. Miller static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
731f11e6659SDavid S. Miller {
732f11e6659SDavid S. Miller 	struct rt6_info *match, *rt0;
7338ed67789SDaniel Lezcano 	struct net *net;
734afc154e9SHannes Frederic Sowa 	bool do_rr = false;
735f11e6659SDavid S. Miller 
736f11e6659SDavid S. Miller 	rt0 = fn->rr_ptr;
737f11e6659SDavid S. Miller 	if (!rt0)
738f11e6659SDavid S. Miller 		fn->rr_ptr = rt0 = fn->leaf;
739f11e6659SDavid S. Miller 
740afc154e9SHannes Frederic Sowa 	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
741afc154e9SHannes Frederic Sowa 			     &do_rr);
742f11e6659SDavid S. Miller 
743afc154e9SHannes Frederic Sowa 	if (do_rr) {
744d8d1f30bSChangli Gao 		struct rt6_info *next = rt0->dst.rt6_next;
745f11e6659SDavid S. Miller 
746554cfb7eSYOSHIFUJI Hideaki 		/* no entries matched; do round-robin */
747f11e6659SDavid S. Miller 		if (!next || next->rt6i_metric != rt0->rt6i_metric)
748f11e6659SDavid S. Miller 			next = fn->leaf;
749f11e6659SDavid S. Miller 
750f11e6659SDavid S. Miller 		if (next != rt0)
751f11e6659SDavid S. Miller 			fn->rr_ptr = next;
752554cfb7eSYOSHIFUJI Hideaki 	}
753554cfb7eSYOSHIFUJI Hideaki 
754d1918542SDavid S. Miller 	net = dev_net(rt0->dst.dev);
755a02cec21SEric Dumazet 	return match ? match : net->ipv6.ip6_null_entry;
7561da177e4SLinus Torvalds }
7571da177e4SLinus Torvalds 
7588b9df265SMartin KaFai Lau static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
7598b9df265SMartin KaFai Lau {
7608b9df265SMartin KaFai Lau 	return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
7618b9df265SMartin KaFai Lau }
7628b9df265SMartin KaFai Lau 
76370ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
76470ceb4f5SYOSHIFUJI Hideaki int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
765b71d1d42SEric Dumazet 		  const struct in6_addr *gwaddr)
76670ceb4f5SYOSHIFUJI Hideaki {
767c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
76870ceb4f5SYOSHIFUJI Hideaki 	struct route_info *rinfo = (struct route_info *) opt;
76970ceb4f5SYOSHIFUJI Hideaki 	struct in6_addr prefix_buf, *prefix;
77070ceb4f5SYOSHIFUJI Hideaki 	unsigned int pref;
7714bed72e4SYOSHIFUJI Hideaki 	unsigned long lifetime;
77270ceb4f5SYOSHIFUJI Hideaki 	struct rt6_info *rt;
77370ceb4f5SYOSHIFUJI Hideaki 
77470ceb4f5SYOSHIFUJI Hideaki 	if (len < sizeof(struct route_info)) {
77570ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
77670ceb4f5SYOSHIFUJI Hideaki 	}
77770ceb4f5SYOSHIFUJI Hideaki 
77870ceb4f5SYOSHIFUJI Hideaki 	/* Sanity check for prefix_len and length */
77970ceb4f5SYOSHIFUJI Hideaki 	if (rinfo->length > 3) {
78070ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
78170ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 128) {
78270ceb4f5SYOSHIFUJI Hideaki 		return -EINVAL;
78370ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 64) {
78470ceb4f5SYOSHIFUJI Hideaki 		if (rinfo->length < 2) {
78570ceb4f5SYOSHIFUJI Hideaki 			return -EINVAL;
78670ceb4f5SYOSHIFUJI Hideaki 		}
78770ceb4f5SYOSHIFUJI Hideaki 	} else if (rinfo->prefix_len > 0) {
78870ceb4f5SYOSHIFUJI Hideaki 		if (rinfo->length < 1) {
78970ceb4f5SYOSHIFUJI Hideaki 			return -EINVAL;
79070ceb4f5SYOSHIFUJI Hideaki 		}
79170ceb4f5SYOSHIFUJI Hideaki 	}
79270ceb4f5SYOSHIFUJI Hideaki 
79370ceb4f5SYOSHIFUJI Hideaki 	pref = rinfo->route_pref;
79470ceb4f5SYOSHIFUJI Hideaki 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
7953933fc95SJens Rosenboom 		return -EINVAL;
79670ceb4f5SYOSHIFUJI Hideaki 
7974bed72e4SYOSHIFUJI Hideaki 	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
79870ceb4f5SYOSHIFUJI Hideaki 
79970ceb4f5SYOSHIFUJI Hideaki 	if (rinfo->length == 3)
80070ceb4f5SYOSHIFUJI Hideaki 		prefix = (struct in6_addr *)rinfo->prefix;
80170ceb4f5SYOSHIFUJI Hideaki 	else {
80270ceb4f5SYOSHIFUJI Hideaki 		/* this function is safe */
80370ceb4f5SYOSHIFUJI Hideaki 		ipv6_addr_prefix(&prefix_buf,
80470ceb4f5SYOSHIFUJI Hideaki 				 (struct in6_addr *)rinfo->prefix,
80570ceb4f5SYOSHIFUJI Hideaki 				 rinfo->prefix_len);
80670ceb4f5SYOSHIFUJI Hideaki 		prefix = &prefix_buf;
80770ceb4f5SYOSHIFUJI Hideaki 	}
80870ceb4f5SYOSHIFUJI Hideaki 
809f104a567SDuan Jiong 	if (rinfo->prefix_len == 0)
810f104a567SDuan Jiong 		rt = rt6_get_dflt_router(gwaddr, dev);
811f104a567SDuan Jiong 	else
812f104a567SDuan Jiong 		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
813f104a567SDuan Jiong 					gwaddr, dev->ifindex);
81470ceb4f5SYOSHIFUJI Hideaki 
81570ceb4f5SYOSHIFUJI Hideaki 	if (rt && !lifetime) {
816e0a1ad73SThomas Graf 		ip6_del_rt(rt);
81770ceb4f5SYOSHIFUJI Hideaki 		rt = NULL;
81870ceb4f5SYOSHIFUJI Hideaki 	}
81970ceb4f5SYOSHIFUJI Hideaki 
82070ceb4f5SYOSHIFUJI Hideaki 	if (!rt && lifetime)
821efa2cea0SDaniel Lezcano 		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
82270ceb4f5SYOSHIFUJI Hideaki 					pref);
82370ceb4f5SYOSHIFUJI Hideaki 	else if (rt)
82470ceb4f5SYOSHIFUJI Hideaki 		rt->rt6i_flags = RTF_ROUTEINFO |
82570ceb4f5SYOSHIFUJI Hideaki 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
82670ceb4f5SYOSHIFUJI Hideaki 
82770ceb4f5SYOSHIFUJI Hideaki 	if (rt) {
8281716a961SGao feng 		if (!addrconf_finite_timeout(lifetime))
8291716a961SGao feng 			rt6_clean_expires(rt);
8301716a961SGao feng 		else
8311716a961SGao feng 			rt6_set_expires(rt, jiffies + HZ * lifetime);
8321716a961SGao feng 
83394e187c0SAmerigo Wang 		ip6_rt_put(rt);
83470ceb4f5SYOSHIFUJI Hideaki 	}
83570ceb4f5SYOSHIFUJI Hideaki 	return 0;
83670ceb4f5SYOSHIFUJI Hideaki }
83770ceb4f5SYOSHIFUJI Hideaki #endif
83870ceb4f5SYOSHIFUJI Hideaki 
839a3c00e46SMartin KaFai Lau static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
840a3c00e46SMartin KaFai Lau 					struct in6_addr *saddr)
841a3c00e46SMartin KaFai Lau {
842a3c00e46SMartin KaFai Lau 	struct fib6_node *pn;
843a3c00e46SMartin KaFai Lau 	while (1) {
844a3c00e46SMartin KaFai Lau 		if (fn->fn_flags & RTN_TL_ROOT)
845a3c00e46SMartin KaFai Lau 			return NULL;
846a3c00e46SMartin KaFai Lau 		pn = fn->parent;
847a3c00e46SMartin KaFai Lau 		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
848a3c00e46SMartin KaFai Lau 			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
849a3c00e46SMartin KaFai Lau 		else
850a3c00e46SMartin KaFai Lau 			fn = pn;
851a3c00e46SMartin KaFai Lau 		if (fn->fn_flags & RTN_RTINFO)
852a3c00e46SMartin KaFai Lau 			return fn;
853a3c00e46SMartin KaFai Lau 	}
854a3c00e46SMartin KaFai Lau }
855c71099acSThomas Graf 
8568ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_lookup(struct net *net,
8578ed67789SDaniel Lezcano 					     struct fib6_table *table,
8584c9483b2SDavid S. Miller 					     struct flowi6 *fl6, int flags)
8591da177e4SLinus Torvalds {
8601da177e4SLinus Torvalds 	struct fib6_node *fn;
8611da177e4SLinus Torvalds 	struct rt6_info *rt;
8621da177e4SLinus Torvalds 
863c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
8644c9483b2SDavid S. Miller 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
865c71099acSThomas Graf restart:
866c71099acSThomas Graf 	rt = fn->leaf;
8674c9483b2SDavid S. Miller 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
86851ebd318SNicolas Dichtel 	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
86952bd4c0cSNicolas Dichtel 		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
870a3c00e46SMartin KaFai Lau 	if (rt == net->ipv6.ip6_null_entry) {
871a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
872a3c00e46SMartin KaFai Lau 		if (fn)
873a3c00e46SMartin KaFai Lau 			goto restart;
874a3c00e46SMartin KaFai Lau 	}
875d8d1f30bSChangli Gao 	dst_use(&rt->dst, jiffies);
876c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
8771da177e4SLinus Torvalds 	return rt;
878c71099acSThomas Graf 
879c71099acSThomas Graf }
880c71099acSThomas Graf 
881ea6e574eSFlorian Westphal struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
882ea6e574eSFlorian Westphal 				    int flags)
883ea6e574eSFlorian Westphal {
884ea6e574eSFlorian Westphal 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
885ea6e574eSFlorian Westphal }
886ea6e574eSFlorian Westphal EXPORT_SYMBOL_GPL(ip6_route_lookup);
887ea6e574eSFlorian Westphal 
8889acd9f3aSYOSHIFUJI Hideaki struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
8899acd9f3aSYOSHIFUJI Hideaki 			    const struct in6_addr *saddr, int oif, int strict)
890c71099acSThomas Graf {
8914c9483b2SDavid S. Miller 	struct flowi6 fl6 = {
8924c9483b2SDavid S. Miller 		.flowi6_oif = oif,
8934c9483b2SDavid S. Miller 		.daddr = *daddr,
894c71099acSThomas Graf 	};
895c71099acSThomas Graf 	struct dst_entry *dst;
89677d16f45SYOSHIFUJI Hideaki 	int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
897c71099acSThomas Graf 
898adaa70bbSThomas Graf 	if (saddr) {
8994c9483b2SDavid S. Miller 		memcpy(&fl6.saddr, saddr, sizeof(*saddr));
900adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
901adaa70bbSThomas Graf 	}
902adaa70bbSThomas Graf 
9034c9483b2SDavid S. Miller 	dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
904c71099acSThomas Graf 	if (dst->error == 0)
905c71099acSThomas Graf 		return (struct rt6_info *) dst;
906c71099acSThomas Graf 
907c71099acSThomas Graf 	dst_release(dst);
908c71099acSThomas Graf 
9091da177e4SLinus Torvalds 	return NULL;
9101da177e4SLinus Torvalds }
9117159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(rt6_lookup);
9127159039aSYOSHIFUJI Hideaki 
913c71099acSThomas Graf /* ip6_ins_rt is called with FREE table->tb6_lock.
9141da177e4SLinus Torvalds    It takes new route entry, the addition fails by any reason the
9151da177e4SLinus Torvalds    route is freed. In any case, if caller does not hold it, it may
9161da177e4SLinus Torvalds    be destroyed.
9171da177e4SLinus Torvalds  */
9181da177e4SLinus Torvalds 
919e5fd387aSMichal Kubeček static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
920e715b6d3SFlorian Westphal 			struct mx6_config *mxc)
9211da177e4SLinus Torvalds {
9221da177e4SLinus Torvalds 	int err;
923c71099acSThomas Graf 	struct fib6_table *table;
9241da177e4SLinus Torvalds 
925c71099acSThomas Graf 	table = rt->rt6i_table;
926c71099acSThomas Graf 	write_lock_bh(&table->tb6_lock);
927e715b6d3SFlorian Westphal 	err = fib6_add(&table->tb6_root, rt, info, mxc);
928c71099acSThomas Graf 	write_unlock_bh(&table->tb6_lock);
9291da177e4SLinus Torvalds 
9301da177e4SLinus Torvalds 	return err;
9311da177e4SLinus Torvalds }
9321da177e4SLinus Torvalds 
93340e22e8fSThomas Graf int ip6_ins_rt(struct rt6_info *rt)
93440e22e8fSThomas Graf {
935e715b6d3SFlorian Westphal 	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
936e715b6d3SFlorian Westphal 	struct mx6_config mxc = { .mx = NULL, };
937e715b6d3SFlorian Westphal 
938e715b6d3SFlorian Westphal 	return __ip6_ins_rt(rt, &info, &mxc);
93940e22e8fSThomas Graf }
94040e22e8fSThomas Graf 
9418b9df265SMartin KaFai Lau static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
94221efcfa0SEric Dumazet 					   const struct in6_addr *daddr,
943b71d1d42SEric Dumazet 					   const struct in6_addr *saddr)
9441da177e4SLinus Torvalds {
9451da177e4SLinus Torvalds 	struct rt6_info *rt;
9461da177e4SLinus Torvalds 
9471da177e4SLinus Torvalds 	/*
9481da177e4SLinus Torvalds 	 *	Clone the route.
9491da177e4SLinus Torvalds 	 */
9501da177e4SLinus Torvalds 
951d52d3997SMartin KaFai Lau 	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
95283a09abdSMartin KaFai Lau 		ort = (struct rt6_info *)ort->dst.from;
9531da177e4SLinus Torvalds 
954d52d3997SMartin KaFai Lau 	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev,
95583a09abdSMartin KaFai Lau 			     0, ort->rt6i_table);
95683a09abdSMartin KaFai Lau 
95783a09abdSMartin KaFai Lau 	if (!rt)
95883a09abdSMartin KaFai Lau 		return NULL;
95983a09abdSMartin KaFai Lau 
96083a09abdSMartin KaFai Lau 	ip6_rt_copy_init(rt, ort);
9618b9df265SMartin KaFai Lau 	rt->rt6i_flags |= RTF_CACHE;
96283a09abdSMartin KaFai Lau 	rt->rt6i_metric = 0;
96383a09abdSMartin KaFai Lau 	rt->dst.flags |= DST_HOST;
96483a09abdSMartin KaFai Lau 	rt->rt6i_dst.addr = *daddr;
96583a09abdSMartin KaFai Lau 	rt->rt6i_dst.plen = 128;
9668b9df265SMartin KaFai Lau 
9678b9df265SMartin KaFai Lau 	if (!rt6_is_gw_or_nonexthop(ort)) {
968bb3c3686SDavid S. Miller 		if (ort->rt6i_dst.plen != 128 &&
96921efcfa0SEric Dumazet 		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
97058c4fb86SYOSHIFUJI Hideaki 			rt->rt6i_flags |= RTF_ANYCAST;
9711da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
9721da177e4SLinus Torvalds 		if (rt->rt6i_src.plen && saddr) {
9734e3fd7a0SAlexey Dobriyan 			rt->rt6i_src.addr = *saddr;
9741da177e4SLinus Torvalds 			rt->rt6i_src.plen = 128;
9751da177e4SLinus Torvalds 		}
9761da177e4SLinus Torvalds #endif
97795a9a5baSYOSHIFUJI Hideaki 	}
97895a9a5baSYOSHIFUJI Hideaki 
979299d9939SYOSHIFUJI Hideaki 	return rt;
980299d9939SYOSHIFUJI Hideaki }
981299d9939SYOSHIFUJI Hideaki 
982d52d3997SMartin KaFai Lau static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
983d52d3997SMartin KaFai Lau {
984d52d3997SMartin KaFai Lau 	struct rt6_info *pcpu_rt;
985d52d3997SMartin KaFai Lau 
986d52d3997SMartin KaFai Lau 	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
987d52d3997SMartin KaFai Lau 				  rt->dst.dev, rt->dst.flags,
988d52d3997SMartin KaFai Lau 				  rt->rt6i_table);
989d52d3997SMartin KaFai Lau 
990d52d3997SMartin KaFai Lau 	if (!pcpu_rt)
991d52d3997SMartin KaFai Lau 		return NULL;
992d52d3997SMartin KaFai Lau 	ip6_rt_copy_init(pcpu_rt, rt);
993d52d3997SMartin KaFai Lau 	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
994d52d3997SMartin KaFai Lau 	pcpu_rt->rt6i_flags |= RTF_PCPU;
995d52d3997SMartin KaFai Lau 	return pcpu_rt;
996d52d3997SMartin KaFai Lau }
997d52d3997SMartin KaFai Lau 
998d52d3997SMartin KaFai Lau /* It should be called with read_lock_bh(&tb6_lock) acquired */
999d52d3997SMartin KaFai Lau static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1000d52d3997SMartin KaFai Lau {
1001d52d3997SMartin KaFai Lau 	struct rt6_info *pcpu_rt, *prev, **p;
1002d52d3997SMartin KaFai Lau 
1003d52d3997SMartin KaFai Lau 	p = this_cpu_ptr(rt->rt6i_pcpu);
1004d52d3997SMartin KaFai Lau 	pcpu_rt = *p;
1005d52d3997SMartin KaFai Lau 
1006d52d3997SMartin KaFai Lau 	if (pcpu_rt)
1007d52d3997SMartin KaFai Lau 		goto done;
1008d52d3997SMartin KaFai Lau 
1009d52d3997SMartin KaFai Lau 	pcpu_rt = ip6_rt_pcpu_alloc(rt);
1010d52d3997SMartin KaFai Lau 	if (!pcpu_rt) {
1011d52d3997SMartin KaFai Lau 		struct net *net = dev_net(rt->dst.dev);
1012d52d3997SMartin KaFai Lau 
1013d52d3997SMartin KaFai Lau 		pcpu_rt = net->ipv6.ip6_null_entry;
1014d52d3997SMartin KaFai Lau 		goto done;
1015d52d3997SMartin KaFai Lau 	}
1016d52d3997SMartin KaFai Lau 
1017d52d3997SMartin KaFai Lau 	prev = cmpxchg(p, NULL, pcpu_rt);
1018d52d3997SMartin KaFai Lau 	if (prev) {
1019d52d3997SMartin KaFai Lau 		/* If someone did it before us, return prev instead */
1020d52d3997SMartin KaFai Lau 		dst_destroy(&pcpu_rt->dst);
1021d52d3997SMartin KaFai Lau 		pcpu_rt = prev;
1022d52d3997SMartin KaFai Lau 	}
1023d52d3997SMartin KaFai Lau 
1024d52d3997SMartin KaFai Lau done:
1025d52d3997SMartin KaFai Lau 	dst_hold(&pcpu_rt->dst);
1026d52d3997SMartin KaFai Lau 	rt6_dst_from_metrics_check(pcpu_rt);
1027d52d3997SMartin KaFai Lau 	return pcpu_rt;
1028d52d3997SMartin KaFai Lau }
1029d52d3997SMartin KaFai Lau 
10308ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
10314c9483b2SDavid S. Miller 				      struct flowi6 *fl6, int flags)
10321da177e4SLinus Torvalds {
1033367efcb9SMartin KaFai Lau 	struct fib6_node *fn, *saved_fn;
103445e4fd26SMartin KaFai Lau 	struct rt6_info *rt;
1035c71099acSThomas Graf 	int strict = 0;
10361da177e4SLinus Torvalds 
103777d16f45SYOSHIFUJI Hideaki 	strict |= flags & RT6_LOOKUP_F_IFACE;
1038367efcb9SMartin KaFai Lau 	if (net->ipv6.devconf_all->forwarding == 0)
1039367efcb9SMartin KaFai Lau 		strict |= RT6_LOOKUP_F_REACHABLE;
10401da177e4SLinus Torvalds 
1041c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
10421da177e4SLinus Torvalds 
10434c9483b2SDavid S. Miller 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1044367efcb9SMartin KaFai Lau 	saved_fn = fn;
10451da177e4SLinus Torvalds 
1046a3c00e46SMartin KaFai Lau redo_rt6_select:
1047367efcb9SMartin KaFai Lau 	rt = rt6_select(fn, oif, strict);
104852bd4c0cSNicolas Dichtel 	if (rt->rt6i_nsiblings)
1049367efcb9SMartin KaFai Lau 		rt = rt6_multipath_select(rt, fl6, oif, strict);
1050a3c00e46SMartin KaFai Lau 	if (rt == net->ipv6.ip6_null_entry) {
1051a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
1052a3c00e46SMartin KaFai Lau 		if (fn)
1053a3c00e46SMartin KaFai Lau 			goto redo_rt6_select;
1054367efcb9SMartin KaFai Lau 		else if (strict & RT6_LOOKUP_F_REACHABLE) {
1055367efcb9SMartin KaFai Lau 			/* also consider unreachable route */
1056367efcb9SMartin KaFai Lau 			strict &= ~RT6_LOOKUP_F_REACHABLE;
1057367efcb9SMartin KaFai Lau 			fn = saved_fn;
1058367efcb9SMartin KaFai Lau 			goto redo_rt6_select;
1059367efcb9SMartin KaFai Lau 		}
1060a3c00e46SMartin KaFai Lau 	}
1061a3c00e46SMartin KaFai Lau 
1062d52d3997SMartin KaFai Lau 
1063d52d3997SMartin KaFai Lau 	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
10643da59bd9SMartin KaFai Lau 		dst_use(&rt->dst, jiffies);
1065c71099acSThomas Graf 		read_unlock_bh(&table->tb6_lock);
10661da177e4SLinus Torvalds 
1067d52d3997SMartin KaFai Lau 		rt6_dst_from_metrics_check(rt);
1068d52d3997SMartin KaFai Lau 		return rt;
10693da59bd9SMartin KaFai Lau 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
10703da59bd9SMartin KaFai Lau 			    !(rt->rt6i_flags & RTF_GATEWAY))) {
10713da59bd9SMartin KaFai Lau 		/* Create a RTF_CACHE clone which will not be
10723da59bd9SMartin KaFai Lau 		 * owned by the fib6 tree.  It is for the special case where
10733da59bd9SMartin KaFai Lau 		 * the daddr in the skb during the neighbor look-up is different
10743da59bd9SMartin KaFai Lau 		 * from the fl6->daddr used to look-up route here.
10753da59bd9SMartin KaFai Lau 		 */
1076c71099acSThomas Graf 
10773da59bd9SMartin KaFai Lau 		struct rt6_info *uncached_rt;
10783da59bd9SMartin KaFai Lau 
1079d52d3997SMartin KaFai Lau 		dst_use(&rt->dst, jiffies);
1080d52d3997SMartin KaFai Lau 		read_unlock_bh(&table->tb6_lock);
1081d52d3997SMartin KaFai Lau 
10823da59bd9SMartin KaFai Lau 		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
10833da59bd9SMartin KaFai Lau 		dst_release(&rt->dst);
10843da59bd9SMartin KaFai Lau 
10853da59bd9SMartin KaFai Lau 		if (uncached_rt)
10868d0b94afSMartin KaFai Lau 			rt6_uncached_list_add(uncached_rt);
10873da59bd9SMartin KaFai Lau 		else
10883da59bd9SMartin KaFai Lau 			uncached_rt = net->ipv6.ip6_null_entry;
1089d52d3997SMartin KaFai Lau 
10903da59bd9SMartin KaFai Lau 		dst_hold(&uncached_rt->dst);
10913da59bd9SMartin KaFai Lau 		return uncached_rt;
10923da59bd9SMartin KaFai Lau 
1093d52d3997SMartin KaFai Lau 	} else {
1094d52d3997SMartin KaFai Lau 		/* Get a percpu copy */
1095d52d3997SMartin KaFai Lau 
1096d52d3997SMartin KaFai Lau 		struct rt6_info *pcpu_rt;
1097d52d3997SMartin KaFai Lau 
1098d52d3997SMartin KaFai Lau 		rt->dst.lastuse = jiffies;
1099d52d3997SMartin KaFai Lau 		rt->dst.__use++;
1100d52d3997SMartin KaFai Lau 		pcpu_rt = rt6_get_pcpu_route(rt);
1101d52d3997SMartin KaFai Lau 		read_unlock_bh(&table->tb6_lock);
1102d52d3997SMartin KaFai Lau 
1103d52d3997SMartin KaFai Lau 		return pcpu_rt;
1104d52d3997SMartin KaFai Lau 	}
1105c71099acSThomas Graf }
1106c71099acSThomas Graf 
11078ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
11084c9483b2SDavid S. Miller 					    struct flowi6 *fl6, int flags)
11094acad72dSPavel Emelyanov {
11104c9483b2SDavid S. Miller 	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
11114acad72dSPavel Emelyanov }
11124acad72dSPavel Emelyanov 
111372331bc0SShmulik Ladkani static struct dst_entry *ip6_route_input_lookup(struct net *net,
111472331bc0SShmulik Ladkani 						struct net_device *dev,
111572331bc0SShmulik Ladkani 						struct flowi6 *fl6, int flags)
111672331bc0SShmulik Ladkani {
111772331bc0SShmulik Ladkani 	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
111872331bc0SShmulik Ladkani 		flags |= RT6_LOOKUP_F_IFACE;
111972331bc0SShmulik Ladkani 
112072331bc0SShmulik Ladkani 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
112172331bc0SShmulik Ladkani }
112272331bc0SShmulik Ladkani 
1123c71099acSThomas Graf void ip6_route_input(struct sk_buff *skb)
1124c71099acSThomas Graf {
1125b71d1d42SEric Dumazet 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1126c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(skb->dev);
1127adaa70bbSThomas Graf 	int flags = RT6_LOOKUP_F_HAS_SADDR;
11284c9483b2SDavid S. Miller 	struct flowi6 fl6 = {
11294c9483b2SDavid S. Miller 		.flowi6_iif = skb->dev->ifindex,
11304c9483b2SDavid S. Miller 		.daddr = iph->daddr,
11314c9483b2SDavid S. Miller 		.saddr = iph->saddr,
11326502ca52SYOSHIFUJI Hideaki / 吉藤英明 		.flowlabel = ip6_flowinfo(iph),
11334c9483b2SDavid S. Miller 		.flowi6_mark = skb->mark,
11344c9483b2SDavid S. Miller 		.flowi6_proto = iph->nexthdr,
1135c71099acSThomas Graf 	};
1136adaa70bbSThomas Graf 
113772331bc0SShmulik Ladkani 	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1138c71099acSThomas Graf }
1139c71099acSThomas Graf 
11408ed67789SDaniel Lezcano static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
11414c9483b2SDavid S. Miller 					     struct flowi6 *fl6, int flags)
1142c71099acSThomas Graf {
11434c9483b2SDavid S. Miller 	return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1144c71099acSThomas Graf }
1145c71099acSThomas Graf 
11469c7a4f9cSFlorian Westphal struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
11474c9483b2SDavid S. Miller 				    struct flowi6 *fl6)
1148c71099acSThomas Graf {
1149c71099acSThomas Graf 	int flags = 0;
1150c71099acSThomas Graf 
11511fb9489bSPavel Emelyanov 	fl6->flowi6_iif = LOOPBACK_IFINDEX;
11524dc27d1cSDavid McCullough 
11534c9483b2SDavid S. Miller 	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
115477d16f45SYOSHIFUJI Hideaki 		flags |= RT6_LOOKUP_F_IFACE;
1155c71099acSThomas Graf 
11564c9483b2SDavid S. Miller 	if (!ipv6_addr_any(&fl6->saddr))
1157adaa70bbSThomas Graf 		flags |= RT6_LOOKUP_F_HAS_SADDR;
11580c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 	else if (sk)
11590c9a2ac1SYOSHIFUJI Hideaki / 吉藤英明 		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1160adaa70bbSThomas Graf 
11614c9483b2SDavid S. Miller 	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
11621da177e4SLinus Torvalds }
11637159039aSYOSHIFUJI Hideaki EXPORT_SYMBOL(ip6_route_output);
11641da177e4SLinus Torvalds 
11652774c131SDavid S. Miller struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
116614e50e57SDavid S. Miller {
11675c1e6aa3SDavid S. Miller 	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
116814e50e57SDavid S. Miller 	struct dst_entry *new = NULL;
116914e50e57SDavid S. Miller 
1170f5b0a874SDavid S. Miller 	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
117114e50e57SDavid S. Miller 	if (rt) {
1172d8d1f30bSChangli Gao 		new = &rt->dst;
117314e50e57SDavid S. Miller 
11748104891bSSteffen Klassert 		memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
11758104891bSSteffen Klassert 
117614e50e57SDavid S. Miller 		new->__use = 1;
1177352e512cSHerbert Xu 		new->input = dst_discard;
1178aad88724SEric Dumazet 		new->output = dst_discard_sk;
117914e50e57SDavid S. Miller 
118021efcfa0SEric Dumazet 		if (dst_metrics_read_only(&ort->dst))
118121efcfa0SEric Dumazet 			new->_metrics = ort->dst._metrics;
118221efcfa0SEric Dumazet 		else
1183defb3519SDavid S. Miller 			dst_copy_metrics(new, &ort->dst);
118414e50e57SDavid S. Miller 		rt->rt6i_idev = ort->rt6i_idev;
118514e50e57SDavid S. Miller 		if (rt->rt6i_idev)
118614e50e57SDavid S. Miller 			in6_dev_hold(rt->rt6i_idev);
118714e50e57SDavid S. Miller 
11884e3fd7a0SAlexey Dobriyan 		rt->rt6i_gateway = ort->rt6i_gateway;
11891716a961SGao feng 		rt->rt6i_flags = ort->rt6i_flags;
119014e50e57SDavid S. Miller 		rt->rt6i_metric = 0;
119114e50e57SDavid S. Miller 
119214e50e57SDavid S. Miller 		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
119314e50e57SDavid S. Miller #ifdef CONFIG_IPV6_SUBTREES
119414e50e57SDavid S. Miller 		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
119514e50e57SDavid S. Miller #endif
119614e50e57SDavid S. Miller 
119714e50e57SDavid S. Miller 		dst_free(new);
119814e50e57SDavid S. Miller 	}
119914e50e57SDavid S. Miller 
120069ead7afSDavid S. Miller 	dst_release(dst_orig);
120169ead7afSDavid S. Miller 	return new ? new : ERR_PTR(-ENOMEM);
120214e50e57SDavid S. Miller }
120314e50e57SDavid S. Miller 
12041da177e4SLinus Torvalds /*
12051da177e4SLinus Torvalds  *	Destination cache support functions
12061da177e4SLinus Torvalds  */
12071da177e4SLinus Torvalds 
12084b32b5adSMartin KaFai Lau static void rt6_dst_from_metrics_check(struct rt6_info *rt)
12094b32b5adSMartin KaFai Lau {
12104b32b5adSMartin KaFai Lau 	if (rt->dst.from &&
12114b32b5adSMartin KaFai Lau 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
12124b32b5adSMartin KaFai Lau 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
12134b32b5adSMartin KaFai Lau }
12144b32b5adSMartin KaFai Lau 
12153da59bd9SMartin KaFai Lau static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
12163da59bd9SMartin KaFai Lau {
12173da59bd9SMartin KaFai Lau 	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
12183da59bd9SMartin KaFai Lau 		return NULL;
12193da59bd9SMartin KaFai Lau 
12203da59bd9SMartin KaFai Lau 	if (rt6_check_expired(rt))
12213da59bd9SMartin KaFai Lau 		return NULL;
12223da59bd9SMartin KaFai Lau 
12233da59bd9SMartin KaFai Lau 	return &rt->dst;
12243da59bd9SMartin KaFai Lau }
12253da59bd9SMartin KaFai Lau 
12263da59bd9SMartin KaFai Lau static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
12273da59bd9SMartin KaFai Lau {
12283da59bd9SMartin KaFai Lau 	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
12293da59bd9SMartin KaFai Lau 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
12303da59bd9SMartin KaFai Lau 		return &rt->dst;
12313da59bd9SMartin KaFai Lau 	else
12323da59bd9SMartin KaFai Lau 		return NULL;
12333da59bd9SMartin KaFai Lau }
12343da59bd9SMartin KaFai Lau 
12351da177e4SLinus Torvalds static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
12361da177e4SLinus Torvalds {
12371da177e4SLinus Torvalds 	struct rt6_info *rt;
12381da177e4SLinus Torvalds 
12391da177e4SLinus Torvalds 	rt = (struct rt6_info *) dst;
12401da177e4SLinus Torvalds 
12416f3118b5SNicolas Dichtel 	/* All IPV6 dsts are created with ->obsolete set to the value
12426f3118b5SNicolas Dichtel 	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
12436f3118b5SNicolas Dichtel 	 * into this function always.
12446f3118b5SNicolas Dichtel 	 */
1245e3bc10bdSHannes Frederic Sowa 
12464b32b5adSMartin KaFai Lau 	rt6_dst_from_metrics_check(rt);
12474b32b5adSMartin KaFai Lau 
1248d52d3997SMartin KaFai Lau 	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
12493da59bd9SMartin KaFai Lau 		return rt6_dst_from_check(rt, cookie);
12503da59bd9SMartin KaFai Lau 	else
12513da59bd9SMartin KaFai Lau 		return rt6_check(rt, cookie);
12521da177e4SLinus Torvalds }
12531da177e4SLinus Torvalds 
12541da177e4SLinus Torvalds static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
12551da177e4SLinus Torvalds {
12561da177e4SLinus Torvalds 	struct rt6_info *rt = (struct rt6_info *) dst;
12571da177e4SLinus Torvalds 
12581da177e4SLinus Torvalds 	if (rt) {
125954c1a859SYOSHIFUJI Hideaki / 吉藤英明 		if (rt->rt6i_flags & RTF_CACHE) {
126054c1a859SYOSHIFUJI Hideaki / 吉藤英明 			if (rt6_check_expired(rt)) {
1261e0a1ad73SThomas Graf 				ip6_del_rt(rt);
126254c1a859SYOSHIFUJI Hideaki / 吉藤英明 				dst = NULL;
12631da177e4SLinus Torvalds 			}
126454c1a859SYOSHIFUJI Hideaki / 吉藤英明 		} else {
126554c1a859SYOSHIFUJI Hideaki / 吉藤英明 			dst_release(dst);
126654c1a859SYOSHIFUJI Hideaki / 吉藤英明 			dst = NULL;
126754c1a859SYOSHIFUJI Hideaki / 吉藤英明 		}
126854c1a859SYOSHIFUJI Hideaki / 吉藤英明 	}
126954c1a859SYOSHIFUJI Hideaki / 吉藤英明 	return dst;
12701da177e4SLinus Torvalds }
12711da177e4SLinus Torvalds 
12721da177e4SLinus Torvalds static void ip6_link_failure(struct sk_buff *skb)
12731da177e4SLinus Torvalds {
12741da177e4SLinus Torvalds 	struct rt6_info *rt;
12751da177e4SLinus Torvalds 
12763ffe533cSAlexey Dobriyan 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
12771da177e4SLinus Torvalds 
1278adf30907SEric Dumazet 	rt = (struct rt6_info *) skb_dst(skb);
12791da177e4SLinus Torvalds 	if (rt) {
12801eb4f758SHannes Frederic Sowa 		if (rt->rt6i_flags & RTF_CACHE) {
12811eb4f758SHannes Frederic Sowa 			dst_hold(&rt->dst);
12821eb4f758SHannes Frederic Sowa 			if (ip6_del_rt(rt))
12831eb4f758SHannes Frederic Sowa 				dst_free(&rt->dst);
12841eb4f758SHannes Frederic Sowa 		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
12851da177e4SLinus Torvalds 			rt->rt6i_node->fn_sernum = -1;
12861da177e4SLinus Torvalds 		}
12871da177e4SLinus Torvalds 	}
12881eb4f758SHannes Frederic Sowa }
12891da177e4SLinus Torvalds 
129045e4fd26SMartin KaFai Lau static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
129145e4fd26SMartin KaFai Lau {
129245e4fd26SMartin KaFai Lau 	struct net *net = dev_net(rt->dst.dev);
129345e4fd26SMartin KaFai Lau 
129445e4fd26SMartin KaFai Lau 	rt->rt6i_flags |= RTF_MODIFIED;
129545e4fd26SMartin KaFai Lau 	rt->rt6i_pmtu = mtu;
129645e4fd26SMartin KaFai Lau 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
129745e4fd26SMartin KaFai Lau }
129845e4fd26SMartin KaFai Lau 
129945e4fd26SMartin KaFai Lau static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
130045e4fd26SMartin KaFai Lau 				 const struct ipv6hdr *iph, u32 mtu)
13011da177e4SLinus Torvalds {
13021da177e4SLinus Torvalds 	struct rt6_info *rt6 = (struct rt6_info *)dst;
13031da177e4SLinus Torvalds 
130445e4fd26SMartin KaFai Lau 	if (rt6->rt6i_flags & RTF_LOCAL)
130545e4fd26SMartin KaFai Lau 		return;
130645e4fd26SMartin KaFai Lau 
130781aded24SDavid S. Miller 	dst_confirm(dst);
130845e4fd26SMartin KaFai Lau 	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
130945e4fd26SMartin KaFai Lau 	if (mtu >= dst_mtu(dst))
131045e4fd26SMartin KaFai Lau 		return;
131181aded24SDavid S. Miller 
131245e4fd26SMartin KaFai Lau 	if (rt6->rt6i_flags & RTF_CACHE) {
131345e4fd26SMartin KaFai Lau 		rt6_do_update_pmtu(rt6, mtu);
131445e4fd26SMartin KaFai Lau 	} else {
131545e4fd26SMartin KaFai Lau 		const struct in6_addr *daddr, *saddr;
131645e4fd26SMartin KaFai Lau 		struct rt6_info *nrt6;
13179d289715SHagen Paul Pfeifer 
131845e4fd26SMartin KaFai Lau 		if (iph) {
131945e4fd26SMartin KaFai Lau 			daddr = &iph->daddr;
132045e4fd26SMartin KaFai Lau 			saddr = &iph->saddr;
132145e4fd26SMartin KaFai Lau 		} else if (sk) {
132245e4fd26SMartin KaFai Lau 			daddr = &sk->sk_v6_daddr;
132345e4fd26SMartin KaFai Lau 			saddr = &inet6_sk(sk)->saddr;
132445e4fd26SMartin KaFai Lau 		} else {
132545e4fd26SMartin KaFai Lau 			return;
13261da177e4SLinus Torvalds 		}
132745e4fd26SMartin KaFai Lau 		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
132845e4fd26SMartin KaFai Lau 		if (nrt6) {
132945e4fd26SMartin KaFai Lau 			rt6_do_update_pmtu(nrt6, mtu);
133045e4fd26SMartin KaFai Lau 
133145e4fd26SMartin KaFai Lau 			/* ip6_ins_rt(nrt6) will bump the
133245e4fd26SMartin KaFai Lau 			 * rt6->rt6i_node->fn_sernum
133345e4fd26SMartin KaFai Lau 			 * which will fail the next rt6_check() and
133445e4fd26SMartin KaFai Lau 			 * invalidate the sk->sk_dst_cache.
133545e4fd26SMartin KaFai Lau 			 */
133645e4fd26SMartin KaFai Lau 			ip6_ins_rt(nrt6);
133745e4fd26SMartin KaFai Lau 		}
133845e4fd26SMartin KaFai Lau 	}
133945e4fd26SMartin KaFai Lau }
134045e4fd26SMartin KaFai Lau 
134145e4fd26SMartin KaFai Lau static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
134245e4fd26SMartin KaFai Lau 			       struct sk_buff *skb, u32 mtu)
134345e4fd26SMartin KaFai Lau {
134445e4fd26SMartin KaFai Lau 	__ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
13451da177e4SLinus Torvalds }
13461da177e4SLinus Torvalds 
134742ae66c8SDavid S. Miller void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
134842ae66c8SDavid S. Miller 		     int oif, u32 mark)
134981aded24SDavid S. Miller {
135081aded24SDavid S. Miller 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
135181aded24SDavid S. Miller 	struct dst_entry *dst;
135281aded24SDavid S. Miller 	struct flowi6 fl6;
135381aded24SDavid S. Miller 
135481aded24SDavid S. Miller 	memset(&fl6, 0, sizeof(fl6));
135581aded24SDavid S. Miller 	fl6.flowi6_oif = oif;
13561b3c61dcSLorenzo Colitti 	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
135781aded24SDavid S. Miller 	fl6.daddr = iph->daddr;
135881aded24SDavid S. Miller 	fl6.saddr = iph->saddr;
13596502ca52SYOSHIFUJI Hideaki / 吉藤英明 	fl6.flowlabel = ip6_flowinfo(iph);
136081aded24SDavid S. Miller 
136181aded24SDavid S. Miller 	dst = ip6_route_output(net, NULL, &fl6);
136281aded24SDavid S. Miller 	if (!dst->error)
136345e4fd26SMartin KaFai Lau 		__ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
136481aded24SDavid S. Miller 	dst_release(dst);
136581aded24SDavid S. Miller }
136681aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_update_pmtu);
136781aded24SDavid S. Miller 
136881aded24SDavid S. Miller void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
136981aded24SDavid S. Miller {
137081aded24SDavid S. Miller 	ip6_update_pmtu(skb, sock_net(sk), mtu,
137181aded24SDavid S. Miller 			sk->sk_bound_dev_if, sk->sk_mark);
137281aded24SDavid S. Miller }
137381aded24SDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
137481aded24SDavid S. Miller 
1375b55b76b2SDuan Jiong /* Handle redirects */
1376b55b76b2SDuan Jiong struct ip6rd_flowi {
1377b55b76b2SDuan Jiong 	struct flowi6 fl6;
1378b55b76b2SDuan Jiong 	struct in6_addr gateway;
1379b55b76b2SDuan Jiong };
1380b55b76b2SDuan Jiong 
1381b55b76b2SDuan Jiong static struct rt6_info *__ip6_route_redirect(struct net *net,
1382b55b76b2SDuan Jiong 					     struct fib6_table *table,
1383b55b76b2SDuan Jiong 					     struct flowi6 *fl6,
1384b55b76b2SDuan Jiong 					     int flags)
1385b55b76b2SDuan Jiong {
1386b55b76b2SDuan Jiong 	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1387b55b76b2SDuan Jiong 	struct rt6_info *rt;
1388b55b76b2SDuan Jiong 	struct fib6_node *fn;
1389b55b76b2SDuan Jiong 
1390b55b76b2SDuan Jiong 	/* Get the "current" route for this destination and
1391b55b76b2SDuan Jiong 	 * check if the redirect has come from approriate router.
1392b55b76b2SDuan Jiong 	 *
1393b55b76b2SDuan Jiong 	 * RFC 4861 specifies that redirects should only be
1394b55b76b2SDuan Jiong 	 * accepted if they come from the nexthop to the target.
1395b55b76b2SDuan Jiong 	 * Due to the way the routes are chosen, this notion
1396b55b76b2SDuan Jiong 	 * is a bit fuzzy and one might need to check all possible
1397b55b76b2SDuan Jiong 	 * routes.
1398b55b76b2SDuan Jiong 	 */
1399b55b76b2SDuan Jiong 
1400b55b76b2SDuan Jiong 	read_lock_bh(&table->tb6_lock);
1401b55b76b2SDuan Jiong 	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1402b55b76b2SDuan Jiong restart:
1403b55b76b2SDuan Jiong 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1404b55b76b2SDuan Jiong 		if (rt6_check_expired(rt))
1405b55b76b2SDuan Jiong 			continue;
1406b55b76b2SDuan Jiong 		if (rt->dst.error)
1407b55b76b2SDuan Jiong 			break;
1408b55b76b2SDuan Jiong 		if (!(rt->rt6i_flags & RTF_GATEWAY))
1409b55b76b2SDuan Jiong 			continue;
1410b55b76b2SDuan Jiong 		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1411b55b76b2SDuan Jiong 			continue;
1412b55b76b2SDuan Jiong 		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1413b55b76b2SDuan Jiong 			continue;
1414b55b76b2SDuan Jiong 		break;
1415b55b76b2SDuan Jiong 	}
1416b55b76b2SDuan Jiong 
1417b55b76b2SDuan Jiong 	if (!rt)
1418b55b76b2SDuan Jiong 		rt = net->ipv6.ip6_null_entry;
1419b55b76b2SDuan Jiong 	else if (rt->dst.error) {
1420b55b76b2SDuan Jiong 		rt = net->ipv6.ip6_null_entry;
1421b0a1ba59SMartin KaFai Lau 		goto out;
1422b0a1ba59SMartin KaFai Lau 	}
1423b0a1ba59SMartin KaFai Lau 
1424b0a1ba59SMartin KaFai Lau 	if (rt == net->ipv6.ip6_null_entry) {
1425a3c00e46SMartin KaFai Lau 		fn = fib6_backtrack(fn, &fl6->saddr);
1426a3c00e46SMartin KaFai Lau 		if (fn)
1427a3c00e46SMartin KaFai Lau 			goto restart;
1428b55b76b2SDuan Jiong 	}
1429a3c00e46SMartin KaFai Lau 
1430b0a1ba59SMartin KaFai Lau out:
1431b55b76b2SDuan Jiong 	dst_hold(&rt->dst);
1432b55b76b2SDuan Jiong 
1433b55b76b2SDuan Jiong 	read_unlock_bh(&table->tb6_lock);
1434b55b76b2SDuan Jiong 
1435b55b76b2SDuan Jiong 	return rt;
1436b55b76b2SDuan Jiong };
1437b55b76b2SDuan Jiong 
1438b55b76b2SDuan Jiong static struct dst_entry *ip6_route_redirect(struct net *net,
1439b55b76b2SDuan Jiong 					const struct flowi6 *fl6,
1440b55b76b2SDuan Jiong 					const struct in6_addr *gateway)
1441b55b76b2SDuan Jiong {
1442b55b76b2SDuan Jiong 	int flags = RT6_LOOKUP_F_HAS_SADDR;
1443b55b76b2SDuan Jiong 	struct ip6rd_flowi rdfl;
1444b55b76b2SDuan Jiong 
1445b55b76b2SDuan Jiong 	rdfl.fl6 = *fl6;
1446b55b76b2SDuan Jiong 	rdfl.gateway = *gateway;
1447b55b76b2SDuan Jiong 
1448b55b76b2SDuan Jiong 	return fib6_rule_lookup(net, &rdfl.fl6,
1449b55b76b2SDuan Jiong 				flags, __ip6_route_redirect);
1450b55b76b2SDuan Jiong }
1451b55b76b2SDuan Jiong 
14523a5ad2eeSDavid S. Miller void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
14533a5ad2eeSDavid S. Miller {
14543a5ad2eeSDavid S. Miller 	const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
14553a5ad2eeSDavid S. Miller 	struct dst_entry *dst;
14563a5ad2eeSDavid S. Miller 	struct flowi6 fl6;
14573a5ad2eeSDavid S. Miller 
14583a5ad2eeSDavid S. Miller 	memset(&fl6, 0, sizeof(fl6));
1459e374c618SJulian Anastasov 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
14603a5ad2eeSDavid S. Miller 	fl6.flowi6_oif = oif;
14613a5ad2eeSDavid S. Miller 	fl6.flowi6_mark = mark;
14623a5ad2eeSDavid S. Miller 	fl6.daddr = iph->daddr;
14633a5ad2eeSDavid S. Miller 	fl6.saddr = iph->saddr;
14646502ca52SYOSHIFUJI Hideaki / 吉藤英明 	fl6.flowlabel = ip6_flowinfo(iph);
14653a5ad2eeSDavid S. Miller 
1466b55b76b2SDuan Jiong 	dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
14676700c270SDavid S. Miller 	rt6_do_redirect(dst, NULL, skb);
14683a5ad2eeSDavid S. Miller 	dst_release(dst);
14693a5ad2eeSDavid S. Miller }
14703a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_redirect);
14713a5ad2eeSDavid S. Miller 
1472c92a59ecSDuan Jiong void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1473c92a59ecSDuan Jiong 			    u32 mark)
1474c92a59ecSDuan Jiong {
1475c92a59ecSDuan Jiong 	const struct ipv6hdr *iph = ipv6_hdr(skb);
1476c92a59ecSDuan Jiong 	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1477c92a59ecSDuan Jiong 	struct dst_entry *dst;
1478c92a59ecSDuan Jiong 	struct flowi6 fl6;
1479c92a59ecSDuan Jiong 
1480c92a59ecSDuan Jiong 	memset(&fl6, 0, sizeof(fl6));
1481e374c618SJulian Anastasov 	fl6.flowi6_iif = LOOPBACK_IFINDEX;
1482c92a59ecSDuan Jiong 	fl6.flowi6_oif = oif;
1483c92a59ecSDuan Jiong 	fl6.flowi6_mark = mark;
1484c92a59ecSDuan Jiong 	fl6.daddr = msg->dest;
1485c92a59ecSDuan Jiong 	fl6.saddr = iph->daddr;
1486c92a59ecSDuan Jiong 
1487b55b76b2SDuan Jiong 	dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1488c92a59ecSDuan Jiong 	rt6_do_redirect(dst, NULL, skb);
1489c92a59ecSDuan Jiong 	dst_release(dst);
1490c92a59ecSDuan Jiong }
1491c92a59ecSDuan Jiong 
14923a5ad2eeSDavid S. Miller void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
14933a5ad2eeSDavid S. Miller {
14943a5ad2eeSDavid S. Miller 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
14953a5ad2eeSDavid S. Miller }
14963a5ad2eeSDavid S. Miller EXPORT_SYMBOL_GPL(ip6_sk_redirect);
14973a5ad2eeSDavid S. Miller 
14980dbaee3bSDavid S. Miller static unsigned int ip6_default_advmss(const struct dst_entry *dst)
14991da177e4SLinus Torvalds {
15000dbaee3bSDavid S. Miller 	struct net_device *dev = dst->dev;
15010dbaee3bSDavid S. Miller 	unsigned int mtu = dst_mtu(dst);
15020dbaee3bSDavid S. Miller 	struct net *net = dev_net(dev);
15030dbaee3bSDavid S. Miller 
15041da177e4SLinus Torvalds 	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
15051da177e4SLinus Torvalds 
15065578689aSDaniel Lezcano 	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
15075578689aSDaniel Lezcano 		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
15081da177e4SLinus Torvalds 
15091da177e4SLinus Torvalds 	/*
15101da177e4SLinus Torvalds 	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
15111da177e4SLinus Torvalds 	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
15121da177e4SLinus Torvalds 	 * IPV6_MAXPLEN is also valid and means: "any MSS,
15131da177e4SLinus Torvalds 	 * rely only on pmtu discovery"
15141da177e4SLinus Torvalds 	 */
15151da177e4SLinus Torvalds 	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
15161da177e4SLinus Torvalds 		mtu = IPV6_MAXPLEN;
15171da177e4SLinus Torvalds 	return mtu;
15181da177e4SLinus Torvalds }
15191da177e4SLinus Torvalds 
1520ebb762f2SSteffen Klassert static unsigned int ip6_mtu(const struct dst_entry *dst)
1521d33e4553SDavid S. Miller {
15224b32b5adSMartin KaFai Lau 	const struct rt6_info *rt = (const struct rt6_info *)dst;
15234b32b5adSMartin KaFai Lau 	unsigned int mtu = rt->rt6i_pmtu;
1524d33e4553SDavid S. Miller 	struct inet6_dev *idev;
1525618f9bc7SSteffen Klassert 
1526618f9bc7SSteffen Klassert 	if (mtu)
152730f78d8eSEric Dumazet 		goto out;
1528618f9bc7SSteffen Klassert 
15294b32b5adSMartin KaFai Lau 	mtu = dst_metric_raw(dst, RTAX_MTU);
15304b32b5adSMartin KaFai Lau 	if (mtu)
15314b32b5adSMartin KaFai Lau 		goto out;
15324b32b5adSMartin KaFai Lau 
1533618f9bc7SSteffen Klassert 	mtu = IPV6_MIN_MTU;
1534d33e4553SDavid S. Miller 
1535d33e4553SDavid S. Miller 	rcu_read_lock();
1536d33e4553SDavid S. Miller 	idev = __in6_dev_get(dst->dev);
1537d33e4553SDavid S. Miller 	if (idev)
1538d33e4553SDavid S. Miller 		mtu = idev->cnf.mtu6;
1539d33e4553SDavid S. Miller 	rcu_read_unlock();
1540d33e4553SDavid S. Miller 
154130f78d8eSEric Dumazet out:
154230f78d8eSEric Dumazet 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
1543d33e4553SDavid S. Miller }
1544d33e4553SDavid S. Miller 
/*
 * dsts handed out by icmp6_dst_alloc() are chained through dst->next on
 * icmp6_dst_gc_list.  The list is walked and pruned by icmp6_dst_gc() and
 * icmp6_clean_all(); every access is made under icmp6_dst_lock.
 */
static DEFINE_SPINLOCK(icmp6_dst_lock);
static struct dst_entry *icmp6_dst_gc_list;
15475d0bbeebSThomas Graf 
15483b00944cSYOSHIFUJI Hideaki struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
154987a11578SDavid S. Miller 				  struct flowi6 *fl6)
15501da177e4SLinus Torvalds {
155187a11578SDavid S. Miller 	struct dst_entry *dst;
15521da177e4SLinus Torvalds 	struct rt6_info *rt;
15531da177e4SLinus Torvalds 	struct inet6_dev *idev = in6_dev_get(dev);
1554c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
15551da177e4SLinus Torvalds 
155638308473SDavid S. Miller 	if (unlikely(!idev))
1557122bdf67SEric Dumazet 		return ERR_PTR(-ENODEV);
15581da177e4SLinus Torvalds 
15598b96d22dSDavid S. Miller 	rt = ip6_dst_alloc(net, dev, 0, NULL);
156038308473SDavid S. Miller 	if (unlikely(!rt)) {
15611da177e4SLinus Torvalds 		in6_dev_put(idev);
156287a11578SDavid S. Miller 		dst = ERR_PTR(-ENOMEM);
15631da177e4SLinus Torvalds 		goto out;
15641da177e4SLinus Torvalds 	}
15651da177e4SLinus Torvalds 
15668e2ec639SYan, Zheng 	rt->dst.flags |= DST_HOST;
15678e2ec639SYan, Zheng 	rt->dst.output  = ip6_output;
1568d8d1f30bSChangli Gao 	atomic_set(&rt->dst.__refcnt, 1);
1569550bab42SJulian Anastasov 	rt->rt6i_gateway  = fl6->daddr;
157087a11578SDavid S. Miller 	rt->rt6i_dst.addr = fl6->daddr;
15718e2ec639SYan, Zheng 	rt->rt6i_dst.plen = 128;
15728e2ec639SYan, Zheng 	rt->rt6i_idev     = idev;
157314edd87dSLi RongQing 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
15741da177e4SLinus Torvalds 
15753b00944cSYOSHIFUJI Hideaki 	spin_lock_bh(&icmp6_dst_lock);
1576d8d1f30bSChangli Gao 	rt->dst.next = icmp6_dst_gc_list;
1577d8d1f30bSChangli Gao 	icmp6_dst_gc_list = &rt->dst;
15783b00944cSYOSHIFUJI Hideaki 	spin_unlock_bh(&icmp6_dst_lock);
15791da177e4SLinus Torvalds 
15805578689aSDaniel Lezcano 	fib6_force_start_gc(net);
15811da177e4SLinus Torvalds 
158287a11578SDavid S. Miller 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
158387a11578SDavid S. Miller 
15841da177e4SLinus Torvalds out:
158587a11578SDavid S. Miller 	return dst;
15861da177e4SLinus Torvalds }
15871da177e4SLinus Torvalds 
15883d0f24a7SStephen Hemminger int icmp6_dst_gc(void)
15891da177e4SLinus Torvalds {
1590e9476e95SHagen Paul Pfeifer 	struct dst_entry *dst, **pprev;
15913d0f24a7SStephen Hemminger 	int more = 0;
15921da177e4SLinus Torvalds 
15933b00944cSYOSHIFUJI Hideaki 	spin_lock_bh(&icmp6_dst_lock);
15943b00944cSYOSHIFUJI Hideaki 	pprev = &icmp6_dst_gc_list;
15955d0bbeebSThomas Graf 
15961da177e4SLinus Torvalds 	while ((dst = *pprev) != NULL) {
15971da177e4SLinus Torvalds 		if (!atomic_read(&dst->__refcnt)) {
15981da177e4SLinus Torvalds 			*pprev = dst->next;
15991da177e4SLinus Torvalds 			dst_free(dst);
16001da177e4SLinus Torvalds 		} else {
16011da177e4SLinus Torvalds 			pprev = &dst->next;
16023d0f24a7SStephen Hemminger 			++more;
16031da177e4SLinus Torvalds 		}
16041da177e4SLinus Torvalds 	}
16051da177e4SLinus Torvalds 
16063b00944cSYOSHIFUJI Hideaki 	spin_unlock_bh(&icmp6_dst_lock);
16075d0bbeebSThomas Graf 
16083d0f24a7SStephen Hemminger 	return more;
16091da177e4SLinus Torvalds }
16101da177e4SLinus Torvalds 
16111e493d19SDavid S. Miller static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
16121e493d19SDavid S. Miller 			    void *arg)
16131e493d19SDavid S. Miller {
16141e493d19SDavid S. Miller 	struct dst_entry *dst, **pprev;
16151e493d19SDavid S. Miller 
16161e493d19SDavid S. Miller 	spin_lock_bh(&icmp6_dst_lock);
16171e493d19SDavid S. Miller 	pprev = &icmp6_dst_gc_list;
16181e493d19SDavid S. Miller 	while ((dst = *pprev) != NULL) {
16191e493d19SDavid S. Miller 		struct rt6_info *rt = (struct rt6_info *) dst;
16201e493d19SDavid S. Miller 		if (func(rt, arg)) {
16211e493d19SDavid S. Miller 			*pprev = dst->next;
16221e493d19SDavid S. Miller 			dst_free(dst);
16231e493d19SDavid S. Miller 		} else {
16241e493d19SDavid S. Miller 			pprev = &dst->next;
16251e493d19SDavid S. Miller 		}
16261e493d19SDavid S. Miller 	}
16271e493d19SDavid S. Miller 	spin_unlock_bh(&icmp6_dst_lock);
16281e493d19SDavid S. Miller }
16291e493d19SDavid S. Miller 
1630569d3645SDaniel Lezcano static int ip6_dst_gc(struct dst_ops *ops)
16311da177e4SLinus Torvalds {
163286393e52SAlexey Dobriyan 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
16337019b78eSDaniel Lezcano 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
16347019b78eSDaniel Lezcano 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
16357019b78eSDaniel Lezcano 	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
16367019b78eSDaniel Lezcano 	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
16377019b78eSDaniel Lezcano 	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1638fc66f95cSEric Dumazet 	int entries;
16391da177e4SLinus Torvalds 
1640fc66f95cSEric Dumazet 	entries = dst_entries_get_fast(ops);
164149a18d86SMichal Kubeček 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1642fc66f95cSEric Dumazet 	    entries <= rt_max_size)
16431da177e4SLinus Torvalds 		goto out;
16441da177e4SLinus Torvalds 
16456891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire++;
164614956643SLi RongQing 	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
1647fc66f95cSEric Dumazet 	entries = dst_entries_get_slow(ops);
1648fc66f95cSEric Dumazet 	if (entries < ops->gc_thresh)
16497019b78eSDaniel Lezcano 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
16501da177e4SLinus Torvalds out:
16517019b78eSDaniel Lezcano 	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1652fc66f95cSEric Dumazet 	return entries > rt_max_size;
16531da177e4SLinus Torvalds }
16541da177e4SLinus Torvalds 
1655e715b6d3SFlorian Westphal static int ip6_convert_metrics(struct mx6_config *mxc,
1656e715b6d3SFlorian Westphal 			       const struct fib6_config *cfg)
1657e715b6d3SFlorian Westphal {
1658e715b6d3SFlorian Westphal 	struct nlattr *nla;
1659e715b6d3SFlorian Westphal 	int remaining;
1660e715b6d3SFlorian Westphal 	u32 *mp;
1661e715b6d3SFlorian Westphal 
166263159f29SIan Morris 	if (!cfg->fc_mx)
1663e715b6d3SFlorian Westphal 		return 0;
1664e715b6d3SFlorian Westphal 
1665e715b6d3SFlorian Westphal 	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1666e715b6d3SFlorian Westphal 	if (unlikely(!mp))
1667e715b6d3SFlorian Westphal 		return -ENOMEM;
1668e715b6d3SFlorian Westphal 
1669e715b6d3SFlorian Westphal 	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1670e715b6d3SFlorian Westphal 		int type = nla_type(nla);
1671e715b6d3SFlorian Westphal 
1672e715b6d3SFlorian Westphal 		if (type) {
1673ea697639SDaniel Borkmann 			u32 val;
1674ea697639SDaniel Borkmann 
1675e715b6d3SFlorian Westphal 			if (unlikely(type > RTAX_MAX))
1676e715b6d3SFlorian Westphal 				goto err;
1677ea697639SDaniel Borkmann 			if (type == RTAX_CC_ALGO) {
1678ea697639SDaniel Borkmann 				char tmp[TCP_CA_NAME_MAX];
1679e715b6d3SFlorian Westphal 
1680ea697639SDaniel Borkmann 				nla_strlcpy(tmp, nla, sizeof(tmp));
1681ea697639SDaniel Borkmann 				val = tcp_ca_get_key_by_name(tmp);
1682ea697639SDaniel Borkmann 				if (val == TCP_CA_UNSPEC)
1683ea697639SDaniel Borkmann 					goto err;
1684ea697639SDaniel Borkmann 			} else {
1685ea697639SDaniel Borkmann 				val = nla_get_u32(nla);
1686ea697639SDaniel Borkmann 			}
1687ea697639SDaniel Borkmann 
1688ea697639SDaniel Borkmann 			mp[type - 1] = val;
1689e715b6d3SFlorian Westphal 			__set_bit(type - 1, mxc->mx_valid);
1690e715b6d3SFlorian Westphal 		}
1691e715b6d3SFlorian Westphal 	}
1692e715b6d3SFlorian Westphal 
1693e715b6d3SFlorian Westphal 	mxc->mx = mp;
1694e715b6d3SFlorian Westphal 
1695e715b6d3SFlorian Westphal 	return 0;
1696e715b6d3SFlorian Westphal  err:
1697e715b6d3SFlorian Westphal 	kfree(mp);
1698e715b6d3SFlorian Westphal 	return -EINVAL;
1699e715b6d3SFlorian Westphal }
17001da177e4SLinus Torvalds 
170186872cb5SThomas Graf int ip6_route_add(struct fib6_config *cfg)
17021da177e4SLinus Torvalds {
17031da177e4SLinus Torvalds 	int err;
17045578689aSDaniel Lezcano 	struct net *net = cfg->fc_nlinfo.nl_net;
17051da177e4SLinus Torvalds 	struct rt6_info *rt = NULL;
17061da177e4SLinus Torvalds 	struct net_device *dev = NULL;
17071da177e4SLinus Torvalds 	struct inet6_dev *idev = NULL;
1708c71099acSThomas Graf 	struct fib6_table *table;
1709e715b6d3SFlorian Westphal 	struct mx6_config mxc = { .mx = NULL, };
17101da177e4SLinus Torvalds 	int addr_type;
17111da177e4SLinus Torvalds 
171286872cb5SThomas Graf 	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
17131da177e4SLinus Torvalds 		return -EINVAL;
17141da177e4SLinus Torvalds #ifndef CONFIG_IPV6_SUBTREES
171586872cb5SThomas Graf 	if (cfg->fc_src_len)
17161da177e4SLinus Torvalds 		return -EINVAL;
17171da177e4SLinus Torvalds #endif
171886872cb5SThomas Graf 	if (cfg->fc_ifindex) {
17191da177e4SLinus Torvalds 		err = -ENODEV;
17205578689aSDaniel Lezcano 		dev = dev_get_by_index(net, cfg->fc_ifindex);
17211da177e4SLinus Torvalds 		if (!dev)
17221da177e4SLinus Torvalds 			goto out;
17231da177e4SLinus Torvalds 		idev = in6_dev_get(dev);
17241da177e4SLinus Torvalds 		if (!idev)
17251da177e4SLinus Torvalds 			goto out;
17261da177e4SLinus Torvalds 	}
17271da177e4SLinus Torvalds 
172886872cb5SThomas Graf 	if (cfg->fc_metric == 0)
172986872cb5SThomas Graf 		cfg->fc_metric = IP6_RT_PRIO_USER;
17301da177e4SLinus Torvalds 
1731c71099acSThomas Graf 	err = -ENOBUFS;
173238308473SDavid S. Miller 	if (cfg->fc_nlinfo.nlh &&
1733d71314b4SMatti Vaittinen 	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1734d71314b4SMatti Vaittinen 		table = fib6_get_table(net, cfg->fc_table);
173538308473SDavid S. Miller 		if (!table) {
1736f3213831SJoe Perches 			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1737d71314b4SMatti Vaittinen 			table = fib6_new_table(net, cfg->fc_table);
1738d71314b4SMatti Vaittinen 		}
1739d71314b4SMatti Vaittinen 	} else {
1740d71314b4SMatti Vaittinen 		table = fib6_new_table(net, cfg->fc_table);
1741d71314b4SMatti Vaittinen 	}
174238308473SDavid S. Miller 
174338308473SDavid S. Miller 	if (!table)
1744c71099acSThomas Graf 		goto out;
1745c71099acSThomas Graf 
1746c88507fbSSabrina Dubroca 	rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
17471da177e4SLinus Torvalds 
174838308473SDavid S. Miller 	if (!rt) {
17491da177e4SLinus Torvalds 		err = -ENOMEM;
17501da177e4SLinus Torvalds 		goto out;
17511da177e4SLinus Torvalds 	}
17521da177e4SLinus Torvalds 
17531716a961SGao feng 	if (cfg->fc_flags & RTF_EXPIRES)
17541716a961SGao feng 		rt6_set_expires(rt, jiffies +
17551716a961SGao feng 				clock_t_to_jiffies(cfg->fc_expires));
17561716a961SGao feng 	else
17571716a961SGao feng 		rt6_clean_expires(rt);
17581da177e4SLinus Torvalds 
175986872cb5SThomas Graf 	if (cfg->fc_protocol == RTPROT_UNSPEC)
176086872cb5SThomas Graf 		cfg->fc_protocol = RTPROT_BOOT;
176186872cb5SThomas Graf 	rt->rt6i_protocol = cfg->fc_protocol;
176286872cb5SThomas Graf 
176386872cb5SThomas Graf 	addr_type = ipv6_addr_type(&cfg->fc_dst);
17641da177e4SLinus Torvalds 
17651da177e4SLinus Torvalds 	if (addr_type & IPV6_ADDR_MULTICAST)
1766d8d1f30bSChangli Gao 		rt->dst.input = ip6_mc_input;
1767ab79ad14SMaciej Żenczykowski 	else if (cfg->fc_flags & RTF_LOCAL)
1768ab79ad14SMaciej Żenczykowski 		rt->dst.input = ip6_input;
17691da177e4SLinus Torvalds 	else
1770d8d1f30bSChangli Gao 		rt->dst.input = ip6_forward;
17711da177e4SLinus Torvalds 
1772d8d1f30bSChangli Gao 	rt->dst.output = ip6_output;
17731da177e4SLinus Torvalds 
1774*19e42e45SRoopa Prabhu 	if (cfg->fc_encap) {
1775*19e42e45SRoopa Prabhu 		struct lwtunnel_state *lwtstate;
1776*19e42e45SRoopa Prabhu 
1777*19e42e45SRoopa Prabhu 		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
1778*19e42e45SRoopa Prabhu 					   cfg->fc_encap, &lwtstate);
1779*19e42e45SRoopa Prabhu 		if (err)
1780*19e42e45SRoopa Prabhu 			goto out;
1781*19e42e45SRoopa Prabhu 		lwtunnel_state_get(lwtstate);
1782*19e42e45SRoopa Prabhu 		rt->rt6i_lwtstate = lwtstate;
1783*19e42e45SRoopa Prabhu 	}
1784*19e42e45SRoopa Prabhu 
178586872cb5SThomas Graf 	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
178686872cb5SThomas Graf 	rt->rt6i_dst.plen = cfg->fc_dst_len;
1787afc4eef8SMartin KaFai Lau 	if (rt->rt6i_dst.plen == 128)
178811d53b49SDavid S. Miller 		rt->dst.flags |= DST_HOST;
17891da177e4SLinus Torvalds 
17901da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
179186872cb5SThomas Graf 	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
179286872cb5SThomas Graf 	rt->rt6i_src.plen = cfg->fc_src_len;
17931da177e4SLinus Torvalds #endif
17941da177e4SLinus Torvalds 
179586872cb5SThomas Graf 	rt->rt6i_metric = cfg->fc_metric;
17961da177e4SLinus Torvalds 
17971da177e4SLinus Torvalds 	/* We cannot add true routes via loopback here,
17981da177e4SLinus Torvalds 	   they would result in kernel looping; promote them to reject routes
17991da177e4SLinus Torvalds 	 */
180086872cb5SThomas Graf 	if ((cfg->fc_flags & RTF_REJECT) ||
180138308473SDavid S. Miller 	    (dev && (dev->flags & IFF_LOOPBACK) &&
180238308473SDavid S. Miller 	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
180338308473SDavid S. Miller 	     !(cfg->fc_flags & RTF_LOCAL))) {
18041da177e4SLinus Torvalds 		/* hold loopback dev/idev if we haven't done so. */
18055578689aSDaniel Lezcano 		if (dev != net->loopback_dev) {
18061da177e4SLinus Torvalds 			if (dev) {
18071da177e4SLinus Torvalds 				dev_put(dev);
18081da177e4SLinus Torvalds 				in6_dev_put(idev);
18091da177e4SLinus Torvalds 			}
18105578689aSDaniel Lezcano 			dev = net->loopback_dev;
18111da177e4SLinus Torvalds 			dev_hold(dev);
18121da177e4SLinus Torvalds 			idev = in6_dev_get(dev);
18131da177e4SLinus Torvalds 			if (!idev) {
18141da177e4SLinus Torvalds 				err = -ENODEV;
18151da177e4SLinus Torvalds 				goto out;
18161da177e4SLinus Torvalds 			}
18171da177e4SLinus Torvalds 		}
18181da177e4SLinus Torvalds 		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1819ef2c7d7bSNicolas Dichtel 		switch (cfg->fc_type) {
1820ef2c7d7bSNicolas Dichtel 		case RTN_BLACKHOLE:
1821ef2c7d7bSNicolas Dichtel 			rt->dst.error = -EINVAL;
1822aad88724SEric Dumazet 			rt->dst.output = dst_discard_sk;
18237150aedeSKamala R 			rt->dst.input = dst_discard;
1824ef2c7d7bSNicolas Dichtel 			break;
1825ef2c7d7bSNicolas Dichtel 		case RTN_PROHIBIT:
1826ef2c7d7bSNicolas Dichtel 			rt->dst.error = -EACCES;
18277150aedeSKamala R 			rt->dst.output = ip6_pkt_prohibit_out;
18287150aedeSKamala R 			rt->dst.input = ip6_pkt_prohibit;
1829ef2c7d7bSNicolas Dichtel 			break;
1830b4949ab2SNicolas Dichtel 		case RTN_THROW:
1831ef2c7d7bSNicolas Dichtel 		default:
18327150aedeSKamala R 			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
18337150aedeSKamala R 					: -ENETUNREACH;
18347150aedeSKamala R 			rt->dst.output = ip6_pkt_discard_out;
18357150aedeSKamala R 			rt->dst.input = ip6_pkt_discard;
1836ef2c7d7bSNicolas Dichtel 			break;
1837ef2c7d7bSNicolas Dichtel 		}
18381da177e4SLinus Torvalds 		goto install_route;
18391da177e4SLinus Torvalds 	}
18401da177e4SLinus Torvalds 
184186872cb5SThomas Graf 	if (cfg->fc_flags & RTF_GATEWAY) {
1842b71d1d42SEric Dumazet 		const struct in6_addr *gw_addr;
18431da177e4SLinus Torvalds 		int gwa_type;
18441da177e4SLinus Torvalds 
184586872cb5SThomas Graf 		gw_addr = &cfg->fc_gateway;
184648ed7b26SFlorian Westphal 
184748ed7b26SFlorian Westphal 		/* if gw_addr is local we will fail to detect this in case
184848ed7b26SFlorian Westphal 		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
184948ed7b26SFlorian Westphal 		 * will return already-added prefix route via interface that
185048ed7b26SFlorian Westphal 		 * prefix route was assigned to, which might be non-loopback.
185148ed7b26SFlorian Westphal 		 */
185248ed7b26SFlorian Westphal 		err = -EINVAL;
185348ed7b26SFlorian Westphal 		if (ipv6_chk_addr_and_flags(net, gw_addr, NULL, 0, 0))
185448ed7b26SFlorian Westphal 			goto out;
185548ed7b26SFlorian Westphal 
18564e3fd7a0SAlexey Dobriyan 		rt->rt6i_gateway = *gw_addr;
18571da177e4SLinus Torvalds 		gwa_type = ipv6_addr_type(gw_addr);
18581da177e4SLinus Torvalds 
18591da177e4SLinus Torvalds 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
18601da177e4SLinus Torvalds 			struct rt6_info *grt;
18611da177e4SLinus Torvalds 
18621da177e4SLinus Torvalds 			/* IPv6 strictly inhibits using not link-local
18631da177e4SLinus Torvalds 			   addresses as nexthop address.
18641da177e4SLinus Torvalds 			   Otherwise, router will not able to send redirects.
18651da177e4SLinus Torvalds 			   It is very good, but in some (rare!) circumstances
18661da177e4SLinus Torvalds 			   (SIT, PtP, NBMA NOARP links) it is handy to allow
18671da177e4SLinus Torvalds 			   some exceptions. --ANK
18681da177e4SLinus Torvalds 			 */
18691da177e4SLinus Torvalds 			if (!(gwa_type & IPV6_ADDR_UNICAST))
18701da177e4SLinus Torvalds 				goto out;
18711da177e4SLinus Torvalds 
18725578689aSDaniel Lezcano 			grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
18731da177e4SLinus Torvalds 
18741da177e4SLinus Torvalds 			err = -EHOSTUNREACH;
187538308473SDavid S. Miller 			if (!grt)
18761da177e4SLinus Torvalds 				goto out;
18771da177e4SLinus Torvalds 			if (dev) {
1878d1918542SDavid S. Miller 				if (dev != grt->dst.dev) {
187994e187c0SAmerigo Wang 					ip6_rt_put(grt);
18801da177e4SLinus Torvalds 					goto out;
18811da177e4SLinus Torvalds 				}
18821da177e4SLinus Torvalds 			} else {
1883d1918542SDavid S. Miller 				dev = grt->dst.dev;
18841da177e4SLinus Torvalds 				idev = grt->rt6i_idev;
18851da177e4SLinus Torvalds 				dev_hold(dev);
18861da177e4SLinus Torvalds 				in6_dev_hold(grt->rt6i_idev);
18871da177e4SLinus Torvalds 			}
18881da177e4SLinus Torvalds 			if (!(grt->rt6i_flags & RTF_GATEWAY))
18891da177e4SLinus Torvalds 				err = 0;
189094e187c0SAmerigo Wang 			ip6_rt_put(grt);
18911da177e4SLinus Torvalds 
18921da177e4SLinus Torvalds 			if (err)
18931da177e4SLinus Torvalds 				goto out;
18941da177e4SLinus Torvalds 		}
18951da177e4SLinus Torvalds 		err = -EINVAL;
189638308473SDavid S. Miller 		if (!dev || (dev->flags & IFF_LOOPBACK))
18971da177e4SLinus Torvalds 			goto out;
18981da177e4SLinus Torvalds 	}
18991da177e4SLinus Torvalds 
19001da177e4SLinus Torvalds 	err = -ENODEV;
190138308473SDavid S. Miller 	if (!dev)
19021da177e4SLinus Torvalds 		goto out;
19031da177e4SLinus Torvalds 
1904c3968a85SDaniel Walter 	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1905c3968a85SDaniel Walter 		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1906c3968a85SDaniel Walter 			err = -EINVAL;
1907c3968a85SDaniel Walter 			goto out;
1908c3968a85SDaniel Walter 		}
19094e3fd7a0SAlexey Dobriyan 		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1910c3968a85SDaniel Walter 		rt->rt6i_prefsrc.plen = 128;
1911c3968a85SDaniel Walter 	} else
1912c3968a85SDaniel Walter 		rt->rt6i_prefsrc.plen = 0;
1913c3968a85SDaniel Walter 
191486872cb5SThomas Graf 	rt->rt6i_flags = cfg->fc_flags;
19151da177e4SLinus Torvalds 
19161da177e4SLinus Torvalds install_route:
1917d8d1f30bSChangli Gao 	rt->dst.dev = dev;
19181da177e4SLinus Torvalds 	rt->rt6i_idev = idev;
1919c71099acSThomas Graf 	rt->rt6i_table = table;
192063152fc0SDaniel Lezcano 
1921c346dca1SYOSHIFUJI Hideaki 	cfg->fc_nlinfo.nl_net = dev_net(dev);
192263152fc0SDaniel Lezcano 
1923e715b6d3SFlorian Westphal 	err = ip6_convert_metrics(&mxc, cfg);
1924e715b6d3SFlorian Westphal 	if (err)
1925e715b6d3SFlorian Westphal 		goto out;
19261da177e4SLinus Torvalds 
1927e715b6d3SFlorian Westphal 	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1928e715b6d3SFlorian Westphal 
1929e715b6d3SFlorian Westphal 	kfree(mxc.mx);
1930e715b6d3SFlorian Westphal 	return err;
19311da177e4SLinus Torvalds out:
19321da177e4SLinus Torvalds 	if (dev)
19331da177e4SLinus Torvalds 		dev_put(dev);
19341da177e4SLinus Torvalds 	if (idev)
19351da177e4SLinus Torvalds 		in6_dev_put(idev);
19361da177e4SLinus Torvalds 	if (rt)
1937d8d1f30bSChangli Gao 		dst_free(&rt->dst);
19381da177e4SLinus Torvalds 	return err;
19391da177e4SLinus Torvalds }
19401da177e4SLinus Torvalds 
194186872cb5SThomas Graf static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
19421da177e4SLinus Torvalds {
19431da177e4SLinus Torvalds 	int err;
1944c71099acSThomas Graf 	struct fib6_table *table;
1945d1918542SDavid S. Miller 	struct net *net = dev_net(rt->dst.dev);
19461da177e4SLinus Torvalds 
19476825a26cSGao feng 	if (rt == net->ipv6.ip6_null_entry) {
19486825a26cSGao feng 		err = -ENOENT;
19496825a26cSGao feng 		goto out;
19506825a26cSGao feng 	}
19516c813a72SPatrick McHardy 
1952c71099acSThomas Graf 	table = rt->rt6i_table;
1953c71099acSThomas Graf 	write_lock_bh(&table->tb6_lock);
195486872cb5SThomas Graf 	err = fib6_del(rt, info);
1955c71099acSThomas Graf 	write_unlock_bh(&table->tb6_lock);
19561da177e4SLinus Torvalds 
19576825a26cSGao feng out:
195894e187c0SAmerigo Wang 	ip6_rt_put(rt);
19591da177e4SLinus Torvalds 	return err;
19601da177e4SLinus Torvalds }
19611da177e4SLinus Torvalds 
1962e0a1ad73SThomas Graf int ip6_del_rt(struct rt6_info *rt)
1963e0a1ad73SThomas Graf {
19644d1169c1SDenis V. Lunev 	struct nl_info info = {
1965d1918542SDavid S. Miller 		.nl_net = dev_net(rt->dst.dev),
19664d1169c1SDenis V. Lunev 	};
1967528c4cebSDenis V. Lunev 	return __ip6_del_rt(rt, &info);
1968e0a1ad73SThomas Graf }
1969e0a1ad73SThomas Graf 
197086872cb5SThomas Graf static int ip6_route_del(struct fib6_config *cfg)
19711da177e4SLinus Torvalds {
1972c71099acSThomas Graf 	struct fib6_table *table;
19731da177e4SLinus Torvalds 	struct fib6_node *fn;
19741da177e4SLinus Torvalds 	struct rt6_info *rt;
19751da177e4SLinus Torvalds 	int err = -ESRCH;
19761da177e4SLinus Torvalds 
19775578689aSDaniel Lezcano 	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
197838308473SDavid S. Miller 	if (!table)
1979c71099acSThomas Graf 		return err;
19801da177e4SLinus Torvalds 
1981c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
1982c71099acSThomas Graf 
1983c71099acSThomas Graf 	fn = fib6_locate(&table->tb6_root,
198486872cb5SThomas Graf 			 &cfg->fc_dst, cfg->fc_dst_len,
198586872cb5SThomas Graf 			 &cfg->fc_src, cfg->fc_src_len);
19861da177e4SLinus Torvalds 
19871da177e4SLinus Torvalds 	if (fn) {
1988d8d1f30bSChangli Gao 		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
19891f56a01fSMartin KaFai Lau 			if ((rt->rt6i_flags & RTF_CACHE) &&
19901f56a01fSMartin KaFai Lau 			    !(cfg->fc_flags & RTF_CACHE))
19911f56a01fSMartin KaFai Lau 				continue;
199286872cb5SThomas Graf 			if (cfg->fc_ifindex &&
1993d1918542SDavid S. Miller 			    (!rt->dst.dev ||
1994d1918542SDavid S. Miller 			     rt->dst.dev->ifindex != cfg->fc_ifindex))
19951da177e4SLinus Torvalds 				continue;
199686872cb5SThomas Graf 			if (cfg->fc_flags & RTF_GATEWAY &&
199786872cb5SThomas Graf 			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
19981da177e4SLinus Torvalds 				continue;
199986872cb5SThomas Graf 			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
20001da177e4SLinus Torvalds 				continue;
2001d8d1f30bSChangli Gao 			dst_hold(&rt->dst);
2002c71099acSThomas Graf 			read_unlock_bh(&table->tb6_lock);
20031da177e4SLinus Torvalds 
200486872cb5SThomas Graf 			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
20051da177e4SLinus Torvalds 		}
20061da177e4SLinus Torvalds 	}
2007c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
20081da177e4SLinus Torvalds 
20091da177e4SLinus Torvalds 	return err;
20101da177e4SLinus Torvalds }
20111da177e4SLinus Torvalds 
20126700c270SDavid S. Miller static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
2013a6279458SYOSHIFUJI Hideaki {
2014e8599ff4SDavid S. Miller 	struct net *net = dev_net(skb->dev);
2015a6279458SYOSHIFUJI Hideaki 	struct netevent_redirect netevent;
2016e8599ff4SDavid S. Miller 	struct rt6_info *rt, *nrt = NULL;
2017e8599ff4SDavid S. Miller 	struct ndisc_options ndopts;
2018e8599ff4SDavid S. Miller 	struct inet6_dev *in6_dev;
2019e8599ff4SDavid S. Miller 	struct neighbour *neigh;
202071bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	struct rd_msg *msg;
20216e157b6aSDavid S. Miller 	int optlen, on_link;
20226e157b6aSDavid S. Miller 	u8 *lladdr;
2023e8599ff4SDavid S. Miller 
202429a3cad5SSimon Horman 	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
202571bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	optlen -= sizeof(*msg);
2026e8599ff4SDavid S. Miller 
2027e8599ff4SDavid S. Miller 	if (optlen < 0) {
20286e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2029e8599ff4SDavid S. Miller 		return;
2030e8599ff4SDavid S. Miller 	}
2031e8599ff4SDavid S. Miller 
203271bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	msg = (struct rd_msg *)icmp6_hdr(skb);
2033e8599ff4SDavid S. Miller 
203471bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (ipv6_addr_is_multicast(&msg->dest)) {
20356e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2036e8599ff4SDavid S. Miller 		return;
2037e8599ff4SDavid S. Miller 	}
2038e8599ff4SDavid S. Miller 
20396e157b6aSDavid S. Miller 	on_link = 0;
204071bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
2041e8599ff4SDavid S. Miller 		on_link = 1;
204271bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	} else if (ipv6_addr_type(&msg->target) !=
2043e8599ff4SDavid S. Miller 		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
20446e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2045e8599ff4SDavid S. Miller 		return;
2046e8599ff4SDavid S. Miller 	}
2047e8599ff4SDavid S. Miller 
2048e8599ff4SDavid S. Miller 	in6_dev = __in6_dev_get(skb->dev);
2049e8599ff4SDavid S. Miller 	if (!in6_dev)
2050e8599ff4SDavid S. Miller 		return;
2051e8599ff4SDavid S. Miller 	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2052e8599ff4SDavid S. Miller 		return;
2053e8599ff4SDavid S. Miller 
2054e8599ff4SDavid S. Miller 	/* RFC2461 8.1:
2055e8599ff4SDavid S. Miller 	 *	The IP source address of the Redirect MUST be the same as the current
2056e8599ff4SDavid S. Miller 	 *	first-hop router for the specified ICMP Destination Address.
2057e8599ff4SDavid S. Miller 	 */
2058e8599ff4SDavid S. Miller 
205971bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
2060e8599ff4SDavid S. Miller 		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2061e8599ff4SDavid S. Miller 		return;
2062e8599ff4SDavid S. Miller 	}
20636e157b6aSDavid S. Miller 
20646e157b6aSDavid S. Miller 	lladdr = NULL;
2065e8599ff4SDavid S. Miller 	if (ndopts.nd_opts_tgt_lladdr) {
2066e8599ff4SDavid S. Miller 		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2067e8599ff4SDavid S. Miller 					     skb->dev);
2068e8599ff4SDavid S. Miller 		if (!lladdr) {
2069e8599ff4SDavid S. Miller 			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2070e8599ff4SDavid S. Miller 			return;
2071e8599ff4SDavid S. Miller 		}
2072e8599ff4SDavid S. Miller 	}
2073e8599ff4SDavid S. Miller 
20746e157b6aSDavid S. Miller 	rt = (struct rt6_info *) dst;
20756e157b6aSDavid S. Miller 	if (rt == net->ipv6.ip6_null_entry) {
20766e157b6aSDavid S. Miller 		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
20776e157b6aSDavid S. Miller 		return;
20786e157b6aSDavid S. Miller 	}
20796e157b6aSDavid S. Miller 
20806e157b6aSDavid S. Miller 	/* Redirect received -> path was valid.
20816e157b6aSDavid S. Miller 	 * Look, redirects are sent only in response to data packets,
20826e157b6aSDavid S. Miller 	 * so that this nexthop apparently is reachable. --ANK
20836e157b6aSDavid S. Miller 	 */
20846e157b6aSDavid S. Miller 	dst_confirm(&rt->dst);
20856e157b6aSDavid S. Miller 
208671bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
2087e8599ff4SDavid S. Miller 	if (!neigh)
2088e8599ff4SDavid S. Miller 		return;
2089e8599ff4SDavid S. Miller 
20901da177e4SLinus Torvalds 	/*
20911da177e4SLinus Torvalds 	 *	We have finally decided to accept it.
20921da177e4SLinus Torvalds 	 */
20931da177e4SLinus Torvalds 
20941da177e4SLinus Torvalds 	neigh_update(neigh, lladdr, NUD_STALE,
20951da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
20961da177e4SLinus Torvalds 		     NEIGH_UPDATE_F_OVERRIDE|
20971da177e4SLinus Torvalds 		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
20981da177e4SLinus Torvalds 				     NEIGH_UPDATE_F_ISROUTER))
20991da177e4SLinus Torvalds 		     );
21001da177e4SLinus Torvalds 
210183a09abdSMartin KaFai Lau 	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
210238308473SDavid S. Miller 	if (!nrt)
21031da177e4SLinus Torvalds 		goto out;
21041da177e4SLinus Torvalds 
21051da177e4SLinus Torvalds 	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
21061da177e4SLinus Torvalds 	if (on_link)
21071da177e4SLinus Torvalds 		nrt->rt6i_flags &= ~RTF_GATEWAY;
21081da177e4SLinus Torvalds 
21094e3fd7a0SAlexey Dobriyan 	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
21101da177e4SLinus Torvalds 
211140e22e8fSThomas Graf 	if (ip6_ins_rt(nrt))
21121da177e4SLinus Torvalds 		goto out;
21131da177e4SLinus Torvalds 
2114d8d1f30bSChangli Gao 	netevent.old = &rt->dst;
2115d8d1f30bSChangli Gao 	netevent.new = &nrt->dst;
211671bcdba0SYOSHIFUJI Hideaki / 吉藤英明 	netevent.daddr = &msg->dest;
211760592833SYOSHIFUJI Hideaki / 吉藤英明 	netevent.neigh = neigh;
21188d71740cSTom Tucker 	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
21198d71740cSTom Tucker 
21201da177e4SLinus Torvalds 	if (rt->rt6i_flags & RTF_CACHE) {
21216e157b6aSDavid S. Miller 		rt = (struct rt6_info *) dst_clone(&rt->dst);
2122e0a1ad73SThomas Graf 		ip6_del_rt(rt);
21231da177e4SLinus Torvalds 	}
21241da177e4SLinus Torvalds 
21251da177e4SLinus Torvalds out:
2126e8599ff4SDavid S. Miller 	neigh_release(neigh);
21276e157b6aSDavid S. Miller }
21286e157b6aSDavid S. Miller 
21291da177e4SLinus Torvalds /*
21301da177e4SLinus Torvalds  *	Misc support functions
21311da177e4SLinus Torvalds  */
21321da177e4SLinus Torvalds 
21334b32b5adSMartin KaFai Lau static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
21344b32b5adSMartin KaFai Lau {
21354b32b5adSMartin KaFai Lau 	BUG_ON(from->dst.from);
21364b32b5adSMartin KaFai Lau 
21374b32b5adSMartin KaFai Lau 	rt->rt6i_flags &= ~RTF_EXPIRES;
21384b32b5adSMartin KaFai Lau 	dst_hold(&from->dst);
21394b32b5adSMartin KaFai Lau 	rt->dst.from = &from->dst;
21404b32b5adSMartin KaFai Lau 	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
21414b32b5adSMartin KaFai Lau }
21424b32b5adSMartin KaFai Lau 
214383a09abdSMartin KaFai Lau static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
21441da177e4SLinus Torvalds {
2145d8d1f30bSChangli Gao 	rt->dst.input = ort->dst.input;
2146d8d1f30bSChangli Gao 	rt->dst.output = ort->dst.output;
214783a09abdSMartin KaFai Lau 	rt->rt6i_dst = ort->rt6i_dst;
2148d8d1f30bSChangli Gao 	rt->dst.error = ort->dst.error;
21491da177e4SLinus Torvalds 	rt->rt6i_idev = ort->rt6i_idev;
21501da177e4SLinus Torvalds 	if (rt->rt6i_idev)
21511da177e4SLinus Torvalds 		in6_dev_hold(rt->rt6i_idev);
2152d8d1f30bSChangli Gao 	rt->dst.lastuse = jiffies;
21534e3fd7a0SAlexey Dobriyan 	rt->rt6i_gateway = ort->rt6i_gateway;
21541716a961SGao feng 	rt->rt6i_flags = ort->rt6i_flags;
21551716a961SGao feng 	rt6_set_from(rt, ort);
215683a09abdSMartin KaFai Lau 	rt->rt6i_metric = ort->rt6i_metric;
21571da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
215883a09abdSMartin KaFai Lau 	rt->rt6i_src = ort->rt6i_src;
21591da177e4SLinus Torvalds #endif
216083a09abdSMartin KaFai Lau 	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2161c71099acSThomas Graf 	rt->rt6i_table = ort->rt6i_table;
21621da177e4SLinus Torvalds }
21631da177e4SLinus Torvalds 
216470ceb4f5SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_ROUTE_INFO
2165efa2cea0SDaniel Lezcano static struct rt6_info *rt6_get_route_info(struct net *net,
2166b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
2167b71d1d42SEric Dumazet 					   const struct in6_addr *gwaddr, int ifindex)
216870ceb4f5SYOSHIFUJI Hideaki {
216970ceb4f5SYOSHIFUJI Hideaki 	struct fib6_node *fn;
217070ceb4f5SYOSHIFUJI Hideaki 	struct rt6_info *rt = NULL;
2171c71099acSThomas Graf 	struct fib6_table *table;
217270ceb4f5SYOSHIFUJI Hideaki 
2173efa2cea0SDaniel Lezcano 	table = fib6_get_table(net, RT6_TABLE_INFO);
217438308473SDavid S. Miller 	if (!table)
2175c71099acSThomas Graf 		return NULL;
2176c71099acSThomas Graf 
21775744dd9bSLi RongQing 	read_lock_bh(&table->tb6_lock);
2178c71099acSThomas Graf 	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
217970ceb4f5SYOSHIFUJI Hideaki 	if (!fn)
218070ceb4f5SYOSHIFUJI Hideaki 		goto out;
218170ceb4f5SYOSHIFUJI Hideaki 
2182d8d1f30bSChangli Gao 	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
2183d1918542SDavid S. Miller 		if (rt->dst.dev->ifindex != ifindex)
218470ceb4f5SYOSHIFUJI Hideaki 			continue;
218570ceb4f5SYOSHIFUJI Hideaki 		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
218670ceb4f5SYOSHIFUJI Hideaki 			continue;
218770ceb4f5SYOSHIFUJI Hideaki 		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
218870ceb4f5SYOSHIFUJI Hideaki 			continue;
2189d8d1f30bSChangli Gao 		dst_hold(&rt->dst);
219070ceb4f5SYOSHIFUJI Hideaki 		break;
219170ceb4f5SYOSHIFUJI Hideaki 	}
219270ceb4f5SYOSHIFUJI Hideaki out:
21935744dd9bSLi RongQing 	read_unlock_bh(&table->tb6_lock);
219470ceb4f5SYOSHIFUJI Hideaki 	return rt;
219570ceb4f5SYOSHIFUJI Hideaki }
219670ceb4f5SYOSHIFUJI Hideaki 
2197efa2cea0SDaniel Lezcano static struct rt6_info *rt6_add_route_info(struct net *net,
2198b71d1d42SEric Dumazet 					   const struct in6_addr *prefix, int prefixlen,
2199b71d1d42SEric Dumazet 					   const struct in6_addr *gwaddr, int ifindex,
220095c96174SEric Dumazet 					   unsigned int pref)
220170ceb4f5SYOSHIFUJI Hideaki {
220286872cb5SThomas Graf 	struct fib6_config cfg = {
220386872cb5SThomas Graf 		.fc_table	= RT6_TABLE_INFO,
2204238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
220586872cb5SThomas Graf 		.fc_ifindex	= ifindex,
220686872cb5SThomas Graf 		.fc_dst_len	= prefixlen,
220786872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
220886872cb5SThomas Graf 				  RTF_UP | RTF_PREF(pref),
220915e47304SEric W. Biederman 		.fc_nlinfo.portid = 0,
2210efa2cea0SDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
2211efa2cea0SDaniel Lezcano 		.fc_nlinfo.nl_net = net,
221286872cb5SThomas Graf 	};
221370ceb4f5SYOSHIFUJI Hideaki 
22144e3fd7a0SAlexey Dobriyan 	cfg.fc_dst = *prefix;
22154e3fd7a0SAlexey Dobriyan 	cfg.fc_gateway = *gwaddr;
221686872cb5SThomas Graf 
2217e317da96SYOSHIFUJI Hideaki 	/* We should treat it as a default route if prefix length is 0. */
2218e317da96SYOSHIFUJI Hideaki 	if (!prefixlen)
221986872cb5SThomas Graf 		cfg.fc_flags |= RTF_DEFAULT;
222070ceb4f5SYOSHIFUJI Hideaki 
222186872cb5SThomas Graf 	ip6_route_add(&cfg);
222270ceb4f5SYOSHIFUJI Hideaki 
2223efa2cea0SDaniel Lezcano 	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
222470ceb4f5SYOSHIFUJI Hideaki }
222570ceb4f5SYOSHIFUJI Hideaki #endif
222670ceb4f5SYOSHIFUJI Hideaki 
2227b71d1d42SEric Dumazet struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
22281da177e4SLinus Torvalds {
22291da177e4SLinus Torvalds 	struct rt6_info *rt;
2230c71099acSThomas Graf 	struct fib6_table *table;
22311da177e4SLinus Torvalds 
2232c346dca1SYOSHIFUJI Hideaki 	table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
223338308473SDavid S. Miller 	if (!table)
2234c71099acSThomas Graf 		return NULL;
22351da177e4SLinus Torvalds 
22365744dd9bSLi RongQing 	read_lock_bh(&table->tb6_lock);
2237d8d1f30bSChangli Gao 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2238d1918542SDavid S. Miller 		if (dev == rt->dst.dev &&
2239045927ffSYOSHIFUJI Hideaki 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
22401da177e4SLinus Torvalds 		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
22411da177e4SLinus Torvalds 			break;
22421da177e4SLinus Torvalds 	}
22431da177e4SLinus Torvalds 	if (rt)
2244d8d1f30bSChangli Gao 		dst_hold(&rt->dst);
22455744dd9bSLi RongQing 	read_unlock_bh(&table->tb6_lock);
22461da177e4SLinus Torvalds 	return rt;
22471da177e4SLinus Torvalds }
22481da177e4SLinus Torvalds 
2249b71d1d42SEric Dumazet struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2250ebacaaa0SYOSHIFUJI Hideaki 				     struct net_device *dev,
2251ebacaaa0SYOSHIFUJI Hideaki 				     unsigned int pref)
22521da177e4SLinus Torvalds {
225386872cb5SThomas Graf 	struct fib6_config cfg = {
225486872cb5SThomas Graf 		.fc_table	= RT6_TABLE_DFLT,
2255238fc7eaSRami Rosen 		.fc_metric	= IP6_RT_PRIO_USER,
225686872cb5SThomas Graf 		.fc_ifindex	= dev->ifindex,
225786872cb5SThomas Graf 		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
225886872cb5SThomas Graf 				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
225915e47304SEric W. Biederman 		.fc_nlinfo.portid = 0,
22605578689aSDaniel Lezcano 		.fc_nlinfo.nlh = NULL,
2261c346dca1SYOSHIFUJI Hideaki 		.fc_nlinfo.nl_net = dev_net(dev),
226286872cb5SThomas Graf 	};
22631da177e4SLinus Torvalds 
22644e3fd7a0SAlexey Dobriyan 	cfg.fc_gateway = *gwaddr;
22651da177e4SLinus Torvalds 
226686872cb5SThomas Graf 	ip6_route_add(&cfg);
22671da177e4SLinus Torvalds 
22681da177e4SLinus Torvalds 	return rt6_get_dflt_router(gwaddr, dev);
22691da177e4SLinus Torvalds }
22701da177e4SLinus Torvalds 
22717b4da532SDaniel Lezcano void rt6_purge_dflt_routers(struct net *net)
22721da177e4SLinus Torvalds {
22731da177e4SLinus Torvalds 	struct rt6_info *rt;
2274c71099acSThomas Graf 	struct fib6_table *table;
2275c71099acSThomas Graf 
2276c71099acSThomas Graf 	/* NOTE: Keep consistent with rt6_get_dflt_router */
22777b4da532SDaniel Lezcano 	table = fib6_get_table(net, RT6_TABLE_DFLT);
227838308473SDavid S. Miller 	if (!table)
2279c71099acSThomas Graf 		return;
22801da177e4SLinus Torvalds 
22811da177e4SLinus Torvalds restart:
2282c71099acSThomas Graf 	read_lock_bh(&table->tb6_lock);
2283d8d1f30bSChangli Gao 	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
22843e8b0ac3SLorenzo Colitti 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
22853e8b0ac3SLorenzo Colitti 		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2286d8d1f30bSChangli Gao 			dst_hold(&rt->dst);
2287c71099acSThomas Graf 			read_unlock_bh(&table->tb6_lock);
2288e0a1ad73SThomas Graf 			ip6_del_rt(rt);
22891da177e4SLinus Torvalds 			goto restart;
22901da177e4SLinus Torvalds 		}
22911da177e4SLinus Torvalds 	}
2292c71099acSThomas Graf 	read_unlock_bh(&table->tb6_lock);
22931da177e4SLinus Torvalds }
22941da177e4SLinus Torvalds 
22955578689aSDaniel Lezcano static void rtmsg_to_fib6_config(struct net *net,
22965578689aSDaniel Lezcano 				 struct in6_rtmsg *rtmsg,
229786872cb5SThomas Graf 				 struct fib6_config *cfg)
229886872cb5SThomas Graf {
229986872cb5SThomas Graf 	memset(cfg, 0, sizeof(*cfg));
230086872cb5SThomas Graf 
230186872cb5SThomas Graf 	cfg->fc_table = RT6_TABLE_MAIN;
230286872cb5SThomas Graf 	cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
230386872cb5SThomas Graf 	cfg->fc_metric = rtmsg->rtmsg_metric;
230486872cb5SThomas Graf 	cfg->fc_expires = rtmsg->rtmsg_info;
230586872cb5SThomas Graf 	cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
230686872cb5SThomas Graf 	cfg->fc_src_len = rtmsg->rtmsg_src_len;
230786872cb5SThomas Graf 	cfg->fc_flags = rtmsg->rtmsg_flags;
230886872cb5SThomas Graf 
23095578689aSDaniel Lezcano 	cfg->fc_nlinfo.nl_net = net;
2310f1243c2dSBenjamin Thery 
23114e3fd7a0SAlexey Dobriyan 	cfg->fc_dst = rtmsg->rtmsg_dst;
23124e3fd7a0SAlexey Dobriyan 	cfg->fc_src = rtmsg->rtmsg_src;
23134e3fd7a0SAlexey Dobriyan 	cfg->fc_gateway = rtmsg->rtmsg_gateway;
231486872cb5SThomas Graf }
231586872cb5SThomas Graf 
23165578689aSDaniel Lezcano int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
23171da177e4SLinus Torvalds {
231886872cb5SThomas Graf 	struct fib6_config cfg;
23191da177e4SLinus Torvalds 	struct in6_rtmsg rtmsg;
23201da177e4SLinus Torvalds 	int err;
23211da177e4SLinus Torvalds 
23221da177e4SLinus Torvalds 	switch (cmd) {
23231da177e4SLinus Torvalds 	case SIOCADDRT:		/* Add a route */
23241da177e4SLinus Torvalds 	case SIOCDELRT:		/* Delete a route */
2325af31f412SEric W. Biederman 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
23261da177e4SLinus Torvalds 			return -EPERM;
23271da177e4SLinus Torvalds 		err = copy_from_user(&rtmsg, arg,
23281da177e4SLinus Torvalds 				     sizeof(struct in6_rtmsg));
23291da177e4SLinus Torvalds 		if (err)
23301da177e4SLinus Torvalds 			return -EFAULT;
23311da177e4SLinus Torvalds 
23325578689aSDaniel Lezcano 		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
233386872cb5SThomas Graf 
23341da177e4SLinus Torvalds 		rtnl_lock();
23351da177e4SLinus Torvalds 		switch (cmd) {
23361da177e4SLinus Torvalds 		case SIOCADDRT:
233786872cb5SThomas Graf 			err = ip6_route_add(&cfg);
23381da177e4SLinus Torvalds 			break;
23391da177e4SLinus Torvalds 		case SIOCDELRT:
234086872cb5SThomas Graf 			err = ip6_route_del(&cfg);
23411da177e4SLinus Torvalds 			break;
23421da177e4SLinus Torvalds 		default:
23431da177e4SLinus Torvalds 			err = -EINVAL;
23441da177e4SLinus Torvalds 		}
23451da177e4SLinus Torvalds 		rtnl_unlock();
23461da177e4SLinus Torvalds 
23471da177e4SLinus Torvalds 		return err;
23483ff50b79SStephen Hemminger 	}
23491da177e4SLinus Torvalds 
23501da177e4SLinus Torvalds 	return -EINVAL;
23511da177e4SLinus Torvalds }
23521da177e4SLinus Torvalds 
23531da177e4SLinus Torvalds /*
23541da177e4SLinus Torvalds  *	Drop the packet on the floor
23551da177e4SLinus Torvalds  */
23561da177e4SLinus Torvalds 
2357d5fdd6baSBrian Haley static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
23581da177e4SLinus Torvalds {
2359612f09e8SYOSHIFUJI Hideaki 	int type;
2360adf30907SEric Dumazet 	struct dst_entry *dst = skb_dst(skb);
2361612f09e8SYOSHIFUJI Hideaki 	switch (ipstats_mib_noroutes) {
2362612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_INNOROUTES:
23630660e03fSArnaldo Carvalho de Melo 		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
236445bb0060SUlrich Weber 		if (type == IPV6_ADDR_ANY) {
23653bd653c8SDenis V. Lunev 			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
23663bd653c8SDenis V. Lunev 				      IPSTATS_MIB_INADDRERRORS);
2367612f09e8SYOSHIFUJI Hideaki 			break;
2368612f09e8SYOSHIFUJI Hideaki 		}
2369612f09e8SYOSHIFUJI Hideaki 		/* FALLTHROUGH */
2370612f09e8SYOSHIFUJI Hideaki 	case IPSTATS_MIB_OUTNOROUTES:
23713bd653c8SDenis V. Lunev 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
23723bd653c8SDenis V. Lunev 			      ipstats_mib_noroutes);
2373612f09e8SYOSHIFUJI Hideaki 		break;
2374612f09e8SYOSHIFUJI Hideaki 	}
23753ffe533cSAlexey Dobriyan 	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
23761da177e4SLinus Torvalds 	kfree_skb(skb);
23771da177e4SLinus Torvalds 	return 0;
23781da177e4SLinus Torvalds }
23791da177e4SLinus Torvalds 
23809ce8ade0SThomas Graf static int ip6_pkt_discard(struct sk_buff *skb)
23819ce8ade0SThomas Graf {
2382612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
23839ce8ade0SThomas Graf }
23849ce8ade0SThomas Graf 
2385aad88724SEric Dumazet static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
23861da177e4SLinus Torvalds {
2387adf30907SEric Dumazet 	skb->dev = skb_dst(skb)->dev;
2388612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
23891da177e4SLinus Torvalds }
23901da177e4SLinus Torvalds 
23919ce8ade0SThomas Graf static int ip6_pkt_prohibit(struct sk_buff *skb)
23929ce8ade0SThomas Graf {
2393612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
23949ce8ade0SThomas Graf }
23959ce8ade0SThomas Graf 
2396aad88724SEric Dumazet static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
23979ce8ade0SThomas Graf {
2398adf30907SEric Dumazet 	skb->dev = skb_dst(skb)->dev;
2399612f09e8SYOSHIFUJI Hideaki 	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
24009ce8ade0SThomas Graf }
24019ce8ade0SThomas Graf 
24021da177e4SLinus Torvalds /*
24031da177e4SLinus Torvalds  *	Allocate a dst for local (unicast / anycast) address.
24041da177e4SLinus Torvalds  */
24051da177e4SLinus Torvalds 
24061da177e4SLinus Torvalds struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
24071da177e4SLinus Torvalds 				    const struct in6_addr *addr,
24088f031519SDavid S. Miller 				    bool anycast)
24091da177e4SLinus Torvalds {
2410c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(idev->dev);
2411a3300ef4SHannes Frederic Sowa 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2412a3300ef4SHannes Frederic Sowa 					    DST_NOCOUNT, NULL);
2413a3300ef4SHannes Frederic Sowa 	if (!rt)
24141da177e4SLinus Torvalds 		return ERR_PTR(-ENOMEM);
24151da177e4SLinus Torvalds 
24161da177e4SLinus Torvalds 	in6_dev_hold(idev);
24171da177e4SLinus Torvalds 
241811d53b49SDavid S. Miller 	rt->dst.flags |= DST_HOST;
2419d8d1f30bSChangli Gao 	rt->dst.input = ip6_input;
2420d8d1f30bSChangli Gao 	rt->dst.output = ip6_output;
24211da177e4SLinus Torvalds 	rt->rt6i_idev = idev;
24221da177e4SLinus Torvalds 
24231da177e4SLinus Torvalds 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
242458c4fb86SYOSHIFUJI Hideaki 	if (anycast)
242558c4fb86SYOSHIFUJI Hideaki 		rt->rt6i_flags |= RTF_ANYCAST;
242658c4fb86SYOSHIFUJI Hideaki 	else
24271da177e4SLinus Torvalds 		rt->rt6i_flags |= RTF_LOCAL;
24281da177e4SLinus Torvalds 
2429550bab42SJulian Anastasov 	rt->rt6i_gateway  = *addr;
24304e3fd7a0SAlexey Dobriyan 	rt->rt6i_dst.addr = *addr;
24311da177e4SLinus Torvalds 	rt->rt6i_dst.plen = 128;
24325578689aSDaniel Lezcano 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
24331da177e4SLinus Torvalds 
2434d8d1f30bSChangli Gao 	atomic_set(&rt->dst.__refcnt, 1);
24351da177e4SLinus Torvalds 
24361da177e4SLinus Torvalds 	return rt;
24371da177e4SLinus Torvalds }
24381da177e4SLinus Torvalds 
2439c3968a85SDaniel Walter int ip6_route_get_saddr(struct net *net,
2440c3968a85SDaniel Walter 			struct rt6_info *rt,
2441b71d1d42SEric Dumazet 			const struct in6_addr *daddr,
2442c3968a85SDaniel Walter 			unsigned int prefs,
2443c3968a85SDaniel Walter 			struct in6_addr *saddr)
2444c3968a85SDaniel Walter {
2445e16e888bSMarkus Stenberg 	struct inet6_dev *idev =
2446e16e888bSMarkus Stenberg 		rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
2447c3968a85SDaniel Walter 	int err = 0;
2448e16e888bSMarkus Stenberg 	if (rt && rt->rt6i_prefsrc.plen)
24494e3fd7a0SAlexey Dobriyan 		*saddr = rt->rt6i_prefsrc.addr;
2450c3968a85SDaniel Walter 	else
2451c3968a85SDaniel Walter 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2452c3968a85SDaniel Walter 					 daddr, prefs, saddr);
2453c3968a85SDaniel Walter 	return err;
2454c3968a85SDaniel Walter }
2455c3968a85SDaniel Walter 
2456c3968a85SDaniel Walter /* remove deleted ip from prefsrc entries */
/*
 * Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all()
 * by rt6_remove_prefsrc().
 */
2457c3968a85SDaniel Walter struct arg_dev_net_ip {
2458c3968a85SDaniel Walter 	struct net_device *dev;	/* only routes on this device; NULL matches any */
2459c3968a85SDaniel Walter 	struct net *net;	/* namespace whose null entry is skipped */
2460c3968a85SDaniel Walter 	struct in6_addr *addr;	/* address compared against each route's prefsrc */
2461c3968a85SDaniel Walter };
2462c3968a85SDaniel Walter 
2463c3968a85SDaniel Walter static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2464c3968a85SDaniel Walter {
2465c3968a85SDaniel Walter 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2466c3968a85SDaniel Walter 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2467c3968a85SDaniel Walter 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2468c3968a85SDaniel Walter 
2469d1918542SDavid S. Miller 	if (((void *)rt->dst.dev == dev || !dev) &&
2470c3968a85SDaniel Walter 	    rt != net->ipv6.ip6_null_entry &&
2471c3968a85SDaniel Walter 	    ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2472c3968a85SDaniel Walter 		/* remove prefsrc entry */
2473c3968a85SDaniel Walter 		rt->rt6i_prefsrc.plen = 0;
2474c3968a85SDaniel Walter 	}
2475c3968a85SDaniel Walter 	return 0;
2476c3968a85SDaniel Walter }
2477c3968a85SDaniel Walter 
2478c3968a85SDaniel Walter void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2479c3968a85SDaniel Walter {
2480c3968a85SDaniel Walter 	struct net *net = dev_net(ifp->idev->dev);
2481c3968a85SDaniel Walter 	struct arg_dev_net_ip adni = {
2482c3968a85SDaniel Walter 		.dev = ifp->idev->dev,
2483c3968a85SDaniel Walter 		.net = net,
2484c3968a85SDaniel Walter 		.addr = &ifp->addr,
2485c3968a85SDaniel Walter 	};
24860c3584d5SLi RongQing 	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2487c3968a85SDaniel Walter }
2488c3968a85SDaniel Walter 
2489be7a010dSDuan Jiong #define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2490be7a010dSDuan Jiong #define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE)
2491be7a010dSDuan Jiong 
2492be7a010dSDuan Jiong /* Remove routers and update dst entries when gateway turn into host. */
2493be7a010dSDuan Jiong static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2494be7a010dSDuan Jiong {
2495be7a010dSDuan Jiong 	struct in6_addr *gateway = (struct in6_addr *)arg;
2496be7a010dSDuan Jiong 
2497be7a010dSDuan Jiong 	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2498be7a010dSDuan Jiong 	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2499be7a010dSDuan Jiong 	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2500be7a010dSDuan Jiong 		return -1;
2501be7a010dSDuan Jiong 	}
2502be7a010dSDuan Jiong 	return 0;
2503be7a010dSDuan Jiong }
2504be7a010dSDuan Jiong 
2505be7a010dSDuan Jiong void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2506be7a010dSDuan Jiong {
2507be7a010dSDuan Jiong 	fib6_clean_all(net, fib6_clean_tohost, gateway);
2508be7a010dSDuan Jiong }
2509be7a010dSDuan Jiong 
25108ed67789SDaniel Lezcano struct arg_dev_net {
25118ed67789SDaniel Lezcano 	struct net_device *dev;
25128ed67789SDaniel Lezcano 	struct net *net;
25138ed67789SDaniel Lezcano };
25148ed67789SDaniel Lezcano 
25151da177e4SLinus Torvalds static int fib6_ifdown(struct rt6_info *rt, void *arg)
25161da177e4SLinus Torvalds {
2517bc3ef660Sstephen hemminger 	const struct arg_dev_net *adn = arg;
2518bc3ef660Sstephen hemminger 	const struct net_device *dev = adn->dev;
25198ed67789SDaniel Lezcano 
2520d1918542SDavid S. Miller 	if ((rt->dst.dev == dev || !dev) &&
2521c159d30cSDavid S. Miller 	    rt != adn->net->ipv6.ip6_null_entry)
25221da177e4SLinus Torvalds 		return -1;
2523c159d30cSDavid S. Miller 
25241da177e4SLinus Torvalds 	return 0;
25251da177e4SLinus Torvalds }
25261da177e4SLinus Torvalds 
2527f3db4851SDaniel Lezcano void rt6_ifdown(struct net *net, struct net_device *dev)
25281da177e4SLinus Torvalds {
25298ed67789SDaniel Lezcano 	struct arg_dev_net adn = {
25308ed67789SDaniel Lezcano 		.dev = dev,
25318ed67789SDaniel Lezcano 		.net = net,
25328ed67789SDaniel Lezcano 	};
25338ed67789SDaniel Lezcano 
25340c3584d5SLi RongQing 	fib6_clean_all(net, fib6_ifdown, &adn);
25351e493d19SDavid S. Miller 	icmp6_clean_all(fib6_ifdown, &adn);
25368d0b94afSMartin KaFai Lau 	rt6_uncached_list_flush_dev(net, dev);
25371da177e4SLinus Torvalds }
25381da177e4SLinus Torvalds 
253995c96174SEric Dumazet struct rt6_mtu_change_arg {
25401da177e4SLinus Torvalds 	struct net_device *dev;
254195c96174SEric Dumazet 	unsigned int mtu;
25421da177e4SLinus Torvalds };
25431da177e4SLinus Torvalds 
25441da177e4SLinus Torvalds static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
25451da177e4SLinus Torvalds {
25461da177e4SLinus Torvalds 	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
25471da177e4SLinus Torvalds 	struct inet6_dev *idev;
25481da177e4SLinus Torvalds 
25491da177e4SLinus Torvalds 	/* In IPv6 pmtu discovery is not optional,
25501da177e4SLinus Torvalds 	   so that RTAX_MTU lock cannot disable it.
25511da177e4SLinus Torvalds 	   We still use this lock to block changes
25521da177e4SLinus Torvalds 	   caused by addrconf/ndisc.
25531da177e4SLinus Torvalds 	*/
25541da177e4SLinus Torvalds 
25551da177e4SLinus Torvalds 	idev = __in6_dev_get(arg->dev);
255638308473SDavid S. Miller 	if (!idev)
25571da177e4SLinus Torvalds 		return 0;
25581da177e4SLinus Torvalds 
25591da177e4SLinus Torvalds 	/* For administrative MTU increase, there is no way to discover
25601da177e4SLinus Torvalds 	   IPv6 PMTU increase, so PMTU increase should be updated here.
25611da177e4SLinus Torvalds 	   Since RFC 1981 doesn't include administrative MTU increase
25621da177e4SLinus Torvalds 	   update PMTU increase is a MUST. (i.e. jumbo frame)
25631da177e4SLinus Torvalds 	 */
25641da177e4SLinus Torvalds 	/*
25651da177e4SLinus Torvalds 	   If new MTU is less than route PMTU, this new MTU will be the
25661da177e4SLinus Torvalds 	   lowest MTU in the path, update the route PMTU to reflect PMTU
25671da177e4SLinus Torvalds 	   decreases; if new MTU is greater than route PMTU, and the
25681da177e4SLinus Torvalds 	   old MTU is the lowest MTU in the path, update the route PMTU
25691da177e4SLinus Torvalds 	   to reflect the increase. In this case if the other nodes' MTU
25701da177e4SLinus Torvalds 	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
25711da177e4SLinus Torvalds 	   PMTU discouvery.
25721da177e4SLinus Torvalds 	 */
2573d1918542SDavid S. Miller 	if (rt->dst.dev == arg->dev &&
25744b32b5adSMartin KaFai Lau 	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
25754b32b5adSMartin KaFai Lau 		if (rt->rt6i_flags & RTF_CACHE) {
25764b32b5adSMartin KaFai Lau 			/* For RTF_CACHE with rt6i_pmtu == 0
25774b32b5adSMartin KaFai Lau 			 * (i.e. a redirected route),
25784b32b5adSMartin KaFai Lau 			 * the metrics of its rt->dst.from has already
25794b32b5adSMartin KaFai Lau 			 * been updated.
25804b32b5adSMartin KaFai Lau 			 */
25814b32b5adSMartin KaFai Lau 			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
25824b32b5adSMartin KaFai Lau 				rt->rt6i_pmtu = arg->mtu;
25834b32b5adSMartin KaFai Lau 		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
2584d8d1f30bSChangli Gao 			   (dst_mtu(&rt->dst) < arg->mtu &&
25854b32b5adSMartin KaFai Lau 			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2586defb3519SDavid S. Miller 			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2587566cfd8fSSimon Arlott 		}
25884b32b5adSMartin KaFai Lau 	}
25891da177e4SLinus Torvalds 	return 0;
25901da177e4SLinus Torvalds }
25911da177e4SLinus Torvalds 
259295c96174SEric Dumazet void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
25931da177e4SLinus Torvalds {
2594c71099acSThomas Graf 	struct rt6_mtu_change_arg arg = {
2595c71099acSThomas Graf 		.dev = dev,
2596c71099acSThomas Graf 		.mtu = mtu,
2597c71099acSThomas Graf 	};
25981da177e4SLinus Torvalds 
25990c3584d5SLi RongQing 	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
26001da177e4SLinus Torvalds }
26011da177e4SLinus Torvalds 
2602ef7c79edSPatrick McHardy static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
26035176f91eSThomas Graf 	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
260486872cb5SThomas Graf 	[RTA_OIF]               = { .type = NLA_U32 },
2605ab364a6fSThomas Graf 	[RTA_IIF]		= { .type = NLA_U32 },
260686872cb5SThomas Graf 	[RTA_PRIORITY]          = { .type = NLA_U32 },
260786872cb5SThomas Graf 	[RTA_METRICS]           = { .type = NLA_NESTED },
260851ebd318SNicolas Dichtel 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
2609c78ba6d6SLubomir Rintel 	[RTA_PREF]              = { .type = NLA_U8 },
2610*19e42e45SRoopa Prabhu 	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
2611*19e42e45SRoopa Prabhu 	[RTA_ENCAP]		= { .type = NLA_NESTED },
261286872cb5SThomas Graf };
261386872cb5SThomas Graf 
261486872cb5SThomas Graf static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
261586872cb5SThomas Graf 			      struct fib6_config *cfg)
26161da177e4SLinus Torvalds {
261786872cb5SThomas Graf 	struct rtmsg *rtm;
261886872cb5SThomas Graf 	struct nlattr *tb[RTA_MAX+1];
2619c78ba6d6SLubomir Rintel 	unsigned int pref;
262086872cb5SThomas Graf 	int err;
26211da177e4SLinus Torvalds 
262286872cb5SThomas Graf 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
262386872cb5SThomas Graf 	if (err < 0)
262486872cb5SThomas Graf 		goto errout;
26251da177e4SLinus Torvalds 
262686872cb5SThomas Graf 	err = -EINVAL;
262786872cb5SThomas Graf 	rtm = nlmsg_data(nlh);
262886872cb5SThomas Graf 	memset(cfg, 0, sizeof(*cfg));
262986872cb5SThomas Graf 
263086872cb5SThomas Graf 	cfg->fc_table = rtm->rtm_table;
263186872cb5SThomas Graf 	cfg->fc_dst_len = rtm->rtm_dst_len;
263286872cb5SThomas Graf 	cfg->fc_src_len = rtm->rtm_src_len;
263386872cb5SThomas Graf 	cfg->fc_flags = RTF_UP;
263486872cb5SThomas Graf 	cfg->fc_protocol = rtm->rtm_protocol;
2635ef2c7d7bSNicolas Dichtel 	cfg->fc_type = rtm->rtm_type;
263686872cb5SThomas Graf 
2637ef2c7d7bSNicolas Dichtel 	if (rtm->rtm_type == RTN_UNREACHABLE ||
2638ef2c7d7bSNicolas Dichtel 	    rtm->rtm_type == RTN_BLACKHOLE ||
2639b4949ab2SNicolas Dichtel 	    rtm->rtm_type == RTN_PROHIBIT ||
2640b4949ab2SNicolas Dichtel 	    rtm->rtm_type == RTN_THROW)
264186872cb5SThomas Graf 		cfg->fc_flags |= RTF_REJECT;
264286872cb5SThomas Graf 
2643ab79ad14SMaciej Żenczykowski 	if (rtm->rtm_type == RTN_LOCAL)
2644ab79ad14SMaciej Żenczykowski 		cfg->fc_flags |= RTF_LOCAL;
2645ab79ad14SMaciej Żenczykowski 
26461f56a01fSMartin KaFai Lau 	if (rtm->rtm_flags & RTM_F_CLONED)
26471f56a01fSMartin KaFai Lau 		cfg->fc_flags |= RTF_CACHE;
26481f56a01fSMartin KaFai Lau 
264915e47304SEric W. Biederman 	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
265086872cb5SThomas Graf 	cfg->fc_nlinfo.nlh = nlh;
26513b1e0a65SYOSHIFUJI Hideaki 	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
265286872cb5SThomas Graf 
265386872cb5SThomas Graf 	if (tb[RTA_GATEWAY]) {
265467b61f6cSJiri Benc 		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
265586872cb5SThomas Graf 		cfg->fc_flags |= RTF_GATEWAY;
26561da177e4SLinus Torvalds 	}
265786872cb5SThomas Graf 
265886872cb5SThomas Graf 	if (tb[RTA_DST]) {
265986872cb5SThomas Graf 		int plen = (rtm->rtm_dst_len + 7) >> 3;
266086872cb5SThomas Graf 
266186872cb5SThomas Graf 		if (nla_len(tb[RTA_DST]) < plen)
266286872cb5SThomas Graf 			goto errout;
266386872cb5SThomas Graf 
266486872cb5SThomas Graf 		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
26651da177e4SLinus Torvalds 	}
266686872cb5SThomas Graf 
266786872cb5SThomas Graf 	if (tb[RTA_SRC]) {
266886872cb5SThomas Graf 		int plen = (rtm->rtm_src_len + 7) >> 3;
266986872cb5SThomas Graf 
267086872cb5SThomas Graf 		if (nla_len(tb[RTA_SRC]) < plen)
267186872cb5SThomas Graf 			goto errout;
267286872cb5SThomas Graf 
267386872cb5SThomas Graf 		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
26741da177e4SLinus Torvalds 	}
267586872cb5SThomas Graf 
2676c3968a85SDaniel Walter 	if (tb[RTA_PREFSRC])
267767b61f6cSJiri Benc 		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
2678c3968a85SDaniel Walter 
267986872cb5SThomas Graf 	if (tb[RTA_OIF])
268086872cb5SThomas Graf 		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
268186872cb5SThomas Graf 
268286872cb5SThomas Graf 	if (tb[RTA_PRIORITY])
268386872cb5SThomas Graf 		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
268486872cb5SThomas Graf 
268586872cb5SThomas Graf 	if (tb[RTA_METRICS]) {
268686872cb5SThomas Graf 		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
268786872cb5SThomas Graf 		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
26881da177e4SLinus Torvalds 	}
268986872cb5SThomas Graf 
269086872cb5SThomas Graf 	if (tb[RTA_TABLE])
269186872cb5SThomas Graf 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
269286872cb5SThomas Graf 
269351ebd318SNicolas Dichtel 	if (tb[RTA_MULTIPATH]) {
269451ebd318SNicolas Dichtel 		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
269551ebd318SNicolas Dichtel 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
269651ebd318SNicolas Dichtel 	}
269751ebd318SNicolas Dichtel 
2698c78ba6d6SLubomir Rintel 	if (tb[RTA_PREF]) {
2699c78ba6d6SLubomir Rintel 		pref = nla_get_u8(tb[RTA_PREF]);
2700c78ba6d6SLubomir Rintel 		if (pref != ICMPV6_ROUTER_PREF_LOW &&
2701c78ba6d6SLubomir Rintel 		    pref != ICMPV6_ROUTER_PREF_HIGH)
2702c78ba6d6SLubomir Rintel 			pref = ICMPV6_ROUTER_PREF_MEDIUM;
2703c78ba6d6SLubomir Rintel 		cfg->fc_flags |= RTF_PREF(pref);
2704c78ba6d6SLubomir Rintel 	}
2705c78ba6d6SLubomir Rintel 
2706*19e42e45SRoopa Prabhu 	if (tb[RTA_ENCAP])
2707*19e42e45SRoopa Prabhu 		cfg->fc_encap = tb[RTA_ENCAP];
2708*19e42e45SRoopa Prabhu 
2709*19e42e45SRoopa Prabhu 	if (tb[RTA_ENCAP_TYPE])
2710*19e42e45SRoopa Prabhu 		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2711*19e42e45SRoopa Prabhu 
271286872cb5SThomas Graf 	err = 0;
271386872cb5SThomas Graf errout:
271486872cb5SThomas Graf 	return err;
27151da177e4SLinus Torvalds }
27161da177e4SLinus Torvalds 
271751ebd318SNicolas Dichtel static int ip6_route_multipath(struct fib6_config *cfg, int add)
271851ebd318SNicolas Dichtel {
271951ebd318SNicolas Dichtel 	struct fib6_config r_cfg;
272051ebd318SNicolas Dichtel 	struct rtnexthop *rtnh;
272151ebd318SNicolas Dichtel 	int remaining;
272251ebd318SNicolas Dichtel 	int attrlen;
272351ebd318SNicolas Dichtel 	int err = 0, last_err = 0;
272451ebd318SNicolas Dichtel 
272535f1b4e9SMichal Kubeček 	remaining = cfg->fc_mp_len;
272651ebd318SNicolas Dichtel beginning:
272751ebd318SNicolas Dichtel 	rtnh = (struct rtnexthop *)cfg->fc_mp;
272851ebd318SNicolas Dichtel 
272951ebd318SNicolas Dichtel 	/* Parse a Multipath Entry */
273051ebd318SNicolas Dichtel 	while (rtnh_ok(rtnh, remaining)) {
273151ebd318SNicolas Dichtel 		memcpy(&r_cfg, cfg, sizeof(*cfg));
273251ebd318SNicolas Dichtel 		if (rtnh->rtnh_ifindex)
273351ebd318SNicolas Dichtel 			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
273451ebd318SNicolas Dichtel 
273551ebd318SNicolas Dichtel 		attrlen = rtnh_attrlen(rtnh);
273651ebd318SNicolas Dichtel 		if (attrlen > 0) {
273751ebd318SNicolas Dichtel 			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
273851ebd318SNicolas Dichtel 
273951ebd318SNicolas Dichtel 			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
274051ebd318SNicolas Dichtel 			if (nla) {
274167b61f6cSJiri Benc 				r_cfg.fc_gateway = nla_get_in6_addr(nla);
274251ebd318SNicolas Dichtel 				r_cfg.fc_flags |= RTF_GATEWAY;
274351ebd318SNicolas Dichtel 			}
2744*19e42e45SRoopa Prabhu 			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2745*19e42e45SRoopa Prabhu 			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2746*19e42e45SRoopa Prabhu 			if (nla)
2747*19e42e45SRoopa Prabhu 				r_cfg.fc_encap_type = nla_get_u16(nla);
274851ebd318SNicolas Dichtel 		}
274951ebd318SNicolas Dichtel 		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
275051ebd318SNicolas Dichtel 		if (err) {
275151ebd318SNicolas Dichtel 			last_err = err;
275251ebd318SNicolas Dichtel 			/* If we are trying to remove a route, do not stop the
275351ebd318SNicolas Dichtel 			 * loop when ip6_route_del() fails (because next hop is
275451ebd318SNicolas Dichtel 			 * already gone), we should try to remove all next hops.
275551ebd318SNicolas Dichtel 			 */
275651ebd318SNicolas Dichtel 			if (add) {
275751ebd318SNicolas Dichtel 				/* If add fails, we should try to delete all
275851ebd318SNicolas Dichtel 				 * next hops that have been already added.
275951ebd318SNicolas Dichtel 				 */
276051ebd318SNicolas Dichtel 				add = 0;
276135f1b4e9SMichal Kubeček 				remaining = cfg->fc_mp_len - remaining;
276251ebd318SNicolas Dichtel 				goto beginning;
276351ebd318SNicolas Dichtel 			}
276451ebd318SNicolas Dichtel 		}
27651a72418bSNicolas Dichtel 		/* Because each route is added like a single route we remove
276627596472SMichal Kubeček 		 * these flags after the first nexthop: if there is a collision,
276727596472SMichal Kubeček 		 * we have already failed to add the first nexthop:
276827596472SMichal Kubeček 		 * fib6_add_rt2node() has rejected it; when replacing, old
276927596472SMichal Kubeček 		 * nexthops have been replaced by first new, the rest should
277027596472SMichal Kubeček 		 * be added to it.
27711a72418bSNicolas Dichtel 		 */
277227596472SMichal Kubeček 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
277327596472SMichal Kubeček 						     NLM_F_REPLACE);
277451ebd318SNicolas Dichtel 		rtnh = rtnh_next(rtnh, &remaining);
277551ebd318SNicolas Dichtel 	}
277651ebd318SNicolas Dichtel 
277751ebd318SNicolas Dichtel 	return last_err;
277851ebd318SNicolas Dichtel }
277951ebd318SNicolas Dichtel 
2780661d2967SThomas Graf static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
27811da177e4SLinus Torvalds {
278286872cb5SThomas Graf 	struct fib6_config cfg;
278386872cb5SThomas Graf 	int err;
27841da177e4SLinus Torvalds 
278586872cb5SThomas Graf 	err = rtm_to_fib6_config(skb, nlh, &cfg);
278686872cb5SThomas Graf 	if (err < 0)
278786872cb5SThomas Graf 		return err;
278886872cb5SThomas Graf 
278951ebd318SNicolas Dichtel 	if (cfg.fc_mp)
279051ebd318SNicolas Dichtel 		return ip6_route_multipath(&cfg, 0);
279151ebd318SNicolas Dichtel 	else
279286872cb5SThomas Graf 		return ip6_route_del(&cfg);
27931da177e4SLinus Torvalds }
27941da177e4SLinus Torvalds 
2795661d2967SThomas Graf static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
27961da177e4SLinus Torvalds {
279786872cb5SThomas Graf 	struct fib6_config cfg;
279886872cb5SThomas Graf 	int err;
27991da177e4SLinus Torvalds 
280086872cb5SThomas Graf 	err = rtm_to_fib6_config(skb, nlh, &cfg);
280186872cb5SThomas Graf 	if (err < 0)
280286872cb5SThomas Graf 		return err;
280386872cb5SThomas Graf 
280451ebd318SNicolas Dichtel 	if (cfg.fc_mp)
280551ebd318SNicolas Dichtel 		return ip6_route_multipath(&cfg, 1);
280651ebd318SNicolas Dichtel 	else
280786872cb5SThomas Graf 		return ip6_route_add(&cfg);
28081da177e4SLinus Torvalds }
28091da177e4SLinus Torvalds 
2810*19e42e45SRoopa Prabhu static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
2811339bf98fSThomas Graf {
2812339bf98fSThomas Graf 	return NLMSG_ALIGN(sizeof(struct rtmsg))
2813339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_SRC */
2814339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_DST */
2815339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_GATEWAY */
2816339bf98fSThomas Graf 	       + nla_total_size(16) /* RTA_PREFSRC */
2817339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_TABLE */
2818339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_IIF */
2819339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_OIF */
2820339bf98fSThomas Graf 	       + nla_total_size(4) /* RTA_PRIORITY */
28216a2b9ce0SNoriaki TAKAMIYA 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2822ea697639SDaniel Borkmann 	       + nla_total_size(sizeof(struct rta_cacheinfo))
2823c78ba6d6SLubomir Rintel 	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2824*19e42e45SRoopa Prabhu 	       + nla_total_size(1) /* RTA_PREF */
2825*19e42e45SRoopa Prabhu 	       + lwtunnel_get_encap_size(rt->rt6i_lwtstate);
2826339bf98fSThomas Graf }
2827339bf98fSThomas Graf 
2828191cd582SBrian Haley static int rt6_fill_node(struct net *net,
2829191cd582SBrian Haley 			 struct sk_buff *skb, struct rt6_info *rt,
28300d51aa80SJamal Hadi Salim 			 struct in6_addr *dst, struct in6_addr *src,
283115e47304SEric W. Biederman 			 int iif, int type, u32 portid, u32 seq,
28327bc570c8SYOSHIFUJI Hideaki 			 int prefix, int nowait, unsigned int flags)
28331da177e4SLinus Torvalds {
28344b32b5adSMartin KaFai Lau 	u32 metrics[RTAX_MAX];
28351da177e4SLinus Torvalds 	struct rtmsg *rtm;
28361da177e4SLinus Torvalds 	struct nlmsghdr *nlh;
2837e3703b3dSThomas Graf 	long expires;
28389e762a4aSPatrick McHardy 	u32 table;
28391da177e4SLinus Torvalds 
28401da177e4SLinus Torvalds 	if (prefix) {	/* user wants prefix routes only */
28411da177e4SLinus Torvalds 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
28421da177e4SLinus Torvalds 			/* success since this is not a prefix route */
28431da177e4SLinus Torvalds 			return 1;
28441da177e4SLinus Torvalds 		}
28451da177e4SLinus Torvalds 	}
28461da177e4SLinus Torvalds 
284715e47304SEric W. Biederman 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
284838308473SDavid S. Miller 	if (!nlh)
284926932566SPatrick McHardy 		return -EMSGSIZE;
28502d7202bfSThomas Graf 
28512d7202bfSThomas Graf 	rtm = nlmsg_data(nlh);
28521da177e4SLinus Torvalds 	rtm->rtm_family = AF_INET6;
28531da177e4SLinus Torvalds 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
28541da177e4SLinus Torvalds 	rtm->rtm_src_len = rt->rt6i_src.plen;
28551da177e4SLinus Torvalds 	rtm->rtm_tos = 0;
2856c71099acSThomas Graf 	if (rt->rt6i_table)
28579e762a4aSPatrick McHardy 		table = rt->rt6i_table->tb6_id;
2858c71099acSThomas Graf 	else
28599e762a4aSPatrick McHardy 		table = RT6_TABLE_UNSPEC;
28609e762a4aSPatrick McHardy 	rtm->rtm_table = table;
2861c78679e8SDavid S. Miller 	if (nla_put_u32(skb, RTA_TABLE, table))
2862c78679e8SDavid S. Miller 		goto nla_put_failure;
2863ef2c7d7bSNicolas Dichtel 	if (rt->rt6i_flags & RTF_REJECT) {
2864ef2c7d7bSNicolas Dichtel 		switch (rt->dst.error) {
2865ef2c7d7bSNicolas Dichtel 		case -EINVAL:
2866ef2c7d7bSNicolas Dichtel 			rtm->rtm_type = RTN_BLACKHOLE;
2867ef2c7d7bSNicolas Dichtel 			break;
2868ef2c7d7bSNicolas Dichtel 		case -EACCES:
2869ef2c7d7bSNicolas Dichtel 			rtm->rtm_type = RTN_PROHIBIT;
2870ef2c7d7bSNicolas Dichtel 			break;
2871b4949ab2SNicolas Dichtel 		case -EAGAIN:
2872b4949ab2SNicolas Dichtel 			rtm->rtm_type = RTN_THROW;
2873b4949ab2SNicolas Dichtel 			break;
2874ef2c7d7bSNicolas Dichtel 		default:
28751da177e4SLinus Torvalds 			rtm->rtm_type = RTN_UNREACHABLE;
2876ef2c7d7bSNicolas Dichtel 			break;
2877ef2c7d7bSNicolas Dichtel 		}
2878ef2c7d7bSNicolas Dichtel 	}
2879ab79ad14SMaciej Żenczykowski 	else if (rt->rt6i_flags & RTF_LOCAL)
2880ab79ad14SMaciej Żenczykowski 		rtm->rtm_type = RTN_LOCAL;
2881d1918542SDavid S. Miller 	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
28821da177e4SLinus Torvalds 		rtm->rtm_type = RTN_LOCAL;
28831da177e4SLinus Torvalds 	else
28841da177e4SLinus Torvalds 		rtm->rtm_type = RTN_UNICAST;
28851da177e4SLinus Torvalds 	rtm->rtm_flags = 0;
28861da177e4SLinus Torvalds 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
28871da177e4SLinus Torvalds 	rtm->rtm_protocol = rt->rt6i_protocol;
28881da177e4SLinus Torvalds 	if (rt->rt6i_flags & RTF_DYNAMIC)
28891da177e4SLinus Torvalds 		rtm->rtm_protocol = RTPROT_REDIRECT;
2890f0396f60SDenis Ovsienko 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
2891f0396f60SDenis Ovsienko 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
28921da177e4SLinus Torvalds 			rtm->rtm_protocol = RTPROT_RA;
2893f0396f60SDenis Ovsienko 		else
2894f0396f60SDenis Ovsienko 			rtm->rtm_protocol = RTPROT_KERNEL;
2895f0396f60SDenis Ovsienko 	}
28961da177e4SLinus Torvalds 
28971da177e4SLinus Torvalds 	if (rt->rt6i_flags & RTF_CACHE)
28981da177e4SLinus Torvalds 		rtm->rtm_flags |= RTM_F_CLONED;
28991da177e4SLinus Torvalds 
29001da177e4SLinus Torvalds 	if (dst) {
2901930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_DST, dst))
2902c78679e8SDavid S. Miller 			goto nla_put_failure;
29031da177e4SLinus Torvalds 		rtm->rtm_dst_len = 128;
29041da177e4SLinus Torvalds 	} else if (rtm->rtm_dst_len)
2905930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
2906c78679e8SDavid S. Miller 			goto nla_put_failure;
29071da177e4SLinus Torvalds #ifdef CONFIG_IPV6_SUBTREES
29081da177e4SLinus Torvalds 	if (src) {
2909930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_SRC, src))
2910c78679e8SDavid S. Miller 			goto nla_put_failure;
29111da177e4SLinus Torvalds 		rtm->rtm_src_len = 128;
2912c78679e8SDavid S. Miller 	} else if (rtm->rtm_src_len &&
2913930345eaSJiri Benc 		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
2914c78679e8SDavid S. Miller 		goto nla_put_failure;
29151da177e4SLinus Torvalds #endif
29167bc570c8SYOSHIFUJI Hideaki 	if (iif) {
29177bc570c8SYOSHIFUJI Hideaki #ifdef CONFIG_IPV6_MROUTE
29187bc570c8SYOSHIFUJI Hideaki 		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
29198229efdaSBenjamin Thery 			int err = ip6mr_get_route(net, skb, rtm, nowait);
29207bc570c8SYOSHIFUJI Hideaki 			if (err <= 0) {
29217bc570c8SYOSHIFUJI Hideaki 				if (!nowait) {
29227bc570c8SYOSHIFUJI Hideaki 					if (err == 0)
29237bc570c8SYOSHIFUJI Hideaki 						return 0;
29247bc570c8SYOSHIFUJI Hideaki 					goto nla_put_failure;
29257bc570c8SYOSHIFUJI Hideaki 				} else {
29267bc570c8SYOSHIFUJI Hideaki 					if (err == -EMSGSIZE)
29277bc570c8SYOSHIFUJI Hideaki 						goto nla_put_failure;
29287bc570c8SYOSHIFUJI Hideaki 				}
29297bc570c8SYOSHIFUJI Hideaki 			}
29307bc570c8SYOSHIFUJI Hideaki 		} else
29317bc570c8SYOSHIFUJI Hideaki #endif
2932c78679e8SDavid S. Miller 			if (nla_put_u32(skb, RTA_IIF, iif))
2933c78679e8SDavid S. Miller 				goto nla_put_failure;
29347bc570c8SYOSHIFUJI Hideaki 	} else if (dst) {
29351da177e4SLinus Torvalds 		struct in6_addr saddr_buf;
2936c78679e8SDavid S. Miller 		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2937930345eaSJiri Benc 		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2938c78679e8SDavid S. Miller 			goto nla_put_failure;
2939c3968a85SDaniel Walter 	}
2940c3968a85SDaniel Walter 
2941c3968a85SDaniel Walter 	if (rt->rt6i_prefsrc.plen) {
2942c3968a85SDaniel Walter 		struct in6_addr saddr_buf;
29434e3fd7a0SAlexey Dobriyan 		saddr_buf = rt->rt6i_prefsrc.addr;
2944930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
2945c78679e8SDavid S. Miller 			goto nla_put_failure;
29461da177e4SLinus Torvalds 	}
29472d7202bfSThomas Graf 
29484b32b5adSMartin KaFai Lau 	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
29494b32b5adSMartin KaFai Lau 	if (rt->rt6i_pmtu)
29504b32b5adSMartin KaFai Lau 		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
29514b32b5adSMartin KaFai Lau 	if (rtnetlink_put_metrics(skb, metrics) < 0)
29522d7202bfSThomas Graf 		goto nla_put_failure;
29532d7202bfSThomas Graf 
2954dd0cbf29SYOSHIFUJI Hideaki / 吉藤英明 	if (rt->rt6i_flags & RTF_GATEWAY) {
2955930345eaSJiri Benc 		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
295694f826b8SEric Dumazet 			goto nla_put_failure;
295794f826b8SEric Dumazet 	}
29582d7202bfSThomas Graf 
2959c78679e8SDavid S. Miller 	if (rt->dst.dev &&
2960c78679e8SDavid S. Miller 	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2961c78679e8SDavid S. Miller 		goto nla_put_failure;
2962c78679e8SDavid S. Miller 	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2963c78679e8SDavid S. Miller 		goto nla_put_failure;
29648253947eSLi Wei 
29658253947eSLi Wei 	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
296669cdf8f9SYOSHIFUJI Hideaki 
296787a50699SDavid S. Miller 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2968e3703b3dSThomas Graf 		goto nla_put_failure;
29691da177e4SLinus Torvalds 
2970c78ba6d6SLubomir Rintel 	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2971c78ba6d6SLubomir Rintel 		goto nla_put_failure;
2972c78ba6d6SLubomir Rintel 
2973*19e42e45SRoopa Prabhu 	lwtunnel_fill_encap(skb, rt->rt6i_lwtstate);
2974*19e42e45SRoopa Prabhu 
2975053c095aSJohannes Berg 	nlmsg_end(skb, nlh);
2976053c095aSJohannes Berg 	return 0;
29772d7202bfSThomas Graf 
29782d7202bfSThomas Graf nla_put_failure:
297926932566SPatrick McHardy 	nlmsg_cancel(skb, nlh);
298026932566SPatrick McHardy 	return -EMSGSIZE;
29811da177e4SLinus Torvalds }
29821da177e4SLinus Torvalds 
29831b43af54SPatrick McHardy int rt6_dump_route(struct rt6_info *rt, void *p_arg)
29841da177e4SLinus Torvalds {
29851da177e4SLinus Torvalds 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
29861da177e4SLinus Torvalds 	int prefix;
29871da177e4SLinus Torvalds 
29882d7202bfSThomas Graf 	if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
29892d7202bfSThomas Graf 		struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
29901da177e4SLinus Torvalds 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
29911da177e4SLinus Torvalds 	} else
29921da177e4SLinus Torvalds 		prefix = 0;
29931da177e4SLinus Torvalds 
2994191cd582SBrian Haley 	return rt6_fill_node(arg->net,
2995191cd582SBrian Haley 		     arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
299615e47304SEric W. Biederman 		     NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
29977bc570c8SYOSHIFUJI Hideaki 		     prefix, 0, NLM_F_MULTI);
29981da177e4SLinus Torvalds }
29991da177e4SLinus Torvalds 
3000661d2967SThomas Graf static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
30011da177e4SLinus Torvalds {
30023b1e0a65SYOSHIFUJI Hideaki 	struct net *net = sock_net(in_skb->sk);
3003ab364a6fSThomas Graf 	struct nlattr *tb[RTA_MAX+1];
30041da177e4SLinus Torvalds 	struct rt6_info *rt;
3005ab364a6fSThomas Graf 	struct sk_buff *skb;
3006ab364a6fSThomas Graf 	struct rtmsg *rtm;
30074c9483b2SDavid S. Miller 	struct flowi6 fl6;
300872331bc0SShmulik Ladkani 	int err, iif = 0, oif = 0;
3009ab364a6fSThomas Graf 
3010ab364a6fSThomas Graf 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3011ab364a6fSThomas Graf 	if (err < 0)
3012ab364a6fSThomas Graf 		goto errout;
3013ab364a6fSThomas Graf 
3014ab364a6fSThomas Graf 	err = -EINVAL;
30154c9483b2SDavid S. Miller 	memset(&fl6, 0, sizeof(fl6));
3016ab364a6fSThomas Graf 
3017ab364a6fSThomas Graf 	if (tb[RTA_SRC]) {
3018ab364a6fSThomas Graf 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3019ab364a6fSThomas Graf 			goto errout;
3020ab364a6fSThomas Graf 
30214e3fd7a0SAlexey Dobriyan 		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
3022ab364a6fSThomas Graf 	}
3023ab364a6fSThomas Graf 
3024ab364a6fSThomas Graf 	if (tb[RTA_DST]) {
3025ab364a6fSThomas Graf 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3026ab364a6fSThomas Graf 			goto errout;
3027ab364a6fSThomas Graf 
30284e3fd7a0SAlexey Dobriyan 		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
3029ab364a6fSThomas Graf 	}
3030ab364a6fSThomas Graf 
3031ab364a6fSThomas Graf 	if (tb[RTA_IIF])
3032ab364a6fSThomas Graf 		iif = nla_get_u32(tb[RTA_IIF]);
3033ab364a6fSThomas Graf 
3034ab364a6fSThomas Graf 	if (tb[RTA_OIF])
303572331bc0SShmulik Ladkani 		oif = nla_get_u32(tb[RTA_OIF]);
3036ab364a6fSThomas Graf 
30372e47b291SLorenzo Colitti 	if (tb[RTA_MARK])
30382e47b291SLorenzo Colitti 		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
30392e47b291SLorenzo Colitti 
3040ab364a6fSThomas Graf 	if (iif) {
3041ab364a6fSThomas Graf 		struct net_device *dev;
304272331bc0SShmulik Ladkani 		int flags = 0;
304372331bc0SShmulik Ladkani 
30445578689aSDaniel Lezcano 		dev = __dev_get_by_index(net, iif);
3045ab364a6fSThomas Graf 		if (!dev) {
3046ab364a6fSThomas Graf 			err = -ENODEV;
3047ab364a6fSThomas Graf 			goto errout;
3048ab364a6fSThomas Graf 		}
304972331bc0SShmulik Ladkani 
305072331bc0SShmulik Ladkani 		fl6.flowi6_iif = iif;
305172331bc0SShmulik Ladkani 
305272331bc0SShmulik Ladkani 		if (!ipv6_addr_any(&fl6.saddr))
305372331bc0SShmulik Ladkani 			flags |= RT6_LOOKUP_F_HAS_SADDR;
305472331bc0SShmulik Ladkani 
305572331bc0SShmulik Ladkani 		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
305672331bc0SShmulik Ladkani 							       flags);
305772331bc0SShmulik Ladkani 	} else {
305872331bc0SShmulik Ladkani 		fl6.flowi6_oif = oif;
305972331bc0SShmulik Ladkani 
306072331bc0SShmulik Ladkani 		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
3061ab364a6fSThomas Graf 	}
30621da177e4SLinus Torvalds 
30631da177e4SLinus Torvalds 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
306438308473SDavid S. Miller 	if (!skb) {
306594e187c0SAmerigo Wang 		ip6_rt_put(rt);
3066ab364a6fSThomas Graf 		err = -ENOBUFS;
3067ab364a6fSThomas Graf 		goto errout;
3068ab364a6fSThomas Graf 	}
30691da177e4SLinus Torvalds 
30701da177e4SLinus Torvalds 	/* Reserve room for dummy headers, this skb can pass
30711da177e4SLinus Torvalds 	   through good chunk of routing engine.
30721da177e4SLinus Torvalds 	 */
3073459a98edSArnaldo Carvalho de Melo 	skb_reset_mac_header(skb);
30741da177e4SLinus Torvalds 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
30751da177e4SLinus Torvalds 
3076d8d1f30bSChangli Gao 	skb_dst_set(skb, &rt->dst);
30771da177e4SLinus Torvalds 
30784c9483b2SDavid S. Miller 	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
307915e47304SEric W. Biederman 			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
30807bc570c8SYOSHIFUJI Hideaki 			    nlh->nlmsg_seq, 0, 0, 0);
30811da177e4SLinus Torvalds 	if (err < 0) {
3082ab364a6fSThomas Graf 		kfree_skb(skb);
3083ab364a6fSThomas Graf 		goto errout;
30841da177e4SLinus Torvalds 	}
30851da177e4SLinus Torvalds 
308615e47304SEric W. Biederman 	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
3087ab364a6fSThomas Graf errout:
30881da177e4SLinus Torvalds 	return err;
30891da177e4SLinus Torvalds }
30901da177e4SLinus Torvalds 
309186872cb5SThomas Graf void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
30921da177e4SLinus Torvalds {
30931da177e4SLinus Torvalds 	struct sk_buff *skb;
30945578689aSDaniel Lezcano 	struct net *net = info->nl_net;
3095528c4cebSDenis V. Lunev 	u32 seq;
3096528c4cebSDenis V. Lunev 	int err;
30970d51aa80SJamal Hadi Salim 
3098528c4cebSDenis V. Lunev 	err = -ENOBUFS;
309938308473SDavid S. Miller 	seq = info->nlh ? info->nlh->nlmsg_seq : 0;
310086872cb5SThomas Graf 
3101*19e42e45SRoopa Prabhu 	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
310238308473SDavid S. Miller 	if (!skb)
310321713ebcSThomas Graf 		goto errout;
31041da177e4SLinus Torvalds 
3105191cd582SBrian Haley 	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
310615e47304SEric W. Biederman 				event, info->portid, seq, 0, 0, 0);
310726932566SPatrick McHardy 	if (err < 0) {
310826932566SPatrick McHardy 		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
310926932566SPatrick McHardy 		WARN_ON(err == -EMSGSIZE);
311026932566SPatrick McHardy 		kfree_skb(skb);
311126932566SPatrick McHardy 		goto errout;
311226932566SPatrick McHardy 	}
311315e47304SEric W. Biederman 	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
31145578689aSDaniel Lezcano 		    info->nlh, gfp_any());
31151ce85fe4SPablo Neira Ayuso 	return;
311621713ebcSThomas Graf errout:
311721713ebcSThomas Graf 	if (err < 0)
31185578689aSDaniel Lezcano 		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
31191da177e4SLinus Torvalds }
31201da177e4SLinus Torvalds 
31218ed67789SDaniel Lezcano static int ip6_route_dev_notify(struct notifier_block *this,
3122351638e7SJiri Pirko 				unsigned long event, void *ptr)
31238ed67789SDaniel Lezcano {
3124351638e7SJiri Pirko 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
3125c346dca1SYOSHIFUJI Hideaki 	struct net *net = dev_net(dev);
31268ed67789SDaniel Lezcano 
31278ed67789SDaniel Lezcano 	if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
3128d8d1f30bSChangli Gao 		net->ipv6.ip6_null_entry->dst.dev = dev;
31298ed67789SDaniel Lezcano 		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
31308ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3131d8d1f30bSChangli Gao 		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
31328ed67789SDaniel Lezcano 		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
3133d8d1f30bSChangli Gao 		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
31348ed67789SDaniel Lezcano 		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
31358ed67789SDaniel Lezcano #endif
31368ed67789SDaniel Lezcano 	}
31378ed67789SDaniel Lezcano 
31388ed67789SDaniel Lezcano 	return NOTIFY_OK;
31398ed67789SDaniel Lezcano }
31408ed67789SDaniel Lezcano 
31411da177e4SLinus Torvalds /*
31421da177e4SLinus Torvalds  *	/proc
31431da177e4SLinus Torvalds  */
31441da177e4SLinus Torvalds 
31451da177e4SLinus Torvalds #ifdef CONFIG_PROC_FS
31461da177e4SLinus Torvalds 
314733120b30SAlexey Dobriyan static const struct file_operations ipv6_route_proc_fops = {
314833120b30SAlexey Dobriyan 	.owner		= THIS_MODULE,
314933120b30SAlexey Dobriyan 	.open		= ipv6_route_open,
315033120b30SAlexey Dobriyan 	.read		= seq_read,
315133120b30SAlexey Dobriyan 	.llseek		= seq_lseek,
31528d2ca1d7SHannes Frederic Sowa 	.release	= seq_release_net,
315333120b30SAlexey Dobriyan };
315433120b30SAlexey Dobriyan 
31551da177e4SLinus Torvalds static int rt6_stats_seq_show(struct seq_file *seq, void *v)
31561da177e4SLinus Torvalds {
315769ddb805SDaniel Lezcano 	struct net *net = (struct net *)seq->private;
31581da177e4SLinus Torvalds 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
315969ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_nodes,
316069ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_route_nodes,
316169ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_alloc,
316269ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_entries,
316369ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_rt_cache,
3164fc66f95cSEric Dumazet 		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
316569ddb805SDaniel Lezcano 		   net->ipv6.rt6_stats->fib_discarded_routes);
31661da177e4SLinus Torvalds 
31671da177e4SLinus Torvalds 	return 0;
31681da177e4SLinus Torvalds }
31691da177e4SLinus Torvalds 
31701da177e4SLinus Torvalds static int rt6_stats_seq_open(struct inode *inode, struct file *file)
31711da177e4SLinus Torvalds {
3172de05c557SPavel Emelyanov 	return single_open_net(inode, file, rt6_stats_seq_show);
317369ddb805SDaniel Lezcano }
317469ddb805SDaniel Lezcano 
31759a32144eSArjan van de Ven static const struct file_operations rt6_stats_seq_fops = {
31761da177e4SLinus Torvalds 	.owner	 = THIS_MODULE,
31771da177e4SLinus Torvalds 	.open	 = rt6_stats_seq_open,
31781da177e4SLinus Torvalds 	.read	 = seq_read,
31791da177e4SLinus Torvalds 	.llseek	 = seq_lseek,
3180b6fcbdb4SPavel Emelyanov 	.release = single_release_net,
31811da177e4SLinus Torvalds };
31821da177e4SLinus Torvalds #endif	/* CONFIG_PROC_FS */
31831da177e4SLinus Torvalds 
31841da177e4SLinus Torvalds #ifdef CONFIG_SYSCTL
31851da177e4SLinus Torvalds 
31861da177e4SLinus Torvalds static
3187fe2c6338SJoe Perches int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
31881da177e4SLinus Torvalds 			      void __user *buffer, size_t *lenp, loff_t *ppos)
31891da177e4SLinus Torvalds {
3190c486da34SLucian Adrian Grijincu 	struct net *net;
3191c486da34SLucian Adrian Grijincu 	int delay;
3192c486da34SLucian Adrian Grijincu 	if (!write)
3193c486da34SLucian Adrian Grijincu 		return -EINVAL;
3194c486da34SLucian Adrian Grijincu 
3195c486da34SLucian Adrian Grijincu 	net = (struct net *)ctl->extra1;
3196c486da34SLucian Adrian Grijincu 	delay = net->ipv6.sysctl.flush_delay;
31978d65af78SAlexey Dobriyan 	proc_dointvec(ctl, write, buffer, lenp, ppos);
31982ac3ac8fSMichal Kubeček 	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
31991da177e4SLinus Torvalds 	return 0;
32001da177e4SLinus Torvalds }
32011da177e4SLinus Torvalds 
3202fe2c6338SJoe Perches struct ctl_table ipv6_route_table_template[] = {
32031da177e4SLinus Torvalds 	{
32041da177e4SLinus Torvalds 		.procname	=	"flush",
32054990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.flush_delay,
32061da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
320789c8b3a1SDave Jones 		.mode		=	0200,
32086d9f239aSAlexey Dobriyan 		.proc_handler	=	ipv6_sysctl_rtcache_flush
32091da177e4SLinus Torvalds 	},
32101da177e4SLinus Torvalds 	{
32111da177e4SLinus Torvalds 		.procname	=	"gc_thresh",
32129a7ec3a9SDaniel Lezcano 		.data		=	&ip6_dst_ops_template.gc_thresh,
32131da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
32141da177e4SLinus Torvalds 		.mode		=	0644,
32156d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec,
32161da177e4SLinus Torvalds 	},
32171da177e4SLinus Torvalds 	{
32181da177e4SLinus Torvalds 		.procname	=	"max_size",
32194990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
32201da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
32211da177e4SLinus Torvalds 		.mode		=	0644,
32226d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec,
32231da177e4SLinus Torvalds 	},
32241da177e4SLinus Torvalds 	{
32251da177e4SLinus Torvalds 		.procname	=	"gc_min_interval",
32264990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
32271da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
32281da177e4SLinus Torvalds 		.mode		=	0644,
32296d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
32301da177e4SLinus Torvalds 	},
32311da177e4SLinus Torvalds 	{
32321da177e4SLinus Torvalds 		.procname	=	"gc_timeout",
32334990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
32341da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
32351da177e4SLinus Torvalds 		.mode		=	0644,
32366d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
32371da177e4SLinus Torvalds 	},
32381da177e4SLinus Torvalds 	{
32391da177e4SLinus Torvalds 		.procname	=	"gc_interval",
32404990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
32411da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
32421da177e4SLinus Torvalds 		.mode		=	0644,
32436d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
32441da177e4SLinus Torvalds 	},
32451da177e4SLinus Torvalds 	{
32461da177e4SLinus Torvalds 		.procname	=	"gc_elasticity",
32474990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
32481da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
32491da177e4SLinus Torvalds 		.mode		=	0644,
3250f3d3f616SMin Zhang 		.proc_handler	=	proc_dointvec,
32511da177e4SLinus Torvalds 	},
32521da177e4SLinus Torvalds 	{
32531da177e4SLinus Torvalds 		.procname	=	"mtu_expires",
32544990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
32551da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
32561da177e4SLinus Torvalds 		.mode		=	0644,
32576d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_jiffies,
32581da177e4SLinus Torvalds 	},
32591da177e4SLinus Torvalds 	{
32601da177e4SLinus Torvalds 		.procname	=	"min_adv_mss",
32614990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
32621da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
32631da177e4SLinus Torvalds 		.mode		=	0644,
3264f3d3f616SMin Zhang 		.proc_handler	=	proc_dointvec,
32651da177e4SLinus Torvalds 	},
32661da177e4SLinus Torvalds 	{
32671da177e4SLinus Torvalds 		.procname	=	"gc_min_interval_ms",
32684990509fSDaniel Lezcano 		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
32691da177e4SLinus Torvalds 		.maxlen		=	sizeof(int),
32701da177e4SLinus Torvalds 		.mode		=	0644,
32716d9f239aSAlexey Dobriyan 		.proc_handler	=	proc_dointvec_ms_jiffies,
32721da177e4SLinus Torvalds 	},
3273f8572d8fSEric W. Biederman 	{ }
32741da177e4SLinus Torvalds };
32751da177e4SLinus Torvalds 
32762c8c1e72SAlexey Dobriyan struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
3277760f2d01SDaniel Lezcano {
3278760f2d01SDaniel Lezcano 	struct ctl_table *table;
3279760f2d01SDaniel Lezcano 
3280760f2d01SDaniel Lezcano 	table = kmemdup(ipv6_route_table_template,
3281760f2d01SDaniel Lezcano 			sizeof(ipv6_route_table_template),
3282760f2d01SDaniel Lezcano 			GFP_KERNEL);
32835ee09105SYOSHIFUJI Hideaki 
32845ee09105SYOSHIFUJI Hideaki 	if (table) {
32855ee09105SYOSHIFUJI Hideaki 		table[0].data = &net->ipv6.sysctl.flush_delay;
3286c486da34SLucian Adrian Grijincu 		table[0].extra1 = net;
328786393e52SAlexey Dobriyan 		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
32885ee09105SYOSHIFUJI Hideaki 		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
32895ee09105SYOSHIFUJI Hideaki 		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
32905ee09105SYOSHIFUJI Hideaki 		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
32915ee09105SYOSHIFUJI Hideaki 		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
32925ee09105SYOSHIFUJI Hideaki 		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
32935ee09105SYOSHIFUJI Hideaki 		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
32945ee09105SYOSHIFUJI Hideaki 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
32959c69fabeSAlexey Dobriyan 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3296464dc801SEric W. Biederman 
3297464dc801SEric W. Biederman 		/* Don't export sysctls to unprivileged users */
3298464dc801SEric W. Biederman 		if (net->user_ns != &init_user_ns)
3299464dc801SEric W. Biederman 			table[0].procname = NULL;
33005ee09105SYOSHIFUJI Hideaki 	}
33015ee09105SYOSHIFUJI Hideaki 
3302760f2d01SDaniel Lezcano 	return table;
3303760f2d01SDaniel Lezcano }
33041da177e4SLinus Torvalds #endif
33051da177e4SLinus Torvalds 
33062c8c1e72SAlexey Dobriyan static int __net_init ip6_route_net_init(struct net *net)
3307cdb18761SDaniel Lezcano {
3308633d424bSPavel Emelyanov 	int ret = -ENOMEM;
33098ed67789SDaniel Lezcano 
331086393e52SAlexey Dobriyan 	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
331186393e52SAlexey Dobriyan 	       sizeof(net->ipv6.ip6_dst_ops));
3312f2fc6a54SBenjamin Thery 
3313fc66f95cSEric Dumazet 	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3314fc66f95cSEric Dumazet 		goto out_ip6_dst_ops;
3315fc66f95cSEric Dumazet 
33168ed67789SDaniel Lezcano 	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
33178ed67789SDaniel Lezcano 					   sizeof(*net->ipv6.ip6_null_entry),
33188ed67789SDaniel Lezcano 					   GFP_KERNEL);
33198ed67789SDaniel Lezcano 	if (!net->ipv6.ip6_null_entry)
3320fc66f95cSEric Dumazet 		goto out_ip6_dst_entries;
3321d8d1f30bSChangli Gao 	net->ipv6.ip6_null_entry->dst.path =
33228ed67789SDaniel Lezcano 		(struct dst_entry *)net->ipv6.ip6_null_entry;
3323d8d1f30bSChangli Gao 	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
332462fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
332562fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
33268ed67789SDaniel Lezcano 
33278ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
33288ed67789SDaniel Lezcano 	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
33298ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_prohibit_entry),
33308ed67789SDaniel Lezcano 					       GFP_KERNEL);
333168fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_prohibit_entry)
333268fffc67SPeter Zijlstra 		goto out_ip6_null_entry;
3333d8d1f30bSChangli Gao 	net->ipv6.ip6_prohibit_entry->dst.path =
33348ed67789SDaniel Lezcano 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3335d8d1f30bSChangli Gao 	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
333662fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
333762fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
33388ed67789SDaniel Lezcano 
33398ed67789SDaniel Lezcano 	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
33408ed67789SDaniel Lezcano 					       sizeof(*net->ipv6.ip6_blk_hole_entry),
33418ed67789SDaniel Lezcano 					       GFP_KERNEL);
334268fffc67SPeter Zijlstra 	if (!net->ipv6.ip6_blk_hole_entry)
334368fffc67SPeter Zijlstra 		goto out_ip6_prohibit_entry;
3344d8d1f30bSChangli Gao 	net->ipv6.ip6_blk_hole_entry->dst.path =
33458ed67789SDaniel Lezcano 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3346d8d1f30bSChangli Gao 	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
334762fa8a84SDavid S. Miller 	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
334862fa8a84SDavid S. Miller 			 ip6_template_metrics, true);
33498ed67789SDaniel Lezcano #endif
33508ed67789SDaniel Lezcano 
3351b339a47cSPeter Zijlstra 	net->ipv6.sysctl.flush_delay = 0;
3352b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_max_size = 4096;
3353b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3354b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3355b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3356b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3357b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3358b339a47cSPeter Zijlstra 	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3359b339a47cSPeter Zijlstra 
33606891a346SBenjamin Thery 	net->ipv6.ip6_rt_gc_expire = 30*HZ;
33616891a346SBenjamin Thery 
33628ed67789SDaniel Lezcano 	ret = 0;
33638ed67789SDaniel Lezcano out:
33648ed67789SDaniel Lezcano 	return ret;
3365f2fc6a54SBenjamin Thery 
336668fffc67SPeter Zijlstra #ifdef CONFIG_IPV6_MULTIPLE_TABLES
336768fffc67SPeter Zijlstra out_ip6_prohibit_entry:
336868fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_prohibit_entry);
336968fffc67SPeter Zijlstra out_ip6_null_entry:
337068fffc67SPeter Zijlstra 	kfree(net->ipv6.ip6_null_entry);
337168fffc67SPeter Zijlstra #endif
3372fc66f95cSEric Dumazet out_ip6_dst_entries:
3373fc66f95cSEric Dumazet 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3374f2fc6a54SBenjamin Thery out_ip6_dst_ops:
3375f2fc6a54SBenjamin Thery 	goto out;
3376cdb18761SDaniel Lezcano }
3377cdb18761SDaniel Lezcano 
33782c8c1e72SAlexey Dobriyan static void __net_exit ip6_route_net_exit(struct net *net)
3379cdb18761SDaniel Lezcano {
33808ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_null_entry);
33818ed67789SDaniel Lezcano #ifdef CONFIG_IPV6_MULTIPLE_TABLES
33828ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_prohibit_entry);
33838ed67789SDaniel Lezcano 	kfree(net->ipv6.ip6_blk_hole_entry);
33848ed67789SDaniel Lezcano #endif
338541bb78b4SXiaotian Feng 	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3386cdb18761SDaniel Lezcano }
3387cdb18761SDaniel Lezcano 
3388d189634eSThomas Graf static int __net_init ip6_route_net_init_late(struct net *net)
3389d189634eSThomas Graf {
3390d189634eSThomas Graf #ifdef CONFIG_PROC_FS
3391d4beaa66SGao feng 	proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3392d4beaa66SGao feng 	proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3393d189634eSThomas Graf #endif
3394d189634eSThomas Graf 	return 0;
3395d189634eSThomas Graf }
3396d189634eSThomas Graf 
3397d189634eSThomas Graf static void __net_exit ip6_route_net_exit_late(struct net *net)
3398d189634eSThomas Graf {
3399d189634eSThomas Graf #ifdef CONFIG_PROC_FS
3400ece31ffdSGao feng 	remove_proc_entry("ipv6_route", net->proc_net);
3401ece31ffdSGao feng 	remove_proc_entry("rt6_stats", net->proc_net);
3402d189634eSThomas Graf #endif
3403d189634eSThomas Graf }
3404d189634eSThomas Graf 
3405cdb18761SDaniel Lezcano static struct pernet_operations ip6_route_net_ops = {
3406cdb18761SDaniel Lezcano 	.init = ip6_route_net_init,
3407cdb18761SDaniel Lezcano 	.exit = ip6_route_net_exit,
3408cdb18761SDaniel Lezcano };
3409cdb18761SDaniel Lezcano 
3410c3426b47SDavid S. Miller static int __net_init ipv6_inetpeer_init(struct net *net)
3411c3426b47SDavid S. Miller {
3412c3426b47SDavid S. Miller 	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3413c3426b47SDavid S. Miller 
3414c3426b47SDavid S. Miller 	if (!bp)
3415c3426b47SDavid S. Miller 		return -ENOMEM;
3416c3426b47SDavid S. Miller 	inet_peer_base_init(bp);
3417c3426b47SDavid S. Miller 	net->ipv6.peers = bp;
3418c3426b47SDavid S. Miller 	return 0;
3419c3426b47SDavid S. Miller }
3420c3426b47SDavid S. Miller 
3421c3426b47SDavid S. Miller static void __net_exit ipv6_inetpeer_exit(struct net *net)
3422c3426b47SDavid S. Miller {
3423c3426b47SDavid S. Miller 	struct inet_peer_base *bp = net->ipv6.peers;
3424c3426b47SDavid S. Miller 
3425c3426b47SDavid S. Miller 	net->ipv6.peers = NULL;
342656a6b248SDavid S. Miller 	inetpeer_invalidate_tree(bp);
3427c3426b47SDavid S. Miller 	kfree(bp);
3428c3426b47SDavid S. Miller }
3429c3426b47SDavid S. Miller 
34302b823f72SDavid S. Miller static struct pernet_operations ipv6_inetpeer_ops = {
3431c3426b47SDavid S. Miller 	.init	=	ipv6_inetpeer_init,
3432c3426b47SDavid S. Miller 	.exit	=	ipv6_inetpeer_exit,
3433c3426b47SDavid S. Miller };
3434c3426b47SDavid S. Miller 
3435d189634eSThomas Graf static struct pernet_operations ip6_route_net_late_ops = {
3436d189634eSThomas Graf 	.init = ip6_route_net_init_late,
3437d189634eSThomas Graf 	.exit = ip6_route_net_exit_late,
3438d189634eSThomas Graf };
3439d189634eSThomas Graf 
34408ed67789SDaniel Lezcano static struct notifier_block ip6_route_dev_notifier = {
34418ed67789SDaniel Lezcano 	.notifier_call = ip6_route_dev_notify,
34428ed67789SDaniel Lezcano 	.priority = 0,
34438ed67789SDaniel Lezcano };
34448ed67789SDaniel Lezcano 
3445433d49c3SDaniel Lezcano int __init ip6_route_init(void)
34461da177e4SLinus Torvalds {
3447433d49c3SDaniel Lezcano 	int ret;
34488d0b94afSMartin KaFai Lau 	int cpu;
3449433d49c3SDaniel Lezcano 
34509a7ec3a9SDaniel Lezcano 	ret = -ENOMEM;
34519a7ec3a9SDaniel Lezcano 	ip6_dst_ops_template.kmem_cachep =
34529a7ec3a9SDaniel Lezcano 		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
34539a7ec3a9SDaniel Lezcano 				  SLAB_HWCACHE_ALIGN, NULL);
34549a7ec3a9SDaniel Lezcano 	if (!ip6_dst_ops_template.kmem_cachep)
3455c19a28e1SFernando Carrijo 		goto out;
345614e50e57SDavid S. Miller 
3457fc66f95cSEric Dumazet 	ret = dst_entries_init(&ip6_dst_blackhole_ops);
34588ed67789SDaniel Lezcano 	if (ret)
3459bdb3289fSDaniel Lezcano 		goto out_kmem_cache;
3460bdb3289fSDaniel Lezcano 
3461c3426b47SDavid S. Miller 	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3462c3426b47SDavid S. Miller 	if (ret)
3463e8803b6cSDavid S. Miller 		goto out_dst_entries;
34642a0c451aSThomas Graf 
34657e52b33bSDavid S. Miller 	ret = register_pernet_subsys(&ip6_route_net_ops);
34667e52b33bSDavid S. Miller 	if (ret)
34677e52b33bSDavid S. Miller 		goto out_register_inetpeer;
3468c3426b47SDavid S. Miller 
34695dc121e9SArnaud Ebalard 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
34705dc121e9SArnaud Ebalard 
34718ed67789SDaniel Lezcano 	/* Registering of the loopback is done before this portion of code,
34728ed67789SDaniel Lezcano 	 * the loopback reference in rt6_info will not be taken, do it
34738ed67789SDaniel Lezcano 	 * manually for init_net */
3474d8d1f30bSChangli Gao 	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
34758ed67789SDaniel Lezcano 	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3476bdb3289fSDaniel Lezcano   #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3477d8d1f30bSChangli Gao 	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
34788ed67789SDaniel Lezcano 	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3479d8d1f30bSChangli Gao 	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
34808ed67789SDaniel Lezcano 	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3481bdb3289fSDaniel Lezcano   #endif
3482e8803b6cSDavid S. Miller 	ret = fib6_init();
3483433d49c3SDaniel Lezcano 	if (ret)
34848ed67789SDaniel Lezcano 		goto out_register_subsys;
3485433d49c3SDaniel Lezcano 
3486433d49c3SDaniel Lezcano 	ret = xfrm6_init();
3487433d49c3SDaniel Lezcano 	if (ret)
3488e8803b6cSDavid S. Miller 		goto out_fib6_init;
3489c35b7e72SDaniel Lezcano 
3490433d49c3SDaniel Lezcano 	ret = fib6_rules_init();
3491433d49c3SDaniel Lezcano 	if (ret)
3492433d49c3SDaniel Lezcano 		goto xfrm6_init;
34937e5449c2SDaniel Lezcano 
3494d189634eSThomas Graf 	ret = register_pernet_subsys(&ip6_route_net_late_ops);
3495d189634eSThomas Graf 	if (ret)
3496d189634eSThomas Graf 		goto fib6_rules_init;
3497d189634eSThomas Graf 
3498433d49c3SDaniel Lezcano 	ret = -ENOBUFS;
3499c7ac8679SGreg Rose 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3500c7ac8679SGreg Rose 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3501c7ac8679SGreg Rose 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3502d189634eSThomas Graf 		goto out_register_late_subsys;
3503433d49c3SDaniel Lezcano 
35048ed67789SDaniel Lezcano 	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3505cdb18761SDaniel Lezcano 	if (ret)
3506d189634eSThomas Graf 		goto out_register_late_subsys;
35078ed67789SDaniel Lezcano 
35088d0b94afSMartin KaFai Lau 	for_each_possible_cpu(cpu) {
35098d0b94afSMartin KaFai Lau 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
35108d0b94afSMartin KaFai Lau 
35118d0b94afSMartin KaFai Lau 		INIT_LIST_HEAD(&ul->head);
35128d0b94afSMartin KaFai Lau 		spin_lock_init(&ul->lock);
35138d0b94afSMartin KaFai Lau 	}
35148d0b94afSMartin KaFai Lau 
3515433d49c3SDaniel Lezcano out:
3516433d49c3SDaniel Lezcano 	return ret;
3517433d49c3SDaniel Lezcano 
3518d189634eSThomas Graf out_register_late_subsys:
3519d189634eSThomas Graf 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3520433d49c3SDaniel Lezcano fib6_rules_init:
3521433d49c3SDaniel Lezcano 	fib6_rules_cleanup();
3522433d49c3SDaniel Lezcano xfrm6_init:
3523433d49c3SDaniel Lezcano 	xfrm6_fini();
35242a0c451aSThomas Graf out_fib6_init:
35252a0c451aSThomas Graf 	fib6_gc_cleanup();
35268ed67789SDaniel Lezcano out_register_subsys:
35278ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
35287e52b33bSDavid S. Miller out_register_inetpeer:
35297e52b33bSDavid S. Miller 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
3530fc66f95cSEric Dumazet out_dst_entries:
3531fc66f95cSEric Dumazet 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3532433d49c3SDaniel Lezcano out_kmem_cache:
3533f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3534433d49c3SDaniel Lezcano 	goto out;
35351da177e4SLinus Torvalds }
35361da177e4SLinus Torvalds 
35371da177e4SLinus Torvalds void ip6_route_cleanup(void)
35381da177e4SLinus Torvalds {
35398ed67789SDaniel Lezcano 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
3540d189634eSThomas Graf 	unregister_pernet_subsys(&ip6_route_net_late_ops);
3541101367c2SThomas Graf 	fib6_rules_cleanup();
35421da177e4SLinus Torvalds 	xfrm6_fini();
35431da177e4SLinus Torvalds 	fib6_gc_cleanup();
3544c3426b47SDavid S. Miller 	unregister_pernet_subsys(&ipv6_inetpeer_ops);
35458ed67789SDaniel Lezcano 	unregister_pernet_subsys(&ip6_route_net_ops);
354641bb78b4SXiaotian Feng 	dst_entries_destroy(&ip6_dst_blackhole_ops);
3547f2fc6a54SBenjamin Thery 	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
35481da177e4SLinus Torvalds }
3549